diff options
Diffstat (limited to 'drivers/infiniband/core')
22 files changed, 642 insertions, 268 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index dda9e856e3fa..61667705d746 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -5,15 +5,16 @@ user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \ $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o -obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ - $(user_access-y) +obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y) +obj-$(CONFIG_INFINIBAND_USER_ACCESS_UCM) += ib_ucm.o $(user_access-y) ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ - security.o nldev.o restrack.o + nldev.o restrack.o +ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o @@ -36,4 +37,4 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ rdma_core.o uverbs_std_types.o uverbs_ioctl.o \ uverbs_ioctl_merge.o uverbs_std_types_cq.o \ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ - uverbs_std_types_mr.o + uverbs_std_types_mr.o uverbs_std_types_counters.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 88a7542d8c7b..4f32c4062fb6 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -56,7 +56,6 @@ struct addr_req { struct sockaddr_storage src_addr; struct sockaddr_storage dst_addr; struct rdma_dev_addr *addr; - struct rdma_addr_client *client; void *context; void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context); @@ -68,11 +67,8 @@ struct addr_req { static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0); -static void process_req(struct work_struct *work); - -static DEFINE_MUTEX(lock); +static DEFINE_SPINLOCK(lock); static LIST_HEAD(req_list); -static DECLARE_DELAYED_WORK(work, process_req); static struct workqueue_struct *addr_wq; static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = { @@ -112,7 +108,7 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh) memcpy(&gid, nla_data(curr), nla_len(curr)); } - mutex_lock(&lock); + spin_lock_bh(&lock); list_for_each_entry(req, &req_list, list) { if (nlh->nlmsg_seq != req->seq) continue; @@ -122,7 +118,7 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh) found = 1; break; } - mutex_unlock(&lock); + spin_unlock_bh(&lock); if (!found) pr_info("Couldn't find request waiting for DGID: %pI6\n", @@ -223,28 +219,6 @@ int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr) } EXPORT_SYMBOL(rdma_addr_size_kss); -static struct rdma_addr_client self; - -void rdma_addr_register_client(struct rdma_addr_client *client) -{ - atomic_set(&client->refcount, 1); - init_completion(&client->comp); -} -EXPORT_SYMBOL(rdma_addr_register_client); - -static inline void put_client(struct rdma_addr_client *client) -{ - if (atomic_dec_and_test(&client->refcount)) - complete(&client->comp); -} - -void rdma_addr_unregister_client(struct rdma_addr_client *client) -{ - put_client(client); - wait_for_completion(&client->comp); -} -EXPORT_SYMBOL(rdma_addr_unregister_client); - void rdma_copy_addr(struct rdma_dev_addr *dev_addr, const struct net_device *dev, const unsigned char *dst_dev_addr) @@ -302,7 +276,7 @@ int rdma_translate_ip(const struct sockaddr *addr, } EXPORT_SYMBOL(rdma_translate_ip); -static void set_timeout(struct delayed_work *delayed_work, unsigned long time) +static void set_timeout(struct addr_req *req, unsigned long time) { unsigned long delay; @@ -310,23 +284,15 @@ static void set_timeout(struct delayed_work *delayed_work, unsigned long time) if ((long)delay < 0) delay = 0; - mod_delayed_work(addr_wq, delayed_work, delay); + mod_delayed_work(addr_wq, &req->work, delay); } static void queue_req(struct addr_req *req) { - struct addr_req *temp_req; - - mutex_lock(&lock); - list_for_each_entry_reverse(temp_req, &req_list, list) { - if (time_after_eq(req->timeout, temp_req->timeout)) - break; - } - - list_add(&req->list, &temp_req->list); - - set_timeout(&req->work, req->timeout); - mutex_unlock(&lock); + spin_lock_bh(&lock); + list_add_tail(&req->list, &req_list); + set_timeout(req, req->timeout); + spin_unlock_bh(&lock); } static int ib_nl_fetch_ha(const struct dst_entry *dst, @@ -584,7 +550,6 @@ static void process_one_req(struct work_struct *_work) struct addr_req *req; struct sockaddr *src_in, *dst_in; - mutex_lock(&lock); req = container_of(_work, struct addr_req, work.work); if (req->status == -ENODATA) { @@ -596,72 +561,33 @@ static void process_one_req(struct work_struct *_work) req->status = -ETIMEDOUT; } else if (req->status == -ENODATA) { /* requeue the work for retrying again */ - set_timeout(&req->work, req->timeout); - mutex_unlock(&lock); + spin_lock_bh(&lock); + if (!list_empty(&req->list)) + set_timeout(req, req->timeout); + spin_unlock_bh(&lock); return; } } - list_del(&req->list); - mutex_unlock(&lock); - - /* - * Although the work will normally have been canceled by the - * workqueue, it can still be requeued as long as it is on the - * req_list, so it could have been requeued before we grabbed &lock. - * We need to cancel it after it is removed from req_list to really be - * sure it is safe to free. - */ - cancel_delayed_work(&req->work); req->callback(req->status, (struct sockaddr *)&req->src_addr, req->addr, req->context); - put_client(req->client); - kfree(req); -} - -static void process_req(struct work_struct *work) -{ - struct addr_req *req, *temp_req; - struct sockaddr *src_in, *dst_in; - struct list_head done_list; - - INIT_LIST_HEAD(&done_list); - - mutex_lock(&lock); - list_for_each_entry_safe(req, temp_req, &req_list, list) { - if (req->status == -ENODATA) { - src_in = (struct sockaddr *) &req->src_addr; - dst_in = (struct sockaddr *) &req->dst_addr; - req->status = addr_resolve(src_in, dst_in, req->addr, - true, req->seq); - if (req->status && time_after_eq(jiffies, req->timeout)) - req->status = -ETIMEDOUT; - else if (req->status == -ENODATA) { - set_timeout(&req->work, req->timeout); - continue; - } - } - list_move_tail(&req->list, &done_list); - } - - mutex_unlock(&lock); - - list_for_each_entry_safe(req, temp_req, &done_list, list) { - list_del(&req->list); - /* It is safe to cancel other work items from this work item - * because at a time there can be only one work item running - * with this single threaded work queue. + req->callback = NULL; + + spin_lock_bh(&lock); + if (!list_empty(&req->list)) { + /* + * Although the work will normally have been canceled by the + * workqueue, it can still be requeued as long as it is on the + * req_list. */ cancel_delayed_work(&req->work); - req->callback(req->status, (struct sockaddr *) &req->src_addr, - req->addr, req->context); - put_client(req->client); + list_del_init(&req->list); kfree(req); } + spin_unlock_bh(&lock); } -int rdma_resolve_ip(struct rdma_addr_client *client, - struct sockaddr *src_addr, struct sockaddr *dst_addr, +int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), @@ -693,8 +619,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client, req->addr = addr; req->callback = callback; req->context = context; - req->client = client; - atomic_inc(&client->refcount); INIT_DELAYED_WORK(&req->work, process_one_req); req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); @@ -710,7 +634,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client, break; default: ret = req->status; - atomic_dec(&client->refcount); goto err; } return ret; @@ -742,18 +665,36 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr, void rdma_addr_cancel(struct rdma_dev_addr *addr) { struct addr_req *req, *temp_req; + struct addr_req *found = NULL; - mutex_lock(&lock); + spin_lock_bh(&lock); list_for_each_entry_safe(req, temp_req, &req_list, list) { if (req->addr == addr) { - req->status = -ECANCELED; - req->timeout = jiffies; - list_move(&req->list, &req_list); - set_timeout(&req->work, req->timeout); + /* + * Removing from the list means we take ownership of + * the req + */ + list_del_init(&req->list); + found = req; break; } } - mutex_unlock(&lock); + spin_unlock_bh(&lock); + + if (!found) + return; + + /* + * sync canceling the work after removing it from the req_list + * guarentees no work is running and none will be started. + */ + cancel_delayed_work_sync(&found->work); + + if (found->callback) + found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr, + found->addr, found->context); + + kfree(found); } EXPORT_SYMBOL(rdma_addr_cancel); @@ -791,8 +732,8 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, dev_addr.net = &init_net; init_completion(&ctx.comp); - ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, - &dev_addr, 1000, resolve_cb, &ctx); + ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr, + &dev_addr, 1000, resolve_cb, &ctx); if (ret) return ret; @@ -810,11 +751,17 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, static int netevent_callback(struct notifier_block *self, unsigned long event, void *ctx) { + struct addr_req *req; + if (event == NETEVENT_NEIGH_UPDATE) { struct neighbour *neigh = ctx; - if (neigh->nud_state & NUD_VALID) - set_timeout(&work, jiffies); + if (neigh->nud_state & NUD_VALID) { + spin_lock_bh(&lock); + list_for_each_entry(req, &req_list, list) + set_timeout(req, jiffies); + spin_unlock_bh(&lock); + } } return 0; } @@ -830,14 +777,13 @@ int addr_init(void) return -ENOMEM; register_netevent_notifier(&nb); - rdma_addr_register_client(&self); return 0; } void addr_cleanup(void) { - rdma_addr_unregister_client(&self); unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); + WARN_ON(!list_empty(&req_list)); } diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 3330d97faa1e..71a34bee453d 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -125,6 +125,16 @@ const char *ib_cache_gid_type_str(enum ib_gid_type gid_type) } EXPORT_SYMBOL(ib_cache_gid_type_str); +/** rdma_is_zero_gid - Check if given GID is zero or not. + * @gid: GID to check + * Returns true if given GID is zero, returns false otherwise. + */ +bool rdma_is_zero_gid(const union ib_gid *gid) +{ + return !memcmp(gid, &zgid, sizeof(*gid)); +} +EXPORT_SYMBOL(rdma_is_zero_gid); + int ib_cache_gid_parse_type_str(const char *buf) { unsigned int i; @@ -149,6 +159,11 @@ int ib_cache_gid_parse_type_str(const char *buf) } EXPORT_SYMBOL(ib_cache_gid_parse_type_str); +static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port) +{ + return device->cache.ports[port - rdma_start_port(device)].gid; +} + static void del_roce_gid(struct ib_device *device, u8 port_num, struct ib_gid_table *table, int ix) { @@ -231,7 +246,7 @@ static int add_modify_gid(struct ib_gid_table *table, * So ignore such behavior for IB link layer and don't * fail the call, but don't add such entry to GID cache. */ - if (!memcmp(gid, &zgid, sizeof(*gid))) + if (rdma_is_zero_gid(gid)) return 0; } @@ -264,7 +279,7 @@ static void del_gid(struct ib_device *ib_dev, u8 port, if (rdma_protocol_roce(ib_dev, port)) del_roce_gid(ib_dev, port, table, ix); - memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid)); + memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid)); memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr)); table->data_vec[ix].context = NULL; } @@ -363,10 +378,10 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port, * IB spec version 1.3 section 4.1.1 point (6) and * section 12.7.10 and section 12.7.20 */ - if (!memcmp(gid, &zgid, sizeof(*gid))) + if (rdma_is_zero_gid(gid)) return -EINVAL; - table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; + table = rdma_gid_table(ib_dev, port); mutex_lock(&table->lock); @@ -433,7 +448,7 @@ _ib_cache_gid_del(struct ib_device *ib_dev, u8 port, int ret = 0; int ix; - table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; + table = rdma_gid_table(ib_dev, port); mutex_lock(&table->lock); @@ -472,7 +487,7 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, int ix; bool deleted = false; - table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; + table = rdma_gid_table(ib_dev, port); mutex_lock(&table->lock); @@ -496,7 +511,7 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, { struct ib_gid_table *table; - table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; + table = rdma_gid_table(ib_dev, port); if (index < 0 || index >= table->sz) return -EINVAL; @@ -589,7 +604,7 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev, if (!rdma_is_port_valid(ib_dev, port)) return -ENOENT; - table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; + table = rdma_gid_table(ib_dev, port); if (ndev) mask |= GID_ATTR_FIND_MASK_NETDEV; @@ -647,7 +662,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, !rdma_protocol_roce(ib_dev, port)) return -EPROTONOSUPPORT; - table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; + table = rdma_gid_table(ib_dev, port); read_lock_irqsave(&table->rwlock, flags); for (i = 0; i < table->sz; i++) { @@ -724,8 +739,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, mutex_lock(&table->lock); for (i = 0; i < table->sz; ++i) { - if (memcmp(&table->data_vec[i].gid, &zgid, - sizeof(table->data_vec[i].gid))) { + if (!rdma_is_zero_gid(&table->data_vec[i].gid)) { del_gid(ib_dev, port, table, i); deleted = true; } @@ -747,7 +761,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, unsigned int gid_type; unsigned long mask; - table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; + table = rdma_gid_table(ib_dev, port); mask = GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_DEFAULT | @@ -772,8 +786,8 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, } } -static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port, - struct ib_gid_table *table) +static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port, + struct ib_gid_table *table) { unsigned int i; unsigned long roce_gid_type_mask; @@ -783,8 +797,7 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port, roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port); num_default_gids = hweight_long(roce_gid_type_mask); for (i = 0; i < num_default_gids && i < table->sz; i++) { - struct ib_gid_table_entry *entry = - &table->data_vec[i]; + struct ib_gid_table_entry *entry = &table->data_vec[i]; entry->props |= GID_TABLE_ENTRY_DEFAULT; current_gid = find_next_bit(&roce_gid_type_mask, @@ -792,59 +805,42 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port, current_gid); entry->attr.gid_type = current_gid++; } +} - return 0; + +static void gid_table_release_one(struct ib_device *ib_dev) +{ + struct ib_gid_table *table; + u8 port; + + for (port = 0; port < ib_dev->phys_port_cnt; port++) { + table = ib_dev->cache.ports[port].gid; + release_gid_table(table); + ib_dev->cache.ports[port].gid = NULL; + } } static int _gid_table_setup_one(struct ib_device *ib_dev) { u8 port; struct ib_gid_table *table; - int err = 0; for (port = 0; port < ib_dev->phys_port_cnt; port++) { u8 rdma_port = port + rdma_start_port(ib_dev); - table = - alloc_gid_table( + table = alloc_gid_table( ib_dev->port_immutable[rdma_port].gid_tbl_len); - if (!table) { - err = -ENOMEM; + if (!table) goto rollback_table_setup; - } - err = gid_table_reserve_default(ib_dev, - port + rdma_start_port(ib_dev), - table); - if (err) - goto rollback_table_setup; + gid_table_reserve_default(ib_dev, rdma_port, table); ib_dev->cache.ports[port].gid = table; } - return 0; rollback_table_setup: - for (port = 0; port < ib_dev->phys_port_cnt; port++) { - table = ib_dev->cache.ports[port].gid; - - cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev), - table); - release_gid_table(table); - } - - return err; -} - -static void gid_table_release_one(struct ib_device *ib_dev) -{ - struct ib_gid_table *table; - u8 port; - - for (port = 0; port < ib_dev->phys_port_cnt; port++) { - table = ib_dev->cache.ports[port].gid; - release_gid_table(table); - ib_dev->cache.ports[port].gid = NULL; - } + gid_table_release_one(ib_dev); + return -ENOMEM; } static void gid_table_cleanup_one(struct ib_device *ib_dev) @@ -886,7 +882,7 @@ int ib_get_cached_gid(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - table = device->cache.ports[port_num - rdma_start_port(device)].gid; + table = rdma_gid_table(device, port_num); read_lock_irqsave(&table->rwlock, flags); res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr); read_unlock_irqrestore(&table->rwlock, flags); @@ -1104,7 +1100,7 @@ static int config_non_roce_gid_cache(struct ib_device *device, gid_attr.device = device; gid_attr.port_num = port; - table = device->cache.ports[port - rdma_start_port(device)].gid; + table = rdma_gid_table(device, port); mutex_lock(&table->lock); for (i = 0; i < gid_tbl_len; ++i) { @@ -1137,7 +1133,7 @@ static void ib_cache_update(struct ib_device *device, if (!rdma_is_port_valid(device, port)) return; - table = device->cache.ports[port - rdma_start_port(device)].gid; + table = rdma_gid_table(device, port); tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) @@ -1300,13 +1296,3 @@ void ib_cache_cleanup_one(struct ib_device *device) flush_workqueue(ib_wq); gid_table_cleanup_one(device); } - -void __init ib_cache_setup(void) -{ - roce_gid_mgmt_init(); -} - -void __exit ib_cache_cleanup(void) -{ - roce_gid_mgmt_cleanup(); -} diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 36a4d90a7b47..27a7b0a2e27a 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -452,6 +452,32 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv, cm_id_priv->private_data_len = private_data_len; } +static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, + struct ib_grh *grh, struct cm_av *av) +{ + struct rdma_ah_attr new_ah_attr; + int ret; + + av->port = port; + av->pkey_index = wc->pkey_index; + + /* + * av->ah_attr might be initialized based on past wc during incoming + * connect request or while sending out connect request. So initialize + * a new ah_attr on stack. If initialization fails, old ah_attr is + * used for sending any responses. If initialization is successful, + * than new ah_attr is used by overwriting old one. + */ + ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device, + port->port_num, wc, + grh, &new_ah_attr); + if (ret) + return ret; + + memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr)); + return 0; +} + static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, struct ib_grh *grh, struct cm_av *av) { @@ -509,6 +535,7 @@ static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path) static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, struct cm_id_private *cm_id_priv) { + struct rdma_ah_attr new_ah_attr; struct cm_device *cm_dev; struct cm_port *port; int ret; @@ -524,15 +551,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, return ret; av->port = port; + + /* + * av->ah_attr might be initialized based on wc or during + * request processing time. So initialize a new ah_attr on stack. + * If initialization fails, old ah_attr is used for sending any + * responses. If initialization is successful, than new ah_attr + * is used by overwriting the old one. + */ ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path, - &av->ah_attr); + &new_ah_attr); if (ret) return ret; av->timeout = path->packet_life_time + 1; ret = add_cm_id_to_port_list(cm_id_priv, av, port); - return ret; + if (ret) + return ret; + memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr)); + return 0; } static int cm_alloc_id(struct cm_id_private *cm_id_priv) @@ -1669,7 +1707,9 @@ static void cm_process_work(struct cm_id_private *cm_id_priv, spin_lock_irq(&cm_id_priv->lock); work = cm_dequeue_work(cm_id_priv); spin_unlock_irq(&cm_id_priv->lock); - BUG_ON(!work); + if (!work) + return; + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); cm_free_work(work); @@ -3189,12 +3229,6 @@ static int cm_lap_handler(struct cm_work *work) if (!cm_id_priv) return -EINVAL; - ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc, - work->mad_recv_wc->recv_buf.grh, - &cm_id_priv->av); - if (ret) - goto deref; - param = &work->cm_event.param.lap_rcvd; memset(&work->path[0], 0, sizeof(work->path[1])); cm_path_set_rec_type(work->port->cm_dev->ib_device, @@ -3239,10 +3273,16 @@ static int cm_lap_handler(struct cm_work *work) goto unlock; } - cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; - cm_id_priv->tid = lap_msg->hdr.tid; + ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, + &cm_id_priv->av); + if (ret) + goto unlock; + cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, cm_id_priv); + cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; + cm_id_priv->tid = lap_msg->hdr.tid; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a693fcd4c513..6813ee717a38 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -146,6 +146,34 @@ const void *rdma_consumer_reject_data(struct rdma_cm_id *id, } EXPORT_SYMBOL(rdma_consumer_reject_data); +/** + * rdma_iw_cm_id() - return the iw_cm_id pointer for this cm_id. + * @id: Communication Identifier + */ +struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id->device->node_type == RDMA_NODE_RNIC) + return id_priv->cm_id.iw; + return NULL; +} +EXPORT_SYMBOL(rdma_iw_cm_id); + +/** + * rdma_res_to_id() - return the rdma_cm_id pointer for this restrack. + * @res: rdma resource tracking entry pointer + */ +struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res) +{ + struct rdma_id_private *id_priv = + container_of(res, struct rdma_id_private, res); + + return &id_priv->id; +} +EXPORT_SYMBOL(rdma_res_to_id); + static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device, void *client_data); @@ -156,7 +184,6 @@ static struct ib_client cma_client = { }; static struct ib_sa_client sa_client; -static struct rdma_addr_client addr_client; static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); @@ -2103,7 +2130,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) event.param.conn.responder_resources = iw_event->ord; break; default: - BUG_ON(1); + goto out; } event.status = iw_event->status; @@ -2936,7 +2963,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, if (dst_addr->sa_family == AF_IB) { ret = cma_resolve_ib_addr(id_priv); } else { - ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), + ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, &id->route.addr.dev_addr, timeout_ms, addr_handler, id_priv); } @@ -4573,7 +4600,6 @@ static int __init cma_init(void) goto err_wq; ib_sa_register_client(&sa_client); - rdma_addr_register_client(&addr_client); register_netdevice_notifier(&cma_nb); ret = ib_register_client(&cma_client); @@ -4587,7 +4613,6 @@ static int __init cma_init(void) err: unregister_netdevice_notifier(&cma_nb); - rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); err_wq: destroy_workqueue(cma_wq); @@ -4600,7 +4625,6 @@ static void __exit cma_cleanup(void) rdma_nl_unregister(RDMA_NL_RDMA_CM); ib_unregister_client(&cma_client); unregister_netdevice_notifier(&cma_nb); - rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); unregister_pernet_subsys(&cma_pernet_operations); destroy_workqueue(cma_wq); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 54163a6e4067..fae417a391fb 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -88,9 +88,6 @@ int ib_device_register_sysfs(struct ib_device *device, u8, struct kobject *)); void ib_device_unregister_sysfs(struct ib_device *device); -void ib_cache_setup(void); -void ib_cache_cleanup(void); - typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ea9fbcfb21bd..84f51386e1e3 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1225,7 +1225,7 @@ static int __init ib_core_init(void) nldev_init(); rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); - ib_cache_setup(); + roce_gid_mgmt_init(); return 0; @@ -1248,7 +1248,7 @@ err: static void __exit ib_core_cleanup(void) { - ib_cache_cleanup(); + roce_gid_mgmt_cleanup(); nldev_exit(); rdma_nl_unregister(RDMA_NL_LS); unregister_lsm_notifier(&ibdev_lsm_nb); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index b28452a55a08..f742ae7a768b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -651,7 +651,6 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list) struct ib_mad_queue *mad_queue; unsigned long flags; - BUG_ON(!mad_list->mad_queue); mad_queue = mad_list->mad_queue; spin_lock_irqsave(&mad_queue->lock, flags); list_del(&mad_list->list); @@ -1557,7 +1556,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, mad_reg_req->oui, 3)) { method = &(*vendor_table)->vendor_class[ vclass]->method_table[i]; - BUG_ON(!*method); + if (!*method) + goto error3; goto check_in_use; } } @@ -1567,10 +1567,12 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, vclass]->oui[i])) { method = &(*vendor_table)->vendor_class[ vclass]->method_table[i]; - BUG_ON(*method); /* Allocate method table for this OUI */ - if ((ret = allocate_method_table(method))) - goto error3; + if (!*method) { + ret = allocate_method_table(method); + if (ret) + goto error3; + } memcpy((*vendor_table)->vendor_class[vclass]->oui[i], mad_reg_req->oui, 3); goto check_in_use; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index eb567765f45c..340c7bea45ab 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -98,8 +98,83 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ }, + [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING, + .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN }, + [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 }, + [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, + [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, }; +static int put_driver_name_print_type(struct sk_buff *msg, const char *name, + enum rdma_nldev_print_type print_type) +{ + if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name)) + return -EMSGSIZE; + if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC && + nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type)) + return -EMSGSIZE; + + return 0; +} + +static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, + enum rdma_nldev_print_type print_type, + u32 value) +{ + if (put_driver_name_print_type(msg, name, print_type)) + return -EMSGSIZE; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value)) + return -EMSGSIZE; + + return 0; +} + +static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, + enum rdma_nldev_print_type print_type, + u64 value) +{ + if (put_driver_name_print_type(msg, name, print_type)) + return -EMSGSIZE; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value, + RDMA_NLDEV_ATTR_PAD)) + return -EMSGSIZE; + + return 0; +} + +int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value) +{ + return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, + value); +} +EXPORT_SYMBOL(rdma_nl_put_driver_u32); + +int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name, + u32 value) +{ + return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX, + value); +} +EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex); + +int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value) +{ + return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, + value); +} +EXPORT_SYMBOL(rdma_nl_put_driver_u64); + +int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value) +{ + return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX, + value); +} +EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex); + static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) @@ -122,7 +197,8 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64)); if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, - device->attrs.device_cap_flags, 0)) + device->attrs.device_cap_flags, + RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; ib_get_device_fw_str(device, fw); @@ -131,10 +207,12 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) return -EMSGSIZE; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID, - be64_to_cpu(device->node_guid), 0)) + be64_to_cpu(device->node_guid), + RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, - be64_to_cpu(device->attrs.sys_image_guid), 0)) + be64_to_cpu(device->attrs.sys_image_guid), + RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type)) return -EMSGSIZE; @@ -161,11 +239,11 @@ static int fill_port_info(struct sk_buff *msg, BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, - (u64)attr.port_cap_flags, 0)) + (u64)attr.port_cap_flags, RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; if (rdma_protocol_ib(device, port) && nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, - attr.subnet_prefix, 0)) + attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; if (rdma_protocol_ib(device, port)) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid)) @@ -209,8 +287,8 @@ static int fill_res_info_entry(struct sk_buff *msg, if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name)) goto err; - if (nla_put_u64_64bit(msg, - RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0)) + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, + RDMA_NLDEV_ATTR_PAD)) goto err; nla_nest_end(msg, entry_attr); @@ -282,6 +360,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb, struct rdma_restrack_entry *res, uint32_t port) { struct ib_qp *qp = container_of(res, struct ib_qp, res); + struct rdma_restrack_root *resroot = &qp->device->res; struct ib_qp_init_attr qp_init_attr; struct nlattr *entry_attr; struct ib_qp_attr qp_attr; @@ -331,6 +410,9 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; + if (resroot->fill_res_entry(msg, res)) + goto err; + nla_nest_end(msg, entry_attr); return 0; @@ -346,6 +428,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, { struct rdma_id_private *id_priv = container_of(res, struct rdma_id_private, res); + struct rdma_restrack_root *resroot = &id_priv->id.device->res; struct rdma_cm_id *cm_id = &id_priv->id; struct nlattr *entry_attr; @@ -387,6 +470,9 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, if (fill_res_name_pid(msg, res)) goto err; + if (resroot->fill_res_entry(msg, res)) + goto err; + nla_nest_end(msg, entry_attr); return 0; @@ -400,6 +486,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb, struct rdma_restrack_entry *res, uint32_t port) { struct ib_cq *cq = container_of(res, struct ib_cq, res); + struct rdma_restrack_root *resroot = &cq->device->res; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY); @@ -409,7 +496,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb, if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe)) goto err; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, - atomic_read(&cq->usecnt), 0)) + atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD)) goto err; /* Poll context is only valid for kernel CQs */ @@ -420,6 +507,9 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; + if (resroot->fill_res_entry(msg, res)) + goto err; + nla_nest_end(msg, entry_attr); return 0; @@ -433,6 +523,7 @@ static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb, struct rdma_restrack_entry *res, uint32_t port) { struct ib_mr *mr = container_of(res, struct ib_mr, res); + struct rdma_restrack_root *resroot = &mr->pd->device->res; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY); @@ -444,17 +535,18 @@ static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb, goto err; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)) goto err; - if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA, - mr->iova, 0)) - goto err; } - if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0)) + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, + RDMA_NLDEV_ATTR_PAD)) goto err; if (fill_res_name_pid(msg, res)) goto err; + if (resroot->fill_res_entry(msg, res)) + goto err; + nla_nest_end(msg, entry_attr); return 0; @@ -468,6 +560,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb, struct rdma_restrack_entry *res, uint32_t port) { struct ib_pd *pd = container_of(res, struct ib_pd, res); + struct rdma_restrack_root *resroot = &pd->device->res; struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY); @@ -484,7 +577,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb, goto err; } if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, - atomic_read(&pd->usecnt), 0)) + atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD)) goto err; if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, @@ -494,6 +587,9 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb, if (fill_res_name_pid(msg, res)) goto err; + if (resroot->fill_res_entry(msg, res)) + goto err; + nla_nest_end(msg, entry_attr); return 0; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index efddd13e3edb..3b7fa0ccaa08 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. */ @@ -12,9 +12,16 @@ #include "cma_priv.h" +static int fill_res_noop(struct sk_buff *msg, + struct rdma_restrack_entry *entry) +{ + return 0; +} + void rdma_restrack_init(struct rdma_restrack_root *res) { init_rwsem(&res->rwsem); + res->fill_res_entry = fill_res_noop; } static const char *type2str(enum rdma_restrack_type type) diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index c0e4fd55e2cc..a4fbdc5d28fa 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -44,8 +44,6 @@ static struct workqueue_struct *gid_cache_wq; -static struct workqueue_struct *gid_cache_wq; - enum gid_op_type { GID_DEL = 0, GID_ADD diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index b61dda6b04fc..9b0bea8303e0 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -30,8 +30,6 @@ * SOFTWARE. */ -#ifdef CONFIG_SECURITY_INFINIBAND - #include <linux/security.h> #include <linux/completion.h> #include <linux/list.h> @@ -751,5 +749,3 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index) pkey_index, map->agent.security); } - -#endif /* CONFIG_SECURITY_INFINIBAND */ diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index eab43b17e9cf..ec8fb289621f 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -235,7 +235,7 @@ static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) return NULL; mutex_lock(&mut); - mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL); + mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL); mutex_unlock(&mut); if (mc->id < 0) goto error; @@ -1421,6 +1421,10 @@ static ssize_t ucma_process_join(struct ucma_file *file, goto err3; } + mutex_lock(&mut); + idr_replace(&multicast_idr, mc, mc->id); + mutex_unlock(&mut); + mutex_unlock(&file->mut); ucma_put_ctx(ctx); return 0; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 2b6c9b516070..54ab6335c48d 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -64,8 +64,6 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d } sg_free_table(&umem->sg_head); - return; - } /** @@ -119,16 +117,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, umem->length = size; umem->address = addr; umem->page_shift = PAGE_SHIFT; - /* - * We ask for writable memory if any of the following - * access flags are set. "Local write" and "remote write" - * obviously require write access. "Remote atomic" can do - * things like fetch and add, which will modify memory, and - * "MW bind" can change permissions by binding a window. - */ - umem->writable = !!(access & - (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND)); + umem->writable = ib_access_writable(access); if (access & IB_ACCESS_ON_DEMAND) { ret = ib_umem_odp_get(context, umem, access); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index cfb51618ab7a..c0d40fc3a53a 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -263,6 +263,7 @@ struct ib_uverbs_flow_spec { struct ib_uverbs_flow_spec_action_tag flow_tag; struct ib_uverbs_flow_spec_action_drop drop; struct ib_uverbs_flow_spec_action_handle action; + struct ib_uverbs_flow_spec_action_count flow_count; }; }; @@ -287,6 +288,7 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL); extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD); extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION); extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM); +extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS); #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e3662a8ee465..3179a95c6f5e 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2748,43 +2748,82 @@ out_put: struct ib_uflow_resources { size_t max; size_t num; - struct ib_flow_action *collection[0]; + size_t collection_num; + size_t counters_num; + struct ib_counters **counters; + struct ib_flow_action **collection; }; static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs) { struct ib_uflow_resources *resources; - resources = - kmalloc(struct_size(resources, collection, num_specs), - GFP_KERNEL); + resources = kzalloc(sizeof(*resources), GFP_KERNEL); if (!resources) - return NULL; + goto err_res; + + resources->counters = + kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL); + + if (!resources->counters) + goto err_cnt; + + resources->collection = + kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL); + + if (!resources->collection) + goto err_collection; - resources->num = 0; resources->max = num_specs; return resources; + +err_collection: + kfree(resources->counters); +err_cnt: + kfree(resources); +err_res: + return NULL; } void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res) { unsigned int i; - for (i = 0; i < uflow_res->num; i++) + for (i = 0; i < uflow_res->collection_num; i++) atomic_dec(&uflow_res->collection[i]->usecnt); + for (i = 0; i < uflow_res->counters_num; i++) + atomic_dec(&uflow_res->counters[i]->usecnt); + + kfree(uflow_res->collection); + kfree(uflow_res->counters); kfree(uflow_res); } static void flow_resources_add(struct ib_uflow_resources *uflow_res, - struct ib_flow_action *action) + enum ib_flow_spec_type type, + void *ibobj) { WARN_ON(uflow_res->num >= uflow_res->max); - atomic_inc(&action->usecnt); - uflow_res->collection[uflow_res->num++] = action; + switch (type) { + case IB_FLOW_SPEC_ACTION_HANDLE: + atomic_inc(&((struct ib_flow_action *)ibobj)->usecnt); + uflow_res->collection[uflow_res->collection_num++] = + (struct ib_flow_action *)ibobj; + break; + case IB_FLOW_SPEC_ACTION_COUNT: + atomic_inc(&((struct ib_counters *)ibobj)->usecnt); + uflow_res->counters[uflow_res->counters_num++] = + (struct ib_counters *)ibobj; + break; + default: + WARN_ON(1); + } + + uflow_res->num++; } static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, @@ -2821,9 +2860,29 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, return -EINVAL; ib_spec->action.size = sizeof(struct ib_flow_spec_action_handle); - flow_resources_add(uflow_res, ib_spec->action.act); + flow_resources_add(uflow_res, + IB_FLOW_SPEC_ACTION_HANDLE, + ib_spec->action.act); uobj_put_obj_read(ib_spec->action.act); break; + case IB_FLOW_SPEC_ACTION_COUNT: + if (kern_spec->flow_count.size != + sizeof(struct ib_uverbs_flow_spec_action_count)) + return -EINVAL; + ib_spec->flow_count.counters = + uobj_get_obj_read(counters, + UVERBS_OBJECT_COUNTERS, + kern_spec->flow_count.handle, + ucontext); + if (!ib_spec->flow_count.counters) + return -EINVAL; + ib_spec->flow_count.size = + sizeof(struct ib_flow_spec_action_count); + flow_resources_add(uflow_res, + IB_FLOW_SPEC_ACTION_COUNT, + ib_spec->flow_count.counters); + uobj_put_obj_read(ib_spec->flow_count.counters); + break; default: return -EINVAL; } @@ -2948,6 +3007,28 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, memcpy(&ib_spec->esp.val, kern_spec_val, actual_filter_sz); memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz); break; + case IB_FLOW_SPEC_GRE: + ib_filter_sz = offsetof(struct ib_flow_gre_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->gre.size = sizeof(struct ib_flow_spec_gre); + memcpy(&ib_spec->gre.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->gre.mask, kern_spec_mask, actual_filter_sz); + break; + case IB_FLOW_SPEC_MPLS: + ib_filter_sz = offsetof(struct ib_flow_mpls_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->mpls.size = sizeof(struct ib_flow_spec_mpls); + memcpy(&ib_spec->mpls.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->mpls.mask, kern_spec_mask, actual_filter_sz); + break; default: return -EINVAL; } @@ -3507,6 +3588,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, uflow_res); if (err) goto err_free; + flow_attr->size += ((union ib_flow_spec *) ib_spec)->size; cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size; @@ -3519,11 +3601,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, err = -EINVAL; goto err_free; } - flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); + + flow_id = qp->device->create_flow(qp, flow_attr, + IB_FLOW_DOMAIN_USER, uhw); + if (IS_ERR(flow_id)) { err = PTR_ERR(flow_id); goto err_free; } + atomic_inc(&qp->usecnt); + flow_id->qp = qp; flow_id->uobject = uobj; uobj->object = flow_id; uflow = container_of(uobj, typeof(*uflow), uobject); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 4445d8ee9314..3ae2339dd27a 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -41,6 +41,8 @@ #include <linux/fs.h> #include <linux/poll.h> #include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/sched/task.h> #include <linux/file.h> #include <linux/cdev.h> #include <linux/anon_inodes.h> @@ -1090,6 +1092,44 @@ err: return; } +static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ + struct ib_device *ib_dev = ibcontext->device; + struct task_struct *owning_process = NULL; + struct mm_struct *owning_mm = NULL; + + owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); + if (!owning_process) + return; + + owning_mm = get_task_mm(owning_process); + if (!owning_mm) { + pr_info("no mm, disassociate ucontext is pending task termination\n"); + while (1) { + put_task_struct(owning_process); + usleep_range(1000, 2000); + owning_process = get_pid_task(ibcontext->tgid, + PIDTYPE_PID); + if (!owning_process || + owning_process->state == TASK_DEAD) { + pr_info("disassociate ucontext done, task was terminated\n"); + /* in case task was dead need to release the + * task struct. + */ + if (owning_process) + put_task_struct(owning_process); + return; + } + } + } + + down_write(&owning_mm->mmap_sem); + ib_dev->disassociate_ucontext(ibcontext); + up_write(&owning_mm->mmap_sem); + mmput(owning_mm); + put_task_struct(owning_process); +} + static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { @@ -1130,7 +1170,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, * (e.g mmput). */ ib_uverbs_event_handler(&file->event_handler, &event); - ib_dev->disassociate_ucontext(ucontext); + ib_uverbs_disassociate_ucontext(ucontext); mutex_lock(&file->cleanup_mutex); ib_uverbs_cleanup_ucontext(file, ucontext, true); mutex_unlock(&file->cleanup_mutex); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 569f48bd821e..b570acbd94af 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -302,7 +302,8 @@ static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), - &UVERBS_OBJECT(UVERBS_OBJECT_DM)); + &UVERBS_OBJECT(UVERBS_OBJECT_DM), + &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS)); const struct uverbs_object_tree_def *uverbs_default_get_objects(void) { diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c new file mode 100644 index 000000000000..03b182a684a6 --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "uverbs.h" +#include <rdma/uverbs_std_types.h> + +static int uverbs_free_counters(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_counters *counters = uobject->object; + + if (why == RDMA_REMOVE_DESTROY && + atomic_read(&counters->usecnt)) + return -EBUSY; + + return counters->device->destroy_counters(counters); +} + +static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_counters *counters; + struct ib_uobject *uobj; + int ret; + + /* + * This check should be removed once the infrastructure + * have the ability to remove methods from parse tree once + * such condition is met. + */ + if (!ib_dev->create_counters) + return -EOPNOTSUPP; + + uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE); + counters = ib_dev->create_counters(ib_dev, attrs); + if (IS_ERR(counters)) { + ret = PTR_ERR(counters); + goto err_create_counters; + } + + counters->device = ib_dev; + counters->uobject = uobj; + uobj->object = counters; + atomic_set(&counters->usecnt, 0); + + return 0; + +err_create_counters: + return ret; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_counters_read_attr read_attr = {}; + const struct uverbs_attr *uattr; + struct ib_counters *counters = + uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE); + int ret; + + if (!ib_dev->read_counters) + return -EOPNOTSUPP; + + if (!atomic_read(&counters->usecnt)) + return -EINVAL; + + ret = uverbs_copy_from(&read_attr.flags, attrs, + UVERBS_ATTR_READ_COUNTERS_FLAGS); + if (ret) + return ret; + + uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF); + read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64); + read_attr.counters_buff = kcalloc(read_attr.ncounters, + sizeof(u64), GFP_KERNEL); + if (!read_attr.counters_buff) + return -ENOMEM; + + ret = ib_dev->read_counters(counters, + &read_attr, + attrs); + if (ret) + goto err_read; + + ret = uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF, + read_attr.counters_buff, + read_attr.ncounters * sizeof(u64)); + +err_read: + kfree(read_attr.counters_buff); + return ret; +} + +static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE, + &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY, + uverbs_destroy_def_handler, + &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX +static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ, + &UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, + UVERBS_OBJECT_COUNTERS, + UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, + UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, + UVERBS_ATTR_TYPE(__u32), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters), + &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE), + &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY), + &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ)); + diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index b0dbae9dd0d7..3d293d01afea 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -65,7 +65,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, struct ib_cq_init_attr attr = {}; struct ib_cq *cq; struct ib_uverbs_completion_event_file *ev_file = NULL; - const struct uverbs_attr *ev_file_attr; struct ib_uobject *ev_file_uobj; if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ)) @@ -87,10 +86,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, UVERBS_ATTR_CREATE_CQ_FLAGS))) return -EFAULT; - ev_file_attr = uverbs_attr_get(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL); - if (!IS_ERR(ev_file_attr)) { - ev_file_uobj = ev_file_attr->obj_attr.uobject; - + ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL); + if (!IS_ERR(ev_file_uobj)) { ev_file = container_of(ev_file_uobj, struct ib_uverbs_completion_event_file, uobj_file.uobj); @@ -102,8 +99,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, goto err_event_file; } - obj = container_of(uverbs_attr_get(attrs, - UVERBS_ATTR_CREATE_CQ_HANDLE)->obj_attr.uobject, + obj = container_of(uverbs_attr_get_uobject(attrs, + UVERBS_ATTR_CREATE_CQ_HANDLE), typeof(*obj), uobject); obj->uverbs_file = ucontext->ufile; obj->comp_events_reported = 0; @@ -170,13 +167,17 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev, struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { - struct ib_uverbs_destroy_cq_resp resp; struct ib_uobject *uobj = - uverbs_attr_get(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE)->obj_attr.uobject; - struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object, - uobject); + uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE); + struct ib_uverbs_destroy_cq_resp resp; + struct ib_ucq_object *obj; int ret; + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + + obj = container_of(uobj, struct ib_ucq_object, uobject); + if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ)) return -EOPNOTSUPP; diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index b4f016dfa23d..a7be51cf2e42 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -320,7 +320,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device return ret; /* No need to check as this attribute is marked as MANDATORY */ - uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject; + uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE); action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs); if (IS_ERR(action)) return PTR_ERR(action); @@ -350,7 +350,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device if (ret) return ret; - uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject; + uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE); action = uobj->object; if (action->type != IB_FLOW_ACTION_ESP) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 6ddfb1fade79..0b56828c1319 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1983,7 +1983,7 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp, if (!qp->device->create_flow) return ERR_PTR(-EOPNOTSUPP); - flow_id = qp->device->create_flow(qp, flow_attr, domain); + flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL); if (!IS_ERR(flow_id)) { atomic_inc(&qp->usecnt); flow_id->qp = qp; |