summary | refs | log | tree | commit | diff
path: root/include/rdma
diff options
context:
space:
mode:
Diffstat (limited to 'include/rdma')
-rw-r--r-- include/rdma/ib_umem.h 42
-rw-r--r-- include/rdma/ib_verbs.h 198
-rw-r--r-- include/rdma/restrack.h 24
-rw-r--r-- include/rdma/uverbs_ioctl.h 25
-rw-r--r-- include/rdma/uverbs_types.h 9
5 files changed, 189 insertions, 109 deletions
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 70597508c765..7752211c9638 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -34,6 +34,13 @@ static inline int ib_umem_offset(struct ib_umem *umem)
return umem->address & ~PAGE_MASK;
}
+static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem,
+ unsigned long pgsz)
+{
+ return (sg_dma_address(umem->sg_head.sgl) + ib_umem_offset(umem)) &
+ (pgsz - 1);
+}
+
static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
unsigned long pgsz)
{
@@ -79,6 +86,35 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
unsigned long pgsz_bitmap,
unsigned long virt);
+/**
+ * ib_umem_find_best_pgoff - Find best HW page size
+ *
+ * @umem: umem struct
+ * @pgsz_bitmap: bitmap of HW supported page sizes
+ * @pgoff_bitmask: Mask of bits that can be represented with an offset
+ *
+ * This is very similar to ib_umem_find_best_pgsz() except instead of accepting
+ * an IOVA it accepts a bitmask specifying what address bits can be represented
+ * with a page offset.
+ *
+ * For instance if the HW has multiple page sizes, requires 64 byte alignment,
+ * and can support aligned offsets up to 4032 then pgoff_bitmask would be
+ * "111111000000".
+ *
+ * If the pgoff_bitmask requires either alignment in the low bit or an
+ * unavailable page size for the high bits, this function returns 0.
+ */
+static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ u64 pgoff_bitmask)
+{
+ struct scatterlist *sg = umem->sg_head.sgl;
+ dma_addr_t dma_addr;
+
+ dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK);
+ return ib_umem_find_best_pgsz(umem, pgsz_bitmap,
+ dma_addr & pgoff_bitmask);
+}
#else /* CONFIG_INFINIBAND_USER_MEM */
@@ -101,6 +137,12 @@ static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
{
return 0;
}
+static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ u64 pgoff_bitmask)
+{
+ return 0;
+}
#endif /* CONFIG_INFINIBAND_USER_MEM */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3883efd588aa..9fed65bf9279 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1235,6 +1235,8 @@ enum ib_qp_attr_mask {
IB_QP_RESERVED3 = (1<<23),
IB_QP_RESERVED4 = (1<<24),
IB_QP_RATE_LIMIT = (1<<25),
+
+ IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0),
};
enum ib_qp_state {
@@ -1470,6 +1472,8 @@ enum rdma_remove_reason {
RDMA_REMOVE_DRIVER_REMOVE,
/* uobj is being cleaned-up before being committed */
RDMA_REMOVE_ABORT,
+ /* The driver failed to destroy the uobject and is being disconnected */
+ RDMA_REMOVE_DRIVER_FAILURE,
};
struct ib_rdmacg_object {
@@ -1482,8 +1486,6 @@ struct ib_ucontext {
struct ib_device *device;
struct ib_uverbs_file *ufile;
- bool cleanup_retryable;
-
struct ib_rdmacg_object cg_obj;
/*
* Implementation details of the RDMA core, don't use in drivers:
@@ -2402,6 +2404,8 @@ struct ib_device_ops {
int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
struct ib_udata *udata);
+ int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata);
int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int (*destroy_ah)(struct ib_ah *ah, u32 flags);
@@ -2430,9 +2434,10 @@ struct ib_device_ops {
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
- int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags,
- struct ib_pd *pd, struct ib_udata *udata);
+ struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
@@ -2666,7 +2671,6 @@ struct ib_device {
const struct attribute_group *groups[3];
u64 uverbs_cmd_mask;
- u64 uverbs_ex_cmd_mask;
char node_desc[IB_DEVICE_NODE_DESC_MAX];
__be64 node_guid;
@@ -2902,46 +2906,6 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
}
/**
- * ib_is_destroy_retryable - Check whether the uobject destruction
- * is retryable.
- * @ret: The initial destruction return code
- * @why: remove reason
- * @uobj: The uobject that is destroyed
- *
- * This function is a helper function that IB layer and low-level drivers
- * can use to consider whether the destruction of the given uobject is
- * retry-able.
- * It checks the original return code, if it wasn't success the destruction
- * is retryable according to the ucontext state (i.e. cleanup_retryable) and
- * the remove reason. (i.e. why).
- * Must be called with the object locked for destroy.
- */
-static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why,
- struct ib_uobject *uobj)
-{
- return ret && (why == RDMA_REMOVE_DESTROY ||
- uobj->context->cleanup_retryable);
-}
-
-/**
- * ib_destroy_usecnt - Called during destruction to check the usecnt
- * @usecnt: The usecnt atomic
- * @why: remove reason
- * @uobj: The uobject that is destroyed
- *
- * Non-zero usecnts will block destruction unless destruction was triggered by
- * a ucontext cleanup.
- */
-static inline int ib_destroy_usecnt(atomic_t *usecnt,
- enum rdma_remove_reason why,
- struct ib_uobject *uobj)
-{
- if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
- return -EBUSY;
- return 0;
-}
-
-/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
* the given QP state transition.
@@ -3431,6 +3395,17 @@ enum ib_pd_flags {
struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
const char *caller);
+/**
+ * ib_alloc_pd - Allocates an unused protection domain.
+ * @device: The device on which to allocate the protection domain.
+ * @flags: protection domain flags
+ *
+ * A protection domain object provides an association between QPs, shared
+ * receive queues, address handles, memory regions, and memory windows.
+ *
+ * Every PD has a local_dma_lkey which can be used as the lkey value for local
+ * memory operations.
+ */
#define ib_alloc_pd(device, flags) \
__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
@@ -3656,8 +3631,14 @@ static inline int ib_post_srq_recv(struct ib_srq *srq,
bad_recv_wr ? : &dummy);
}
-struct ib_qp *ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr);
+struct ib_qp *ib_create_named_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ const char *caller);
+static inline struct ib_qp *ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr)
+{
+ return ib_create_named_qp(pd, init_attr, KBUILD_MODNAME);
+}
/**
* ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
@@ -3944,6 +3925,16 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
-ENOSYS;
}
+/*
+ * Drivers that don't need a DMA mapping at the RDMA layer set dma_device to
+ * NULL. This causes the ib_dma* helpers to just stash the kernel virtual
+ * address into the dma address.
+ */
+static inline bool ib_uses_virt_dma(struct ib_device *dev)
+{
+ return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device;
+}
+
/**
* ib_dma_mapping_error - check a DMA addr for error
* @dev: The device for which the dma_addr was created
@@ -3951,6 +3942,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
*/
static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
{
+ if (ib_uses_virt_dma(dev))
+ return 0;
return dma_mapping_error(dev->dma_device, dma_addr);
}
@@ -3965,6 +3958,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev,
void *cpu_addr, size_t size,
enum dma_data_direction direction)
{
+ if (ib_uses_virt_dma(dev))
+ return (uintptr_t)cpu_addr;
return dma_map_single(dev->dma_device, cpu_addr, size, direction);
}
@@ -3979,7 +3974,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
- dma_unmap_single(dev->dma_device, addr, size, direction);
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_single(dev->dma_device, addr, size, direction);
}
/**
@@ -3996,6 +3992,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev,
size_t size,
enum dma_data_direction direction)
{
+ if (ib_uses_virt_dma(dev))
+ return (uintptr_t)(page_address(page) + offset);
return dma_map_page(dev->dma_device, page, offset, size, direction);
}
@@ -4010,7 +4008,30 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
- dma_unmap_page(dev->dma_device, addr, size, direction);
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_page(dev->dma_device, addr, size, direction);
+}
+
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents);
+static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ if (ib_uses_virt_dma(dev))
+ return ib_dma_virt_map_sg(dev, sg, nents);
+ return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
+ dma_attrs);
+}
+
+static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
+ dma_attrs);
}
/**
@@ -4024,7 +4045,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- return dma_map_sg(dev->dma_device, sg, nents, direction);
+ return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0);
}
/**
@@ -4038,24 +4059,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- dma_unmap_sg(dev->dma_device, sg, nents, direction);
-}
-
-static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long dma_attrs)
-{
- return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
- dma_attrs);
-}
-
-static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long dma_attrs)
-{
- dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
+ ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0);
}
/**
@@ -4066,6 +4070,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
*/
static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
{
+ if (ib_uses_virt_dma(dev))
+ return UINT_MAX;
return dma_get_max_seg_size(dev->dma_device);
}
@@ -4081,7 +4087,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
size_t size,
enum dma_data_direction dir)
{
- dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+ if (!ib_uses_virt_dma(dev))
+ dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
}
/**
@@ -4096,36 +4103,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
size_t size,
enum dma_data_direction dir)
{
- dma_sync_single_for_device(dev->dma_device, addr, size, dir);
-}
-
-/**
- * ib_dma_alloc_coherent - Allocate memory and map it for DMA
- * @dev: The device for which the DMA address is requested
- * @size: The size of the region to allocate in bytes
- * @dma_handle: A pointer for returning the DMA address of the region
- * @flag: memory allocator flags
- */
-static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
- size_t size,
- dma_addr_t *dma_handle,
- gfp_t flag)
-{
- return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag);
-}
-
-/**
- * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent()
- * @dev: The device for which the DMA addresses were allocated
- * @size: The size of the region
- * @cpu_addr: the address returned by ib_dma_alloc_coherent()
- * @dma_handle: the DMA address returned by ib_dma_alloc_coherent()
- */
-static inline void ib_dma_free_coherent(struct ib_device *dev,
- size_t size, void *cpu_addr,
- dma_addr_t dma_handle)
-{
- dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
+ if (!ib_uses_virt_dma(dev))
+ dma_sync_single_for_device(dev->dma_device, addr, size, dir);
}
/* ib_reg_user_mr - register a memory region for virtual addresses from kernel
@@ -4217,7 +4196,8 @@ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
struct inode *inode, struct ib_udata *udata);
int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata);
-static inline int ib_check_mr_access(int flags)
+static inline int ib_check_mr_access(struct ib_device *ib_dev,
+ unsigned int flags)
{
/*
* Local write permission is required if remote write or
@@ -4230,6 +4210,9 @@ static inline int ib_check_mr_access(int flags)
if (flags & ~IB_ACCESS_SUPPORTED)
return -EINVAL;
+ if (flags & IB_ACCESS_ON_DEMAND &&
+ !(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
+ return -EINVAL;
return 0;
}
@@ -4617,6 +4600,19 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
}
/**
+ * ibdev_to_node - return the NUMA node for a given ib_device
+ * @ibdev: device to get the NUMA node for.
+ */
+static inline int ibdev_to_node(struct ib_device *ibdev)
+{
+ struct device *parent = ibdev->dev.parent;
+
+ if (!parent)
+ return NUMA_NO_NODE;
+ return dev_to_node(parent);
+}
+
+/**
* rdma_device_to_drv_device - Helper macro to reach back to driver's
* ib_device holder structure from device pointer.
*
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index d3a1cc5be7bc..05e18839eaff 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -68,6 +68,14 @@ struct rdma_restrack_entry {
* As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI
*/
bool valid;
+ /**
+ * @no_track: don't add this entry to restrack DB
+ *
+ * This field is used to mark an entry that doesn't need to be added to
+ * internal restrack DB and presented later to the users at the nldev
+ * query stage.
+ */
+ u8 no_track : 1;
/*
* @kref: Protect destroy of the resource
*/
@@ -145,4 +153,20 @@ int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev,
enum rdma_restrack_type type,
u32 id);
+
+/**
+ * rdma_restrack_no_track() - don't add resource to the DB
+ * @res: resource entry
+ *
+ * Every user of this API should be cross examined.
+ * Probably you don't need to use this function.
+ */
+static inline void rdma_restrack_no_track(struct rdma_restrack_entry *res)
+{
+ res->no_track = true;
+}
+static inline bool rdma_restrack_is_tracked(struct rdma_restrack_entry *res)
+{
+ return !res->no_track;
+}
#endif /* _RDMA_RESTRACK_H_ */
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index b00270c72740..39ef204753ec 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -647,12 +647,15 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b
* 'ucontext'.
*
*/
-#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \
- (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \
- driver_udata) \
- ->context, \
- drv_dev_struct, member) : \
- (drv_dev_struct *)NULL)
+static inline struct uverbs_attr_bundle *
+rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata)
+{
+ return container_of(udata, struct uverbs_attr_bundle, driver_udata);
+}
+
+#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \
+ (udata ? container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \
+ drv_dev_struct, member) : (drv_dev_struct *)NULL)
#define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT)
@@ -862,6 +865,16 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
{
return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
}
+
+static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle,
+ size_t n, size_t size)
+{
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(n, size, &bytes)))
+ return ERR_PTR(-EOVERFLOW);
+ return uverbs_zalloc(bundle, bytes);
+}
int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
size_t idx, s64 lower_bound, u64 upper_bound,
s64 *def_val);
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 06db27e35f40..ccd11631c167 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -71,6 +71,8 @@ struct uverbs_obj_type_class {
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs);
void (*remove_handle)(struct ib_uobject *uobj);
+ void (*swap_uobjects)(struct ib_uobject *obj_old,
+ struct ib_uobject *obj_new);
};
struct uverbs_obj_type {
@@ -116,6 +118,9 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
bool hw_obj_valid);
void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
struct uverbs_attr_bundle *attrs);
+void rdma_assign_uobject(struct ib_uobject *to_uobj,
+ struct ib_uobject *new_uobj,
+ struct uverbs_attr_bundle *attrs);
/*
* uverbs_uobject_get is called in order to increase the reference count on
@@ -138,8 +143,8 @@ struct uverbs_obj_fd_type {
* because the driver is removed or the FD is closed.
*/
struct uverbs_obj_type type;
- int (*destroy_object)(struct ib_uobject *uobj,
- enum rdma_remove_reason why);
+ void (*destroy_object)(struct ib_uobject *uobj,
+ enum rdma_remove_reason why);
const struct file_operations *fops;
const char *name;
int flags;