From 4e045572e2c2be674ed7e43cca7ca105e8a22f56 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 10 Oct 2016 06:14:28 -0700 Subject: IB/hfi1: Add unique txwait_lock for txreq events Profiling suggests that the read_seqbegin() in the txreq put logic is colliding with other uses of the iowait lock. The packet at a time use of this lock dictates a unique lock to avoid reader/writer collisions when the number of vTxWait events is low. In order to support a unique lock the iowait struct embedded in the QP is extended to remember the lock that protects the queue head. The QP destroy removes that QP from any wait list. It doesn't need to know the head because of the linked list API, but it does need to know the lock required to protect the head. This also opens up the wait logic to have unique per resources locks which needs to be in future refinement. Reviewed-by: Sebastian Sanchez Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/iowait.h | 8 ++++++++ drivers/infiniband/hw/hfi1/pio.c | 1 + drivers/infiniband/hw/hfi1/qp.c | 11 ++++++++--- drivers/infiniband/hw/hfi1/verbs.c | 4 ++++ drivers/infiniband/hw/hfi1/verbs.h | 13 +++++++------ drivers/infiniband/hw/hfi1/verbs_txreq.c | 13 +++++++------ 6 files changed, 35 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index 2ec6ef38d389..d9740ddea6f1 100644 --- a/drivers/infiniband/hw/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h @@ -64,6 +64,7 @@ struct sdma_engine; /** * struct iowait - linkage for delayed progress/waiting * @list: used to add/insert into QP/PQ wait lists + * @lock: uses to record the list head lock * @tx_head: overflow list of sdma_txreq's * @sleep: no space callback * @wakeup: space callback wakeup @@ -91,6 +92,11 @@ struct sdma_engine; * so sleeping is not allowed. * * The wait_dma member along with the iow + * + * The lock field is used by waiters to record + * the seqlock_t that guards the list head. + * Waiters explicity know that, but the destroy + * code that unwaits QPs does not. */ struct iowait { @@ -103,6 +109,7 @@ struct iowait { unsigned seq); void (*wakeup)(struct iowait *wait, int reason); void (*sdma_drained)(struct iowait *wait); + seqlock_t *lock; struct work_struct iowork; wait_queue_head_t wait_dma; wait_queue_head_t wait_pio; @@ -141,6 +148,7 @@ static inline void iowait_init( void (*sdma_drained)(struct iowait *wait)) { wait->count = 0; + wait->lock = NULL; INIT_LIST_HEAD(&wait->list); INIT_LIST_HEAD(&wait->tx_head); INIT_WORK(&wait->iowork, func); diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 50a3a36d9363..385e4dcf2cd3 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1580,6 +1580,7 @@ static void sc_piobufavail(struct send_context *sc) qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; /* refcount held until actual wake up */ qps[n++] = qp; } diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 9fc75e7e8781..d752d6768a49 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -196,15 +196,18 @@ static void flush_tx_list(struct rvt_qp *qp) static void flush_iowait(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); unsigned long flags; + seqlock_t *lock = priv->s_iowait.lock; - write_seqlock_irqsave(&dev->iowait_lock, flags); + if (!lock) + return; + write_seqlock_irqsave(lock, flags); if (!list_empty(&priv->s_iowait.list)) { list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; rvt_put_qp(qp); } - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(lock, flags); } static inline int opa_mtu_enum_to_int(int mtu) @@ -543,6 +546,7 @@ static int iowait_sleep( ibp->rvp.n_dmawait++; qp->s_flags |= RVT_S_WAIT_DMA_DESC; list_add_tail(&priv->s_iowait.list, &sde->dmawait); + priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); } @@ -964,6 +968,7 @@ void notify_error_qp(struct rvt_qp *qp) if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { qp->s_flags &= ~RVT_S_ANY_WAIT_IO; list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; rvt_put_qp(qp); } write_sequnlock(&dev->iowait_lock); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 4b7a16ceb362..feecacb62162 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -694,6 +694,7 @@ static void mem_timer(unsigned long data) qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); + priv->s_iowait.lock = NULL; /* refcount held until actual wake up */ if (!list_empty(list)) mod_timer(&dev->mem_timer, jiffies + 1); @@ -769,6 +770,7 @@ static int wait_kmem(struct hfi1_ibdev *dev, mod_timer(&dev->mem_timer, jiffies + 1); qp->s_flags |= RVT_S_WAIT_KMEM; list_add_tail(&priv->s_iowait.list, &dev->memwait); + priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); rvt_get_qp(qp); } @@ -980,6 +982,7 @@ static int pio_wait(struct rvt_qp *qp, qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); + priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ @@ -1632,6 +1635,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); seqlock_init(&dev->iowait_lock); + seqlock_init(&dev->txwait_lock); INIT_LIST_HEAD(&dev->txwait); INIT_LIST_HEAD(&dev->memwait); diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 1c3815d89eb7..7a8af39e99d4 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -180,18 +180,19 @@ struct hfi1_ibdev { struct rvt_dev_info rdi; /* Must be first */ /* QP numbers are shared by all IB ports */ - /* protect wait lists */ - seqlock_t iowait_lock; + /* protect txwait list */ + seqlock_t txwait_lock ____cacheline_aligned_in_smp; struct list_head txwait; /* list for wait verbs_txreq */ struct list_head memwait; /* list for wait kernel memory */ - struct list_head txreq_free; struct kmem_cache *verbs_txreq_cache; - struct timer_list mem_timer; + u64 n_txwait; + u64 n_kmem_wait; + /* protect iowait lists */ + seqlock_t iowait_lock ____cacheline_aligned_in_smp; u64 n_piowait; u64 n_piodrain; - u64 n_txwait; - u64 n_kmem_wait; + struct timer_list mem_timer; #ifdef CONFIG_DEBUG_FS /* per HFI debugfs */ diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index 094ab829ec42..5d23172c470f 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -72,22 +72,22 @@ void hfi1_put_txreq(struct verbs_txreq *tx) kmem_cache_free(dev->verbs_txreq_cache, tx); do { - seq = read_seqbegin(&dev->iowait_lock); + seq = read_seqbegin(&dev->txwait_lock); if (!list_empty(&dev->txwait)) { struct iowait *wait; - write_seqlock_irqsave(&dev->iowait_lock, flags); + write_seqlock_irqsave(&dev->txwait_lock, flags); wait = list_first_entry(&dev->txwait, struct iowait, list); qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); /* refcount held until actual wake up */ - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(&dev->txwait_lock, flags); hfi1_qp_wakeup(qp, RVT_S_WAIT_TX); break; } - } while (read_seqretry(&dev->iowait_lock, seq)); + } while (read_seqretry(&dev->txwait_lock, seq)); } struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, @@ -96,7 +96,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, { struct verbs_txreq *tx = ERR_PTR(-EBUSY); - write_seqlock(&dev->iowait_lock); + write_seqlock(&dev->txwait_lock); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { struct hfi1_qp_priv *priv; @@ -108,13 +108,14 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, dev->n_txwait++; qp->s_flags |= RVT_S_WAIT_TX; list_add_tail(&priv->s_iowait.list, &dev->txwait); + priv->s_iowait.lock = &dev->txwait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX); rvt_get_qp(qp); } qp->s_flags &= ~RVT_S_BUSY; } out: - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&dev->txwait_lock); return tx; } -- cgit v1.2.3 From 63df8e09e185796ba532cff7c31f88811dd6cae6 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 10 Oct 2016 06:14:34 -0700 Subject: IB/hfi1: Inline sdma_txclean() for verbs pio Short circuit sdma_txclean() by adding an __sdma_txclean() that is only called when the tx has sdma mappings. Convert internal calls to __sdma_txclean(). This removes a call from the critical path. Reviewed-by: Sebastian Sanchez Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sdma.c | 18 +++++++++--------- drivers/infiniband/hw/hfi1/sdma.h | 12 +++++++++--- 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index fd39bcaa062d..7102a076146d 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -375,7 +375,7 @@ static inline void complete_tx(struct sdma_engine *sde, sde->head_sn, tx->sn); sde->head_sn++; #endif - sdma_txclean(sde->dd, tx); + __sdma_txclean(sde->dd, tx); if (complete) (*complete)(tx, res); if (wait && iowait_sdma_dec(wait)) @@ -1643,7 +1643,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx) } /** - * sdma_txclean() - clean tx of mappings, descp *kmalloc's + * __sdma_txclean() - clean tx of mappings, descp *kmalloc's * @dd: hfi1_devdata for unmapping * @tx: tx request to clean * @@ -1653,7 +1653,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx) * The code can be called multiple times without issue. * */ -void sdma_txclean( +void __sdma_txclean( struct hfi1_devdata *dd, struct sdma_txreq *tx) { @@ -3080,7 +3080,7 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) tx->descp[i] = tx->descs[i]; return 0; enomem: - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOMEM; } @@ -3109,14 +3109,14 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, rval = _extend_sdma_tx_descs(dd, tx); if (rval) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return rval; } /* If coalesce buffer is allocated, copy data into it */ if (tx->coalesce_buf) { if (type == SDMA_MAP_NONE) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -EINVAL; } @@ -3124,7 +3124,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, kvaddr = kmap(page); kvaddr += offset; } else if (WARN_ON(!kvaddr)) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -EINVAL; } @@ -3154,7 +3154,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOSPC; } @@ -3196,7 +3196,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) if ((unlikely(tx->num_desc == tx->desc_limit))) { rval = _extend_sdma_tx_descs(dd, tx); if (rval) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return rval; } } diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 56257ea3598f..21f1e2834f37 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -667,7 +667,13 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, int type, void *kvaddr, struct page *page, unsigned long offset, u16 len); int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *); -void sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *); +void __sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *); + +static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx) +{ + if (tx->num_desc) + __sdma_txclean(dd, tx); +} /* helpers used by public routines */ static inline void _sdma_close_tx(struct hfi1_devdata *dd, @@ -753,7 +759,7 @@ static inline int sdma_txadd_page( DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOSPC; } @@ -834,7 +840,7 @@ static inline int sdma_txadd_kvaddr( DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) { - sdma_txclean(dd, tx); + __sdma_txclean(dd, tx); return -ENOSPC; } -- cgit v1.2.3 From 99f80d2f5fb6d4165186390ecba83952803b667b Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 10 Oct 2016 06:14:39 -0700 Subject: IB/hfi1: Optimize lkey validation structures Profiling shows that the key validation is susceptible to cache line trading when accessing the lkey table. Fix by separating out the read mostly fields from the write fields. In addition the shift amount, which is function of the lkey table size, is precomputed and stored with the table pointer. Since both the shift and table pointer are in the same read mostly cacheline, this saves a cache line in this hot path. Reviewed-by: Sebastian Sanchez Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mr.c | 10 +++++----- include/rdma/rdmavt_mr.h | 10 +++++++--- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 46b64970058e..4acf179adf3b 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -84,6 +84,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) lkey_table_size = rdi->dparms.lkey_table_size; } rdi->lkey_table.max = 1 << lkey_table_size; + rdi->lkey_table.shift = 32 - lkey_table_size; lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); rdi->lkey_table.table = (struct rvt_mregion __rcu **) vmalloc_node(lk_tab_size, rdi->dparms.node); @@ -774,7 +775,6 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_mregion *mr; unsigned n, m; size_t off; - struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); /* * We use LKEY == zero for kernel virtual addresses @@ -782,6 +782,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, */ rcu_read_lock(); if (sge->lkey == 0) { + struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); + if (pd->user) goto bail; mr = rcu_dereference(dev->dma_mr); @@ -798,8 +800,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->n = 0; goto ok; } - mr = rcu_dereference( - rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]); + mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) goto bail; @@ -899,8 +900,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, goto ok; } - mr = rcu_dereference( - rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); + mr = rcu_dereference(rkt->table[rkey >> rkt->shift]); if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index 6b3c6c8b6b77..de59de28b6a2 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -90,11 +90,15 @@ struct rvt_mregion { #define RVT_MAX_LKEY_TABLE_BITS 23 struct rvt_lkey_table { - spinlock_t lock; /* protect changes in this struct */ - u32 next; /* next unused index (speeds search) */ - u32 gen; /* generation count */ + /* read mostly fields */ u32 max; /* size of the table */ + u32 shift; /* lkey/rkey shift */ struct rvt_mregion __rcu **table; + /* writeable fields */ + /* protect changes in this struct */ + spinlock_t lock ____cacheline_aligned_in_smp; + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ }; /* -- cgit v1.2.3 From 99c7abfb626d0ce4e6e190dc7aeeb55eb21e335f Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 17 Oct 2016 04:19:13 -0700 Subject: IB/hfi1: Optimize pio cachelines Move buffers_allocated pcpu pointer to allocator line. Move hw_free pointer to releaser line. Fill other holes revealed by pahole. Reviewed-by: Sebastian Sanchez Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/pio.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index e709eaf743b5..bd19507b6bb0 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -104,7 +104,6 @@ struct send_context { void __iomem *base_addr; /* start of PIO memory */ union pio_shadow_ring *sr; /* shadow ring */ - volatile __le64 *hw_free; /* HW free counter */ struct work_struct halt_work; /* halted context work queue entry */ unsigned long flags; /* flags */ int node; /* context home node */ @@ -116,19 +115,20 @@ struct send_context { u32 group; /* credit return group */ /* allocator fields */ spinlock_t alloc_lock ____cacheline_aligned_in_smp; + u32 sr_head; /* shadow ring head */ unsigned long fill; /* official alloc count */ unsigned long alloc_free; /* copy of free (less cache thrash) */ - u32 sr_head; /* shadow ring head */ + u32 __percpu *buffers_allocated;/* count of buffers allocated */ /* releaser fields */ spinlock_t release_lock ____cacheline_aligned_in_smp; - unsigned long free; /* official free count */ u32 sr_tail; /* shadow ring tail */ + unsigned long free; /* official free count */ + volatile __le64 *hw_free; /* HW free counter */ /* list for PIO waiters */ struct list_head piowait ____cacheline_aligned_in_smp; spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp; - u64 credit_ctrl; /* cache for credit control */ u32 credit_intr_count; /* count of credit intr users */ - u32 __percpu *buffers_allocated;/* count of buffers allocated */ + u64 credit_ctrl; /* cache for credit control */ wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */ }; -- cgit v1.2.3 From a6cd5f08e085176fe8dbd4c57217058e53dfc4d9 Mon Sep 17 00:00:00 2001 From: Jakub Pawlak Date: Mon, 17 Oct 2016 04:19:30 -0700 Subject: IB/hfi1: Unify access to GUID entries This patch consolidates the node GUIDs and the port GUID handling and unifies access to these items. The knowledge of hfi1 GUIDs' design and their location are kept in accessors to centralize access. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Reviewed-by: Brian Welty Signed-off-by: Jakub Pawlak Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 4 ++-- drivers/infiniband/hw/hfi1/hfi.h | 19 +++++++++++++++++-- drivers/infiniband/hw/hfi1/mad.c | 19 ++++++++++--------- drivers/infiniband/hw/hfi1/ruc.c | 16 +++------------- drivers/infiniband/hw/hfi1/verbs.c | 15 +++++++-------- drivers/infiniband/hw/hfi1/verbs.h | 3 --- 6 files changed, 39 insertions(+), 37 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9bf5f23544d4..f87d80567572 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9567,11 +9567,11 @@ int bringup_serdes(struct hfi1_pportdata *ppd) if (HFI1_CAP_IS_KSET(EXTENDED_PSN)) add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK); - guid = ppd->guid; + guid = ppd->guids[HFI1_PORT_GUID_INDEX]; if (!guid) { if (dd->base_guid) guid = dd->base_guid + ppd->port - 1; - ppd->guid = guid; + ppd->guids[HFI1_PORT_GUID_INDEX] = guid; } /* Set linkinit_reason on power up per OPA spec */ diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7eef11b316ff..a2ea643a79fc 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -512,6 +512,9 @@ struct rvt_sge_state; #define HFI1_MIN_VLS_SUPPORTED 1 #define HFI1_MAX_VLS_SUPPORTED 8 +#define HFI1_GUIDS_PER_PORT 5 +#define HFI1_PORT_GUID_INDEX 0 + static inline void incr_cntr64(u64 *cntr) { if (*cntr < (u64)-1LL) @@ -582,8 +585,9 @@ struct hfi1_pportdata { u32 port_type; struct qsfp_data qsfp_info; - /* GUID for this interface, in host order */ - u64 guid; + /* GUIDs for this interface, in host order, guids[0] is a port guid */ + u64 guids[HFI1_GUIDS_PER_PORT]; + /* GUID for peer interface, in host order */ u64 neighbor_guid; @@ -1632,6 +1636,17 @@ static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned index) return ret; } +/* + * Return the indexed GUID from the port GUIDs table. + */ +static inline __be64 get_sguid(struct hfi1_ibport *ibp, unsigned int index) +{ + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + + WARN_ON(index >= HFI1_GUIDS_PER_PORT); + return cpu_to_be64(ppd->guids[index]); +} + /* * Called by readers of cc_state only, must call under rcu_read_lock(). */ diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 9487c9bb8920..0ef62e67f283 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -336,20 +336,20 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data, ni = (struct opa_node_info *)data; /* GUID 0 is illegal */ - if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) { + if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 || + get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } - ni->port_guid = cpu_to_be64(dd->pport[pidx].guid); + ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX); ni->base_version = OPA_MGMT_BASE_VERSION; ni->class_version = OPA_SMI_CLASS_VERSION; ni->node_type = 1; /* channel adapter */ ni->num_ports = ibdev->phys_port_cnt; /* This is already in network order */ ni->system_image_guid = ib_hfi1_sys_image_guid; - /* Use first-port GUID as node */ - ni->node_guid = cpu_to_be64(dd->pport->guid); + ni->node_guid = ibdev->node_guid; ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd)); ni->device_id = cpu_to_be16(dd->pcidev->device); ni->revision = cpu_to_be32(dd->minrev); @@ -373,19 +373,20 @@ static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev, /* GUID 0 is illegal */ if (smp->attr_mod || pidx >= dd->num_pports || - dd->pport[pidx].guid == 0) + ibdev->node_guid == 0 || + get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) { smp->status |= IB_SMP_INVALID_FIELD; - else - nip->port_guid = cpu_to_be64(dd->pport[pidx].guid); + return reply((struct ib_mad_hdr *)smp); + } + nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX); nip->base_version = OPA_MGMT_BASE_VERSION; nip->class_version = OPA_SMI_CLASS_VERSION; nip->node_type = 1; /* channel adapter */ nip->num_ports = ibdev->phys_port_cnt; /* This is already in network order */ nip->sys_guid = ib_hfi1_sys_image_guid; - /* Use first-port GUID as node */ - nip->node_guid = cpu_to_be64(dd->pport->guid); + nip->node_guid = ibdev->node_guid; nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd)); nip->device_id = cpu_to_be16(dd->pcidev->device); nip->revision = cpu_to_be32(dd->minrev); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index a1576aea4756..5a70e91b5191 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -239,16 +239,6 @@ bail: return ret; } -static __be64 get_sguid(struct hfi1_ibport *ibp, unsigned index) -{ - if (!index) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - - return cpu_to_be64(ppd->guid); - } - return ibp->guids[index - 1]; -} - static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) { return (gid->global.interface_id == id && @@ -699,9 +689,9 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, /* The SGID is 32-bit aligned. */ hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix; hdr->sgid.global.interface_id = - grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ? - ibp->guids[grh->sgid_index - 1] : - cpu_to_be64(ppd_from_ibp(ibp)->guid); + grh->sgid_index < HFI1_GUIDS_PER_PORT ? + get_sguid(ibp, grh->sgid_index) : + get_sguid(ibp, HFI1_PORT_GUID_INDEX); hdr->dgid = grh->dgid; /* GRH header size in 32-bit words. */ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index feecacb62162..f1dead369258 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1486,15 +1486,11 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, int guid_index, __be64 *guid) { struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp); - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - if (guid_index == 0) - *guid = cpu_to_be64(ppd->guid); - else if (guid_index < HFI1_GUIDS_PER_PORT) - *guid = ibp->guids[guid_index - 1]; - else + if (guid_index >= HFI1_GUIDS_PER_PORT) return -EINVAL; + *guid = get_sguid(ibp, guid_index); return 0; } @@ -1623,6 +1619,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) struct hfi1_ibdev *dev = &dd->verbs_dev; struct ib_device *ibdev = &dev->rdi.ibdev; struct hfi1_pportdata *ppd = dd->pport; + struct hfi1_ibport *ibp = &ppd->ibport_data; unsigned i; int ret; size_t lcpysz = IB_DEVICE_NAME_MAX; @@ -1643,17 +1640,19 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) if (ret) goto err_verbs_txreq; + /* Use first-port GUID as node guid */ + ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX); + /* * The system image GUID is supposed to be the same for all * HFIs in a single system but since there can be other * device types in the system, we can't be sure this is unique. */ if (!ib_hfi1_sys_image_guid) - ib_hfi1_sys_image_guid = cpu_to_be64(ppd->guid); + ib_hfi1_sys_image_guid = ibdev->node_guid; lcpysz = strlcpy(ibdev->name, class_name(), lcpysz); strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz); ibdev->owner = THIS_MODULE; - ibdev->node_guid = cpu_to_be64(ppd->guid); ibdev->phys_port_cnt = dd->num_pports; ibdev->dma_device = &dd->pcidev->dev; ibdev->modify_device = modify_device; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 7a8af39e99d4..e6b893010e6d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -73,7 +73,6 @@ struct hfi1_packet; #include "iowait.h" #define HFI1_MAX_RDMA_ATOMIC 16 -#define HFI1_GUIDS_PER_PORT 5 /* * Increment this value if any changes that break userspace ABI @@ -169,8 +168,6 @@ struct hfi1_ibport { struct rvt_qp __rcu *qp[2]; struct rvt_ibport rvp; - __be64 guids[HFI1_GUIDS_PER_PORT - 1]; /* writable GUIDs */ - /* the first 16 entries are sl_to_vl for !OPA */ u8 sl_to_sc[32]; u8 sc_to_sl[32]; -- cgit v1.2.3 From 6e768f0682e26e7683c4af5b0de04a2e08bc67db Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Mon, 17 Oct 2016 04:19:35 -0700 Subject: IB/hfi1: Optimize devdata cachelines Profiling shows hot path struct members that need to be in a minimum set of cachelines. Group these struct member in the same cacheline: sc2vl_lock sc2vl rhf_rcv_function_map rcv_limit rhf_offset Group these struct member in the same cacheline: process_pio_send process_dma_send pport rcd int_counter flags num_pports first_user_ctxt Fill holes in struct hfi1_devdata revealed by pahole. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 108 ++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 53 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index a2ea643a79fc..25e44c4af995 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -852,32 +852,29 @@ struct hfi1_devdata { u8 __iomem *kregend; /* physical address of chip for io_remap, etc. */ resource_size_t physaddr; - /* receive context data */ - struct hfi1_ctxtdata **rcd; + /* Per VL data. Enough for all VLs but not all elements are set/used. */ + struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; /* send context data */ struct send_context_info *send_contexts; /* map hardware send contexts to software index */ u8 *hw_to_sw; /* spinlock for allocating and releasing send context resources */ spinlock_t sc_lock; - /* Per VL data. Enough for all VLs but not all elements are set/used. */ - struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; /* lock for pio_map */ spinlock_t pio_map_lock; + /* Send Context initialization lock. */ + spinlock_t sc_init_lock; + /* lock for sdma_map */ + spinlock_t sde_map_lock; /* array of kernel send contexts */ struct send_context **kernel_send_context; /* array of vl maps */ struct pio_vl_map __rcu *pio_map; - /* seqlock for sc2vl */ - seqlock_t sc2vl_lock; - u64 sc2vl[4]; - /* Send Context initialization lock. */ - spinlock_t sc_init_lock; + /* default flags to last descriptor */ + u64 default_desc1; /* fields common to all SDMA engines */ - /* default flags to last descriptor */ - u64 default_desc1; volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */ dma_addr_t sdma_heads_phys; void *sdma_pad_dma; /* DMA'ed by chip */ @@ -888,8 +885,6 @@ struct hfi1_devdata { u32 chip_sdma_engines; /* num used */ u32 num_sdma; - /* lock for sdma_map */ - spinlock_t sde_map_lock; /* array of engines sized by num_sdma */ struct sdma_engine *per_sdma; /* array of vl maps */ @@ -898,14 +893,11 @@ struct hfi1_devdata { wait_queue_head_t sdma_unfreeze_wq; atomic_t sdma_unfreeze_count; + u32 lcb_access_count; /* count of LCB users */ + /* common data between shared ASIC HFIs in this OS */ struct hfi1_asic_data *asic_data; - /* hfi1_pportdata, points to array of (physical) port-specific - * data structs, indexed by pidx (0..n-1) - */ - struct hfi1_pportdata *pport; - /* mem-mapped pointer to base of PIO buffers */ void __iomem *piobase; /* @@ -922,20 +914,13 @@ struct hfi1_devdata { /* send context numbers and sizes for each type */ struct sc_config_sizes sc_sizes[SC_MAX]; - u32 lcb_access_count; /* count of LCB users */ - char *boardname; /* human readable board info */ - /* device (not port) flags, basically device capabilities */ - u32 flags; - /* reset value */ u64 z_int_counter; u64 z_rcv_limit; u64 z_send_schedule; - /* percpu int_counter */ - u64 __percpu *int_counter; - u64 __percpu *rcv_limit; + u64 __percpu *send_schedule; /* number of receive contexts in use by the driver */ u32 num_rcv_contexts; @@ -950,6 +935,7 @@ struct hfi1_devdata { /* base receive interrupt timeout, in CSR units */ u32 rcv_intr_timeout_csr; + u32 freezelen; /* max length of freezemsg */ u64 __iomem *egrtidbase; spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ @@ -971,7 +957,6 @@ struct hfi1_devdata { * IB link status cheaply */ struct hfi1_status *status; - u32 freezelen; /* max length of freezemsg */ /* revision register shadow */ u64 revision; @@ -999,6 +984,8 @@ struct hfi1_devdata { u16 rcvegrbufsize_shift; /* both sides of the PCIe link are gen3 capable */ u8 link_gen3_capable; + /* default link down value (poll/sleep) */ + u8 link_default; /* localbus width (1, 2,4,8,16,32) from config space */ u32 lbus_width; /* localbus speed in MHz */ @@ -1034,8 +1021,6 @@ struct hfi1_devdata { u8 hfi1_id; /* implementation code */ u8 icode; - /* default link down value (poll/sleep) */ - u8 link_default; /* vAU of this device */ u8 vau; /* vCU of this device */ @@ -1046,27 +1031,17 @@ struct hfi1_devdata { u16 vl15_init; /* Misc small ints */ - /* Number of physical ports available */ - u8 num_pports; - /* Lowest context number which can be used by user processes */ - u8 first_user_ctxt; u8 n_krcv_queues; u8 qos_shift; - u8 qpn_mask; - u16 rhf_offset; /* offset of RHF within receive header entry */ u16 irev; /* implementation revision */ u16 dc8051_ver; /* 8051 firmware version */ + spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ struct platform_config platform_config; struct platform_config_cache pcfg_cache; struct diag_client *diag_client; - spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ - - u8 psxmitwait_supported; - /* cycle length of PS* counters in HW (in picoseconds) */ - u16 psxmitwait_check_rate; /* MSI-X information */ struct hfi1_msix_entry *msix_entries; @@ -1081,6 +1056,9 @@ struct hfi1_devdata { struct rcv_array_data rcv_entries; + /* cycle length of PS* counters in HW (in picoseconds) */ + u16 psxmitwait_check_rate; + /* * 64 bit synthetic counters */ @@ -1113,11 +1091,11 @@ struct hfi1_devdata { struct err_info_rcvport err_info_rcvport; struct err_info_constraint err_info_rcv_constraint; struct err_info_constraint err_info_xmit_constraint; - u8 err_info_uncorrectable; - u8 err_info_fmconfig; atomic_t drop_packet; u8 do_drop; + u8 err_info_uncorrectable; + u8 err_info_fmconfig; /* * Software counters for the status bits defined by the @@ -1140,47 +1118,71 @@ struct hfi1_devdata { u64 sw_cce_err_status_aggregate; /* Software counter that aggregates all bypass packet rcv errors */ u64 sw_rcv_bypass_packet_errors; - /* receive interrupt functions */ - rhf_rcv_function_ptr *rhf_rcv_function_map; + /* receive interrupt function */ rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; + /* Save the enabled LCB error bits */ + u64 lcb_err_en; + /* * Handlers for outgoing data so that snoop/capture does not * have to have its hooks in the send path */ - send_routine process_pio_send; + send_routine process_pio_send ____cacheline_aligned_in_smp; send_routine process_dma_send; void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); + /* hfi1_pportdata, points to array of (physical) port-specific + * data structs, indexed by pidx (0..n-1) + */ + struct hfi1_pportdata *pport; + /* receive context data */ + struct hfi1_ctxtdata **rcd; + u64 __percpu *int_counter; + /* device (not port) flags, basically device capabilities */ + u16 flags; + /* Number of physical ports available */ + u8 num_pports; + /* Lowest context number which can be used by user processes */ + u8 first_user_ctxt; + /* adding a new field here would make it part of this cacheline */ + + /* seqlock for sc2vl */ + seqlock_t sc2vl_lock ____cacheline_aligned_in_smp; + u64 sc2vl[4]; + /* receive interrupt functions */ + rhf_rcv_function_ptr *rhf_rcv_function_map; + u64 __percpu *rcv_limit; + u16 rhf_offset; /* offset of RHF within receive header entry */ + /* adding a new field here would make it part of this cacheline */ /* OUI comes from the HW. Used everywhere as 3 separate bytes. */ u8 oui1; u8 oui2; u8 oui3; + u8 dc_shutdown; + /* Timer and counter used to detect RcvBufOvflCnt changes */ struct timer_list rcverr_timer; - u32 rcv_ovfl_cnt; wait_queue_head_t event_queue; - /* Save the enabled LCB error bits */ - u64 lcb_err_en; - u8 dc_shutdown; - /* receive context tail dummy address */ __le64 *rcvhdrtail_dummy_kvaddr; dma_addr_t rcvhdrtail_dummy_dma; - bool eprom_available; /* true if EPROM is available for this device */ - bool aspm_supported; /* Does HW support ASPM */ - bool aspm_enabled; /* ASPM state: enabled/disabled */ + u32 rcv_ovfl_cnt; /* Serialize ASPM enable/disable between multiple verbs contexts */ spinlock_t aspm_lock; /* Number of verbs contexts which have disabled ASPM */ atomic_t aspm_disabled_cnt; struct hfi1_affinity *affinity; + bool eprom_available; /* true if EPROM is available for this device */ + bool aspm_supported; /* Does HW support ASPM */ + bool aspm_enabled; /* ASPM state: enabled/disabled */ struct rhashtable sdma_rht; + struct kobject kobj; }; -- cgit v1.2.3 From fe4d924396a861937256293ff4a84b76b84854d8 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Mon, 17 Oct 2016 04:19:47 -0700 Subject: IB/hfi1: Add active channel and backplane support for integrated devices Use scratch registers within the HFI1 device to recover signal integrity information that is then used to tune the channel. While there, update error messages to better convey the result of falling back to a backup file. Reviewed-by: Ira Weiny Reviewed-by: Dennis Dalessandro Reviewed-by: Dean Luick Signed-off-by: Easwar Hariharan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip_registers.h | 3 + drivers/infiniband/hw/hfi1/firmware.c | 136 +++++++++++++++++++++---- drivers/infiniband/hw/hfi1/hfi.h | 16 ++- drivers/infiniband/hw/hfi1/platform.c | 149 +++++++++++++++++++++++----- drivers/infiniband/hw/hfi1/platform.h | 117 ++++++++++++++++++++++ 5 files changed, 376 insertions(+), 45 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 5b9993899789..5bfa839d1c48 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -415,6 +415,9 @@ #define ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT 32 #define ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT 0 #define ASIC_CFG_SCRATCH (ASIC + 0x000000000020) +#define ASIC_CFG_SCRATCH_1 (ASIC_CFG_SCRATCH + 0x08) +#define ASIC_CFG_SCRATCH_2 (ASIC_CFG_SCRATCH + 0x10) +#define ASIC_CFG_SCRATCH_3 (ASIC_CFG_SCRATCH + 0x18) #define ASIC_CFG_THERM_POLL_EN (ASIC + 0x000000000050) #define ASIC_EEP_ADDR_CMD (ASIC + 0x000000000308) #define ASIC_EEP_ADDR_CMD_EP_ADDR_MASK 0xFFFFFFull diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 13db8eb4f4ec..9a36c8c2f8a1 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -707,6 +707,9 @@ static int obtain_firmware(struct hfi1_devdata *dd) &dd->pcidev->dev); if (err) { platform_config = NULL; + dd_dev_err(dd, + "%s: No default platform config file found\n", + __func__); goto done; } dd->platform_config.data = platform_config->data; @@ -1761,8 +1764,17 @@ int parse_platform_config(struct hfi1_devdata *dd) u32 record_idx = 0, table_type = 0, table_length_dwords = 0; int ret = -EINVAL; /* assume failure */ + /* + * For integrated devices that did not fall back to the default file, + * the SI tuning information for active channels is acquired from the + * scratch register bitmap, thus there is no platform config to parse. + * Skip parsing in these situations. + */ + if (is_integrated(dd) && !platform_config_load) + return 0; + if (!dd->platform_config.data) { - dd_dev_info(dd, "%s: Missing config file\n", __func__); + dd_dev_err(dd, "%s: Missing config file\n", __func__); goto bail; } ptr = (u32 *)dd->platform_config.data; @@ -1770,7 +1782,7 @@ int parse_platform_config(struct hfi1_devdata *dd) magic_num = *ptr; ptr++; if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) { - dd_dev_info(dd, "%s: Bad config file\n", __func__); + dd_dev_err(dd, "%s: Bad config file\n", __func__); goto bail; } @@ -1797,9 +1809,9 @@ int parse_platform_config(struct hfi1_devdata *dd) header1 = *ptr; header2 = *(ptr + 1); if (header1 != ~header2) { - dd_dev_info(dd, "%s: Failed validation at offset %ld\n", - __func__, (ptr - (u32 *) - dd->platform_config.data)); + dd_dev_err(dd, "%s: Failed validation at offset %ld\n", + __func__, (ptr - (u32 *) + dd->platform_config.data)); goto bail; } @@ -1841,11 +1853,11 @@ int parse_platform_config(struct hfi1_devdata *dd) table_length_dwords; break; default: - dd_dev_info(dd, - "%s: Unknown data table %d, offset %ld\n", - __func__, table_type, - (ptr - (u32 *) - dd->platform_config.data)); + dd_dev_err(dd, + "%s: Unknown data table %d, offset %ld\n", + __func__, table_type, + (ptr - (u32 *) + dd->platform_config.data)); goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table = ptr; @@ -1865,11 +1877,11 @@ int parse_platform_config(struct hfi1_devdata *dd) case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: break; default: - dd_dev_info(dd, - "%s: Unknown meta table %d, offset %ld\n", - __func__, table_type, - (ptr - - (u32 *)dd->platform_config.data)); + dd_dev_err(dd, + "%s: Unknown meta table %d, offset %ld\n", + __func__, table_type, + (ptr - + (u32 *)dd->platform_config.data)); goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table_metadata = @@ -1884,10 +1896,9 @@ int parse_platform_config(struct hfi1_devdata *dd) /* Jump the table */ ptr += table_length_dwords; if (crc != *ptr) { - dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n", - __func__, (ptr - - (u32 *) - dd->platform_config.data)); + dd_dev_err(dd, "%s: Failed CRC check at offset %ld\n", + __func__, (ptr - + (u32 *)dd->platform_config.data)); goto bail; } /* Jump the CRC DWORD */ @@ -1901,6 +1912,84 @@ bail: return ret; } +static void get_integrated_platform_config_field( + struct hfi1_devdata *dd, + enum platform_config_table_type_encoding table_type, + int field_index, u32 *data) +{ + struct hfi1_pportdata *ppd = dd->pport; + u8 *cache = ppd->qsfp_info.cache; + u32 tx_preset = 0; + + switch (table_type) { + case PLATFORM_CONFIG_SYSTEM_TABLE: + if (field_index == SYSTEM_TABLE_QSFP_POWER_CLASS_MAX) + *data = ppd->max_power_class; + else if (field_index == SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G) + *data = ppd->default_atten; + break; + case PLATFORM_CONFIG_PORT_TABLE: + if (field_index == PORT_TABLE_PORT_TYPE) + *data = ppd->port_type; + else if (field_index == PORT_TABLE_LOCAL_ATTEN_25G) + *data = ppd->local_atten; + else if (field_index == PORT_TABLE_REMOTE_ATTEN_25G) + *data = ppd->remote_atten; + break; + case PLATFORM_CONFIG_RX_PRESET_TABLE: + if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR_APPLY) + *data = (ppd->rx_preset & QSFP_RX_CDR_APPLY_SMASK) >> + QSFP_RX_CDR_APPLY_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP_APPLY) + *data = (ppd->rx_preset & QSFP_RX_EMP_APPLY_SMASK) >> + QSFP_RX_EMP_APPLY_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP_APPLY) + *data = (ppd->rx_preset & QSFP_RX_AMP_APPLY_SMASK) >> + QSFP_RX_AMP_APPLY_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR) + *data = (ppd->rx_preset & QSFP_RX_CDR_SMASK) >> + QSFP_RX_CDR_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP) + *data = (ppd->rx_preset & QSFP_RX_EMP_SMASK) >> + QSFP_RX_EMP_SHIFT; + else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP) + *data = (ppd->rx_preset & QSFP_RX_AMP_SMASK) >> + QSFP_RX_AMP_SHIFT; + break; + case PLATFORM_CONFIG_TX_PRESET_TABLE: + if (cache[QSFP_EQ_INFO_OFFS] & 0x4) + tx_preset = ppd->tx_preset_eq; + else + tx_preset = ppd->tx_preset_noeq; + if (field_index == TX_PRESET_TABLE_PRECUR) + *data = (tx_preset & TX_PRECUR_SMASK) >> + TX_PRECUR_SHIFT; + else if (field_index == TX_PRESET_TABLE_ATTN) + *data = (tx_preset & TX_ATTN_SMASK) >> + TX_ATTN_SHIFT; + else if (field_index == TX_PRESET_TABLE_POSTCUR) + *data = (tx_preset & TX_POSTCUR_SMASK) >> + TX_POSTCUR_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR_APPLY) + *data = (tx_preset & QSFP_TX_CDR_APPLY_SMASK) >> + QSFP_TX_CDR_APPLY_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ_APPLY) + *data = (tx_preset & QSFP_TX_EQ_APPLY_SMASK) >> + QSFP_TX_EQ_APPLY_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR) + *data = (tx_preset & QSFP_TX_CDR_SMASK) >> + QSFP_TX_CDR_SHIFT; + else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ) + *data = (tx_preset & QSFP_TX_EQ_SMASK) >> + QSFP_TX_EQ_SHIFT; + break; + case PLATFORM_CONFIG_QSFP_ATTEN_TABLE: + case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE: + default: + break; + } +} + static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table, int field, u32 *field_len_bits, u32 *field_start_bits) @@ -1976,6 +2065,15 @@ int get_platform_config_field(struct hfi1_devdata *dd, else return -EINVAL; + if (is_integrated(dd) && !platform_config_load) { + /* + * Use saved configuration from ppd for integrated platforms + */ + get_integrated_platform_config_field(dd, table_type, + field_index, data); + return 0; + } + ret = get_platform_fw_field_metadata(dd, table_type, field_index, &field_len_bits, &field_start_bits); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 25e44c4af995..a66d198199ff 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -582,8 +582,16 @@ struct hfi1_pportdata { struct kobject vl2mtu_kobj; /* PHY support */ - u32 port_type; struct qsfp_data qsfp_info; + /* Values for SI tuning of SerDes */ + u32 port_type; + u32 tx_preset_eq; + u32 tx_preset_noeq; + u32 rx_preset; + u8 local_atten; + u8 remote_atten; + u8 default_atten; + u8 max_power_class; /* GUIDs for this interface, in host order, guids[0] is a port guid */ u64 guids[HFI1_GUIDS_PER_PORT]; @@ -2020,6 +2028,12 @@ static inline u32 qsfp_resource(struct hfi1_devdata *dd) return i2c_target(dd->hfi1_id); } +/* Is this device integrated or discrete? */ +static inline bool is_integrated(struct hfi1_devdata *dd) +{ + return dd->pcidev->device == PCI_DEVICE_ID_INTEL1; +} + int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 202433178864..0e7043801bb1 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -49,6 +49,90 @@ #include "efivar.h" #include "eprom.h" +static int validate_scratch_checksum(struct hfi1_devdata *dd) +{ + u64 checksum = 0, temp_scratch = 0; + int i, j, version; + + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH); + version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT; + + /* Prevent power on default of all zeroes from passing checksum */ + if (!version) + return 0; + + /* + * ASIC scratch 0 only contains the checksum and bitmap version as + * fields of interest, both of which are handled separately from the + * loop below, so skip it + */ + checksum += version; + for (i = 1; i < ASIC_NUM_SCRATCH; i++) { + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH + (8 * i)); + for (j = sizeof(u64); j != 0; j -= 2) { + checksum += (temp_scratch & 0xFFFF); + temp_scratch >>= 16; + } + } + + while (checksum >> 16) + checksum = (checksum & CHECKSUM_MASK) + (checksum >> 16); + + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH); + temp_scratch &= CHECKSUM_SMASK; + temp_scratch >>= CHECKSUM_SHIFT; + + if (checksum + temp_scratch == 0xFFFF) + return 1; + return 0; +} + +static void save_platform_config_fields(struct hfi1_devdata *dd) +{ + struct hfi1_pportdata *ppd = dd->pport; + u64 temp_scratch = 0, temp_dest = 0; + + temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH_1); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_PORT_TYPE_SMASK : + PORT0_PORT_TYPE_SMASK); + ppd->port_type = temp_dest >> + (dd->hfi1_id ? PORT1_PORT_TYPE_SHIFT : + PORT0_PORT_TYPE_SHIFT); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SMASK : + PORT0_LOCAL_ATTEN_SMASK); + ppd->local_atten = temp_dest >> + (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SHIFT : + PORT0_LOCAL_ATTEN_SHIFT); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SMASK : + PORT0_REMOTE_ATTEN_SMASK); + ppd->remote_atten = temp_dest >> + (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SHIFT : + PORT0_REMOTE_ATTEN_SHIFT); + + temp_dest = temp_scratch & + (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SMASK : + PORT0_DEFAULT_ATTEN_SMASK); + ppd->default_atten = temp_dest >> + (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SHIFT : + PORT0_DEFAULT_ATTEN_SHIFT); + + temp_scratch = read_csr(dd, dd->hfi1_id ? ASIC_CFG_SCRATCH_3 : + ASIC_CFG_SCRATCH_2); + + ppd->tx_preset_eq = (temp_scratch & TX_EQ_SMASK) >> TX_EQ_SHIFT; + ppd->tx_preset_noeq = (temp_scratch & TX_NO_EQ_SMASK) >> TX_NO_EQ_SHIFT; + ppd->rx_preset = (temp_scratch & RX_SMASK) >> RX_SHIFT; + + ppd->max_power_class = (temp_scratch & QSFP_MAX_POWER_SMASK) >> + QSFP_MAX_POWER_SHIFT; +} + void get_platform_config(struct hfi1_devdata *dd) { int ret = 0; @@ -56,38 +140,49 @@ void get_platform_config(struct hfi1_devdata *dd) u8 *temp_platform_config = NULL; u32 esize; - ret = eprom_read_platform_config(dd, (void **)&temp_platform_config, - &esize); - if (!ret) { - /* success */ - size = esize; - goto success; + if (is_integrated(dd)) { + if (validate_scratch_checksum(dd)) { + save_platform_config_fields(dd); + return; + } + dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n", + __func__); + dd_dev_err(dd, + "%s: Please update your BIOS to support active channels\n", + __func__); + } else { + ret = eprom_read_platform_config(dd, + (void **)&temp_platform_config, + &esize); + if (!ret) { + /* success */ + dd->platform_config.data = temp_platform_config; + dd->platform_config.size = esize; + return; + } + /* fail, try EFI variable */ + + ret = read_hfi1_efi_var(dd, "configuration", &size, + (void **)&temp_platform_config); + if (!ret) { + dd->platform_config.data = temp_platform_config; + dd->platform_config.size = size; + return; + } } - /* fail, try EFI variable */ - - ret = read_hfi1_efi_var(dd, "configuration", &size, - (void **)&temp_platform_config); - if (!ret) - goto success; - - dd_dev_info(dd, - "%s: Failed to get platform config from UEFI, falling back to request firmware\n", - __func__); + dd_dev_err(dd, + "%s: Failed to get platform config, falling back to sub-optimal default file\n", + __func__); /* fall back to request firmware */ platform_config_load = 1; - return; - -success: - dd->platform_config.data = temp_platform_config; - dd->platform_config.size = size; } void free_platform_config(struct hfi1_devdata *dd) { if (!platform_config_load) { /* - * was loaded from EFI, release memory - * allocated by read_efi_var + * was loaded from EFI or the EPROM, release memory + * allocated by read_efi_var/eprom_read_platform_config */ kfree(dd->platform_config.data); } @@ -100,12 +195,16 @@ void free_platform_config(struct hfi1_devdata *dd) void get_port_type(struct hfi1_pportdata *ppd) { int ret; + u32 temp; ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0, - PORT_TABLE_PORT_TYPE, &ppd->port_type, + PORT_TABLE_PORT_TYPE, &temp, 4); - if (ret) + if (ret) { ppd->port_type = PORT_TYPE_UNKNOWN; + return; + } + ppd->port_type = temp; } int set_qsfp_tx(struct hfi1_pportdata *ppd, int on) diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h index e2c21613c326..31155401a95b 100644 --- a/drivers/infiniband/hw/hfi1/platform.h +++ b/drivers/infiniband/hw/hfi1/platform.h @@ -295,6 +295,123 @@ enum link_tuning_encoding { OPA_UNKNOWN_TUNING }; +/* + * Shifts and masks for the link SI tuning values stuffed into the ASIC scratch + * registers for integrated platforms + */ +#define PORT0_PORT_TYPE_SHIFT 0 +#define PORT0_LOCAL_ATTEN_SHIFT 4 +#define PORT0_REMOTE_ATTEN_SHIFT 10 +#define PORT0_DEFAULT_ATTEN_SHIFT 32 + +#define PORT1_PORT_TYPE_SHIFT 16 +#define PORT1_LOCAL_ATTEN_SHIFT 20 +#define PORT1_REMOTE_ATTEN_SHIFT 26 +#define PORT1_DEFAULT_ATTEN_SHIFT 40 + +#define PORT0_PORT_TYPE_MASK 0xFUL +#define PORT0_LOCAL_ATTEN_MASK 0x3FUL +#define PORT0_REMOTE_ATTEN_MASK 0x3FUL +#define PORT0_DEFAULT_ATTEN_MASK 0xFFUL + +#define PORT1_PORT_TYPE_MASK 0xFUL +#define PORT1_LOCAL_ATTEN_MASK 0x3FUL +#define PORT1_REMOTE_ATTEN_MASK 0x3FUL +#define PORT1_DEFAULT_ATTEN_MASK 0xFFUL + +#define PORT0_PORT_TYPE_SMASK (PORT0_PORT_TYPE_MASK << \ + PORT0_PORT_TYPE_SHIFT) +#define PORT0_LOCAL_ATTEN_SMASK (PORT0_LOCAL_ATTEN_MASK << \ + PORT0_LOCAL_ATTEN_SHIFT) +#define PORT0_REMOTE_ATTEN_SMASK (PORT0_REMOTE_ATTEN_MASK << \ + PORT0_REMOTE_ATTEN_SHIFT) +#define PORT0_DEFAULT_ATTEN_SMASK (PORT0_DEFAULT_ATTEN_MASK << \ + PORT0_DEFAULT_ATTEN_SHIFT) + +#define PORT1_PORT_TYPE_SMASK (PORT1_PORT_TYPE_MASK << \ + PORT1_PORT_TYPE_SHIFT) +#define PORT1_LOCAL_ATTEN_SMASK (PORT1_LOCAL_ATTEN_MASK << \ + PORT1_LOCAL_ATTEN_SHIFT) +#define PORT1_REMOTE_ATTEN_SMASK (PORT1_REMOTE_ATTEN_MASK << \ + PORT1_REMOTE_ATTEN_SHIFT) +#define PORT1_DEFAULT_ATTEN_SMASK (PORT1_DEFAULT_ATTEN_MASK << \ + PORT1_DEFAULT_ATTEN_SHIFT) + +#define QSFP_MAX_POWER_SHIFT 0 +#define TX_NO_EQ_SHIFT 4 +#define TX_EQ_SHIFT 25 +#define RX_SHIFT 46 + +#define QSFP_MAX_POWER_MASK 0xFUL +#define TX_NO_EQ_MASK 0x1FFFFFUL +#define TX_EQ_MASK 0x1FFFFFUL +#define RX_MASK 0xFFFFUL + +#define QSFP_MAX_POWER_SMASK (QSFP_MAX_POWER_MASK << \ + QSFP_MAX_POWER_SHIFT) +#define TX_NO_EQ_SMASK (TX_NO_EQ_MASK << TX_NO_EQ_SHIFT) +#define TX_EQ_SMASK (TX_EQ_MASK << TX_EQ_SHIFT) +#define RX_SMASK (RX_MASK << RX_SHIFT) + +#define TX_PRECUR_SHIFT 0 +#define TX_ATTN_SHIFT 4 +#define QSFP_TX_CDR_APPLY_SHIFT 9 +#define QSFP_TX_EQ_APPLY_SHIFT 10 +#define QSFP_TX_CDR_SHIFT 11 +#define QSFP_TX_EQ_SHIFT 12 +#define TX_POSTCUR_SHIFT 16 + +#define TX_PRECUR_MASK 0xFUL +#define TX_ATTN_MASK 0x1FUL +#define QSFP_TX_CDR_APPLY_MASK 0x1UL +#define QSFP_TX_EQ_APPLY_MASK 0x1UL +#define QSFP_TX_CDR_MASK 0x1UL +#define QSFP_TX_EQ_MASK 0xFUL +#define TX_POSTCUR_MASK 0x1FUL + +#define TX_PRECUR_SMASK (TX_PRECUR_MASK << TX_PRECUR_SHIFT) +#define TX_ATTN_SMASK (TX_ATTN_MASK << TX_ATTN_SHIFT) +#define QSFP_TX_CDR_APPLY_SMASK (QSFP_TX_CDR_APPLY_MASK << \ + QSFP_TX_CDR_APPLY_SHIFT) +#define QSFP_TX_EQ_APPLY_SMASK (QSFP_TX_EQ_APPLY_MASK << \ + QSFP_TX_EQ_APPLY_SHIFT) +#define QSFP_TX_CDR_SMASK (QSFP_TX_CDR_MASK << QSFP_TX_CDR_SHIFT) +#define QSFP_TX_EQ_SMASK (QSFP_TX_EQ_MASK << QSFP_TX_EQ_SHIFT) +#define TX_POSTCUR_SMASK (TX_POSTCUR_MASK << TX_POSTCUR_SHIFT) + +#define QSFP_RX_CDR_APPLY_SHIFT 0 +#define QSFP_RX_EMP_APPLY_SHIFT 1 +#define QSFP_RX_AMP_APPLY_SHIFT 2 +#define QSFP_RX_CDR_SHIFT 3 +#define QSFP_RX_EMP_SHIFT 4 +#define QSFP_RX_AMP_SHIFT 8 + +#define QSFP_RX_CDR_APPLY_MASK 0x1UL +#define QSFP_RX_EMP_APPLY_MASK 0x1UL +#define QSFP_RX_AMP_APPLY_MASK 0x1UL +#define QSFP_RX_CDR_MASK 0x1UL +#define QSFP_RX_EMP_MASK 0xFUL +#define QSFP_RX_AMP_MASK 0x3UL + +#define QSFP_RX_CDR_APPLY_SMASK (QSFP_RX_CDR_APPLY_MASK << \ + QSFP_RX_CDR_APPLY_SHIFT) +#define QSFP_RX_EMP_APPLY_SMASK (QSFP_RX_EMP_APPLY_MASK << \ + QSFP_RX_EMP_APPLY_SHIFT) +#define QSFP_RX_AMP_APPLY_SMASK (QSFP_RX_AMP_APPLY_MASK << \ + QSFP_RX_AMP_APPLY_SHIFT) +#define QSFP_RX_CDR_SMASK (QSFP_RX_CDR_MASK << QSFP_RX_CDR_SHIFT) +#define QSFP_RX_EMP_SMASK (QSFP_RX_EMP_MASK << QSFP_RX_EMP_SHIFT) +#define QSFP_RX_AMP_SMASK (QSFP_RX_AMP_MASK << QSFP_RX_AMP_SHIFT) + +#define BITMAP_VERSION 1 +#define BITMAP_VERSION_SHIFT 44 +#define BITMAP_VERSION_MASK 0xFUL +#define BITMAP_VERSION_SMASK (BITMAP_VERSION_MASK << \ + BITMAP_VERSION_SHIFT) +#define CHECKSUM_SHIFT 48 +#define CHECKSUM_MASK 0xFFFFUL +#define CHECKSUM_SMASK (CHECKSUM_MASK << CHECKSUM_SHIFT) + /* platform.c */ void get_platform_config(struct hfi1_devdata *dd); void free_platform_config(struct hfi1_devdata *dd); -- cgit v1.2.3 From 2474d775d9e2f935ff6840c8b21b4262afacc821 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Tue, 25 Oct 2016 13:12:28 -0700 Subject: IB/hfi1: Get rid of divide in pio buffer allocator The div instruction shows costly in profiles. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/pio.c | 11 +++++++---- drivers/infiniband/hw/hfi1/pio.h | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 385e4dcf2cd3..516fac38d31e 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1249,6 +1249,7 @@ int sc_enable(struct send_context *sc) sc->free = 0; sc->alloc_free = 0; sc->fill = 0; + sc->fill_wrap = 0; sc->sr_head = 0; sc->sr_tail = 0; sc->flags = 0; @@ -1392,7 +1393,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len, unsigned long flags; unsigned long avail; unsigned long blocks = dwords_to_blocks(dw_len); - unsigned long start_fill; + u32 fill_wrap; int trycount = 0; u32 head, next; @@ -1435,8 +1436,11 @@ retry: head = sc->sr_head; /* "allocate" the buffer */ - start_fill = sc->fill; sc->fill += blocks; + fill_wrap = sc->fill_wrap; + sc->fill_wrap += blocks; + if (sc->fill_wrap >= sc->credits) + sc->fill_wrap = sc->fill_wrap - sc->credits; /* * Fill the parts that the releaser looks at before moving the head. @@ -1465,8 +1469,7 @@ retry: spin_unlock_irqrestore(&sc->alloc_lock, flags); /* finish filling in the buffer outside the lock */ - pbuf->start = sc->base_addr + ((start_fill % sc->credits) - * PIO_BLOCK_SIZE); + pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; pbuf->size = sc->credits * PIO_BLOCK_SIZE; pbuf->end = sc->base_addr + pbuf->size; pbuf->block_count = blocks; diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index bd19507b6bb0..498b548055e0 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -119,6 +119,7 @@ struct send_context { unsigned long fill; /* official alloc count */ unsigned long alloc_free; /* copy of free (less cache thrash) */ u32 __percpu *buffers_allocated;/* count of buffers allocated */ + u32 fill_wrap; /* tracks fill within ring */ /* releaser fields */ spinlock_t release_lock ____cacheline_aligned_in_smp; u32 sr_tail; /* shadow ring tail */ -- cgit v1.2.3 From 8af8d2970ed98493a2db88dfcad88b0065e55e79 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Tue, 25 Oct 2016 13:12:34 -0700 Subject: IB/hfi1: Optimize pio_buf and send_context structs Both pio_buf and send_context structs have oversized fields and have cachelines that can be optimized. Reduce oversized fields for both structs. Make sure pio_buf struct fits within a cacheline. Move read-only fields to their own cacheline in send_context struct. All of this will avoid cacheline trading as the ring progresses and pio buffers/send contexts are used. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/pio.c | 5 ++--- drivers/infiniband/hw/hfi1/pio.h | 29 +++++++++++++++-------------- drivers/infiniband/hw/hfi1/pio_copy.c | 22 +++++++++++----------- 3 files changed, 28 insertions(+), 28 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 516fac38d31e..86a7f365b624 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -765,6 +765,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, sc->hw_context = hw_context; cr_group_addresses(sc, &dma); sc->credits = sci->credits; + sc->size = sc->credits * PIO_BLOCK_SIZE; /* PIO Send Memory Address details */ #define PIO_ADDR_CONTEXT_MASK 0xfful @@ -1470,9 +1471,7 @@ retry: /* finish filling in the buffer outside the lock */ pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE; - pbuf->size = sc->credits * PIO_BLOCK_SIZE; - pbuf->end = sc->base_addr + pbuf->size; - pbuf->block_count = blocks; + pbuf->end = sc->base_addr + sc->size; pbuf->qw_written = 0; pbuf->carry_bytes = 0; pbuf->carry.val64 = 0; diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 498b548055e0..867e5ffc3595 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -83,43 +83,43 @@ struct pio_buf { void *arg; /* argument for cb */ void __iomem *start; /* buffer start address */ void __iomem *end; /* context end address */ - unsigned long size; /* context size, in bytes */ unsigned long sent_at; /* buffer is sent when <= free */ - u32 block_count; /* size of buffer, in blocks */ - u32 qw_written; /* QW written so far */ - u32 carry_bytes; /* number of valid bytes in carry */ union mix carry; /* pending unwritten bytes */ + u16 qw_written; /* QW written so far */ + u8 carry_bytes; /* number of valid bytes in carry */ }; /* cache line aligned pio buffer array */ union pio_shadow_ring { struct pio_buf pbuf; - u64 unused[16]; /* cache line spacer */ } ____cacheline_aligned; /* per-NUMA send context */ struct send_context { /* read-only after init */ struct hfi1_devdata *dd; /* device */ - void __iomem *base_addr; /* start of PIO memory */ union pio_shadow_ring *sr; /* shadow ring */ + void __iomem *base_addr; /* start of PIO memory */ + u32 __percpu *buffers_allocated;/* count of buffers allocated */ + u32 size; /* context size, in bytes */ - struct work_struct halt_work; /* halted context work queue entry */ - unsigned long flags; /* flags */ int node; /* context home node */ - int type; /* context type */ - u32 sw_index; /* software index number */ - u32 hw_context; /* hardware context number */ - u32 credits; /* number of blocks in context */ u32 sr_size; /* size of the shadow ring */ - u32 group; /* credit return group */ + u16 flags; /* flags */ + u8 type; /* context type */ + u8 sw_index; /* software index number */ + u8 hw_context; /* hardware context number */ + u8 group; /* credit return group */ + /* allocator fields */ spinlock_t alloc_lock ____cacheline_aligned_in_smp; u32 sr_head; /* shadow ring head */ unsigned long fill; /* official alloc count */ unsigned long alloc_free; /* copy of free (less cache thrash) */ - u32 __percpu *buffers_allocated;/* count of buffers allocated */ u32 fill_wrap; /* tracks fill within ring */ + u32 credits; /* number of blocks in context */ + /* adding a new field here would make it part of this cacheline */ + /* releaser fields */ spinlock_t release_lock ____cacheline_aligned_in_smp; u32 sr_tail; /* shadow ring tail */ @@ -131,6 +131,7 @@ struct send_context { u32 credit_intr_count; /* count of credit intr users */ u64 credit_ctrl; /* cache for credit control */ wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */ + struct work_struct halt_work; /* halted context work queue entry */ }; /* send context flags */ diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index aa7773643107..03024cec78dd 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -129,8 +129,8 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -361,8 +361,8 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -458,8 +458,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -492,7 +492,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) */ /* adjust if we have wrapped */ if (dest >= pbuf->end) - dest -= pbuf->size; + dest -= pbuf->sc->size; /* jump to the SOP range if within the first block */ else if (pbuf->qw_written < PIO_BLOCK_QWS) dest += SOP_DISTANCE; @@ -584,8 +584,8 @@ static void mid_copy_straight(struct pio_buf *pbuf, dest += sizeof(u64); } - dest -= pbuf->size; - dend -= pbuf->size; + dest -= pbuf->sc->size; + dend -= pbuf->sc->size; } /* write 8-byte non-SOP, non-wrap chunk data */ @@ -666,7 +666,7 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) */ /* adjust if we've wrapped */ if (dest >= pbuf->end) - dest -= pbuf->size; + dest -= pbuf->sc->size; /* jump to SOP range if within the first block */ else if (pbuf->qw_written < PIO_BLOCK_QWS) dest += SOP_DISTANCE; @@ -719,7 +719,7 @@ void seg_pio_copy_end(struct pio_buf *pbuf) */ /* adjust if we have wrapped */ if (dest >= pbuf->end) - dest -= pbuf->size; + dest -= pbuf->sc->size; /* jump to the SOP range if within the first block */ else if (pbuf->qw_written < PIO_BLOCK_QWS) dest += SOP_DISTANCE; -- cgit v1.2.3 From 62aeddbf28fa63872e8f13f47177338b0f1fd8b5 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 7 Dec 2016 19:32:15 -0800 Subject: IB/hfi1: Read new EPROM format Add the ability to read the new EPROM format. Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/eprom.c | 211 +++++++++++++++++++++++++++++++++++-- 1 file changed, 203 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c index e70c223801b4..26da124c88e2 100644 --- a/drivers/infiniband/hw/hfi1/eprom.c +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -207,6 +207,40 @@ done_asic: /* magic character sequence that trails an image */ #define IMAGE_TRAIL_MAGIC "egamiAPO" +/* EPROM file types */ +#define HFI1_EFT_PLATFORM_CONFIG 2 + +/* segment size - 128 KiB */ +#define SEG_SIZE (128 * 1024) + +struct hfi1_eprom_footer { + u32 oprom_size; /* size of the oprom, in bytes */ + u16 num_table_entries; + u16 version; /* version of this footer */ + u32 magic; /* must be last */ +}; + +struct hfi1_eprom_table_entry { + u32 type; /* file type */ + u32 offset; /* file offset from start of EPROM */ + u32 size; /* file size, in bytes */ +}; + +/* + * Calculate the max number of table entries that will fit within a directory + * buffer of size 'dir_size'. + */ +#define MAX_TABLE_ENTRIES(dir_size) \ + (((dir_size) - sizeof(struct hfi1_eprom_footer)) / \ + sizeof(struct hfi1_eprom_table_entry)) + +#define DIRECTORY_SIZE(n) (sizeof(struct hfi1_eprom_footer) + \ + (sizeof(struct hfi1_eprom_table_entry) * (n))) + +#define MAGIC4(a, b, c, d) ((d) << 24 | (c) << 16 | (b) << 8 | (a)) +#define FOOTER_MAGIC MAGIC4('e', 'p', 'r', 'm') +#define FOOTER_VERSION 1 + /* * Read all of partition 1. The actual file is at the front. Adjust * the returned size if a trailing image magic is found. @@ -241,6 +275,167 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, return 0; } +/* + * The segment magic has been checked. There is a footer and table of + * contents present. + * + * directory is a u32 aligned buffer of size EP_PAGE_SIZE. + */ +static int read_segment_platform_config(struct hfi1_devdata *dd, + void *directory, void **data, u32 *size) +{ + struct hfi1_eprom_footer *footer; + struct hfi1_eprom_table_entry *table; + struct hfi1_eprom_table_entry *entry; + void *buffer = NULL; + void *table_buffer = NULL; + int ret, i; + u32 directory_size; + u32 seg_base, seg_offset; + u32 bytes_available, ncopied, to_copy; + + /* the footer is at the end of the directory */ + footer = (struct hfi1_eprom_footer *) + (directory + EP_PAGE_SIZE - sizeof(*footer)); + + /* make sure the structure version is supported */ + if (footer->version != FOOTER_VERSION) + return -EINVAL; + + /* oprom size cannot be larger than a segment */ + if (footer->oprom_size >= SEG_SIZE) + return -EINVAL; + + /* the file table must fit in a segment with the oprom */ + if (footer->num_table_entries > + MAX_TABLE_ENTRIES(SEG_SIZE - footer->oprom_size)) + return -EINVAL; + + /* find the file table start, which precedes the footer */ + directory_size = DIRECTORY_SIZE(footer->num_table_entries); + if (directory_size <= EP_PAGE_SIZE) { + /* the file table fits into the directory buffer handed in */ + table = (struct hfi1_eprom_table_entry *) + (directory + EP_PAGE_SIZE - directory_size); + } else { + /* need to allocate and read more */ + table_buffer = kmalloc(directory_size, GFP_KERNEL); + if (!table_buffer) + return -ENOMEM; + ret = read_length(dd, SEG_SIZE - directory_size, + directory_size, table_buffer); + if (ret) + goto done; + table = table_buffer; + } + + /* look for the platform configuration file in the table */ + for (entry = NULL, i = 0; i < footer->num_table_entries; i++) { + if (table[i].type == HFI1_EFT_PLATFORM_CONFIG) { + entry = &table[i]; + break; + } + } + if (!entry) { + ret = -ENOENT; + goto done; + } + + /* + * Sanity check on the configuration file size - it should never + * be larger than 4 KiB. + */ + if (entry->size > (4 * 1024)) { + dd_dev_err(dd, "Bad configuration file size 0x%x\n", + entry->size); + ret = -EINVAL; + goto done; + } + + /* check for bogus offset and size that wrap when added together */ + if (entry->offset + entry->size < entry->offset) { + dd_dev_err(dd, + "Bad configuration file start + size 0x%x+0x%x\n", + entry->offset, entry->size); + ret = -EINVAL; + goto done; + } + + /* allocate the buffer to return */ + buffer = kmalloc(entry->size, GFP_KERNEL); + if (!buffer) { + ret = -ENOMEM; + goto done; + } + + /* + * Extract the file by looping over segments until it is fully read. + */ + seg_offset = entry->offset % SEG_SIZE; + seg_base = entry->offset - seg_offset; + ncopied = 0; + while (ncopied < entry->size) { + /* calculate data bytes available in this segment */ + + /* start with the bytes from the current offset to the end */ + bytes_available = SEG_SIZE - seg_offset; + /* subtract off footer and table from segment 0 */ + if (seg_base == 0) { + /* + * Sanity check: should not have a starting point + * at or within the directory. + */ + if (bytes_available <= directory_size) { + dd_dev_err(dd, + "Bad configuration file - offset 0x%x within footer+table\n", + entry->offset); + ret = -EINVAL; + goto done; + } + bytes_available -= directory_size; + } + + /* calculate bytes wanted */ + to_copy = entry->size - ncopied; + + /* max out at the available bytes in this segment */ + if (to_copy > bytes_available) + to_copy = bytes_available; + + /* + * Read from the EPROM. + * + * The sanity check for entry->offset is done in read_length(). + * The EPROM offset is validated against what the hardware + * addressing supports. In addition, if the offset is larger + * than the actual EPROM, it silently wraps. It will work + * fine, though the reader may not get what they expected + * from the EPROM. + */ + ret = read_length(dd, seg_base + seg_offset, to_copy, + buffer + ncopied); + if (ret) + goto done; + + ncopied += to_copy; + + /* set up for next segment */ + seg_offset = footer->oprom_size; + seg_base += SEG_SIZE; + } + + /* success */ + ret = 0; + *data = buffer; + *size = entry->size; + +done: + kfree(table_buffer); + if (ret) + kfree(buffer); + return ret; +} + /* * Read the platform configuration file from the EPROM. * @@ -253,6 +448,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, * -EBUSY - not able to acquire access to the EPROM * -ENOENT - no recognizable file written * -ENOMEM - buffer could not be allocated + * -EINVAL - invalid EPROM contentents found */ int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size) { @@ -266,21 +462,20 @@ int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size) if (ret) return -EBUSY; - /* read the last page of P0 for the EPROM format magic */ - ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory); + /* read the last page of the segment for the EPROM format magic */ + ret = read_length(dd, SEG_SIZE - EP_PAGE_SIZE, EP_PAGE_SIZE, directory); if (ret) goto done; - /* last dword of P0 contains a magic indicator */ - if (directory[EP_PAGE_DWORDS - 1] == 0) { + /* last dword of the segment contains a magic value */ + if (directory[EP_PAGE_DWORDS - 1] == FOOTER_MAGIC) { + /* segment format */ + ret = read_segment_platform_config(dd, directory, data, size); + } else { /* partition format */ ret = read_partition_platform_config(dd, data, size); - goto done; } - /* nothing recognized */ - ret = -ENOENT; - done: release_chip_resource(dd, CR_EPROM); return ret; -- cgit v1.2.3 From d7cf4ccf6fe52694142f65cce82decb3fe833f48 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 7 Dec 2016 19:32:22 -0800 Subject: IB/hfi1: Fix dc8051 multiple qword memory reads When reading multiple dc8051 data memory locations at once, the read enabled field must be toggled at every address change. Do that by writing only the address first, then writing the enable. Reviewed-by: Ira Weiny Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/firmware.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 9a36c8c2f8a1..bcf7da4599c4 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -263,11 +263,13 @@ static int __read_8051_data(struct hfi1_devdata *dd, u32 addr, u64 *result) u64 reg; int count; - /* start the read at the given address */ - reg = ((addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK) - << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT) - | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK; + /* step 1: set the address, clear enable */ + reg = (addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK) + << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT; write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, reg); + /* step 2: enable */ + write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, + reg | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK); /* wait until ACCESS_COMPLETED is set */ count = 0; -- cgit v1.2.3 From 53e91d264be9e9adbf34b83b8ce5c12452f6765e Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 7 Dec 2016 19:32:28 -0800 Subject: IB/hfi1: Use non-atomic __test_and_clear_bit in hot path qp->r_aflags is already protected by qp->r_lock, therefore, test_and_clear_bit() doesn't need to be atomic. Profile shows this function call is costly. Change the test_and_clear_bit() call to use the non-atomic variant. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 8bc5013f39a1..aaca8588e199 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2295,7 +2295,7 @@ send_last: hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last); rvt_put_ss(&qp->r_sge); qp->r_msn++; - if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) + if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) break; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; -- cgit v1.2.3 From 1b9e7749335281c086243c2dc75d2ad7c2b32abe Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 7 Dec 2016 19:32:34 -0800 Subject: IB/hfi1: Export 8051 memory and LCB registers via debugfs Both the 8051 memory and LCB register access require multiple steps and coordination with the driver. This cannot be safely done with resource0 alone. The 8051 memory is exported read-only. LCB is exported read/write. Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/debugfs.c | 110 +++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 632ba21759ab..8725f4c086cf 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -541,6 +541,114 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf, return ret; } +/* read the dc8051 memory */ +static ssize_t dc8051_memory_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + ssize_t rval; + void *tmp; + loff_t start, end; + + /* the checks below expect the position to be positive */ + if (*ppos < 0) + return -EINVAL; + + tmp = kzalloc(DC8051_DATA_MEM_SIZE, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + /* + * Fill in the requested portion of the temporary buffer from the + * 8051 memory. The 8051 memory read is done in terms of 8 bytes. + * Adjust start and end to fit. Skip reading anything if out of + * range. + */ + start = *ppos & ~0x7; /* round down */ + if (start < DC8051_DATA_MEM_SIZE) { + end = (*ppos + count + 7) & ~0x7; /* round up */ + if (end > DC8051_DATA_MEM_SIZE) + end = DC8051_DATA_MEM_SIZE; + rval = read_8051_data(ppd->dd, start, end - start, + (u64 *)(tmp + start)); + if (rval) + goto done; + } + + rval = simple_read_from_buffer(buf, count, ppos, tmp, + DC8051_DATA_MEM_SIZE); +done: + kfree(tmp); + return rval; +} + +static ssize_t debugfs_lcb_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + struct hfi1_devdata *dd = ppd->dd; + unsigned long total, csr_off; + u64 data; + + if (*ppos < 0) + return -EINVAL; + /* only read 8 byte quantities */ + if ((count % 8) != 0) + return -EINVAL; + /* offset must be 8-byte aligned */ + if ((*ppos % 8) != 0) + return -EINVAL; + /* do nothing if out of range or zero count */ + if (*ppos >= (LCB_END - LCB_START) || !count) + return 0; + /* reduce count if needed */ + if (*ppos + count > LCB_END - LCB_START) + count = (LCB_END - LCB_START) - *ppos; + + csr_off = LCB_START + *ppos; + for (total = 0; total < count; total += 8, csr_off += 8) { + if (read_lcb_csr(dd, csr_off, (u64 *)&data)) + break; /* failed */ + if (put_user(data, (unsigned long __user *)(buf + total))) + break; + } + *ppos += total; + return total; +} + +static ssize_t debugfs_lcb_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hfi1_pportdata *ppd = private2ppd(file); + struct hfi1_devdata *dd = ppd->dd; + unsigned long total, csr_off, data; + + if (*ppos < 0) + return -EINVAL; + /* only write 8 byte quantities */ + if ((count % 8) != 0) + return -EINVAL; + /* offset must be 8-byte aligned */ + if ((*ppos % 8) != 0) + return -EINVAL; + /* do nothing if out of range or zero count */ + if (*ppos >= (LCB_END - LCB_START) || !count) + return 0; + /* reduce count if needed */ + if (*ppos + count > LCB_END - LCB_START) + count = (LCB_END - LCB_START) - *ppos; + + csr_off = LCB_START + *ppos; + for (total = 0; total < count; total += 8, csr_off += 8) { + if (get_user(data, (unsigned long __user *)(buf + total))) + break; + if (write_lcb_csr(dd, csr_off, data)) + break; /* failed */ + } + *ppos += total; + return total; +} + /* * read the per-port QSFP data for ppd */ @@ -931,6 +1039,8 @@ static const struct counter_info port_cntr_ops[] = { DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write, qsfp2_debugfs_open, qsfp2_debugfs_release), DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), + DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL), + DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write), }; static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos) -- cgit v1.2.3 From e730139b3464cc740c33131c872f7d173744ef11 Mon Sep 17 00:00:00 2001 From: Jakub Pawlak Date: Wed, 7 Dec 2016 19:32:41 -0800 Subject: IB/hfi1: Disable header suppression for short packets For the received packets with payload less or equal 8DWS RxDmaDataFifoRdUncErr is not reported. There is set RHF.EccErr if the header is not suppressed. When such packet is detected on the send side the header suppression mechanism is disabled by clearing SH bit in the packet header. Reviewed-by: Mitko Haralanov Signed-off-by: Jakub Pawlak Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 60 ++++++++++++++++++++++++---------- include/uapi/rdma/hfi/hfi1_user.h | 2 +- 2 files changed, 44 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index a761f804111e..663980ef01a8 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -115,6 +115,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_HCRC_LOWER_MASK 0xff #define AHG_KDETH_INTR_SHIFT 12 +#define AHG_KDETH_SH_SHIFT 13 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) @@ -144,8 +145,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_OM_LARGE 64 #define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) -/* Last packet in the request */ -#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0) +/* Tx request flag bits */ +#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ +#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ /* SDMA request flag bits */ #define SDMA_REQ_FOR_THREAD 1 @@ -943,8 +945,13 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) tx->busycount = 0; INIT_LIST_HEAD(&tx->list); + /* + * For the last packet set the ACK request + * and disable header suppression. + */ if (req->seqnum == req->info.npkts - 1) - tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT; + tx->flags |= (TXREQ_FLAGS_REQ_ACK | + TXREQ_FLAGS_REQ_DISABLE_SH); /* * Calculate the payload size - this is min of the fragment @@ -963,11 +970,22 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) } datalen = compute_data_length(req, tx); + + /* + * Disable header suppression for the payload <= 8DWS. + * If there is an uncorrectable error in the receive + * data FIFO when the received payload size is less than + * or equal to 8DWS then the RxDmaDataFifoRdUncErr is + * not reported.There is set RHF.EccErr if the header + * is not suppressed. + */ if (!datalen) { SDMA_DBG(req, "Request has data but pkt len is 0"); ret = -EFAULT; goto free_tx; + } else if (datalen <= 32) { + tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH; } } @@ -990,6 +1008,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) LRH2PBC(lrhlen); tx->hdr.pbc[0] = cpu_to_le16(pbclen); } + ret = check_header_template(req, &tx->hdr, + lrhlen, datalen); + if (ret) + goto free_tx; ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY, sizeof(tx->hdr) + datalen, @@ -1351,7 +1373,7 @@ static int set_txreq_header(struct user_sdma_request *req, req->seqnum)); /* Set ACK request on last packet */ - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) hdr->bth[2] |= cpu_to_be32(1UL << 31); /* Set the new offset */ @@ -1384,8 +1406,8 @@ static int set_txreq_header(struct user_sdma_request *req, /* Set KDETH.TID based on value for this TID */ KDETH_SET(hdr->kdeth.ver_tid_offset, TID, EXP_TID_GET(tidval, IDX)); - /* Clear KDETH.SH only on the last packet */ - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) + /* Clear KDETH.SH when DISABLE_SH flag is set */ + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0); /* * Set the KDETH.OFFSET and KDETH.OM based on size of @@ -1429,7 +1451,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, /* BTH.PSN and BTH.A */ val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) & (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff); - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) val32 |= 1UL << 31; AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); @@ -1468,19 +1490,23 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, AHG_HEADER_SET(req->ahg, diff, 7, 0, 16, ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 | ((req->tidoffset / req->omfactor) & 0x7fff))); - /* KDETH.TIDCtrl, KDETH.TID */ + /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */ val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) | - (EXP_TID_GET(tidval, IDX) & 0x3ff)); - /* Clear KDETH.SH on last packet */ - if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) { - val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset, - INTR) << - AHG_KDETH_INTR_SHIFT); - val &= cpu_to_le16(~(1U << 13)); - AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); + (EXP_TID_GET(tidval, IDX) & 0x3ff)); + + if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) { + val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset, + INTR) << + AHG_KDETH_INTR_SHIFT)); } else { - AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val); + val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ? + cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) : + cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset, + INTR) << + AHG_KDETH_INTR_SHIFT)); } + + AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); } trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt, diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index d15e7289d835..587b7360e820 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -75,7 +75,7 @@ * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. */ -#define HFI1_USER_SWMINOR 2 +#define HFI1_USER_SWMINOR 3 /* * We will encode the major/minor inside a single 32bit version number. -- cgit v1.2.3 From cf4c2f8c9d0483f7585883d3f6733ebf09ea837c Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 7 Dec 2016 19:32:47 -0800 Subject: IB/rdmavt: Fix trace hierarchy Split rdmavt traces into separate files to preserve the original hierarchy since only one trace sub system may now be defined per header file. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/trace.h | 140 +------------------------------ drivers/infiniband/sw/rdmavt/trace_qp.h | 96 +++++++++++++++++++++ drivers/infiniband/sw/rdmavt/trace_rvt.h | 81 ++++++++++++++++++ drivers/infiniband/sw/rdmavt/trace_tx.h | 132 +++++++++++++++++++++++++++++ 4 files changed, 312 insertions(+), 137 deletions(-) create mode 100644 drivers/infiniband/sw/rdmavt/trace_qp.h create mode 100644 drivers/infiniband/sw/rdmavt/trace_rvt.h create mode 100644 drivers/infiniband/sw/rdmavt/trace_tx.h (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 6c0457db5499..11a2afbc8e76 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -45,143 +45,9 @@ * */ -#undef TRACE_SYSTEM_VAR -#define TRACE_SYSTEM_VAR rdmavt - -#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) -#define __RDMAVT_TRACE_H - -#include -#include - -#include -#include - #define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi)) #define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi)) -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rdmavt - -TRACE_EVENT(rvt_dbg, - TP_PROTO(struct rvt_dev_info *rdi, - const char *msg), - TP_ARGS(rdi, msg), - TP_STRUCT__entry( - RDI_DEV_ENTRY(rdi) - __string(msg, msg) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(rdi); - __assign_str(msg, msg); - ), - TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rvt_qphash -DECLARE_EVENT_CLASS(rvt_qphash_template, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket), - TP_STRUCT__entry( - RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, bucket) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) - __entry->qpn = qp->ibqp.qp_num; - __entry->bucket = bucket; - ), - TP_printk( - "[%s] qpn 0x%x bucket %u", - __get_str(dev), - __entry->qpn, - __entry->bucket - ) -); - -DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rvt_tx - -#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode } -#define show_wr_opcode(opcode) \ -__print_symbolic(opcode, \ - wr_opcode_name(RDMA_WRITE), \ - wr_opcode_name(RDMA_WRITE_WITH_IMM), \ - wr_opcode_name(SEND), \ - wr_opcode_name(SEND_WITH_IMM), \ - wr_opcode_name(RDMA_READ), \ - wr_opcode_name(ATOMIC_CMP_AND_SWP), \ - wr_opcode_name(ATOMIC_FETCH_AND_ADD), \ - wr_opcode_name(LSO), \ - wr_opcode_name(SEND_WITH_INV), \ - wr_opcode_name(RDMA_READ_WITH_INV), \ - wr_opcode_name(LOCAL_INV), \ - wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ - wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) - -#define POS_PRN \ -"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" - -TRACE_EVENT( - rvt_post_one_wr, - TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), - TP_ARGS(qp, wqe), - TP_STRUCT__entry( - RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) - __field(u64, wr_id) - __field(u32, qpn) - __field(u32, psn) - __field(u32, lpsn) - __field(u32, length) - __field(u32, opcode) - __field(u32, size) - __field(u32, avail) - __field(u32, head) - __field(u32, last) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) - __entry->wr_id = wqe->wr.wr_id; - __entry->qpn = qp->ibqp.qp_num; - __entry->psn = wqe->psn; - __entry->lpsn = wqe->lpsn; - __entry->length = wqe->length; - __entry->opcode = wqe->wr.opcode; - __entry->size = qp->s_size; - __entry->avail = qp->s_avail; - __entry->head = qp->s_head; - __entry->last = qp->s_last; - ), - TP_printk( - POS_PRN, - __get_str(dev), - __entry->wr_id, - __entry->qpn, - __entry->psn, - __entry->lpsn, - __entry->length, - __entry->opcode, show_wr_opcode(__entry->opcode), - __entry->size, - __entry->avail, - __entry->head, - __entry->last - ) -); - -#endif /* __RDMAVT_TRACE_H */ - -#undef TRACE_INCLUDE_PATH -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace -#include +#include "trace_rvt.h" +#include "trace_qp.h" +#include "trace_tx.h" diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h new file mode 100644 index 000000000000..4c77a3119bda --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_qp.h @@ -0,0 +1,96 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_QP_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_QP_H + +#include +#include + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_qp + +DECLARE_EVENT_CLASS(rvt_qphash_template, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, bucket) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->bucket = bucket; + ), + TP_printk( + "[%s] qpn 0x%x bucket %u", + __get_str(dev), + __entry->qpn, + __entry->bucket + ) +); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + + +#endif /* __RVT_TRACE_QP_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_qp +#include + diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h new file mode 100644 index 000000000000..746f33461d9a --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h @@ -0,0 +1,81 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_RVT_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_RVT_H + +#include +#include + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt + +TRACE_EVENT(rvt_dbg, + TP_PROTO(struct rvt_dev_info *rdi, + const char *msg), + TP_ARGS(rdi, msg), + TP_STRUCT__entry( + RDI_DEV_ENTRY(rdi) + __string(msg, msg) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(rdi); + __assign_str(msg, msg); + ), + TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) +); + +#endif /* __RVT_TRACE_MISC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_rvt +#include + diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h new file mode 100644 index 000000000000..0e03173662d8 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_tx.h @@ -0,0 +1,132 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_TX_H + +#include +#include + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_tx + +#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode } +#define show_wr_opcode(opcode) \ +__print_symbolic(opcode, \ + wr_opcode_name(RDMA_WRITE), \ + wr_opcode_name(RDMA_WRITE_WITH_IMM), \ + wr_opcode_name(SEND), \ + wr_opcode_name(SEND_WITH_IMM), \ + wr_opcode_name(RDMA_READ), \ + wr_opcode_name(ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(ATOMIC_FETCH_AND_ADD), \ + wr_opcode_name(LSO), \ + wr_opcode_name(SEND_WITH_INV), \ + wr_opcode_name(RDMA_READ_WITH_INV), \ + wr_opcode_name(LOCAL_INV), \ + wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) + +#define POS_PRN \ +"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" + +TRACE_EVENT( + rvt_post_one_wr, + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), + TP_ARGS(qp, wqe), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u64, wr_id) + __field(u32, qpn) + __field(u32, psn) + __field(u32, lpsn) + __field(u32, length) + __field(u32, opcode) + __field(u32, size) + __field(u32, avail) + __field(u32, head) + __field(u32, last) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->wr_id = wqe->wr.wr_id; + __entry->qpn = qp->ibqp.qp_num; + __entry->psn = wqe->psn; + __entry->lpsn = wqe->lpsn; + __entry->length = wqe->length; + __entry->opcode = wqe->wr.opcode; + __entry->size = qp->s_size; + __entry->avail = qp->s_avail; + __entry->head = qp->s_head; + __entry->last = qp->s_last; + ), + TP_printk( + POS_PRN, + __get_str(dev), + __entry->wr_id, + __entry->qpn, + __entry->psn, + __entry->lpsn, + __entry->length, + __entry->opcode, show_wr_opcode(__entry->opcode), + __entry->size, + __entry->avail, + __entry->head, + __entry->last + ) +); + +#endif /* __RVT_TRACE_TX_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_tx +#include + -- cgit v1.2.3 From b7481944b06e99dc84f4c7da2681ac89528b2020 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Wed, 7 Dec 2016 19:32:53 -0800 Subject: IB/hfi1: Show statistics counters under IB stats interface Previously tools like hfi1stats had to access these counters through debugfs, which often caused permission issue for non-root users. It is not always acceptable to change the debugfs mounting permission due to security concerns. When exposed under the IB stats interface, the counters are universally readable by default. Reviewed-by: Ira Weiny Signed-off-by: Jianxin Xiong Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/verbs.c | 154 +++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index f1dead369258..09132582e09c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1609,6 +1609,154 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, dc8051_ver_min(ver)); } +static const char * const driver_cntr_names[] = { + /* must be element 0*/ + "DRIVER_KernIntr", + "DRIVER_ErrorIntr", + "DRIVER_Tx_Errs", + "DRIVER_Rcv_Errs", + "DRIVER_HW_Errs", + "DRIVER_NoPIOBufs", + "DRIVER_CtxtsOpen", + "DRIVER_RcvLen_Errs", + "DRIVER_EgrBufFull", + "DRIVER_EgrHdrFull" +}; + +static const char **dev_cntr_names; +static const char **port_cntr_names; +static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); +static int num_dev_cntrs; +static int num_port_cntrs; +static int cntr_names_initialized; + +/* + * Convert a list of names separated by '\n' into an array of NULL terminated + * strings. Optionally some entries can be reserved in the array to hold extra + * external strings. + */ +static int init_cntr_names(const char *names_in, + const int names_len, + int num_extra_names, + int *num_cntrs, + const char ***cntr_names) +{ + char *names_out, *p, **q; + int i, n; + + n = 0; + for (i = 0; i < names_len; i++) + if (names_in[i] == '\n') + n++; + + names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len, + GFP_KERNEL); + if (!names_out) { + *num_cntrs = 0; + *cntr_names = NULL; + return -ENOMEM; + } + + p = names_out + (n + num_extra_names) * sizeof(char *); + memcpy(p, names_in, names_len); + + q = (char **)names_out; + for (i = 0; i < n; i++) { + q[i] = p; + p = strchr(p, '\n'); + *p++ = '\0'; + } + + *num_cntrs = n; + *cntr_names = (const char **)names_out; + return 0; +} + +static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + int i, err; + + if (!cntr_names_initialized) { + struct hfi1_devdata *dd = dd_from_ibdev(ibdev); + + err = init_cntr_names(dd->cntrnames, + dd->cntrnameslen, + num_driver_cntrs, + &num_dev_cntrs, + &dev_cntr_names); + if (err) + return NULL; + + for (i = 0; i < num_driver_cntrs; i++) + dev_cntr_names[num_dev_cntrs + i] = + driver_cntr_names[i]; + + err = init_cntr_names(dd->portcntrnames, + dd->portcntrnameslen, + 0, + &num_port_cntrs, + &port_cntr_names); + if (err) { + kfree(dev_cntr_names); + dev_cntr_names = NULL; + return NULL; + } + cntr_names_initialized = 1; + } + + if (!port_num) + return rdma_alloc_hw_stats_struct( + dev_cntr_names, + num_dev_cntrs + num_driver_cntrs, + RDMA_HW_STATS_DEFAULT_LIFESPAN); + else + return rdma_alloc_hw_stats_struct( + port_cntr_names, + num_port_cntrs, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static u64 hfi1_sps_ints(void) +{ + unsigned long flags; + struct hfi1_devdata *dd; + u64 sps_ints = 0; + + spin_lock_irqsave(&hfi1_devs_lock, flags); + list_for_each_entry(dd, &hfi1_dev_list, list) { + sps_ints += get_all_cpu_total(dd->int_counter); + } + spin_unlock_irqrestore(&hfi1_devs_lock, flags); + return sps_ints; +} + +static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u8 port, int index) +{ + u64 *values; + int count; + + if (!port) { + u64 *stats = (u64 *)&hfi1_stats; + int i; + + hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values); + values[num_dev_cntrs] = hfi1_sps_ints(); + for (i = 1; i < num_driver_cntrs; i++) + values[num_dev_cntrs + i] = stats[i]; + count = num_dev_cntrs + num_driver_cntrs; + } else { + struct hfi1_ibport *ibp = to_iport(ibdev, port); + + hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values); + count = num_port_cntrs; + } + + memcpy(stats->value, values, count * sizeof(u64)); + return count; +} + /** * hfi1_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1656,6 +1804,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->phys_port_cnt = dd->num_pports; ibdev->dma_device = &dd->pcidev->dev; ibdev->modify_device = modify_device; + ibdev->alloc_hw_stats = alloc_hw_stats; + ibdev->get_hw_stats = get_hw_stats; /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; @@ -1770,6 +1920,10 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) del_timer_sync(&dev->mem_timer); verbs_txreq_exit(dev); + + kfree(dev_cntr_names); + kfree(port_cntr_names); + cntr_names_initialized = 0; } void hfi1_cnp_rcv(struct hfi1_packet *packet) -- cgit v1.2.3 From e922ae06e90a37ab0b212f844e8aed9b6021cf21 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Wed, 7 Dec 2016 19:33:00 -0800 Subject: IB/hfi1: Remove dependence on qp->s_cur_size The qp->s_cur_size field assumes that the S_BUSY bit protects the field from modification after the slock is dropped. Scaling the send engine to multiple cores would break that assumption. Correct the issue by carrying the payload size in the txreq structure. Reviewed-by: Mike Marciniszyn Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 4 ++-- drivers/infiniband/hw/hfi1/ruc.c | 4 ++-- drivers/infiniband/hw/hfi1/uc.c | 2 +- drivers/infiniband/hw/hfi1/ud.c | 2 +- drivers/infiniband/hw/hfi1/verbs.c | 6 +++--- drivers/infiniband/hw/hfi1/verbs_txreq.h | 1 + 6 files changed, 10 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index aaca8588e199..e69161e007db 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -351,7 +351,7 @@ normal: qp->s_rdma_ack_cnt++; qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; - qp->s_cur_size = len; + ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps); /* pbc */ ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; @@ -802,7 +802,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; qp->s_cur_sge = ss; - qp->s_cur_size = len; + ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header( qp, ohdr, diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 5a70e91b5191..2ec3e908691b 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -767,8 +767,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth1; /* Construct the header. */ - extra_bytes = -qp->s_cur_size & 3; - nwords = (qp->s_cur_size + extra_bytes) >> 2; + extra_bytes = -ps->s_txreq->s_cur_size & 3; + nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2; lrh0 = HFI1_LRH_BTH; if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { qp->s_hdrwords += hfi1_make_grh(ibp, diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 5e6d1bac4914..d062c3537f7c 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -259,7 +259,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; qp->s_cur_sge = &qp->s_sge; - qp->s_cur_size = len; + ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), mask_psn(qp->s_psn++), middle, ps); /* pbc */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 97ae24b6314c..d742ac1af947 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -354,7 +354,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ qp->s_hdrwords = 7; - qp->s_cur_size = wqe->length; + ps->s_txreq->s_cur_size = wqe->length; qp->s_cur_sge = &qp->s_sge; qp->s_srate = ah_attr->static_rate; qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 09132582e09c..65f2f1d76fc8 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -895,7 +895,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_ahg_info *ahg_info = priv->s_ahg; u32 hdrwords = qp->s_hdrwords; struct rvt_sge_state *ss = qp->s_cur_sge; - u32 len = qp->s_cur_size; + u32 len = ps->s_txreq->s_cur_size; u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */ struct hfi1_ibdev *dev = ps->dev; struct hfi1_pportdata *ppd = ps->ppd; @@ -1012,7 +1012,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_qp_priv *priv = qp->priv; u32 hdrwords = qp->s_hdrwords; struct rvt_sge_state *ss = qp->s_cur_sge; - u32 len = qp->s_cur_size; + u32 len = ps->s_txreq->s_cur_size; u32 dwords = (len + 3) >> 2; u32 plen = hdrwords + dwords + 2; /* includes pbc */ struct hfi1_pportdata *ppd = ps->ppd; @@ -1240,7 +1240,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, u8 op = get_opcode(h); if (piothreshold && - qp->s_cur_size <= min(piothreshold, qp->pmtu) && + tx->s_cur_size <= min(piothreshold, qp->pmtu) && (BIT(op & OPMASK) & pio_opmask[op >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 5660897593ba..76216f2ef35a 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -65,6 +65,7 @@ struct verbs_txreq { struct sdma_engine *sde; struct send_context *psc; u16 hdr_dwords; + u16 s_cur_size; }; struct hfi1_ibdev; -- cgit v1.2.3 From 6e40b59cfa99710fcbf849439783e8fc98d183bf Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Wed, 7 Dec 2016 19:33:06 -0800 Subject: IB/hfi1: Remove definition of unused hfi1_affinity struct The struct hfi1_affinity is not used anymore. We use the struct hfi1_affinity_node and hfi1_affinity_node_list instead. Reviewed-by: Dennis Dalessandro Signed-off-by: Tadeusz Struk Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.h | 8 -------- drivers/infiniband/hw/hfi1/hfi.h | 1 - 2 files changed, 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index b89ea3c0ee1a..ef4ebc358619 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -70,14 +70,6 @@ struct cpu_mask_set { uint gen; }; -struct hfi1_affinity { - struct cpu_mask_set def_intr; - struct cpu_mask_set rcv_intr; - struct cpumask real_cpu_mask; - /* spin lock to protect affinity struct */ - spinlock_t lock; -}; - struct hfi1_msix_entry; /* Initialize non-HT cpu cores mask */ diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index a66d198199ff..4163596ce4c9 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1185,7 +1185,6 @@ struct hfi1_devdata { /* Number of verbs contexts which have disabled ASPM */ atomic_t aspm_disabled_cnt; - struct hfi1_affinity *affinity; bool eprom_available; /* true if EPROM is available for this device */ bool aspm_supported; /* Does HW support ASPM */ bool aspm_enabled; /* ASPM state: enabled/disabled */ -- cgit v1.2.3 From 5213006ade47153f5f2c2e5a42e84312afccabfd Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 7 Dec 2016 19:33:13 -0800 Subject: IB/hfi1: Add special setting for low power AOC Low power QSFP AOC cables require a different SerDes Tx PLL bandwidth setting than the default. The 8051 firmware does not know the details, so the driver needs to tell the firmware through a special setting. Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 44 ++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 0e7043801bb1..838fe84e285a 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -637,6 +637,38 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id, } } +/* + * Return a special SerDes setting for low power AOC cables. The power class + * threshold and setting being used were all found by empirical testing. + * + * Summary of the logic: + * + * if (QSFP and QSFP_TYPE == AOC and QSFP_POWER_CLASS < 4) + * return 0xe + * return 0; // leave at default + */ +static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd) +{ + u8 *cache = ppd->qsfp_info.cache; + int power_class; + + /* QSFP only */ + if (ppd->port_type != PORT_TYPE_QSFP) + return 0; /* leave at default */ + + /* active optical cables only */ + switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) { + case 0x0 ... 0x9: /* fallthrough */ + case 0xC: /* fallthrough */ + case 0xE: + /* active AOC */ + power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]); + if (power_class < QSFP_POWER_CLASS_4) + return 0xe; + } + return 0; /* leave at default */ +} + static void apply_tunings( struct hfi1_pportdata *ppd, u32 tx_preset_index, u8 tuning_method, u32 total_atten, u8 limiting_active) @@ -705,7 +737,17 @@ static void apply_tunings( tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4); postcur = tx_preset; - config_data = precur | (attn << 8) | (postcur << 16); + /* + * NOTES: + * o The aoc_low_power_setting is applied to all lanes even + * though only lane 0's value is examined by the firmware. + * o A lingering low power setting after a cable swap does + * not occur. On cable unplug the 8051 is reset and + * restarted on cable insert. This resets all settings to + * their default, erasing any previous low power setting. + */ + config_data = precur | (attn << 8) | (postcur << 16) | + (aoc_low_power_setting(ppd) << 24); apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data, "Applying TX settings"); -- cgit v1.2.3 From fcb29a6668a254104d38c0132d1853d0644af066 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 7 Dec 2016 19:33:20 -0800 Subject: IB/rdmavt: Add trace of MR segs Add tracing of MR segment information. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mr.c | 4 ++ drivers/infiniband/sw/rdmavt/trace.h | 1 + drivers/infiniband/sw/rdmavt/trace_mr.h | 112 ++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 drivers/infiniband/sw/rdmavt/trace_mr.h (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 4acf179adf3b..562cf78b6ae9 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -51,6 +51,7 @@ #include #include "vt.h" #include "mr.h" +#include "trace.h" /** * rvt_driver_mr_init - Init MR resources per driver @@ -403,6 +404,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } mr->mr.map[m]->segs[n].vaddr = vaddr; mr->mr.map[m]->segs[n].length = umem->page_size; + trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size); n++; if (n == RVT_SEGSZ) { m++; @@ -507,6 +509,7 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) n = mapped_segs % RVT_SEGSZ; mr->mr.map[m]->segs[n].vaddr = (void *)addr; mr->mr.map[m]->segs[n].length = ps; + trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps); mr->mr.length += ps; return 0; @@ -693,6 +696,7 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, for (i = 0; i < list_len; i++) { fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i]; fmr->mr.map[m]->segs[n].length = ps; + trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps); if (++n == RVT_SEGSZ) { m++; n = 0; diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 11a2afbc8e76..e2d23acb6a7d 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -51,3 +51,4 @@ #include "trace_rvt.h" #include "trace_qp.h" #include "trace_tx.h" +#include "trace_mr.h" diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h new file mode 100644 index 000000000000..3318a6c36373 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_mr.h @@ -0,0 +1,112 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_MR_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_MR_H + +#include +#include + +#include +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_mr +DECLARE_EVENT_CLASS( + rvt_mr_template, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device)) + __field(void *, vaddr) + __field(struct page *, page) + __field(size_t, len) + __field(u32, lkey) + __field(u16, m) + __field(u16, n) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device)); + __entry->vaddr = v; + __entry->page = virt_to_page(v); + __entry->m = m; + __entry->n = n; + __entry->len = len; + ), + TP_printk( + "[%s] vaddr %p page %p m %u n %u len %ld", + __get_str(dev), + __entry->vaddr, + __entry->page, + __entry->m, + __entry->n, + __entry->len + ) +); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_page_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_fmr_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_user_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +#endif /* __RVT_TRACE_MR_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_mr +#include -- cgit v1.2.3 From b777f154a0c21e1187c1806ababf9c5ba3e49eea Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Wed, 7 Dec 2016 19:33:27 -0800 Subject: IB/hfi1: Remove usage of qp->s_cur_sge The s_cur_sge field in the qp structure holds a pointer to the SGE of the currently processed WQE. It assumes the protection of the RVT_S_BUSY flag to prevent the changing of this field while the send engine is using it. This scheme works as long as there is only one instance of the send engine running at a time. Scaling of the send engine to multiple cores would break this assumption as there could be multiple instances of the send engine running on different CPUs. This opens a window where the QP's RVT_S_BUSY flag is not set but the send engine is still running. To prevent accidental changing of the s_cur_sge pointer, the QP's dependence on it is removed. The SGE pointer is now stored in the verbs_txreq, which is a per-packet data structure. This ensures that each individual packet has it's own pointer, which is setup while the RVT_S_BUSY flag is set. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Mitko Haralanov Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 10 +++++----- drivers/infiniband/hw/hfi1/uc.c | 2 +- drivers/infiniband/hw/hfi1/ud.c | 2 +- drivers/infiniband/hw/hfi1/verbs.c | 14 ++++++-------- 4 files changed, 13 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index e69161e007db..c996a373837e 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -276,7 +276,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, rvt_get_mr(ps->s_txreq->mr); qp->s_ack_rdma_sge.sge = e->rdma_sge; qp->s_ack_rdma_sge.num_sge = 1; - qp->s_cur_sge = &qp->s_ack_rdma_sge; + ps->s_txreq->ss = &qp->s_ack_rdma_sge; if (len > pmtu) { len = pmtu; qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); @@ -290,7 +290,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, bth2 = mask_psn(qp->s_ack_rdma_psn++); } else { /* COMPARE_SWAP or FETCH_ADD */ - qp->s_cur_sge = NULL; + ps->s_txreq->ss = NULL; len = 0; qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); ohdr->u.at.aeth = hfi1_compute_aeth(qp); @@ -306,7 +306,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); /* FALLTHROUGH */ case OP(RDMA_READ_RESPONSE_MIDDLE): - qp->s_cur_sge = &qp->s_ack_rdma_sge; + ps->s_txreq->ss = &qp->s_ack_rdma_sge; ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr; if (ps->s_txreq->mr) rvt_get_mr(ps->s_txreq->mr); @@ -335,7 +335,7 @@ normal: */ qp->s_ack_state = OP(SEND_ONLY); qp->s_flags &= ~RVT_S_ACK_PENDING; - qp->s_cur_sge = NULL; + ps->s_txreq->ss = NULL; if (qp->s_nak_state) ohdr->u.aeth = cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) | @@ -801,7 +801,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_len -= len; qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; - qp->s_cur_sge = ss; + ps->s_txreq->ss = ss; ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header( qp, diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index d062c3537f7c..b141a78ae38b 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -258,7 +258,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_len -= len; qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; - qp->s_cur_sge = &qp->s_sge; + ps->s_txreq->ss = &qp->s_sge; ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), mask_psn(qp->s_psn++), middle, ps); diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index d742ac1af947..c071955c0272 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -355,7 +355,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ qp->s_hdrwords = 7; ps->s_txreq->s_cur_size = wqe->length; - qp->s_cur_sge = &qp->s_sge; + ps->s_txreq->ss = &qp->s_sge; qp->s_srate = ah_attr->static_rate; qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); qp->s_wqe = wqe; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 65f2f1d76fc8..3b7bfd817647 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -790,10 +790,10 @@ static int wait_kmem(struct hfi1_ibdev *dev, */ static noinline int build_verbs_ulp_payload( struct sdma_engine *sde, - struct rvt_sge_state *ss, u32 length, struct verbs_txreq *tx) { + struct rvt_sge_state *ss = tx->ss; struct rvt_sge *sg_list = ss->sg_list; struct rvt_sge sge = ss->sge; u8 num_sge = ss->num_sge; @@ -837,7 +837,6 @@ bail_txadd: /* New API */ static int build_verbs_tx_desc( struct sdma_engine *sde, - struct rvt_sge_state *ss, u32 length, struct verbs_txreq *tx, struct hfi1_ahg_info *ahg_info, @@ -881,9 +880,9 @@ static int build_verbs_tx_desc( goto bail_txadd; } - /* add the ulp payload - if any. ss can be NULL for acks */ - if (ss) - ret = build_verbs_ulp_payload(sde, ss, length, tx); + /* add the ulp payload - if any. tx->ss can be NULL for acks */ + if (tx->ss) + ret = build_verbs_ulp_payload(sde, length, tx); bail_txadd: return ret; } @@ -894,7 +893,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ahg_info *ahg_info = priv->s_ahg; u32 hdrwords = qp->s_hdrwords; - struct rvt_sge_state *ss = qp->s_cur_sge; u32 len = ps->s_txreq->s_cur_size; u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */ struct hfi1_ibdev *dev = ps->dev; @@ -920,7 +918,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, plen); } tx->wqe = qp->s_wqe; - ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc); + ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc); if (unlikely(ret)) goto bail_build; } @@ -1011,7 +1009,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, { struct hfi1_qp_priv *priv = qp->priv; u32 hdrwords = qp->s_hdrwords; - struct rvt_sge_state *ss = qp->s_cur_sge; + struct rvt_sge_state *ss = ps->s_txreq->ss; u32 len = ps->s_txreq->s_cur_size; u32 dwords = (len + 3) >> 2; u32 plen = hdrwords + dwords + 2; /* includes pbc */ -- cgit v1.2.3 From 9b86071c5ef7a8c0a0d6d541cb79df5a8b115d91 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 7 Dec 2016 19:33:33 -0800 Subject: IB/hfi1: Remove critical section gap in sc_buffer_alloc() In sc_buffer_alloc(), the sc->alloc_lock is released before calling sc_release_update(), and it is reacquired after the function call. This causes CPU lock trading. Fix it by not dropping the lock before calling sc_release_update(). Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/pio.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 86a7f365b624..75c4dea84c4b 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1419,9 +1419,7 @@ retry: (sc->fill - sc->alloc_free); if (blocks > avail) { /* still no room, actively update */ - spin_unlock_irqrestore(&sc->alloc_lock, flags); sc_release_update(sc); - spin_lock_irqsave(&sc->alloc_lock, flags); sc->alloc_free = ACCESS_ONCE(sc->free); trycount++; goto retry; -- cgit v1.2.3 From 008016746720d4295e38e940e22dbd116c90c95b Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Wed, 7 Dec 2016 19:33:40 -0800 Subject: IB/hfi1: Preserve external device completed bit The driver should not change the external device request completed bit when not actually doing an external device request. Reviewed-by: Easwar Hariharan Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index f87d80567572..37d8af50cc13 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8488,7 +8488,10 @@ static int do_8051_command( */ if (type == HCMD_WRITE_LCB_CSR) { in_data |= ((*out_data) & 0xffffffffffull) << 8; - reg = ((((*out_data) >> 40) & 0xff) << + /* must preserve COMPLETED - it is tied to hardware */ + reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_0); + reg &= DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK; + reg |= ((((*out_data) >> 40) & 0xff) << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT) | ((((*out_data) >> 48) & 0xffff) << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT); -- cgit v1.2.3 From b44980f8794faf3561d970fe8c05e34765fbdc9b Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 7 Dec 2016 19:33:46 -0800 Subject: IB/hfi1: Replace qp->refcount release code with standard driver wrapper Some parts of the code don't use the standard release wrapper rvt_put_qp() for decrementing and testing the refcount to then try to use a resource. Replace this code with the standard driver wrapper. Fixes: Commit 4d6f85c3fa55 ("IB/rdmavt, IB/qib, IB/hfi1: Use new QP put get routines") Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 3 +-- drivers/infiniband/sw/rdmavt/mcast.c | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 6563e4d38b80..d4261163bd25 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -793,8 +793,7 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet) hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 983d319ac976..05c8c2afb0e3 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -81,7 +81,7 @@ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) goto bail; mqp->qp = qp; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); bail: return mqp; @@ -92,8 +92,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) struct rvt_qp *qp = mqp->qp; /* Notify hfi1_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); kfree(mqp); } -- cgit v1.2.3 From f84dfa26e61c6f31a3a0366532296911df763170 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 7 Dec 2016 19:33:53 -0800 Subject: IB/hfi1: Use reference count wrapper for MRs Some parts of the code don't use the standard driver wrapper for memory region reference counters. Use the standard driver wrapper throughout the code. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 562cf78b6ae9..52fd15276ee6 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -793,7 +793,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, mr = rcu_dereference(dev->dma_mr); if (!mr) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); isge->mr = mr; @@ -814,7 +814,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, off + sge->length > mr->length || (mr->access_flags & acc) != acc)) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); off += mr->offset; @@ -892,7 +892,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, mr = rcu_dereference(rdi->dma_mr); if (!mr) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); sge->mr = mr; @@ -913,7 +913,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, if (unlikely(vaddr < mr->iova || off + len > mr->length || (mr->access_flags & acc) == 0)) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); off += mr->offset; -- cgit v1.2.3 From 238b1862b4608ee8c28733be67e08f0447963090 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 7 Dec 2016 19:34:00 -0800 Subject: IB/qib: Use standard refcount wrapper for QPs Use the standard driver wrapper for QP reference counters. This makes the code more maintainable. Fixes: Commit 4d6f85c3fa55 ("IB/rdmavt, IB/qib, IB/hfi1: Use new QP put get routines") Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_driver.c | 3 +-- drivers/infiniband/hw/qib/qib_verbs.c | 20 ++++++++------------ 2 files changed, 9 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 728e0a030d2e..2b5982f743ef 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -420,8 +420,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc( - &qp->refcount); + rvt_get_qp(qp); list_add_tail( &qp->rspwait, &rcd->qp_wait_list); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 954f15064514..204482468953 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -464,7 +464,7 @@ static void mem_timer(unsigned long data) priv = list_entry(list->next, struct qib_qp_priv, iowait); qp = priv->owner; list_del_init(&priv->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); if (!list_empty(list)) mod_timer(&dev->mem_timer, jiffies + 1); } @@ -477,8 +477,7 @@ static void mem_timer(unsigned long data) qib_schedule_send(qp); } spin_unlock_irqrestore(&qp->s_lock, flags); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } @@ -762,7 +761,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) iowait); qp = priv->owner; list_del_init(&priv->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); spin_lock_irqsave(&qp->s_lock, flags); @@ -772,8 +771,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) } spin_unlock_irqrestore(&qp->s_lock, flags); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } else spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); } @@ -808,7 +806,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) break; avail -= qpp->s_tx->txreq.sg_count; list_del_init(&qpp->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); qps[n++] = qp; } @@ -822,8 +820,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) qib_schedule_send(qp); } spin_unlock(&qp->s_lock); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } @@ -1288,7 +1285,7 @@ void qib_ib_piobufavail(struct qib_devdata *dd) priv = list_entry(list->next, struct qib_qp_priv, iowait); qp = priv->owner; list_del_init(&priv->iowait); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); qps[n++] = qp; } dd->f_wantpiobuf_intr(dd, 0); @@ -1306,8 +1303,7 @@ full: spin_unlock_irqrestore(&qp->s_lock, flags); /* Notify qib_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } -- cgit v1.2.3 From f2dc9cdce83c4aac2f8b6c803b0327df1c7d44a6 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 7 Dec 2016 19:34:06 -0800 Subject: IB/rdmavt: Add a send completion helper This is for use by client drivers to drive send completions into a CQ. A new exported table allows for the mapping of ib_wr_opcode into a ib_wc_opcode. Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 17 +++++++++++++++++ include/rdma/rdmavt_qp.h | 40 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 6500c3b5a89c..9e14addd690c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -76,6 +76,23 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { }; EXPORT_SYMBOL(ib_rvt_state_ops); +/* + * Translate ib_wr_opcode into ib_wc_opcode. + */ +const enum ib_wc_opcode ib_rvt_wc_opcode[] = { + [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [IB_WR_SEND] = IB_WC_SEND, + [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, + [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [IB_WR_SEND_WITH_INV] = IB_WC_SEND, + [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, + [IB_WR_REG_MR] = IB_WC_REG_MR +}; +EXPORT_SYMBOL(ib_rvt_wc_opcode); + static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 2c5183ef0243..d78e99cf6c11 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -51,6 +51,7 @@ #include #include #include +#include /* * Atomic bit definitions for r_aflags. */ @@ -527,7 +528,44 @@ static inline void rvt_qp_wqe_unreserve( } } -extern const int ib_rvt_state_ops[]; +extern const enum ib_wc_opcode ib_rvt_wc_opcode[]; + +/** + * rvt_qp_swqe_complete() - insert send completion + * @qp - the qp + * @wqe - the send wqe + * @status - completion status + * + * Insert a send completion into the completion + * queue if the qp indicates it should be done. + * + * See IBTA 10.7.3.1 for info on completion + * control. + */ +static inline void rvt_qp_swqe_complete( + struct rvt_qp *qp, + struct rvt_swqe *wqe, + enum ib_wc_status status) +{ + if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) + return; + if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED) || + status != IB_WC_SUCCESS) { + struct ib_wc wc; + + memset(&wc, 0, sizeof(wc)); + wc.wr_id = wqe->wr.wr_id; + wc.status = status; + wc.opcode = ib_rvt_wc_opcode[wqe->wr.opcode]; + wc.qp = &qp->ibqp; + wc.byte_len = wqe->length; + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, + status != IB_WC_SUCCESS); + } +} + +extern const int ib_rvt_state_ops[]; struct rvt_dev_info; int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); -- cgit v1.2.3 From 0771da5a6e9d67e48207a81ddf6c8e739e9493bd Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 7 Dec 2016 19:34:12 -0800 Subject: IB/hfi1,IB/qib: Use new send completion helper Convert cq completion returns in both rdmavt drivers to use the new helper. Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 26 ++------------------------ drivers/infiniband/hw/hfi1/ruc.c | 17 +---------------- drivers/infiniband/hw/hfi1/verbs.c | 16 ---------------- drivers/infiniband/hw/qib/qib_rc.c | 26 ++------------------------ drivers/infiniband/hw/qib/qib_ruc.c | 17 +---------------- drivers/infiniband/hw/qib/qib_verbs.c | 13 ------------- 6 files changed, 6 insertions(+), 109 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index c996a373837e..3df1c0650571 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1146,7 +1146,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { struct ib_other_headers *ohdr; struct rvt_swqe *wqe; - struct ib_wc wc; unsigned i; u32 opcode; u32 psn; @@ -1200,17 +1199,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) rvt_put_mr(sge->mr); } - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } /* * If we were waiting for sends to complete before re-sending, @@ -1240,7 +1229,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe, struct hfi1_ibport *ibp) { - struct ib_wc wc; unsigned i; lockdep_assert_held(&qp->s_lock); @@ -1264,17 +1252,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, qp->s_last = s_last; /* see post_send() */ barrier(); - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } else { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 2ec3e908691b..03c89315f1c7 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -964,22 +964,7 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->ibqp.qp_type == IB_QPT_GSI) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - /* See ch. 11.2.4.1 and 10.7.3.1 */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - status != IB_WC_SUCCESS) { - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = status; - wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode]; - wc.qp = &qp->ibqp; - if (status == IB_WC_SUCCESS) - wc.byte_len = wqe->length; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, - status != IB_WC_SUCCESS); - } + rvt_qp_swqe_complete(qp, wqe, status); if (qp->s_acked == old_last) qp->s_acked = last; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 3b7bfd817647..95ed4d6da510 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -296,22 +296,6 @@ static inline int wss_exceeds_threshold(void) return atomic_read(&wss.total_count) >= wss.threshold; } -/* - * Translate ib_wr_opcode into ib_wc_opcode. - */ -const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { - [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [IB_WR_SEND] = IB_WC_SEND, - [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, - [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, - [IB_WR_SEND_WITH_INV] = IB_WC_SEND, - [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, - [IB_WR_REG_MR] = IB_WC_REG_MR -}; - /* * Length of header by opcode, 0 --> not supported */ diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 2097512e75aa..d54990514730 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -941,7 +941,6 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { struct ib_other_headers *ohdr; struct rvt_swqe *wqe; - struct ib_wc wc; unsigned i; u32 opcode; u32 psn; @@ -993,17 +992,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) rvt_put_mr(sge->mr); } - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } /* * If we were waiting for sends to complete before resending, @@ -1032,7 +1021,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe, struct qib_ibport *ibp) { - struct ib_wc wc; unsigned i; /* @@ -1055,17 +1043,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, qp->s_last = s_last; /* see post_send() */ barrier(); - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; - wc.byte_len = wqe->length; - wc.qp = &qp->ibqp; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0); - } + rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } else this_cpu_inc(*ibp->rvp.rc_delayed_comp); diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index de1bde5950f5..87e3eb5a836b 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -815,22 +815,7 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->ibqp.qp_type == IB_QPT_GSI) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - /* See ch. 11.2.4.1 and 10.7.3.1 */ - if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED) || - status != IB_WC_SUCCESS) { - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr.wr_id; - wc.status = status; - wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; - wc.qp = &qp->ibqp; - if (status == IB_WC_SUCCESS) - wc.byte_len = wqe->length; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, - status != IB_WC_SUCCESS); - } + rvt_qp_swqe_complete(qp, wqe, status); if (qp->s_acked == old_last) qp->s_acked = last; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 204482468953..4b54c0ddd08a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -113,19 +113,6 @@ static unsigned int ib_qib_disable_sma; module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(disable_sma, "Disable the SMA"); -/* - * Translate ib_wr_opcode into ib_wc_opcode. - */ -const enum ib_wc_opcode ib_qib_wc_opcode[] = { - [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [IB_WR_SEND] = IB_WC_SEND, - [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, - [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD -}; - /* * System image GUID. */ -- cgit v1.2.3 From 9d8145a604937780898c0e4bdb124a57988fc2ed Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Wed, 7 Dec 2016 19:34:19 -0800 Subject: IB/hfi1: Avoid credit return allocation for cpu-less NUMA nodes Do not allocate credit return base and DMA memory for NUMA nodes without CPUs. Reviewed-by: Mike Marciniszyn Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.c | 3 ++- drivers/infiniband/hw/hfi1/affinity.h | 1 + drivers/infiniband/hw/hfi1/pio.c | 20 +++----------------- 3 files changed, 6 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index a26a9a0bfc41..4962b6ef1f34 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -125,6 +125,7 @@ int node_affinity_init(void) cpumask_weight(topology_sibling_cpumask( cpumask_first(&node_affinity.proc.mask) )); + node_affinity.num_possible_nodes = num_possible_nodes(); node_affinity.num_online_nodes = num_online_nodes(); node_affinity.num_online_cpus = num_online_cpus(); @@ -135,7 +136,7 @@ int node_affinity_init(void) */ init_real_cpu_mask(); - hfi1_per_node_cntr = kcalloc(num_possible_nodes(), + hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes, sizeof(*hfi1_per_node_cntr), GFP_KERNEL); if (!hfi1_per_node_cntr) return -ENOMEM; diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index ef4ebc358619..c9453b3d47b4 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -111,6 +111,7 @@ struct hfi1_affinity_node_list { struct cpumask real_cpu_mask; struct cpu_mask_set proc; int num_core_siblings; + int num_possible_nodes; int num_online_nodes; int num_online_cpus; struct mutex lock; /* protects affinity nodes */ diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 75c4dea84c4b..3d5dac2f5266 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -2036,21 +2036,10 @@ freesc15: int init_credit_return(struct hfi1_devdata *dd) { int ret; - int num_numa; int i; - num_numa = num_online_nodes(); - /* enforce the expectation that the numas are compact */ - for (i = 0; i < num_numa; i++) { - if (!node_online(i)) { - dd_dev_err(dd, "NUMA nodes are not compact\n"); - ret = -EINVAL; - goto done; - } - } - dd->cr_base = kcalloc( - num_numa, + node_affinity.num_possible_nodes, sizeof(struct credit_return_base), GFP_KERNEL); if (!dd->cr_base) { @@ -2058,7 +2047,7 @@ int init_credit_return(struct hfi1_devdata *dd) ret = -ENOMEM; goto done; } - for (i = 0; i < num_numa; i++) { + for_each_node_with_cpus(i) { int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return); set_dev_node(&dd->pcidev->dev, i); @@ -2085,14 +2074,11 @@ done: void free_credit_return(struct hfi1_devdata *dd) { - int num_numa; int i; if (!dd->cr_base) return; - - num_numa = num_online_nodes(); - for (i = 0; i < num_numa; i++) { + for (i = 0; i < node_affinity.num_possible_nodes; i++) { if (dd->cr_base[i].va) { dma_free_coherent(&dd->pcidev->dev, TXE_NUM_CONTEXTS * -- cgit v1.2.3 From c64607aa8abd16ba584523bdf83a708fae91986a Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 7 Dec 2016 19:34:31 -0800 Subject: IB/hfi1,IB/qib: use rvt swqe mr deref helper Convert to use new swqe put routine. Reviewed-by: Brian Welty Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 15 ++------------- drivers/infiniband/hw/hfi1/ruc.c | 7 +------ drivers/infiniband/hw/qib/qib_rc.c | 15 ++------------- drivers/infiniband/hw/qib/qib_ruc.c | 7 +------ 4 files changed, 6 insertions(+), 38 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 3df1c0650571..ddc02237e5f6 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1146,7 +1146,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { struct ib_other_headers *ohdr; struct rvt_swqe *wqe; - unsigned i; u32 opcode; u32 psn; @@ -1194,11 +1193,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) qp->s_last = s_last; /* see post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } /* @@ -1229,8 +1224,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe, struct hfi1_ibport *ibp) { - unsigned i; - lockdep_assert_held(&qp->s_lock); /* * Don't decrement refcount and don't generate a @@ -1241,11 +1234,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { u32 s_last; - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); s_last = qp->s_last; if (++s_last >= qp->s_size) s_last = 0; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 03c89315f1c7..717ed4b159d3 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -942,7 +942,6 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; - unsigned i; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; @@ -954,11 +953,7 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->s_last = last; /* See post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index d54990514730..e23ee6ce435a 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -941,7 +941,6 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { struct ib_other_headers *ohdr; struct rvt_swqe *wqe; - unsigned i; u32 opcode; u32 psn; @@ -987,11 +986,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) qp->s_last = s_last; /* see post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); } /* @@ -1021,8 +1016,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe, struct qib_ibport *ibp) { - unsigned i; - /* * Don't decrement refcount and don't generate a * completion if the SWQE is being resent until the send @@ -1032,11 +1025,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { u32 s_last; - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); s_last = qp->s_last; if (++s_last >= qp->s_size) s_last = 0; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 87e3eb5a836b..e54a2feeeb10 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -793,7 +793,6 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; - unsigned i; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; @@ -805,11 +804,7 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->s_last = last; /* See post_send() */ barrier(); - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } + rvt_put_swqe(wqe); if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) -- cgit v1.2.3 From 5dc806052a9fc8c44e111646f9a9f436877749d0 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 7 Dec 2016 19:34:37 -0800 Subject: IB/rdmavt, IB/hfi1, IB/qib: Add inlines for mtu division Add rvt_div_round_up_mtu() and rvt_div_mtu() routines to do the computation based on the pmtu and the log_pmtu. Change divides in qib, hfi1 to use the new inlines. Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 3 +-- drivers/infiniband/hw/qib/qib_rc.c | 3 +-- include/rdma/rdmavt_qp.h | 24 +++++++++++++++++++++++- 3 files changed, 25 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index ddc02237e5f6..9db260fe782a 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2377,8 +2377,7 @@ send_last: * Update the next expected PSN. We add 1 later * below, so only add the remainder here. */ - if (len > pmtu) - qp->r_psn += (len - 1) / pmtu; + qp->r_psn += rvt_div_mtu(qp, len - 1); } else { e->rdma_sge.mr = NULL; e->rdma_sge.vaddr = NULL; diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index e23ee6ce435a..031433cb7206 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -2079,8 +2079,7 @@ send_last: * Update the next expected PSN. We add 1 later * below, so only add the remainder here. */ - if (len > pmtu) - qp->r_psn += (len - 1) / pmtu; + qp->r_psn += rvt_div_mtu(qp, len - 1); } else { e->rdma_sge.mr = NULL; e->rdma_sge.vaddr = NULL; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 04facda681e0..f3dbd157ae5c 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -582,7 +582,29 @@ static inline void rvt_qp_swqe_complete( } } -extern const int ib_rvt_state_ops[]; +/** + * @qp - the qp pair + * @len - the length + * + * Perform a shift based mtu round up divide + */ +static inline u32 rvt_div_round_up_mtu(struct rvt_qp *qp, u32 len) +{ + return (len + qp->pmtu - 1) >> qp->log_pmtu; +} + +/** + * @qp - the qp pair + * @len - the length + * + * Perform a shift based mtu divide + */ +static inline u32 rvt_div_mtu(struct rvt_qp *qp, u32 len) +{ + return len >> qp->log_pmtu; +} + +extern const int ib_rvt_state_ops[]; struct rvt_dev_info; int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); -- cgit v1.2.3 From 0fc859a6576aaa747c5359c2bf622aac3158e2c7 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Sat, 19 Nov 2016 15:17:48 +0530 Subject: IB/hfi1: constify mmu_notifier_ops structure Declare the structure mmu_notifier_ops as const as it is only stored in the ops field of a mmu_notifier structure. The ops field is of type const struct mmu_notifier_ops *, so mmu_notifier_ops structures having this property can be declared as const. Done using coccinelle: @r1 disable optional_qualifier @ identifier i; position p; @@ static struct mmu_notifier_ops i@p = {...}; @ok1@ identifier r1.i; position p; struct mmu_rb_handler handler; @@ handler.mn.ops=&i@p @bad@ position p!={r1.p,ok1.p}; identifier r1.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r1.i; @@ static +const struct mmu_notifier_ops i={...}; @depends on !bad disable optional_qualifier@ identifier r1.i; @@ +const struct mmu_notifier_ops i; File size before: text data bss dec hex filename 3566 72 16 3654 e46 drivers/infiniband/hw/hfi1/mmu_rb.o File size after: text data bss dec hex filename 3658 0 16 3674 e5a drivers/infiniband/hw/hfi1/mmu_rb.o Signed-off-by: Bhumika Goyal Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mmu_rb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 7ad30898fc19..ccbf52c8ff6f 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -81,7 +81,7 @@ static void do_remove(struct mmu_rb_handler *handler, struct list_head *del_list); static void handle_remove(struct work_struct *work); -static struct mmu_notifier_ops mn_opts = { +static const struct mmu_notifier_ops mn_opts = { .invalidate_page = mmu_notifier_page, .invalidate_range_start = mmu_notifier_range_start, }; -- cgit v1.2.3 From 66431b0e8657e2406742105f89175f571340090b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 5 Dec 2016 16:48:11 -0800 Subject: IB/hfi1: Define platform_config_table_limits once Defining static data structures in a header file is wrong because this causes the data structure to be instantiated once in every .c file it is included in. Hence move the definition of a static array from a header file into the only .c file in which it is used. Signed-off-by: Bart Van Assche Cc: Dennis Dalessandro Cc: Dean Luick Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/firmware.c | 10 ++++++++++ drivers/infiniband/hw/hfi1/platform.h | 10 ---------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index bcf7da4599c4..0dd50cdb039a 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -239,6 +239,16 @@ static const u8 all_fabric_serdes_broadcast = 0xe1; const u8 pcie_serdes_broadcast[2] = { 0xe2, 0xe3 }; static const u8 all_pcie_serdes_broadcast = 0xe0; +static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = { + 0, + SYSTEM_TABLE_MAX, + PORT_TABLE_MAX, + RX_PRESET_TABLE_MAX, + TX_PRESET_TABLE_MAX, + QSFP_ATTEN_TABLE_MAX, + VARIABLE_SETTINGS_TABLE_MAX +}; + /* forwards */ static void dispose_one_firmware(struct firmware_details *fdet); static int load_fabric_serdes_firmware(struct hfi1_devdata *dd, diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h index 31155401a95b..eed0aa9124fa 100644 --- a/drivers/infiniband/hw/hfi1/platform.h +++ b/drivers/infiniband/hw/hfi1/platform.h @@ -168,16 +168,6 @@ struct platform_config_cache { struct platform_config_data config_tables[PLATFORM_CONFIG_TABLE_MAX]; }; -static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = { - 0, - SYSTEM_TABLE_MAX, - PORT_TABLE_MAX, - RX_PRESET_TABLE_MAX, - TX_PRESET_TABLE_MAX, - QSFP_ATTEN_TABLE_MAX, - VARIABLE_SETTINGS_TABLE_MAX -}; - /* This section defines default values and encodings for the * fields defined for each table above */ -- cgit v1.2.3 From 6efaf10f163d9a60d1d4b2a049b194a53537ba1b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 19 Oct 2016 14:07:19 +0200 Subject: IB/rdmavt: Avoid queuing work into a destroyed cq kthread worker The memory barrier is not enough to protect queuing works into a destroyed cq kthread. Just imagine the following situation: CPU1 CPU2 rvt_cq_enter() worker = cq->rdi->worker; rvt_cq_exit() rdi->worker = NULL; smp_wmb(); kthread_flush_worker(worker); kthread_stop(worker->task); kfree(worker); // nothing queued yet => // nothing flushed and // happily stopped and freed if (likely(worker)) { // true => read before CPU2 acted cq->notify = RVT_CQ_NONE; cq->triggered++; kthread_queue_work(worker, &cq->comptask); BANG: worker has been flushed/stopped/freed in the meantime. This patch solves this by protecting the critical sections by rdi->n_cqs_lock. It seems that this lock is not much contended and looks reasonable for this purpose. One catch is that rvt_cq_enter() might be called from IRQ context. Therefore we must always take the lock with IRQs disabled to avoid a possible deadlock. Signed-off-by: Petr Mladek Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/cq.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 6d9904a4a0ab..223ec4589fc7 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -119,18 +119,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && (solicited || entry->status != IB_WC_SUCCESS))) { - struct kthread_worker *worker; /* * This will cause send_complete() to be called in * another thread. */ - smp_read_barrier_depends(); /* see rvt_cq_exit */ - worker = cq->rdi->worker; - if (likely(worker)) { + spin_lock(&cq->rdi->n_cqs_lock); + if (likely(cq->rdi->worker)) { cq->notify = RVT_CQ_NONE; cq->triggered++; - kthread_queue_work(worker, &cq->comptask); + kthread_queue_work(cq->rdi->worker, &cq->comptask); } + spin_unlock(&cq->rdi->n_cqs_lock); } spin_unlock_irqrestore(&cq->lock, flags); @@ -240,15 +239,15 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, } } - spin_lock(&rdi->n_cqs_lock); + spin_lock_irq(&rdi->n_cqs_lock); if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } rdi->n_cqs_allocated++; - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); if (cq->ip) { spin_lock_irq(&rdi->pending_lock); @@ -296,9 +295,9 @@ int rvt_destroy_cq(struct ib_cq *ibcq) struct rvt_dev_info *rdi = cq->rdi; kthread_flush_work(&cq->comptask); - spin_lock(&rdi->n_cqs_lock); + spin_lock_irq(&rdi->n_cqs_lock); rdi->n_cqs_allocated--; - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); if (cq->ip) kref_put(&cq->ip->ref, rvt_release_mmap_info); else @@ -541,12 +540,15 @@ void rvt_cq_exit(struct rvt_dev_info *rdi) { struct kthread_worker *worker; - worker = rdi->worker; - if (!worker) + /* block future queuing from send_complete() */ + spin_lock_irq(&rdi->n_cqs_lock); + if (!rdi->worker) { + spin_unlock_irq(&rdi->n_cqs_lock); return; - /* blocks future queuing from send_complete() */ + } rdi->worker = NULL; - smp_wmb(); /* See rdi_cq_enter */ + spin_unlock_irq(&rdi->n_cqs_lock); + kthread_flush_worker(worker); kthread_stop(worker->task); kfree(worker); -- cgit v1.2.3 From f5eabf5e5129e8ab5b3e7f50b24444aca1680e64 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 19 Oct 2016 14:07:20 +0200 Subject: IB/rdmavt: Handle the kthread worker using the new API Use the new API to create and destroy the cq kthread worker. The API hides some implementation details. In particular, kthread_create_worker() allocates and initializes struct kthread_worker. It runs the kthread the right way and stores task_struct into the worker structure. In addition, the *on_cpu() variant binds the kthread to the given cpu and the related memory node. kthread_destroy_worker() flushes all pending works, stops the kthread and frees the structure. This patch does not change the existing behavior. Note that we must use the on_cpu() variant because the function starts the kthread and it must bind it to the right CPU before waking. The numa node is associated for given CPU as well. Signed-off-by: Petr Mladek Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/cq.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 223ec4589fc7..4d0b6992e847 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -503,33 +503,23 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) */ int rvt_driver_cq_init(struct rvt_dev_info *rdi) { - int ret = 0; int cpu; - struct task_struct *task; + struct kthread_worker *worker; if (rdi->worker) return 0; + spin_lock_init(&rdi->n_cqs_lock); - rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); - if (!rdi->worker) - return -ENOMEM; - kthread_init_worker(rdi->worker); - task = kthread_create_on_node( - kthread_worker_fn, - rdi->worker, - rdi->dparms.node, - "%s", rdi->dparms.cq_name); - if (IS_ERR(task)) { - kfree(rdi->worker); - rdi->worker = NULL; - return PTR_ERR(task); - } - set_user_nice(task, MIN_NICE); cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); - kthread_bind(task, cpu); - wake_up_process(task); - return ret; + worker = kthread_create_worker_on_cpu(cpu, 0, + "%s", rdi->dparms.cq_name); + if (IS_ERR(worker)) + return PTR_ERR(worker); + + set_user_nice(worker->task, MIN_NICE); + rdi->worker = worker; + return 0; } /** @@ -549,7 +539,5 @@ void rvt_cq_exit(struct rvt_dev_info *rdi) rdi->worker = NULL; spin_unlock_irq(&rdi->n_cqs_lock); - kthread_flush_worker(worker); - kthread_stop(worker->task); - kfree(worker); + kthread_destroy_worker(worker); } -- cgit v1.2.3 From 22dccc5454a39427de7b87a080d026b6bf66a7b9 Mon Sep 17 00:00:00 2001 From: Jim Foraker Date: Tue, 1 Nov 2016 13:44:12 -0700 Subject: IB/rdmavt: Only put mmap_info ref if it exists rvt_create_qp() creates qp->ip only when a qp creation request comes from userspace (udata is not NULL). If we exceed the number of available queue pairs however, the error path always attempts to put a kref to this structure. If the requestor is inside the kernel, this leads to a crash. We fix this by checking that qp->ip is not NULL before caling kref_put(). Signed-off-by: Jim Foraker Acked-by: Dennis Dalessandro Acked-by: Jonathan Toppins Acked-by: Alex Estrin Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 9e14addd690c..2a13ac660f2b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -901,7 +901,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, return ret; bail_ip: - kref_put(&qp->ip->ref, rvt_release_mmap_info); + if (qp->ip) + kref_put(&qp->ip->ref, rvt_release_mmap_info); bail_qpn: free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); -- cgit v1.2.3