Diffstat (limited to 'drivers/infiniband/sw/rxe/rxe_resp.c')
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c | 329
1 file changed, 279 insertions(+), 50 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 693081e813ec..7a60c7709da0 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -22,6 +22,8 @@ enum resp_states {
RESPST_EXECUTE,
RESPST_READ_REPLY,
RESPST_ATOMIC_REPLY,
+ RESPST_ATOMIC_WRITE_REPLY,
+ RESPST_PROCESS_FLUSH,
RESPST_COMPLETE,
RESPST_ACKNOWLEDGE,
RESPST_CLEANUP,
@@ -57,6 +59,8 @@ static char *resp_state_name[] = {
[RESPST_EXECUTE] = "EXECUTE",
[RESPST_READ_REPLY] = "READ_REPLY",
[RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
+ [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
+ [RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
[RESPST_COMPLETE] = "COMPLETE",
[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
[RESPST_CLEANUP] = "CLEANUP",
@@ -91,7 +95,10 @@ void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
(skb_queue_len(&qp->req_pkts) > 1);
- rxe_run_task(&qp->resp.task, must_sched);
+ if (must_sched)
+ rxe_sched_task(&qp->resp.task);
+ else
+ rxe_run_task(&qp->resp.task);
}
static inline enum resp_states get_req(struct rxe_qp *qp,
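
For illustration, a minimal user-space sketch of the scheduling decision introduced above: a READ request, or a backlog of more than one queued packet, defers the responder to a scheduled task instead of running it inline. The helper names below are invented stand-ins, not the rxe API.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel task helpers. */
static void run_task_inline(void) { puts("run responder inline"); }
static void schedule_task(void)   { puts("defer responder to scheduled task"); }

/* Model of the decision in rxe_resp_queue_pkt(): heavy work (a READ
 * request) or an existing backlog is pushed to the scheduled task.
 */
static void queue_pkt(bool is_read_request, unsigned int queue_len)
{
	bool must_sched = is_read_request || queue_len > 1;

	if (must_sched)
		schedule_task();
	else
		run_task_inline();
}

int main(void)
{
	queue_pkt(false, 1);	/* single SEND: handled inline */
	queue_pkt(true, 1);	/* READ request: always scheduled */
	queue_pkt(false, 3);	/* backlog: scheduled */
	return 0;
}
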
@@ -253,19 +260,37 @@ static enum resp_states check_op_seq(struct rxe_qp *qp,
}
}
+static bool check_qp_attr_access(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ if (((pkt->mask & RXE_READ_MASK) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
+ ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
+ ((pkt->mask & RXE_ATOMIC_MASK) &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ return false;
+
+ if (pkt->mask & RXE_FLUSH_MASK) {
+ u32 flush_type = feth_plt(pkt);
+
+ if ((flush_type & IB_FLUSH_GLOBAL &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
+ (flush_type & IB_FLUSH_PERSISTENT &&
+ !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
+ return false;
+ }
+
+ return true;
+}
+
static enum resp_states check_op_valid(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
switch (qp_type(qp)) {
case IB_QPT_RC:
- if (((pkt->mask & RXE_READ_MASK) &&
- !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
- ((pkt->mask & RXE_WRITE_MASK) &&
- !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
- ((pkt->mask & RXE_ATOMIC_MASK) &&
- !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
+ if (!check_qp_attr_access(qp, pkt))
return RESPST_ERR_UNSUPPORTED_OPCODE;
- }
break;
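
As a sketch of the new FLUSH access check: every placement type requested in the FETH must be enabled by a matching QP access flag. This is a stand-alone user-space model; the bit values are illustrative, not the kernel's IB_ACCESS_*/IB_FLUSH_* definitions.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative bit values only. */
#define FLUSH_GLOBAL			(1 << 0)
#define FLUSH_PERSISTENT		(1 << 1)
#define ACCESS_FLUSH_GLOBAL		(1 << 4)
#define ACCESS_FLUSH_PERSISTENT		(1 << 5)

/* Model of the FLUSH part of check_qp_attr_access(): every placement
 * type requested in the FETH must be enabled on the QP.
 */
static bool flush_allowed(unsigned int flush_type, unsigned int qp_access_flags)
{
	if ((flush_type & FLUSH_GLOBAL) &&
	    !(qp_access_flags & ACCESS_FLUSH_GLOBAL))
		return false;
	if ((flush_type & FLUSH_PERSISTENT) &&
	    !(qp_access_flags & ACCESS_FLUSH_PERSISTENT))
		return false;
	return true;
}

int main(void)
{
	printf("%d\n", flush_allowed(FLUSH_PERSISTENT, ACCESS_FLUSH_PERSISTENT));	/* 1 */
	printf("%d\n", flush_allowed(FLUSH_GLOBAL | FLUSH_PERSISTENT,
				     ACCESS_FLUSH_PERSISTENT));				/* 0 */
	return 0;
}
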
@@ -314,7 +339,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
/* don't trust user space data */
if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
- pr_warn("%s: invalid num_sge in SRQ entry\n", __func__);
+ rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
return RESPST_ERR_MALFORMED_WQE;
}
size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
@@ -364,7 +389,7 @@ static enum resp_states check_resource(struct rxe_qp *qp,
}
}
- if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) {
+ if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
/* it is the requesters job to not send
* too many read/atomic ops, we just
* recycle the responder resource queue
@@ -387,19 +412,66 @@ static enum resp_states check_resource(struct rxe_qp *qp,
return RESPST_CHK_LENGTH;
}
-static enum resp_states check_length(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt)
+static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
{
- switch (qp_type(qp)) {
- case IB_QPT_RC:
- return RESPST_CHK_RKEY;
-
- case IB_QPT_UC:
- return RESPST_CHK_RKEY;
+ /*
+ * See IBA C9-92
+ * For UD QPs we only check if the packet will fit in the
+ * receive buffer later. For rdma operations additional
+ * length checks are performed in check_rkey.
+ */
+ if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
+ (qp_type(qp) == IB_QPT_UC))) {
+ unsigned int mtu = qp->mtu;
+ unsigned int payload = payload_size(pkt);
+
+ if ((pkt->mask & RXE_START_MASK) &&
+ (pkt->mask & RXE_END_MASK)) {
+ if (unlikely(payload > mtu)) {
+ rxe_dbg_qp(qp, "only packet too long");
+ return RESPST_ERR_LENGTH;
+ }
+ } else if ((pkt->mask & RXE_START_MASK) ||
+ (pkt->mask & RXE_MIDDLE_MASK)) {
+ if (unlikely(payload != mtu)) {
+ rxe_dbg_qp(qp, "first or middle packet not mtu");
+ return RESPST_ERR_LENGTH;
+ }
+ } else if (pkt->mask & RXE_END_MASK) {
+ if (unlikely((payload == 0) || (payload > mtu))) {
+ rxe_dbg_qp(qp, "last packet zero or too long");
+ return RESPST_ERR_LENGTH;
+ }
+ }
+ }
- default:
- return RESPST_CHK_RKEY;
+ /* See IBA C9-94 */
+ if (pkt->mask & RXE_RETH_MASK) {
+ if (reth_len(pkt) > (1U << 31)) {
+ rxe_dbg_qp(qp, "dma length too long");
+ return RESPST_ERR_LENGTH;
+ }
}
+
+ return RESPST_CHK_RKEY;
+}
+
+static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+ qp->resp.va = reth_va(pkt);
+ qp->resp.offset = 0;
+ qp->resp.rkey = reth_rkey(pkt);
+ qp->resp.resid = reth_len(pkt);
+ qp->resp.length = reth_len(pkt);
+}
+
+static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+ qp->resp.va = atmeth_va(pkt);
+ qp->resp.offset = 0;
+ qp->resp.rkey = atmeth_rkey(pkt);
+ qp->resp.resid = sizeof(u64);
}
static enum resp_states check_rkey(struct rxe_qp *qp,
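
To make the length rules above concrete, here is a hedged user-space model of the per-packet checks from IBA C9-92: an only packet may carry up to one MTU (including zero bytes), first and middle packets must carry exactly one MTU, and a last packet must carry between one byte and one MTU. The enum and function names are invented for the sketch.

#include <stdbool.h>
#include <stdio.h>

enum pkt_pos { PKT_ONLY, PKT_FIRST, PKT_MIDDLE, PKT_LAST };

/* Model of rxe_resp_check_length() for RC/UC payload-carrying packets. */
static bool payload_len_ok(enum pkt_pos pos, unsigned int payload, unsigned int mtu)
{
	switch (pos) {
	case PKT_ONLY:
		return payload <= mtu;		/* zero is allowed */
	case PKT_FIRST:
	case PKT_MIDDLE:
		return payload == mtu;		/* must be exactly one MTU */
	case PKT_LAST:
		return payload > 0 && payload <= mtu;
	}
	return false;
}

int main(void)
{
	unsigned int mtu = 1024;

	printf("%d\n", payload_len_ok(PKT_ONLY, 0, mtu));	/* 1 */
	printf("%d\n", payload_len_ok(PKT_MIDDLE, 512, mtu));	/* 0 */
	printf("%d\n", payload_len_ok(PKT_LAST, 0, mtu));	/* 0 */
	return 0;
}
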
@@ -413,29 +485,32 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
u32 pktlen;
int mtu = qp->mtu;
enum resp_states state;
- int access;
-
- if (pkt->mask & RXE_READ_OR_WRITE_MASK) {
- if (pkt->mask & RXE_RETH_MASK) {
- qp->resp.va = reth_va(pkt);
- qp->resp.offset = 0;
- qp->resp.rkey = reth_rkey(pkt);
- qp->resp.resid = reth_len(pkt);
- qp->resp.length = reth_len(pkt);
- }
+ int access = 0;
+
+ if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
+ if (pkt->mask & RXE_RETH_MASK)
+ qp_resp_from_reth(qp, pkt);
+
access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
: IB_ACCESS_REMOTE_WRITE;
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ u32 flush_type = feth_plt(pkt);
+
+ if (pkt->mask & RXE_RETH_MASK)
+ qp_resp_from_reth(qp, pkt);
+
+ if (flush_type & IB_FLUSH_GLOBAL)
+ access |= IB_ACCESS_FLUSH_GLOBAL;
+ if (flush_type & IB_FLUSH_PERSISTENT)
+ access |= IB_ACCESS_FLUSH_PERSISTENT;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
- qp->resp.va = atmeth_va(pkt);
- qp->resp.offset = 0;
- qp->resp.rkey = atmeth_rkey(pkt);
- qp->resp.resid = sizeof(u64);
+ qp_resp_from_atmeth(qp, pkt);
access = IB_ACCESS_REMOTE_ATOMIC;
} else {
return RESPST_EXECUTE;
}
- /* A zero-byte op is not required to set an addr or rkey. */
+ /* A zero-byte op is not required to set an addr or rkey. See C9-88 */
if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
(pkt->mask & RXE_RETH_MASK) &&
reth_len(pkt) == 0) {
@@ -450,15 +525,14 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (rkey_is_mw(rkey)) {
mw = rxe_lookup_mw(qp, access, rkey);
if (!mw) {
- pr_debug("%s: no MW matches rkey %#x\n",
- __func__, rkey);
+ rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
mr = mw->mr;
if (!mr) {
- pr_err("%s: MW doesn't have an MR\n", __func__);
+ rxe_dbg_qp(qp, "MW doesn't have an MR\n");
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
@@ -471,19 +545,27 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
- pr_debug("%s: no MR matches rkey %#x\n",
- __func__, rkey);
+ rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
}
+ if (pkt->mask & RXE_FLUSH_MASK) {
+ /* FLUSH MR may not set va or resid
+ * no need to check range since we will flush whole mr
+ */
+ if (feth_sel(pkt) == IB_FLUSH_MR)
+ goto skip_check_range;
+ }
+
if (mr_check_range(mr, va + qp->resp.offset, resid)) {
state = RESPST_ERR_RKEY_VIOLATION;
goto err;
}
- if (pkt->mask & RXE_WRITE_MASK) {
+skip_check_range:
+ if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
if (resid > mtu) {
if (pktlen != mtu || bth_pad(pkt)) {
state = RESPST_ERR_LENGTH;
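
A small user-space model of the range-check decision above, assuming invented names: a FLUSH with MR-level selectivity skips the VA/length check because the whole memory region is flushed, while a range-level FLUSH (and every other rkey operation) is still bounds-checked.

#include <stdbool.h>
#include <stdio.h>

enum flush_level { FLUSH_MR, FLUSH_RANGE };	/* illustrative stand-ins */

/* Model of the skip_check_range branch in check_rkey(). */
static bool rkey_range_ok(bool is_flush, enum flush_level level,
			  unsigned long va, unsigned long len,
			  unsigned long mr_start, unsigned long mr_len)
{
	if (is_flush && level == FLUSH_MR)
		return true;	/* whole MR is flushed, no range to check */

	return va >= mr_start && va + len <= mr_start + mr_len;
}

int main(void)
{
	printf("%d\n", rkey_range_ok(true, FLUSH_MR, 0, 0, 0x1000, 0x100));		/* 1 */
	printf("%d\n", rkey_range_ok(true, FLUSH_RANGE, 0x2000, 8, 0x1000, 0x100));	/* 0 */
	return 0;
}
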
@@ -583,15 +665,66 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
res->state = rdatm_res_state_new;
break;
case RXE_ATOMIC_MASK:
+ case RXE_ATOMIC_WRITE_MASK:
res->first_psn = pkt->psn;
res->last_psn = pkt->psn;
res->cur_psn = pkt->psn;
break;
+ case RXE_FLUSH_MASK:
+ res->flush.va = qp->resp.va + qp->resp.offset;
+ res->flush.length = qp->resp.length;
+ res->flush.type = feth_plt(pkt);
+ res->flush.level = feth_sel(pkt);
}
return res;
}
+static enum resp_states process_flush(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ u64 length, start;
+ struct rxe_mr *mr = qp->resp.mr;
+ struct resp_res *res = qp->resp.res;
+
+ /* oA19-14, oA19-15 */
+ if (res && res->replay)
+ return RESPST_ACKNOWLEDGE;
+ else if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
+ qp->resp.res = res;
+ }
+
+ if (res->flush.level == IB_FLUSH_RANGE) {
+ start = res->flush.va;
+ length = res->flush.length;
+ } else { /* level == IB_FLUSH_MR */
+ start = mr->ibmr.iova;
+ length = mr->ibmr.length;
+ }
+
+ if (res->flush.type & IB_FLUSH_PERSISTENT) {
+ if (rxe_flush_pmem_iova(mr, start, length))
+ return RESPST_ERR_RKEY_VIOLATION;
+ /* Make data persistent. */
+ wmb();
+ } else if (res->flush.type & IB_FLUSH_GLOBAL) {
+ /* Make data globally visible. */
+ wmb();
+ }
+
+ qp->resp.msn++;
+
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+
+ return RESPST_ACKNOWLEDGE;
+}
+
/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);
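
A hedged user-space model of the flush handling in process_flush() above: a range-level flush covers the requested span, an MR-level flush covers the whole region, and either placement type is followed by a store barrier. C11 atomics stand in for the kernel's wmb(), the pmem cache flush (rxe_flush_pmem_iova()) is only noted in a comment, and all other names are illustrative.

#include <stdatomic.h>
#include <stdio.h>

enum { FLUSH_LVL_MR, FLUSH_LVL_RANGE };			/* illustrative */
#define FLUSH_TYPE_GLOBAL	(1 << 0)
#define FLUSH_TYPE_PERSISTENT	(1 << 1)

struct region { unsigned long start, length; };

/* Model of process_flush(): pick the span to flush, then order prior
 * stores. In the kernel, persistence additionally requires flushing
 * CPU caches for the pmem range (rxe_flush_pmem_iova()) before wmb().
 */
static void do_flush(int level, unsigned int type,
		     struct region req, struct region mr)
{
	struct region span = (level == FLUSH_LVL_RANGE) ? req : mr;

	printf("flushing [%#lx, +%lu)\n", span.start, span.length);

	if (type & (FLUSH_TYPE_PERSISTENT | FLUSH_TYPE_GLOBAL))
		atomic_thread_fence(memory_order_release);	/* stand-in for wmb() */
}

int main(void)
{
	struct region req = { 0x2000, 64 }, mr = { 0x1000, 4096 };

	do_flush(FLUSH_LVL_RANGE, FLUSH_TYPE_PERSISTENT, req, mr);
	do_flush(FLUSH_LVL_MR, FLUSH_TYPE_GLOBAL, req, mr);
	return 0;
}
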
@@ -652,6 +785,55 @@ out:
return ret;
}
+static enum resp_states atomic_write_reply(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ u64 src, *dst;
+ struct resp_res *res = qp->resp.res;
+ struct rxe_mr *mr = qp->resp.mr;
+ int payload = payload_size(pkt);
+
+ if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
+ qp->resp.res = res;
+ }
+
+ if (!res->replay) {
+#ifdef CONFIG_64BIT
+ if (mr->state != RXE_MR_STATE_VALID)
+ return RESPST_ERR_RKEY_VIOLATION;
+
+ memcpy(&src, payload_addr(pkt), payload);
+
+ dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
+ /* check vaddr is 8-byte aligned. */
+ if (!dst || (uintptr_t)dst & 7)
+ return RESPST_ERR_MISALIGNED_ATOMIC;
+
+ /* Do atomic write after all prior operations have completed */
+ smp_store_release(dst, src);
+
+ /* decrease resp.resid to zero */
+ qp->resp.resid -= sizeof(payload);
+
+ qp->resp.msn++;
+
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+
+ return RESPST_ACKNOWLEDGE;
+#else
+ return RESPST_ERR_UNSUPPORTED_OPCODE;
+#endif /* CONFIG_64BIT */
+ }
+
+ return RESPST_ACKNOWLEDGE;
+}
+
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
struct rxe_pkt_info *ack,
int opcode,
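
For illustration, a user-space sketch of the core of atomic_write_reply(): the 8-byte payload is stored to a naturally aligned destination with release ordering, and a misaligned destination is rejected. C11 atomic_store_explicit() stands in for the kernel's smp_store_release(); the helper name is invented.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Model of the 64-bit atomic write: the destination must be 8-byte
 * aligned, and the store has release semantics so it is ordered after
 * all earlier writes (smp_store_release() in the kernel).
 */
static int atomic_write_u64(_Atomic uint64_t *dst, const void *payload)
{
	uint64_t src;

	if ((uintptr_t)dst & 7)
		return -1;			/* misaligned atomic */

	memcpy(&src, payload, sizeof(src));
	atomic_store_explicit(dst, src, memory_order_release);
	return 0;
}

int main(void)
{
	_Atomic uint64_t target = 0;
	uint64_t value = 0x1122334455667788ULL;
	int ret = atomic_write_u64(&target, &value);

	printf("ret=%d target=%#llx\n", ret,
	       (unsigned long long)atomic_load(&target));
	return 0;
}
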
@@ -807,14 +989,19 @@ static enum resp_states read_reply(struct rxe_qp *qp,
skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
res->cur_psn, AETH_ACK_UNLIMITED);
if (!skb) {
- rxe_put(mr);
+ if (mr)
+ rxe_put(mr);
return RESPST_ERR_RNR;
}
- rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
- payload, RXE_FROM_MR_OBJ);
+ err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
+ payload, RXE_FROM_MR_OBJ);
if (mr)
rxe_put(mr);
+ if (err) {
+ kfree_skb(skb);
+ return RESPST_ERR_RKEY_VIOLATION;
+ }
if (bth_pad(&ack_pkt)) {
u8 *pad = payload_addr(&ack_pkt) + payload;
@@ -890,6 +1077,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
return RESPST_READ_REPLY;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
return RESPST_ATOMIC_REPLY;
+ } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
+ return RESPST_ATOMIC_WRITE_REPLY;
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ return RESPST_PROCESS_FLUSH;
} else {
/* Unreachable */
WARN_ON_ONCE(1);
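
A toy model of the routing in execute() above, using invented mask and state constants: each opcode class maps to its own reply state, with ATOMIC WRITE and FLUSH as the two branches added by this patch.

#include <stdio.h>

enum state { ST_EXECUTE, ST_READ_REPLY, ST_ATOMIC_REPLY,
	     ST_ATOMIC_WRITE_REPLY, ST_PROCESS_FLUSH, ST_ACKNOWLEDGE };

#define MASK_READ		(1 << 0)
#define MASK_ATOMIC		(1 << 1)
#define MASK_ATOMIC_WRITE	(1 << 2)
#define MASK_FLUSH		(1 << 3)

/* Model of the routing in execute(): each opcode class gets its own
 * reply state; everything else falls through to the acknowledge path.
 */
static enum state route(unsigned int mask)
{
	if (mask & MASK_READ)
		return ST_READ_REPLY;
	if (mask & MASK_ATOMIC)
		return ST_ATOMIC_REPLY;
	if (mask & MASK_ATOMIC_WRITE)
		return ST_ATOMIC_WRITE_REPLY;
	if (mask & MASK_FLUSH)
		return ST_PROCESS_FLUSH;
	return ST_ACKNOWLEDGE;
}

int main(void)
{
	printf("%d\n", route(MASK_FLUSH));		/* ST_PROCESS_FLUSH */
	printf("%d\n", route(MASK_ATOMIC_WRITE));	/* ST_ATOMIC_WRITE_REPLY */
	return 0;
}
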
@@ -1040,7 +1231,7 @@ static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err)
- pr_err_ratelimited("Failed sending %s\n", msg);
+ rxe_dbg_qp(qp, "Failed sending %s\n", msg);
return err;
}
@@ -1063,6 +1254,19 @@ static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
return ret;
}
+static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
+{
+ int ret = send_common_ack(qp, syndrome, psn,
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
+ "RDMA READ response of length zero ACK");
+
+ /* have to clear this since it is used to trigger
+ * long read replies
+ */
+ qp->resp.res = NULL;
+ return ret;
+}
+
static enum resp_states acknowledge(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
@@ -1073,6 +1277,8 @@ static enum resp_states acknowledge(struct rxe_qp *qp,
send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
else if (pkt->mask & RXE_ATOMIC_MASK)
send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
+ else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
+ send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
else if (bth_ack(pkt))
send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
@@ -1129,6 +1335,22 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
/* SEND. Ack again and cleanup. C9-105. */
send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
return RESPST_CLEANUP;
+ } else if (pkt->mask & RXE_FLUSH_MASK) {
+ struct resp_res *res;
+
+ /* Find the operation in our list of responder resources. */
+ res = find_resource(qp, pkt->psn);
+ if (res) {
+ res->replay = 1;
+ res->cur_psn = pkt->psn;
+ qp->resp.res = res;
+ rc = RESPST_PROCESS_FLUSH;
+ goto out;
+ }
+
+ /* Resource not found. Class D error. Drop the request. */
+ rc = RESPST_CLEANUP;
+ goto out;
} else if (pkt->mask & RXE_READ_MASK) {
struct resp_res *res;
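
A small model of the duplicate-FLUSH path above, with invented structures: the PSN is looked up in the cached responder resources, a hit is marked as a replay and re-enters the flush state, and a miss is a class D error that silently drops the packet.

#include <stdbool.h>
#include <stdio.h>

struct res_entry { unsigned int psn; bool valid; bool replay; };

enum action { ACT_REPLAY_FLUSH, ACT_DROP };

/* Model of duplicate_request() for FLUSH: a duplicate is only replayed
 * if its PSN still matches a cached responder resource (oA19-14/15).
 */
static enum action handle_dup_flush(struct res_entry *res, int nres,
				    unsigned int psn)
{
	for (int i = 0; i < nres; i++) {
		if (res[i].valid && res[i].psn == psn) {
			res[i].replay = true;
			return ACT_REPLAY_FLUSH;
		}
	}
	return ACT_DROP;
}

int main(void)
{
	struct res_entry res[2] = { { 10, true, false }, { 11, true, false } };

	printf("%d\n", handle_dup_flush(res, 2, 10));	/* 0: replay */
	printf("%d\n", handle_dup_flush(res, 2, 99));	/* 1: drop   */
	return 0;
}
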
@@ -1184,7 +1406,9 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
res->replay = 1;
res->cur_psn = pkt->psn;
qp->resp.res = res;
- rc = RESPST_ATOMIC_REPLY;
+ rc = pkt->mask & RXE_ATOMIC_MASK ?
+ RESPST_ATOMIC_REPLY :
+ RESPST_ATOMIC_WRITE_REPLY;
goto out;
}
@@ -1286,8 +1510,7 @@ int rxe_responder(void *arg)
}
while (1) {
- pr_debug("qp#%d state = %s\n", qp_num(qp),
- resp_state_name[state]);
+ rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
switch (state) {
case RESPST_GET_REQ:
state = get_req(qp, &pkt);
@@ -1305,7 +1528,7 @@ int rxe_responder(void *arg)
state = check_resource(qp, pkt);
break;
case RESPST_CHK_LENGTH:
- state = check_length(qp, pkt);
+ state = rxe_resp_check_length(qp, pkt);
break;
case RESPST_CHK_RKEY:
state = check_rkey(qp, pkt);
@@ -1322,6 +1545,12 @@ int rxe_responder(void *arg)
case RESPST_ATOMIC_REPLY:
state = atomic_reply(qp, pkt);
break;
+ case RESPST_ATOMIC_WRITE_REPLY:
+ state = atomic_write_reply(qp, pkt);
+ break;
+ case RESPST_PROCESS_FLUSH:
+ state = process_flush(qp, pkt);
+ break;
case RESPST_ACKNOWLEDGE:
state = acknowledge(qp, pkt);
break;
@@ -1444,7 +1673,7 @@ int rxe_responder(void *arg)
case RESPST_ERROR:
qp->resp.goto_error = 0;
- pr_debug("qp#%d moved to error state\n", qp_num(qp));
+ rxe_dbg_qp(qp, "moved to error state\n");
rxe_qp_error(qp);
goto exit;