diff options
Diffstat (limited to 'net/sunrpc/xprtrdma')
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 8 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 47 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_rw.c | 24 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_sendto.c | 62 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 20 |
5 files changed, 85 insertions, 76 deletions
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index aa2227a7e552..7420a2c990c7 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -93,13 +93,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, */ get_page(virt_to_page(rqst->rq_buffer)); sctxt->sc_send_wr.opcode = IB_WR_SEND; - ret = svc_rdma_send(rdma, sctxt); - if (ret < 0) - return ret; - - ret = wait_for_completion_killable(&sctxt->sc_done); - svc_rdma_send_ctxt_put(rdma, sctxt); - return ret; + return svc_rdma_send(rdma, sctxt); } /* Server-side transport endpoint wants a whole page for its send diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 1c658fa43063..85c8bcaebb80 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -125,14 +125,15 @@ static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, static struct svc_rdma_recv_ctxt * svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) { + int node = ibdev_to_node(rdma->sc_cm_id->device); struct svc_rdma_recv_ctxt *ctxt; dma_addr_t addr; void *buffer; - ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); + ctxt = kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node); if (!ctxt) goto fail0; - buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL); + buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node); if (!buffer) goto fail1; addr = ib_dma_map_single(rdma->sc_pd->device, buffer, @@ -155,7 +156,6 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->rc_recv_sge.length = rdma->sc_max_req_size; ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey; ctxt->rc_recv_buf = buffer; - ctxt->rc_temp = false; return ctxt; fail2: @@ -232,34 +232,30 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, pcl_free(&ctxt->rc_write_pcl); pcl_free(&ctxt->rc_reply_pcl); - if (!ctxt->rc_temp) - llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts); - else - svc_rdma_recv_ctxt_destroy(rdma, ctxt); + llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts); } /** - * svc_rdma_release_rqst - Release transport-specific per-rqst resources - * @rqstp: svc_rqst being released + * svc_rdma_release_ctxt - Release transport-specific per-rqst resources + * @xprt: the transport which owned the context + * @vctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt * * Ensure that the recv_ctxt is released whether or not a Reply * was sent. For example, the client could close the connection, * or svc_process could drop an RPC, before the Reply is sent. */ -void svc_rdma_release_rqst(struct svc_rqst *rqstp) +void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt) { - struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt; - struct svc_xprt *xprt = rqstp->rq_xprt; + struct svc_rdma_recv_ctxt *ctxt = vctxt; struct svcxprt_rdma *rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); - rqstp->rq_xprt_ctxt = NULL; if (ctxt) svc_rdma_recv_ctxt_put(rdma, ctxt); } static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, - unsigned int wanted, bool temp) + unsigned int wanted) { const struct ib_recv_wr *bad_wr = NULL; struct svc_rdma_recv_ctxt *ctxt; @@ -276,7 +272,6 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, break; trace_svcrdma_post_recv(ctxt); - ctxt->rc_temp = temp; ctxt->rc_recv_wr.next = recv_chain; recv_chain = &ctxt->rc_recv_wr; rdma->sc_pending_recvs++; @@ -310,7 +305,7 @@ err_free: */ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) { - return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true); + return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests); } /** @@ -344,7 +339,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) * client reconnects. */ if (rdma->sc_pending_recvs < rdma->sc_max_requests) - if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false)) + if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch)) goto dropped; /* All wc fields are now known to be valid */ @@ -776,9 +771,6 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt, * * The next ctxt is removed from the "receive" lists. * - * - If the ctxt completes a Read, then finish assembling the Call - * message and return the number of bytes in the message. - * * - If the ctxt completes a Receive, then construct the Call * message from the contents of the Receive buffer. * @@ -787,7 +779,8 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt, * in the message. * * - If there are Read chunks in this message, post Read WRs to - * pull that payload and return 0. + * pull that payload. When the Read WRs complete, build the + * full message and return the number of bytes in it. */ int svc_rdma_recvfrom(struct svc_rqst *rqstp) { @@ -797,6 +790,12 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) struct svc_rdma_recv_ctxt *ctxt; int ret; + /* Prevent svc_xprt_release() from releasing pages in rq_pages + * when returning 0 or an error. + */ + rqstp->rq_respages = rqstp->rq_pages; + rqstp->rq_next_page = rqstp->rq_respages; + rqstp->rq_xprt_ctxt = NULL; ctxt = NULL; @@ -820,12 +819,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) DMA_FROM_DEVICE); svc_rdma_build_arg_xdr(rqstp, ctxt); - /* Prevent svc_xprt_release from releasing pages in rq_pages - * if we return 0 or an error. - */ - rqstp->rq_respages = rqstp->rq_pages; - rqstp->rq_next_page = rqstp->rq_respages; - ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt); if (ret < 0) goto out_err; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 11cf7c646644..e460e25a1d6d 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -62,8 +62,8 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) if (node) { ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node); } else { - ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE), - GFP_KERNEL); + ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE), + GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device)); if (!ctxt) goto out_noctx; @@ -84,8 +84,7 @@ out_noctx: return NULL; } -static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, - struct svc_rdma_rw_ctxt *ctxt, +static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt, struct llist_head *list) { sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE); @@ -95,7 +94,7 @@ static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, struct svc_rdma_rw_ctxt *ctxt) { - __svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts); + __svc_rdma_put_rw_ctxt(ctxt, &rdma->sc_rw_ctxts); } /** @@ -191,6 +190,8 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc, struct svc_rdma_rw_ctxt *ctxt; LLIST_HEAD(free); + trace_svcrdma_cc_release(&cc->cc_cid, cc->cc_sqecount); + first = last = NULL; while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) { list_del(&ctxt->rw_list); @@ -198,7 +199,7 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc, rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp, rdma->sc_port_num, ctxt->rw_sg_table.sgl, ctxt->rw_nents, dir); - __svc_rdma_put_rw_ctxt(rdma, ctxt, &free); + __svc_rdma_put_rw_ctxt(ctxt, &free); ctxt->rw_node.next = first; first = &ctxt->rw_node; @@ -234,7 +235,8 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, { struct svc_rdma_write_info *info; - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kmalloc_node(sizeof(*info), GFP_KERNEL, + ibdev_to_node(rdma->sc_cm_id->device)); if (!info) return info; @@ -304,7 +306,8 @@ svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma) { struct svc_rdma_read_info *info; - info = kmalloc(sizeof(*info), GFP_KERNEL); + info = kmalloc_node(sizeof(*info), GFP_KERNEL, + ibdev_to_node(rdma->sc_cm_id->device)); if (!info) return info; @@ -351,8 +354,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) return; } -/* This function sleeps when the transport's Send Queue is congested. - * +/* * Assumptions: * - If ib_post_send() succeeds, only one completion is expected, * even if one or more WRs are flushed. This is true when posting @@ -367,6 +369,8 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) struct ib_cqe *cqe; int ret; + might_sleep(); + if (cc->cc_sqecount > rdma->sc_sq_depth) return -EINVAL; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 22a871e6fe4d..c6644cca52c5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -123,18 +123,17 @@ static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma, static struct svc_rdma_send_ctxt * svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) { + int node = ibdev_to_node(rdma->sc_cm_id->device); struct svc_rdma_send_ctxt *ctxt; dma_addr_t addr; void *buffer; - size_t size; int i; - size = sizeof(*ctxt); - size += rdma->sc_max_send_sges * sizeof(struct ib_sge); - ctxt = kmalloc(size, GFP_KERNEL); + ctxt = kmalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges), + GFP_KERNEL, node); if (!ctxt) goto fail0; - buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL); + buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node); if (!buffer) goto fail1; addr = ib_dma_map_single(rdma->sc_pd->device, buffer, @@ -148,7 +147,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe; ctxt->sc_send_wr.sg_list = ctxt->sc_sges; ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; - init_completion(&ctxt->sc_done); ctxt->sc_cqe.done = svc_rdma_wc_send; ctxt->sc_xprt_buf = buffer; xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, @@ -214,6 +212,7 @@ out: ctxt->sc_send_wr.num_sge = 0; ctxt->sc_cur_sge_no = 0; + ctxt->sc_page_count = 0; return ctxt; out_empty: @@ -228,6 +227,8 @@ out_empty: * svc_rdma_send_ctxt_put - Return send_ctxt to free list * @rdma: controlling svcxprt_rdma * @ctxt: object to return to the free list + * + * Pages left in sc_pages are DMA unmapped and released. */ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) @@ -235,6 +236,9 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct ib_device *device = rdma->sc_cm_id->device; unsigned int i; + if (ctxt->sc_page_count) + release_pages(ctxt->sc_pages, ctxt->sc_page_count); + /* The first SGE contains the transport header, which * remains mapped until @ctxt is destroyed. */ @@ -281,12 +285,12 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe); svc_rdma_wake_send_waiters(rdma, 1); - complete(&ctxt->sc_done); if (unlikely(wc->status != IB_WC_SUCCESS)) goto flushed; trace_svcrdma_wc_send(wc, &ctxt->sc_cid); + svc_rdma_send_ctxt_put(rdma, ctxt); return; flushed: @@ -294,6 +298,7 @@ flushed: trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid); else trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid); + svc_rdma_send_ctxt_put(rdma, ctxt); svc_xprt_deferred_close(&rdma->sc_xprt); } @@ -310,7 +315,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) struct ib_send_wr *wr = &ctxt->sc_send_wr; int ret; - reinit_completion(&ctxt->sc_done); + might_sleep(); /* Sync the transport header buffer */ ib_dma_sync_single_for_device(rdma->sc_pd->device, @@ -799,6 +804,25 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, svc_rdma_xb_dma_map, &args); } +/* The svc_rqst and all resources it owns are released as soon as + * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt + * so they are released by the Send completion handler. + */ +static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, + struct svc_rdma_send_ctxt *ctxt) +{ + int i, pages = rqstp->rq_next_page - rqstp->rq_respages; + + ctxt->sc_page_count += pages; + for (i = 0; i < pages; i++) { + ctxt->sc_pages[i] = rqstp->rq_respages[i]; + rqstp->rq_respages[i] = NULL; + } + + /* Prevent svc_xprt_release from releasing pages in rq_pages */ + rqstp->rq_next_page = rqstp->rq_respages; +} + /* Prepare the portion of the RPC Reply that will be transmitted * via RDMA Send. The RPC-over-RDMA transport header is prepared * in sc_sges[0], and the RPC xdr_buf is prepared in following sges. @@ -828,6 +852,8 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, if (ret < 0) return ret; + svc_rdma_save_io_pages(rqstp, sctxt); + if (rctxt->rc_inv_rkey) { sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV; sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey; @@ -835,13 +861,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, sctxt->sc_send_wr.opcode = IB_WR_SEND; } - ret = svc_rdma_send(rdma, sctxt); - if (ret < 0) - return ret; - - ret = wait_for_completion_killable(&sctxt->sc_done); - svc_rdma_send_ctxt_put(rdma, sctxt); - return ret; + return svc_rdma_send(rdma, sctxt); } /** @@ -907,8 +927,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; if (svc_rdma_send(rdma, sctxt)) goto put_ctxt; - - wait_for_completion_killable(&sctxt->sc_done); + return; put_ctxt: svc_rdma_send_ctxt_put(rdma, sctxt); @@ -976,17 +995,16 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp); if (ret < 0) goto put_ctxt; - - /* Prevent svc_xprt_release() from releasing the page backing - * rq_res.head[0].iov_base. It's no longer being accessed by - * the I/O device. */ - rqstp->rq_respages++; return 0; reply_chunk: if (ret != -E2BIG && ret != -EINVAL) goto put_ctxt; + /* Send completion releases payload pages that were part + * of previously posted RDMA Writes. + */ + svc_rdma_save_io_pages(rqstp, sctxt); svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret); return 0; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 416b298f74dd..2abd895046ee 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -64,7 +64,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, - struct net *net); + struct net *net, int node); static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, struct net *net, struct sockaddr *sa, int salen, @@ -80,7 +80,7 @@ static const struct svc_xprt_ops svc_rdma_ops = { .xpo_recvfrom = svc_rdma_recvfrom, .xpo_sendto = svc_rdma_sendto, .xpo_result_payload = svc_rdma_result_payload, - .xpo_release_rqst = svc_rdma_release_rqst, + .xpo_release_ctxt = svc_rdma_release_ctxt, .xpo_detach = svc_rdma_detach, .xpo_free = svc_rdma_free, .xpo_has_wspace = svc_rdma_has_wspace, @@ -123,14 +123,14 @@ static void qp_event_handler(struct ib_event *event, void *context) } static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, - struct net *net) + struct net *net, int node) { - struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL); + struct svcxprt_rdma *cma_xprt; - if (!cma_xprt) { - dprintk("svcrdma: failed to create new transport\n"); + cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node); + if (!cma_xprt) return NULL; - } + svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); INIT_LIST_HEAD(&cma_xprt->sc_accept_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); @@ -193,9 +193,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, struct svcxprt_rdma *newxprt; struct sockaddr *sa; - /* Create a new transport */ newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, - listen_xprt->sc_xprt.xpt_net); + listen_xprt->sc_xprt.xpt_net, + ibdev_to_node(new_cma_id->device)); if (!newxprt) return; newxprt->sc_cm_id = new_cma_id; @@ -304,7 +304,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6) return ERR_PTR(-EAFNOSUPPORT); - cma_xprt = svc_rdma_create_xprt(serv, net); + cma_xprt = svc_rdma_create_xprt(serv, net, NUMA_NO_NODE); if (!cma_xprt) return ERR_PTR(-ENOMEM); set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); |