From ff79c74dcace8fec62706d0bef00b6680b477fdb Mon Sep 17 00:00:00 2001 From: Shirley Ma Date: Thu, 9 Jul 2015 16:45:08 -0400 Subject: NFS/RDMA Release resources in svcrdma when device is removed When removing the underlying RDMA device, rmmod will hang forever if there are any outstanding NFS/RDMA client mounts. The outstanding NFS/RDMA counts could also prevent the server from shutting down. Further debugging shows that the existing connections are not torn down and resources are not released when receiving an RDMA_CM_EVENT_DEVICE_REMOVAL event. It seems the original code is missing svc_xprt_put() in the RDMA_CM_EVENT_DEVICE_REMOVAL event handler, thus svc_xprt_free is never invoked to release the existing connection resources. The patch has passed testing that removes and re-adds the device back and forth without stopping the NFS/RDMA service. This will also allow a device to be unplugged and swapped out without shutting down NFS service. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=252 Signed-off-by: Shirley Ma Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 6b36279e4288..f4b973233977 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -659,6 +659,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id, if (xprt) { set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); + svc_xprt_put(xprt); } break; default: -- cgit v1.2.3 From 9d11b51ce7c150a69e761e30518f294fc73d55ff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 9 Jul 2015 16:45:18 -0400 Subject: svcrdma: Fix send_reply() scatter/gather set-up The Linux NFS server returns garbage in the data payload of inline NFS/RDMA READ replies. These are READs of under 1000 bytes or so where the client has not provided either a reply chunk or a write list. 
The NFS server delivers the data payload for an NFS READ reply to the transport in an xdr_buf page list. If the NFS client did not provide a reply chunk or a write list, send_reply() is supposed to set up a separate sge for the page containing the READ data, and another sge for XDR padding if needed, then post all of the sges via a single SEND Work Request. The problem is send_reply() does not advance through the xdr_buf when setting up scatter/gather entries for SEND WR. It always calls dma_map_xdr with xdr_off set to zero. When there's more than one sge, dma_map_xdr() sets up the SEND sge's so they all point to the xdr_buf's head. The current Linux NFS/RDMA client always provides a reply chunk or a write list when performing an NFS READ over RDMA. Therefore, it does not exercise this particular case. The Linux server has never had to use more than one extra sge for building RPC/RDMA replies with a Linux client. However, an NFS/RDMA client _is_ allowed to send small NFS READs without setting up a write list or reply chunk. The NFS READ reply fits entirely within the inline reply buffer in this case. This is perhaps a more efficient way of performing NFS READs that the Linux NFS/RDMA client may some day adopt. Fixes: b432e6b3d9c1 ('svcrdma: Change DMA mapping logic to . . .') BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=285 Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index d25cd430f9ff..95412abc95b0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -384,6 +384,7 @@ static int send_reply(struct svcxprt_rdma *rdma, int byte_count) { struct ib_send_wr send_wr; + u32 xdr_off; int sge_no; int sge_bytes; int page_no; @@ -418,8 +419,8 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->direction = DMA_TO_DEVICE; /* Map the payload indicated by 'byte_count' */ + xdr_off = 0; for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { - int xdr_off = 0; sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; ctxt->sge[sge_no].addr = @@ -457,6 +458,13 @@ static int send_reply(struct svcxprt_rdma *rdma, } rqstp->rq_next_page = rqstp->rq_respages + 1; + /* The loop above bumps sc_dma_used for each sge. The + * xdr_buf.tail gets a separate sge, but resides in the + * same page as xdr_buf.head. Don't count it twice. + */ + if (sge_no > ctxt->count) + atomic_dec(&rdma->sc_dma_used); + if (sge_no > rdma->sc_max_sge) { pr_err("svcrdma: Too many sges (%d)\n", sge_no); goto err; -- cgit v1.2.3 From 10dc4512185741a298cd7bc87e9968944f31a50d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 9 Jul 2015 16:45:28 -0400 Subject: svcrdma: Clean up svc_rdma_get_reply_array() Kernel coding conventions frown upon having large nontrivial functions in header files, and the preference these days is to allow the compiler to make inlining decisions if possible. As these functions are re-homed into a .c file, be sure that comparisons with fields in struct rpcrdma_msg are with be32 constants. This is a refactoring change; no behavior change is intended. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 81 +---------------------------------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 73 +++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 79 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index cb94ee4181d4..ca4d86a6c947 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -213,6 +213,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, /* svc_rdma_sendto.c */ extern int svc_rdma_sendto(struct svc_rqst *); +extern struct rpcrdma_read_chunk * + svc_rdma_get_read_chunk(struct rpcrdma_msg *); /* svc_rdma_transport.c */ extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); @@ -238,83 +240,4 @@ extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); extern int svc_rdma_init(void); extern void svc_rdma_cleanup(void); -/* - * Returns the address of the first read chunk or <NULL> if no read chunk is - * present - */ -static inline struct rpcrdma_read_chunk * -svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) -{ - struct rpcrdma_read_chunk *ch = - (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - - if (ch->rc_discrim == 0) - return NULL; - - return ch; -} - -/* - * Returns the address of the first read write array element or <NULL> if no - * write array list is present - */ -static inline struct rpcrdma_write_array * -svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) -{ - if (rmsgp->rm_body.rm_chunks[0] != 0 - || rmsgp->rm_body.rm_chunks[1] == 0) - return NULL; - - return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; -} - -/* - * Returns the address of the first reply array element or <NULL> if no - * reply array is present - */ -static inline struct rpcrdma_write_array * -svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) -{ - struct rpcrdma_read_chunk *rch; - struct rpcrdma_write_array *wr_ary; - struct rpcrdma_write_array *rp_ary; - - /* XXX: Need to fix when reply 
list may occur with read-list and/or - * write list */ - if (rmsgp->rm_body.rm_chunks[0] != 0 || - rmsgp->rm_body.rm_chunks[1] != 0) - return NULL; - - rch = svc_rdma_get_read_chunk(rmsgp); - if (rch) { - while (rch->rc_discrim) - rch++; - - /* The reply list follows an empty write array located - * at 'rc_position' here. The reply array is at rc_target. - */ - rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; - - goto found_it; - } - - wr_ary = svc_rdma_get_write_array(rmsgp); - if (wr_ary) { - rp_ary = (struct rpcrdma_write_array *) - &wr_ary-> - wc_array[ntohl(wr_ary->wc_nchunks)].wc_target.rs_length; - - goto found_it; - } - - /* No read list, no write list */ - rp_ary = (struct rpcrdma_write_array *) - &rmsgp->rm_body.rm_chunks[2]; - - found_it: - if (rp_ary->wc_discrim == 0) - return NULL; - - return rp_ary; -} #endif diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 95412abc95b0..1dfae8317065 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -136,6 +136,79 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, return dma_addr; } +/* Returns the address of the first read chunk or <NULL> if no read chunk + * is present + */ +struct rpcrdma_read_chunk * +svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) +{ + struct rpcrdma_read_chunk *ch = + (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + + if (ch->rc_discrim == xdr_zero) + return NULL; + return ch; +} + +/* Returns the address of the first read write array element or + * <NULL> if no write array list is present + */ +static struct rpcrdma_write_array * +svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) +{ + if (rmsgp->rm_body.rm_chunks[0] != xdr_zero || + rmsgp->rm_body.rm_chunks[1] == xdr_zero) + return NULL; + return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; +} + +/* Returns the address of the first reply array element or <NULL> if no + * reply array is present + */ +static struct 
rpcrdma_write_array * +svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) +{ + struct rpcrdma_read_chunk *rch; + struct rpcrdma_write_array *wr_ary; + struct rpcrdma_write_array *rp_ary; + + /* XXX: Need to fix when reply chunk may occur with read list + * and/or write list. + */ + if (rmsgp->rm_body.rm_chunks[0] != xdr_zero || + rmsgp->rm_body.rm_chunks[1] != xdr_zero) + return NULL; + + rch = svc_rdma_get_read_chunk(rmsgp); + if (rch) { + while (rch->rc_discrim != xdr_zero) + rch++; + + /* The reply chunk follows an empty write array located + * at 'rc_position' here. The reply array is at rc_target. + */ + rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; + goto found_it; + } + + wr_ary = svc_rdma_get_write_array(rmsgp); + if (wr_ary) { + int chunk = be32_to_cpu(wr_ary->wc_nchunks); + + rp_ary = (struct rpcrdma_write_array *) + &wr_ary->wc_array[chunk].wc_target.rs_length; + goto found_it; + } + + /* No read list, no write list */ + rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2]; + + found_it: + if (rp_ary->wc_discrim == xdr_zero) + return NULL; + return rp_ary; +} + /* Assumptions: * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE */ -- cgit v1.2.3 From 31193fe5f6fb616711323f5d74ee5bb92aacba4a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 9 Jul 2015 16:45:37 -0400 Subject: svcrdma: Remove svc_rdma_fastreg() Commit 0bf4828983df ("svcrdma: refactor marshalling logic") removed the last call site for svc_rdma_fastreg(). Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - net/sunrpc/xprtrdma/svc_rdma_transport.c | 34 -------------------------------- 2 files changed, 35 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index ca4d86a6c947..13af61b70417 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -227,7 +227,6 @@ extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); -extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f4b973233977..4054a9de6a91 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1202,40 +1202,6 @@ static int svc_rdma_secure_port(struct svc_rqst *rqstp) return 1; } -/* - * Attempt to register the kvec representing the RPC memory with the - * device. - * - * Returns: - * NULL : The device does not support fastreg or there were no more - * fastreg mr. - * frmr : The kvec register request was successfully posted. - * <0 : An error was encountered attempting to register the kvec. 
- */ -int svc_rdma_fastreg(struct svcxprt_rdma *xprt, - struct svc_rdma_fastreg_mr *frmr) -{ - struct ib_send_wr fastreg_wr; - u8 key; - - /* Bump the key */ - key = (u8)(frmr->mr->lkey & 0x000000FF); - ib_update_fast_reg_key(frmr->mr, ++key); - - /* Prepare FASTREG WR */ - memset(&fastreg_wr, 0, sizeof fastreg_wr); - fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.send_flags = IB_SEND_SIGNALED; - fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; - fastreg_wr.wr.fast_reg.page_list = frmr->page_list; - fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fastreg_wr.wr.fast_reg.length = frmr->map_len; - fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; - fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; - return svc_rdma_send(xprt, &fastreg_wr); -} - int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) { struct ib_send_wr *bad_wr, *n_wr; -- cgit v1.2.3 From cc9a903d915c21626b6b2fbf8ed0ff16a7f82210 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 7 Aug 2015 16:55:46 -0400 Subject: svcrdma: Change maximum server payload back to RPCSVC_MAXPAYLOAD Both commit 0380a3f375 ("svcrdma: Add a separate "max data segs" macro for svcrdma") and commit 7e5be28827bf ("svcrdma: advertise the correct max payload") are incorrect. This commit reverts both changes, restoring the server's maximum payload size to 1MB. Commit 7e5be28827bf based the server's maximum payload on the _client's_ RPCRDMA_MAX_DATA_SEGS value. That was wrong. Commit 0380a3f375 tried to fix this so that the client maximum payload size could be raised without affecting the server, but managed to confuse matters more on the server side. More importantly, limiting the advertised maximum payload size was meant to be a workaround, not the actual fix. 
We need to revisit https://bugzilla.linux-nfs.org/show_bug.cgi?id=270 A Linux client on a platform with 64KB pages can overrun and crash an x86_64 NFS/RDMA server when the r/wsize is 1MB. An x86/64 Linux client seems to work fine using 1MB reads and writes when the Linux server's maximum payload size is restored to 1MB. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=270 Fixes: 0380a3f375 ("svcrdma: Add a separate "max data segs" macro") Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 9 ++------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 13af61b70417..d5ee6d8b7c58 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -172,13 +172,6 @@ struct svcxprt_rdma { #define RDMAXPRT_SQ_PENDING 2 #define RDMAXPRT_CONN_PENDING 3 -#define RPCRDMA_MAX_SVC_SEGS (64) /* server max scatter/gather */ -#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) -#define RPCRDMA_MAXPAYLOAD RPCSVC_MAXPAYLOAD -#else -#define RPCRDMA_MAXPAYLOAD (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) -#endif - #define RPCRDMA_LISTEN_BACKLOG 10 /* The default ORD value is based on two outstanding full-size writes with a * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. 
*/ @@ -187,6 +180,8 @@ struct svcxprt_rdma { #define RPCRDMA_MAX_REQUESTS 32 #define RPCRDMA_MAX_REQ_SIZE 4096 +#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD + /* svc_rdma_marshal.c */ extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4054a9de6a91..21e40365042c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = { .xcl_name = "rdma", .xcl_owner = THIS_MODULE, .xcl_ops = &svc_rdma_ops, - .xcl_max_payload = RPCRDMA_MAXPAYLOAD, + .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA, .xcl_ident = XPRT_TRANSPORT_RDMA, }; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index f49dd8b38122..e718d0959af3 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -51,7 +51,6 @@ #include /* rpc_xprt */ #include /* RPC/RDMA protocol */ #include /* xprt parameters */ -#include /* RPCSVC_MAXPAYLOAD */ #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ -- cgit v1.2.3 From ea126e74353453d15fc0a181910ae1e25162f2a1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:03:32 -0700 Subject: nfsd/sunrpc: add a new svc_serv_ops struct and move sv_shutdown into it In later patches we'll need to abstract out more operations on a per-service level, besides sv_shutdown and sv_function. Declare a new svc_serv_ops struct to hold these operations, and move sv_shutdown into this struct. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. 
Bruce Fields --- fs/lockd/svc.c | 6 +++++- fs/nfs/callback.c | 5 ++++- fs/nfsd/nfssvc.c | 6 +++++- include/linux/sunrpc/svc.h | 20 ++++++++++---------- net/sunrpc/svc.c | 18 +++++++++--------- 5 files changed, 33 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 55505cbe11af..4182b2f925cd 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -322,6 +322,10 @@ out_rqst: return error; } +static struct svc_serv_ops lockd_sv_ops = { + .svo_shutdown = svc_rpcb_cleanup, +}; + static struct svc_serv *lockd_create_svc(void) { struct svc_serv *serv; @@ -350,7 +354,7 @@ static struct svc_serv *lockd_create_svc(void) nlm_timeout = LOCKD_DFLT_TIMEO; nlmsvc_timeout = nlm_timeout * HZ; - serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, svc_rpcb_cleanup); + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); return ERR_PTR(-ENOMEM); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 682529c00996..182792d115fc 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -308,6 +308,9 @@ err_bind: return ret; } +static struct svc_serv_ops nfs_cb_sv_ops = { +}; + static struct svc_serv *nfs_callback_create_svc(int minorversion) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; @@ -333,7 +336,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", cb_info->users); - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, &nfs_cb_sv_ops); if (!serv) { printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); return ERR_PTR(-ENOMEM); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9277cc91c21b..7311677330b2 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -391,6 +391,10 @@ static int nfsd_get_default_max_blksize(void) return ret; } 
+static struct svc_serv_ops nfsd_sv_ops = { + .svo_shutdown = nfsd_last_thread, +}; + int nfsd_create_serv(struct net *net) { int error; @@ -405,7 +409,7 @@ int nfsd_create_serv(struct net *net) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - nfsd_last_thread, nfsd, THIS_MODULE); + &nfsd_sv_ops, nfsd, THIS_MODULE); if (nn->nfsd_serv == NULL) return -ENOMEM; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fae6fb947fc8..2e682f636b13 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -54,6 +54,13 @@ struct svc_pool { unsigned long sp_flags; } ____cacheline_aligned_in_smp; +struct svc_serv; + +struct svc_serv_ops { + /* Callback to use when last thread exits. */ + void (*svo_shutdown)(struct svc_serv *serv, struct net *net); +}; + /* * RPC service. * @@ -85,13 +92,7 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - - void (*sv_shutdown)(struct svc_serv *serv, - struct net *net); - /* Callback to use when last thread - * exits. 
- */ - + struct svc_serv_ops *sv_ops; /* server operations */ struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ @@ -429,13 +430,12 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv *, struct net *net)); + struct svc_serv_ops *); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv *, struct net *net), - svc_thread_fn, struct module *); + struct svc_serv_ops *, svc_thread_fn, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5a16d8d8c831..36eee907696b 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -423,7 +423,7 @@ EXPORT_SYMBOL_GPL(svc_bind); */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - void (*shutdown)(struct svc_serv *serv, struct net *net)) + struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int vers; @@ -440,7 +440,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, bufsize = RPCSVC_MAXPAYLOAD; serv->sv_max_payload = bufsize? 
bufsize : 4096; serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); - serv->sv_shutdown = shutdown; + serv->sv_ops = ops; xdrsize = 0; while (prog) { prog->pg_lovers = prog->pg_nvers-1; @@ -486,21 +486,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv, struct net *net)) + struct svc_serv_ops *ops) { - return __svc_create(prog, bufsize, /*npools*/1, shutdown); + return __svc_create(prog, bufsize, /*npools*/1, ops); } EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv, struct net *net), - svc_thread_fn func, struct module *mod) + struct svc_serv_ops *ops, svc_thread_fn func, + struct module *mod) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, shutdown); + serv = __svc_create(prog, bufsize, npools, ops); if (!serv) goto out_err; @@ -517,8 +517,8 @@ void svc_shutdown_net(struct svc_serv *serv, struct net *net) { svc_close_net(serv, net); - if (serv->sv_shutdown) - serv->sv_shutdown(serv, net); + if (serv->sv_ops->svo_shutdown) + serv->sv_ops->svo_shutdown(serv, net); } EXPORT_SYMBOL_GPL(svc_shutdown_net); -- cgit v1.2.3 From c369014f1776367269c8fbb5ea8932826d89ce2f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:04:46 -0700 Subject: nfsd/sunrpc: move sv_function into sv_ops Since we now have a container for holding svc_serv operations, move the sv_function into it as well. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfssvc.c | 3 ++- include/linux/sunrpc/svc.h | 11 +++-------- net/sunrpc/svc.c | 8 +++----- 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 7311677330b2..bd03968363ff 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -393,6 +393,7 @@ static int nfsd_get_default_max_blksize(void) static struct svc_serv_ops nfsd_sv_ops = { .svo_shutdown = nfsd_last_thread, + .svo_function = nfsd, }; int nfsd_create_serv(struct net *net) @@ -409,7 +410,7 @@ int nfsd_create_serv(struct net *net) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - &nfsd_sv_ops, nfsd, THIS_MODULE); + &nfsd_sv_ops, THIS_MODULE); if (nn->nfsd_serv == NULL) return -ENOMEM; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 2e682f636b13..7c51b21ce9d6 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -19,11 +19,6 @@ #include #include -/* - * This is the RPC server thread function prototype - */ -typedef int (*svc_thread_fn)(void *); - /* statistics for svc_pool structures */ struct svc_pool_stats { atomic_long_t packets; @@ -58,7 +53,8 @@ struct svc_serv; struct svc_serv_ops { /* Callback to use when last thread exits. 
*/ - void (*svo_shutdown)(struct svc_serv *serv, struct net *net); + void (*svo_shutdown)(struct svc_serv *, struct net *); + int (*svo_function)(void *); }; /* @@ -95,7 +91,6 @@ struct svc_serv { struct svc_serv_ops *sv_ops; /* server operations */ struct module * sv_module; /* optional module to count when * adding threads */ - svc_thread_fn sv_function; /* main function for threads */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -435,7 +430,7 @@ struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - struct svc_serv_ops *, svc_thread_fn, struct module *); + struct svc_serv_ops *, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 36eee907696b..5b8726030c24 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -34,7 +34,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net); -#define svc_serv_is_pooled(serv) ((serv)->sv_function) +#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function) /* * Mode for mapping cpus to pools. 
@@ -494,8 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - struct svc_serv_ops *ops, svc_thread_fn func, - struct module *mod) + struct svc_serv_ops *ops, struct module *mod) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); @@ -504,7 +503,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, if (!serv) goto out_err; - serv->sv_function = func; serv->sv_module = mod; return serv; out_err: @@ -740,7 +738,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) } __module_get(serv->sv_module); - task = kthread_create_on_node(serv->sv_function, rqstp, + task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { error = PTR_ERR(task); -- cgit v1.2.3 From 758f62fff9ad630f05866a1dd6ae9453a7730c2e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:05:56 -0700 Subject: nfsd/sunrpc: move sv_module parm into sv_ops ...not technically an operation, but it's more convenient and cleaner to pass the module pointer in this struct. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfssvc.c | 3 ++- include/linux/sunrpc/svc.h | 9 ++++++--- net/sunrpc/svc.c | 8 +++----- 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index bd03968363ff..17ceaad5f80a 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -394,6 +394,7 @@ static int nfsd_get_default_max_blksize(void) static struct svc_serv_ops nfsd_sv_ops = { .svo_shutdown = nfsd_last_thread, .svo_function = nfsd, + .svo_module = THIS_MODULE, }; int nfsd_create_serv(struct net *net) @@ -410,7 +411,7 @@ int nfsd_create_serv(struct net *net) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - &nfsd_sv_ops, THIS_MODULE); + &nfsd_sv_ops); if (nn->nfsd_serv == NULL) return -ENOMEM; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 7c51b21ce9d6..0150003d584b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -54,7 +54,12 @@ struct svc_serv; struct svc_serv_ops { /* Callback to use when last thread exits. 
*/ void (*svo_shutdown)(struct svc_serv *, struct net *); + + /* function for service threads to run */ int (*svo_function)(void *); + + /* optional module to count when adding threads (pooled svcs only) */ + struct module *svo_module; }; /* @@ -89,8 +94,6 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ struct svc_serv_ops *sv_ops; /* server operations */ - struct module * sv_module; /* optional module to count when - * adding threads */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -430,7 +433,7 @@ struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - struct svc_serv_ops *, struct module *); + struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5b8726030c24..5a6be22a7904 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - struct svc_serv_ops *ops, struct module *mod) + struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); @@ -502,8 +502,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, serv = __svc_create(prog, bufsize, npools, ops); if (!serv) goto out_err; - - serv->sv_module = mod; return serv; out_err: svc_pool_map_put(); @@ -737,12 +735,12 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) break; } - __module_get(serv->sv_module); + __module_get(serv->sv_ops->svo_module); task = 
kthread_create_on_node(serv->sv_ops->svo_function, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { error = PTR_ERR(task); - module_put(serv->sv_module); + module_put(serv->sv_ops->svo_module); svc_exit_thread(rqstp); break; } -- cgit v1.2.3 From b9e13cdfac70e38ade17b53810a36968c5842339 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:06:51 -0700 Subject: nfsd/sunrpc: turn enqueueing a svc_xprt into a svc_serv operation For now, all services use svc_xprt_do_enqueue, but once we add workqueue-based service support, we'll need to do something different. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 3 ++- fs/nfs/callback.c | 1 + fs/nfsd/nfssvc.c | 11 ++++++----- include/linux/sunrpc/svc.h | 3 +++ include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 10 +++++----- 6 files changed, 18 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 4182b2f925cd..530914b5c455 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -323,7 +323,8 @@ out_rqst: } static struct svc_serv_ops lockd_sv_ops = { - .svo_shutdown = svc_rpcb_cleanup, + .svo_shutdown = svc_rpcb_cleanup, + .svo_enqueue_xprt = svc_xprt_do_enqueue, }; static struct svc_serv *lockd_create_svc(void) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 182792d115fc..2c4a0b565d28 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -309,6 +309,7 @@ err_bind: } static struct svc_serv_ops nfs_cb_sv_ops = { + .svo_enqueue_xprt = svc_xprt_do_enqueue, }; static struct svc_serv *nfs_callback_create_svc(int minorversion) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 17ceaad5f80a..d8b9b4cd37c6 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -391,10 +391,11 @@ static int nfsd_get_default_max_blksize(void) return ret; } -static struct svc_serv_ops nfsd_sv_ops = { - .svo_shutdown = nfsd_last_thread, - .svo_function = nfsd, - .svo_module = THIS_MODULE, 
+static struct svc_serv_ops nfsd_thread_sv_ops = { + .svo_shutdown = nfsd_last_thread, + .svo_function = nfsd, + .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_module = THIS_MODULE, }; int nfsd_create_serv(struct net *net) @@ -411,7 +412,7 @@ int nfsd_create_serv(struct net *net) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - &nfsd_sv_ops); + &nfsd_thread_sv_ops); if (nn->nfsd_serv == NULL) return -ENOMEM; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 0150003d584b..97609d0f68f6 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -58,6 +58,9 @@ struct svc_serv_ops { /* function for service threads to run */ int (*svo_function)(void *); + /* queue up a transport for servicing */ + void (*svo_enqueue_xprt)(struct svc_xprt *); + /* optional module to count when adding threads (pooled svcs only) */ struct module *svo_module; }; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 79f6f8f3dc0a..78512cfe1fe6 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -116,6 +116,7 @@ void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); int svc_create_xprt(struct svc_serv *, const char *, struct net *, const int, const unsigned short, int); +void svc_xprt_do_enqueue(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 163ac45c3639..a6cbb2104667 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -24,7 +24,6 @@ static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); static void svc_age_temp_xprts(unsigned long closure); static void svc_delete_xprt(struct 
svc_xprt *xprt); -static void svc_xprt_do_enqueue(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after @@ -225,12 +224,12 @@ static void svc_xprt_received(struct svc_xprt *xprt) } /* As soon as we clear busy, the xprt could be closed and - * 'put', so we need a reference to call svc_xprt_do_enqueue with: + * 'put', so we need a reference to call svc_enqueue_xprt with: */ svc_xprt_get(xprt); smp_mb__before_atomic(); clear_bit(XPT_BUSY, &xprt->xpt_flags); - svc_xprt_do_enqueue(xprt); + xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); svc_xprt_put(xprt); } @@ -320,7 +319,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) return false; } -static void svc_xprt_do_enqueue(struct svc_xprt *xprt) +void svc_xprt_do_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; struct svc_rqst *rqstp = NULL; @@ -402,6 +401,7 @@ redo_search: out: trace_svc_xprt_do_enqueue(xprt, rqstp); } +EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); /* * Queue up a transport with data pending. If there are idle nfsd @@ -412,7 +412,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) { if (test_bit(XPT_BUSY, &xprt->xpt_flags)) return; - svc_xprt_do_enqueue(xprt); + xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); } EXPORT_SYMBOL_GPL(svc_xprt_enqueue); -- cgit v1.2.3 From d70bc0c67c7aaf0d00084b2f91b44fe1a8ae4e15 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:09:54 -0700 Subject: nfsd/sunrpc: move pool_mode definitions into svc.h In later patches, we're going to need to allow code external to svc.c to figure out what pool_mode is in use. Move these definitions into svc.h to prepare for that. Also, make the svc_pool_map object available and exported so that other modules can peek in there to get insight into what pool mode is in use. Likewise, export svc_pool_map_get/put function to make it safe to do so. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc.h | 25 +++++++++++++++++++++++++ net/sunrpc/svc.c | 31 +++++++------------------------ 2 files changed, 32 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fd5bb9922545..3a9baead5c3e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -427,6 +427,29 @@ struct svc_procedure { unsigned int pc_xdrressize; /* maximum size of XDR reply */ }; +/* + * Mode for mapping cpus to pools. + */ +enum { + SVC_POOL_AUTO = -1, /* choose one of the others */ + SVC_POOL_GLOBAL, /* no mapping, just a single global pool + * (legacy & UP mode) */ + SVC_POOL_PERCPU, /* one pool per cpu */ + SVC_POOL_PERNODE /* one pool per numa node */ +}; + +struct svc_pool_map { + int count; /* How many svc_servs use us */ + int mode; /* Note: int not enum to avoid + * warnings about "enumeration value + * not handled in switch" */ + unsigned int npools; + unsigned int *pool_to; /* maps pool id to cpu or node */ + unsigned int *to_pool; /* maps cpu or node to pool id */ +}; + +extern struct svc_pool_map svc_pool_map; + /* * Function prototypes. */ @@ -438,6 +461,8 @@ struct svc_serv *svc_create(struct svc_program *, unsigned int, struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_exit_thread(struct svc_rqst *); +unsigned int svc_pool_map_get(void); +void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5a6be22a7904..486c14bf4e49 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -36,34 +36,17 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net); #define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function) -/* - * Mode for mapping cpus to pools. 
- */ -enum { - SVC_POOL_AUTO = -1, /* choose one of the others */ - SVC_POOL_GLOBAL, /* no mapping, just a single global pool - * (legacy & UP mode) */ - SVC_POOL_PERCPU, /* one pool per cpu */ - SVC_POOL_PERNODE /* one pool per numa node */ -}; #define SVC_POOL_DEFAULT SVC_POOL_GLOBAL /* * Structure for mapping cpus to pools and vice versa. * Setup once during sunrpc initialisation. */ -static struct svc_pool_map { - int count; /* How many svc_servs use us */ - int mode; /* Note: int not enum to avoid - * warnings about "enumeration value - * not handled in switch" */ - unsigned int npools; - unsigned int *pool_to; /* maps pool id to cpu or node */ - unsigned int *to_pool; /* maps cpu or node to pool id */ -} svc_pool_map = { - .count = 0, +struct svc_pool_map svc_pool_map = { .mode = SVC_POOL_DEFAULT }; +EXPORT_SYMBOL_GPL(svc_pool_map); + static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ static int @@ -236,7 +219,7 @@ svc_pool_map_init_pernode(struct svc_pool_map *m) * vice versa). Initialise the map if we're the first user. * Returns the number of pools. */ -static unsigned int +unsigned int svc_pool_map_get(void) { struct svc_pool_map *m = &svc_pool_map; @@ -271,7 +254,7 @@ svc_pool_map_get(void) mutex_unlock(&svc_pool_map_mutex); return m->npools; } - +EXPORT_SYMBOL_GPL(svc_pool_map_get); /* * Drop a reference to the global map of cpus to pools. @@ -280,7 +263,7 @@ svc_pool_map_get(void) * mode using the pool_mode module option without * rebooting or re-loading sunrpc.ko. 
*/ -static void +void svc_pool_map_put(void) { struct svc_pool_map *m = &svc_pool_map; @@ -297,7 +280,7 @@ svc_pool_map_put(void) mutex_unlock(&svc_pool_map_mutex); } - +EXPORT_SYMBOL_GPL(svc_pool_map_put); static int svc_pool_map_get_node(unsigned int pidx) { -- cgit v1.2.3 From 1b6dc1dffbb142de60eb65f6155276ac31ff5474 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:11:10 -0700 Subject: nfsd/sunrpc: factor svc_rqst allocation and freeing from sv_nrthreads refcounting In later patches, we'll want to be able to allocate and free svc_rqst structures without monkeying with the serv->sv_nrthreads refcount. Factor those pieces out of their respective functions. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 3 +++ net/sunrpc/svc.c | 54 ++++++++++++++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3a9baead5c3e..cc0fc712bb82 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -458,8 +458,11 @@ void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, struct svc_serv_ops *); +struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, + struct svc_pool *pool, int node); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); +void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); unsigned int svc_pool_map_get(void); void svc_pool_map_put(void); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 486c14bf4e49..a8f579df14d8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -583,40 +583,52 @@ svc_release_buffer(struct svc_rqst *rqstp) } struct svc_rqst * -svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) 
+svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) { struct svc_rqst *rqstp; rqstp = kzalloc_node(sizeof(*rqstp), GFP_KERNEL, node); if (!rqstp) - goto out_enomem; + return rqstp; - serv->sv_nrthreads++; __set_bit(RQ_BUSY, &rqstp->rq_flags); spin_lock_init(&rqstp->rq_lock); rqstp->rq_server = serv; rqstp->rq_pool = pool; - spin_lock_bh(&pool->sp_lock); - pool->sp_nrthreads++; - list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); - spin_unlock_bh(&pool->sp_lock); rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); if (!rqstp->rq_argp) - goto out_thread; + goto out_enomem; rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); if (!rqstp->rq_resp) - goto out_thread; + goto out_enomem; if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node)) - goto out_thread; + goto out_enomem; return rqstp; -out_thread: - svc_exit_thread(rqstp); out_enomem: - return ERR_PTR(-ENOMEM); + svc_rqst_free(rqstp); + return NULL; +} +EXPORT_SYMBOL_GPL(svc_rqst_alloc); + +struct svc_rqst * +svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) +{ + struct svc_rqst *rqstp; + + rqstp = svc_rqst_alloc(serv, pool, node); + if (!rqstp) + return ERR_PTR(-ENOMEM); + + serv->sv_nrthreads++; + spin_lock_bh(&pool->sp_lock); + pool->sp_nrthreads++; + list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); + spin_unlock_bh(&pool->sp_lock); + return rqstp; } EXPORT_SYMBOL_GPL(svc_prepare_thread); @@ -751,15 +763,21 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads); * mutex" for the service. 
*/ void -svc_exit_thread(struct svc_rqst *rqstp) +svc_rqst_free(struct svc_rqst *rqstp) { - struct svc_serv *serv = rqstp->rq_server; - struct svc_pool *pool = rqstp->rq_pool; - svc_release_buffer(rqstp); kfree(rqstp->rq_resp); kfree(rqstp->rq_argp); kfree(rqstp->rq_auth_data); + kfree_rcu(rqstp, rq_rcu_head); +} +EXPORT_SYMBOL_GPL(svc_rqst_free); + +void +svc_exit_thread(struct svc_rqst *rqstp) +{ + struct svc_serv *serv = rqstp->rq_server; + struct svc_pool *pool = rqstp->rq_pool; spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads--; @@ -767,7 +785,7 @@ svc_exit_thread(struct svc_rqst *rqstp) list_del_rcu(&rqstp->rq_all); spin_unlock_bh(&pool->sp_lock); - kfree_rcu(rqstp, rq_rcu_head); + svc_rqst_free(rqstp); /* Release the server */ if (serv) -- cgit v1.2.3 From 9936f2ae37482aff54ce53918c69b378bb50097c Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 27 Jul 2015 11:09:10 +0800 Subject: sunrpc: Store cache_detail in seq_file's private directly Cleanup. Just store cache_detail in seq_file's private, an allocated handle is redundant. v8, same as v6. Signed-off-by: Kinglong Mee Reviewed-by: NeilBrown Signed-off-by: J. 
Bruce Fields --- net/sunrpc/cache.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 2928afffbb81..edec603abc17 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1270,18 +1270,13 @@ EXPORT_SYMBOL_GPL(qword_get); * get a header, then pass each real item in the cache */ -struct handle { - struct cache_detail *cd; -}; - static void *c_start(struct seq_file *m, loff_t *pos) __acquires(cd->hash_lock) { loff_t n = *pos; unsigned int hash, entry; struct cache_head *ch; - struct cache_detail *cd = ((struct handle*)m->private)->cd; - + struct cache_detail *cd = m->private; read_lock(&cd->hash_lock); if (!n--) @@ -1308,7 +1303,7 @@ static void *c_next(struct seq_file *m, void *p, loff_t *pos) { struct cache_head *ch = p; int hash = (*pos >> 32); - struct cache_detail *cd = ((struct handle*)m->private)->cd; + struct cache_detail *cd = m->private; if (p == SEQ_START_TOKEN) hash = 0; @@ -1334,14 +1329,14 @@ static void *c_next(struct seq_file *m, void *p, loff_t *pos) static void c_stop(struct seq_file *m, void *p) __releases(cd->hash_lock) { - struct cache_detail *cd = ((struct handle*)m->private)->cd; + struct cache_detail *cd = m->private; read_unlock(&cd->hash_lock); } static int c_show(struct seq_file *m, void *p) { struct cache_head *cp = p; - struct cache_detail *cd = ((struct handle*)m->private)->cd; + struct cache_detail *cd = m->private; if (p == SEQ_START_TOKEN) return cd->cache_show(m, cd, NULL); @@ -1373,24 +1368,27 @@ static const struct seq_operations cache_content_op = { static int content_open(struct inode *inode, struct file *file, struct cache_detail *cd) { - struct handle *han; + struct seq_file *seq; + int err; if (!cd || !try_module_get(cd->owner)) return -EACCES; - han = __seq_open_private(file, &cache_content_op, sizeof(*han)); - if (han == NULL) { + + err = seq_open(file, &cache_content_op); + if (err) { module_put(cd->owner); - 
return -ENOMEM; + return err; } - han->cd = cd; + seq = file->private_data; + seq->private = cd; return 0; } static int content_release(struct inode *inode, struct file *file, struct cache_detail *cd) { - int ret = seq_release_private(inode, file); + int ret = seq_release(inode, file); module_put(cd->owner); return ret; } -- cgit v1.2.3 From c8c081b70cb563cc4d41ab9933fa3323c6f6ffca Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 27 Jul 2015 11:09:42 +0800 Subject: sunrpc/nfsd: Remove redundant code by exports seq_operations functions Nfsd has implemented a set of seq_operations functions that duplicate those in sunrpc's cache. Just export sunrpc's code, and remove nfsd's redundant code. v8, same as v6 Signed-off-by: Kinglong Mee Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 73 ++------------------------------------------ include/linux/sunrpc/cache.h | 5 +++ net/sunrpc/cache.c | 15 +++++---- 3 files changed, 17 insertions(+), 76 deletions(-) (limited to 'net') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index f79521a59747..b4d84b579f20 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1075,73 +1075,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) return rv; } -/* Iterator */ - -static void *e_start(struct seq_file *m, loff_t *pos) - __acquires(((struct cache_detail *)m->private)->hash_lock) -{ - loff_t n = *pos; - unsigned hash, export; - struct cache_head *ch; - struct cache_detail *cd = m->private; - struct cache_head **export_table = cd->hash_table; - - read_lock(&cd->hash_lock); - if (!n--) - return SEQ_START_TOKEN; - hash = n >> 32; - export = n & ((1LL<<32) - 1); - - - for (ch=export_table[hash]; ch; ch=ch->next) - if (!export--) - return ch; - n &= ~((1LL<<32) - 1); - do { - hash++; - n += 1LL<<32; - } while(hash < EXPORT_HASHMAX && export_table[hash]==NULL); - if (hash >= EXPORT_HASHMAX) - return NULL; - *pos = n+1; - return export_table[hash]; -} - -static void *e_next(struct seq_file *m, void *p, loff_t *pos) 
-{ - struct cache_head *ch = p; - int hash = (*pos >> 32); - struct cache_detail *cd = m->private; - struct cache_head **export_table = cd->hash_table; - - if (p == SEQ_START_TOKEN) - hash = 0; - else if (ch->next == NULL) { - hash++; - *pos += 1LL<<32; - } else { - ++*pos; - return ch->next; - } - *pos &= ~((1LL<<32) - 1); - while (hash < EXPORT_HASHMAX && export_table[hash] == NULL) { - hash++; - *pos += 1LL<<32; - } - if (hash >= EXPORT_HASHMAX) - return NULL; - ++*pos; - return export_table[hash]; -} - -static void e_stop(struct seq_file *m, void *p) - __releases(((struct cache_detail *)m->private)->hash_lock) -{ - struct cache_detail *cd = m->private; - - read_unlock(&cd->hash_lock); -} - static struct flags { int flag; char *name[2]; @@ -1270,9 +1203,9 @@ static int e_show(struct seq_file *m, void *p) } const struct seq_operations nfs_exports_op = { - .start = e_start, - .next = e_next, - .stop = e_stop, + .start = cache_seq_start, + .next = cache_seq_next, + .stop = cache_seq_stop, .show = e_show, }; diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 437ddb6c4aef..04ee5a284aac 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -224,6 +224,11 @@ extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, umode_t, struct cache_detail *); extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); +/* Must store cache_detail in seq_file->private if using next three functions */ +extern void *cache_seq_start(struct seq_file *file, loff_t *pos); +extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos); +extern void cache_seq_stop(struct seq_file *file, void *p); + extern void qword_add(char **bpp, int *lp, char *str); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); extern int qword_get(char **bpp, char *dest, int bufsize); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index edec603abc17..673c2fa3c6c2 100644 --- a/net/sunrpc/cache.c +++ 
b/net/sunrpc/cache.c @@ -1270,7 +1270,7 @@ EXPORT_SYMBOL_GPL(qword_get); * get a header, then pass each real item in the cache */ -static void *c_start(struct seq_file *m, loff_t *pos) +void *cache_seq_start(struct seq_file *m, loff_t *pos) __acquires(cd->hash_lock) { loff_t n = *pos; @@ -1298,8 +1298,9 @@ static void *c_start(struct seq_file *m, loff_t *pos) *pos = n+1; return cd->hash_table[hash]; } +EXPORT_SYMBOL_GPL(cache_seq_start); -static void *c_next(struct seq_file *m, void *p, loff_t *pos) +void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) { struct cache_head *ch = p; int hash = (*pos >> 32); @@ -1325,13 +1326,15 @@ static void *c_next(struct seq_file *m, void *p, loff_t *pos) ++*pos; return cd->hash_table[hash]; } +EXPORT_SYMBOL_GPL(cache_seq_next); -static void c_stop(struct seq_file *m, void *p) +void cache_seq_stop(struct seq_file *m, void *p) __releases(cd->hash_lock) { struct cache_detail *cd = m->private; read_unlock(&cd->hash_lock); } +EXPORT_SYMBOL_GPL(cache_seq_stop); static int c_show(struct seq_file *m, void *p) { @@ -1359,9 +1362,9 @@ static int c_show(struct seq_file *m, void *p) } static const struct seq_operations cache_content_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, + .start = cache_seq_start, + .next = cache_seq_next, + .stop = cache_seq_stop, .show = c_show, }; -- cgit v1.2.3 From 129e5824cd96d9289679973f0ff7c48e88d569bb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 27 Jul 2015 11:10:15 +0800 Subject: sunrpc: Switch to using hash list instead single list Switch to using list_head for cache_head in cache_detail; it is useful for removing a cache_head entry directly from cache_detail. v8, using hash list, not head list Signed-off-by: Kinglong Mee Reviewed-by: NeilBrown Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/cache.h | 4 +-- net/sunrpc/cache.c | 60 +++++++++++++++++++++++--------------------- 2 files changed, 33 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 04ee5a284aac..03d3b4c92d9f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -46,7 +46,7 @@ * */ struct cache_head { - struct cache_head * next; + struct hlist_node cache_list; time_t expiry_time; /* After time time, don't use the data */ time_t last_refresh; /* If CACHE_PENDING, this is when upcall * was sent, else this is when update was received @@ -73,7 +73,7 @@ struct cache_detail_pipefs { struct cache_detail { struct module * owner; int hash_size; - struct cache_head ** hash_table; + struct hlist_head * hash_table; rwlock_t hash_lock; atomic_t inuse; /* active user-space update or lookup */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 673c2fa3c6c2..4a2340a54401 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -44,7 +44,7 @@ static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) { time_t now = seconds_since_boot(); - h->next = NULL; + INIT_HLIST_NODE(&h->cache_list); h->flags = 0; kref_init(&h->ref); h->expiry_time = now + CACHE_NEW_EXPIRY; @@ -54,15 +54,14 @@ static void cache_init(struct cache_head *h) struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { - struct cache_head **head, **hp; - struct cache_head *new = NULL, *freeme = NULL; + struct cache_head *new = NULL, *freeme = NULL, *tmp = NULL; + struct hlist_head *head; head = &detail->hash_table[hash]; read_lock(&detail->hash_lock); - for (hp=head; *hp != NULL ; hp = &(*hp)->next) { - struct cache_head *tmp = *hp; + hlist_for_each_entry(tmp, head, cache_list) { if (detail->match(tmp, key)) { if (cache_is_expired(detail, tmp)) /* This entry is expired, we will discard it. 
*/ @@ -88,12 +87,10 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, write_lock(&detail->hash_lock); /* check if entry appeared while we slept */ - for (hp=head; *hp != NULL ; hp = &(*hp)->next) { - struct cache_head *tmp = *hp; + hlist_for_each_entry(tmp, head, cache_list) { if (detail->match(tmp, key)) { if (cache_is_expired(detail, tmp)) { - *hp = tmp->next; - tmp->next = NULL; + hlist_del_init(&tmp->cache_list); detail->entries --; freeme = tmp; break; @@ -104,8 +101,8 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, return tmp; } } - new->next = *head; - *head = new; + + hlist_add_head(&new->cache_list, head); detail->entries++; cache_get(new); write_unlock(&detail->hash_lock); @@ -143,7 +140,6 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, * If 'old' is not VALID, we update it directly, * otherwise we need to replace it */ - struct cache_head **head; struct cache_head *tmp; if (!test_bit(CACHE_VALID, &old->flags)) { @@ -168,15 +164,13 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, } cache_init(tmp); detail->init(tmp, old); - head = &detail->hash_table[hash]; write_lock(&detail->hash_lock); if (test_bit(CACHE_NEGATIVE, &new->flags)) set_bit(CACHE_NEGATIVE, &tmp->flags); else detail->update(tmp, new); - tmp->next = *head; - *head = tmp; + hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); detail->entries++; cache_get(tmp); cache_fresh_locked(tmp, new->expiry_time); @@ -416,28 +410,29 @@ static int cache_clean(void) /* find a non-empty bucket in the table */ while (current_detail && current_index < current_detail->hash_size && - current_detail->hash_table[current_index] == NULL) + hlist_empty(&current_detail->hash_table[current_index])) current_index++; /* find a cleanable entry in the bucket and clean it, or set to next bucket */ if (current_detail && current_index < current_detail->hash_size) { - struct cache_head *ch, **cp; + struct cache_head *ch = NULL; struct 
cache_detail *d; + struct hlist_head *head; + struct hlist_node *tmp; write_lock(&current_detail->hash_lock); /* Ok, now to clean this strand */ - cp = & current_detail->hash_table[current_index]; - for (ch = *cp ; ch ; cp = & ch->next, ch = *cp) { + head = &current_detail->hash_table[current_index]; + hlist_for_each_entry_safe(ch, tmp, head, cache_list) { if (current_detail->nextcheck > ch->expiry_time) current_detail->nextcheck = ch->expiry_time+1; if (!cache_is_expired(current_detail, ch)) continue; - *cp = ch->next; - ch->next = NULL; + hlist_del_init(&ch->cache_list); current_detail->entries--; rv = 1; break; @@ -1284,7 +1279,7 @@ void *cache_seq_start(struct seq_file *m, loff_t *pos) hash = n >> 32; entry = n & ((1LL<<32) - 1); - for (ch=cd->hash_table[hash]; ch; ch=ch->next) + hlist_for_each_entry(ch, &cd->hash_table[hash], cache_list) if (!entry--) return ch; n &= ~((1LL<<32) - 1); @@ -1292,11 +1287,12 @@ void *cache_seq_start(struct seq_file *m, loff_t *pos) hash++; n += 1LL<<32; } while(hash < cd->hash_size && - cd->hash_table[hash]==NULL); + hlist_empty(&cd->hash_table[hash])); if (hash >= cd->hash_size) return NULL; *pos = n+1; - return cd->hash_table[hash]; + return hlist_entry_safe(cd->hash_table[hash].first, + struct cache_head, cache_list); } EXPORT_SYMBOL_GPL(cache_seq_start); @@ -1308,23 +1304,25 @@ void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) if (p == SEQ_START_TOKEN) hash = 0; - else if (ch->next == NULL) { + else if (ch->cache_list.next == NULL) { hash++; *pos += 1LL<<32; } else { ++*pos; - return ch->next; + return hlist_entry_safe(ch->cache_list.next, + struct cache_head, cache_list); } *pos &= ~((1LL<<32) - 1); while (hash < cd->hash_size && - cd->hash_table[hash] == NULL) { + hlist_empty(&cd->hash_table[hash])) { hash++; *pos += 1LL<<32; } if (hash >= cd->hash_size) return NULL; ++*pos; - return cd->hash_table[hash]; + return hlist_entry_safe(cd->hash_table[hash].first, + struct cache_head, cache_list); } 
EXPORT_SYMBOL_GPL(cache_seq_next); @@ -1666,17 +1664,21 @@ EXPORT_SYMBOL_GPL(cache_unregister_net); struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net) { struct cache_detail *cd; + int i; cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL); if (cd == NULL) return ERR_PTR(-ENOMEM); - cd->hash_table = kzalloc(cd->hash_size * sizeof(struct cache_head *), + cd->hash_table = kzalloc(cd->hash_size * sizeof(struct hlist_head), GFP_KERNEL); if (cd->hash_table == NULL) { kfree(cd); return ERR_PTR(-ENOMEM); } + + for (i = 0; i < cd->hash_size; i++) + INIT_HLIST_HEAD(&cd->hash_table[i]); cd->net = net; return cd; } -- cgit v1.2.3