summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 20:48:57 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 20:48:57 +0300
commitf7976a6493b3f00c4d057a37d9e63c322154ef8c (patch)
tree0f295581c6993306c76a711807c82dda19dd2df4 /fs
parentc0a572d9d32fe1e95672f24e860776dba0750a38 (diff)
parent75bfb70457a4c4c9f0095e39885382fc5049c5ce (diff)
downloadlinux-f7976a6493b3f00c4d057a37d9e63c322154ef8c.tar.xz
Merge tag 'nfsd-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd updates from Chuck Lever: - Clean-ups in the READ path in anticipation of MSG_SPLICE_PAGES - Better NUMA awareness when allocating pages and other objects - A number of minor clean-ups to XDR encoding - Elimination of a race when accepting a TCP socket - Numerous observability enhancements * tag 'nfsd-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (46 commits) nfsd: remove redundant assignments to variable len svcrdma: Fix stale comment NFSD: Distinguish per-net namespace initialization nfsd: move init of percpu reply_cache_stats counters back to nfsd_init_net SUNRPC: Address RCU warning in net/sunrpc/svc.c SUNRPC: Use sysfs_emit in place of strlcpy/sprintf SUNRPC: Remove transport class dprintk call sites SUNRPC: Fix comments for transport class registration svcrdma: Remove an unused argument from __svc_rdma_put_rw_ctxt() svcrdma: trace cc_release calls svcrdma: Convert "might sleep" comment into a code annotation NFSD: Add an nfsd4_encode_nfstime4() helper SUNRPC: Move initialization of rq_stime SUNRPC: Optimize page release in svc_rdma_sendto() svcrdma: Prevent page release when nothing was received svcrdma: Revert 2a1e4f21d841 ("svcrdma: Normalize Send page handling") SUNRPC: Revert 579900670ac7 ("svcrdma: Remove unused sc_pages field") SUNRPC: Revert cc93ce9529a6 ("svcrdma: Retain the page backing rq_res.head[0].iov_base") NFSD: add encoding of op_recall flag for write delegation NFSD: Add "official" reviewers for this subsystem ...
Diffstat (limited to 'fs')
-rw-r--r--fs/lockd/svc.c1
-rw-r--r--fs/nfsd/cache.h2
-rw-r--r--fs/nfsd/export.c12
-rw-r--r--fs/nfsd/nfs3proc.c14
-rw-r--r--fs/nfsd/nfs3xdr.c11
-rw-r--r--fs/nfsd/nfs4xdr.c289
-rw-r--r--fs/nfsd/nfscache.c25
-rw-r--r--fs/nfsd/nfsctl.c116
-rw-r--r--fs/nfsd/nfsfh.c26
-rw-r--r--fs/nfsd/nfsproc.c14
-rw-r--r--fs/nfsd/nfssvc.c5
-rw-r--r--fs/nfsd/nfsxdr.c11
-rw-r--r--fs/nfsd/trace.h259
-rw-r--r--fs/nfsd/vfs.c80
-rw-r--r--fs/nfsd/vfs.h9
15 files changed, 593 insertions, 281 deletions
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 04ba95b83d16..22d3ff3818f5 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -355,7 +355,6 @@ static int lockd_get(void)
int error;
if (nlmsvc_serv) {
- svc_get(nlmsvc_serv);
nlmsvc_users++;
return 0;
}
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index f21259ead64b..4c9b87850ab1 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -80,6 +80,8 @@ enum {
int nfsd_drc_slab_create(void);
void nfsd_drc_slab_free(void);
+int nfsd_net_reply_cache_init(struct nfsd_net *nn);
+void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index ae85257b4238..11a0eaa2f914 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -97,7 +97,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
goto out;
err = -EINVAL;
- if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
err = -ENOENT;
@@ -107,7 +107,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
dprintk("found domain %s\n", buf);
err = -EINVAL;
- if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
fsidtype = simple_strtoul(buf, &ep, 10);
if (*ep)
@@ -593,7 +593,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
{
/* client path expiry [flags anonuid anongid fsid] */
char *buf;
- int len;
int err;
struct auth_domain *dom = NULL;
struct svc_export exp = {}, *expp;
@@ -609,8 +608,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
/* client */
err = -EINVAL;
- len = qword_get(&mesg, buf, PAGE_SIZE);
- if (len <= 0)
+ if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
err = -ENOENT;
@@ -620,7 +618,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
/* path */
err = -EINVAL;
- if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out1;
err = kern_path(buf, 0, &exp.ex_path);
@@ -665,7 +663,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
goto out3;
exp.ex_fsid = an_int;
- while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
+ while (qword_get(&mesg, buf, PAGE_SIZE) > 0) {
if (strcmp(buf, "fsloc") == 0)
err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
else if (strcmp(buf, "uuid") == 0)
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index e6bb8eeb5bc2..fc8d5b7db9f8 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -151,8 +151,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
{
struct nfsd3_readargs *argp = rqstp->rq_argp;
struct nfsd3_readres *resp = rqstp->rq_resp;
- unsigned int len;
- int v;
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
@@ -166,17 +164,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
if (argp->offset + argp->count > (u64)OFFSET_MAX)
argp->count = (u64)OFFSET_MAX - argp->offset;
- v = 0;
- len = argp->count;
resp->pages = rqstp->rq_next_page;
- while (len > 0) {
- struct page *page = *(rqstp->rq_next_page++);
-
- rqstp->rq_vec[v].iov_base = page_address(page);
- rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
- len -= rqstp->rq_vec[v].iov_len;
- v++;
- }
/* Obtain buffer pointer for payload.
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
@@ -187,7 +175,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
- rqstp->rq_vec, v, &resp->count, &resp->eof);
+ &resp->count, &resp->eof);
return rpc_success;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 3308dd671ef0..f32128955ec8 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -828,7 +828,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (xdr_stream_encode_u32(xdr, resp->len) < 0)
return false;
- xdr_write_pages(xdr, resp->pages, 0, resp->len);
+ svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages, 0,
+ resp->len);
if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
return false;
break;
@@ -859,8 +860,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (xdr_stream_encode_u32(xdr, resp->count) < 0)
return false;
- xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
- resp->count);
+ svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
+ rqstp->rq_res.page_base,
+ resp->count);
if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
return false;
break;
@@ -961,7 +963,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (!svcxdr_encode_cookieverf3(xdr, resp->verf))
return false;
- xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
+ svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0,
+ dirlist->len);
/* no more entries */
if (xdr_stream_encode_item_absent(xdr) < 0)
return false;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 76db2fe29624..26b1343c8035 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2541,6 +2541,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
return p;
}
+static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr,
+ struct timespec64 *tv)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, XDR_UNIT * 3);
+ if (!p)
+ return nfserr_resource;
+
+ p = xdr_encode_hyper(p, (s64)tv->tv_sec);
+ *p = cpu_to_be32(tv->tv_nsec);
+ return nfs_ok;
+}
+
/*
* ctime (in NFSv4, time_metadata) is not writeable, and the client
* doesn't really care what resolution could theoretically be stored by
@@ -2566,12 +2580,16 @@ static __be32 *encode_time_delta(__be32 *p, struct inode *inode)
return p;
}
-static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
+static __be32
+nfsd4_encode_change_info4(struct xdr_stream *xdr, struct nfsd4_change_info *c)
{
- *p++ = cpu_to_be32(c->atomic);
- p = xdr_encode_hyper(p, c->before_change);
- p = xdr_encode_hyper(p, c->after_change);
- return p;
+ if (xdr_stream_encode_bool(xdr, c->atomic) < 0)
+ return nfserr_resource;
+ if (xdr_stream_encode_u64(xdr, c->before_change) < 0)
+ return nfserr_resource;
+ if (xdr_stream_encode_u64(xdr, c->after_change) < 0)
+ return nfserr_resource;
+ return nfs_ok;
}
/* Encode as an array of strings the string given with components
@@ -3348,11 +3366,9 @@ out_acl:
p = xdr_encode_hyper(p, dummy64);
}
if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec);
- *p++ = cpu_to_be32(stat.atime.tv_nsec);
+ status = nfsd4_encode_nfstime4(xdr, &stat.atime);
+ if (status)
+ goto out;
}
if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
p = xdr_reserve_space(xdr, 12);
@@ -3361,25 +3377,19 @@ out_acl:
p = encode_time_delta(p, d_inode(dentry));
}
if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec);
- *p++ = cpu_to_be32(stat.ctime.tv_nsec);
+ status = nfsd4_encode_nfstime4(xdr, &stat.ctime);
+ if (status)
+ goto out;
}
if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
- *p++ = cpu_to_be32(stat.mtime.tv_nsec);
+ status = nfsd4_encode_nfstime4(xdr, &stat.mtime);
+ if (status)
+ goto out;
}
if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec);
- *p++ = cpu_to_be32(stat.btime.tv_nsec);
+ status = nfsd4_encode_nfstime4(xdr, &stat.btime);
+ if (status)
+ goto out;
}
if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
u64 ino = stat.ino;
@@ -3689,6 +3699,30 @@ fail:
}
static __be32
+nfsd4_encode_verifier4(struct xdr_stream *xdr, const nfs4_verifier *verf)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
+ if (!p)
+ return nfserr_resource;
+ memcpy(p, verf->data, sizeof(verf->data));
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_clientid4(struct xdr_stream *xdr, const clientid_t *clientid)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, sizeof(__be64));
+ if (!p)
+ return nfserr_resource;
+ memcpy(p, clientid, sizeof(*clientid));
+ return nfs_ok;
+}
+
+static __be32
nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
{
__be32 *p;
@@ -3752,15 +3786,8 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr,
union nfsd4_op_u *u)
{
struct nfsd4_commit *commit = &u->commit;
- struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, commit->co_verf.data,
- NFS4_VERIFIER_SIZE);
- return 0;
+ return nfsd4_encode_verifier4(resp->xdr, &commit->co_verf);
}
static __be32
@@ -3769,12 +3796,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_create *create = &u->create;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
- encode_cinfo(p, &create->cr_cinfo);
+ nfserr = nfsd4_encode_change_info4(xdr, &create->cr_cinfo);
+ if (nfserr)
+ return nfserr;
return nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
create->cr_bmval[1], create->cr_bmval[2]);
}
@@ -3892,13 +3917,8 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_link *link = &u->link;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
- p = encode_cinfo(p, &link->li_cinfo);
- return 0;
+ return nfsd4_encode_change_info4(xdr, &link->li_cinfo);
}
@@ -3913,11 +3933,11 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid);
if (nfserr)
return nfserr;
- p = xdr_reserve_space(xdr, 24);
- if (!p)
+ nfserr = nfsd4_encode_change_info4(xdr, &open->op_cinfo);
+ if (nfserr)
+ return nfserr;
+ if (xdr_stream_encode_u32(xdr, open->op_rflags) < 0)
return nfserr_resource;
- p = encode_cinfo(p, &open->op_cinfo);
- *p++ = cpu_to_be32(open->op_rflags);
nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1],
open->op_bmval[2]);
@@ -3956,7 +3976,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
p = xdr_reserve_space(xdr, 32);
if (!p)
return nfserr_resource;
- *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(open->op_recall);
/*
* TODO: space_limit's in delegations
@@ -4018,6 +4038,11 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfsd4_encode_stateid(xdr, &od->od_stateid);
}
+/*
+ * The operation of this function assumes that this is the only
+ * READ operation in the COMPOUND. If there are multiple READs,
+ * we use nfsd4_encode_readv().
+ */
static __be32 nfsd4_encode_splice_read(
struct nfsd4_compoundres *resp,
struct nfsd4_read *read,
@@ -4028,8 +4053,12 @@ static __be32 nfsd4_encode_splice_read(
int status, space_left;
__be32 nfserr;
- /* Make sure there will be room for padding if needed */
- if (xdr->end - xdr->p < 1)
+ /*
+ * Make sure there is room at the end of buf->head for
+ * svcxdr_encode_opaque_pages() to create a tail buffer
+ * to XDR-pad the payload.
+ */
+ if (xdr->iov != xdr->buf->head || xdr->end - xdr->p < 1)
return nfserr_resource;
nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
@@ -4038,6 +4067,8 @@ static __be32 nfsd4_encode_splice_read(
read->rd_length = maxcount;
if (nfserr)
goto out_err;
+ svcxdr_encode_opaque_pages(read->rd_rqstp, xdr, buf->pages,
+ buf->page_base, maxcount);
status = svc_encode_result_payload(read->rd_rqstp,
buf->head[0].iov_len, maxcount);
if (status) {
@@ -4045,31 +4076,19 @@ static __be32 nfsd4_encode_splice_read(
goto out_err;
}
- buf->page_len = maxcount;
- buf->len += maxcount;
- xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1)
- / PAGE_SIZE;
-
- /* Use rest of head for padding and remaining ops: */
- buf->tail[0].iov_base = xdr->p;
- buf->tail[0].iov_len = 0;
- xdr->iov = buf->tail;
- if (maxcount&3) {
- int pad = 4 - (maxcount&3);
-
- *(xdr->p++) = 0;
-
- buf->tail[0].iov_base += maxcount&3;
- buf->tail[0].iov_len = pad;
- buf->len += pad;
- }
-
+ /*
+ * Prepare to encode subsequent operations.
+ *
+ * xdr_truncate_encode() is not safe to use after a successful
+ * splice read has been done, so the following stream
+ * manipulations are open-coded.
+ */
space_left = min_t(int, (void *)xdr->end - (void *)xdr->p,
buf->buflen - buf->len);
buf->buflen = buf->len + space_left;
xdr->end = (__be32 *)((void *)xdr->end + space_left);
- return 0;
+ return nfs_ok;
out_err:
/*
@@ -4090,13 +4109,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
__be32 zero = xdr_zero;
__be32 nfserr;
- read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
- if (read->rd_vlen < 0)
+ if (xdr_reserve_space_vec(xdr, maxcount) < 0)
return nfserr_resource;
- nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
- resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
- &read->rd_eof);
+ nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, file,
+ read->rd_offset, &maxcount,
+ xdr->buf->page_len & ~PAGE_MASK,
+ &read->rd_eof);
read->rd_length = maxcount;
if (nfserr)
return nfserr;
@@ -4213,15 +4232,9 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr,
int starting_len = xdr->buf->len;
__be32 *p;
- p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
- if (!p)
- return nfserr_resource;
-
- /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(0);
- xdr->buf->head[0].iov_len = (char *)xdr->p -
- (char *)xdr->buf->head[0].iov_base;
+ nfserr = nfsd4_encode_verifier4(xdr, &readdir->rd_verf);
+ if (nfserr != nfs_ok)
+ return nfserr;
/*
* Number of bytes left for directory entries allowing for the
@@ -4299,13 +4312,8 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_remove *remove = &u->remove;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
- p = encode_cinfo(p, &remove->rm_cinfo);
- return 0;
+ return nfsd4_encode_change_info4(xdr, &remove->rm_cinfo);
}
static __be32
@@ -4314,14 +4322,11 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_rename *rename = &u->rename;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 40);
- if (!p)
- return nfserr_resource;
- p = encode_cinfo(p, &rename->rn_sinfo);
- p = encode_cinfo(p, &rename->rn_tinfo);
- return 0;
+ nfserr = nfsd4_encode_change_info4(xdr, &rename->rn_sinfo);
+ if (nfserr)
+ return nfserr;
+ return nfsd4_encode_change_info4(xdr, &rename->rn_tinfo);
}
static __be32
@@ -4448,23 +4453,25 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_setclientid *scd = &u->setclientid;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
if (!nfserr) {
- p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8);
- p = xdr_encode_opaque_fixed(p, &scd->se_confirm,
- NFS4_VERIFIER_SIZE);
- }
- else if (nfserr == nfserr_clid_inuse) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(0);
+ nfserr = nfsd4_encode_clientid4(xdr, &scd->se_clientid);
+ if (nfserr != nfs_ok)
+ goto out;
+ nfserr = nfsd4_encode_verifier4(xdr, &scd->se_confirm);
+ } else if (nfserr == nfserr_clid_inuse) {
+ /* empty network id */
+ if (xdr_stream_encode_u32(xdr, 0) < 0) {
+ nfserr = nfserr_resource;
+ goto out;
+ }
+ /* empty universal address */
+ if (xdr_stream_encode_u32(xdr, 0) < 0) {
+ nfserr = nfserr_resource;
+ goto out;
+ }
}
+out:
return nfserr;
}
@@ -4473,17 +4480,12 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr,
union nfsd4_op_u *u)
{
struct nfsd4_write *write = &u->write;
- struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 16);
- if (!p)
+ if (xdr_stream_encode_u32(resp->xdr, write->wr_bytes_written) < 0)
return nfserr_resource;
- *p++ = cpu_to_be32(write->wr_bytes_written);
- *p++ = cpu_to_be32(write->wr_how_written);
- p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
- NFS4_VERIFIER_SIZE);
- return 0;
+ if (xdr_stream_encode_u32(resp->xdr, write->wr_how_written) < 0)
+ return nfserr_resource;
+ return nfsd4_encode_verifier4(resp->xdr, &write->wr_verifier);
}
static __be32
@@ -4505,20 +4507,15 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
server_scope = nn->nfsd_name;
server_scope_sz = strlen(nn->nfsd_name);
- p = xdr_reserve_space(xdr,
- 8 /* eir_clientid */ +
- 4 /* eir_sequenceid */ +
- 4 /* eir_flags */ +
- 4 /* spr_how */);
- if (!p)
+ if (nfsd4_encode_clientid4(xdr, &exid->clientid) != nfs_ok)
+ return nfserr_resource;
+ if (xdr_stream_encode_u32(xdr, exid->seqid) < 0)
+ return nfserr_resource;
+ if (xdr_stream_encode_u32(xdr, exid->flags) < 0)
return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, &exid->clientid, 8);
- *p++ = cpu_to_be32(exid->seqid);
- *p++ = cpu_to_be32(exid->flags);
-
- *p++ = cpu_to_be32(exid->spa_how);
-
+ if (xdr_stream_encode_u32(xdr, exid->spa_how) < 0)
+ return nfserr_resource;
switch (exid->spa_how) {
case SP4_NONE:
break;
@@ -5099,15 +5096,8 @@ nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_setxattr *setxattr = &u->setxattr;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
-
- encode_cinfo(p, &setxattr->setxa_cinfo);
-
- return 0;
+ return nfsd4_encode_change_info4(xdr, &setxattr->setxa_cinfo);
}
/*
@@ -5253,14 +5243,8 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_removexattr *removexattr = &u->removexattr;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
-
- p = encode_cinfo(p, &removexattr->rmxa_cinfo);
- return 0;
+ return nfsd4_encode_change_info4(xdr, &removexattr->rmxa_cinfo);
}
typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u);
@@ -5460,6 +5444,12 @@ status:
release:
if (opdesc && opdesc->op_release)
opdesc->op_release(&op->u);
+
+ /*
+ * Account for pages consumed while encoding this operation.
+ * The xdr_stream primitives don't manage rq_next_page.
+ */
+ rqstp->rq_next_page = xdr->page_ptr + 1;
}
/*
@@ -5528,9 +5518,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
p = resp->statusp;
*p++ = resp->cstate.status;
-
- rqstp->rq_next_page = xdr->page_ptr + 1;
-
*p++ = htonl(resp->taglen);
memcpy(p, resp->tag, resp->taglen);
p += XDR_QUADLEN(resp->taglen);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 041faa13b852..a8eda1c85829 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -148,12 +148,23 @@ void nfsd_drc_slab_free(void)
kmem_cache_destroy(drc_slab);
}
-static int nfsd_reply_cache_stats_init(struct nfsd_net *nn)
+/**
+ * nfsd_net_reply_cache_init - per net namespace reply cache set-up
+ * @nn: nfsd_net being initialized
+ *
+ * Returns zero on succes; otherwise a negative errno is returned.
+ */
+int nfsd_net_reply_cache_init(struct nfsd_net *nn)
{
return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
}
-static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn)
+/**
+ * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down
+ * @nn: nfsd_net being freed
+ *
+ */
+void nfsd_net_reply_cache_destroy(struct nfsd_net *nn)
{
nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
}
@@ -169,17 +180,13 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
hashsize = nfsd_hashsize(nn->max_drc_entries);
nn->maskbits = ilog2(hashsize);
- status = nfsd_reply_cache_stats_init(nn);
- if (status)
- goto out_nomem;
-
nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
nn->nfsd_reply_cache_shrinker.seeks = 1;
status = register_shrinker(&nn->nfsd_reply_cache_shrinker,
"nfsd-reply:%s", nn->nfsd_name);
if (status)
- goto out_stats_destroy;
+ return status;
nn->drc_hashtbl = kvzalloc(array_size(hashsize,
sizeof(*nn->drc_hashtbl)), GFP_KERNEL);
@@ -195,9 +202,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
return 0;
out_shrinker:
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
-out_stats_destroy:
- nfsd_reply_cache_stats_destroy(nn);
-out_nomem:
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
return -ENOMEM;
}
@@ -217,7 +221,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
rp, nn);
}
}
- nfsd_reply_cache_stats_destroy(nn);
kvfree(nn->drc_hashtbl);
nn->drc_hashtbl = NULL;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index b4fd7a7062d5..1b8b1aab9a15 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -25,6 +25,7 @@
#include "netns.h"
#include "pnfs.h"
#include "filecache.h"
+#include "trace.h"
/*
* We have a single directory with several nodes in it.
@@ -109,12 +110,12 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
if (IS_ERR(data))
return PTR_ERR(data);
- rv = write_op[ino](file, data, size);
- if (rv >= 0) {
- simple_transaction_set(file, rv);
- rv = size;
- }
- return rv;
+ rv = write_op[ino](file, data, size);
+ if (rv < 0)
+ return rv;
+
+ simple_transaction_set(file, rv);
+ return size;
}
static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
@@ -230,6 +231,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
if (rpc_pton(net, fo_path, size, sap, salen) == 0)
return -EINVAL;
+ trace_nfsd_ctl_unlock_ip(net, buf);
return nlmsvc_unlock_all_by_ip(sap);
}
@@ -263,7 +265,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
fo_path = buf;
if (qword_get(&buf, fo_path, size) < 0)
return -EINVAL;
-
+ trace_nfsd_ctl_unlock_fs(netns(file), fo_path);
error = kern_path(fo_path, 0, &path);
if (error)
return error;
@@ -324,7 +326,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
len = qword_get(&mesg, dname, size);
if (len <= 0)
return -EINVAL;
-
+
path = dname+len+1;
len = qword_get(&mesg, path, size);
if (len <= 0)
@@ -338,15 +340,17 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
return -EINVAL;
maxsize = min(maxsize, NFS3_FHSIZE);
- if (qword_get(&mesg, mesg, size)>0)
+ if (qword_get(&mesg, mesg, size) > 0)
return -EINVAL;
+ trace_nfsd_ctl_filehandle(netns(file), dname, path, maxsize);
+
/* we have all the words, they are in buf.. */
dom = unix_domain_find(dname);
if (!dom)
return -ENOMEM;
- len = exp_rootfh(netns(file), dom, path, &fh, maxsize);
+ len = exp_rootfh(netns(file), dom, path, &fh, maxsize);
auth_domain_put(dom);
if (len)
return len;
@@ -399,6 +403,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
return rv;
if (newthreads < 0)
return -EINVAL;
+ trace_nfsd_ctl_threads(net, newthreads);
rv = nfsd_svc(newthreads, net, file->f_cred);
if (rv < 0)
return rv;
@@ -418,8 +423,8 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
* OR
*
* Input:
- * buf: C string containing whitespace-
- * separated unsigned integer values
+ * buf: C string containing whitespace-
+ * separated unsigned integer values
* representing the number of NFSD
* threads to start in each pool
* size: non-zero length of C string in @buf
@@ -471,6 +476,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
rv = -EINVAL;
if (nthreads[i] < 0)
goto out_free;
+ trace_nfsd_ctl_pool_threads(net, i, nthreads[i]);
}
rv = nfsd_set_nrthreads(i, nthreads, net);
if (rv)
@@ -526,7 +532,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
char *sep;
struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
- if (size>0) {
+ if (size > 0) {
if (nn->nfsd_serv)
/* Cannot change versions without updating
* nn->nfsd_serv->sv_xdrsize, and reallocing
@@ -536,6 +542,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
if (buf[size-1] != '\n')
return -EINVAL;
buf[size-1] = 0;
+ trace_nfsd_ctl_version(netns(file), buf);
vers = mesg;
len = qword_get(&mesg, vers, size);
@@ -637,11 +644,11 @@ out:
* OR
*
* Input:
- * buf: C string containing whitespace-
- * separated positive or negative
- * integer values representing NFS
- * protocol versions to enable ("+n")
- * or disable ("-n")
+ * buf: C string containing whitespace-
+ * separated positive or negative
+ * integer values representing NFS
+ * protocol versions to enable ("+n")
+ * or disable ("-n")
* size: non-zero length of C string in @buf
* Output:
* On success: status of zero or more protocol versions has
@@ -689,6 +696,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
err = get_int(&mesg, &fd);
if (err != 0 || fd < 0)
return -EINVAL;
+ trace_nfsd_ctl_ports_addfd(net, fd);
err = nfsd_create_serv(net);
if (err != 0)
@@ -705,7 +713,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
}
/*
- * A transport listener is added by writing it's transport name and
+ * A transport listener is added by writing its transport name and
* a port number.
*/
static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred)
@@ -720,6 +728,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
if (port < 1 || port > USHRT_MAX)
return -EINVAL;
+ trace_nfsd_ctl_ports_addxprt(net, transport, port);
err = nfsd_create_serv(net);
if (err != 0)
@@ -832,9 +841,9 @@ int nfsd_max_blksize;
* OR
*
* Input:
- * buf: C string containing an unsigned
- * integer value representing the new
- * NFS blksize
+ * buf: C string containing an unsigned
+ * integer value representing the new
+ * NFS blksize
* size: non-zero length of C string in @buf
* Output:
* On success: passed-in buffer filled with '\n'-terminated C string
@@ -853,6 +862,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
int rv = get_int(&mesg, &bsize);
if (rv)
return rv;
+ trace_nfsd_ctl_maxblksize(netns(file), bsize);
+
/* force bsize into allowed range and
* required alignment.
*/
@@ -881,9 +892,9 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
* OR
*
* Input:
- * buf: C string containing an unsigned
- * integer value representing the new
- * number of max connections
+ * buf: C string containing an unsigned
+ * integer value representing the new
+ * number of max connections
* size: non-zero length of C string in @buf
* Output:
* On success: passed-in buffer filled with '\n'-terminated C string
@@ -903,6 +914,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
if (rv)
return rv;
+ trace_nfsd_ctl_maxconn(netns(file), maxconn);
nn->max_connections = maxconn;
}
@@ -913,6 +925,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
time64_t *time, struct nfsd_net *nn)
{
+ struct dentry *dentry = file_dentry(file);
char *mesg = buf;
int rv, i;
@@ -922,6 +935,9 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
rv = get_int(&mesg, &i);
if (rv)
return rv;
+ trace_nfsd_ctl_time(netns(file), dentry->d_name.name,
+ dentry->d_name.len, i);
+
/*
* Some sanity checking. We don't have a reason for
* these particular numbers, but problems with the
@@ -1014,6 +1030,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size,
len = qword_get(&mesg, recdir, size);
if (len <= 0)
return -EINVAL;
+ trace_nfsd_ctl_recoverydir(netns(file), recdir);
status = nfs4_reset_recoverydir(recdir);
if (status)
@@ -1065,7 +1082,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
* OR
*
* Input:
- * buf: any value
+ * buf: any value
* size: non-zero length of C string in @buf
* Output:
* passed-in buffer filled with "Y" or "N" with a newline
@@ -1087,7 +1104,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
case '1':
if (!nn->nfsd_serv)
return -EBUSY;
- nfsd4_end_grace(nn);
+ trace_nfsd_end_grace(netns(file));
break;
default:
return -EINVAL;
@@ -1192,8 +1209,8 @@ static int __nfsd_symlink(struct inode *dir, struct dentry *dentry,
* @content is assumed to be a NUL-terminated string that lives
* longer than the symlink itself.
*/
-static void nfsd_symlink(struct dentry *parent, const char *name,
- const char *content)
+static void _nfsd_symlink(struct dentry *parent, const char *name,
+ const char *content)
{
struct inode *dir = parent->d_inode;
struct dentry *dentry;
@@ -1210,8 +1227,8 @@ out:
inode_unlock(dir);
}
#else
-static inline void nfsd_symlink(struct dentry *parent, const char *name,
- const char *content)
+static inline void _nfsd_symlink(struct dentry *parent, const char *name,
+ const char *content)
{
}
@@ -1389,8 +1406,8 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
if (ret)
return ret;
- nfsd_symlink(sb->s_root, "supported_krb5_enctypes",
- "/proc/net/rpc/gss_krb5_enctypes");
+ _nfsd_symlink(sb->s_root, "supported_krb5_enctypes",
+ "/proc/net/rpc/gss_krb5_enctypes");
dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
if (IS_ERR(dentry))
return PTR_ERR(dentry);
@@ -1477,7 +1494,17 @@ static int create_proc_exports_entry(void)
unsigned int nfsd_net_id;
-static __net_init int nfsd_init_net(struct net *net)
+/**
+ * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace
+ * @net: a freshly-created network namespace
+ *
+ * This information stays around as long as the network namespace is
+ * alive whether or not there is an NFSD instance running in the
+ * namespace.
+ *
+ * Returns zero on success, or a negative errno otherwise.
+ */
+static __net_init int nfsd_net_init(struct net *net)
{
int retval;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -1488,6 +1515,9 @@ static __net_init int nfsd_init_net(struct net *net)
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
+ retval = nfsd_net_reply_cache_init(nn);
+ if (retval)
+ goto out_repcache_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
nfsd4_init_leases_net(nn);
@@ -1496,22 +1526,32 @@ static __net_init int nfsd_init_net(struct net *net)
return 0;
+out_repcache_error:
+ nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
return retval;
}
-static __net_exit void nfsd_exit_net(struct net *net)
+/**
+ * nfsd_net_exit - Release the nfsd_net portion of a net namespace
+ * @net: a network namespace that is about to be destroyed
+ *
+ */
+static __net_exit void nfsd_net_exit(struct net *net)
{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nfsd_net_reply_cache_destroy(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
- nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
+ nfsd_netns_free_versions(nn);
}
static struct pernet_operations nfsd_net_ops = {
- .init = nfsd_init_net,
- .exit = nfsd_exit_net,
+ .init = nfsd_net_init,
+ .exit = nfsd_net_exit,
.id = &nfsd_net_id,
.size = sizeof(struct nfsd_net),
};
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index ccd8485fee04..e8e13ae72e3c 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -623,16 +623,9 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
inode = d_inode(fhp->fh_dentry);
err = fh_getattr(fhp, &stat);
- if (err) {
- /* Grab the times from inode anyway */
- stat.mtime = inode->i_mtime;
- stat.ctime = inode->i_ctime;
- stat.size = inode->i_size;
- if (v4 && IS_I_VERSION(inode)) {
- stat.change_cookie = inode_query_iversion(inode);
- stat.result_mask |= STATX_CHANGE_COOKIE;
- }
- }
+ if (err)
+ return;
+
if (v4)
fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
@@ -660,15 +653,10 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
printk("nfsd: inode locked twice during operation.\n");
err = fh_getattr(fhp, &fhp->fh_post_attr);
- if (err) {
- fhp->fh_post_saved = false;
- fhp->fh_post_attr.ctime = inode->i_ctime;
- if (v4 && IS_I_VERSION(inode)) {
- fhp->fh_post_attr.change_cookie = inode_query_iversion(inode);
- fhp->fh_post_attr.result_mask |= STATX_CHANGE_COOKIE;
- }
- } else
- fhp->fh_post_saved = true;
+ if (err)
+ return;
+
+ fhp->fh_post_saved = true;
if (v4)
fhp->fh_post_change =
nfsd4_change_attribute(&fhp->fh_post_attr, inode);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index c37195572fd0..a7315928a760 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -176,9 +176,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
{
struct nfsd_readargs *argp = rqstp->rq_argp;
struct nfsd_readres *resp = rqstp->rq_resp;
- unsigned int len;
u32 eof;
- int v;
dprintk("nfsd: READ %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
@@ -187,17 +185,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
- v = 0;
- len = argp->count;
resp->pages = rqstp->rq_next_page;
- while (len > 0) {
- struct page *page = *(rqstp->rq_next_page++);
-
- rqstp->rq_vec[v].iov_base = page_address(page);
- rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
- len -= rqstp->rq_vec[v].iov_len;
- v++;
- }
/* Obtain buffer pointer for payload. 19 is 1 word for
* status, 17 words for fattr, and 1 word for the byte count.
@@ -207,7 +195,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
resp->count = argp->count;
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
- rqstp->rq_vec, v, &resp->count, &eof);
+ &resp->count, &eof);
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 9c7b1ef5be40..2154fa63c5f2 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -402,6 +402,11 @@ void nfsd_reset_write_verifier(struct nfsd_net *nn)
write_sequnlock(&nn->writeverf_lock);
}
+/*
+ * Crank up a set of per-namespace resources for a new NFSD instance,
+ * including lockd, a duplicate reply cache, an open file cache
+ * instance, and a cache of NFSv4 state objects.
+ */
static int nfsd_startup_net(struct net *net, const struct cred *cred)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index caf6355b18fa..5777f40c7353 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -468,7 +468,8 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
case nfs_ok:
if (xdr_stream_encode_u32(xdr, resp->len) < 0)
return false;
- xdr_write_pages(xdr, &resp->page, 0, resp->len);
+ svcxdr_encode_opaque_pages(rqstp, xdr, &resp->page, 0,
+ resp->len);
if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
return false;
break;
@@ -491,8 +492,9 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (xdr_stream_encode_u32(xdr, resp->count) < 0)
return false;
- xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
- resp->count);
+ svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
+ rqstp->rq_res.page_base,
+ resp->count);
if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
return false;
break;
@@ -511,7 +513,8 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
switch (resp->status) {
case nfs_ok:
- xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
+ svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0,
+ dirlist->len);
/* no more entries */
if (xdr_stream_encode_item_absent(xdr) < 0)
return false;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 72a906a053dc..2af74983f146 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1581,6 +1581,265 @@ TRACE_EVENT(nfsd_cb_recall_any_done,
)
);
+TRACE_EVENT(nfsd_ctl_unlock_ip,
+ TP_PROTO(
+ const struct net *net,
+ const char *address
+ ),
+ TP_ARGS(net, address),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __string(address, address)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __assign_str(address, address);
+ ),
+ TP_printk("address=%s",
+ __get_str(address)
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_unlock_fs,
+ TP_PROTO(
+ const struct net *net,
+ const char *path
+ ),
+ TP_ARGS(net, path),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __string(path, path)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __assign_str(path, path);
+ ),
+ TP_printk("path=%s",
+ __get_str(path)
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_filehandle,
+ TP_PROTO(
+ const struct net *net,
+ const char *domain,
+ const char *path,
+ int maxsize
+ ),
+ TP_ARGS(net, domain, path, maxsize),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, maxsize)
+ __string(domain, domain)
+ __string(path, path)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->maxsize = maxsize;
+ __assign_str(domain, domain);
+ __assign_str(path, path);
+ ),
+ TP_printk("domain=%s path=%s maxsize=%d",
+ __get_str(domain), __get_str(path), __entry->maxsize
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_threads,
+ TP_PROTO(
+ const struct net *net,
+ int newthreads
+ ),
+ TP_ARGS(net, newthreads),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, newthreads)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->newthreads = newthreads;
+ ),
+ TP_printk("newthreads=%d",
+ __entry->newthreads
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_pool_threads,
+ TP_PROTO(
+ const struct net *net,
+ int pool,
+ int nrthreads
+ ),
+ TP_ARGS(net, pool, nrthreads),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, pool)
+ __field(int, nrthreads)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->pool = pool;
+ __entry->nrthreads = nrthreads;
+ ),
+ TP_printk("pool=%d nrthreads=%d",
+ __entry->pool, __entry->nrthreads
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_version,
+ TP_PROTO(
+ const struct net *net,
+ const char *mesg
+ ),
+ TP_ARGS(net, mesg),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __string(mesg, mesg)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __assign_str(mesg, mesg);
+ ),
+ TP_printk("%s",
+ __get_str(mesg)
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_ports_addfd,
+ TP_PROTO(
+ const struct net *net,
+ int fd
+ ),
+ TP_ARGS(net, fd),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, fd)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->fd = fd;
+ ),
+ TP_printk("fd=%d",
+ __entry->fd
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_ports_addxprt,
+ TP_PROTO(
+ const struct net *net,
+ const char *transport,
+ int port
+ ),
+ TP_ARGS(net, transport, port),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, port)
+ __string(transport, transport)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->port = port;
+ __assign_str(transport, transport);
+ ),
+ TP_printk("transport=%s port=%d",
+ __get_str(transport), __entry->port
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_maxblksize,
+ TP_PROTO(
+ const struct net *net,
+ int bsize
+ ),
+ TP_ARGS(net, bsize),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, bsize)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->bsize = bsize;
+ ),
+ TP_printk("bsize=%d",
+ __entry->bsize
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_maxconn,
+ TP_PROTO(
+ const struct net *net,
+ int maxconn
+ ),
+ TP_ARGS(net, maxconn),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, maxconn)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->maxconn = maxconn;
+ ),
+ TP_printk("maxconn=%d",
+ __entry->maxconn
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_time,
+ TP_PROTO(
+ const struct net *net,
+ const char *name,
+ size_t namelen,
+ int time
+ ),
+ TP_ARGS(net, name, namelen, time),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, time)
+ __string_len(name, name, namelen)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->time = time;
+ __assign_str_len(name, name, namelen);
+ ),
+ TP_printk("file=%s time=%d\n",
+ __get_str(name), __entry->time
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_recoverydir,
+ TP_PROTO(
+ const struct net *net,
+ const char *recdir
+ ),
+ TP_ARGS(net, recdir),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __string(recdir, recdir)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __assign_str(recdir, recdir);
+ ),
+ TP_printk("recdir=%s",
+ __get_str(recdir)
+ )
+);
+
+TRACE_EVENT(nfsd_end_grace,
+ TP_PROTO(
+ const struct net *net
+ ),
+ TP_ARGS(net),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ ),
+ TP_printk("nn=%d", __entry->netns_ino
+ )
+);
+
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index db67f8e19344..59b7d60ae33e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -388,7 +388,9 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
iap->ia_mode &= ~S_ISGID;
} else {
/* set ATTR_KILL_* bits and let VFS handle it */
- iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
+ iap->ia_valid |= ATTR_KILL_SUID;
+ iap->ia_valid |=
+ setattr_should_drop_sgid(&nop_mnt_idmap, inode);
}
}
}
@@ -1001,6 +1003,18 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
}
+/**
+ * nfsd_splice_read - Perform a VFS read using a splice pipe
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @file: opened struct file of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
+ */
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, unsigned long *count,
u32 *eof)
@@ -1014,22 +1028,50 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
ssize_t host_err;
trace_nfsd_read_splice(rqstp, fhp, offset, *count);
- rqstp->rq_next_page = rqstp->rq_respages + 1;
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
-__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file *file, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *count,
- u32 *eof)
+/**
+ * nfsd_iter_read - Perform a VFS read using an iterator
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @file: opened struct file of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @base: offset in first page of read buffer
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * Some filesystems or situations cannot use nfsd_splice_read. This
+ * function is the slightly less-performant fallback for those cases.
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
+ */
+__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct file *file, loff_t offset, unsigned long *count,
+ unsigned int base, u32 *eof)
{
+ unsigned long v, total;
struct iov_iter iter;
loff_t ppos = offset;
+ struct page *page;
ssize_t host_err;
+ v = 0;
+ total = *count;
+ while (total) {
+ page = *(rqstp->rq_next_page++);
+ rqstp->rq_vec[v].iov_base = page_address(page) + base;
+ rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base);
+ total -= rqstp->rq_vec[v].iov_len;
+ ++v;
+ base = 0;
+ }
+ WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec));
+
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
- iov_iter_kvec(&iter, ITER_DEST, vec, vlen, *count);
+ iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count);
host_err = vfs_iter_read(file, &iter, &ppos, 0);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
@@ -1159,14 +1201,24 @@ out_nfserr:
return nfserr;
}
-/*
- * Read data from a file. count must contain the requested read count
- * on entry. On return, *count contains the number of bytes actually read.
+/**
+ * nfsd_read - Read data from a file
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * The caller must verify that there is enough space in @rqstp.rq_res
+ * to perform this operation.
+ *
* N.B. After this call fhp needs an fh_put
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
*/
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
- u32 *eof)
+ loff_t offset, unsigned long *count, u32 *eof)
{
struct nfsd_file *nf;
struct file *file;
@@ -1181,12 +1233,10 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
else
- err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
+ err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof);
nfsd_file_put(nf);
-
trace_nfsd_read_done(rqstp, fhp, offset, *count);
-
return err;
}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 43fb57a301d3..a6890ea7b765 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -110,13 +110,12 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
unsigned long *count,
u32 *eof);
-__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
+__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
- struct kvec *vec, int vlen,
- unsigned long *count,
+ unsigned long *count, unsigned int base,
u32 *eof);
-__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
- loff_t, struct kvec *, int, unsigned long *,
+__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ loff_t offset, unsigned long *count,
u32 *eof);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
struct kvec *, int, unsigned long *,