summaryrefslogtreecommitdiff
path: root/fs/nfsd
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfsd')
-rw-r--r--fs/nfsd/blocklayoutxdr.c9
-rw-r--r--fs/nfsd/cache.h8
-rw-r--r--fs/nfsd/flexfilelayoutxdr.c9
-rw-r--r--fs/nfsd/nfs3proc.c4
-rw-r--r--fs/nfsd/nfs4acl.c34
-rw-r--r--fs/nfsd/nfs4proc.c51
-rw-r--r--fs/nfsd/nfs4state.c162
-rw-r--r--fs/nfsd/nfs4xdr.c39
-rw-r--r--fs/nfsd/nfscache.c204
-rw-r--r--fs/nfsd/nfsctl.c1
-rw-r--r--fs/nfsd/nfsd.h7
-rw-r--r--fs/nfsd/nfsfh.c26
-rw-r--r--fs/nfsd/nfsfh.h6
-rw-r--r--fs/nfsd/nfssvc.c111
-rw-r--r--fs/nfsd/state.h3
-rw-r--r--fs/nfsd/stats.c2
-rw-r--r--fs/nfsd/stats.h7
-rw-r--r--fs/nfsd/trace.h27
-rw-r--r--fs/nfsd/vfs.c52
-rw-r--r--fs/nfsd/xdr4.h11
20 files changed, 517 insertions, 256 deletions
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 8e9c1a0f8d38..1ed2f691ebb9 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -83,6 +83,15 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
int len = sizeof(__be32), ret, i;
__be32 *p;
+ /*
+ * See paragraph 5 of RFC 8881 S18.40.3.
+ */
+ if (!gdp->gd_maxcount) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+
p = xdr_reserve_space(xdr, len + sizeof(__be32));
if (!p)
return nfserr_resource;
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 4c9b87850ab1..929248c6ca84 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -19,7 +19,7 @@
* typical sockaddr_storage. This is for space reasons, since sockaddr_storage
* is much larger than a sockaddr_in6.
*/
-struct svc_cacherep {
+struct nfsd_cacherep {
struct {
/* Keep often-read xid, csum in the same cache line: */
__be32 k_xid;
@@ -84,8 +84,10 @@ int nfsd_net_reply_cache_init(struct nfsd_net *nn);
void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
-int nfsd_cache_lookup(struct svc_rqst *);
-void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
+int nfsd_cache_lookup(struct svc_rqst *rqstp,
+ struct nfsd_cacherep **cacherep);
+void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
+ int cachetype, __be32 *statp);
int nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/flexfilelayoutxdr.c b/fs/nfsd/flexfilelayoutxdr.c
index e81d2a5cf381..bb205328e043 100644
--- a/fs/nfsd/flexfilelayoutxdr.c
+++ b/fs/nfsd/flexfilelayoutxdr.c
@@ -85,6 +85,15 @@ nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
int addr_len;
__be32 *p;
+ /*
+ * See paragraph 5 of RFC 8881 S18.40.3.
+ */
+ if (!gdp->gd_maxcount) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+
/* len + padding for two strings */
addr_len = 16 + da->netaddr.netid_len + da->netaddr.addr_len;
ver_len = 20;
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index fc8d5b7db9f8..268ef57751c4 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -307,7 +307,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (!IS_POSIXACL(inode))
iap->ia_mode &= ~current_umask();
- fh_fill_pre_attrs(fhp);
+ status = fh_fill_pre_attrs(fhp);
+ if (status != nfs_ok)
+ goto out;
host_err = vfs_create(&nop_mnt_idmap, inode, child, iap->ia_mode, true);
if (host_err < 0) {
status = nfserrno(host_err);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 518203821790..96e786b5e544 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -441,7 +441,7 @@ struct posix_ace_state_array {
* calculated so far: */
struct posix_acl_state {
- int empty;
+ unsigned char valid;
struct posix_ace_state owner;
struct posix_ace_state group;
struct posix_ace_state other;
@@ -457,7 +457,6 @@ init_state(struct posix_acl_state *state, int cnt)
int alloc;
memset(state, 0, sizeof(struct posix_acl_state));
- state->empty = 1;
/*
* In the worst case, each individual acl could be for a distinct
* named user or group, but we don't know which, so we allocate
@@ -500,7 +499,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
* and effective cases: when there are no inheritable ACEs,
* calls ->set_acl with a NULL ACL structure.
*/
- if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT))
+ if (!state->valid && (flags & NFS4_ACL_TYPE_DEFAULT))
return NULL;
/*
@@ -622,11 +621,12 @@ static void process_one_v4_ace(struct posix_acl_state *state,
struct nfs4_ace *ace)
{
u32 mask = ace->access_mask;
+ short type = ace2type(ace);
int i;
- state->empty = 0;
+ state->valid |= type;
- switch (ace2type(ace)) {
+ switch (type) {
case ACL_USER_OBJ:
if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
allow_bits(&state->owner, mask);
@@ -726,6 +726,30 @@ static int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl,
if (!(ace->flag & NFS4_ACE_INHERIT_ONLY_ACE))
process_one_v4_ace(&effective_acl_state, ace);
}
+
+ /*
+ * At this point, the default ACL may have zeroed-out entries for owner,
+ * group and other. That usually results in a non-sensical resulting ACL
+ * that denies all access except to any ACE that was explicitly added.
+ *
+ * The setfacl command solves a similar problem with this logic:
+ *
+ * "If a Default ACL entry is created, and the Default ACL contains
+ * no owner, owning group, or others entry, a copy of the ACL
+ * owner, owning group, or others entry is added to the Default ACL."
+ *
+ * Copy any missing ACEs from the effective set, if any ACEs were
+ * explicitly set.
+ */
+ if (default_acl_state.valid) {
+ if (!(default_acl_state.valid & ACL_USER_OBJ))
+ default_acl_state.owner = effective_acl_state.owner;
+ if (!(default_acl_state.valid & ACL_GROUP_OBJ))
+ default_acl_state.group = effective_acl_state.group;
+ if (!(default_acl_state.valid & ACL_OTHER))
+ default_acl_state.other = effective_acl_state.other;
+ }
+
*pacl = posix_state_to_acl(&effective_acl_state, flags);
if (IS_ERR(*pacl)) {
ret = PTR_ERR(*pacl);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 5ae670807449..5ca748309c26 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -297,12 +297,12 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
if (d_really_is_positive(child)) {
- status = nfs_ok;
-
/* NFSv4 protocol requires change attributes even though
* no change happened.
*/
- fh_fill_both_attrs(fhp);
+ status = fh_fill_both_attrs(fhp);
+ if (status != nfs_ok)
+ goto out;
switch (open->op_createmode) {
case NFS4_CREATE_UNCHECKED:
@@ -345,7 +345,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (!IS_POSIXACL(inode))
iap->ia_mode &= ~current_umask();
- fh_fill_pre_attrs(fhp);
+ status = fh_fill_pre_attrs(fhp);
+ if (status != nfs_ok)
+ goto out;
status = nfsd4_vfs_create(fhp, child, open);
if (status != nfs_ok)
goto out;
@@ -380,6 +382,38 @@ out:
return status;
}
+/**
+ * set_change_info - set up the change_info4 for a reply
+ * @cinfo: pointer to nfsd4_change_info to be populated
+ * @fhp: pointer to svc_fh to use as source
+ *
+ * Many operations in NFSv4 require change_info4 in the reply. This function
+ * populates that from the info that we (should!) have already collected. In
+ * the event that we didn't get any pre-attrs, just zero out both.
+ */
+static void
+set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
+{
+ cinfo->atomic = (u32)(fhp->fh_pre_saved && fhp->fh_post_saved && !fhp->fh_no_atomic_attr);
+ cinfo->before_change = fhp->fh_pre_change;
+ cinfo->after_change = fhp->fh_post_change;
+
+ /*
+ * If fetching the pre-change attributes failed, then we should
+ * have already failed the whole operation. We could have still
+ * failed to fetch post-change attributes however.
+ *
+ * If we didn't get post-op attrs, just zero-out the after
+ * field since we don't know what it should be. If the pre_saved
+ * field isn't set for some reason, throw warning and just copy
+ * whatever is in the after field.
+ */
+ if (WARN_ON_ONCE(!fhp->fh_pre_saved))
+ cinfo->before_change = 0;
+ if (!fhp->fh_post_saved)
+ cinfo->after_change = cinfo->before_change + 1;
+}
+
static __be32
do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh)
{
@@ -424,11 +458,11 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
} else {
status = nfsd_lookup(rqstp, current_fh,
open->op_fname, open->op_fnamelen, *resfh);
- if (!status)
+ if (status == nfs_ok)
/* NFSv4 protocol requires change attributes even though
* no change happened.
*/
- fh_fill_both_attrs(current_fh);
+ status = fh_fill_both_attrs(current_fh);
}
if (status)
goto out;
@@ -1313,12 +1347,11 @@ try_again:
/* found a match */
if (ni->nsui_busy) {
/* wait - and try again */
- prepare_to_wait(&nn->nfsd_ssc_waitq, &wait,
- TASK_INTERRUPTIBLE);
+ prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE);
spin_unlock(&nn->nfsd_ssc_lock);
/* allow 20secs for mount/unmount for now - revisit */
- if (signal_pending(current) ||
+ if (kthread_should_stop() ||
(schedule_timeout(20*HZ) == 0)) {
finish_wait(&nn->nfsd_ssc_waitq, &wait);
kfree(work);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index daf305daa751..8534693eb6a4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -649,6 +649,18 @@ find_readable_file(struct nfs4_file *f)
return ret;
}
+static struct nfsd_file *
+find_rw_file(struct nfs4_file *f)
+{
+ struct nfsd_file *ret;
+
+ spin_lock(&f->fi_lock);
+ ret = nfsd_file_get(f->fi_fds[O_RDWR]);
+ spin_unlock(&f->fi_lock);
+
+ return ret;
+}
+
struct nfsd_file *
find_any_file(struct nfs4_file *f)
{
@@ -1144,7 +1156,7 @@ static void block_delegations(struct knfsd_fh *fh)
static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
- struct nfs4_clnt_odstate *odstate)
+ struct nfs4_clnt_odstate *odstate, u32 dl_type)
{
struct nfs4_delegation *dp;
long n;
@@ -1170,7 +1182,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
INIT_LIST_HEAD(&dp->dl_recall_lru);
dp->dl_clnt_odstate = odstate;
get_clnt_odstate(odstate);
- dp->dl_type = NFS4_OPEN_DELEGATE_READ;
+ dp->dl_type = dl_type;
dp->dl_retries = 1;
dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
@@ -5449,8 +5461,9 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct nfs4_file *fp = stp->st_stid.sc_file;
struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
struct nfs4_delegation *dp;
- struct nfsd_file *nf;
+ struct nfsd_file *nf = NULL;
struct file_lock *fl;
+ u32 dl_type;
/*
* The fi_had_conflict and nfs_get_existing_delegation checks
@@ -5460,15 +5473,35 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
- nf = find_readable_file(fp);
- if (!nf) {
- /*
- * We probably could attempt another open and get a read
- * delegation, but for now, don't bother until the
- * client actually sends us one.
- */
- return ERR_PTR(-EAGAIN);
+ /*
+ * Try for a write delegation first. RFC8881 section 10.4 says:
+ *
+ * "An OPEN_DELEGATE_WRITE delegation allows the client to handle,
+ * on its own, all opens."
+ *
+ * Furthermore the client can use a write delegation for most READ
+ * operations as well, so we require a O_RDWR file here.
+ *
+ * Offer a write delegation in the case of a BOTH open, and ensure
+ * we get the O_RDWR descriptor.
+ */
+ if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) {
+ nf = find_rw_file(fp);
+ dl_type = NFS4_OPEN_DELEGATE_WRITE;
}
+
+ /*
+ * If the file is being opened O_RDONLY or we couldn't get a O_RDWR
+ * file for some reason, then try for a read delegation instead.
+ */
+ if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) {
+ nf = find_readable_file(fp);
+ dl_type = NFS4_OPEN_DELEGATE_READ;
+ }
+
+ if (!nf)
+ return ERR_PTR(-EAGAIN);
+
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
if (nfs4_delegation_exists(clp, fp))
@@ -5491,11 +5524,11 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
return ERR_PTR(status);
status = -ENOMEM;
- dp = alloc_init_deleg(clp, fp, odstate);
+ dp = alloc_init_deleg(clp, fp, odstate, dl_type);
if (!dp)
goto out_delegees;
- fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ);
+ fl = nfs4_alloc_init_lease(dp, dl_type);
if (!fl)
goto out_clnt_odstate;
@@ -5568,10 +5601,28 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
}
/*
- * Attempt to hand out a delegation.
+ * The Linux NFS server does not offer write delegations to NFSv4.0
+ * clients in order to avoid conflicts between write delegations and
+ * GETATTRs requesting CHANGE or SIZE attributes.
+ *
+ * With NFSv4.1 and later minorversions, the SEQUENCE operation that
+ * begins each COMPOUND contains a client ID. Delegation recall can
+ * be avoided when the server recognizes the client sending a
+ * GETATTR also holds write delegation it conflicts with.
+ *
+ * However, the NFSv4.0 protocol does not enable a server to
+ * determine that a GETATTR originated from the client holding the
+ * conflicting delegation versus coming from some other client. Per
+ * RFC 7530 Section 16.7.5, the server must recall or send a
+ * CB_GETATTR even when the GETATTR originates from the client that
+ * holds the conflicting delegation.
*
- * Note we don't support write delegations, and won't until the vfs has
- * proper support for them.
+ * An NFSv4.0 client can trigger a pathological situation if it
+ * always sends a DELEGRETURN preceded by a conflicting GETATTR in
+ * the same COMPOUND. COMPOUND execution will always stop at the
+ * GETATTR and the DELEGRETURN will never get executed. The server
+ * eventually revokes the delegation, which can result in loss of
+ * open or lock state.
*/
static void
nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
@@ -5590,8 +5641,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
case NFS4_OPEN_CLAIM_PREVIOUS:
if (!cb_up)
open->op_recall = 1;
- if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ)
- goto out_no_deleg;
break;
case NFS4_OPEN_CLAIM_NULL:
parent = currentfh;
@@ -5606,6 +5655,9 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
goto out_no_deleg;
if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
goto out_no_deleg;
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE &&
+ !clp->cl_minorversion)
+ goto out_no_deleg;
break;
default:
goto out_no_deleg;
@@ -5616,8 +5668,13 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
- trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
- open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
+ trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
+ } else {
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
+ }
nfs4_put_stid(&dp->dl_stid);
return;
out_no_deleg:
@@ -8341,3 +8398,68 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
{
get_stateid(cstate, &u->write.wr_stateid);
}
+
+/**
+ * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
+ * @rqstp: RPC transaction context
+ * @inode: file to be checked for a conflict
+ *
+ * This function is called when there is a conflict between a write
+ * delegation and a change/size GETATTR from another client. The server
+ * must either use the CB_GETATTR to get the current values of the
+ * attributes from the client that holds the delegation or recall the
+ * delegation before replying to the GETATTR. See RFC 8881 section
+ * 18.7.4.
+ *
+ * The current implementation does not support CB_GETATTR yet. However
+ * this can avoid recalling the delegation could be added in follow up
+ * work.
+ *
+ * Returns 0 if there is no conflict; otherwise an nfs_stat
+ * code is returned.
+ */
+__be32
+nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
+{
+ __be32 status;
+ struct file_lock_context *ctx;
+ struct file_lock *fl;
+ struct nfs4_delegation *dp;
+
+ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return 0;
+ spin_lock(&ctx->flc_lock);
+ list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+ if (fl->fl_flags == FL_LAYOUT)
+ continue;
+ if (fl->fl_lmops != &nfsd_lease_mng_ops) {
+ /*
+ * non-nfs lease, if it's a lease with F_RDLCK then
+ * we are done; there isn't any write delegation
+ * on this inode
+ */
+ if (fl->fl_type == F_RDLCK)
+ break;
+ goto break_lease;
+ }
+ if (fl->fl_type == F_WRLCK) {
+ dp = fl->fl_owner;
+ if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
+ spin_unlock(&ctx->flc_lock);
+ return 0;
+ }
+break_lease:
+ spin_unlock(&ctx->flc_lock);
+ nfsd_stats_wdeleg_getattr_inc();
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ return status;
+ return 0;
+ }
+ break;
+ }
+ spin_unlock(&ctx->flc_lock);
+ return 0;
+}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b30dca7de8cc..2e40c74d2f72 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2984,6 +2984,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
if (status)
goto out;
}
+ if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+ status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
+ if (status)
+ goto out;
+ }
err = vfs_getattr(&path, &stat,
STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
@@ -3973,17 +3978,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid);
if (nfserr)
return nfserr;
- p = xdr_reserve_space(xdr, 32);
+
+ p = xdr_reserve_space(xdr, XDR_UNIT * 8);
if (!p)
return nfserr_resource;
*p++ = cpu_to_be32(open->op_recall);
/*
+ * Always flush on close
+ *
* TODO: space_limit's in delegations
*/
*p++ = cpu_to_be32(NFS4_LIMIT_SIZE);
- *p++ = cpu_to_be32(~(u32)0);
- *p++ = cpu_to_be32(~(u32)0);
+ *p++ = xdr_zero;
+ *p++ = xdr_zero;
/*
* TODO: ACE's in delegations
@@ -4678,20 +4686,17 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
*p++ = cpu_to_be32(gdev->gd_layout_type);
- /* If maxcount is 0 then just update notifications */
- if (gdev->gd_maxcount != 0) {
- ops = nfsd4_layout_ops[gdev->gd_layout_type];
- nfserr = ops->encode_getdeviceinfo(xdr, gdev);
- if (nfserr) {
- /*
- * We don't bother to burden the layout drivers with
- * enforcing gd_maxcount, just tell the client to
- * come back with a bigger buffer if it's not enough.
- */
- if (xdr->buf->len + 4 > gdev->gd_maxcount)
- goto toosmall;
- return nfserr;
- }
+ ops = nfsd4_layout_ops[gdev->gd_layout_type];
+ nfserr = ops->encode_getdeviceinfo(xdr, gdev);
+ if (nfserr) {
+ /*
+ * We don't bother to burden the layout drivers with
+ * enforcing gd_maxcount, just tell the client to
+ * come back with a bigger buffer if it's not enough.
+ */
+ if (xdr->buf->len + 4 > gdev->gd_maxcount)
+ goto toosmall;
+ return nfserr;
}
if (gdev->gd_notify_types) {
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index a8eda1c85829..80621a709510 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -84,11 +84,11 @@ nfsd_hashsize(unsigned int limit)
return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
}
-static struct svc_cacherep *
-nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
- struct nfsd_net *nn)
+static struct nfsd_cacherep *
+nfsd_cacherep_alloc(struct svc_rqst *rqstp, __wsum csum,
+ struct nfsd_net *nn)
{
- struct svc_cacherep *rp;
+ struct nfsd_cacherep *rp;
rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
if (rp) {
@@ -110,36 +110,64 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
return rp;
}
-static void
-nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
- struct nfsd_net *nn)
+static void nfsd_cacherep_free(struct nfsd_cacherep *rp)
{
- if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
- nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len);
+ if (rp->c_type == RC_REPLBUFF)
kfree(rp->c_replvec.iov_base);
+ kmem_cache_free(drc_slab, rp);
+}
+
+static unsigned long
+nfsd_cacherep_dispose(struct list_head *dispose)
+{
+ struct nfsd_cacherep *rp;
+ unsigned long freed = 0;
+
+ while (!list_empty(dispose)) {
+ rp = list_first_entry(dispose, struct nfsd_cacherep, c_lru);
+ list_del(&rp->c_lru);
+ nfsd_cacherep_free(rp);
+ freed++;
}
+ return freed;
+}
+
+static void
+nfsd_cacherep_unlink_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b,
+ struct nfsd_cacherep *rp)
+{
+ if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base)
+ nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len);
if (rp->c_state != RC_UNUSED) {
rb_erase(&rp->c_node, &b->rb_head);
list_del(&rp->c_lru);
atomic_dec(&nn->num_drc_entries);
nfsd_stats_drc_mem_usage_sub(nn, sizeof(*rp));
}
- kmem_cache_free(drc_slab, rp);
}
static void
-nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
+nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp,
+ struct nfsd_net *nn)
+{
+ nfsd_cacherep_unlink_locked(nn, b, rp);
+ nfsd_cacherep_free(rp);
+}
+
+static void
+nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp,
struct nfsd_net *nn)
{
spin_lock(&b->cache_lock);
- nfsd_reply_cache_free_locked(b, rp, nn);
+ nfsd_cacherep_unlink_locked(nn, b, rp);
spin_unlock(&b->cache_lock);
+ nfsd_cacherep_free(rp);
}
int nfsd_drc_slab_create(void)
{
drc_slab = kmem_cache_create("nfsd_drc",
- sizeof(struct svc_cacherep), 0, 0, NULL);
+ sizeof(struct nfsd_cacherep), 0, 0, NULL);
return drc_slab ? 0: -ENOMEM;
}
@@ -208,7 +236,7 @@ out_shrinker:
void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
{
- struct svc_cacherep *rp;
+ struct nfsd_cacherep *rp;
unsigned int i;
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
@@ -216,7 +244,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
for (i = 0; i < nn->drc_hashsize; i++) {
struct list_head *head = &nn->drc_hashtbl[i].lru_head;
while (!list_empty(head)) {
- rp = list_first_entry(head, struct svc_cacherep, c_lru);
+ rp = list_first_entry(head, struct nfsd_cacherep, c_lru);
nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i],
rp, nn);
}
@@ -233,7 +261,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
* not already scheduled.
*/
static void
-lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+lru_put_end(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp)
{
rp->c_timestamp = jiffies;
list_move_tail(&rp->c_lru, &b->lru_head);
@@ -247,12 +275,21 @@ nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn)
return &nn->drc_hashtbl[hash];
}
-static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
- unsigned int max)
+/*
+ * Remove and return no more than @max expired entries in bucket @b.
+ * If @max is zero, do not limit the number of removed entries.
+ */
+static void
+nfsd_prune_bucket_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b,
+ unsigned int max, struct list_head *dispose)
{
- struct svc_cacherep *rp, *tmp;
- long freed = 0;
+ unsigned long expiry = jiffies - RC_EXPIRE;
+ struct nfsd_cacherep *rp, *tmp;
+ unsigned int freed = 0;
+
+ lockdep_assert_held(&b->cache_lock);
+ /* The bucket LRU is ordered oldest-first. */
list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) {
/*
* Don't free entries attached to calls that are still
@@ -260,60 +297,77 @@ static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
*/
if (rp->c_state == RC_INPROG)
continue;
+
if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries &&
- time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
+ time_before(expiry, rp->c_timestamp))
break;
- nfsd_reply_cache_free_locked(b, rp, nn);
- if (max && freed++ > max)
+
+ nfsd_cacherep_unlink_locked(nn, b, rp);
+ list_add(&rp->c_lru, dispose);
+
+ if (max && ++freed > max)
break;
}
- return freed;
}
-static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
+/**
+ * nfsd_reply_cache_count - count_objects method for the DRC shrinker
+ * @shrink: our registered shrinker context
+ * @sc: garbage collection parameters
+ *
+ * Returns the total number of entries in the duplicate reply cache. To
+ * keep things simple and quick, this is not the number of expired entries
+ * in the cache (ie, the number that would be removed by a call to
+ * nfsd_reply_cache_scan).
+ */
+static unsigned long
+nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
- return prune_bucket(b, nn, 3);
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+
+ return atomic_read(&nn->num_drc_entries);
}
-/*
- * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
- * Also prune the oldest ones when the total exceeds the max number of entries.
+/**
+ * nfsd_reply_cache_scan - scan_objects method for the DRC shrinker
+ * @shrink: our registered shrinker context
+ * @sc: garbage collection parameters
+ *
+ * Free expired entries on each bucket's LRU list until we've released
+ * nr_to_scan freed objects. Nothing will be released if the cache
+ * has not exceeded it's max_drc_entries limit.
+ *
+ * Returns the number of entries released by this call.
*/
-static long
-prune_cache_entries(struct nfsd_net *nn)
+static unsigned long
+nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+ unsigned long freed = 0;
+ LIST_HEAD(dispose);
unsigned int i;
- long freed = 0;
for (i = 0; i < nn->drc_hashsize; i++) {
struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i];
if (list_empty(&b->lru_head))
continue;
+
spin_lock(&b->cache_lock);
- freed += prune_bucket(b, nn, 0);
+ nfsd_prune_bucket_locked(nn, b, 0, &dispose);
spin_unlock(&b->cache_lock);
- }
- return freed;
-}
-static unsigned long
-nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
-{
- struct nfsd_net *nn = container_of(shrink,
- struct nfsd_net, nfsd_reply_cache_shrinker);
+ freed += nfsd_cacherep_dispose(&dispose);
+ if (freed > sc->nr_to_scan)
+ break;
+ }
- return atomic_read(&nn->num_drc_entries);
+ trace_nfsd_drc_gc(nn, freed);
+ return freed;
}
-static unsigned long
-nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
-{
- struct nfsd_net *nn = container_of(shrink,
- struct nfsd_net, nfsd_reply_cache_shrinker);
-
- return prune_cache_entries(nn);
-}
/*
* Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
*/
@@ -348,8 +402,8 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
}
static int
-nfsd_cache_key_cmp(const struct svc_cacherep *key,
- const struct svc_cacherep *rp, struct nfsd_net *nn)
+nfsd_cache_key_cmp(const struct nfsd_cacherep *key,
+ const struct nfsd_cacherep *rp, struct nfsd_net *nn)
{
if (key->c_key.k_xid == rp->c_key.k_xid &&
key->c_key.k_csum != rp->c_key.k_csum) {
@@ -365,11 +419,11 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key,
* Must be called with cache_lock held. Returns the found entry or
* inserts an empty key on failure.
*/
-static struct svc_cacherep *
-nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
+static struct nfsd_cacherep *
+nfsd_cache_insert(struct nfsd_drc_bucket *b, struct nfsd_cacherep *key,
struct nfsd_net *nn)
{
- struct svc_cacherep *rp, *ret = key;
+ struct nfsd_cacherep *rp, *ret = key;
struct rb_node **p = &b->rb_head.rb_node,
*parent = NULL;
unsigned int entries = 0;
@@ -378,7 +432,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
while (*p != NULL) {
++entries;
parent = *p;
- rp = rb_entry(parent, struct svc_cacherep, c_node);
+ rp = rb_entry(parent, struct nfsd_cacherep, c_node);
cmp = nfsd_cache_key_cmp(key, rp, nn);
if (cmp < 0)
@@ -411,6 +465,7 @@ out:
/**
* nfsd_cache_lookup - Find an entry in the duplicate reply cache
* @rqstp: Incoming Call to find
+ * @cacherep: OUT: DRC entry for this request
*
* Try to find an entry matching the current call in the cache. When none
* is found, we try to grab the oldest expired entry off the LRU list. If
@@ -423,16 +478,17 @@ out:
* %RC_REPLY: Reply from cache
* %RC_DROPIT: Do not process the request further
*/
-int nfsd_cache_lookup(struct svc_rqst *rqstp)
+int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep)
{
struct nfsd_net *nn;
- struct svc_cacherep *rp, *found;
+ struct nfsd_cacherep *rp, *found;
__wsum csum;
struct nfsd_drc_bucket *b;
int type = rqstp->rq_cachetype;
+ unsigned long freed;
+ LIST_HEAD(dispose);
int rtn = RC_DOIT;
- rqstp->rq_cacherep = NULL;
if (type == RC_NOCACHE) {
nfsd_stats_rc_nocache_inc();
goto out;
@@ -445,7 +501,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
* preallocate an entry.
*/
nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
+ rp = nfsd_cacherep_alloc(rqstp, csum, nn);
if (!rp)
goto out;
@@ -454,20 +510,18 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
found = nfsd_cache_insert(b, rp, nn);
if (found != rp)
goto found_entry;
-
- nfsd_stats_rc_misses_inc();
- rqstp->rq_cacherep = rp;
+ *cacherep = rp;
rp->c_state = RC_INPROG;
+ nfsd_prune_bucket_locked(nn, b, 3, &dispose);
+ spin_unlock(&b->cache_lock);
+ freed = nfsd_cacherep_dispose(&dispose);
+ trace_nfsd_drc_gc(nn, freed);
+
+ nfsd_stats_rc_misses_inc();
atomic_inc(&nn->num_drc_entries);
nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
-
- nfsd_prune_bucket(b, nn);
-
-out_unlock:
- spin_unlock(&b->cache_lock);
-out:
- return rtn;
+ goto out;
found_entry:
/* We found a matching entry which is either in progress or done. */
@@ -505,12 +559,16 @@ found_entry:
out_trace:
trace_nfsd_drc_found(nn, rqstp, rtn);
- goto out_unlock;
+out_unlock:
+ spin_unlock(&b->cache_lock);
+out:
+ return rtn;
}
/**
* nfsd_cache_update - Update an entry in the duplicate reply cache.
* @rqstp: svc_rqst with a finished Reply
+ * @rp: IN: DRC entry for this request
* @cachetype: which cache to update
* @statp: pointer to Reply's NFS status code, or NULL
*
@@ -528,10 +586,10 @@ out_trace:
* nfsd failed to encode a reply that otherwise would have been cached.
* In this case, nfsd_cache_update is called with statp == NULL.
*/
-void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
+void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
+ int cachetype, __be32 *statp)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- struct svc_cacherep *rp = rqstp->rq_cacherep;
struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
struct nfsd_drc_bucket *b;
int len;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3709830f90a6..7ed02fb88a36 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1627,6 +1627,7 @@ static void __exit exit_nfsd(void)
}
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+MODULE_DESCRIPTION("In-kernel NFS server");
MODULE_LICENSE("GPL");
module_init(init_nfsd)
module_exit(exit_nfsd)
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d88498f8b275..11c14faa6c67 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -96,7 +96,12 @@ int nfsd_pool_stats_open(struct inode *, struct file *);
int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_shutdown_threads(struct net *net);
-void nfsd_put(struct net *net);
+static inline void nfsd_put(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ svc_put(nn->nfsd_serv);
+}
bool i_am_nfsd(void);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index c291389a1d71..355bf0db3235 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -614,7 +614,7 @@ out_negative:
* @fhp: file handle to be updated
*
*/
-void fh_fill_pre_attrs(struct svc_fh *fhp)
+__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp)
{
bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
struct inode *inode;
@@ -622,12 +622,12 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
__be32 err;
if (fhp->fh_no_wcc || fhp->fh_pre_saved)
- return;
+ return nfs_ok;
inode = d_inode(fhp->fh_dentry);
err = fh_getattr(fhp, &stat);
if (err)
- return;
+ return err;
if (v4)
fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
@@ -636,6 +636,7 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
fhp->fh_pre_ctime = stat.ctime;
fhp->fh_pre_size = stat.size;
fhp->fh_pre_saved = true;
+ return nfs_ok;
}
/**
@@ -643,26 +644,27 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
* @fhp: file handle to be updated
*
*/
-void fh_fill_post_attrs(struct svc_fh *fhp)
+__be32 fh_fill_post_attrs(struct svc_fh *fhp)
{
bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
struct inode *inode = d_inode(fhp->fh_dentry);
__be32 err;
if (fhp->fh_no_wcc)
- return;
+ return nfs_ok;
if (fhp->fh_post_saved)
printk("nfsd: inode locked twice during operation.\n");
err = fh_getattr(fhp, &fhp->fh_post_attr);
if (err)
- return;
+ return err;
fhp->fh_post_saved = true;
if (v4)
fhp->fh_post_change =
nfsd4_change_attribute(&fhp->fh_post_attr, inode);
+ return nfs_ok;
}
/**
@@ -672,16 +674,20 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
* This is used when the directory wasn't changed, but wcc attributes
* are needed anyway.
*/
-void fh_fill_both_attrs(struct svc_fh *fhp)
+__be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp)
{
- fh_fill_post_attrs(fhp);
- if (!fhp->fh_post_saved)
- return;
+ __be32 err;
+
+ err = fh_fill_post_attrs(fhp);
+ if (err)
+ return err;
+
fhp->fh_pre_change = fhp->fh_post_change;
fhp->fh_pre_mtime = fhp->fh_post_attr.mtime;
fhp->fh_pre_ctime = fhp->fh_post_attr.ctime;
fhp->fh_pre_size = fhp->fh_post_attr.size;
fhp->fh_pre_saved = true;
+ return nfs_ok;
}
/*
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 4e0ecf0ae2cf..40426f899e76 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -294,7 +294,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
}
u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode);
-extern void fh_fill_pre_attrs(struct svc_fh *fhp);
-extern void fh_fill_post_attrs(struct svc_fh *fhp);
-extern void fh_fill_both_attrs(struct svc_fh *fhp);
+__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp);
+__be32 fh_fill_post_attrs(struct svc_fh *fhp);
+__be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp);
#endif /* _LINUX_NFSD_NFSFH_H */
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 2154fa63c5f2..1582af33e204 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -542,9 +542,14 @@ static struct notifier_block nfsd_inet6addr_notifier = {
/* Only used under nfsd_mutex, so this atomic may be overkill: */
static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0);
-static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
+static void nfsd_last_thread(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv = nn->nfsd_serv;
+
+ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = NULL;
+ spin_unlock(&nfsd_notifier_lock);
/* check if the notifier still has clients */
if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
@@ -554,6 +559,8 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
#endif
}
+ svc_xprt_destroy_all(serv, net);
+
/*
* write_ports can create the server without actually starting
* any threads--if we get shut down before any threads are
@@ -644,7 +651,8 @@ void nfsd_shutdown_threads(struct net *net)
svc_get(serv);
/* Kill outstanding nfsd threads */
svc_set_num_threads(serv, NULL, 0);
- nfsd_put(net);
+ nfsd_last_thread(net);
+ svc_put(serv);
mutex_unlock(&nfsd_mutex);
}
@@ -674,9 +682,6 @@ int nfsd_create_serv(struct net *net)
serv->sv_maxconn = nn->max_connections;
error = svc_bind(serv, net);
if (error < 0) {
- /* NOT nfsd_put() as notifiers (see below) haven't
- * been set up yet.
- */
svc_put(serv);
return error;
}
@@ -719,29 +724,6 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
return 0;
}
-/* This is the callback for kref_put() below.
- * There is no code here as the first thing to be done is
- * call svc_shutdown_net(), but we cannot get the 'net' from
- * the kref. So do all the work when kref_put returns true.
- */
-static void nfsd_noop(struct kref *ref)
-{
-}
-
-void nfsd_put(struct net *net)
-{
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
- if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
- svc_xprt_destroy_all(nn->nfsd_serv, net);
- nfsd_last_thread(nn->nfsd_serv, net);
- svc_destroy(&nn->nfsd_serv->sv_refcnt);
- spin_lock(&nfsd_notifier_lock);
- nn->nfsd_serv = NULL;
- spin_unlock(&nfsd_notifier_lock);
- }
-}
-
int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
{
int i = 0;
@@ -792,7 +774,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
if (err)
break;
}
- nfsd_put(net);
+ svc_put(nn->nfsd_serv);
return err;
}
@@ -807,6 +789,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
int error;
bool nfsd_up_before;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv;
mutex_lock(&nfsd_mutex);
dprintk("nfsd: creating service\n");
@@ -826,22 +809,25 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
goto out;
nfsd_up_before = nn->nfsd_net_up;
+ serv = nn->nfsd_serv;
error = nfsd_startup_net(net, cred);
if (error)
goto out_put;
- error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs);
+ error = svc_set_num_threads(serv, NULL, nrservs);
if (error)
goto out_shutdown;
- error = nn->nfsd_serv->sv_nrthreads;
+ error = serv->sv_nrthreads;
+ if (error == 0)
+ nfsd_last_thread(net);
out_shutdown:
if (error < 0 && !nfsd_up_before)
nfsd_shutdown_net(net);
out_put:
/* Threads now hold service active */
if (xchg(&nn->keep_active, 0))
- nfsd_put(net);
- nfsd_put(net);
+ svc_put(serv);
+ svc_put(serv);
out:
mutex_unlock(&nfsd_mutex);
return error;
@@ -953,7 +939,6 @@ nfsd(void *vrqstp)
struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
struct net *net = perm_sock->xpt_net;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- int err;
/* At this point, the thread shares current->fs
* with the init process. We need to create files with the
@@ -965,15 +950,6 @@ nfsd(void *vrqstp)
current->fs->umask = 0;
- /*
- * thread is spawned with all signals set to SIG_IGN, re-enable
- * the ones that will bring down the thread
- */
- allow_signal(SIGKILL);
- allow_signal(SIGHUP);
- allow_signal(SIGINT);
- allow_signal(SIGQUIT);
-
atomic_inc(&nfsdstats.th_cnt);
set_freezable();
@@ -981,54 +957,19 @@ nfsd(void *vrqstp)
/*
* The main request loop
*/
- for (;;) {
+ while (!kthread_should_stop()) {
/* Update sv_maxconn if it has changed */
rqstp->rq_server->sv_maxconn = nn->max_connections;
- /*
- * Find a socket with data available and call its
- * recvfrom routine.
- */
- while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
- ;
- if (err == -EINTR)
- break;
- validate_process_creds();
- svc_process(rqstp);
+ svc_recv(rqstp);
validate_process_creds();
}
- /* Clear signals before calling svc_exit_thread() */
- flush_signals(current);
-
atomic_dec(&nfsdstats.th_cnt);
out:
- /* Take an extra ref so that the svc_put in svc_exit_thread()
- * doesn't call svc_destroy()
- */
- svc_get(nn->nfsd_serv);
-
/* Release the thread */
svc_exit_thread(rqstp);
-
- /* We need to drop a ref, but may not drop the last reference
- * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that
- * could deadlock with nfsd_shutdown_threads() waiting for us.
- * So three options are:
- * - drop a non-final reference,
- * - get the mutex without waiting
- * - sleep briefly andd try the above again
- */
- while (!svc_put_not_last(nn->nfsd_serv)) {
- if (mutex_trylock(&nfsd_mutex)) {
- nfsd_put(net);
- mutex_unlock(&nfsd_mutex);
- break;
- }
- msleep(20);
- }
-
return 0;
}
@@ -1046,6 +987,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
{
const struct svc_procedure *proc = rqstp->rq_procinfo;
__be32 *statp = rqstp->rq_accept_statp;
+ struct nfsd_cacherep *rp;
/*
* Give the xdr decoder a chance to change this if it wants
@@ -1056,7 +998,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
goto out_decode_err;
- switch (nfsd_cache_lookup(rqstp)) {
+ rp = NULL;
+ switch (nfsd_cache_lookup(rqstp, &rp)) {
case RC_DOIT:
break;
case RC_REPLY:
@@ -1072,7 +1015,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
goto out_encode_err;
- nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
+ nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
out_cached_reply:
return 1;
@@ -1082,13 +1025,13 @@ out_decode_err:
return 1;
out_update_drop:
- nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+ nfsd_cache_update(rqstp, rp, RC_NOCACHE, NULL);
out_dropit:
return 0;
out_encode_err:
trace_nfsd_cant_encode_err(rqstp);
- nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+ nfsd_cache_update(rqstp, rp, RC_NOCACHE, NULL);
*statp = rpc_system_err;
return 1;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d49d3060ed4f..cbddcf484dba 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -732,4 +732,7 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
return clp->cl_state == NFSD4_EXPIRABLE;
}
+
+extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
+ struct inode *inode);
#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 777e24e5da33..63797635e1c3 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -65,6 +65,8 @@ static int nfsd_show(struct seq_file *seq, void *v)
seq_printf(seq, " %lld",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)]));
}
+ seq_printf(seq, "\nwdeleg_getattr %lld",
+ percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]));
seq_putc(seq, '\n');
#endif
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index 9b43dc3d9991..cf5524e7ca06 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -22,6 +22,7 @@ enum {
NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
+ NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
#endif
NFSD_STATS_COUNTERS_NUM
};
@@ -93,4 +94,10 @@ static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount)
percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
}
+#ifdef CONFIG_NFSD_V4
+static inline void nfsd_stats_wdeleg_getattr_inc(void)
+{
+ percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]);
+}
+#endif
#endif /* _NFSD_STATS_H */
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 2af74983f146..803904348871 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -607,6 +607,7 @@ DEFINE_STATEID_EVENT(layout_recall_release);
DEFINE_STATEID_EVENT(open);
DEFINE_STATEID_EVENT(deleg_read);
+DEFINE_STATEID_EVENT(deleg_write);
DEFINE_STATEID_EVENT(deleg_return);
DEFINE_STATEID_EVENT(deleg_recall);
@@ -1240,8 +1241,8 @@ TRACE_EVENT(nfsd_drc_found,
TRACE_EVENT(nfsd_drc_mismatch,
TP_PROTO(
const struct nfsd_net *nn,
- const struct svc_cacherep *key,
- const struct svc_cacherep *rp
+ const struct nfsd_cacherep *key,
+ const struct nfsd_cacherep *rp
),
TP_ARGS(nn, key, rp),
TP_STRUCT__entry(
@@ -1261,6 +1262,28 @@ TRACE_EVENT(nfsd_drc_mismatch,
__entry->ingress)
);
+TRACE_EVENT_CONDITION(nfsd_drc_gc,
+ TP_PROTO(
+ const struct nfsd_net *nn,
+ unsigned long freed
+ ),
+ TP_ARGS(nn, freed),
+ TP_CONDITION(freed > 0),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(unsigned long, freed)
+ __field(int, total)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->freed = freed;
+ __entry->total = atomic_read(&nn->num_drc_entries);
+ ),
+ TP_printk("boot_time=%16llx total=%d freed=%lu",
+ __entry->boot_time, __entry->total, __entry->freed
+ )
+);
+
TRACE_EVENT(nfsd_cb_args,
TP_PROTO(
const struct nfs4_client *clp,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9b7acba382fe..48260cf68fde 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1540,7 +1540,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
dput(dchild);
if (err)
goto out_unlock;
- fh_fill_pre_attrs(fhp);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp);
fh_fill_post_attrs(fhp);
out_unlock:
@@ -1635,13 +1637,16 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
inode_unlock(dentry->d_inode);
goto out_drop_write;
}
- fh_fill_pre_attrs(fhp);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path);
err = nfserrno(host_err);
cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
if (!err)
nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
fh_fill_post_attrs(fhp);
+out_unlock:
inode_unlock(dentry->d_inode);
if (!err)
err = nfserrno(commit_metadata(fhp));
@@ -1703,7 +1708,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
err = nfserr_noent;
if (d_really_is_negative(dold))
goto out_dput;
- fh_fill_pre_attrs(ffhp);
+ err = fh_fill_pre_attrs(ffhp);
+ if (err != nfs_ok)
+ goto out_dput;
host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL);
fh_fill_post_attrs(ffhp);
inode_unlock(dirp);
@@ -1789,8 +1796,12 @@ retry:
}
trap = lock_rename(tdentry, fdentry);
- fh_fill_pre_attrs(ffhp);
- fh_fill_pre_attrs(tfhp);
+ err = fh_fill_pre_attrs(ffhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ err = fh_fill_pre_attrs(tfhp);
+ if (err != nfs_ok)
+ goto out_unlock;
odentry = lookup_one_len(fname, fdentry, flen);
host_err = PTR_ERR(odentry);
@@ -1857,6 +1868,7 @@ retry:
fh_fill_post_attrs(ffhp);
fh_fill_post_attrs(tfhp);
}
+out_unlock:
unlock_rename(tdentry, fdentry);
fh_drop_write(ffhp);
@@ -1916,12 +1928,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
goto out_unlock;
}
rinode = d_inode(rdentry);
- ihold(rinode);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ ihold(rinode);
if (!type)
type = d_inode(rdentry)->i_mode & S_IFMT;
- fh_fill_pre_attrs(fhp);
if (type != S_IFDIR) {
int retries;
@@ -2341,16 +2355,18 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
return nfserrno(ret);
inode_lock(fhp->fh_dentry->d_inode);
- fh_fill_pre_attrs(fhp);
-
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
ret = __vfs_removexattr_locked(&nop_mnt_idmap, fhp->fh_dentry,
name, NULL);
-
+ err = nfsd_xattr_errno(ret);
fh_fill_post_attrs(fhp);
+out_unlock:
inode_unlock(fhp->fh_dentry->d_inode);
fh_drop_write(fhp);
- return nfsd_xattr_errno(ret);
+ return err;
}
__be32
@@ -2368,15 +2384,17 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
if (ret)
return nfserrno(ret);
inode_lock(fhp->fh_dentry->d_inode);
- fh_fill_pre_attrs(fhp);
-
- ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry, name, buf,
- len, flags, NULL);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry,
+ name, buf, len, flags, NULL);
fh_fill_post_attrs(fhp);
+ err = nfsd_xattr_errno(ret);
+out_unlock:
inode_unlock(fhp->fh_dentry->d_inode);
fh_drop_write(fhp);
-
- return nfsd_xattr_errno(ret);
+ return err;
}
#endif
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 510978e602da..9d918a79dc16 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -774,17 +774,6 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op);
#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
-static inline void
-set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
-{
- BUG_ON(!fhp->fh_pre_saved);
- cinfo->atomic = (u32)(fhp->fh_post_saved && !fhp->fh_no_atomic_attr);
-
- cinfo->before_change = fhp->fh_pre_change;
- cinfo->after_change = fhp->fh_post_change;
-}
-
-
bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr);