summaryrefslogtreecommitdiff
path: root/fs/nfsd
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfsd')
-rw-r--r--fs/nfsd/filecache.c79
-rw-r--r--fs/nfsd/filecache.h1
-rw-r--r--fs/nfsd/netns.h27
-rw-r--r--fs/nfsd/nfs3proc.c6
-rw-r--r--fs/nfsd/nfs3xdr.c65
-rw-r--r--fs/nfsd/nfs4proc.c24
-rw-r--r--fs/nfsd/nfs4state.c63
-rw-r--r--fs/nfsd/nfs4xdr.c21
-rw-r--r--fs/nfsd/nfscache.c2
-rw-r--r--fs/nfsd/nfsctl.c27
-rw-r--r--fs/nfsd/nfsd.h2
-rw-r--r--fs/nfsd/nfsfh.c66
-rw-r--r--fs/nfsd/nfsfh.h40
-rw-r--r--fs/nfsd/nfsproc.c8
-rw-r--r--fs/nfsd/nfssvc.c220
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--fs/nfsd/stats.c2
-rw-r--r--fs/nfsd/stats.h4
-rw-r--r--fs/nfsd/trace.h106
-rw-r--r--fs/nfsd/vfs.c122
-rw-r--r--fs/nfsd/vfs.h3
21 files changed, 493 insertions, 400 deletions
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index fdf89fcf1a0c..8bc807c5fea4 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -44,12 +44,9 @@ struct nfsd_fcache_bucket {
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
struct nfsd_fcache_disposal {
- struct list_head list;
struct work_struct work;
- struct net *net;
spinlock_t lock;
struct list_head freeme;
- struct rcu_head rcu;
};
static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
@@ -62,8 +59,6 @@ static long nfsd_file_lru_flags;
static struct fsnotify_group *nfsd_file_fsnotify_group;
static atomic_long_t nfsd_filecache_count;
static struct delayed_work nfsd_filecache_laundrette;
-static DEFINE_SPINLOCK(laundrette_lock);
-static LIST_HEAD(laundrettes);
static void nfsd_file_gc(void);
@@ -194,7 +189,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
}
nf->nf_mark = NULL;
- init_rwsem(&nf->nf_rwsem);
trace_nfsd_file_alloc(nf);
}
return nf;
@@ -249,7 +243,7 @@ nfsd_file_do_unhash(struct nfsd_file *nf)
trace_nfsd_file_unhash(nf);
if (nfsd_file_check_write_error(nf))
- nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
hlist_del_rcu(&nf->nf_node);
atomic_long_dec(&nfsd_filecache_count);
@@ -367,19 +361,13 @@ nfsd_file_list_remove_disposal(struct list_head *dst,
static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
- struct nfsd_fcache_disposal *l;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
- rcu_read_lock();
- list_for_each_entry_rcu(l, &laundrettes, list) {
- if (l->net == net) {
- spin_lock(&l->lock);
- list_splice_tail_init(files, &l->freeme);
- spin_unlock(&l->lock);
- queue_work(nfsd_filecache_wq, &l->work);
- break;
- }
- }
- rcu_read_unlock();
+ spin_lock(&l->lock);
+ list_splice_tail_init(files, &l->freeme);
+ spin_unlock(&l->lock);
+ queue_work(nfsd_filecache_wq, &l->work);
}
static void
@@ -755,7 +743,7 @@ nfsd_file_cache_purge(struct net *net)
}
static struct nfsd_fcache_disposal *
-nfsd_alloc_fcache_disposal(struct net *net)
+nfsd_alloc_fcache_disposal(void)
{
struct nfsd_fcache_disposal *l;
@@ -763,7 +751,6 @@ nfsd_alloc_fcache_disposal(struct net *net)
if (!l)
return NULL;
INIT_WORK(&l->work, nfsd_file_delayed_close);
- l->net = net;
spin_lock_init(&l->lock);
INIT_LIST_HEAD(&l->freeme);
return l;
@@ -772,61 +759,27 @@ nfsd_alloc_fcache_disposal(struct net *net)
static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
- rcu_assign_pointer(l->net, NULL);
cancel_work_sync(&l->work);
nfsd_file_dispose_list(&l->freeme);
- kfree_rcu(l, rcu);
-}
-
-static void
-nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
-{
- spin_lock(&laundrette_lock);
- list_add_tail_rcu(&l->list, &laundrettes);
- spin_unlock(&laundrette_lock);
-}
-
-static void
-nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
-{
- spin_lock(&laundrette_lock);
- list_del_rcu(&l->list);
- spin_unlock(&laundrette_lock);
-}
-
-static int
-nfsd_alloc_fcache_disposal_net(struct net *net)
-{
- struct nfsd_fcache_disposal *l;
-
- l = nfsd_alloc_fcache_disposal(net);
- if (!l)
- return -ENOMEM;
- nfsd_add_fcache_disposal(l);
- return 0;
+ kfree(l);
}
static void
nfsd_free_fcache_disposal_net(struct net *net)
{
- struct nfsd_fcache_disposal *l;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
- rcu_read_lock();
- list_for_each_entry_rcu(l, &laundrettes, list) {
- if (l->net != net)
- continue;
- nfsd_del_fcache_disposal(l);
- rcu_read_unlock();
- nfsd_free_fcache_disposal(l);
- return;
- }
- rcu_read_unlock();
+ nfsd_free_fcache_disposal(l);
}
int
nfsd_file_cache_start_net(struct net *net)
{
- return nfsd_alloc_fcache_disposal_net(net);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nn->fcache_disposal = nfsd_alloc_fcache_disposal();
+ return nn->fcache_disposal ? 0 : -ENOMEM;
}
void
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 7872df5a0fe3..435ceab27897 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -46,7 +46,6 @@ struct nfsd_file {
refcount_t nf_ref;
unsigned char nf_may;
struct nfsd_file_mark *nf_mark;
- struct rw_semaphore nf_rwsem;
};
int nfsd_file_cache_init(void);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 935c1028c217..1b1a962a1804 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -11,6 +11,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/percpu_counter.h>
+#include <linux/siphash.h>
/* Hash tables for nfs4_clientid state */
#define CLIENT_HASH_BITS 4
@@ -108,9 +109,8 @@ struct nfsd_net {
bool nfsd_net_up;
bool lockd_up;
- /* Time of server startup */
- struct timespec64 nfssvc_boot;
- seqlock_t boot_lock;
+ seqlock_t writeverf_lock;
+ unsigned char writeverf[8];
/*
* Max number of connections this nfsd container will allow. Defaults
@@ -123,12 +123,13 @@ struct nfsd_net {
u32 clverifier_counter;
struct svc_serv *nfsd_serv;
-
- wait_queue_head_t ntf_wq;
- atomic_t ntf_refcnt;
-
- /* Allow umount to wait for nfsd state cleanup */
- struct completion nfsd_shutdown_complete;
+ /* When a listening socket is added to nfsd, keep_active is set
+ * and this justifies a reference on nfsd_serv. This stops
+ * nfsd_serv from being freed. When the number of threads is
+ * set, keep_active is cleared and the reference is dropped. So
+ * when the last thread exits, the service will be destroyed.
+ */
+ int keep_active;
/*
* clientid and stateid data for construction of net unique COPY
@@ -184,6 +185,10 @@ struct nfsd_net {
/* utsname taken from the process that starts the server */
char nfsd_name[UNX_MAXNODENAME+1];
+
+ struct nfsd_fcache_disposal *fcache_disposal;
+
+ siphash_key_t siphash_key;
};
/* Simple check to find out if a given net was properly initialized */
@@ -193,6 +198,6 @@ extern void nfsd_netns_free_versions(struct nfsd_net *nn);
extern unsigned int nfsd_net_id;
-void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
-void nfsd_reset_boot_verifier(struct nfsd_net *nn);
+void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
+void nfsd_reset_write_verifier(struct nfsd_net *nn);
#endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 15dac36ca852..8ef53f6726ec 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -202,15 +202,11 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
nvecs = svc_fill_write_vector(rqstp, &argp->payload);
- if (!nvecs) {
- resp->status = nfserr_io;
- goto out;
- }
+
resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
rqstp->rq_vec, nvecs, &cnt,
resp->committed, resp->verf);
resp->count = cnt;
-out:
return rpc_success;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index c3ac1b6aa3aa..7c45ba4db61b 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -487,71 +487,6 @@ neither:
return true;
}
-static bool fs_supports_change_attribute(struct super_block *sb)
-{
- return sb->s_flags & SB_I_VERSION || sb->s_export_op->fetch_iversion;
-}
-
-/*
- * Fill in the pre_op attr for the wcc data
- */
-void fill_pre_wcc(struct svc_fh *fhp)
-{
- struct inode *inode;
- struct kstat stat;
- bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
-
- if (fhp->fh_no_wcc || fhp->fh_pre_saved)
- return;
- inode = d_inode(fhp->fh_dentry);
- if (fs_supports_change_attribute(inode->i_sb) || !v4) {
- __be32 err = fh_getattr(fhp, &stat);
-
- if (err) {
- /* Grab the times from inode anyway */
- stat.mtime = inode->i_mtime;
- stat.ctime = inode->i_ctime;
- stat.size = inode->i_size;
- }
- fhp->fh_pre_mtime = stat.mtime;
- fhp->fh_pre_ctime = stat.ctime;
- fhp->fh_pre_size = stat.size;
- }
- if (v4)
- fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
-
- fhp->fh_pre_saved = true;
-}
-
-/*
- * Fill in the post_op attr for the wcc data
- */
-void fill_post_wcc(struct svc_fh *fhp)
-{
- bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
- struct inode *inode = d_inode(fhp->fh_dentry);
-
- if (fhp->fh_no_wcc)
- return;
-
- if (fhp->fh_post_saved)
- printk("nfsd: inode locked twice during operation.\n");
-
- fhp->fh_post_saved = true;
-
- if (fs_supports_change_attribute(inode->i_sb) || !v4) {
- __be32 err = fh_getattr(fhp, &fhp->fh_post_attr);
-
- if (err) {
- fhp->fh_post_saved = false;
- fhp->fh_post_attr.ctime = inode->i_ctime;
- }
- }
- if (v4)
- fhp->fh_post_change =
- nfsd4_change_attribute(&fhp->fh_post_attr, inode);
-}
-
/*
* XDR decode functions
*/
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a36261f89bdf..ed1ee25647be 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -598,7 +598,7 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
- nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
+ nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id));
}
static __be32
@@ -1101,7 +1101,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
- status = nfsd4_clone_file_range(src, clone->cl_src_pos,
+ status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos,
dst, clone->cl_dst_pos, clone->cl_count,
EX_ISSYNC(cstate->current_fh.fh_export));
@@ -1510,11 +1510,14 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
{
+ struct file *dst = copy->nf_dst->nf_file;
+ struct file *src = copy->nf_src->nf_file;
+ errseq_t since;
ssize_t bytes_copied = 0;
u64 bytes_total = copy->cp_count;
u64 src_pos = copy->cp_src_pos;
u64 dst_pos = copy->cp_dst_pos;
- __be32 status;
+ int status;
/* See RFC 7862 p.67: */
if (bytes_total == 0)
@@ -1522,9 +1525,8 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
do {
if (kthread_should_stop())
break;
- bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
- src_pos, copy->nf_dst->nf_file, dst_pos,
- bytes_total);
+ bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
+ bytes_total);
if (bytes_copied <= 0)
break;
bytes_total -= bytes_copied;
@@ -1534,11 +1536,11 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
} while (bytes_total > 0 && !copy->cp_synchronous);
/* for a non-zero asynchronous copy do a commit of data */
if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) {
- down_write(&copy->nf_dst->nf_rwsem);
- status = vfs_fsync_range(copy->nf_dst->nf_file,
- copy->cp_dst_pos,
+ since = READ_ONCE(dst->f_wb_err);
+ status = vfs_fsync_range(dst, copy->cp_dst_pos,
copy->cp_res.wr_bytes_written, 0);
- up_write(&copy->nf_dst->nf_rwsem);
+ if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
if (!status)
copy->committed = true;
}
@@ -2528,7 +2530,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
goto encode_op;
}
- fh_clear_wcc(current_fh);
+ fh_clear_pre_post_attrs(current_fh);
/* If op is non-idempotent */
if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1956d377d1a6..72900b89cf84 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -246,6 +246,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
if (fh_match(fh, &cur->nbl_fh)) {
list_del_init(&cur->nbl_list);
+ WARN_ON(list_empty(&cur->nbl_lru));
list_del_init(&cur->nbl_lru);
found = cur;
break;
@@ -271,6 +272,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
INIT_LIST_HEAD(&nbl->nbl_lru);
fh_copy_shallow(&nbl->nbl_fh, fh);
locks_init_lock(&nbl->nbl_lock);
+ kref_init(&nbl->nbl_kref);
nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
&nfsd4_cb_notify_lock_ops,
NFSPROC4_CLNT_CB_NOTIFY_LOCK);
@@ -280,11 +282,20 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
}
static void
+free_nbl(struct kref *kref)
+{
+ struct nfsd4_blocked_lock *nbl;
+
+ nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref);
+ kfree(nbl);
+}
+
+static void
free_blocked_lock(struct nfsd4_blocked_lock *nbl)
{
locks_delete_block(&nbl->nbl_lock);
locks_release_private(&nbl->nbl_lock);
- kfree(nbl);
+ kref_put(&nbl->nbl_kref, free_nbl);
}
static void
@@ -302,6 +313,7 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
struct nfsd4_blocked_lock,
nbl_list);
list_del_init(&nbl->nbl_list);
+ WARN_ON(list_empty(&nbl->nbl_lru));
list_move(&nbl->nbl_lru, &reaplist);
}
spin_unlock(&nn->blocked_locks_lock);
@@ -360,11 +372,13 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
* st_{access,deny}_bmap field of the stateid, in order to track not
* only what share bits are currently in force, but also what
* combinations of share bits previous opens have used. This allows us
- * to enforce the recommendation of rfc 3530 14.2.19 that the server
- * return an error if the client attempt to downgrade to a combination
- * of share bits not explicable by closing some of its previous opens.
+ * to enforce the recommendation in
+ * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that
+ * the server return an error if the client attempts to downgrade to a
+ * combination of share bits not explicable by closing some of its
+ * previous opens.
*
- * XXX: This enforcement is actually incomplete, since we don't keep
+ * This enforcement is arguably incomplete, since we don't keep
* track of access/deny bit combinations; so, e.g., we allow:
*
* OPEN allow read, deny write
@@ -372,6 +386,10 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
* DOWNGRADE allow read, deny none
*
* which we should reject.
+ *
+ * But you could also argue that our current code is already overkill,
+ * since it only exists to return NFS4ERR_INVAL on incorrect client
+ * behavior.
*/
static unsigned int
bmap_to_share_mode(unsigned long bmap)
@@ -6040,7 +6058,11 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
*nfp = NULL;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
- status = check_special_stateids(net, fhp, stateid, flags);
+ if (cstid)
+ status = nfserr_bad_stateid;
+ else
+ status = check_special_stateids(net, fhp, stateid,
+ flags);
goto done;
}
@@ -6836,7 +6858,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
- struct super_block *sb;
__be32 status = 0;
int lkflg;
int err;
@@ -6858,7 +6879,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: nfsd4_lock: permission denied!\n");
return status;
}
- sb = cstate->current_fh.fh_dentry->d_sb;
if (lock->lk_is_new) {
if (nfsd4_has_session(cstate))
@@ -6910,8 +6930,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
- if (nfsd4_has_session(cstate) &&
- !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
+ if (nfsd4_has_session(cstate))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
@@ -6923,8 +6942,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fl_type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
- if (nfsd4_has_session(cstate) &&
- !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
+ if (nfsd4_has_session(cstate))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
@@ -6945,6 +6963,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
+ /*
+ * Most filesystems with their own ->lock operations will block
+ * the nfsd thread waiting to acquire the lock. That leads to
+ * deadlocks (we don't want every nfsd thread tied up waiting
+ * for file locks), so don't attempt blocking lock notifications
+ * on those filesystems:
+ */
+ if (nf->nf_file->f_op->lock)
+ fl_flags &= ~FL_SLEEP;
+
nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
if (!nbl) {
dprintk("NFSD: %s: unable to allocate block!\n", __func__);
@@ -6975,6 +7003,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
spin_lock(&nn->blocked_locks_lock);
list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
+ kref_get(&nbl->nbl_kref);
spin_unlock(&nn->blocked_locks_lock);
}
@@ -6987,6 +7016,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nn->somebody_reclaimed = true;
break;
case FILE_LOCK_DEFERRED:
+ kref_put(&nbl->nbl_kref, free_nbl);
nbl = NULL;
fallthrough;
case -EAGAIN: /* conflock holds conflicting lock */
@@ -7007,8 +7037,13 @@ out:
/* dequeue it if we queued it before */
if (fl_flags & FL_SLEEP) {
spin_lock(&nn->blocked_locks_lock);
- list_del_init(&nbl->nbl_list);
- list_del_init(&nbl->nbl_lru);
+ if (!list_empty(&nbl->nbl_list) &&
+ !list_empty(&nbl->nbl_lru)) {
+ list_del_init(&nbl->nbl_list);
+ list_del_init(&nbl->nbl_lru);
+ kref_put(&nbl->nbl_kref, free_nbl);
+ }
+ /* nbl can use one of lists to be linked to reaplist */
spin_unlock(&nn->blocked_locks_lock);
}
free_blocked_lock(nbl);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5a93a5db4fb0..899de438e529 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -277,21 +277,10 @@ nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf)
static __be32
nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen)
{
- u32 i, count;
- __be32 *p;
-
- if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
- return nfserr_bad_xdr;
- /* request sanity */
- if (count > 1000)
- return nfserr_bad_xdr;
- p = xdr_inline_decode(argp->xdr, count << 2);
- if (!p)
- return nfserr_bad_xdr;
- for (i = 0; i < bmlen; i++)
- bmval[i] = (i < count) ? be32_to_cpup(p++) : 0;
+ ssize_t status;
- return nfs_ok;
+ status = xdr_stream_decode_uint32_array(argp->xdr, bmval, bmlen);
+ return status == -EBADMSG ? nfserr_bad_xdr : nfs_ok;
}
static __be32
@@ -4804,8 +4793,8 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
return nfserr_resource;
*p++ = htonl(NFS4_CONTENT_HOLE);
- p = xdr_encode_hyper(p, read->rd_offset);
- p = xdr_encode_hyper(p, count);
+ p = xdr_encode_hyper(p, read->rd_offset);
+ p = xdr_encode_hyper(p, count);
*eof = (read->rd_offset + count) >= f_size;
*maxcount = min_t(unsigned long, count, *maxcount);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 6e0b6f3148dc..a4a69ab6ab28 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -87,7 +87,7 @@ nfsd_hashsize(unsigned int limit)
static u32
nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
{
- return hash_32(be32_to_cpu(xid), nn->maskbits);
+ return hash_32((__force u32)xid, nn->maskbits);
}
static struct svc_cacherep *
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 51a49e0cfe37..b9f27fbcd768 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -742,13 +742,12 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
return err;
err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
- if (err < 0) {
- nfsd_destroy(net);
- return err;
- }
- /* Decrease the count, but don't shut down the service */
- nn->nfsd_serv->sv_nrthreads--;
+ if (err >= 0 &&
+ !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
+ svc_get(nn->nfsd_serv);
+
+ nfsd_put(net);
return err;
}
@@ -783,8 +782,10 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
if (err < 0 && err != -EAFNOSUPPORT)
goto out_close;
- /* Decrease the count, but don't shut down the service */
- nn->nfsd_serv->sv_nrthreads--;
+ if (!nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
+ svc_get(nn->nfsd_serv);
+
+ nfsd_put(net);
return 0;
out_close:
xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port);
@@ -793,10 +794,7 @@ out_close:
svc_xprt_put(xprt);
}
out_err:
- if (!list_empty(&nn->nfsd_serv->sv_permsocks))
- nn->nfsd_serv->sv_nrthreads--;
- else
- nfsd_destroy(net);
+ nfsd_put(net);
return err;
}
@@ -1485,9 +1483,8 @@ static __net_init int nfsd_init_net(struct net *net)
nn->clientid_counter = nn->clientid_base + 1;
nn->s2s_cp_cl_id = nn->clientid_counter++;
- atomic_set(&nn->ntf_refcnt, 0);
- init_waitqueue_head(&nn->ntf_wq);
- seqlock_init(&nn->boot_lock);
+ get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
+ seqlock_init(&nn->writeverf_lock);
return 0;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 498e5a489826..3e5008b475ff 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -97,7 +97,7 @@ int nfsd_pool_stats_open(struct inode *, struct file *);
int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_shutdown_threads(struct net *net);
-void nfsd_destroy(struct net *net);
+void nfsd_put(struct net *net);
bool i_am_nfsd(void);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index f3779fa72c89..145208bcb9bd 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -611,6 +611,70 @@ out_negative:
return nfserr_serverfault;
}
+#ifdef CONFIG_NFSD_V3
+
+/**
+ * fh_fill_pre_attrs - Fill in pre-op attributes
+ * @fhp: file handle to be updated
+ *
+ */
+void fh_fill_pre_attrs(struct svc_fh *fhp)
+{
+ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
+ struct inode *inode;
+ struct kstat stat;
+ __be32 err;
+
+ if (fhp->fh_no_wcc || fhp->fh_pre_saved)
+ return;
+
+ inode = d_inode(fhp->fh_dentry);
+ err = fh_getattr(fhp, &stat);
+ if (err) {
+ /* Grab the times from inode anyway */
+ stat.mtime = inode->i_mtime;
+ stat.ctime = inode->i_ctime;
+ stat.size = inode->i_size;
+ }
+ if (v4)
+ fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
+
+ fhp->fh_pre_mtime = stat.mtime;
+ fhp->fh_pre_ctime = stat.ctime;
+ fhp->fh_pre_size = stat.size;
+ fhp->fh_pre_saved = true;
+}
+
+/**
+ * fh_fill_post_attrs - Fill in post-op attributes
+ * @fhp: file handle to be updated
+ *
+ */
+void fh_fill_post_attrs(struct svc_fh *fhp)
+{
+ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
+ struct inode *inode = d_inode(fhp->fh_dentry);
+ __be32 err;
+
+ if (fhp->fh_no_wcc)
+ return;
+
+ if (fhp->fh_post_saved)
+ printk("nfsd: inode locked twice during operation.\n");
+
+ err = fh_getattr(fhp, &fhp->fh_post_attr);
+ if (err) {
+ fhp->fh_post_saved = false;
+ fhp->fh_post_attr.ctime = inode->i_ctime;
+ } else
+ fhp->fh_post_saved = true;
+ if (v4)
+ fhp->fh_post_change =
+ nfsd4_change_attribute(&fhp->fh_post_attr, inode);
+}
+
+#endif /* CONFIG_NFSD_V3 */
+
/*
* Release a file handle.
*/
@@ -623,7 +687,7 @@ fh_put(struct svc_fh *fhp)
fh_unlock(fhp);
fhp->fh_dentry = NULL;
dput(dentry);
- fh_clear_wcc(fhp);
+ fh_clear_pre_post_attrs(fhp);
}
fh_drop_write(fhp);
if (exp) {
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index d11e4b6870d6..434930d8a946 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -284,12 +284,13 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
#endif
#ifdef CONFIG_NFSD_V3
-/*
- * The wcc data stored in current_fh should be cleared
- * between compound ops.
+
+/**
+ * fh_clear_pre_post_attrs - Reset pre/post attributes
+ * @fhp: file handle to be updated
+ *
*/
-static inline void
-fh_clear_wcc(struct svc_fh *fhp)
+static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
{
fhp->fh_post_saved = false;
fhp->fh_pre_saved = false;
@@ -323,13 +324,24 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat,
return time_to_chattr(&stat->ctime);
}
-extern void fill_pre_wcc(struct svc_fh *fhp);
-extern void fill_post_wcc(struct svc_fh *fhp);
-#else
-#define fh_clear_wcc(ignored)
-#define fill_pre_wcc(ignored)
-#define fill_post_wcc(notused)
-#endif /* CONFIG_NFSD_V3 */
+extern void fh_fill_pre_attrs(struct svc_fh *fhp);
+extern void fh_fill_post_attrs(struct svc_fh *fhp);
+
+#else /* !CONFIG_NFSD_V3 */
+
+static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
+{
+}
+
+static inline void fh_fill_pre_attrs(struct svc_fh *fhp)
+{
+}
+
+static inline void fh_fill_post_attrs(struct svc_fh *fhp)
+{
+}
+
+#endif /* !CONFIG_NFSD_V3 */
/*
@@ -355,7 +367,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
inode = d_inode(dentry);
inode_lock_nested(inode, subclass);
- fill_pre_wcc(fhp);
+ fh_fill_pre_attrs(fhp);
fhp->fh_locked = true;
}
@@ -372,7 +384,7 @@ static inline void
fh_unlock(struct svc_fh *fhp)
{
if (fhp->fh_locked) {
- fill_post_wcc(fhp);
+ fh_fill_post_attrs(fhp);
inode_unlock(d_inode(fhp->fh_dentry));
fhp->fh_locked = false;
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index de282f3273c5..18b8eb43a19b 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -235,10 +235,6 @@ nfsd_proc_write(struct svc_rqst *rqstp)
argp->len, argp->offset);
nvecs = svc_fill_write_vector(rqstp, &argp->payload);
- if (!nvecs) {
- resp->status = nfserr_io;
- goto out;
- }
resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset, rqstp->rq_vec, nvecs,
@@ -247,7 +243,6 @@ nfsd_proc_write(struct svc_rqst *rqstp)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
return rpc_drop_reply;
-out:
return rpc_success;
}
@@ -850,6 +845,7 @@ nfserrno (int errno)
{ nfserr_io, -EIO },
{ nfserr_nxio, -ENXIO },
{ nfserr_fbig, -E2BIG },
+ { nfserr_stale, -EBADF },
{ nfserr_acces, -EACCES },
{ nfserr_exist, -EEXIST },
{ nfserr_xdev, -EXDEV },
@@ -878,6 +874,8 @@ nfserrno (int errno)
{ nfserr_toosmall, -ETOOSMALL },
{ nfserr_serverfault, -ESERVERFAULT },
{ nfserr_serverfault, -ENFILE },
+ { nfserr_io, -EREMOTEIO },
+ { nfserr_stale, -EOPENSTALE },
{ nfserr_io, -EUCLEAN },
{ nfserr_perm, -ENOKEY },
{ nfserr_no_grace, -ENOGRACE},
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 80431921e5d7..07193595b8e0 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/fs_struct.h>
#include <linux/swap.h>
+#include <linux/siphash.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svcsock.h>
@@ -55,18 +56,17 @@ static __be32 nfsd_init_request(struct svc_rqst *,
struct svc_process_info *);
/*
- * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members
- * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
- * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
+ * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members
+ * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks.
*
* If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a
- * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
- * of nfsd threads must exist and each must listed in ->sp_all_threads in each
- * entry of ->sv_pools[].
+ * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless
+ * nn->keep_active is set). That number of nfsd threads must
+ * exist and each must be listed in ->sp_all_threads in some entry of
+ * ->sv_pools[].
*
- * Transitions of the thread count between zero and non-zero are of particular
- * interest since the svc_serv needs to be created and initialized at that
- * point, or freed.
+ * Each active thread holds a counted reference on nn->nfsd_serv, as does
+ * the nn->keep_active flag and various transient calls to svc_get().
*
* Finally, the nfsd_mutex also protects some of the global variables that are
* accessed when nfsd starts and that are settable via the write_* routines in
@@ -345,33 +345,57 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
}
-void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+/**
+ * nfsd_copy_write_verifier - Atomically copy a write verifier
+ * @verf: buffer in which to receive the full 8-byte verifier cookie
+ * @nn: NFS net namespace
+ *
+ * This function provides a wait-free mechanism for copying the
+ * namespace's write verifier (both words of it) without tearing it.
+ */
+void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn)
{
int seq = 0;
do {
- read_seqbegin_or_lock(&nn->boot_lock, &seq);
- /*
- * This is opaque to client, so no need to byte-swap. Use
- * __force to keep sparse happy. y2038 time_t overflow is
- * irrelevant in this usage
- */
- verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
- verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
- } while (need_seqretry(&nn->boot_lock, seq));
- done_seqretry(&nn->boot_lock, seq);
+ read_seqbegin_or_lock(&nn->writeverf_lock, &seq);
+ memcpy(verf, nn->writeverf, sizeof(nn->writeverf));
+ } while (need_seqretry(&nn->writeverf_lock, seq));
+ done_seqretry(&nn->writeverf_lock, seq);
}
-static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn)
{
- ktime_get_real_ts64(&nn->nfssvc_boot);
+ struct timespec64 now;
+ u64 verf;
+
+ /*
+ * Because the time value is hashed, y2038 time_t overflow
+ * is irrelevant in this usage.
+ */
+ ktime_get_raw_ts64(&now);
+ verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key);
+ memcpy(nn->writeverf, &verf, sizeof(nn->writeverf));
}
-void nfsd_reset_boot_verifier(struct nfsd_net *nn)
+/**
+ * nfsd_reset_write_verifier - Generate a new write verifier
+ * @nn: NFS net namespace
+ *
+ * This function updates the ->writeverf field of @nn. This field
+ * contains an opaque cookie that, according to Section 18.32.3 of
+ * RFC 8881, "the client can use to determine whether a server has
+ * changed instance state (e.g., server restart) between a call to
+ * WRITE and a subsequent call to either WRITE or COMMIT. This
+ * cookie MUST be unchanged during a single instance of the NFSv4.1
+ * server and MUST be unique between instances of the NFSv4.1
+ * server."
+ */
+void nfsd_reset_write_verifier(struct nfsd_net *nn)
{
- write_seqlock(&nn->boot_lock);
- nfsd_reset_boot_verifier_locked(nn);
- write_sequnlock(&nn->boot_lock);
+ write_seqlock(&nn->writeverf_lock);
+ nfsd_reset_write_verifier_locked(nn);
+ write_sequnlock(&nn->writeverf_lock);
}
static int nfsd_startup_net(struct net *net, const struct cred *cred)
@@ -435,6 +459,7 @@ static void nfsd_shutdown_net(struct net *net)
nfsd_shutdown_generic();
}
+static DEFINE_SPINLOCK(nfsd_notifier_lock);
static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
@@ -444,18 +469,17 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct sockaddr_in sin;
- if ((event != NETDEV_DOWN) ||
- !atomic_inc_not_zero(&nn->ntf_refcnt))
+ if (event != NETDEV_DOWN || !nn->nfsd_serv)
goto out;
+ spin_lock(&nfsd_notifier_lock);
if (nn->nfsd_serv) {
dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = ifa->ifa_local;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin);
}
- atomic_dec(&nn->ntf_refcnt);
- wake_up(&nn->ntf_wq);
+ spin_unlock(&nfsd_notifier_lock);
out:
return NOTIFY_DONE;
@@ -475,10 +499,10 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct sockaddr_in6 sin6;
- if ((event != NETDEV_DOWN) ||
- !atomic_inc_not_zero(&nn->ntf_refcnt))
+ if (event != NETDEV_DOWN || !nn->nfsd_serv)
goto out;
+ spin_lock(&nfsd_notifier_lock);
if (nn->nfsd_serv) {
dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
@@ -487,8 +511,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
}
- atomic_dec(&nn->ntf_refcnt);
- wake_up(&nn->ntf_wq);
+ spin_unlock(&nfsd_notifier_lock);
+
out:
return NOTIFY_DONE;
}
@@ -505,7 +529,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- atomic_dec(&nn->ntf_refcnt);
/* check if the notifier still has clients */
if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
@@ -513,7 +536,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
}
- wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0);
/*
* write_ports can create the server without actually starting
@@ -594,20 +616,9 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = {
.svo_shutdown = nfsd_last_thread,
.svo_function = nfsd,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_setup = svc_set_num_threads,
.svo_module = THIS_MODULE,
};
-static void nfsd_complete_shutdown(struct net *net)
-{
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
- WARN_ON(!mutex_is_locked(&nfsd_mutex));
-
- nn->nfsd_serv = NULL;
- complete(&nn->nfsd_shutdown_complete);
-}
-
void nfsd_shutdown_threads(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -622,11 +633,9 @@ void nfsd_shutdown_threads(struct net *net)
svc_get(serv);
/* Kill outstanding nfsd threads */
- serv->sv_ops->svo_setup(serv, NULL, 0);
- nfsd_destroy(net);
+ svc_set_num_threads(serv, NULL, 0);
+ nfsd_put(net);
mutex_unlock(&nfsd_mutex);
- /* Wait for shutdown of nfsd_serv to complete */
- wait_for_completion(&nn->nfsd_shutdown_complete);
}
bool i_am_nfsd(void)
@@ -638,6 +647,7 @@ int nfsd_create_serv(struct net *net)
{
int error;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv;
WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nn->nfsd_serv) {
@@ -647,19 +657,23 @@ int nfsd_create_serv(struct net *net)
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
nfsd_reset_versions(nn);
- nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
- &nfsd_thread_sv_ops);
- if (nn->nfsd_serv == NULL)
+ serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
+ &nfsd_thread_sv_ops);
+ if (serv == NULL)
return -ENOMEM;
- init_completion(&nn->nfsd_shutdown_complete);
- nn->nfsd_serv->sv_maxconn = nn->max_connections;
- error = svc_bind(nn->nfsd_serv, net);
+ serv->sv_maxconn = nn->max_connections;
+ error = svc_bind(serv, net);
if (error < 0) {
- svc_destroy(nn->nfsd_serv);
- nfsd_complete_shutdown(net);
+ /* NOT nfsd_put() as notifiers (see below) haven't
+ * been set up yet.
+ */
+ svc_put(serv);
return error;
}
+ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = serv;
+ spin_unlock(&nfsd_notifier_lock);
set_max_drc();
/* check if the notifier is already set */
@@ -669,8 +683,7 @@ int nfsd_create_serv(struct net *net)
register_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
}
- atomic_inc(&nn->ntf_refcnt);
- nfsd_reset_boot_verifier(nn);
+ nfsd_reset_write_verifier(nn);
return 0;
}
@@ -697,16 +710,26 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
return 0;
}
-void nfsd_destroy(struct net *net)
+/* This is the callback for kref_put() below.
+ * There is no code here as the first thing to be done is
+ * call svc_shutdown_net(), but we cannot get the 'net' from
+ * the kref. So do all the work when kref_put returns true.
+ */
+static void nfsd_noop(struct kref *ref)
+{
+}
+
+void nfsd_put(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- int destroy = (nn->nfsd_serv->sv_nrthreads == 1);
- if (destroy)
+ if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
svc_shutdown_net(nn->nfsd_serv, net);
- svc_destroy(nn->nfsd_serv);
- if (destroy)
- nfsd_complete_shutdown(net);
+ svc_destroy(&nn->nfsd_serv->sv_refcnt);
+ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = NULL;
+ spin_unlock(&nfsd_notifier_lock);
+ }
}
int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
@@ -733,7 +756,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
if (tot > NFSD_MAXSERVS) {
/* total too large: scale down requested numbers */
for (i = 0; i < n && tot > 0; i++) {
- int new = nthreads[i] * NFSD_MAXSERVS / tot;
+ int new = nthreads[i] * NFSD_MAXSERVS / tot;
tot -= (nthreads[i] - new);
nthreads[i] = new;
}
@@ -753,12 +776,13 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
/* apply the new numbers */
svc_get(nn->nfsd_serv);
for (i = 0; i < n; i++) {
- err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
- &nn->nfsd_serv->sv_pools[i], nthreads[i]);
+ err = svc_set_num_threads(nn->nfsd_serv,
+ &nn->nfsd_serv->sv_pools[i],
+ nthreads[i]);
if (err)
break;
}
- nfsd_destroy(net);
+ nfsd_put(net);
return err;
}
@@ -795,21 +819,19 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
error = nfsd_startup_net(net, cred);
if (error)
- goto out_destroy;
- error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
- NULL, nrservs);
+ goto out_put;
+ error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs);
if (error)
goto out_shutdown;
- /* We are holding a reference to nn->nfsd_serv which
- * we don't want to count in the return value,
- * so subtract 1
- */
- error = nn->nfsd_serv->sv_nrthreads - 1;
+ error = nn->nfsd_serv->sv_nrthreads;
out_shutdown:
if (error < 0 && !nfsd_up_before)
nfsd_shutdown_net(net);
-out_destroy:
- nfsd_destroy(net); /* Release server */
+out_put:
+ /* Threads now hold service active */
+ if (xchg(&nn->keep_active, 0))
+ nfsd_put(net);
+ nfsd_put(net);
out:
mutex_unlock(&nfsd_mutex);
return error;
@@ -923,9 +945,6 @@ nfsd(void *vrqstp)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int err;
- /* Lock module and set up kernel thread */
- mutex_lock(&nfsd_mutex);
-
/* At this point, the thread shares current->fs
* with the init process. We need to create files with the
* umask as defined by the client instead of init's umask. */
@@ -945,8 +964,7 @@ nfsd(void *vrqstp)
allow_signal(SIGINT);
allow_signal(SIGQUIT);
- nfsdstats.th_cnt++;
- mutex_unlock(&nfsd_mutex);
+ atomic_inc(&nfsdstats.th_cnt);
set_freezable();
@@ -973,19 +991,35 @@ nfsd(void *vrqstp)
/* Clear signals before calling svc_exit_thread() */
flush_signals(current);
- mutex_lock(&nfsd_mutex);
- nfsdstats.th_cnt --;
+ atomic_dec(&nfsdstats.th_cnt);
out:
- rqstp->rq_server = NULL;
+ /* Take an extra ref so that the svc_put in svc_exit_thread()
+ * doesn't call svc_destroy()
+ */
+ svc_get(nn->nfsd_serv);
/* Release the thread */
svc_exit_thread(rqstp);
- nfsd_destroy(net);
+ /* We need to drop a ref, but may not drop the last reference
+ * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that
+ * could deadlock with nfsd_shutdown_threads() waiting for us.
+ * So three options are:
+ * - drop a non-final reference,
+ * - get the mutex without waiting
+ * - sleep briefly and try the above again
+ */
+ while (!svc_put_not_last(nn->nfsd_serv)) {
+ if (mutex_trylock(&nfsd_mutex)) {
+ nfsd_put(net);
+ mutex_unlock(&nfsd_mutex);
+ break;
+ }
+ msleep(20);
+ }
/* Release module */
- mutex_unlock(&nfsd_mutex);
module_put_and_exit(0);
return 0;
}
@@ -1096,7 +1130,6 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
mutex_unlock(&nfsd_mutex);
return -ENODEV;
}
- /* bump up the psudo refcount while traversing */
svc_get(nn->nfsd_serv);
ret = svc_pool_stats_open(nn->nfsd_serv, file);
mutex_unlock(&nfsd_mutex);
@@ -1109,8 +1142,7 @@ int nfsd_pool_stats_release(struct inode *inode, struct file *file)
struct net *net = inode->i_sb->s_fs_info;
mutex_lock(&nfsd_mutex);
- /* this function really, really should have been called svc_put() */
- nfsd_destroy(net);
+ nfsd_put(net);
mutex_unlock(&nfsd_mutex);
return ret;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e73bdbb1634a..95457cfd37fc 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -568,6 +568,10 @@ struct nfs4_ol_stateid {
struct list_head st_locks;
struct nfs4_stateowner *st_stateowner;
struct nfs4_clnt_odstate *st_clnt_odstate;
+/*
+ * These bitmasks use 3 separate bits for READ, WRITE, and BOTH; see the
+ * comment above bmap_to_share_mode() for explanation:
+ */
unsigned char st_access_bmap;
unsigned char st_deny_bmap;
struct nfs4_ol_stateid *st_openstp;
@@ -629,6 +633,7 @@ struct nfsd4_blocked_lock {
struct file_lock nbl_lock;
struct knfsd_fh nbl_fh;
struct nfsd4_callback nbl_cb;
+ struct kref nbl_kref;
};
struct nfsd4_compound_state;
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 1d3b881e7382..a8c5a02a84f0 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -45,7 +45,7 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE]));
/* thread usage: */
- seq_printf(seq, "th %u 0", nfsdstats.th_cnt);
+ seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt));
/* deprecated thread usage histogram stats */
for (i = 0; i < 10; i++)
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index 51ecda852e23..9b43dc3d9991 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -29,11 +29,9 @@ enum {
struct nfsd_stats {
struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
- /* Protected by nfsd_mutex */
- unsigned int th_cnt; /* number of available threads */
+ atomic_t th_cnt; /* number of available threads */
};
-
extern struct nfsd_stats nfsdstats;
extern struct svc_stat nfsd_svcstats;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index f1e0d3c51bc2..c4cf56327843 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -47,7 +47,7 @@
rqstp->rq_xprt->xpt_remotelen); \
} while (0);
-TRACE_EVENT(nfsd_garbage_args_err,
+DECLARE_EVENT_CLASS(nfsd_xdr_err_class,
TP_PROTO(
const struct svc_rqst *rqstp
),
@@ -69,27 +69,13 @@ TRACE_EVENT(nfsd_garbage_args_err,
)
);
-TRACE_EVENT(nfsd_cant_encode_err,
- TP_PROTO(
- const struct svc_rqst *rqstp
- ),
- TP_ARGS(rqstp),
- TP_STRUCT__entry(
- NFSD_TRACE_PROC_ARG_FIELDS
-
- __field(u32, vers)
- __field(u32, proc)
- ),
- TP_fast_assign(
- NFSD_TRACE_PROC_ARG_ASSIGNMENTS
+#define DEFINE_NFSD_XDR_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_xdr_err_class, nfsd_##name##_err, \
+ TP_PROTO(const struct svc_rqst *rqstp), \
+ TP_ARGS(rqstp))
- __entry->vers = rqstp->rq_vers;
- __entry->proc = rqstp->rq_proc;
- ),
- TP_printk("xid=0x%08x vers=%u proc=%u",
- __entry->xid, __entry->vers, __entry->proc
- )
-);
+DEFINE_NFSD_XDR_ERR_EVENT(garbage_args);
+DEFINE_NFSD_XDR_ERR_EVENT(cant_encode);
#define show_nfsd_may_flags(x) \
__print_flags(x, "|", \
@@ -413,6 +399,56 @@ TRACE_EVENT(nfsd_dirent,
)
)
+DECLARE_EVENT_CLASS(nfsd_copy_err_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+ struct svc_fh *src_fhp,
+ loff_t src_offset,
+ struct svc_fh *dst_fhp,
+ loff_t dst_offset,
+ u64 count,
+ int status),
+ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, count, status),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, src_fh_hash)
+ __field(loff_t, src_offset)
+ __field(u32, dst_fh_hash)
+ __field(loff_t, dst_offset)
+ __field(u64, count)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->src_fh_hash = knfsd_fh_hash(&src_fhp->fh_handle);
+ __entry->src_offset = src_offset;
+ __entry->dst_fh_hash = knfsd_fh_hash(&dst_fhp->fh_handle);
+ __entry->dst_offset = dst_offset;
+ __entry->count = count;
+ __entry->status = status;
+ ),
+ TP_printk("xid=0x%08x src_fh_hash=0x%08x src_offset=%lld "
+ "dst_fh_hash=0x%08x dst_offset=%lld "
+ "count=%llu status=%d",
+ __entry->xid, __entry->src_fh_hash, __entry->src_offset,
+ __entry->dst_fh_hash, __entry->dst_offset,
+ (unsigned long long)__entry->count,
+ __entry->status)
+)
+
+#define DEFINE_NFSD_COPY_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_copy_err_class, nfsd_##name, \
+ TP_PROTO(struct svc_rqst *rqstp, \
+ struct svc_fh *src_fhp, \
+ loff_t src_offset, \
+ struct svc_fh *dst_fhp, \
+ loff_t dst_offset, \
+ u64 count, \
+ int status), \
+ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, \
+ count, status))
+
+DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
+
#include "state.h"
#include "filecache.h"
#include "vfs.h"
@@ -538,6 +574,34 @@ DEFINE_EVENT(nfsd_net_class, nfsd_##name, \
DEFINE_NET_EVENT(grace_start);
DEFINE_NET_EVENT(grace_complete);
+TRACE_EVENT(nfsd_writeverf_reset,
+ TP_PROTO(
+ const struct nfsd_net *nn,
+ const struct svc_rqst *rqstp,
+ int error
+ ),
+ TP_ARGS(nn, rqstp, error),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(u32, xid)
+ __field(int, error)
+ __array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->error = error;
+
+ /* avoid seqlock inside TP_fast_assign */
+ memcpy(__entry->verifier, nn->writeverf,
+ NFS4_VERIFIER_SIZE);
+ ),
+ TP_printk("boot_time=%16llx xid=0x%08x error=%d new verifier=0x%s",
+ __entry->boot_time, __entry->xid, __entry->error,
+ __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
+ )
+);
+
TRACE_EVENT(nfsd_clid_cred_mismatch,
TP_PROTO(
const struct nfs4_client *clp,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c99857689e2c..99c2b9dfbb10 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -40,6 +40,7 @@
#include "../internal.h"
#include "acl.h"
#include "idmap.h"
+#include "xdr4.h"
#endif /* CONFIG_NFSD_V4 */
#include "nfsd.h"
@@ -517,15 +518,23 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif
-__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
- struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
+static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp)
+{
+ return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate;
+}
+
+__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos,
+ u64 count, bool sync)
{
struct file *src = nf_src->nf_file;
struct file *dst = nf_dst->nf_file;
+ errseq_t since;
loff_t cloned;
__be32 ret = 0;
- down_write(&nf_dst->nf_rwsem);
+ since = READ_ONCE(dst->f_wb_err);
cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
if (cloned < 0) {
ret = nfserrno(cloned);
@@ -540,15 +549,25 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
+ if (!status)
status = commit_inode_metadata(file_inode(src));
if (status < 0) {
- nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
- nfsd_net_id));
+ struct nfsd_net *nn = net_generic(nf_dst->nf_net,
+ nfsd_net_id);
+
+ trace_nfsd_clone_file_range_err(rqstp,
+ &nfsd4_get_cstate(rqstp)->save_fh,
+ src_pos,
+ &nfsd4_get_cstate(rqstp)->current_fh,
+ dst_pos,
+ count, status);
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, status);
ret = nfserrno(status);
}
}
out_err:
- up_write(&nf_dst->nf_rwsem);
return ret;
}
@@ -777,6 +796,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
int may_flags, struct file **filp)
{
__be32 err;
+ bool retried = false;
validate_process_creds();
/*
@@ -792,9 +812,16 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
*/
if (type == S_IFREG)
may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+retry:
err = fh_verify(rqstp, fhp, type, may_flags);
- if (!err)
+ if (!err) {
err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ if (err == nfserr_stale && !retried) {
+ retried = true;
+ fh_put(fhp);
+ goto retry;
+ }
+ }
validate_process_creds();
return err;
}
@@ -944,10 +971,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
unsigned long *cnt, int stable,
__be32 *verf)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file *file = nf->nf_file;
struct super_block *sb = file_inode(file)->i_sb;
struct svc_export *exp;
struct iov_iter iter;
+ errseq_t since;
__be32 nfserr;
int host_err;
int use_wgather;
@@ -985,36 +1014,28 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
flags |= RWF_SYNC;
iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
- if (flags & RWF_SYNC) {
- down_write(&nf->nf_rwsem);
- host_err = vfs_iter_write(file, &iter, &pos, flags);
- if (host_err < 0)
- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
- nfsd_net_id));
- up_write(&nf->nf_rwsem);
- } else {
- down_read(&nf->nf_rwsem);
- if (verf)
- nfsd_copy_boot_verifier(verf,
- net_generic(SVC_NET(rqstp),
- nfsd_net_id));
- host_err = vfs_iter_write(file, &iter, &pos, flags);
- up_read(&nf->nf_rwsem);
- }
+ since = READ_ONCE(file->f_wb_err);
+ if (verf)
+ nfsd_copy_write_verifier(verf, nn);
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0) {
- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
- nfsd_net_id));
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, host_err);
goto out_nfserr;
}
*cnt = host_err;
nfsd_stats_io_write_add(exp, *cnt);
fsnotify_modify(file);
+ host_err = filemap_check_wb_err(file->f_mapping, since);
+ if (host_err < 0)
+ goto out_nfserr;
if (stable && use_wgather) {
host_err = wait_for_concurrent_writes(file);
- if (host_err < 0)
- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
- nfsd_net_id));
+ if (host_err < 0) {
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, host_err);
+ }
}
out_nfserr:
@@ -1089,19 +1110,6 @@ out:
}
#ifdef CONFIG_NFSD_V3
-static int
-nfsd_filemap_write_and_wait_range(struct nfsd_file *nf, loff_t offset,
- loff_t end)
-{
- struct address_space *mapping = nf->nf_file->f_mapping;
- int ret = filemap_fdatawrite_range(mapping, offset, end);
-
- if (ret)
- return ret;
- filemap_fdatawait_range_keep_errors(mapping, offset, end);
- return 0;
-}
-
/*
* Commit all pending writes to stable storage.
*
@@ -1115,6 +1123,7 @@ __be32
nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, unsigned long count, __be32 *verf)
{
+ struct nfsd_net *nn;
struct nfsd_file *nf;
loff_t end = LLONG_MAX;
__be32 err = nfserr_inval;
@@ -1131,29 +1140,28 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
+ nn = net_generic(nf->nf_net, nfsd_net_id);
if (EX_ISSYNC(fhp->fh_export)) {
- int err2 = nfsd_filemap_write_and_wait_range(nf, offset, end);
+ errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
+ int err2;
- down_write(&nf->nf_rwsem);
- if (!err2)
- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
switch (err2) {
case 0:
- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
- nfsd_net_id));
+ nfsd_copy_write_verifier(verf, nn);
+ err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
+ since);
break;
case -EINVAL:
err = nfserr_notsupp;
break;
default:
- err = nfserrno(err2);
- nfsd_reset_boot_verifier(net_generic(nf->nf_net,
- nfsd_net_id));
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, err2);
}
- up_write(&nf->nf_rwsem);
+ err = nfserrno(err2);
} else
- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
- nfsd_net_id));
+ nfsd_copy_write_verifier(verf, nn);
nfsd_file_put(nf);
out:
@@ -1747,8 +1755,8 @@ retry:
* so do it by hand */
trap = lock_rename(tdentry, fdentry);
ffhp->fh_locked = tfhp->fh_locked = true;
- fill_pre_wcc(ffhp);
- fill_pre_wcc(tfhp);
+ fh_fill_pre_attrs(ffhp);
+ fh_fill_pre_attrs(tfhp);
odentry = lookup_one_len(fname, fdentry, flen);
host_err = PTR_ERR(odentry);
@@ -1808,8 +1816,8 @@ retry:
* were the same, so again we do it by hand.
*/
if (!close_cached) {
- fill_post_wcc(ffhp);
- fill_post_wcc(tfhp);
+ fh_fill_post_attrs(ffhp);
+ fh_fill_post_attrs(tfhp);
}
unlock_rename(tdentry, fdentry);
ffhp->fh_locked = tfhp->fh_locked = false;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index b21b76e6b9a8..9f56dcb22ff7 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -57,7 +57,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
struct xdr_netobj *);
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
struct file *, loff_t, loff_t, int);
-__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ struct nfsd_file *nf_src, u64 src_pos,
struct nfsd_file *nf_dst, u64 dst_pos,
u64 count, bool sync);
#endif /* CONFIG_NFSD_V4 */