summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c5
-rw-r--r--fs/9p/vfs_file.c6
-rw-r--r--fs/9p/vfs_inode_dotl.c29
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/inode.c4
-rw-r--r--fs/afs/proc.c6
-rw-r--r--fs/aio.c31
-rw-r--r--fs/binfmt_elf.c6
-rw-r--r--fs/binfmt_misc.c6
-rw-r--r--fs/btrfs/Kconfig3
-rw-r--r--fs/btrfs/extent_io.c10
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/cachefiles/cache.c17
-rw-r--r--fs/cachefiles/daemon.c11
-rw-r--r--fs/cachefiles/internal.h2
-rw-r--r--fs/cachefiles/io.c2
-rw-r--r--fs/cachefiles/namei.c12
-rw-r--r--fs/ceph/addr.c5
-rw-r--r--fs/ceph/caps.c3
-rw-r--r--fs/ceph/file.c24
-rw-r--r--fs/ceph/metric.c2
-rw-r--r--fs/ceph/quota.c17
-rw-r--r--fs/ceph/super.c169
-rw-r--r--fs/ceph/super.h28
-rw-r--r--fs/cifs/Kconfig2
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/cache.c105
-rw-r--r--fs/cifs/cifs_debug.c8
-rw-r--r--fs/cifs/cifs_spnego.c4
-rw-r--r--fs/cifs/cifs_spnego.h3
-rw-r--r--fs/cifs/cifs_swn.c9
-rw-r--r--fs/cifs/cifsencrypt.c6
-rw-r--r--fs/cifs/cifsfs.c24
-rw-r--r--fs/cifs/cifsfs.h3
-rw-r--r--fs/cifs/cifsglob.h86
-rw-r--r--fs/cifs/cifspdu.h2
-rw-r--r--fs/cifs/cifsproto.h39
-rw-r--r--fs/cifs/cifssmb.c94
-rw-r--r--fs/cifs/connect.c326
-rw-r--r--fs/cifs/dfs_cache.c2
-rw-r--r--fs/cifs/dir.c5
-rw-r--r--fs/cifs/file.c66
-rw-r--r--fs/cifs/fs_context.c8
-rw-r--r--fs/cifs/fscache.c333
-rw-r--r--fs/cifs/fscache.h128
-rw-r--r--fs/cifs/inode.c25
-rw-r--r--fs/cifs/misc.c49
-rw-r--r--fs/cifs/netmisc.c5
-rw-r--r--fs/cifs/ntlmssp.h32
-rw-r--r--fs/cifs/sess.c275
-rw-r--r--fs/cifs/smb1ops.c24
-rw-r--r--fs/cifs/smb2glob.h2
-rw-r--r--fs/cifs/smb2misc.c5
-rw-r--r--fs/cifs/smb2ops.c34
-rw-r--r--fs/cifs/smb2pdu.c263
-rw-r--r--fs/cifs/smb2proto.h6
-rw-r--r--fs/cifs/smb2transport.c67
-rw-r--r--fs/cifs/transport.c89
-rw-r--r--fs/coredump.c80
-rw-r--r--fs/dcache.c37
-rw-r--r--fs/eventpoll.c10
-rw-r--r--fs/exec.c52
-rw-r--r--fs/exfat/balloc.c2
-rw-r--r--fs/exfat/dir.c42
-rw-r--r--fs/exfat/exfat_fs.h6
-rw-r--r--fs/exfat/fatent.c4
-rw-r--r--fs/exfat/file.c18
-rw-r--r--fs/exfat/inode.c15
-rw-r--r--fs/exfat/misc.c3
-rw-r--r--fs/exfat/namei.c48
-rw-r--r--fs/exfat/nls.c2
-rw-r--r--fs/exfat/super.c11
-rw-r--r--fs/ext4/extents.c8
-rw-r--r--fs/ext4/inline.c5
-rw-r--r--fs/ext4/mballoc.c14
-rw-r--r--fs/ext4/page-io.c9
-rw-r--r--fs/ext4/readpage.c6
-rw-r--r--fs/ext4/super.c42
-rw-r--r--fs/f2fs/Kconfig1
-rw-r--r--fs/f2fs/checkpoint.c6
-rw-r--r--fs/f2fs/compress.c84
-rw-r--r--fs/f2fs/data.c368
-rw-r--r--fs/f2fs/f2fs.h43
-rw-r--r--fs/f2fs/file.c509
-rw-r--r--fs/f2fs/gc.c31
-rw-r--r--fs/f2fs/inline.c4
-rw-r--r--fs/f2fs/inode.c22
-rw-r--r--fs/f2fs/iostat.c40
-rw-r--r--fs/f2fs/node.c27
-rw-r--r--fs/f2fs/recovery.c8
-rw-r--r--fs/f2fs/segment.c19
-rw-r--r--fs/f2fs/segment.h3
-rw-r--r--fs/f2fs/super.c89
-rw-r--r--fs/f2fs/sysfs.c29
-rw-r--r--fs/f2fs/xattr.c40
-rw-r--r--fs/fat/file.c5
-rw-r--r--fs/file_table.c47
-rw-r--r--fs/fs_context.c2
-rw-r--r--fs/fscache/volume.c4
-rw-r--r--fs/fuse/virtio_fs.c4
-rw-r--r--fs/hfsplus/hfsplus_raw.h12
-rw-r--r--fs/hfsplus/xattr.c4
-rw-r--r--fs/hugetlbfs/inode.c7
-rw-r--r--fs/inode.c88
-rw-r--r--fs/io-wq.c97
-rw-r--r--fs/io-wq.h2
-rw-r--r--fs/io_uring.c79
-rw-r--r--fs/ioctl.c2
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jffs2/background.c2
-rw-r--r--fs/ksmbd/asn1.c142
-rw-r--r--fs/ksmbd/auth.c27
-rw-r--r--fs/ksmbd/auth.h10
-rw-r--r--fs/ksmbd/connection.c10
-rw-r--r--fs/ksmbd/connection.h12
-rw-r--r--fs/ksmbd/ksmbd_netlink.h12
-rw-r--r--fs/ksmbd/mgmt/user_config.c10
-rw-r--r--fs/ksmbd/mgmt/user_config.h1
-rw-r--r--fs/ksmbd/mgmt/user_session.h1
-rw-r--r--fs/ksmbd/smb2misc.c18
-rw-r--r--fs/ksmbd/smb2ops.c16
-rw-r--r--fs/ksmbd/smb2pdu.c222
-rw-r--r--fs/ksmbd/smb2pdu.h1
-rw-r--r--fs/ksmbd/smb_common.h1
-rw-r--r--fs/ksmbd/transport_ipc.c2
-rw-r--r--fs/ksmbd/transport_rdma.c261
-rw-r--r--fs/ksmbd/transport_rdma.h4
-rw-r--r--fs/ksmbd/transport_tcp.c3
-rw-r--r--fs/ksmbd/vfs_cache.h10
-rw-r--r--fs/lockd/svc.c200
-rw-r--r--fs/lockd/svclock.c6
-rw-r--r--fs/locks.c34
-rw-r--r--fs/mpage.c7
-rw-r--r--fs/namei.c58
-rw-r--r--fs/namespace.c24
-rw-r--r--fs/netfs/read_helper.c3
-rw-r--r--fs/nfs/callback.c36
-rw-r--r--fs/nfs/export.c2
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfsd/filecache.c79
-rw-r--r--fs/nfsd/filecache.h1
-rw-r--r--fs/nfsd/netns.h27
-rw-r--r--fs/nfsd/nfs3proc.c6
-rw-r--r--fs/nfsd/nfs3xdr.c65
-rw-r--r--fs/nfsd/nfs4proc.c24
-rw-r--r--fs/nfsd/nfs4state.c63
-rw-r--r--fs/nfsd/nfs4xdr.c21
-rw-r--r--fs/nfsd/nfscache.c2
-rw-r--r--fs/nfsd/nfsctl.c27
-rw-r--r--fs/nfsd/nfsd.h2
-rw-r--r--fs/nfsd/nfsfh.c66
-rw-r--r--fs/nfsd/nfsfh.h40
-rw-r--r--fs/nfsd/nfsproc.c8
-rw-r--r--fs/nfsd/nfssvc.c222
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--fs/nfsd/stats.c2
-rw-r--r--fs/nfsd/stats.h4
-rw-r--r--fs/nfsd/trace.h106
-rw-r--r--fs/nfsd/vfs.c122
-rw-r--r--fs/nfsd/vfs.h3
-rw-r--r--fs/nilfs2/page.c4
-rw-r--r--fs/notify/dnotify/dnotify.c21
-rw-r--r--fs/notify/fanotify/fanotify_user.c10
-rw-r--r--fs/notify/inotify/inotify_user.c11
-rw-r--r--fs/ntfs/attrib.c2
-rw-r--r--fs/ntfs3/ntfs_fs.h1
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/aops.c26
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/ocfs2/cluster/masklog.c11
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c4
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c18
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/dlm/dlmthread.c2
-rw-r--r--fs/ocfs2/filecheck.c3
-rw-r--r--fs/ocfs2/journal.c6
-rw-r--r--fs/ocfs2/stackglue.c25
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/orangefs/orangefs-bufmap.c7
-rw-r--r--fs/orangefs/orangefs-sysfs.c21
-rw-r--r--fs/pipe.c64
-rw-r--r--fs/proc/array.c9
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/generic.c6
-rw-r--r--fs/proc/inode.c1
-rw-r--r--fs/proc/internal.h5
-rw-r--r--fs/proc/proc_net.c8
-rw-r--r--fs/proc/proc_sysctl.c72
-rw-r--r--fs/proc/task_mmu.c13
-rw-r--r--fs/proc/vmcore.c10
-rw-r--r--fs/signalfd.c5
-rw-r--r--fs/smbfs_common/smb2pdu.h2
-rw-r--r--fs/smbfs_common/smbfsctl.h2
-rw-r--r--fs/squashfs/super.c33
-rw-r--r--fs/super.c7
-rw-r--r--fs/sysctls.c39
-rw-r--r--fs/tracefs/inode.c24
-rw-r--r--fs/unicode/.gitignore2
-rw-r--r--fs/unicode/Kconfig13
-rw-r--r--fs/unicode/Makefile13
-rw-r--r--fs/unicode/mkutf8data.c24
-rw-r--r--fs/unicode/utf8-core.c109
-rw-r--r--fs/unicode/utf8-norm.c262
-rw-r--r--fs/unicode/utf8-selftest.c94
-rw-r--r--fs/unicode/utf8data.c_shipped (renamed from fs/unicode/utf8data.h_shipped)22
-rw-r--r--fs/unicode/utf8n.h81
-rw-r--r--fs/userfaultfd.c8
-rw-r--r--fs/xfs/kmem.c3
-rw-r--r--fs/xfs/libxfs/xfs_fs.h37
-rw-r--r--fs/xfs/scrub/agheader.c53
-rw-r--r--fs/xfs/scrub/agheader_repair.c12
-rw-r--r--fs/xfs/xfs_bmap_util.c7
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_dir2_readdir.c53
-rw-r--r--fs/xfs/xfs_file.c3
-rw-r--r--fs/xfs/xfs_icache.c22
-rw-r--r--fs/xfs/xfs_ioctl.c102
-rw-r--r--fs/xfs/xfs_ioctl.h6
-rw-r--r--fs/xfs/xfs_ioctl32.c27
-rw-r--r--fs/xfs/xfs_ioctl32.h22
222 files changed, 4610 insertions, 3512 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index c72e9f8f5f32..9a10e68c5f30 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -43,6 +43,11 @@ static void v9fs_req_issue_op(struct netfs_read_subrequest *subreq)
iov_iter_xarray(&to, READ, &rreq->mapping->i_pages, pos, len);
total = p9_client_read(fid, pos, &to, &err);
+
+ /* if we just extended the file size, any portion not in
+ * cache won't be on server and is zeroes */
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+
netfs_subreq_terminated(subreq, err ?: total, false);
}
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index be72ad9edb3e..2573c08f335c 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -115,7 +115,6 @@ out_error:
static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
{
- int res = 0;
struct inode *inode = file_inode(filp);
p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
@@ -125,7 +124,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
invalidate_mapping_pages(&inode->i_data, 0, -1);
}
- return res;
+ return 0;
}
static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
@@ -140,8 +139,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
fid = filp->private_data;
BUG_ON(fid == NULL);
- if ((fl->fl_flags & FL_POSIX) != FL_POSIX)
- BUG();
+ BUG_ON((fl->fl_flags & FL_POSIX) != FL_POSIX);
res = locks_lock_file_wait(filp, fl);
if (res < 0)
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index cae301d09cd3..d17502a738a9 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -552,7 +552,10 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
{
int retval, use_dentry = 0;
struct p9_fid *fid = NULL;
- struct p9_iattr_dotl p9attr;
+ struct p9_iattr_dotl p9attr = {
+ .uid = INVALID_UID,
+ .gid = INVALID_GID,
+ };
struct inode *inode = d_inode(dentry);
p9_debug(P9_DEBUG_VFS, "\n");
@@ -562,14 +565,22 @@ int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
return retval;
p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid);
- p9attr.mode = iattr->ia_mode;
- p9attr.uid = iattr->ia_uid;
- p9attr.gid = iattr->ia_gid;
- p9attr.size = iattr->ia_size;
- p9attr.atime_sec = iattr->ia_atime.tv_sec;
- p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
- p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
- p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
+ if (iattr->ia_valid & ATTR_MODE)
+ p9attr.mode = iattr->ia_mode;
+ if (iattr->ia_valid & ATTR_UID)
+ p9attr.uid = iattr->ia_uid;
+ if (iattr->ia_valid & ATTR_GID)
+ p9attr.gid = iattr->ia_gid;
+ if (iattr->ia_valid & ATTR_SIZE)
+ p9attr.size = iattr->ia_size;
+ if (iattr->ia_valid & ATTR_ATIME_SET) {
+ p9attr.atime_sec = iattr->ia_atime.tv_sec;
+ p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
+ }
+ if (iattr->ia_valid & ATTR_MTIME_SET) {
+ p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
+ p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
+ }
if (iattr->ia_valid & ATTR_FILE) {
fid = iattr->ia_file->private_data;
diff --git a/fs/Makefile b/fs/Makefile
index 84c5e4cdfee5..dab324aea08f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -6,6 +6,8 @@
# Rewritten to use lists instead of if-statements.
#
+obj-$(CONFIG_SYSCTL) += sysctls.o
+
obj-y := open.o read_write.o file_table.o super.o \
char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
ioctl.o readdir.o select.o dcache.o inode.o \
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index adbb3a1edcbf..5156821bfe6a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -355,7 +355,6 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct super_block *sb = inode->i_sb;
struct object_info obj;
- int ret;
obj.indaddr = ADFS_I(inode)->indaddr;
obj.name_len = 0;
@@ -365,6 +364,5 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
obj.attr = ADFS_I(inode)->attr;
obj.size = inode->i_size;
- ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
- return ret;
+ return adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
}
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 065a28bfa3f1..e1b863449296 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -227,7 +227,7 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
__acquires(cell->proc_lock)
{
- struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+ struct afs_cell *cell = pde_data(file_inode(m->file));
rcu_read_lock();
return seq_hlist_start_head_rcu(&cell->proc_volumes, *_pos);
@@ -236,7 +236,7 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
static void *afs_proc_cell_volumes_next(struct seq_file *m, void *v,
loff_t *_pos)
{
- struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+ struct afs_cell *cell = pde_data(file_inode(m->file));
return seq_hlist_next_rcu(v, &cell->proc_volumes, _pos);
}
@@ -322,7 +322,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
{
struct afs_vl_seq_net_private *priv = m->private;
struct afs_vlserver_list *vllist;
- struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+ struct afs_cell *cell = pde_data(file_inode(m->file));
loff_t pos = *_pos;
rcu_read_lock();
diff --git a/fs/aio.c b/fs/aio.c
index f6f1cbffef9e..4ceba13a7db0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -220,9 +220,35 @@ struct aio_kiocb {
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
-unsigned long aio_nr; /* current system wide number of aio requests */
-unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
+static unsigned long aio_nr; /* current system wide number of aio requests */
+static unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
/*----end sysctl variables---*/
+#ifdef CONFIG_SYSCTL
+static struct ctl_table aio_sysctls[] = {
+ {
+ .procname = "aio-nr",
+ .data = &aio_nr,
+ .maxlen = sizeof(aio_nr),
+ .mode = 0444,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "aio-max-nr",
+ .data = &aio_max_nr,
+ .maxlen = sizeof(aio_max_nr),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {}
+};
+
+static void __init aio_sysctl_init(void)
+{
+ register_sysctl_init("fs", aio_sysctls);
+}
+#else
+#define aio_sysctl_init() do { } while (0)
+#endif
static struct kmem_cache *kiocb_cachep;
static struct kmem_cache *kioctx_cachep;
@@ -275,6 +301,7 @@ static int __init aio_setup(void)
kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+ aio_sysctl_init();
return 0;
}
__initcall(aio_setup);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f8c7f26f1fbb..605017eb9349 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1116,11 +1116,11 @@ out_free_interp:
* independently randomized mmap region (0 load_bias
* without MAP_FIXED nor MAP_FIXED_NOREPLACE).
*/
- if (interpreter) {
+ alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
+ if (alignment > ELF_MIN_ALIGN) {
load_bias = ELF_ET_DYN_BASE;
if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd();
- alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
if (alignment)
load_bias &= ~(alignment - 1);
elf_flags |= MAP_FIXED_NOREPLACE;
@@ -1585,7 +1585,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
rcu_read_unlock();
- strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
+ get_task_comm(psinfo->pr_fname, p);
return 0;
}
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index e1eae7ea823a..ddea6acbddde 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -822,7 +822,11 @@ static int __init init_misc_binfmt(void)
int err = register_filesystem(&bm_fs_type);
if (!err)
insert_binfmt(&misc_format);
- return err;
+ if (!register_sysctl_mount_point("fs/binfmt_misc")) {
+ pr_warn("Failed to create fs/binfmt_misc sysctl mount point");
+ return -ENOMEM;
+ }
+ return 0;
}
static void __exit exit_misc_binfmt(void)
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 520a0f6a7d9e..183e5c4aed34 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -18,8 +18,7 @@ config BTRFS_FS
select RAID6_PQ
select XOR_BLOCKS
select SRCU
- depends on !PPC_256K_PAGES # powerpc
- depends on !PAGE_SIZE_256KB # hexagon
+ depends on PAGE_SIZE_LESS_THAN_256KB
help
Btrfs is a general purpose copy-on-write filesystem with extents,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d6d48ecf823c..409bad3928db 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -12,7 +12,6 @@
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
-#include <linux/cleancache.h>
#include <linux/fsverity.h>
#include "misc.h"
#include "extent_io.h"
@@ -3578,15 +3577,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
goto out;
}
- if (!PageUptodate(page)) {
- if (cleancache_get_page(page) == 0) {
- BUG_ON(blocksize != PAGE_SIZE);
- unlock_extent(tree, start, end);
- unlock_page(page);
- goto out;
- }
- }
-
if (page->index == last_byte >> PAGE_SHIFT) {
size_t zero_offset = offset_in_page(last_byte);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0ec09fe01be6..4d947ba32da9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -23,7 +23,6 @@
#include <linux/miscdevice.h>
#include <linux/magic.h>
#include <linux/slab.h>
-#include <linux/cleancache.h>
#include <linux/ratelimit.h>
#include <linux/crc32c.h>
#include <linux/btrfs.h>
@@ -1374,7 +1373,6 @@ static int btrfs_fill_super(struct super_block *sb,
goto fail_close;
}
- cleancache_init_fs(sb);
sb->s_flags |= SB_ACTIVE;
return 0;
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
index ce4d4785003c..7077f72e6f47 100644
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -49,11 +49,19 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
goto error_unsupported;
}
- /* check parameters */
+ /* Check features of the backing filesystem:
+ * - Directories must support looking up and directory creation
+ * - We create tmpfiles to handle invalidation
+ * - We use xattrs to store metadata
+ * - We need to be able to query the amount of space available
+ * - We want to be able to sync the filesystem when stopping the cache
+ * - We use DIO to/from pages, so the blocksize mustn't be too big.
+ */
ret = -EOPNOTSUPP;
if (d_is_negative(root) ||
!d_backing_inode(root)->i_op->lookup ||
!d_backing_inode(root)->i_op->mkdir ||
+ !d_backing_inode(root)->i_op->tmpfile ||
!(d_backing_inode(root)->i_opflags & IOP_XATTR) ||
!root->d_sb->s_op->statfs ||
!root->d_sb->s_op->sync_fs ||
@@ -84,9 +92,7 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
goto error_unsupported;
cache->bsize = stats.f_bsize;
- cache->bshift = 0;
- if (stats.f_bsize < PAGE_SIZE)
- cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize);
+ cache->bshift = ilog2(stats.f_bsize);
_debug("blksize %u (shift %u)",
cache->bsize, cache->bshift);
@@ -106,7 +112,6 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
(unsigned long long) cache->fcull,
(unsigned long long) cache->fstop);
- stats.f_blocks >>= cache->bshift;
do_div(stats.f_blocks, 100);
cache->bstop = stats.f_blocks * cache->bstop_percent;
cache->bcull = stats.f_blocks * cache->bcull_percent;
@@ -209,7 +214,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
return ret;
}
- b_avail = stats.f_bavail >> cache->bshift;
+ b_avail = stats.f_bavail;
b_writing = atomic_long_read(&cache->b_writing);
if (b_avail > b_writing)
b_avail -= b_writing;
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 40a792421fc1..7ac04ee2c0a0 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -703,6 +703,17 @@ static int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
return -EBUSY;
}
+ /* Make sure we have copies of the tag string */
+ if (!cache->tag) {
+ /*
+ * The tag string is released by the fops->release()
+ * function, so we don't release it on error here
+ */
+ cache->tag = kstrdup("CacheFiles", GFP_KERNEL);
+ if (!cache->tag)
+ return -ENOMEM;
+ }
+
return cachefiles_add_cache(cache);
}
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 8dd54d9375b6..c793d33b0224 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -86,7 +86,7 @@ struct cachefiles_cache {
unsigned bcull_percent; /* when to start culling (% blocks) */
unsigned bstop_percent; /* when to stop allocating (% blocks) */
unsigned bsize; /* cache's block size */
- unsigned bshift; /* min(ilog2(PAGE_SIZE / bsize), 0) */
+ unsigned bshift; /* ilog2(bsize) */
uint64_t frun; /* when to stop culling */
uint64_t fcull; /* when to start culling */
uint64_t fstop; /* when to stop allocating */
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 60b1eac2ce78..04eb52736990 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -264,7 +264,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
ki->term_func = term_func;
ki->term_func_priv = term_func_priv;
ki->was_async = true;
- ki->b_writing = (len + (1 << cache->bshift)) >> cache->bshift;
+ ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift;
if (ki->term_func)
ki->iocb.ki_complete = cachefiles_write_complete;
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 9bd692870617..f256c8aff7bb 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -25,7 +25,9 @@ static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object,
trace_cachefiles_mark_active(object, inode);
can_use = true;
} else {
- pr_notice("cachefiles: Inode already in use: %pd\n", dentry);
+ trace_cachefiles_mark_failed(object, inode);
+ pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
+ dentry, inode->i_ino);
}
return can_use;
@@ -101,6 +103,7 @@ retry:
subdir = lookup_one_len(dirname, dir, strlen(dirname));
else
subdir = ERR_PTR(ret);
+ trace_cachefiles_lookup(NULL, dir, subdir);
if (IS_ERR(subdir)) {
trace_cachefiles_vfs_error(NULL, d_backing_inode(dir),
PTR_ERR(subdir),
@@ -135,6 +138,7 @@ retry:
cachefiles_trace_mkdir_error);
goto mkdir_error;
}
+ trace_cachefiles_mkdir(dir, subdir);
if (unlikely(d_unhashed(subdir))) {
cachefiles_put_directory(subdir);
@@ -233,7 +237,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
};
int ret;
- trace_cachefiles_unlink(object, dentry, why);
+ trace_cachefiles_unlink(object, d_inode(dentry)->i_ino, why);
ret = security_path_unlink(&path, dentry);
if (ret < 0) {
cachefiles_io_error(cache, "Unlink security error");
@@ -386,7 +390,7 @@ try_again:
.new_dir = d_inode(cache->graveyard),
.new_dentry = grave,
};
- trace_cachefiles_rename(object, rep, grave, why);
+ trace_cachefiles_rename(object, d_inode(rep)->i_ino, why);
ret = cachefiles_inject_read_error();
if (ret == 0)
ret = vfs_rename(&rd);
@@ -617,7 +621,7 @@ bool cachefiles_look_up_object(struct cachefiles_object *object)
object->d_name_len);
else
dentry = ERR_PTR(ret);
- trace_cachefiles_lookup(object, dentry);
+ trace_cachefiles_lookup(object, fan, dentry);
if (IS_ERR(dentry)) {
if (dentry == ERR_PTR(-ENOENT))
goto new_file;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b3d9459c9bbd..c98e5238a1b6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -297,10 +297,6 @@ out:
dout("%s: result %d\n", __func__, err);
}
-static void ceph_init_rreq(struct netfs_read_request *rreq, struct file *file)
-{
-}
-
static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
{
struct inode *inode = mapping->host;
@@ -312,7 +308,6 @@ static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
}
static const struct netfs_read_request_ops ceph_netfs_read_ops = {
- .init_rreq = ceph_init_rreq,
.is_cache_enabled = ceph_is_cache_enabled,
.begin_cache_operation = ceph_begin_cache_operation,
.issue_op = ceph_netfs_issue_op,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7d2c33cdbac6..7d305b974824 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3376,8 +3376,7 @@ static void handle_cap_grant(struct inode *inode,
if ((newcaps & CEPH_CAP_LINK_SHARED) &&
(extra_info->issued & CEPH_CAP_LINK_EXCL) == 0) {
set_nlink(inode, le32_to_cpu(grant->nlink));
- if (inode->i_nlink == 0 &&
- (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+ if (inode->i_nlink == 0)
deleted_inode = true;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 9d9304e712d9..5b9104b8e453 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -204,6 +204,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
int fmode, bool isdir)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_mount_options *opt =
+ ceph_inode_to_client(&ci->vfs_inode)->mount_options;
struct ceph_file_info *fi;
dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
@@ -225,6 +227,9 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
if (!fi)
return -ENOMEM;
+ if (opt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
+ fi->flags |= CEPH_F_SYNC;
+
file->private_data = fi;
}
@@ -1541,7 +1546,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct ceph_inode_info *ci = ceph_inode(inode);
bool direct_lock = iocb->ki_flags & IOCB_DIRECT;
ssize_t ret;
- int want, got = 0;
+ int want = 0, got = 0;
int retry_op = 0, read = 0;
again:
@@ -1556,13 +1561,14 @@ again:
else
ceph_start_io_read(inode);
+ if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
+ want |= CEPH_CAP_FILE_CACHE;
if (fi->fmode & CEPH_FILE_MODE_LAZY)
- want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
- else
- want = CEPH_CAP_FILE_CACHE;
+ want |= CEPH_CAP_FILE_LAZYIO;
+
ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got);
if (ret < 0) {
- if (iocb->ki_flags & IOCB_DIRECT)
+ if (direct_lock)
ceph_end_io_direct(inode);
else
ceph_end_io_read(inode);
@@ -1696,7 +1702,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0;
- int err, want, got;
+ int err, want = 0, got;
bool direct_lock = false;
u32 map_flags;
u64 pool_flags;
@@ -1771,10 +1777,10 @@ retry_snap:
dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
inode, ceph_vinop(inode), pos, count, i_size_read(inode));
+ if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
+ want |= CEPH_CAP_FILE_BUFFER;
if (fi->fmode & CEPH_FILE_MODE_LAZY)
- want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
- else
- want = CEPH_CAP_FILE_BUFFER;
+ want |= CEPH_CAP_FILE_LAZYIO;
got = 0;
err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, &got);
if (err < 0)
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index c57699d8408d..0fcba68f9a99 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -160,8 +160,6 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
msg->hdr.version = cpu_to_le16(1);
msg->hdr.compat_version = cpu_to_le16(1);
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
- dout("client%llu send metrics to mds%d\n",
- ceph_client_gid(mdsc->fsc->client), s->s_mds);
ceph_con_send(&s->s_con, msg);
return true;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 620c691af40e..a338a3ec0dc4 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -30,6 +30,9 @@ static inline bool ceph_has_realms_with_quotas(struct inode *inode)
/* if root is the real CephFS root, we don't have quota realms */
if (root && ceph_ino(root) == CEPH_INO_ROOT)
return false;
+ /* MDS stray dirs have no quota realms */
+ if (ceph_vino_is_reserved(ceph_inode(inode)->i_vino))
+ return false;
/* otherwise, we can't know for sure */
return true;
}
@@ -494,10 +497,24 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
if (ci->i_max_bytes) {
total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
+ /* For quota size less than 4MB, use 4KB block size */
+ if (!total) {
+ total = ci->i_max_bytes >> CEPH_4K_BLOCK_SHIFT;
+ used = ci->i_rbytes >> CEPH_4K_BLOCK_SHIFT;
+ buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
+ }
/* It is possible for a quota to be exceeded.
* Report 'zero' in that case
*/
free = total > used ? total - used : 0;
+ /* For quota size less than 4KB, report the
+ * total=used=4KB,free=0 when quota is full
+ * and total=free=4KB, used=0 otherwise */
+ if (!total) {
+ total = 1;
+ free = ci->i_max_bytes > ci->i_rbytes ? 1 : 0;
+ buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
+ }
}
spin_unlock(&ci->i_ceph_lock);
if (total) {
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index bea89bdb534a..bf79f369aec6 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -27,6 +27,8 @@
#include <linux/ceph/auth.h>
#include <linux/ceph/debugfs.h>
+#include <uapi/linux/magic.h>
+
static DEFINE_SPINLOCK(ceph_fsc_lock);
static LIST_HEAD(ceph_fsc_list);
@@ -146,6 +148,7 @@ enum {
Opt_mds_namespace,
Opt_recover_session,
Opt_source,
+ Opt_mon_addr,
/* string args above */
Opt_dirstat,
Opt_rbytes,
@@ -159,6 +162,7 @@ enum {
Opt_quotadf,
Opt_copyfrom,
Opt_wsync,
+ Opt_pagecache,
};
enum ceph_recover_session_mode {
@@ -197,8 +201,10 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
fsparam_u32 ("rsize", Opt_rsize),
fsparam_string ("snapdirname", Opt_snapdirname),
fsparam_string ("source", Opt_source),
+ fsparam_string ("mon_addr", Opt_mon_addr),
fsparam_u32 ("wsize", Opt_wsize),
fsparam_flag_no ("wsync", Opt_wsync),
+ fsparam_flag_no ("pagecache", Opt_pagecache),
{}
};
@@ -228,9 +234,92 @@ static void canonicalize_path(char *path)
}
/*
- * Parse the source parameter. Distinguish the server list from the path.
+ * Check if the mds namespace in ceph_mount_options matches
+ * the passed in namespace string. First time match (when
+ * ->mds_namespace is NULL) is treated specially, since
+ * ->mds_namespace needs to be initialized by the caller.
+ */
+static int namespace_equals(struct ceph_mount_options *fsopt,
+ const char *namespace, size_t len)
+{
+ return !(fsopt->mds_namespace &&
+ (strlen(fsopt->mds_namespace) != len ||
+ strncmp(fsopt->mds_namespace, namespace, len)));
+}
+
+static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end,
+ struct fs_context *fc)
+{
+ int r;
+ struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
+
+ if (*dev_name_end != ':')
+ return invalfc(fc, "separator ':' missing in source");
+
+ r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name,
+ pctx->copts, fc->log.log, ',');
+ if (r)
+ return r;
+
+ fsopt->new_dev_syntax = false;
+ return 0;
+}
+
+static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end,
+ struct fs_context *fc)
+{
+ size_t len;
+ struct ceph_fsid fsid;
+ struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
+ char *fsid_start, *fs_name_start;
+
+ if (*dev_name_end != '=') {
+ dout("separator '=' missing in source");
+ return -EINVAL;
+ }
+
+ fsid_start = strchr(dev_name, '@');
+ if (!fsid_start)
+ return invalfc(fc, "missing cluster fsid");
+ ++fsid_start; /* start of cluster fsid */
+
+ fs_name_start = strchr(fsid_start, '.');
+ if (!fs_name_start)
+ return invalfc(fc, "missing file system name");
+
+ if (ceph_parse_fsid(fsid_start, &fsid))
+ return invalfc(fc, "Invalid FSID");
+
+ ++fs_name_start; /* start of file system name */
+ len = dev_name_end - fs_name_start;
+
+ if (!namespace_equals(fsopt, fs_name_start, len))
+ return invalfc(fc, "Mismatching mds_namespace");
+ kfree(fsopt->mds_namespace);
+ fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL);
+ if (!fsopt->mds_namespace)
+ return -ENOMEM;
+ dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace);
+
+ fsopt->new_dev_syntax = true;
+ return 0;
+}
+
+/*
+ * Parse the source parameter for new device format. Distinguish the device
+ * spec from the path. Try parsing new device format and fallback to old
+ * format if needed.
+ *
+ * New device syntax will looks like:
+ * <device_spec>=/<path>
+ * where
+ * <device_spec> is name@fsid.fsname
+ * <path> is optional, but if present must begin with '/'
+ * (monitor addresses are passed via mount option)
*
- * The source will look like:
+ * Old device syntax is:
* <server_spec>[,<server_spec>...]:[<path>]
* where
* <server_spec> is <ip>[:<port>]
@@ -263,24 +352,44 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
dev_name_end = dev_name + strlen(dev_name);
}
- dev_name_end--; /* back up to ':' separator */
- if (dev_name_end < dev_name || *dev_name_end != ':')
- return invalfc(fc, "No path or : separator in source");
+ dev_name_end--; /* back up to separator */
+ if (dev_name_end < dev_name)
+ return invalfc(fc, "Path missing in source");
dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
if (fsopt->server_path)
dout("server path '%s'\n", fsopt->server_path);
- ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name,
- pctx->copts, fc->log.log);
- if (ret)
- return ret;
+ dout("trying new device syntax");
+ ret = ceph_parse_new_source(dev_name, dev_name_end, fc);
+ if (ret) {
+ if (ret != -EINVAL)
+ return ret;
+ dout("trying old device syntax");
+ ret = ceph_parse_old_source(dev_name, dev_name_end, fc);
+ if (ret)
+ return ret;
+ }
fc->source = param->string;
param->string = NULL;
return 0;
}
+static int ceph_parse_mon_addr(struct fs_parameter *param,
+ struct fs_context *fc)
+{
+ struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
+
+ kfree(fsopt->mon_addr);
+ fsopt->mon_addr = param->string;
+ param->string = NULL;
+
+ return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr),
+ pctx->copts, fc->log.log, '/');
+}
+
static int ceph_parse_mount_param(struct fs_context *fc,
struct fs_parameter *param)
{
@@ -306,6 +415,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
param->string = NULL;
break;
case Opt_mds_namespace:
+ if (!namespace_equals(fsopt, param->string, strlen(param->string)))
+ return invalfc(fc, "Mismatching mds_namespace");
kfree(fsopt->mds_namespace);
fsopt->mds_namespace = param->string;
param->string = NULL;
@@ -323,6 +434,8 @@ static int ceph_parse_mount_param(struct fs_context *fc,
if (fc->source)
return invalfc(fc, "Multiple sources specified");
return ceph_parse_source(param, fc);
+ case Opt_mon_addr:
+ return ceph_parse_mon_addr(param, fc);
case Opt_wsize:
if (result.uint_32 < PAGE_SIZE ||
result.uint_32 > CEPH_MAX_WRITE_SIZE)
@@ -455,6 +568,12 @@ static int ceph_parse_mount_param(struct fs_context *fc,
else
fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
break;
+ case Opt_pagecache:
+ if (result.negated)
+ fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE;
+ else
+ fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
+ break;
default:
BUG();
}
@@ -474,6 +593,7 @@ static void destroy_mount_options(struct ceph_mount_options *args)
kfree(args->mds_namespace);
kfree(args->server_path);
kfree(args->fscache_uniq);
+ kfree(args->mon_addr);
kfree(args);
}
@@ -517,6 +637,10 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
if (ret)
return ret;
+ ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr);
+ if (ret)
+ return ret;
+
return ceph_compare_options(new_opt, fsc->client);
}
@@ -572,15 +696,22 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
seq_puts(m, ",copyfrom");
- if (fsopt->mds_namespace)
+ /* dump mds_namespace when old device syntax is in use */
+ if (fsopt->mds_namespace && !fsopt->new_dev_syntax)
seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
+ if (fsopt->mon_addr)
+ seq_printf(m, ",mon_addr=%s", fsopt->mon_addr);
+
if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
seq_show_option(m, "recover_session", "clean");
if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
seq_puts(m, ",wsync");
+ if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
+ seq_puts(m, ",nopagecache");
+
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
seq_printf(m, ",wsize=%u", fsopt->wsize);
if (fsopt->rsize != CEPH_MAX_READ_SIZE)
@@ -1052,6 +1183,7 @@ static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
static int ceph_get_tree(struct fs_context *fc)
{
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
+ struct ceph_mount_options *fsopt = pctx->opts;
struct super_block *sb;
struct ceph_fs_client *fsc;
struct dentry *res;
@@ -1063,6 +1195,8 @@ static int ceph_get_tree(struct fs_context *fc)
if (!fc->source)
return invalfc(fc, "No source");
+ if (fsopt->new_dev_syntax && !fsopt->mon_addr)
+ return invalfc(fc, "No monitor address");
/* create client (which we may/may not use) */
fsc = create_fs_client(pctx->opts, pctx->copts);
@@ -1148,6 +1282,13 @@ static int ceph_reconfigure_fc(struct fs_context *fc)
else
ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
+ if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
+ kfree(fsc->mount_options->mon_addr);
+ fsc->mount_options->mon_addr = fsopt->mon_addr;
+ fsopt->mon_addr = NULL;
+ pr_notice("ceph: monitor addresses recorded, but not used for reconnection");
+ }
+
sync_filesystem(fc->root->d_sb);
return 0;
}
@@ -1325,6 +1466,14 @@ bool disable_send_metrics = false;
module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
+/* for both v1 and v2 syntax */
+static bool mount_support = true;
+static const struct kernel_param_ops param_ops_mount_syntax = {
+ .get = param_get_bool,
+};
+module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444);
+module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444);
+
module_init(init_ceph);
module_exit(exit_ceph);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index d0142cc5c41b..67f145e1ae7a 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -24,13 +24,11 @@
#include <linux/fscache.h>
#endif
-/* f_type in struct statfs */
-#define CEPH_SUPER_MAGIC 0x00c36400
-
/* large granularity for statfs utilization stats to facilitate
* large volume sizes on 32-bit machines. */
#define CEPH_BLOCK_SHIFT 22 /* 4 MB */
#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT)
+#define CEPH_4K_BLOCK_SHIFT 12 /* 4 KB */
#define CEPH_MOUNT_OPT_CLEANRECOVER (1<<1) /* auto reonnect (clean mode) after blocklisted */
#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
@@ -44,6 +42,7 @@
#define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */
#define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */
#define CEPH_MOUNT_OPT_ASYNC_DIROPS (1<<15) /* allow async directory ops */
+#define CEPH_MOUNT_OPT_NOPAGECACHE (1<<16) /* bypass pagecache altogether */
#define CEPH_MOUNT_OPT_DEFAULT \
(CEPH_MOUNT_OPT_DCACHE | \
@@ -88,6 +87,8 @@ struct ceph_mount_options {
unsigned int max_readdir; /* max readdir result (entries) */
unsigned int max_readdir_bytes; /* max readdir result (bytes) */
+ bool new_dev_syntax;
+
/*
* everything above this point can be memcmp'd; everything below
* is handled in compare_mount_options()
@@ -97,6 +98,7 @@ struct ceph_mount_options {
char *mds_namespace; /* default NULL */
char *server_path; /* default NULL (means "/") */
char *fscache_uniq; /* default NULL */
+ char *mon_addr;
};
struct ceph_fs_client {
@@ -534,19 +536,23 @@ static inline int ceph_ino_compare(struct inode *inode, void *data)
*
* These come from src/mds/mdstypes.h in the ceph sources.
*/
-#define CEPH_MAX_MDS 0x100
-#define CEPH_NUM_STRAY 10
+#define CEPH_MAX_MDS 0x100
+#define CEPH_NUM_STRAY 10
#define CEPH_MDS_INO_MDSDIR_OFFSET (1 * CEPH_MAX_MDS)
+#define CEPH_MDS_INO_LOG_OFFSET (2 * CEPH_MAX_MDS)
#define CEPH_INO_SYSTEM_BASE ((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY))
static inline bool ceph_vino_is_reserved(const struct ceph_vino vino)
{
- if (vino.ino < CEPH_INO_SYSTEM_BASE &&
- vino.ino >= CEPH_MDS_INO_MDSDIR_OFFSET) {
- WARN_RATELIMIT(1, "Attempt to access reserved inode number 0x%llx", vino.ino);
- return true;
- }
- return false;
+ if (vino.ino >= CEPH_INO_SYSTEM_BASE ||
+ vino.ino < CEPH_MDS_INO_MDSDIR_OFFSET)
+ return false;
+
+ /* Don't warn on mdsdirs */
+ WARN_RATELIMIT(vino.ino >= CEPH_MDS_INO_LOG_OFFSET,
+ "Attempt to access reserved inode number 0x%llx",
+ vino.ino);
+ return true;
}
static inline struct inode *ceph_find_inode(struct super_block *sb,
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 346ae8716deb..3b7e3b9e4fd2 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -188,7 +188,7 @@ config CIFS_SMB_DIRECT
config CIFS_FSCACHE
bool "Provide CIFS client caching support"
- depends on CIFS=m && FSCACHE_OLD_API || CIFS=y && FSCACHE_OLD_API=y
+ depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
help
Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
to be cached locally on disk through the general filesystem cache
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 87fcacdf3de7..cc8fdcb35b71 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -25,7 +25,7 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += cifs_dfs_ref.o dfs_cache.o
cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o cifs_swn.o
-cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
+cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o
cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
deleted file mode 100644
index 8be57aaedab6..000000000000
--- a/fs/cifs/cache.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * CIFS filesystem cache index structure definitions
- *
- * Copyright (c) 2010 Novell, Inc.
- * Authors(s): Suresh Jayaraman (sjayaraman@suse.de>
- *
- */
-#include "fscache.h"
-#include "cifs_debug.h"
-
-/*
- * CIFS filesystem definition for FS-Cache
- */
-struct fscache_netfs cifs_fscache_netfs = {
- .name = "cifs",
- .version = 0,
-};
-
-/*
- * Register CIFS for caching with FS-Cache
- */
-int cifs_fscache_register(void)
-{
- return fscache_register_netfs(&cifs_fscache_netfs);
-}
-
-/*
- * Unregister CIFS for caching
- */
-void cifs_fscache_unregister(void)
-{
- fscache_unregister_netfs(&cifs_fscache_netfs);
-}
-
-/*
- * Server object for FS-Cache
- */
-const struct fscache_cookie_def cifs_fscache_server_index_def = {
- .name = "CIFS.server",
- .type = FSCACHE_COOKIE_TYPE_INDEX,
-};
-
-static enum
-fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
- const void *data,
- uint16_t datalen,
- loff_t object_size)
-{
- struct cifs_fscache_super_auxdata auxdata;
- const struct cifs_tcon *tcon = cookie_netfs_data;
-
- if (datalen != sizeof(auxdata))
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.resource_id = tcon->resource_id;
- auxdata.vol_create_time = tcon->vol_create_time;
- auxdata.vol_serial_number = tcon->vol_serial_number;
-
- if (memcmp(data, &auxdata, datalen) != 0)
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- return FSCACHE_CHECKAUX_OKAY;
-}
-
-/*
- * Superblock object for FS-Cache
- */
-const struct fscache_cookie_def cifs_fscache_super_index_def = {
- .name = "CIFS.super",
- .type = FSCACHE_COOKIE_TYPE_INDEX,
- .check_aux = cifs_fscache_super_check_aux,
-};
-
-static enum
-fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
- const void *data,
- uint16_t datalen,
- loff_t object_size)
-{
- struct cifs_fscache_inode_auxdata auxdata;
- struct cifsInodeInfo *cifsi = cookie_netfs_data;
-
- if (datalen != sizeof(auxdata))
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
- auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
- auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
-
- if (memcmp(data, &auxdata, datalen) != 0)
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- return FSCACHE_CHECKAUX_OKAY;
-}
-
-const struct fscache_cookie_def cifs_fscache_inode_object_def = {
- .name = "CIFS.uniqueid",
- .type = FSCACHE_COOKIE_TYPE_DATAFILE,
- .check_aux = cifs_fscache_inode_check_aux,
-};
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index d282caf9f037..ea00e1a91250 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -416,11 +416,17 @@ skip_rdma:
from_kuid(&init_user_ns, ses->cred_uid));
spin_lock(&ses->chan_lock);
+ if (CIFS_CHAN_NEEDS_RECONNECT(ses, 0))
+ seq_puts(m, "\tPrimary channel: DISCONNECTED ");
+
if (ses->chan_count > 1) {
seq_printf(m, "\n\n\tExtra Channels: %zu ",
ses->chan_count-1);
- for (j = 1; j < ses->chan_count; j++)
+ for (j = 1; j < ses->chan_count; j++) {
cifs_dump_channel(m, j, &ses->chans[j]);
+ if (CIFS_CHAN_NEEDS_RECONNECT(ses, j))
+ seq_puts(m, "\tDISCONNECTED ");
+ }
}
spin_unlock(&ses->chan_lock);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 353bd0dd7026..342717bf1dc2 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -84,9 +84,9 @@ struct key_type cifs_spnego_key_type = {
/* get a key struct with a SPNEGO security blob, suitable for session setup */
struct key *
-cifs_get_spnego_key(struct cifs_ses *sesInfo)
+cifs_get_spnego_key(struct cifs_ses *sesInfo,
+ struct TCP_Server_Info *server)
{
- struct TCP_Server_Info *server = cifs_ses_server(sesInfo);
struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr;
struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr;
char *description, *dp;
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h
index e6a0451877d4..7f102ffeb675 100644
--- a/fs/cifs/cifs_spnego.h
+++ b/fs/cifs/cifs_spnego.h
@@ -29,7 +29,8 @@ struct cifs_spnego_msg {
#ifdef __KERNEL__
extern struct key_type cifs_spnego_key_type;
-extern struct key *cifs_get_spnego_key(struct cifs_ses *sesInfo);
+extern struct key *cifs_get_spnego_key(struct cifs_ses *sesInfo,
+ struct TCP_Server_Info *server);
#endif /* KERNEL */
#endif /* _CIFS_SPNEGO_H */
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index 23a1ed2fb769..463ebe34892b 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -396,11 +396,11 @@ static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const ch
switch (state) {
case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE:
cifs_dbg(FYI, "%s: resource name '%s' become unavailable\n", __func__, name);
- cifs_ses_mark_for_reconnect(swnreg->tcon->ses);
+ cifs_reconnect(swnreg->tcon->ses->server, true);
break;
case CIFS_SWN_RESOURCE_STATE_AVAILABLE:
cifs_dbg(FYI, "%s: resource name '%s' become available\n", __func__, name);
- cifs_ses_mark_for_reconnect(swnreg->tcon->ses);
+ cifs_reconnect(swnreg->tcon->ses->server, true);
break;
case CIFS_SWN_RESOURCE_STATE_UNKNOWN:
cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name);
@@ -498,10 +498,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *a
goto unlock;
}
- spin_lock(&GlobalMid_Lock);
- if (tcon->ses->server->tcpStatus != CifsExiting)
- tcon->ses->server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&GlobalMid_Lock);
+ cifs_reconnect(tcon->ses->server, false);
unlock:
mutex_unlock(&tcon->ses->server->srv_mutex);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index d118282071b3..0912d8bbbac1 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -141,9 +141,13 @@ int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server,
if ((cifs_pdu == NULL) || (server == NULL))
return -EINVAL;
+ spin_lock(&cifs_tcp_ses_lock);
if (!(cifs_pdu->Flags2 & SMBFLG2_SECURITY_SIGNATURE) ||
- server->tcpStatus == CifsNeedNegotiate)
+ server->tcpStatus == CifsNeedNegotiate) {
+ spin_unlock(&cifs_tcp_ses_lock);
return rc;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
if (!server->session_estab) {
memcpy(cifs_pdu->Signature.SecuritySignature, "BSRSPYL", 8);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index dca42aa87d30..199edac0cb59 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -26,6 +26,7 @@
#include <linux/random.h>
#include <linux/uuid.h>
#include <linux/xattr.h>
+#include <uapi/linux/magic.h>
#include <net/ipv6.h>
#include "cifsfs.h"
#include "cifspdu.h"
@@ -202,7 +203,7 @@ cifs_read_super(struct super_block *sb)
sb->s_time_max = ts.tv_sec;
}
- sb->s_magic = CIFS_MAGIC_NUMBER;
+ sb->s_magic = CIFS_SUPER_MAGIC;
sb->s_op = &cifs_super_ops;
sb->s_xattr = cifs_xattr_handlers;
rc = super_setup_bdi(sb);
@@ -396,6 +397,9 @@ static void
cifs_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
+ if (inode->i_state & I_PINNING_FSCACHE_WB)
+ cifs_fscache_unuse_inode_cookie(inode, true);
+ cifs_fscache_release_inode_cookie(inode);
clear_inode(inode);
}
@@ -720,6 +724,12 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root)
}
#endif
+static int cifs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ fscache_unpin_writeback(wbc, cifs_inode_cookie(inode));
+ return 0;
+}
+
static int cifs_drop_inode(struct inode *inode)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -732,6 +742,7 @@ static int cifs_drop_inode(struct inode *inode)
static const struct super_operations cifs_super_ops = {
.statfs = cifs_statfs,
.alloc_inode = cifs_alloc_inode,
+ .write_inode = cifs_write_inode,
.free_inode = cifs_free_inode,
.drop_inode = cifs_drop_inode,
.evict_inode = cifs_evict_inode,
@@ -773,7 +784,7 @@ cifs_get_root(struct smb3_fs_context *ctx, struct super_block *sb)
sep = CIFS_DIR_SEP(cifs_sb);
dentry = dget(sb->s_root);
- p = s = full_path;
+ s = full_path;
do {
struct inode *dir = d_inode(dentry);
@@ -1624,13 +1635,9 @@ init_cifs(void)
goto out_destroy_cifsoplockd_wq;
}
- rc = cifs_fscache_register();
- if (rc)
- goto out_destroy_deferredclose_wq;
-
rc = cifs_init_inodecache();
if (rc)
- goto out_unreg_fscache;
+ goto out_destroy_deferredclose_wq;
rc = cifs_init_mids();
if (rc)
@@ -1692,8 +1699,6 @@ out_destroy_mids:
cifs_destroy_mids();
out_destroy_inodecache:
cifs_destroy_inodecache();
-out_unreg_fscache:
- cifs_fscache_unregister();
out_destroy_deferredclose_wq:
destroy_workqueue(deferredclose_wq);
out_destroy_cifsoplockd_wq:
@@ -1729,7 +1734,6 @@ exit_cifs(void)
cifs_destroy_request_bufs();
cifs_destroy_mids();
cifs_destroy_inodecache();
- cifs_fscache_unregister();
destroy_workqueue(deferredclose_wq);
destroy_workqueue(cifsoplockd_wq);
destroy_workqueue(decrypt_wq);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 9e5d9e192ef0..15a5c5db038b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -152,5 +152,6 @@ extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type,
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.34"
+#define SMB3_PRODUCT_BUILD 35
+#define CIFS_VERSION "2.35"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index be74606724c7..48b343d03430 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -24,8 +24,6 @@
#include "../smbfs_common/smb2pdu.h"
#include "smb2pdu.h"
-#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
-
#define SMB_PATH_MAX 260
#define CIFS_PORT 445
#define RFC1001_PORT 139
@@ -113,7 +111,14 @@ enum statusEnum {
CifsGood,
CifsExiting,
CifsNeedReconnect,
- CifsNeedNegotiate
+ CifsNeedNegotiate,
+ CifsInNegotiate,
+ CifsNeedSessSetup,
+ CifsInSessSetup,
+ CifsNeedTcon,
+ CifsInTcon,
+ CifsNeedFilesInvalidate,
+ CifsInFilesInvalidate
};
enum securityEnum {
@@ -263,13 +268,16 @@ struct smb_version_operations {
/* check if we need to negotiate */
bool (*need_neg)(struct TCP_Server_Info *);
/* negotiate to the server */
- int (*negotiate)(const unsigned int, struct cifs_ses *);
+ int (*negotiate)(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
/* set negotiated write size */
unsigned int (*negotiate_wsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx);
/* set negotiated read size */
unsigned int (*negotiate_rsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx);
/* setup smb sessionn */
int (*sess_setup)(const unsigned int, struct cifs_ses *,
+ struct TCP_Server_Info *server,
const struct nls_table *);
/* close smb session */
int (*logoff)(const unsigned int, struct cifs_ses *);
@@ -414,7 +422,8 @@ struct smb_version_operations {
void (*set_lease_key)(struct inode *, struct cifs_fid *);
/* generate new lease key */
void (*new_lease_key)(struct cifs_fid *);
- int (*generate_signingkey)(struct cifs_ses *);
+ int (*generate_signingkey)(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *,
bool allocate_crypto);
int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon,
@@ -582,7 +591,7 @@ struct TCP_Server_Info {
char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
struct smb_version_operations *ops;
struct smb_version_values *vals;
- /* updates to tcpStatus protected by GlobalMid_Lock */
+ /* updates to tcpStatus protected by cifs_tcp_ses_lock */
enum statusEnum tcpStatus; /* what we think the status is */
char *hostname; /* hostname portion of UNC string */
struct socket *ssocket;
@@ -659,9 +668,6 @@ struct TCP_Server_Info {
unsigned int total_read; /* total amount of data read in this pass */
atomic_t in_send; /* requests trying to send */
atomic_t num_waiters; /* blocked waiting to get in sendrecv */
-#ifdef CONFIG_CIFS_FSCACHE
- struct fscache_cookie *fscache; /* client index cache cookie */
-#endif
#ifdef CONFIG_CIFS_STATS2
atomic_t num_cmds[NUMBER_OF_SMB2_COMMANDS]; /* total requests by cmd */
atomic_t smb2slowcmd[NUMBER_OF_SMB2_COMMANDS]; /* count resps > 1 sec */
@@ -915,12 +921,13 @@ struct cifs_chan {
*/
struct cifs_ses {
struct list_head smb_ses_list;
+ struct list_head rlist; /* reconnect list */
struct list_head tcon_list;
struct cifs_tcon *tcon_ipc;
struct mutex session_mutex;
struct TCP_Server_Info *server; /* pointer to server info */
int ses_count; /* reference counter */
- enum statusEnum status; /* updates protected by GlobalMid_Lock */
+ enum statusEnum status; /* updates protected by cifs_tcp_ses_lock */
unsigned overrideSecFlg; /* if non-zero override global sec flags */
char *serverOS; /* name of operating system underlying server */
char *serverNOS; /* name of network operating system of server */
@@ -939,17 +946,13 @@ struct cifs_ses {
struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
enum securityEnum sectype; /* what security flavor was specified? */
bool sign; /* is signing required? */
- bool need_reconnect:1; /* connection reset, uid now invalid */
bool domainAuto:1;
- bool binding:1; /* are we binding the session? */
__u16 session_flags;
__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
__u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE];
__u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE];
__u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
- __u8 binding_preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
-
/*
* Network interfaces available on the server this session is
* connected to.
@@ -969,45 +972,34 @@ struct cifs_ses {
spinlock_t chan_lock;
/* ========= begin: protected by chan_lock ======== */
#define CIFS_MAX_CHANNELS 16
+#define CIFS_ALL_CHANNELS_SET(ses) \
+ ((1UL << (ses)->chan_count) - 1)
+#define CIFS_ALL_CHANS_NEED_RECONNECT(ses) \
+ ((ses)->chans_need_reconnect == CIFS_ALL_CHANNELS_SET(ses))
+#define CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses) \
+ ((ses)->chans_need_reconnect = CIFS_ALL_CHANNELS_SET(ses))
+#define CIFS_CHAN_NEEDS_RECONNECT(ses, index) \
+ test_bit((index), &(ses)->chans_need_reconnect)
+
struct cifs_chan chans[CIFS_MAX_CHANNELS];
- struct cifs_chan *binding_chan;
size_t chan_count;
size_t chan_max;
atomic_t chan_seq; /* round robin state */
+
+ /*
+ * chans_need_reconnect is a bitmap indicating which of the channels
+ * under this smb session needs to be reconnected.
+ * If not multichannel session, only one bit will be used.
+ *
+ * We will ask for sess and tcon reconnection only if all the
+ * channels are marked for needing reconnection. This will
+ * enable the sessions on top to continue to live till any
+ * of the channels below are active.
+ */
+ unsigned long chans_need_reconnect;
/* ========= end: protected by chan_lock ======== */
};
-/*
- * When binding a new channel, we need to access the channel which isn't fully
- * established yet.
- */
-
-static inline
-struct cifs_chan *cifs_ses_binding_channel(struct cifs_ses *ses)
-{
- if (ses->binding)
- return ses->binding_chan;
- else
- return NULL;
-}
-
-/*
- * Returns the server pointer of the session. When binding a new
- * channel this returns the last channel which isn't fully established
- * yet.
- *
- * This function should be use for negprot/sess.setup codepaths. For
- * the other requests see cifs_pick_channel().
- */
-static inline
-struct TCP_Server_Info *cifs_ses_server(struct cifs_ses *ses)
-{
- if (ses->binding)
- return ses->binding_chan->server;
- else
- return ses->server;
-}
-
static inline bool
cap_unix(struct cifs_ses *ses)
{
@@ -1117,7 +1109,7 @@ struct cifs_tcon {
__u32 max_bytes_copy;
#ifdef CONFIG_CIFS_FSCACHE
u64 resource_id; /* server resource id */
- struct fscache_cookie *fscache; /* cookie for share */
+ struct fscache_volume *fscache; /* cookie for share */
#endif
struct list_head pending_opens; /* list of incomplete opens */
struct cached_fid crfid; /* Cached root fid */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index d2ff438fd31f..68b9a436af4b 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2560,7 +2560,7 @@ typedef struct {
__le32 EaSize; /* length of the xattrs */
__u8 ShortNameLength;
__u8 Reserved;
- __u8 ShortName[12];
+ __u8 ShortName[24];
char FileName[1];
} __attribute__((packed)) FILE_BOTH_DIRECTORY_INFO; /* level 0x104 FFrsp data */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 4f5a3e857df4..d3701295402d 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -131,7 +131,11 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
struct smb_hdr *in_buf ,
struct smb_hdr *out_buf,
int *bytes_returned);
-extern int cifs_reconnect(struct TCP_Server_Info *server);
+void
+cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
+ bool mark_smb_session);
+extern int cifs_reconnect(struct TCP_Server_Info *server,
+ bool mark_smb_session);
extern int checkSMB(char *buf, unsigned int len, struct TCP_Server_Info *srvr);
extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *);
extern bool backup_cred(struct cifs_sb_info *);
@@ -164,6 +168,7 @@ extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
extern enum securityEnum select_sectype(struct TCP_Server_Info *server,
enum securityEnum requested);
extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp);
extern struct timespec64 cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
extern u64 cifs_UnixTimeToNT(struct timespec64);
@@ -293,11 +298,15 @@ extern int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon,
const struct nls_table *nlsc);
extern int cifs_negotiate_protocol(const unsigned int xid,
- struct cifs_ses *ses);
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
struct nls_table *nls_info);
extern int cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required);
-extern int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses);
+extern int CIFSSMBNegotiate(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
const char *tree, struct cifs_tcon *tcon,
@@ -504,8 +513,10 @@ extern int cifs_verify_signature(struct smb_rqst *rqst,
extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
extern void cifs_crypto_secmech_release(struct TCP_Server_Info *server);
extern int calc_seckey(struct cifs_ses *);
-extern int generate_smb30signingkey(struct cifs_ses *);
-extern int generate_smb311signingkey(struct cifs_ses *);
+extern int generate_smb30signingkey(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+extern int generate_smb311signingkey(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
extern int CIFSSMBCopy(unsigned int xid,
struct cifs_tcon *source_tcon,
@@ -601,6 +612,19 @@ bool is_server_using_iface(struct TCP_Server_Info *server,
bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface);
void cifs_ses_mark_for_reconnect(struct cifs_ses *ses);
+unsigned int
+cifs_ses_get_chan_index(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+void
+cifs_chan_set_need_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+void
+cifs_chan_clear_need_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+bool
+cifs_chan_needs_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+
void extract_unc_hostname(const char *unc, const char **h, size_t *len);
int copy_path_name(char *dst, const char *src);
int smb2_parse_query_directory(struct cifs_tcon *tcon, struct kvec *rsp_iov,
@@ -626,6 +650,11 @@ static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
int match_target_ip(struct TCP_Server_Info *server,
const char *share, size_t share_len,
bool *result);
+
+int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ const char *dfs_link_path);
#endif
static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options)
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 243d17696f06..071e2f21a7db 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -73,6 +73,16 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
struct list_head *tmp;
struct list_head *tmp1;
+ /* only send once per connect */
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->ses->status != CifsGood ||
+ tcon->tidStatus != CifsNeedReconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return;
+ }
+ tcon->tidStatus = CifsInFilesInvalidate;
+ spin_unlock(&cifs_tcp_ses_lock);
+
/* list all files open on tree connection and mark them invalid */
spin_lock(&tcon->open_file_lock);
list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
@@ -89,6 +99,11 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
memset(tcon->crfid.fid, 0, sizeof(struct cifs_fid));
mutex_unlock(&tcon->crfid.fid_mutex);
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInFilesInvalidate)
+ tcon->tidStatus = CifsNeedTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+
/*
* BB Add call to invalidate_inodes(sb) for all superblocks mounted
* to this tcon.
@@ -120,15 +135,18 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
* only tree disconnect, open, and write, (and ulogoff which does not
* have tcon) are allowed as we start force umount
*/
+ spin_lock(&cifs_tcp_ses_lock);
if (tcon->tidStatus == CifsExiting) {
if (smb_command != SMB_COM_WRITE_ANDX &&
smb_command != SMB_COM_OPEN_ANDX &&
smb_command != SMB_COM_TREE_DISCONNECT) {
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_dbg(FYI, "can not send cmd %d while umounting\n",
smb_command);
return -ENODEV;
}
}
+ spin_unlock(&cifs_tcp_ses_lock);
retries = server->nr_targets;
@@ -148,8 +166,12 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
}
/* are we still trying to reconnect? */
- if (server->tcpStatus != CifsNeedReconnect)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus != CifsNeedReconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
break;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
if (retries && --retries)
continue;
@@ -166,31 +188,49 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
retries = server->nr_targets;
}
- if (!ses->need_reconnect && !tcon->need_reconnect)
+ spin_lock(&ses->chan_lock);
+ if (!cifs_chan_needs_reconnect(ses, server) && !tcon->need_reconnect) {
+ spin_unlock(&ses->chan_lock);
return 0;
+ }
+ spin_unlock(&ses->chan_lock);
nls_codepage = load_nls_default();
/*
- * need to prevent multiple threads trying to simultaneously
- * reconnect the same SMB session
- */
- mutex_lock(&ses->session_mutex);
-
- /*
* Recheck after acquire mutex. If another thread is negotiating
* and the server never sends an answer the socket will be closed
* and tcpStatus set to reconnect.
*/
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedReconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
rc = -EHOSTDOWN;
- mutex_unlock(&ses->session_mutex);
goto out;
}
+ spin_unlock(&cifs_tcp_ses_lock);
- rc = cifs_negotiate_protocol(0, ses);
- if (rc == 0 && ses->need_reconnect)
- rc = cifs_setup_session(0, ses, nls_codepage);
+ /*
+ * need to prevent multiple threads trying to simultaneously
+ * reconnect the same SMB session
+ */
+ spin_lock(&ses->chan_lock);
+ if (!cifs_chan_needs_reconnect(ses, server)) {
+ spin_unlock(&ses->chan_lock);
+
+ /* this means that we only need to tree connect */
+ if (tcon->need_reconnect)
+ goto skip_sess_setup;
+
+ rc = -EHOSTDOWN;
+ goto out;
+ }
+ spin_unlock(&ses->chan_lock);
+
+ mutex_lock(&ses->session_mutex);
+ rc = cifs_negotiate_protocol(0, ses, server);
+ if (!rc)
+ rc = cifs_setup_session(0, ses, server, nls_codepage);
/* do we need to reconnect tcon? */
if (rc || !tcon->need_reconnect) {
@@ -198,6 +238,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
goto out;
}
+skip_sess_setup:
cifs_mark_open_files_invalid(tcon);
rc = cifs_tree_connect(0, tcon, nls_codepage);
mutex_unlock(&ses->session_mutex);
@@ -337,8 +378,13 @@ static int
smb_init_no_reconnect(int smb_command, int wct, struct cifs_tcon *tcon,
void **request_buf, void **response_buf)
{
- if (tcon->ses->need_reconnect || tcon->need_reconnect)
+ spin_lock(&tcon->ses->chan_lock);
+ if (cifs_chan_needs_reconnect(tcon->ses, tcon->ses->server) ||
+ tcon->need_reconnect) {
+ spin_unlock(&tcon->ses->chan_lock);
return -EHOSTDOWN;
+ }
+ spin_unlock(&tcon->ses->chan_lock);
return __smb_init(smb_command, wct, tcon, request_buf, response_buf);
}
@@ -476,14 +522,15 @@ should_set_ext_sec_flag(enum securityEnum sectype)
}
int
-CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
+CIFSSMBNegotiate(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
NEGOTIATE_REQ *pSMB;
NEGOTIATE_RSP *pSMBr;
int rc = 0;
int bytes_returned;
int i;
- struct TCP_Server_Info *server = ses->server;
u16 count;
if (!server) {
@@ -600,8 +647,12 @@ CIFSSMBTDis(const unsigned int xid, struct cifs_tcon *tcon)
* the tcon is no longer on the list, so no need to take lock before
* checking this.
*/
- if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
- return 0;
+ spin_lock(&tcon->ses->chan_lock);
+ if ((tcon->need_reconnect) || CIFS_ALL_CHANS_NEED_RECONNECT(tcon->ses)) {
+ spin_unlock(&tcon->ses->chan_lock);
+ return -EIO;
+ }
+ spin_unlock(&tcon->ses->chan_lock);
rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon,
(void **)&smb_buffer);
@@ -696,9 +747,14 @@ CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses)
return -EIO;
mutex_lock(&ses->session_mutex);
- if (ses->need_reconnect)
+ spin_lock(&ses->chan_lock);
+ if (CIFS_ALL_CHANS_NEED_RECONNECT(ses)) {
+ spin_unlock(&ses->chan_lock);
goto session_already_dead; /* no need to send SMBlogoff if uid
already closed due to reconnect */
+ }
+ spin_unlock(&ses->chan_lock);
+
rc = small_smb_init(SMB_COM_LOGOFF_ANDX, 2, NULL, (void **)&pSMB);
if (rc) {
mutex_unlock(&ses->session_mutex);
@@ -1401,7 +1457,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
if (server->ops->is_session_expired &&
server->ops->is_session_expired(buf)) {
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
return -1;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 1060164b984a..11a22a30ee14 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -166,36 +166,57 @@ static void cifs_resolve_server(struct work_struct *work)
* Mark all sessions and tcons for reconnect.
*
* @server needs to be previously set to CifsNeedReconnect.
+ *
*/
-static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server)
+void
+cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
+ bool mark_smb_session)
{
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct mid_q_entry *mid, *nmid;
struct list_head retry_list;
- struct TCP_Server_Info *pserver;
server->maxBuf = 0;
server->max_read = 0;
- cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
- trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname);
/*
* before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they
* are not used until reconnected.
*/
- cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n", __func__);
+ cifs_dbg(FYI, "%s: marking necessary sessions and tcons for reconnect\n", __func__);
/* If server is a channel, select the primary channel */
pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
- ses->need_reconnect = true;
- list_for_each_entry(tcon, &ses->tcon_list, tcon_list)
+ spin_lock(&ses->chan_lock);
+ if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server))
+ goto next_session;
+
+ if (mark_smb_session)
+ CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses);
+ else
+ cifs_chan_set_need_reconnect(ses, server);
+
+ /* If all channels need reconnect, then tcon needs reconnect */
+ if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses))
+ goto next_session;
+
+ ses->status = CifsNeedReconnect;
+
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
tcon->need_reconnect = true;
+ tcon->tidStatus = CifsNeedReconnect;
+ }
if (ses->tcon_ipc)
ses->tcon_ipc->need_reconnect = true;
+
+next_session:
+ spin_unlock(&ses->chan_lock);
}
spin_unlock(&cifs_tcp_ses_lock);
@@ -248,16 +269,21 @@ static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server
static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num_targets)
{
- spin_lock(&GlobalMid_Lock);
+ spin_lock(&cifs_tcp_ses_lock);
server->nr_targets = num_targets;
if (server->tcpStatus == CifsExiting) {
/* the demux thread will exit normally next time through the loop */
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
wake_up(&server->response_q);
return false;
}
+
+ cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
+ trace_smb3_reconnect(server->CurrentMid, server->conn_id,
+ server->hostname);
server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&GlobalMid_Lock);
+
+ spin_unlock(&cifs_tcp_ses_lock);
return true;
}
@@ -268,15 +294,21 @@ static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num
* mark all smb sessions as reconnecting for tcp session
* reconnect tcp session
* wake up waiters on reconnection? - (not needed currently)
+ *
+ * if mark_smb_session is passed as true, unconditionally mark
+ * the smb session (and tcon) for reconnect as well. This value
+ * doesn't really matter for non-multichannel scenario.
+ *
*/
-static int __cifs_reconnect(struct TCP_Server_Info *server)
+static int __cifs_reconnect(struct TCP_Server_Info *server,
+ bool mark_smb_session)
{
int rc = 0;
if (!cifs_tcp_ses_needs_reconnect(server, 1))
return 0;
- cifs_mark_tcp_ses_conns_for_reconnect(server);
+ cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session);
do {
try_to_freeze();
@@ -299,17 +331,20 @@ static int __cifs_reconnect(struct TCP_Server_Info *server)
} else {
atomic_inc(&tcpSesReconnectCount);
set_credits(server, 1);
- spin_lock(&GlobalMid_Lock);
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus != CifsExiting)
server->tcpStatus = CifsNeedNegotiate;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_swn_reset_server_dstaddr(server);
mutex_unlock(&server->srv_mutex);
+ mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
}
} while (server->tcpStatus == CifsNeedReconnect);
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedNegotiate)
mod_delayed_work(cifsiod_wq, &server->echo, 0);
+ spin_unlock(&cifs_tcp_ses_lock);
wake_up(&server->response_q);
return rc;
@@ -371,7 +406,9 @@ static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_
return rc;
}
-static int reconnect_dfs_server(struct TCP_Server_Info *server)
+static int
+reconnect_dfs_server(struct TCP_Server_Info *server,
+ bool mark_smb_session)
{
int rc = 0;
const char *refpath = server->current_fullpath + 1;
@@ -395,7 +432,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server)
if (!cifs_tcp_ses_needs_reconnect(server, num_targets))
return 0;
- cifs_mark_tcp_ses_conns_for_reconnect(server);
+ cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session);
do {
try_to_freeze();
@@ -416,12 +453,13 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server)
*/
atomic_inc(&tcpSesReconnectCount);
set_credits(server, 1);
- spin_lock(&GlobalMid_Lock);
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus != CifsExiting)
server->tcpStatus = CifsNeedNegotiate;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_swn_reset_server_dstaddr(server);
mutex_unlock(&server->srv_mutex);
+ mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
} while (server->tcpStatus == CifsNeedReconnect);
if (target_hint)
@@ -430,29 +468,32 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server)
dfs_cache_free_tgts(&tl);
/* Need to set up echo worker again once connection has been established */
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedNegotiate)
mod_delayed_work(cifsiod_wq, &server->echo, 0);
+ spin_unlock(&cifs_tcp_ses_lock);
+
wake_up(&server->response_q);
return rc;
}
-int cifs_reconnect(struct TCP_Server_Info *server)
+int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session)
{
/* If tcp session is not an dfs connection, then reconnect to last target server */
spin_lock(&cifs_tcp_ses_lock);
if (!server->is_dfs_conn || !server->origin_fullpath || !server->leaf_fullpath) {
spin_unlock(&cifs_tcp_ses_lock);
- return __cifs_reconnect(server);
+ return __cifs_reconnect(server, mark_smb_session);
}
spin_unlock(&cifs_tcp_ses_lock);
- return reconnect_dfs_server(server);
+ return reconnect_dfs_server(server, mark_smb_session);
}
#else
-int cifs_reconnect(struct TCP_Server_Info *server)
+int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session)
{
- return __cifs_reconnect(server);
+ return __cifs_reconnect(server, mark_smb_session);
}
#endif
@@ -534,15 +575,18 @@ server_unresponsive(struct TCP_Server_Info *server)
* 65s kernel_recvmsg times out, and we see that we haven't gotten
* a response in >60s.
*/
+ spin_lock(&cifs_tcp_ses_lock);
if ((server->tcpStatus == CifsGood ||
server->tcpStatus == CifsNeedNegotiate) &&
(!server->ops->can_echo || server->ops->can_echo(server)) &&
time_after(jiffies, server->lstrp + 3 * server->echo_interval)) {
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_server_dbg(VFS, "has not responded in %lu seconds. Reconnecting...\n",
(3 * server->echo_interval) / HZ);
- cifs_reconnect(server);
+ cifs_reconnect(server, false);
return true;
}
+ spin_unlock(&cifs_tcp_ses_lock);
return false;
}
@@ -576,7 +620,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
/* reconnect if no credits and no requests in flight */
if (zero_credits(server)) {
- cifs_reconnect(server);
+ cifs_reconnect(server, false);
return -ECONNABORTED;
}
@@ -587,13 +631,17 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
else
length = sock_recvmsg(server->ssocket, smb_msg, 0);
- if (server->tcpStatus == CifsExiting)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -ESHUTDOWN;
+ }
if (server->tcpStatus == CifsNeedReconnect) {
- cifs_reconnect(server);
+ spin_unlock(&cifs_tcp_ses_lock);
return -ECONNABORTED;
}
+ spin_unlock(&cifs_tcp_ses_lock);
if (length == -ERESTARTSYS ||
length == -EAGAIN ||
@@ -610,7 +658,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
if (length <= 0) {
cifs_dbg(FYI, "Received no data or error: %d\n", length);
- cifs_reconnect(server);
+ cifs_reconnect(server, false);
return -ECONNABORTED;
}
}
@@ -689,11 +737,11 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type)
* initialize frame).
*/
cifs_set_port((struct sockaddr *)&server->dstaddr, CIFS_PORT);
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
break;
default:
cifs_server_dbg(VFS, "RFC 1002 unknown response type 0x%x\n", type);
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
}
return false;
@@ -771,9 +819,9 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
cancel_delayed_work_sync(&server->echo);
cancel_delayed_work_sync(&server->resolve);
- spin_lock(&GlobalMid_Lock);
+ spin_lock(&cifs_tcp_ses_lock);
server->tcpStatus = CifsExiting;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
wake_up_all(&server->response_q);
/* check if we have blocked requests that need to free */
@@ -866,7 +914,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) -
server->vals->header_preamble_size) {
cifs_server_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length);
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
return -ECONNABORTED;
}
@@ -913,7 +961,7 @@ cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
if (server->ops->is_session_expired &&
server->ops->is_session_expired(buf)) {
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
return -1;
}
@@ -1017,7 +1065,7 @@ next_pdu:
server->vals->header_preamble_size) {
cifs_server_dbg(VFS, "SMB response too short (%u bytes)\n",
server->pdu_size);
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
continue;
}
@@ -1069,7 +1117,7 @@ next_pdu:
server->ops->is_status_io_timeout(buf)) {
num_io_timeout++;
if (num_io_timeout > NUM_STATUS_IO_TIMEOUT) {
- cifs_reconnect(server);
+ cifs_reconnect(server, false);
num_io_timeout = 0;
continue;
}
@@ -1139,7 +1187,7 @@ next_pdu:
}
memalloc_noreclaim_restore(noreclaim_flag);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
}
/*
@@ -1390,16 +1438,12 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
else
cancel_delayed_work_sync(&server->reconnect);
- spin_lock(&GlobalMid_Lock);
+ spin_lock(&cifs_tcp_ses_lock);
server->tcpStatus = CifsExiting;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_crypto_secmech_release(server);
- /* fscache server cookies are based on primary channel only */
- if (!CIFS_SERVER_IS_CHAN(server))
- cifs_fscache_release_client_cookie(server);
-
kfree(server->session_key.response);
server->session_key.response = NULL;
server->session_key.len = 0;
@@ -1545,7 +1589,9 @@ smbd_connected:
* to the struct since the kernel thread not created yet
* no need to spinlock this update of tcpStatus
*/
+ spin_lock(&cifs_tcp_ses_lock);
tcp_ses->tcpStatus = CifsNeedNegotiate;
+ spin_unlock(&cifs_tcp_ses_lock);
if ((ctx->max_credits < 20) || (ctx->max_credits > 60000))
tcp_ses->max_credits = SMB2_MAX_CREDITS_AVAILABLE;
@@ -1559,14 +1605,6 @@ smbd_connected:
list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
- /* fscache server cookies are based on primary channel only */
- if (!CIFS_SERVER_IS_CHAN(tcp_ses))
- cifs_fscache_get_client_cookie(tcp_ses);
-#ifdef CONFIG_CIFS_FSCACHE
- else
- tcp_ses->fscache = tcp_ses->primary_server->fscache;
-#endif /* CONFIG_CIFS_FSCACHE */
-
/* queue echo request delayed work */
queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
@@ -1762,15 +1800,13 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
spin_unlock(&cifs_tcp_ses_lock);
return;
}
- spin_unlock(&cifs_tcp_ses_lock);
/* ses_count can never go negative */
WARN_ON(ses->ses_count < 0);
- spin_lock(&GlobalMid_Lock);
if (ses->status == CifsGood)
ses->status = CifsExiting;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_free_ipc(ses);
@@ -1789,7 +1825,6 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
spin_lock(&ses->chan_lock);
chan_count = ses->chan_count;
- spin_unlock(&ses->chan_lock);
/* close any extra channels */
if (chan_count > 1) {
@@ -1806,6 +1841,7 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
ses->chans[i].server = NULL;
}
}
+ spin_unlock(&ses->chan_lock);
sesInfoFree(ses);
cifs_put_tcp_session(server, 0);
@@ -1987,11 +2023,13 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
cifs_dbg(FYI, "Existing smb sess found (status=%d)\n",
ses->status);
- mutex_lock(&ses->session_mutex);
- if (ses->need_reconnect) {
+ spin_lock(&ses->chan_lock);
+ if (cifs_chan_needs_reconnect(ses, server)) {
+ spin_unlock(&ses->chan_lock);
cifs_dbg(FYI, "Session needs reconnect\n");
- rc = cifs_negotiate_protocol(xid, ses);
+ mutex_lock(&ses->session_mutex);
+ rc = cifs_negotiate_protocol(xid, ses, server);
if (rc) {
mutex_unlock(&ses->session_mutex);
/* problem -- put our ses reference */
@@ -2000,7 +2038,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
return ERR_PTR(rc);
}
- rc = cifs_setup_session(xid, ses,
+ rc = cifs_setup_session(xid, ses, server,
ctx->local_nls);
if (rc) {
mutex_unlock(&ses->session_mutex);
@@ -2009,8 +2047,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
free_xid(xid);
return ERR_PTR(rc);
}
+ mutex_unlock(&ses->session_mutex);
+
+ spin_lock(&ses->chan_lock);
}
- mutex_unlock(&ses->session_mutex);
+ spin_unlock(&ses->chan_lock);
/* existing SMB ses has a server reference already */
cifs_put_tcp_session(server, 0);
@@ -2060,28 +2101,35 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
ses->sectype = ctx->sectype;
ses->sign = ctx->sign;
- mutex_lock(&ses->session_mutex);
/* add server as first channel */
spin_lock(&ses->chan_lock);
ses->chans[0].server = server;
ses->chan_count = 1;
ses->chan_max = ctx->multichannel ? ctx->max_channels:1;
+ ses->chans_need_reconnect = 1;
spin_unlock(&ses->chan_lock);
- rc = cifs_negotiate_protocol(xid, ses);
+ mutex_lock(&ses->session_mutex);
+ rc = cifs_negotiate_protocol(xid, ses, server);
if (!rc)
- rc = cifs_setup_session(xid, ses, ctx->local_nls);
+ rc = cifs_setup_session(xid, ses, server, ctx->local_nls);
+ mutex_unlock(&ses->session_mutex);
/* each channel uses a different signing key */
+ spin_lock(&ses->chan_lock);
memcpy(ses->chans[0].signkey, ses->smb3signingkey,
sizeof(ses->smb3signingkey));
+ spin_unlock(&ses->chan_lock);
- mutex_unlock(&ses->session_mutex);
if (rc)
goto get_ses_fail;
- /* success, put it on the list and add it as first channel */
+ /*
+ * success, put it on the list and add it as first channel
+ * note: the session becomes active soon after this. So you'll
+ * need to lock before changing something in the session.
+ */
spin_lock(&cifs_tcp_ses_lock);
list_add(&ses->smb_ses_list, &server->smb_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
@@ -2161,6 +2209,9 @@ cifs_put_tcon(struct cifs_tcon *tcon)
/* tc_count can never go negative */
WARN_ON(tcon->tc_count < 0);
+ list_del_init(&tcon->tcon_list);
+ spin_unlock(&cifs_tcp_ses_lock);
+
if (tcon->use_witness) {
int rc;
@@ -2171,9 +2222,6 @@ cifs_put_tcon(struct cifs_tcon *tcon)
}
}
- list_del_init(&tcon->tcon_list);
- spin_unlock(&cifs_tcp_ses_lock);
-
xid = get_xid();
if (ses->server->ops->tree_disconnect)
ses->server->ops->tree_disconnect(xid, tcon);
@@ -2290,10 +2338,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
}
}
- /*
- * BB Do we need to wrap session_mutex around this TCon call and Unix
- * SetFS as we do on SessSetup and reconnect?
- */
xid = get_xid();
rc = ses->server->ops->tree_connect(xid, ses, ctx->UNC, tcon,
ctx->local_nls);
@@ -3029,12 +3073,15 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx)
* for just this mount.
*/
reset_cifs_unix_caps(xid, tcon, cifs_sb, ctx);
+ spin_lock(&cifs_tcp_ses_lock);
if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
(le64_to_cpu(tcon->fsUnixInfo.Capability) &
CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) {
+ spin_unlock(&cifs_tcp_ses_lock);
rc = -EACCES;
goto out;
}
+ spin_unlock(&cifs_tcp_ses_lock);
} else
tcon->unix_ext = 0; /* server does not support them */
@@ -3069,7 +3116,8 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx)
* Inside cifs_fscache_get_super_cookie it checks
* that we do not get super cookie twice.
*/
- cifs_fscache_get_super_cookie(tcon);
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE)
+ cifs_fscache_get_super_cookie(tcon);
out:
mnt_ctx->server = server;
@@ -3322,6 +3370,11 @@ static int is_path_remote(struct mount_ctx *mnt_ctx)
rc = server->ops->is_path_accessible(xid, tcon, cifs_sb,
full_path);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (rc == -ENOENT && is_tcon_dfs(tcon))
+ rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon, cifs_sb,
+ full_path);
+#endif
if (rc != 0 && rc != -EREMOTE) {
kfree(full_path);
return rc;
@@ -3709,8 +3762,6 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
if (rc == 0) {
bool is_unicode;
- tcon->tidStatus = CifsGood;
- tcon->need_reconnect = false;
tcon->tid = smb_buffer_response->Tid;
bcc_ptr = pByteArea(smb_buffer_response);
bytes_left = get_bcc(smb_buffer_response);
@@ -3799,26 +3850,37 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
}
int
-cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses)
+cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
int rc = 0;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
if (!server->ops->need_neg || !server->ops->negotiate)
return -ENOSYS;
/* only send once per connect */
- if (!server->ops->need_neg(server))
+ spin_lock(&cifs_tcp_ses_lock);
+ if (!server->ops->need_neg(server) ||
+ server->tcpStatus != CifsNeedNegotiate) {
+ spin_unlock(&cifs_tcp_ses_lock);
return 0;
+ }
+ server->tcpStatus = CifsInNegotiate;
+ spin_unlock(&cifs_tcp_ses_lock);
- rc = server->ops->negotiate(xid, ses);
+ rc = server->ops->negotiate(xid, ses, server);
if (rc == 0) {
- spin_lock(&GlobalMid_Lock);
- if (server->tcpStatus == CifsNeedNegotiate)
- server->tcpStatus = CifsGood;
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsInNegotiate)
+ server->tcpStatus = CifsNeedSessSetup;
else
rc = -EHOSTDOWN;
- spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsInNegotiate)
+ server->tcpStatus = CifsNeedNegotiate;
+ spin_unlock(&cifs_tcp_ses_lock);
}
return rc;
@@ -3826,12 +3888,26 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses)
int
cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
struct nls_table *nls_info)
{
int rc = -ENOSYS;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
+ bool is_binding = false;
- if (!ses->binding) {
+ /* only send once per connect */
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus != CifsNeedSessSetup) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
+ }
+ server->tcpStatus = CifsInSessSetup;
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ spin_lock(&ses->chan_lock);
+ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ spin_unlock(&ses->chan_lock);
+
+ if (!is_binding) {
ses->capabilities = server->capabilities;
if (!linuxExtEnabled)
ses->capabilities &= (~server->vals->cap_unix);
@@ -3849,10 +3925,26 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
server->sec_mode, server->capabilities, server->timeAdj);
if (server->ops->sess_setup)
- rc = server->ops->sess_setup(xid, ses, nls_info);
+ rc = server->ops->sess_setup(xid, ses, server, nls_info);
- if (rc)
+ if (rc) {
cifs_server_dbg(VFS, "Send error in SessSetup = %d\n", rc);
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsInSessSetup)
+ server->tcpStatus = CifsNeedSessSetup;
+ spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsInSessSetup)
+ server->tcpStatus = CifsGood;
+ /* Even if one channel is active, session is in good state */
+ ses->status = CifsGood;
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ spin_lock(&ses->chan_lock);
+ cifs_chan_clear_need_reconnect(ses, server);
+ spin_unlock(&ses->chan_lock);
+ }
return rc;
}
@@ -4296,7 +4388,7 @@ static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tco
*/
if (rc && server->current_fullpath != server->origin_fullpath) {
server->current_fullpath = server->origin_fullpath;
- cifs_ses_mark_for_reconnect(tcon->ses);
+ cifs_reconnect(tcon->ses->server, true);
}
dfs_cache_free_tgts(tl);
@@ -4314,9 +4406,22 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
char *tree;
struct dfs_info3_param ref = {0};
+ /* only send once per connect */
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->ses->status != CifsGood ||
+ (tcon->tidStatus != CifsNew &&
+ tcon->tidStatus != CifsNeedTcon)) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
+ }
+ tcon->tidStatus = CifsInTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+
tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
- if (!tree)
- return -ENOMEM;
+ if (!tree) {
+ rc = -ENOMEM;
+ goto out;
+ }
if (tcon->ipc) {
scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
@@ -4348,13 +4453,52 @@ out:
kfree(tree);
cifs_put_tcp_super(sb);
+ if (rc) {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInTcon)
+ tcon->tidStatus = CifsNeedTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInTcon)
+ tcon->tidStatus = CifsGood;
+ spin_unlock(&cifs_tcp_ses_lock);
+ tcon->need_reconnect = false;
+ }
+
return rc;
}
#else
int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc)
{
+ int rc;
const struct smb_version_operations *ops = tcon->ses->server->ops;
- return ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc);
+ /* only send once per connect */
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->ses->status != CifsGood ||
+ (tcon->tidStatus != CifsNew &&
+ tcon->tidStatus != CifsNeedTcon)) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
+ }
+ tcon->tidStatus = CifsInTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc);
+ if (rc) {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInTcon)
+ tcon->tidStatus = CifsNeedTcon;
+ spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tidStatus == CifsInTcon)
+ tcon->tidStatus = CifsGood;
+ spin_unlock(&cifs_tcp_ses_lock);
+ tcon->need_reconnect = false;
+ }
+
+ return rc;
}
#endif
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index e9b0fa2a9614..dd9643751671 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -1355,7 +1355,7 @@ static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cach
}
cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__);
- cifs_ses_mark_for_reconnect(tcon->ses);
+ cifs_reconnect(tcon->ses->server, true);
}
/* Refresh dfs referral of tcon and mark it for reconnect if needed */
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 6e8e7cc26ae2..ce9b22aecfba 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -22,6 +22,7 @@
#include "cifs_unicode.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
+#include "fscache.h"
static void
renew_parental_timestamps(struct dentry *direntry)
@@ -507,8 +508,12 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
server->ops->close(xid, tcon, &fid);
cifs_del_pending_open(&open);
rc = -ENOMEM;
+ goto out;
}
+ fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
+ file->f_mode & FMODE_WRITE);
+
out:
cifs_put_tlink(tlink);
out_free_xid:
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9fee3af83a73..59334be9ed3b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -376,8 +376,6 @@ static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
struct cifsLockInfo *li, *tmp;
struct super_block *sb = inode->i_sb;
- cifs_fscache_release_inode_cookie(inode);
-
/*
* Delete any outstanding lock records. We'll lose them when the file
* is closed anyway.
@@ -570,7 +568,7 @@ int cifs_open(struct inode *inode, struct file *file)
spin_lock(&CIFS_I(inode)->deferred_lock);
cifs_del_deferred_close(cfile);
spin_unlock(&CIFS_I(inode)->deferred_lock);
- goto out;
+ goto use_cache;
} else {
_cifsFileInfo_put(cfile, true, false);
}
@@ -632,8 +630,6 @@ int cifs_open(struct inode *inode, struct file *file)
goto out;
}
- cifs_fscache_set_inode_cookie(inode, file);
-
if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
/*
* Time to set mode which we can not set earlier due to
@@ -652,6 +648,15 @@ int cifs_open(struct inode *inode, struct file *file)
cfile->pid);
}
+use_cache:
+ fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
+ file->f_mode & FMODE_WRITE);
+ if (file->f_flags & O_DIRECT &&
+ (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
+ file->f_flags & O_APPEND))
+ cifs_invalidate_cache(file_inode(file),
+ FSCACHE_INVAL_DIO_WRITE);
+
out:
free_dentry_path(page);
free_xid(xid);
@@ -876,6 +881,8 @@ int cifs_close(struct inode *inode, struct file *file)
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_deferred_close *dclose;
+ cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
+
if (file->private_data != NULL) {
cfile = file->private_data;
file->private_data = NULL;
@@ -886,7 +893,6 @@ int cifs_close(struct inode *inode, struct file *file)
dclose) {
if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
inode->i_ctime = inode->i_mtime = current_time(inode);
- cifs_fscache_update_inode_cookie(inode);
}
spin_lock(&cinode->deferred_lock);
cifs_add_deferred_close(cfile, dclose);
@@ -4198,10 +4204,12 @@ static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct file *file = vmf->vma->vm_file;
- struct inode *inode = file_inode(file);
- cifs_fscache_wait_on_page_write(inode, page);
+#ifdef CONFIG_CIFS_FSCACHE
+ if (PageFsCache(page) &&
+ wait_on_page_fscache_killable(page) < 0)
+ return VM_FAULT_RETRY;
+#endif
lock_page(page);
return VM_FAULT_LOCKED;
@@ -4275,8 +4283,6 @@ cifs_readv_complete(struct work_struct *work)
if (rdata->result == 0 ||
(rdata->result == -EAGAIN && got_bytes))
cifs_readpage_to_fscache(rdata->mapping->host, page);
- else
- cifs_fscache_uncache_page(rdata->mapping->host, page);
got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
@@ -4593,11 +4599,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
kref_put(&rdata->refcount, cifs_readdata_release);
}
- /* Any pages that have been shown to fscache but didn't get added to
- * the pagecache must be uncached before they get returned to the
- * allocator.
- */
- cifs_fscache_readpages_cancel(mapping->host, page_list);
free_xid(xid);
return rc;
}
@@ -4801,17 +4802,19 @@ static int cifs_release_page(struct page *page, gfp_t gfp)
{
if (PagePrivate(page))
return 0;
-
- return cifs_fscache_release_page(page, gfp);
+ if (PageFsCache(page)) {
+ if (current_is_kswapd() || !(gfp & __GFP_FS))
+ return false;
+ wait_on_page_fscache(page);
+ }
+ fscache_note_page_release(cifs_inode_cookie(page->mapping->host));
+ return true;
}
static void cifs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length)
{
- struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
-
- if (offset == 0 && length == PAGE_SIZE)
- cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
+ wait_on_page_fscache(page);
}
static int cifs_launder_page(struct page *page)
@@ -4831,7 +4834,7 @@ static int cifs_launder_page(struct page *page)
if (clear_page_dirty_for_io(page))
rc = cifs_writepage_locked(page, &wbc);
- cifs_fscache_invalidate_page(page, page->mapping->host);
+ wait_on_page_fscache(page);
return rc;
}
@@ -4988,6 +4991,19 @@ static void cifs_swap_deactivate(struct file *file)
/* do we need to unpin (or unlock) the file */
}
+/*
+ * Mark a page as having been made dirty and thus needing writeback. We also
+ * need to pin the cache object to write back to.
+ */
+#ifdef CONFIG_CIFS_FSCACHE
+static int cifs_set_page_dirty(struct page *page)
+{
+ return fscache_set_page_dirty(page, cifs_inode_cookie(page->mapping->host));
+}
+#else
+#define cifs_set_page_dirty __set_page_dirty_nobuffers
+#endif
+
const struct address_space_operations cifs_addr_ops = {
.readpage = cifs_readpage,
.readpages = cifs_readpages,
@@ -4995,7 +5011,7 @@ const struct address_space_operations cifs_addr_ops = {
.writepages = cifs_writepages,
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .set_page_dirty = cifs_set_page_dirty,
.releasepage = cifs_release_page,
.direct_IO = cifs_direct_io,
.invalidatepage = cifs_invalidate_page,
@@ -5020,7 +5036,7 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
.writepages = cifs_writepages,
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .set_page_dirty = cifs_set_page_dirty,
.releasepage = cifs_release_page,
.invalidatepage = cifs_invalidate_page,
.launder_page = cifs_launder_page,
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index e3ed25dc6f3f..7ec35f3f0a5f 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -37,6 +37,8 @@
#include "rfc1002pdu.h"
#include "fs_context.h"
+static DEFINE_MUTEX(cifs_mount_mutex);
+
static const match_table_t cifs_smb_version_tokens = {
{ Smb_1, SMB1_VERSION_STRING },
{ Smb_20, SMB20_VERSION_STRING},
@@ -707,10 +709,14 @@ static int smb3_get_tree_common(struct fs_context *fc)
static int smb3_get_tree(struct fs_context *fc)
{
int err = smb3_fs_context_validate(fc);
+ int ret;
if (err)
return err;
- return smb3_get_tree_common(fc);
+ mutex_lock(&cifs_mount_mutex);
+ ret = smb3_get_tree_common(fc);
+ mutex_unlock(&cifs_mount_mutex);
+ return ret;
}
static void smb3_fs_context_free(struct fs_context *fc)
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 003c5f1f4dfb..efaac4d5ff55 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -12,250 +12,136 @@
#include "cifs_fs_sb.h"
#include "cifsproto.h"
-/*
- * Key layout of CIFS server cache index object
- */
-struct cifs_server_key {
- __u64 conn_id;
-} __packed;
-
-/*
- * Get a cookie for a server object keyed by {IPaddress,port,family} tuple
- */
-void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
-{
- struct cifs_server_key key;
-
- /*
- * Check if cookie was already initialized so don't reinitialize it.
- * In the future, as we integrate with newer fscache features,
- * we may want to instead add a check if cookie has changed
- */
- if (server->fscache)
- return;
-
- memset(&key, 0, sizeof(key));
- key.conn_id = server->conn_id;
-
- server->fscache =
- fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
- &cifs_fscache_server_index_def,
- &key, sizeof(key),
- NULL, 0,
- server, 0, true);
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
- __func__, server, server->fscache);
-}
-
-void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
+static void cifs_fscache_fill_volume_coherency(
+ struct cifs_tcon *tcon,
+ struct cifs_fscache_volume_coherency_data *cd)
{
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
- __func__, server, server->fscache);
- fscache_relinquish_cookie(server->fscache, NULL, false);
- server->fscache = NULL;
+ memset(cd, 0, sizeof(*cd));
+ cd->resource_id = cpu_to_le64(tcon->resource_id);
+ cd->vol_create_time = tcon->vol_create_time;
+ cd->vol_serial_number = cpu_to_le32(tcon->vol_serial_number);
}
-void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
+int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
{
+ struct cifs_fscache_volume_coherency_data cd;
struct TCP_Server_Info *server = tcon->ses->server;
+ struct fscache_volume *vcookie;
+ const struct sockaddr *sa = (struct sockaddr *)&server->dstaddr;
+ size_t slen, i;
char *sharename;
- struct cifs_fscache_super_auxdata auxdata;
+ char *key;
+ int ret = -ENOMEM;
- /*
- * Check if cookie was already initialized so don't reinitialize it.
- * In the future, as we integrate with newer fscache features,
- * we may want to instead add a check if cookie has changed
- */
- if (tcon->fscache)
- return;
+ tcon->fscache = NULL;
+ switch (sa->sa_family) {
+ case AF_INET:
+ case AF_INET6:
+ break;
+ default:
+ cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
+ return -EINVAL;
+ }
+
+ memset(&key, 0, sizeof(key));
sharename = extract_sharename(tcon->treeName);
if (IS_ERR(sharename)) {
cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
- tcon->fscache = NULL;
- return;
+ return -EINVAL;
}
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.resource_id = tcon->resource_id;
- auxdata.vol_create_time = tcon->vol_create_time;
- auxdata.vol_serial_number = tcon->vol_serial_number;
+ slen = strlen(sharename);
+ for (i = 0; i < slen; i++)
+ if (sharename[i] == '/')
+ sharename[i] = ';';
+
+ key = kasprintf(GFP_KERNEL, "cifs,%pISpc,%s", sa, sharename);
+ if (!key)
+ goto out;
+
+ cifs_fscache_fill_volume_coherency(tcon, &cd);
+ vcookie = fscache_acquire_volume(key,
+ NULL, /* preferred_cache */
+ &cd, sizeof(cd));
+ cifs_dbg(FYI, "%s: (%s/0x%p)\n", __func__, key, vcookie);
+ if (IS_ERR(vcookie)) {
+ if (vcookie != ERR_PTR(-EBUSY)) {
+ ret = PTR_ERR(vcookie);
+ goto out_2;
+ }
+ pr_err("Cache volume key already in use (%s)\n", key);
+ vcookie = NULL;
+ }
- tcon->fscache =
- fscache_acquire_cookie(server->fscache,
- &cifs_fscache_super_index_def,
- sharename, strlen(sharename),
- &auxdata, sizeof(auxdata),
- tcon, 0, true);
+ tcon->fscache = vcookie;
+ ret = 0;
+out_2:
+ kfree(key);
+out:
kfree(sharename);
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
- __func__, server->fscache, tcon->fscache);
+ return ret;
}
void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
{
- struct cifs_fscache_super_auxdata auxdata;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.resource_id = tcon->resource_id;
- auxdata.vol_create_time = tcon->vol_create_time;
- auxdata.vol_serial_number = tcon->vol_serial_number;
+ struct cifs_fscache_volume_coherency_data cd;
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, tcon->fscache);
- fscache_relinquish_cookie(tcon->fscache, &auxdata, false);
- tcon->fscache = NULL;
-}
-
-static void cifs_fscache_acquire_inode_cookie(struct cifsInodeInfo *cifsi,
- struct cifs_tcon *tcon)
-{
- struct cifs_fscache_inode_auxdata auxdata;
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
- auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
- auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
-
- cifsi->fscache =
- fscache_acquire_cookie(tcon->fscache,
- &cifs_fscache_inode_object_def,
- &cifsi->uniqueid, sizeof(cifsi->uniqueid),
- &auxdata, sizeof(auxdata),
- cifsi, cifsi->vfs_inode.i_size, true);
+ cifs_fscache_fill_volume_coherency(tcon, &cd);
+ fscache_relinquish_volume(tcon->fscache, &cd, false);
+ tcon->fscache = NULL;
}
-static void cifs_fscache_enable_inode_cookie(struct inode *inode)
+void cifs_fscache_get_inode_cookie(struct inode *inode)
{
+ struct cifs_fscache_inode_coherency_data cd;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
- if (cifsi->fscache)
- return;
-
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE))
- return;
-
- cifs_fscache_acquire_inode_cookie(cifsi, tcon);
+ cifs_fscache_fill_coherency(&cifsi->vfs_inode, &cd);
- cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n",
- __func__, tcon->fscache, cifsi->fscache);
+ cifsi->fscache =
+ fscache_acquire_cookie(tcon->fscache, 0,
+ &cifsi->uniqueid, sizeof(cifsi->uniqueid),
+ &cd, sizeof(cd),
+ i_size_read(&cifsi->vfs_inode));
}
-void cifs_fscache_release_inode_cookie(struct inode *inode)
+void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update)
{
- struct cifs_fscache_inode_auxdata auxdata;
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
-
- if (cifsi->fscache) {
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
- auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
- auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
+ if (update) {
+ struct cifs_fscache_inode_coherency_data cd;
+ loff_t i_size = i_size_read(inode);
- cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
- /* fscache_relinquish_cookie does not seem to update auxdata */
- fscache_update_cookie(cifsi->fscache, &auxdata);
- fscache_relinquish_cookie(cifsi->fscache, &auxdata, false);
- cifsi->fscache = NULL;
+ cifs_fscache_fill_coherency(inode, &cd);
+ fscache_unuse_cookie(cifs_inode_cookie(inode), &cd, &i_size);
+ } else {
+ fscache_unuse_cookie(cifs_inode_cookie(inode), NULL, NULL);
}
}
-void cifs_fscache_update_inode_cookie(struct inode *inode)
+void cifs_fscache_release_inode_cookie(struct inode *inode)
{
- struct cifs_fscache_inode_auxdata auxdata;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
if (cifsi->fscache) {
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
- auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
- auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
-
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
- fscache_update_cookie(cifsi->fscache, &auxdata);
- }
-}
-
-void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
-{
- cifs_fscache_enable_inode_cookie(inode);
-}
-
-void cifs_fscache_reset_inode_cookie(struct inode *inode)
-{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
- struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
- struct fscache_cookie *old = cifsi->fscache;
-
- if (cifsi->fscache) {
- /* retire the current fscache cache and get a new one */
- fscache_relinquish_cookie(cifsi->fscache, NULL, true);
-
- cifs_fscache_acquire_inode_cookie(cifsi, tcon);
- cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n",
- __func__, cifsi->fscache, old);
+ fscache_relinquish_cookie(cifsi->fscache, false);
+ cifsi->fscache = NULL;
}
}
-int cifs_fscache_release_page(struct page *page, gfp_t gfp)
-{
- if (PageFsCache(page)) {
- struct inode *inode = page->mapping->host;
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
-
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
- __func__, page, cifsi->fscache);
- if (!fscache_maybe_release_page(cifsi->fscache, page, gfp))
- return 0;
- }
-
- return 1;
-}
-
-static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx,
- int error)
-{
- cifs_dbg(FYI, "%s: (0x%p/%d)\n", __func__, page, error);
- if (!error)
- SetPageUptodate(page);
- unlock_page(page);
-}
-
/*
* Retrieve a page from FS-Cache
*/
int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
{
- int ret;
-
cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n",
__func__, CIFS_I(inode)->fscache, page, inode);
- ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page,
- cifs_readpage_from_fscache_complete,
- NULL,
- GFP_KERNEL);
- switch (ret) {
-
- case 0: /* page found in fscache, read submitted */
- cifs_dbg(FYI, "%s: submitted\n", __func__);
- return ret;
- case -ENOBUFS: /* page won't be cached */
- case -ENODATA: /* page not in cache */
- cifs_dbg(FYI, "%s: %d\n", __func__, ret);
- return 1;
-
- default:
- cifs_dbg(VFS, "unknown error ret = %d\n", ret);
- }
- return ret;
+ return -ENOBUFS; // Needs conversion to using netfslib
}
/*
@@ -266,78 +152,19 @@ int __cifs_readpages_from_fscache(struct inode *inode,
struct list_head *pages,
unsigned *nr_pages)
{
- int ret;
-
cifs_dbg(FYI, "%s: (0x%p/%u/0x%p)\n",
__func__, CIFS_I(inode)->fscache, *nr_pages, inode);
- ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping,
- pages, nr_pages,
- cifs_readpage_from_fscache_complete,
- NULL,
- mapping_gfp_mask(mapping));
- switch (ret) {
- case 0: /* read submitted to the cache for all pages */
- cifs_dbg(FYI, "%s: submitted\n", __func__);
- return ret;
-
- case -ENOBUFS: /* some pages are not cached and can't be */
- case -ENODATA: /* some pages are not cached */
- cifs_dbg(FYI, "%s: no page\n", __func__);
- return 1;
-
- default:
- cifs_dbg(FYI, "unknown error ret = %d\n", ret);
- }
-
- return ret;
+ return -ENOBUFS; // Needs conversion to using netfslib
}
void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
{
struct cifsInodeInfo *cifsi = CIFS_I(inode);
- int ret;
WARN_ON(!cifsi->fscache);
cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n",
__func__, cifsi->fscache, page, inode);
- ret = fscache_write_page(cifsi->fscache, page,
- cifsi->vfs_inode.i_size, GFP_KERNEL);
- if (ret != 0)
- fscache_uncache_page(cifsi->fscache, page);
-}
-
-void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages)
-{
- cifs_dbg(FYI, "%s: (fsc: %p, i: %p)\n",
- __func__, CIFS_I(inode)->fscache, inode);
- fscache_readpages_cancel(CIFS_I(inode)->fscache, pages);
-}
-
-void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
-{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct fscache_cookie *cookie = cifsi->fscache;
-
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
- fscache_wait_on_page_write(cookie, page);
- fscache_uncache_page(cookie, page);
-}
-
-void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
-{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct fscache_cookie *cookie = cifsi->fscache;
-
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
- fscache_wait_on_page_write(cookie, page);
-}
-
-void __cifs_fscache_uncache_page(struct inode *inode, struct page *page)
-{
- struct cifsInodeInfo *cifsi = CIFS_I(inode);
- struct fscache_cookie *cookie = cifsi->fscache;
- cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
- fscache_uncache_page(cookie, page);
+ // Needs conversion to using netfslib
}
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 9baa1d0f22bd..c6ca49ac33d4 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -13,84 +13,71 @@
#include "cifsglob.h"
-#ifdef CONFIG_CIFS_FSCACHE
-
/*
- * Auxiliary data attached to CIFS superblock within the cache
+ * Coherency data attached to CIFS volume within the cache
*/
-struct cifs_fscache_super_auxdata {
- u64 resource_id; /* unique server resource id */
+struct cifs_fscache_volume_coherency_data {
+ __le64 resource_id; /* unique server resource id */
__le64 vol_create_time;
- u32 vol_serial_number;
+ __le32 vol_serial_number;
} __packed;
/*
- * Auxiliary data attached to CIFS inode within the cache
+ * Coherency data attached to CIFS inode within the cache.
*/
-struct cifs_fscache_inode_auxdata {
- u64 last_write_time_sec;
- u64 last_change_time_sec;
- u32 last_write_time_nsec;
- u32 last_change_time_nsec;
- u64 eof;
+struct cifs_fscache_inode_coherency_data {
+ __le64 last_write_time_sec;
+ __le64 last_change_time_sec;
+ __le32 last_write_time_nsec;
+ __le32 last_change_time_nsec;
};
-/*
- * cache.c
- */
-extern struct fscache_netfs cifs_fscache_netfs;
-extern const struct fscache_cookie_def cifs_fscache_server_index_def;
-extern const struct fscache_cookie_def cifs_fscache_super_index_def;
-extern const struct fscache_cookie_def cifs_fscache_inode_object_def;
-
-extern int cifs_fscache_register(void);
-extern void cifs_fscache_unregister(void);
+#ifdef CONFIG_CIFS_FSCACHE
/*
* fscache.c
*/
-extern void cifs_fscache_get_client_cookie(struct TCP_Server_Info *);
-extern void cifs_fscache_release_client_cookie(struct TCP_Server_Info *);
-extern void cifs_fscache_get_super_cookie(struct cifs_tcon *);
+extern int cifs_fscache_get_super_cookie(struct cifs_tcon *);
extern void cifs_fscache_release_super_cookie(struct cifs_tcon *);
+extern void cifs_fscache_get_inode_cookie(struct inode *inode);
extern void cifs_fscache_release_inode_cookie(struct inode *);
-extern void cifs_fscache_update_inode_cookie(struct inode *inode);
-extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *);
-extern void cifs_fscache_reset_inode_cookie(struct inode *);
+extern void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update);
+
+static inline
+void cifs_fscache_fill_coherency(struct inode *inode,
+ struct cifs_fscache_inode_coherency_data *cd)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ memset(cd, 0, sizeof(*cd));
+ cd->last_write_time_sec = cpu_to_le64(cifsi->vfs_inode.i_mtime.tv_sec);
+ cd->last_write_time_nsec = cpu_to_le32(cifsi->vfs_inode.i_mtime.tv_nsec);
+ cd->last_change_time_sec = cpu_to_le64(cifsi->vfs_inode.i_ctime.tv_sec);
+ cd->last_change_time_nsec = cpu_to_le32(cifsi->vfs_inode.i_ctime.tv_nsec);
+}
+
-extern void __cifs_fscache_invalidate_page(struct page *, struct inode *);
-extern void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page);
-extern void __cifs_fscache_uncache_page(struct inode *inode, struct page *page);
extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
extern int __cifs_readpages_from_fscache(struct inode *,
struct address_space *,
struct list_head *,
unsigned *);
-extern void __cifs_fscache_readpages_cancel(struct inode *, struct list_head *);
-
extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
-static inline void cifs_fscache_invalidate_page(struct page *page,
- struct inode *inode)
+static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode)
{
- if (PageFsCache(page))
- __cifs_fscache_invalidate_page(page, inode);
+ return CIFS_I(inode)->fscache;
}
-static inline void cifs_fscache_wait_on_page_write(struct inode *inode,
- struct page *page)
+static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags)
{
- if (PageFsCache(page))
- __cifs_fscache_wait_on_page_write(inode, page);
-}
+ struct cifs_fscache_inode_coherency_data cd;
-static inline void cifs_fscache_uncache_page(struct inode *inode,
- struct page *page)
-{
- if (PageFsCache(page))
- __cifs_fscache_uncache_page(inode, page);
+ cifs_fscache_fill_coherency(inode, &cd);
+ fscache_invalidate(cifs_inode_cookie(inode), &cd,
+ i_size_read(inode), flags);
}
static inline int cifs_readpage_from_fscache(struct inode *inode,
@@ -120,41 +107,21 @@ static inline void cifs_readpage_to_fscache(struct inode *inode,
__cifs_readpage_to_fscache(inode, page);
}
-static inline void cifs_fscache_readpages_cancel(struct inode *inode,
- struct list_head *pages)
+#else /* CONFIG_CIFS_FSCACHE */
+static inline
+void cifs_fscache_fill_coherency(struct inode *inode,
+ struct cifs_fscache_inode_coherency_data *cd)
{
- if (CIFS_I(inode)->fscache)
- return __cifs_fscache_readpages_cancel(inode, pages);
}
-#else /* CONFIG_CIFS_FSCACHE */
-static inline int cifs_fscache_register(void) { return 0; }
-static inline void cifs_fscache_unregister(void) {}
-
-static inline void
-cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) {}
-static inline void
-cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) {}
-static inline void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) {}
-static inline void
-cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) {}
+static inline int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) { return 0; }
+static inline void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) {}
+static inline void cifs_fscache_get_inode_cookie(struct inode *inode) {}
static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
-static inline void cifs_fscache_update_inode_cookie(struct inode *inode) {}
-static inline void cifs_fscache_set_inode_cookie(struct inode *inode,
- struct file *filp) {}
-static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {}
-static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
-{
- return 1; /* May release page */
-}
-
-static inline void cifs_fscache_invalidate_page(struct page *page,
- struct inode *inode) {}
-static inline void cifs_fscache_wait_on_page_write(struct inode *inode,
- struct page *page) {}
-static inline void cifs_fscache_uncache_page(struct inode *inode,
- struct page *page) {}
+static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {}
+static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
+static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
static inline int
cifs_readpage_from_fscache(struct inode *inode, struct page *page)
@@ -173,11 +140,6 @@ static inline int cifs_readpages_from_fscache(struct inode *inode,
static inline void cifs_readpage_to_fscache(struct inode *inode,
struct page *page) {}
-static inline void cifs_fscache_readpages_cancel(struct inode *inode,
- struct list_head *pages)
-{
-}
-
#endif /* CONFIG_CIFS_FSCACHE */
#endif /* _CIFS_FSCACHE_H */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 279622e4eb1c..7d8b3ceb2af3 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -952,6 +952,12 @@ cifs_get_inode_info(struct inode **inode,
rc = server->ops->query_path_info(xid, tcon, cifs_sb,
full_path, tmp_data,
&adjust_tz, &is_reparse_point);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (rc == -ENOENT && is_tcon_dfs(tcon))
+ rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon,
+ cifs_sb,
+ full_path);
+#endif
data = tmp_data;
}
@@ -1298,10 +1304,7 @@ retry_iget5_locked:
inode->i_flags |= S_NOATIME | S_NOCMTIME;
if (inode->i_state & I_NEW) {
inode->i_ino = hash;
-#ifdef CONFIG_CIFS_FSCACHE
- /* initialize per-inode cache cookie pointer */
- CIFS_I(inode)->fscache = NULL;
-#endif
+ cifs_fscache_get_inode_cookie(inode);
unlock_new_inode(inode);
}
}
@@ -1370,6 +1373,7 @@ iget_no_retry:
iget_failed(inode);
inode = ERR_PTR(rc);
}
+
out:
kfree(path);
free_xid(xid);
@@ -2257,6 +2261,8 @@ cifs_dentry_needs_reval(struct dentry *dentry)
int
cifs_invalidate_mapping(struct inode *inode)
{
+ struct cifs_fscache_inode_coherency_data cd;
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
int rc = 0;
if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
@@ -2266,7 +2272,8 @@ cifs_invalidate_mapping(struct inode *inode)
__func__, inode);
}
- cifs_fscache_reset_inode_cookie(inode);
+ cifs_fscache_fill_coherency(&cifsi->vfs_inode, &cd);
+ fscache_invalidate(cifs_inode_cookie(inode), &cd, i_size_read(inode), 0);
return rc;
}
@@ -2771,8 +2778,10 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
goto out;
if ((attrs->ia_valid & ATTR_SIZE) &&
- attrs->ia_size != i_size_read(inode))
+ attrs->ia_size != i_size_read(inode)) {
truncate_setsize(inode, attrs->ia_size);
+ fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size);
+ }
setattr_copy(&init_user_ns, inode, attrs);
mark_inode_dirty(inode);
@@ -2967,8 +2976,10 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
goto cifs_setattr_exit;
if ((attrs->ia_valid & ATTR_SIZE) &&
- attrs->ia_size != i_size_read(inode))
+ attrs->ia_size != i_size_read(inode)) {
truncate_setsize(inode, attrs->ia_size);
+ fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size);
+ }
setattr_copy(&init_user_ns, inode, attrs);
mark_inode_dirty(inode);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 5148d48d6a35..56598f7dbe00 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1302,4 +1302,53 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
return 0;
}
+
+/** cifs_dfs_query_info_nonascii_quirk
+ * Handle weird Windows SMB server behaviour. It responds with
+ * STATUS_OBJECT_NAME_INVALID code to SMB2 QUERY_INFO request
+ * for "\<server>\<dfsname>\<linkpath>" DFS reference,
+ * where <dfsname> contains non-ASCII unicode symbols.
+ *
+ * Check such DFS reference and emulate -ENOENT if it is actual.
+ */
+int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ const char *linkpath)
+{
+ char *treename, *dfspath, sep;
+ int treenamelen, linkpathlen, rc;
+
+ treename = tcon->treeName;
+ /* MS-DFSC: All paths in REQ_GET_DFS_REFERRAL and RESP_GET_DFS_REFERRAL
+ * messages MUST be encoded with exactly one leading backslash, not two
+ * leading backslashes.
+ */
+ sep = CIFS_DIR_SEP(cifs_sb);
+ if (treename[0] == sep && treename[1] == sep)
+ treename++;
+ linkpathlen = strlen(linkpath);
+ treenamelen = strnlen(treename, MAX_TREE_SIZE + 1);
+ dfspath = kzalloc(treenamelen + linkpathlen + 1, GFP_KERNEL);
+ if (!dfspath)
+ return -ENOMEM;
+ if (treenamelen)
+ memcpy(dfspath, treename, treenamelen);
+ memcpy(dfspath + treenamelen, linkpath, linkpathlen);
+ rc = dfs_cache_find(xid, tcon->ses, cifs_sb->local_nls,
+ cifs_remap(cifs_sb), dfspath, NULL, NULL);
+ if (rc == 0) {
+ cifs_dbg(FYI, "DFS ref '%s' is found, emulate -EREMOTE\n",
+ dfspath);
+ rc = -EREMOTE;
+ } else if (rc == -EEXIST) {
+ cifs_dbg(FYI, "DFS ref '%s' is not found, emulate -ENOENT\n",
+ dfspath);
+ rc = -ENOENT;
+ } else {
+ cifs_dbg(FYI, "%s: dfs_cache_find returned %d\n", __func__, rc);
+ }
+ kfree(dfspath);
+ return rc;
+}
#endif
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index fa9fbd6a819c..ebe236b9d9f5 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -896,10 +896,7 @@ map_and_check_smb_error(struct mid_q_entry *mid, bool logErr)
if (class == ERRSRV && code == ERRbaduid) {
cifs_dbg(FYI, "Server returned 0x%x, reconnecting session...\n",
code);
- spin_lock(&GlobalMid_Lock);
- if (mid->server->tcpStatus != CifsExiting)
- mid->server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&GlobalMid_Lock);
+ cifs_reconnect(mid->server, false);
}
}
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index fe707f45da89..298458404252 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -40,7 +40,7 @@
#define NTLMSSP_REQUEST_NON_NT_KEY 0x400000
#define NTLMSSP_NEGOTIATE_TARGET_INFO 0x800000
/* #define reserved4 0x1000000 */
-#define NTLMSSP_NEGOTIATE_VERSION 0x2000000 /* we do not set */
+#define NTLMSSP_NEGOTIATE_VERSION 0x2000000 /* we only set for SMB2+ */
/* #define reserved3 0x4000000 */
/* #define reserved2 0x8000000 */
/* #define reserved1 0x10000000 */
@@ -87,6 +87,30 @@ typedef struct _NEGOTIATE_MESSAGE {
/* followed by WorkstationString */
} __attribute__((packed)) NEGOTIATE_MESSAGE, *PNEGOTIATE_MESSAGE;
+#define NTLMSSP_REVISION_W2K3 0x0F
+
+/* See MS-NLMP section 2.2.2.10 */
+struct ntlmssp_version {
+ __u8 ProductMajorVersion;
+ __u8 ProductMinorVersion;
+ __le16 ProductBuild; /* we send the cifs.ko module version here */
+ __u8 Reserved[3];
+ __u8 NTLMRevisionCurrent; /* currently 0x0F */
+} __packed;
+
+/* see MS-NLMP section 2.2.1.1 */
+struct negotiate_message {
+ __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
+ __le32 MessageType; /* NtLmNegotiate = 1 */
+ __le32 NegotiateFlags;
+ SECURITY_BUFFER DomainName; /* RFC 1001 style and ASCII */
+ SECURITY_BUFFER WorkstationName; /* RFC 1001 and ASCII */
+ struct ntlmssp_version Version;
+ /* SECURITY_BUFFER */
+ char DomainString[0];
+ /* followed by WorkstationString */
+} __packed;
+
typedef struct _CHALLENGE_MESSAGE {
__u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
__le32 MessageType; /* NtLmChallenge = 2 */
@@ -121,7 +145,13 @@ typedef struct _AUTHENTICATE_MESSAGE {
int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses);
int build_ntlmssp_negotiate_blob(unsigned char **pbuffer, u16 *buflen,
struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
+ const struct nls_table *nls_cp);
+int build_ntlmssp_smb3_negotiate_blob(unsigned char **pbuffer, u16 *buflen,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp);
int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 035dc3e245dc..dc3b16d1be09 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -17,6 +17,8 @@
#include "nterr.h"
#include <linux/utsname.h>
#include <linux/slab.h>
+#include <linux/version.h>
+#include "cifsfs.h"
#include "cifs_spnego.h"
#include "smb2proto.h"
#include "fs_context.h"
@@ -65,6 +67,55 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface)
return false;
}
+/* channel helper functions. assumed that chan_lock is held by caller. */
+
+unsigned int
+cifs_ses_get_chan_index(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
+{
+ unsigned int i;
+
+ for (i = 0; i < ses->chan_count; i++) {
+ if (ses->chans[i].server == server)
+ return i;
+ }
+
+ /* If we didn't find the channel, it is likely a bug */
+ WARN_ON(1);
+ return 0;
+}
+
+void
+cifs_chan_set_need_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
+{
+ unsigned int chan_index = cifs_ses_get_chan_index(ses, server);
+
+ set_bit(chan_index, &ses->chans_need_reconnect);
+ cifs_dbg(FYI, "Set reconnect bitmask for chan %u; now 0x%lx\n",
+ chan_index, ses->chans_need_reconnect);
+}
+
+void
+cifs_chan_clear_need_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
+{
+ unsigned int chan_index = cifs_ses_get_chan_index(ses, server);
+
+ clear_bit(chan_index, &ses->chans_need_reconnect);
+ cifs_dbg(FYI, "Cleared reconnect bitmask for chan %u; now 0x%lx\n",
+ chan_index, ses->chans_need_reconnect);
+}
+
+bool
+cifs_chan_needs_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
+{
+ unsigned int chan_index = cifs_ses_get_chan_index(ses, server);
+
+ return CIFS_CHAN_NEEDS_RECONNECT(ses, chan_index);
+}
+
/* returns number of channels added */
int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
{
@@ -87,10 +138,10 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
left = ses->chan_max - ses->chan_count;
if (left <= 0) {
+ spin_unlock(&ses->chan_lock);
cifs_dbg(FYI,
"ses already at max_channels (%zu), nothing to open\n",
ses->chan_max);
- spin_unlock(&ses->chan_lock);
return 0;
}
@@ -261,9 +312,8 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
chan_server = cifs_get_tcp_session(&ctx, ses->server);
- mutex_lock(&ses->session_mutex);
spin_lock(&ses->chan_lock);
- chan = ses->binding_chan = &ses->chans[ses->chan_count];
+ chan = &ses->chans[ses->chan_count];
chan->server = chan_server;
if (IS_ERR(chan->server)) {
rc = PTR_ERR(chan->server);
@@ -271,8 +321,15 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
spin_unlock(&ses->chan_lock);
goto out;
}
+ ses->chan_count++;
+ atomic_set(&ses->chan_seq, 0);
+
+ /* Mark this channel as needing connect/setup */
+ cifs_chan_set_need_reconnect(ses, chan->server);
+
spin_unlock(&ses->chan_lock);
+ mutex_lock(&ses->session_mutex);
/*
* We need to allocate the server crypto now as we will need
* to sign packets before we generate the channel signing key
@@ -281,37 +338,29 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
rc = smb311_crypto_shash_allocate(chan->server);
if (rc) {
cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__);
+ mutex_unlock(&ses->session_mutex);
goto out;
}
- ses->binding = true;
- rc = cifs_negotiate_protocol(xid, ses);
- if (rc)
- goto out;
-
- rc = cifs_setup_session(xid, ses, cifs_sb->local_nls);
- if (rc)
- goto out;
-
- /* success, put it on the list
- * XXX: sharing ses between 2 tcp servers is not possible, the
- * way "internal" linked lists works in linux makes element
- * only able to belong to one list
- *
- * the binding session is already established so the rest of
- * the code should be able to look it up, no need to add the
- * ses to the new server.
- */
+ rc = cifs_negotiate_protocol(xid, ses, chan->server);
+ if (!rc)
+ rc = cifs_setup_session(xid, ses, chan->server, cifs_sb->local_nls);
- spin_lock(&ses->chan_lock);
- ses->chan_count++;
- atomic_set(&ses->chan_seq, 0);
- spin_unlock(&ses->chan_lock);
+ mutex_unlock(&ses->session_mutex);
out:
- ses->binding = false;
- ses->binding_chan = NULL;
- mutex_unlock(&ses->session_mutex);
+ if (rc && chan->server) {
+ spin_lock(&ses->chan_lock);
+ /* we rely on all bits beyond chan_count to be clear */
+ cifs_chan_clear_need_reconnect(ses, chan->server);
+ ses->chan_count--;
+ /*
+ * chan_count should never reach 0 as at least the primary
+ * channel is always allocated
+ */
+ WARN_ON(ses->chan_count < 1);
+ spin_unlock(&ses->chan_lock);
+ }
if (rc && chan->server)
cifs_put_tcp_session(chan->server, 0);
@@ -319,20 +368,9 @@ out:
return rc;
}
-/* Mark all session channels for reconnect */
-void cifs_ses_mark_for_reconnect(struct cifs_ses *ses)
-{
- int i;
-
- for (i = 0; i < ses->chan_count; i++) {
- spin_lock(&GlobalMid_Lock);
- if (ses->chans[i].server->tcpStatus != CifsExiting)
- ses->chans[i].server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&GlobalMid_Lock);
- }
-}
-
-static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
+static __u32 cifs_ssetup_hdr(struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
+ SESSION_SETUP_ANDX *pSMB)
{
__u32 capabilities = 0;
@@ -345,7 +383,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
pSMB->req.MaxBufferSize = cpu_to_le16(min_t(u32,
CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
USHRT_MAX));
- pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
+ pSMB->req.MaxMpxCount = cpu_to_le16(server->maxReq);
pSMB->req.VcNumber = cpu_to_le16(1);
/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
@@ -356,7 +394,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
- if (ses->server->sign)
+ if (server->sign)
pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
if (ses->capabilities & CAP_UNICODE) {
@@ -719,10 +757,10 @@ static inline void cifs_security_buffer_from_str(SECURITY_BUFFER *pbuf,
int build_ntlmssp_negotiate_blob(unsigned char **pbuffer,
u16 *buflen,
struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp)
{
int rc = 0;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
NEGOTIATE_MESSAGE *sec_blob;
__u32 flags;
unsigned char *tmp;
@@ -773,9 +811,78 @@ setup_ntlm_neg_ret:
return rc;
}
+/*
+ * Build ntlmssp blob with additional fields, such as version,
+ * supported by modern servers. For safety limit to SMB3 or later
+ * See notes in MS-NLMP Section 2.2.2.1 e.g.
+ */
+int build_ntlmssp_smb3_negotiate_blob(unsigned char **pbuffer,
+ u16 *buflen,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
+ const struct nls_table *nls_cp)
+{
+ int rc = 0;
+ struct negotiate_message *sec_blob;
+ __u32 flags;
+ unsigned char *tmp;
+ int len;
+
+ len = size_of_ntlmssp_blob(ses, sizeof(struct negotiate_message));
+ *pbuffer = kmalloc(len, GFP_KERNEL);
+ if (!*pbuffer) {
+ rc = -ENOMEM;
+ cifs_dbg(VFS, "Error %d during NTLMSSP allocation\n", rc);
+ *buflen = 0;
+ goto setup_ntlm_smb3_neg_ret;
+ }
+ sec_blob = (struct negotiate_message *)*pbuffer;
+
+ memset(*pbuffer, 0, sizeof(struct negotiate_message));
+ memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
+ sec_blob->MessageType = NtLmNegotiate;
+
+ /* BB is NTLMV2 session security format easier to use here? */
+ flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
+ NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
+ NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC |
+ NTLMSSP_NEGOTIATE_ALWAYS_SIGN | NTLMSSP_NEGOTIATE_SEAL |
+ NTLMSSP_NEGOTIATE_SIGN | NTLMSSP_NEGOTIATE_VERSION;
+ if (!server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
+ flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
+
+ sec_blob->Version.ProductMajorVersion = LINUX_VERSION_MAJOR;
+ sec_blob->Version.ProductMinorVersion = LINUX_VERSION_PATCHLEVEL;
+ sec_blob->Version.ProductBuild = cpu_to_le16(SMB3_PRODUCT_BUILD);
+ sec_blob->Version.NTLMRevisionCurrent = NTLMSSP_REVISION_W2K3;
+
+ tmp = *pbuffer + sizeof(struct negotiate_message);
+ ses->ntlmssp->client_flags = flags;
+ sec_blob->NegotiateFlags = cpu_to_le32(flags);
+
+ /* these fields should be null in negotiate phase MS-NLMP 3.1.5.1.1 */
+ cifs_security_buffer_from_str(&sec_blob->DomainName,
+ NULL,
+ CIFS_MAX_DOMAINNAME_LEN,
+ *pbuffer, &tmp,
+ nls_cp);
+
+ cifs_security_buffer_from_str(&sec_blob->WorkstationName,
+ NULL,
+ CIFS_MAX_WORKSTATION_LEN,
+ *pbuffer, &tmp,
+ nls_cp);
+
+ *buflen = tmp - *pbuffer;
+setup_ntlm_smb3_neg_ret:
+ return rc;
+}
+
+
int build_ntlmssp_auth_blob(unsigned char **pbuffer,
u16 *buflen,
struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp)
{
int rc;
@@ -912,6 +1019,7 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
struct sess_data {
unsigned int xid;
struct cifs_ses *ses;
+ struct TCP_Server_Info *server;
struct nls_table *nls_cp;
void (*func)(struct sess_data *);
int result;
@@ -978,31 +1086,27 @@ static int
sess_establish_session(struct sess_data *sess_data)
{
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
- mutex_lock(&ses->server->srv_mutex);
- if (!ses->server->session_estab) {
- if (ses->server->sign) {
- ses->server->session_key.response =
+ mutex_lock(&server->srv_mutex);
+ if (!server->session_estab) {
+ if (server->sign) {
+ server->session_key.response =
kmemdup(ses->auth_key.response,
ses->auth_key.len, GFP_KERNEL);
- if (!ses->server->session_key.response) {
- mutex_unlock(&ses->server->srv_mutex);
+ if (!server->session_key.response) {
+ mutex_unlock(&server->srv_mutex);
return -ENOMEM;
}
- ses->server->session_key.len =
+ server->session_key.len =
ses->auth_key.len;
}
- ses->server->sequence_number = 0x2;
- ses->server->session_estab = true;
+ server->sequence_number = 0x2;
+ server->session_estab = true;
}
- mutex_unlock(&ses->server->srv_mutex);
+ mutex_unlock(&server->srv_mutex);
cifs_dbg(FYI, "CIFS session established successfully\n");
- spin_lock(&GlobalMid_Lock);
- ses->status = CifsGood;
- ses->need_reconnect = false;
- spin_unlock(&GlobalMid_Lock);
-
return 0;
}
@@ -1036,6 +1140,7 @@ sess_auth_ntlmv2(struct sess_data *sess_data)
SESSION_SETUP_ANDX *pSMB;
char *bcc_ptr;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
__u32 capabilities;
__u16 bytes_remaining;
@@ -1047,7 +1152,7 @@ sess_auth_ntlmv2(struct sess_data *sess_data)
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
bcc_ptr = sess_data->iov[2].iov_base;
- capabilities = cifs_ssetup_hdr(ses, pSMB);
+ capabilities = cifs_ssetup_hdr(ses, server, pSMB);
pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
@@ -1145,6 +1250,7 @@ sess_auth_kerberos(struct sess_data *sess_data)
SESSION_SETUP_ANDX *pSMB;
char *bcc_ptr;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
__u32 capabilities;
__u16 bytes_remaining;
struct key *spnego_key = NULL;
@@ -1159,9 +1265,9 @@ sess_auth_kerberos(struct sess_data *sess_data)
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
bcc_ptr = sess_data->iov[2].iov_base;
- capabilities = cifs_ssetup_hdr(ses, pSMB);
+ capabilities = cifs_ssetup_hdr(ses, server, pSMB);
- spnego_key = cifs_get_spnego_key(ses);
+ spnego_key = cifs_get_spnego_key(ses, server);
if (IS_ERR(spnego_key)) {
rc = PTR_ERR(spnego_key);
spnego_key = NULL;
@@ -1285,12 +1391,13 @@ _sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data)
{
SESSION_SETUP_ANDX *pSMB;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
__u32 capabilities;
char *bcc_ptr;
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
- capabilities = cifs_ssetup_hdr(ses, pSMB);
+ capabilities = cifs_ssetup_hdr(ses, server, pSMB);
if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
cifs_dbg(VFS, "NTLMSSP requires Unicode support\n");
return -ENOSYS;
@@ -1324,6 +1431,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
struct smb_hdr *smb_buf;
SESSION_SETUP_ANDX *pSMB;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
__u16 bytes_remaining;
char *bcc_ptr;
unsigned char *ntlmsspblob = NULL;
@@ -1351,10 +1459,10 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
/* Build security blob before we assemble the request */
rc = build_ntlmssp_negotiate_blob(&ntlmsspblob,
- &blob_len, ses,
+ &blob_len, ses, server,
sess_data->nls_cp);
if (rc)
- goto out;
+ goto out_free_ntlmsspblob;
sess_data->iov[1].iov_len = blob_len;
sess_data->iov[1].iov_base = ntlmsspblob;
@@ -1362,7 +1470,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
rc = _sess_auth_rawntlmssp_assemble_req(sess_data);
if (rc)
- goto out;
+ goto out_free_ntlmsspblob;
rc = sess_sendreceive(sess_data);
@@ -1376,14 +1484,14 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
rc = 0;
if (rc)
- goto out;
+ goto out_free_ntlmsspblob;
cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
if (smb_buf->WordCount != 4) {
rc = -EIO;
cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
- goto out;
+ goto out_free_ntlmsspblob;
}
ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
@@ -1397,10 +1505,13 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
cifs_dbg(VFS, "bad security blob length %d\n",
blob_len);
rc = -EINVAL;
- goto out;
+ goto out_free_ntlmsspblob;
}
rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses);
+
+out_free_ntlmsspblob:
+ kfree(ntlmsspblob);
out:
sess_free_buffer(sess_data);
@@ -1426,6 +1537,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
struct smb_hdr *smb_buf;
SESSION_SETUP_ANDX *pSMB;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
__u16 bytes_remaining;
char *bcc_ptr;
unsigned char *ntlmsspblob = NULL;
@@ -1442,7 +1554,8 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
smb_buf = (struct smb_hdr *)pSMB;
rc = build_ntlmssp_auth_blob(&ntlmsspblob,
- &blob_len, ses, sess_data->nls_cp);
+ &blob_len, ses, server,
+ sess_data->nls_cp);
if (rc)
goto out_free_ntlmsspblob;
sess_data->iov[1].iov_len = blob_len;
@@ -1513,7 +1626,7 @@ out_free_ntlmsspblob:
out:
sess_free_buffer(sess_data);
- if (!rc)
+ if (!rc)
rc = sess_establish_session(sess_data);
/* Cleanup */
@@ -1526,11 +1639,13 @@ out:
sess_data->result = rc;
}
-static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data)
+static int select_sec(struct sess_data *sess_data)
{
int type;
+ struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
- type = cifs_select_sectype(ses->server, ses->sectype);
+ type = cifs_select_sectype(server, ses->sectype);
cifs_dbg(FYI, "sess setup type %d\n", type);
if (type == Unspecified) {
cifs_dbg(VFS, "Unable to select appropriate authentication method!\n");
@@ -1561,7 +1676,8 @@ static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data)
}
int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
- const struct nls_table *nls_cp)
+ struct TCP_Server_Info *server,
+ const struct nls_table *nls_cp)
{
int rc = 0;
struct sess_data *sess_data;
@@ -1575,15 +1691,16 @@ int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
if (!sess_data)
return -ENOMEM;
- rc = select_sec(ses, sess_data);
- if (rc)
- goto out;
-
sess_data->xid = xid;
sess_data->ses = ses;
+ sess_data->server = server;
sess_data->buf0_type = CIFS_NO_BUFFER;
sess_data->nls_cp = (struct nls_table *) nls_cp;
+ rc = select_sec(sess_data);
+ if (rc)
+ goto out;
+
while (sess_data->func)
sess_data->func(sess_data);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 3b83839fc2c2..8272c91e15ef 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -7,6 +7,7 @@
#include <linux/pagemap.h>
#include <linux/vfs.h>
+#include <uapi/linux/magic.h>
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_debug.h"
@@ -163,7 +164,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
{
__u64 mid = 0;
__u16 last_mid, cur_mid;
- bool collision;
+ bool collision, reconnect = false;
spin_lock(&GlobalMid_Lock);
@@ -215,7 +216,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
* an eventual reconnect to clean out the pending_mid_q.
*/
if (num_mids > 32768)
- server->tcpStatus = CifsNeedReconnect;
+ reconnect = true;
if (!collision) {
mid = (__u64)cur_mid;
@@ -225,6 +226,13 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
cur_mid++;
}
spin_unlock(&GlobalMid_Lock);
+
+ if (reconnect) {
+ spin_lock(&cifs_tcp_ses_lock);
+ server->tcpStatus = CifsNeedReconnect;
+ spin_unlock(&cifs_tcp_ses_lock);
+ }
+
return mid;
}
@@ -414,14 +422,16 @@ cifs_need_neg(struct TCP_Server_Info *server)
}
static int
-cifs_negotiate(const unsigned int xid, struct cifs_ses *ses)
+cifs_negotiate(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
int rc;
- rc = CIFSSMBNegotiate(xid, ses);
+ rc = CIFSSMBNegotiate(xid, ses, server);
if (rc == -EAGAIN) {
/* retry only once on 1st time connection */
- set_credits(ses->server, 1);
- rc = CIFSSMBNegotiate(xid, ses);
+ set_credits(server, 1);
+ rc = CIFSSMBNegotiate(xid, ses, server);
if (rc == -EAGAIN)
rc = -EHOSTDOWN;
}
@@ -878,7 +888,7 @@ cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
{
int rc = -EOPNOTSUPP;
- buf->f_type = CIFS_MAGIC_NUMBER;
+ buf->f_type = CIFS_SUPER_MAGIC;
/*
* We could add a second check for a QFS Unix capability bit
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index ca692b2283cd..4125fd113cfb 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -13,8 +13,6 @@
#ifndef _SMB2_GLOB_H
#define _SMB2_GLOB_H
-#define SMB2_MAGIC_NUMBER 0xFE534D42
-
/*
*****************************************************************
* Constants go here
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index cdcdef32759e..b25623e3fe3d 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -847,16 +847,17 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve
* SMB2 header.
*
* @ses: server session structure
+ * @server: pointer to server info
* @iov: array containing the SMB request we will send to the server
* @nvec: number of array entries for the iov
*/
int
-smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec)
+smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server,
+ struct kvec *iov, int nvec)
{
int i, rc;
struct sdesc *d;
struct smb2_hdr *hdr;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
hdr = (struct smb2_hdr *)iov[0].iov_base;
/* neg prot are always taken */
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c5b1dea54ebc..af5d0830bc8a 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -13,6 +13,7 @@
#include <linux/sort.h>
#include <crypto/aead.h>
#include <linux/fiemap.h>
+#include <uapi/linux/magic.h>
#include "cifsfs.h"
#include "cifsglob.h"
#include "smb2pdu.h"
@@ -121,9 +122,13 @@ smb2_add_credits(struct TCP_Server_Info *server,
optype, scredits, add);
}
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedReconnect
- || server->tcpStatus == CifsExiting)
+ || server->tcpStatus == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
return;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
switch (rc) {
case -1:
@@ -208,11 +213,15 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
return rc;
spin_lock(&server->req_lock);
} else {
+ spin_unlock(&server->req_lock);
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsExiting) {
- spin_unlock(&server->req_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
return -ENOENT;
}
+ spin_unlock(&cifs_tcp_ses_lock);
+ spin_lock(&server->req_lock);
scredits = server->credits;
/* can deadlock with reopen */
if (scredits <= 8) {
@@ -384,14 +393,16 @@ smb2_need_neg(struct TCP_Server_Info *server)
}
static int
-smb2_negotiate(const unsigned int xid, struct cifs_ses *ses)
+smb2_negotiate(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
int rc;
spin_lock(&GlobalMid_Lock);
- cifs_ses_server(ses)->CurrentMid = 0;
+ server->CurrentMid = 0;
spin_unlock(&GlobalMid_Lock);
- rc = SMB2_negotiate(xid, ses);
+ rc = SMB2_negotiate(xid, ses, server);
/* BB we probably don't need to retry with modern servers */
if (rc == -EAGAIN)
rc = -EHOSTDOWN;
@@ -2747,7 +2758,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
goto qfs_exit;
rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
- buf->f_type = SMB2_MAGIC_NUMBER;
+ buf->f_type = SMB2_SUPER_MAGIC;
info = (struct smb2_fs_full_size_info *)(
le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
@@ -2789,7 +2800,7 @@ smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB311_posix_qfs_info(xid, tcon, fid.persistent_fid,
fid.volatile_fid, buf);
- buf->f_type = SMB2_MAGIC_NUMBER;
+ buf->f_type = SMB2_SUPER_MAGIC;
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
return rc;
}
@@ -4808,7 +4819,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
if (server->ops->is_session_expired &&
server->ops->is_session_expired(buf)) {
if (!is_offloaded)
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
return -1;
}
@@ -4981,10 +4992,12 @@ static void smb2_decrypt_offload(struct work_struct *work)
mid->callback(mid);
} else {
+ spin_lock(&cifs_tcp_ses_lock);
spin_lock(&GlobalMid_Lock);
if (dw->server->tcpStatus == CifsNeedReconnect) {
mid->mid_state = MID_RETRY_NEEDED;
spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
mid->callback(mid);
} else {
mid->mid_state = MID_REQUEST_SUBMITTED;
@@ -4992,6 +5005,7 @@ static void smb2_decrypt_offload(struct work_struct *work)
list_add_tail(&mid->qhead,
&dw->server->pending_mid_q);
spin_unlock(&GlobalMid_Lock);
+ spin_unlock(&cifs_tcp_ses_lock);
}
}
cifs_mid_q_entry_release(mid);
@@ -5221,13 +5235,13 @@ smb3_receive_transform(struct TCP_Server_Info *server,
sizeof(struct smb2_hdr)) {
cifs_server_dbg(VFS, "Transform message is too small (%u)\n",
pdu_length);
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
return -ECONNABORTED;
}
if (pdu_length < orig_len + sizeof(struct smb2_transform_hdr)) {
cifs_server_dbg(VFS, "Transform message is broken\n");
- cifs_reconnect(server);
+ cifs_reconnect(server, true);
return -ECONNABORTED;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 8b3670388cda..7e7909b1ae11 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -162,6 +162,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL)
return 0;
+ spin_lock(&cifs_tcp_ses_lock);
if (tcon->tidStatus == CifsExiting) {
/*
* only tree disconnect, open, and write,
@@ -171,11 +172,13 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
if ((smb2_command != SMB2_WRITE) &&
(smb2_command != SMB2_CREATE) &&
(smb2_command != SMB2_TREE_DISCONNECT)) {
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_dbg(FYI, "can not send cmd %d while umounting\n",
smb2_command);
return -ENODEV;
}
}
+ spin_unlock(&cifs_tcp_ses_lock);
if ((!tcon->ses) || (tcon->ses->status == CifsExiting) ||
(!tcon->ses->server) || !server)
return -EIO;
@@ -214,8 +217,12 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
}
/* are we still trying to reconnect? */
- if (server->tcpStatus != CifsNeedReconnect)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus != CifsNeedReconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
break;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
if (retries && --retries)
continue;
@@ -232,64 +239,74 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
retries = server->nr_targets;
}
- if (!tcon->ses->need_reconnect && !tcon->need_reconnect)
+ spin_lock(&ses->chan_lock);
+ if (!cifs_chan_needs_reconnect(ses, server) && !tcon->need_reconnect) {
+ spin_unlock(&ses->chan_lock);
return 0;
+ }
+ spin_unlock(&ses->chan_lock);
+ cifs_dbg(FYI, "sess reconnect mask: 0x%lx, tcon reconnect: %d",
+ tcon->ses->chans_need_reconnect,
+ tcon->need_reconnect);
nls_codepage = load_nls_default();
/*
- * need to prevent multiple threads trying to simultaneously reconnect
- * the same SMB session
- */
- mutex_lock(&tcon->ses->session_mutex);
-
- /*
* Recheck after acquire mutex. If another thread is negotiating
* and the server never sends an answer the socket will be closed
* and tcpStatus set to reconnect.
*/
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedReconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
rc = -EHOSTDOWN;
- mutex_unlock(&tcon->ses->session_mutex);
goto out;
}
+ spin_unlock(&cifs_tcp_ses_lock);
/*
- * If we are reconnecting an extra channel, bind
+ * need to prevent multiple threads trying to simultaneously
+ * reconnect the same SMB session
*/
- if (CIFS_SERVER_IS_CHAN(server)) {
- ses->binding = true;
- ses->binding_chan = cifs_ses_find_chan(ses, server);
+ spin_lock(&ses->chan_lock);
+ if (!cifs_chan_needs_reconnect(ses, server)) {
+ spin_unlock(&ses->chan_lock);
+
+ /* this means that we only need to tree connect */
+ if (tcon->need_reconnect)
+ goto skip_sess_setup;
+
+ goto out;
}
+ spin_unlock(&ses->chan_lock);
- rc = cifs_negotiate_protocol(0, tcon->ses);
- if (!rc && tcon->ses->need_reconnect) {
- rc = cifs_setup_session(0, tcon->ses, nls_codepage);
+ mutex_lock(&ses->session_mutex);
+ rc = cifs_negotiate_protocol(0, ses, server);
+ if (!rc) {
+ rc = cifs_setup_session(0, ses, server, nls_codepage);
if ((rc == -EACCES) && !tcon->retry) {
+ mutex_unlock(&ses->session_mutex);
rc = -EHOSTDOWN;
- ses->binding = false;
- ses->binding_chan = NULL;
- mutex_unlock(&tcon->ses->session_mutex);
goto failed;
}
+ } else {
+ mutex_unlock(&ses->session_mutex);
+ goto out;
}
- /*
- * End of channel binding
- */
- ses->binding = false;
- ses->binding_chan = NULL;
+ mutex_unlock(&ses->session_mutex);
- if (rc || !tcon->need_reconnect) {
- mutex_unlock(&tcon->ses->session_mutex);
+skip_sess_setup:
+ mutex_lock(&ses->session_mutex);
+ if (!tcon->need_reconnect) {
+ mutex_unlock(&ses->session_mutex);
goto out;
}
-
cifs_mark_open_files_invalid(tcon);
if (tcon->use_persistent)
tcon->need_reopen_files = true;
rc = cifs_tree_connect(0, tcon, nls_codepage);
- mutex_unlock(&tcon->ses->session_mutex);
+ mutex_unlock(&ses->session_mutex);
cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc);
if (rc) {
@@ -833,7 +850,9 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode)
*/
int
-SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
+SMB2_negotiate(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
struct smb_rqst rqst;
struct smb2_negotiate_req *req;
@@ -842,7 +861,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
struct kvec rsp_iov;
int rc = 0;
int resp_buftype;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
int blob_offset, blob_length;
char *security_blob;
int flags = CIFS_NEG_OP;
@@ -1221,6 +1239,7 @@ smb2_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
struct SMB2_sess_data {
unsigned int xid;
struct cifs_ses *ses;
+ struct TCP_Server_Info *server;
struct nls_table *nls_cp;
void (*func)(struct SMB2_sess_data *);
int result;
@@ -1242,9 +1261,10 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
{
int rc;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
struct smb2_sess_setup_req *req;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
unsigned int total_len;
+ bool is_binding = false;
rc = smb2_plain_req_init(SMB2_SESSION_SETUP, NULL, server,
(void **) &req,
@@ -1252,11 +1272,16 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
if (rc)
return rc;
- if (sess_data->ses->binding) {
- req->hdr.SessionId = cpu_to_le64(sess_data->ses->Suid);
+ spin_lock(&ses->chan_lock);
+ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ spin_unlock(&ses->chan_lock);
+
+ if (is_binding) {
+ req->hdr.SessionId = cpu_to_le64(ses->Suid);
req->hdr.Flags |= SMB2_FLAGS_SIGNED;
req->PreviousSessionId = 0;
req->Flags = SMB2_SESSION_REQ_FLAG_BINDING;
+ cifs_dbg(FYI, "Binding to sess id: %llx\n", ses->Suid);
} else {
/* First session, not a reauthenticate */
req->hdr.SessionId = 0;
@@ -1266,6 +1291,8 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
*/
req->PreviousSessionId = cpu_to_le64(sess_data->previous_session);
req->Flags = 0; /* MBZ */
+ cifs_dbg(FYI, "Fresh session. Previous: %llx\n",
+ sess_data->previous_session);
}
/* enough to enable echos and oplocks and one max size write */
@@ -1325,7 +1352,7 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
/* BB add code to build os and lm fields */
rc = cifs_send_recv(sess_data->xid, sess_data->ses,
- cifs_ses_server(sess_data->ses),
+ sess_data->server,
&rqst,
&sess_data->buf0_type,
CIFS_LOG_ERROR | CIFS_SESS_OP, &rsp_iov);
@@ -1340,11 +1367,11 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data)
{
int rc = 0;
struct cifs_ses *ses = sess_data->ses;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
+ struct TCP_Server_Info *server = sess_data->server;
mutex_lock(&server->srv_mutex);
if (server->ops->generate_signingkey) {
- rc = server->ops->generate_signingkey(ses);
+ rc = server->ops->generate_signingkey(ses, server);
if (rc) {
cifs_dbg(FYI,
"SMB3 session key generation failed\n");
@@ -1359,14 +1386,6 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data)
mutex_unlock(&server->srv_mutex);
cifs_dbg(FYI, "SMB2/3 session established successfully\n");
- /* keep existing ses state if binding */
- if (!ses->binding) {
- spin_lock(&GlobalMid_Lock);
- ses->status = CifsGood;
- ses->need_reconnect = false;
- spin_unlock(&GlobalMid_Lock);
- }
-
return rc;
}
@@ -1376,15 +1395,17 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
{
int rc;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
struct cifs_spnego_msg *msg;
struct key *spnego_key = NULL;
struct smb2_sess_setup_rsp *rsp = NULL;
+ bool is_binding = false;
rc = SMB2_sess_alloc_buffer(sess_data);
if (rc)
goto out;
- spnego_key = cifs_get_spnego_key(ses);
+ spnego_key = cifs_get_spnego_key(ses, server);
if (IS_ERR(spnego_key)) {
rc = PTR_ERR(spnego_key);
if (rc == -ENOKEY)
@@ -1405,8 +1426,12 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
goto out_put_spnego_key;
}
+ spin_lock(&ses->chan_lock);
+ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ spin_unlock(&ses->chan_lock);
+
/* keep session key if binding */
- if (!ses->binding) {
+ if (!is_binding) {
ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
GFP_KERNEL);
if (!ses->auth_key.response) {
@@ -1427,7 +1452,7 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
/* keep session id and flags if binding */
- if (!ses->binding) {
+ if (!is_binding) {
ses->Suid = le64_to_cpu(rsp->hdr.SessionId);
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
}
@@ -1459,10 +1484,12 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
{
int rc;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
struct smb2_sess_setup_rsp *rsp = NULL;
unsigned char *ntlmssp_blob = NULL;
bool use_spnego = false; /* else use raw ntlmssp */
u16 blob_length = 0;
+ bool is_binding = false;
/*
* If memory allocation is successful, caller of this function
@@ -1479,8 +1506,8 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
if (rc)
goto out_err;
- rc = build_ntlmssp_negotiate_blob(&ntlmssp_blob,
- &blob_length, ses,
+ rc = build_ntlmssp_smb3_negotiate_blob(&ntlmssp_blob,
+ &blob_length, ses, server,
sess_data->nls_cp);
if (rc)
goto out_err;
@@ -1519,8 +1546,12 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
+ spin_lock(&ses->chan_lock);
+ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ spin_unlock(&ses->chan_lock);
+
/* keep existing ses id and flags if binding */
- if (!ses->binding) {
+ if (!is_binding) {
ses->Suid = le64_to_cpu(rsp->hdr.SessionId);
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
}
@@ -1545,11 +1576,13 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
{
int rc;
struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
struct smb2_sess_setup_req *req;
struct smb2_sess_setup_rsp *rsp = NULL;
unsigned char *ntlmssp_blob = NULL;
bool use_spnego = false; /* else use raw ntlmssp */
u16 blob_length = 0;
+ bool is_binding = false;
rc = SMB2_sess_alloc_buffer(sess_data);
if (rc)
@@ -1558,8 +1591,9 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
req = (struct smb2_sess_setup_req *) sess_data->iov[0].iov_base;
req->hdr.SessionId = cpu_to_le64(ses->Suid);
- rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
- sess_data->nls_cp);
+ rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length,
+ ses, server,
+ sess_data->nls_cp);
if (rc) {
cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n", rc);
goto out;
@@ -1580,8 +1614,12 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
+ spin_lock(&ses->chan_lock);
+ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ spin_unlock(&ses->chan_lock);
+
/* keep existing ses id and flags if binding */
- if (!ses->binding) {
+ if (!is_binding) {
ses->Suid = le64_to_cpu(rsp->hdr.SessionId);
ses->session_flags = le16_to_cpu(rsp->SessionFlags);
}
@@ -1612,11 +1650,13 @@ out:
}
static int
-SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data)
+SMB2_select_sec(struct SMB2_sess_data *sess_data)
{
int type;
+ struct cifs_ses *ses = sess_data->ses;
+ struct TCP_Server_Info *server = sess_data->server;
- type = smb2_select_sectype(cifs_ses_server(ses), ses->sectype);
+ type = smb2_select_sectype(server, ses->sectype);
cifs_dbg(FYI, "sess setup type %d\n", type);
if (type == Unspecified) {
cifs_dbg(VFS, "Unable to select appropriate authentication method!\n");
@@ -1640,10 +1680,10 @@ SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data)
int
SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp)
{
int rc = 0;
- struct TCP_Server_Info *server = cifs_ses_server(ses);
struct SMB2_sess_data *sess_data;
cifs_dbg(FYI, "Session Setup\n");
@@ -1657,15 +1697,17 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
if (!sess_data)
return -ENOMEM;
- rc = SMB2_select_sec(ses, sess_data);
- if (rc)
- goto out;
sess_data->xid = xid;
sess_data->ses = ses;
+ sess_data->server = server;
sess_data->buf0_type = CIFS_NO_BUFFER;
sess_data->nls_cp = (struct nls_table *) nls_cp;
sess_data->previous_session = ses->Suid;
+ rc = SMB2_select_sec(sess_data);
+ if (rc)
+ goto out;
+
/*
* Initialize the session hash with the server one.
*/
@@ -1704,8 +1746,12 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
return -EIO;
/* no need to send SMB logoff if uid already closed due to reconnect */
- if (ses->need_reconnect)
+ spin_lock(&ses->chan_lock);
+ if (CIFS_ALL_CHANS_NEED_RECONNECT(ses)) {
+ spin_unlock(&ses->chan_lock);
goto smb2_session_already_dead;
+ }
+ spin_unlock(&ses->chan_lock);
rc = smb2_plain_req_init(SMB2_LOGOFF, NULL, ses->server,
(void **) &req, &total_len);
@@ -1867,8 +1913,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
tcon->share_flags = le32_to_cpu(rsp->ShareFlags);
tcon->capabilities = rsp->Capabilities; /* we keep caps little endian */
tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess);
- tcon->tidStatus = CifsGood;
- tcon->need_reconnect = false;
tcon->tid = le32_to_cpu(rsp->hdr.Id.SyncId.TreeId);
strlcpy(tcon->treeName, tree, sizeof(tcon->treeName));
@@ -1913,8 +1957,13 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
if (!ses || !(ses->server))
return -EIO;
- if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
+ spin_lock(&ses->chan_lock);
+ if ((tcon->need_reconnect) ||
+ (CIFS_ALL_CHANS_NEED_RECONNECT(tcon->ses))) {
+ spin_unlock(&ses->chan_lock);
return 0;
+ }
+ spin_unlock(&ses->chan_lock);
close_cached_dir_lease(&tcon->crfid);
@@ -2527,8 +2576,13 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
cp = load_nls_default();
cifs_strtoUTF16(*out_path, treename, treename_len, cp);
- UniStrcat(*out_path, sep);
- UniStrcat(*out_path, path);
+
+ /* Do not append the separator if the path is empty */
+ if (path[0] != cpu_to_le16(0x0000)) {
+ UniStrcat(*out_path, sep);
+ UniStrcat(*out_path, path);
+ }
+
unload_nls(cp);
return 0;
@@ -3722,27 +3776,35 @@ void smb2_reconnect_server(struct work_struct *work)
{
struct TCP_Server_Info *server = container_of(work,
struct TCP_Server_Info, reconnect.work);
- struct cifs_ses *ses;
+ struct TCP_Server_Info *pserver;
+ struct cifs_ses *ses, *ses2;
struct cifs_tcon *tcon, *tcon2;
- struct list_head tmp_list;
- int tcon_exist = false;
+ struct list_head tmp_list, tmp_ses_list;
+ bool tcon_exist = false, ses_exist = false;
+ bool tcon_selected = false;
int rc;
- int resched = false;
+ bool resched = false;
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
/* Prevent simultaneous reconnects that can corrupt tcon->rlist list */
- mutex_lock(&server->reconnect_mutex);
+ mutex_lock(&pserver->reconnect_mutex);
INIT_LIST_HEAD(&tmp_list);
- cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
+ INIT_LIST_HEAD(&tmp_ses_list);
+ cifs_dbg(FYI, "Reconnecting tcons and channels\n");
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+
+ tcon_selected = false;
+
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->need_reconnect || tcon->need_reopen_files) {
tcon->tc_count++;
list_add_tail(&tcon->rlist, &tmp_list);
- tcon_exist = true;
+ tcon_selected = tcon_exist = true;
}
}
/*
@@ -3751,15 +3813,27 @@ void smb2_reconnect_server(struct work_struct *work)
*/
if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) {
list_add_tail(&ses->tcon_ipc->rlist, &tmp_list);
- tcon_exist = true;
+ tcon_selected = tcon_exist = true;
ses->ses_count++;
}
+ /*
+ * handle the case where channel needs to reconnect
+ * binding session, but tcon is healthy (some other channel
+ * is active)
+ */
+ spin_lock(&ses->chan_lock);
+ if (!tcon_selected && cifs_chan_needs_reconnect(ses, server)) {
+ list_add_tail(&ses->rlist, &tmp_ses_list);
+ ses_exist = true;
+ ses->ses_count++;
+ }
+ spin_unlock(&ses->chan_lock);
}
/*
* Get the reference to server struct to be sure that the last call of
* cifs_put_tcon() in the loop below won't release the server pointer.
*/
- if (tcon_exist)
+ if (tcon_exist || ses_exist)
server->srv_count++;
spin_unlock(&cifs_tcp_ses_lock);
@@ -3777,13 +3851,41 @@ void smb2_reconnect_server(struct work_struct *work)
cifs_put_tcon(tcon);
}
- cifs_dbg(FYI, "Reconnecting tcons finished\n");
+ if (!ses_exist)
+ goto done;
+
+ /* allocate a dummy tcon struct used for reconnect */
+ tcon = kzalloc(sizeof(struct cifs_tcon), GFP_KERNEL);
+ if (!tcon) {
+ resched = true;
+ list_del_init(&ses->rlist);
+ cifs_put_smb_ses(ses);
+ goto done;
+ }
+
+ tcon->tidStatus = CifsGood;
+ tcon->retry = false;
+ tcon->need_reconnect = false;
+
+ /* now reconnect sessions for necessary channels */
+ list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) {
+ tcon->ses = ses;
+ rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server);
+ if (rc)
+ resched = true;
+ list_del_init(&ses->rlist);
+ cifs_put_smb_ses(ses);
+ }
+ kfree(tcon);
+
+done:
+ cifs_dbg(FYI, "Reconnecting tcons and channels finished\n");
if (resched)
queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ);
- mutex_unlock(&server->reconnect_mutex);
+ mutex_unlock(&pserver->reconnect_mutex);
/* now we can safely release srv struct */
- if (tcon_exist)
+ if (tcon_exist || ses_exist)
cifs_put_tcp_session(server, 1);
}
@@ -3797,13 +3899,16 @@ SMB2_echo(struct TCP_Server_Info *server)
.rq_nvec = 1 };
unsigned int total_len;
- cifs_dbg(FYI, "In echo request\n");
+ cifs_dbg(FYI, "In echo request for conn_id %lld\n", server->conn_id);
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedNegotiate) {
+ spin_unlock(&cifs_tcp_ses_lock);
/* No need to send echo on newly established connections */
mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
return rc;
}
+ spin_unlock(&cifs_tcp_ses_lock);
rc = smb2_plain_req_init(SMB2_ECHO, NULL, server,
(void **)&req, &total_len);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 096fada16ebd..4a7062fd1c26 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -123,8 +123,11 @@ extern void smb2_set_related(struct smb_rqst *rqst);
* SMB2 Worker functions - most of protocol specific implementation details
* are contained within these calls.
*/
-extern int SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses);
+extern int SMB2_negotiate(const unsigned int xid,
+ struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
extern int SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct nls_table *nls_cp);
extern int SMB2_logoff(const unsigned int xid, struct cifs_ses *ses);
extern int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses,
@@ -276,6 +279,7 @@ extern void smb2_copy_fs_info_to_kstatfs(
struct kstatfs *kst);
extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server);
extern int smb311_update_preauth_hash(struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
struct kvec *iov, int nvec);
extern int smb2_query_info_compound(const unsigned int xid,
struct cifs_tcon *tcon,
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 2bf047b390a9..2af79093b78b 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -100,13 +100,16 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key)
goto out;
found:
- if (ses->binding) {
+ spin_lock(&ses->chan_lock);
+ if (cifs_chan_needs_reconnect(ses, server) &&
+ !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) {
/*
* If we are in the process of binding a new channel
* to an existing session, use the master connection
* session key
*/
memcpy(key, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE);
+ spin_unlock(&ses->chan_lock);
goto out;
}
@@ -118,9 +121,11 @@ found:
chan = ses->chans + i;
if (chan->server == server) {
memcpy(key, chan->signkey, SMB3_SIGN_KEY_SIZE);
+ spin_unlock(&ses->chan_lock);
goto out;
}
}
+ spin_unlock(&ses->chan_lock);
cifs_dbg(VFS,
"%s: Could not find channel signing key for session 0x%llx\n",
@@ -390,12 +395,18 @@ struct derivation_triplet {
static int
generate_smb3signingkey(struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
const struct derivation_triplet *ptriplet)
{
int rc;
-#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS
- struct TCP_Server_Info *server = ses->server;
-#endif
+ bool is_binding = false;
+ int chan_index = 0;
+
+ spin_lock(&ses->chan_lock);
+ is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ chan_index = cifs_ses_get_chan_index(ses, server);
+ /* TODO: introduce ref counting for channels when the can be freed */
+ spin_unlock(&ses->chan_lock);
/*
* All channels use the same encryption/decryption keys but
@@ -407,10 +418,10 @@ generate_smb3signingkey(struct cifs_ses *ses,
* master connection signing key stored in the session
*/
- if (ses->binding) {
+ if (is_binding) {
rc = generate_key(ses, ptriplet->signing.label,
ptriplet->signing.context,
- cifs_ses_binding_channel(ses)->signkey,
+ ses->chans[chan_index].signkey,
SMB3_SIGN_KEY_SIZE);
if (rc)
return rc;
@@ -422,8 +433,11 @@ generate_smb3signingkey(struct cifs_ses *ses,
if (rc)
return rc;
+ /* safe to access primary channel, since it will never go away */
+ spin_lock(&ses->chan_lock);
memcpy(ses->chans[0].signkey, ses->smb3signingkey,
SMB3_SIGN_KEY_SIZE);
+ spin_unlock(&ses->chan_lock);
rc = generate_key(ses, ptriplet->encryption.label,
ptriplet->encryption.context,
@@ -470,7 +484,8 @@ generate_smb3signingkey(struct cifs_ses *ses,
}
int
-generate_smb30signingkey(struct cifs_ses *ses)
+generate_smb30signingkey(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
struct derivation_triplet triplet;
@@ -494,11 +509,12 @@ generate_smb30signingkey(struct cifs_ses *ses)
d->context.iov_base = "ServerOut";
d->context.iov_len = 10;
- return generate_smb3signingkey(ses, &triplet);
+ return generate_smb3signingkey(ses, server, &triplet);
}
int
-generate_smb311signingkey(struct cifs_ses *ses)
+generate_smb311signingkey(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
{
struct derivation_triplet triplet;
@@ -522,7 +538,7 @@ generate_smb311signingkey(struct cifs_ses *ses)
d->context.iov_base = ses->preauth_sha_hash;
d->context.iov_len = 64;
- return generate_smb3signingkey(ses, &triplet);
+ return generate_smb3signingkey(ses, server, &triplet);
}
int
@@ -624,8 +640,12 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
if (!is_signed)
return 0;
- if (server->tcpStatus == CifsNeedNegotiate)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsNeedNegotiate) {
+ spin_unlock(&cifs_tcp_ses_lock);
return 0;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
if (!is_binding && !server->session_estab) {
strncpy(shdr->Signature, "BSRSPYL", 8);
return 0;
@@ -741,30 +761,41 @@ static int
smb2_get_mid_entry(struct cifs_ses *ses, struct TCP_Server_Info *server,
struct smb2_hdr *shdr, struct mid_q_entry **mid)
{
- if (server->tcpStatus == CifsExiting)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -ENOENT;
+ }
if (server->tcpStatus == CifsNeedReconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
cifs_dbg(FYI, "tcp session dead - return to caller to retry\n");
return -EAGAIN;
}
if (server->tcpStatus == CifsNeedNegotiate &&
- shdr->Command != SMB2_NEGOTIATE)
+ shdr->Command != SMB2_NEGOTIATE) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -EAGAIN;
+ }
if (ses->status == CifsNew) {
if ((shdr->Command != SMB2_SESSION_SETUP) &&
- (shdr->Command != SMB2_NEGOTIATE))
+ (shdr->Command != SMB2_NEGOTIATE)) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -EAGAIN;
+ }
/* else ok - we are setting up session */
}
if (ses->status == CifsExiting) {
- if (shdr->Command != SMB2_LOGOFF)
+ if (shdr->Command != SMB2_LOGOFF) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -EAGAIN;
+ }
/* else ok - we are shutting down the session */
}
+ spin_unlock(&cifs_tcp_ses_lock);
*mid = smb2_mid_entry_alloc(shdr, server);
if (*mid == NULL)
@@ -837,9 +868,13 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
(struct smb2_hdr *)rqst->rq_iov[0].iov_base;
struct mid_q_entry *mid;
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsNeedNegotiate &&
- shdr->Command != SMB2_NEGOTIATE)
+ shdr->Command != SMB2_NEGOTIATE) {
+ spin_unlock(&cifs_tcp_ses_lock);
return ERR_PTR(-EAGAIN);
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
smb2_seq_num_into_buf(server, shdr);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 61ea3d3f95b4..8540f7c13eae 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -430,9 +430,10 @@ unmask:
* be taken as the remainder of this one. We need to kill the
* socket so the server throws away the partial SMB
*/
- spin_lock(&GlobalMid_Lock);
- server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&GlobalMid_Lock);
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus != CifsExiting)
+ server->tcpStatus = CifsNeedReconnect;
+ spin_unlock(&cifs_tcp_ses_lock);
trace_smb3_partial_send_reconnect(server->CurrentMid,
server->conn_id, server->hostname);
}
@@ -578,10 +579,14 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
return -ERESTARTSYS;
spin_lock(&server->req_lock);
} else {
+ spin_unlock(&server->req_lock);
+
+ spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsExiting) {
- spin_unlock(&server->req_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
return -ENOENT;
}
+ spin_unlock(&cifs_tcp_ses_lock);
/*
* For normal commands, reserve the last MAX_COMPOUND
@@ -596,6 +601,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
* for servers that are slow to hand out credits on
* new sessions.
*/
+ spin_lock(&server->req_lock);
if (!optype && num_credits == 1 &&
server->in_flight > 2 * MAX_COMPOUND &&
*credits <= MAX_COMPOUND) {
@@ -723,28 +729,25 @@ cifs_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
struct mid_q_entry **ppmidQ)
{
- if (ses->server->tcpStatus == CifsExiting) {
- return -ENOENT;
- }
-
- if (ses->server->tcpStatus == CifsNeedReconnect) {
- cifs_dbg(FYI, "tcp session dead - return to caller to retry\n");
- return -EAGAIN;
- }
-
+ spin_lock(&cifs_tcp_ses_lock);
if (ses->status == CifsNew) {
if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
- (in_buf->Command != SMB_COM_NEGOTIATE))
+ (in_buf->Command != SMB_COM_NEGOTIATE)) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -EAGAIN;
+ }
/* else ok - we are setting up session */
}
if (ses->status == CifsExiting) {
/* check if SMB session is bad because we are setting it up */
- if (in_buf->Command != SMB_COM_LOGOFF_ANDX)
+ if (in_buf->Command != SMB_COM_LOGOFF_ANDX) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -EAGAIN;
+ }
/* else ok - we are shutting down session */
}
+ spin_unlock(&cifs_tcp_ses_lock);
*ppmidQ = AllocMidQEntry(in_buf, ses->server);
if (*ppmidQ == NULL)
@@ -1044,19 +1047,14 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
if (!ses)
return NULL;
+ /* round robin */
+ index = (uint)atomic_inc_return(&ses->chan_seq);
+
spin_lock(&ses->chan_lock);
- if (!ses->binding) {
- /* round robin */
- if (ses->chan_count > 1) {
- index = (uint)atomic_inc_return(&ses->chan_seq);
- index %= ses->chan_count;
- }
- spin_unlock(&ses->chan_lock);
- return ses->chans[index].server;
- } else {
- spin_unlock(&ses->chan_lock);
- return cifs_ses_server(ses);
- }
+ index %= ses->chan_count;
+ spin_unlock(&ses->chan_lock);
+
+ return ses->chans[index].server;
}
int
@@ -1084,8 +1082,12 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
return -EIO;
}
- if (server->tcpStatus == CifsExiting)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -ENOENT;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
/*
* Wait for all the requests to become available.
@@ -1188,12 +1190,17 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
/*
* Compounding is never used during session establish.
*/
+ spin_lock(&cifs_tcp_ses_lock);
if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
+ spin_unlock(&cifs_tcp_ses_lock);
+
mutex_lock(&server->srv_mutex);
- smb311_update_preauth_hash(ses, rqst[0].rq_iov,
- rqst[0].rq_nvec);
+ smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec);
mutex_unlock(&server->srv_mutex);
+
+ spin_lock(&cifs_tcp_ses_lock);
}
+ spin_unlock(&cifs_tcp_ses_lock);
for (i = 0; i < num_rqst; i++) {
rc = wait_for_response(server, midQ[i]);
@@ -1256,15 +1263,19 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
/*
* Compounding is never used during session establish.
*/
+ spin_lock(&cifs_tcp_ses_lock);
if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
struct kvec iov = {
.iov_base = resp_iov[0].iov_base,
.iov_len = resp_iov[0].iov_len
};
+ spin_unlock(&cifs_tcp_ses_lock);
mutex_lock(&server->srv_mutex);
- smb311_update_preauth_hash(ses, &iov, 1);
+ smb311_update_preauth_hash(ses, server, &iov, 1);
mutex_unlock(&server->srv_mutex);
+ spin_lock(&cifs_tcp_ses_lock);
}
+ spin_unlock(&cifs_tcp_ses_lock);
out:
/*
@@ -1353,8 +1364,12 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
return -EIO;
}
- if (server->tcpStatus == CifsExiting)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -ENOENT;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
/* Ensure that we do not send more than 50 overlapping requests
to the same server. We may make this configurable later or
@@ -1494,8 +1509,12 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
return -EIO;
}
- if (server->tcpStatus == CifsExiting)
+ spin_lock(&cifs_tcp_ses_lock);
+ if (server->tcpStatus == CifsExiting) {
+ spin_unlock(&cifs_tcp_ses_lock);
return -ENOENT;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
/* Ensure that we do not send more than 50 overlapping requests
to the same server. We may make this configurable later or
@@ -1553,10 +1572,12 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
(server->tcpStatus != CifsNew)));
/* Were we interrupted by a signal ? */
+ spin_lock(&cifs_tcp_ses_lock);
if ((rc == -ERESTARTSYS) &&
(midQ->mid_state == MID_REQUEST_SUBMITTED) &&
((server->tcpStatus == CifsGood) ||
(server->tcpStatus == CifsNew))) {
+ spin_unlock(&cifs_tcp_ses_lock);
if (in_buf->Command == SMB_COM_TRANSACTION2) {
/* POSIX lock. We send a NT_CANCEL SMB to cause the
@@ -1595,7 +1616,9 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
/* We got the response - restart system call. */
rstart = 1;
+ spin_lock(&cifs_tcp_ses_lock);
}
+ spin_unlock(&cifs_tcp_ses_lock);
rc = cifs_sync_mid_result(midQ, server);
if (rc != 0)
diff --git a/fs/coredump.c b/fs/coredump.c
index a6b3c196cdef..1c060c0a2d72 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -41,6 +41,7 @@
#include <linux/fs.h>
#include <linux/path.h>
#include <linux/timekeeping.h>
+#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -52,9 +53,9 @@
#include <trace/events/sched.h>
-int core_uses_pid;
-unsigned int core_pipe_limit;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
+static int core_uses_pid;
+static unsigned int core_pipe_limit;
+static char core_pattern[CORENAME_MAX_SIZE] = "core";
static int core_name_size = CORENAME_MAX_SIZE;
struct core_name {
@@ -62,8 +63,6 @@ struct core_name {
int used, size;
};
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
static int expand_corename(struct core_name *cn, int size)
{
char *corename = krealloc(cn->corename, size, GFP_KERNEL);
@@ -347,13 +346,13 @@ out:
return ispipe;
}
-static int zap_process(struct task_struct *start, int exit_code, int flags)
+static int zap_process(struct task_struct *start, int exit_code)
{
struct task_struct *t;
int nr = 0;
/* ignore all signals except SIGKILL, see prepare_signal() */
- start->signal->flags = SIGNAL_GROUP_COREDUMP | flags;
+ start->signal->flags = SIGNAL_GROUP_EXIT;
start->signal->group_exit_code = exit_code;
start->signal->group_stop_count = 0;
@@ -372,13 +371,13 @@ static int zap_process(struct task_struct *start, int exit_code, int flags)
static int zap_threads(struct task_struct *tsk,
struct core_state *core_state, int exit_code)
{
+ struct signal_struct *signal = tsk->signal;
int nr = -EAGAIN;
spin_lock_irq(&tsk->sighand->siglock);
- if (!signal_group_exit(tsk->signal)) {
- tsk->signal->core_state = core_state;
- tsk->signal->group_exit_task = tsk;
- nr = zap_process(tsk, exit_code, 0);
+ if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) {
+ signal->core_state = core_state;
+ nr = zap_process(tsk, exit_code);
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
tsk->flags |= PF_DUMPCORE;
atomic_set(&core_state->nr_threads, nr);
@@ -426,8 +425,6 @@ static void coredump_finish(bool core_dumped)
spin_lock_irq(&current->sighand->siglock);
if (core_dumped && !__fatal_signal_pending(current))
current->signal->group_exit_code |= 0x80;
- current->signal->group_exit_task = NULL;
- current->signal->flags = SIGNAL_GROUP_EXIT;
next = current->signal->core_state->dumper.next;
current->signal->core_state = NULL;
spin_unlock_irq(&current->sighand->siglock);
@@ -895,6 +892,63 @@ int dump_align(struct coredump_params *cprm, int align)
}
EXPORT_SYMBOL(dump_align);
+#ifdef CONFIG_SYSCTL
+
+void validate_coredump_safety(void)
+{
+ if (suid_dumpable == SUID_DUMP_ROOT &&
+ core_pattern[0] != '/' && core_pattern[0] != '|') {
+ pr_warn(
+"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
+"Pipe handler or fully qualified core dump path required.\n"
+"Set kernel.core_pattern before fs.suid_dumpable.\n"
+ );
+ }
+}
+
+static int proc_dostring_coredump(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int error = proc_dostring(table, write, buffer, lenp, ppos);
+
+ if (!error)
+ validate_coredump_safety();
+ return error;
+}
+
+static struct ctl_table coredump_sysctls[] = {
+ {
+ .procname = "core_uses_pid",
+ .data = &core_uses_pid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "core_pattern",
+ .data = core_pattern,
+ .maxlen = CORENAME_MAX_SIZE,
+ .mode = 0644,
+ .proc_handler = proc_dostring_coredump,
+ },
+ {
+ .procname = "core_pipe_limit",
+ .data = &core_pipe_limit,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { }
+};
+
+static int __init init_fs_coredump_sysctls(void)
+{
+ register_sysctl_init("kernel", coredump_sysctls);
+ return 0;
+}
+fs_initcall(init_fs_coredump_sysctls);
+#endif /* CONFIG_SYSCTL */
+
/*
* The purpose of always_dump_vma() is to make sure that special kernel mappings
* that are useful for post-mortem analysis are included in every core dump.
diff --git a/fs/dcache.c b/fs/dcache.c
index cf871a81f4fd..c84269c6e8bf 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -115,10 +115,13 @@ static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent,
return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT);
}
-
-/* Statistics gathering. */
-struct dentry_stat_t dentry_stat = {
- .age_limit = 45,
+struct dentry_stat_t {
+ long nr_dentry;
+ long nr_unused;
+ long age_limit; /* age in seconds */
+ long want_pages; /* pages requested by system */
+ long nr_negative; /* # of unused negative dentries */
+ long dummy; /* Reserved for future use */
};
static DEFINE_PER_CPU(long, nr_dentry);
@@ -126,6 +129,10 @@ static DEFINE_PER_CPU(long, nr_dentry_unused);
static DEFINE_PER_CPU(long, nr_dentry_negative);
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+/* Statistics gathering. */
+static struct dentry_stat_t dentry_stat = {
+ .age_limit = 45,
+};
/*
* Here we resort to our own counters instead of using generic per-cpu counters
@@ -167,14 +174,32 @@ static long get_nr_dentry_negative(void)
return sum < 0 ? 0 : sum;
}
-int proc_nr_dentry(struct ctl_table *table, int write, void *buffer,
- size_t *lenp, loff_t *ppos)
+static int proc_nr_dentry(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
dentry_stat.nr_dentry = get_nr_dentry();
dentry_stat.nr_unused = get_nr_dentry_unused();
dentry_stat.nr_negative = get_nr_dentry_negative();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
+
+static struct ctl_table fs_dcache_sysctls[] = {
+ {
+ .procname = "dentry-state",
+ .data = &dentry_stat,
+ .maxlen = 6*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_dentry,
+ },
+ { }
+};
+
+static int __init init_fs_dcache_sysctls(void)
+{
+ register_sysctl_init("fs", fs_dcache_sysctls);
+ return 0;
+}
+fs_initcall(init_fs_dcache_sysctls);
#endif
/*
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 06f4c5ae1451..e2daa940ebce 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -307,7 +307,7 @@ static void unlist_file(struct epitems_head *head)
static long long_zero;
static long long_max = LONG_MAX;
-struct ctl_table epoll_table[] = {
+static struct ctl_table epoll_table[] = {
{
.procname = "max_user_watches",
.data = &max_user_watches,
@@ -319,6 +319,13 @@ struct ctl_table epoll_table[] = {
},
{ }
};
+
+static void __init epoll_sysctls_init(void)
+{
+ register_sysctl("fs/epoll", epoll_table);
+}
+#else
+#define epoll_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
static const struct file_operations eventpoll_fops;
@@ -2378,6 +2385,7 @@ static int __init eventpoll_init(void)
/* Allocates slab cache used to allocate "struct eppoll_entry" */
pwq_cache = kmem_cache_create("eventpoll_pwq",
sizeof(struct eppoll_entry), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL);
+ epoll_sysctls_init();
ephead_cache = kmem_cache_create("ep_head",
sizeof(struct epitems_head), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL);
diff --git a/fs/exec.c b/fs/exec.c
index 537d92c41105..79f2c9483302 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -65,6 +65,7 @@
#include <linux/vmalloc.h>
#include <linux/io_uring.h>
#include <linux/syscall_user_dispatch.h>
+#include <linux/coredump.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -1045,7 +1046,7 @@ static int de_thread(struct task_struct *tsk)
* Kill all other threads in the thread group.
*/
spin_lock_irq(lock);
- if (signal_group_exit(sig)) {
+ if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) {
/*
* Another group action in progress, just
* return so that the signal is processed.
@@ -1054,7 +1055,7 @@ static int de_thread(struct task_struct *tsk)
return -EAGAIN;
}
- sig->group_exit_task = tsk;
+ sig->group_exec_task = tsk;
sig->notify_count = zap_other_threads(tsk);
if (!thread_group_leader(tsk))
sig->notify_count--;
@@ -1082,7 +1083,7 @@ static int de_thread(struct task_struct *tsk)
write_lock_irq(&tasklist_lock);
/*
* Do this under tasklist_lock to ensure that
- * exit_notify() can't miss ->group_exit_task
+ * exit_notify() can't miss ->group_exec_task
*/
sig->notify_count = -1;
if (likely(leader->exit_state))
@@ -1149,7 +1150,7 @@ static int de_thread(struct task_struct *tsk)
release_task(leader);
}
- sig->group_exit_task = NULL;
+ sig->group_exec_task = NULL;
sig->notify_count = 0;
no_thread_group:
@@ -1162,7 +1163,7 @@ no_thread_group:
killed:
/* protects against exit_notify() and __exit_signal() */
read_lock(&tasklist_lock);
- sig->group_exit_task = NULL;
+ sig->group_exec_task = NULL;
sig->notify_count = 0;
read_unlock(&tasklist_lock);
return -EAGAIN;
@@ -1207,7 +1208,8 @@ static int unshare_sighand(struct task_struct *me)
char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
{
task_lock(tsk);
- strncpy(buf, tsk->comm, buf_size);
+ /* Always NUL terminated and zero-padded */
+ strscpy_pad(buf, tsk->comm, buf_size);
task_unlock(tsk);
return buf;
}
@@ -1222,7 +1224,7 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
{
task_lock(tsk);
trace_task_rename(tsk, buf);
- strlcpy(tsk->comm, buf, sizeof(tsk->comm));
+ strscpy_pad(tsk->comm, buf, sizeof(tsk->comm));
task_unlock(tsk);
perf_event_comm(tsk, exec);
}
@@ -1307,6 +1309,8 @@ int begin_new_exec(struct linux_binprm * bprm)
*/
force_uaccess_begin();
+ if (me->flags & PF_KTHREAD)
+ free_kthread_struct(me);
me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
PF_NOFREEZE | PF_NO_SETAFFINITY);
flush_thread();
@@ -2096,3 +2100,37 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
argv, envp, flags);
}
#endif
+
+#ifdef CONFIG_SYSCTL
+
+static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (!error)
+ validate_coredump_safety();
+ return error;
+}
+
+static struct ctl_table fs_exec_sysctls[] = {
+ {
+ .procname = "suid_dumpable",
+ .data = &suid_dumpable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_coredump,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ { }
+};
+
+static int __init init_fs_exec_sysctls(void)
+{
+ register_sysctl_init("fs", fs_exec_sysctls);
+ return 0;
+}
+
+fs_initcall(init_fs_exec_sysctls);
+#endif /* CONFIG_SYSCTL */
diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
index cc5cffc4a769..03f142307174 100644
--- a/fs/exfat/balloc.c
+++ b/fs/exfat/balloc.c
@@ -105,7 +105,7 @@ int exfat_load_bitmap(struct super_block *sb)
struct exfat_dentry *ep;
struct buffer_head *bh;
- ep = exfat_get_dentry(sb, &clu, i, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index cb1c0d8c1714..a27b55ec060a 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -64,7 +64,6 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
{
int i, dentries_per_clu, dentries_per_clu_bits = 0, num_ext;
unsigned int type, clu_offset, max_dentries;
- sector_t sector;
struct exfat_chain dir, clu;
struct exfat_uni_name uni_name;
struct exfat_dentry *ep;
@@ -115,7 +114,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
i = dentry & (dentries_per_clu - 1);
for ( ; i < dentries_per_clu; i++, dentry++) {
- ep = exfat_get_dentry(sb, &clu, i, &bh, &sector);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
@@ -156,7 +155,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
dir_entry->namebuf.lfnbuf_len);
brelse(bh);
- ep = exfat_get_dentry(sb, &clu, i + 1, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i + 1, &bh);
if (!ep)
return -EIO;
dir_entry->size =
@@ -445,7 +444,6 @@ int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir,
struct super_block *sb = inode->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
struct timespec64 ts = current_time(inode);
- sector_t sector;
struct exfat_dentry *ep;
struct buffer_head *bh;
@@ -453,7 +451,7 @@ int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir,
* We cannot use exfat_get_dentry_set here because file ep is not
* initialized yet.
*/
- ep = exfat_get_dentry(sb, p_dir, entry, &bh, &sector);
+ ep = exfat_get_dentry(sb, p_dir, entry, &bh);
if (!ep)
return -EIO;
@@ -477,7 +475,7 @@ int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir,
exfat_update_bh(bh, IS_DIRSYNC(inode));
brelse(bh);
- ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh, &sector);
+ ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh);
if (!ep)
return -EIO;
@@ -496,12 +494,11 @@ int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir,
struct super_block *sb = inode->i_sb;
int ret = 0;
int i, num_entries;
- sector_t sector;
u16 chksum;
struct exfat_dentry *ep, *fep;
struct buffer_head *fbh, *bh;
- fep = exfat_get_dentry(sb, p_dir, entry, &fbh, &sector);
+ fep = exfat_get_dentry(sb, p_dir, entry, &fbh);
if (!fep)
return -EIO;
@@ -509,7 +506,7 @@ int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir,
chksum = exfat_calc_chksum16(fep, DENTRY_SIZE, 0, CS_DIR_ENTRY);
for (i = 1; i < num_entries; i++) {
- ep = exfat_get_dentry(sb, p_dir, entry + i, &bh, NULL);
+ ep = exfat_get_dentry(sb, p_dir, entry + i, &bh);
if (!ep) {
ret = -EIO;
goto release_fbh;
@@ -531,13 +528,12 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir,
{
struct super_block *sb = inode->i_sb;
int i;
- sector_t sector;
unsigned short *uniname = p_uniname->name;
struct exfat_dentry *ep;
struct buffer_head *bh;
int sync = IS_DIRSYNC(inode);
- ep = exfat_get_dentry(sb, p_dir, entry, &bh, &sector);
+ ep = exfat_get_dentry(sb, p_dir, entry, &bh);
if (!ep)
return -EIO;
@@ -545,7 +541,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir,
exfat_update_bh(bh, sync);
brelse(bh);
- ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh, &sector);
+ ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh);
if (!ep)
return -EIO;
@@ -555,7 +551,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir,
brelse(bh);
for (i = EXFAT_FIRST_CLUSTER; i < num_entries; i++) {
- ep = exfat_get_dentry(sb, p_dir, entry + i, &bh, &sector);
+ ep = exfat_get_dentry(sb, p_dir, entry + i, &bh);
if (!ep)
return -EIO;
@@ -574,12 +570,11 @@ int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir,
{
struct super_block *sb = inode->i_sb;
int i;
- sector_t sector;
struct exfat_dentry *ep;
struct buffer_head *bh;
for (i = order; i < num_entries; i++) {
- ep = exfat_get_dentry(sb, p_dir, entry + i, &bh, &sector);
+ ep = exfat_get_dentry(sb, p_dir, entry + i, &bh);
if (!ep)
return -EIO;
@@ -656,8 +651,8 @@ static int exfat_walk_fat_chain(struct super_block *sb,
return 0;
}
-int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir,
- int entry, sector_t *sector, int *offset)
+static int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir,
+ int entry, sector_t *sector, int *offset)
{
int ret;
unsigned int off, clu = 0;
@@ -717,8 +712,7 @@ static int exfat_dir_readahead(struct super_block *sb, sector_t sec)
}
struct exfat_dentry *exfat_get_dentry(struct super_block *sb,
- struct exfat_chain *p_dir, int entry, struct buffer_head **bh,
- sector_t *sector)
+ struct exfat_chain *p_dir, int entry, struct buffer_head **bh)
{
unsigned int dentries_per_page = EXFAT_B_TO_DEN(PAGE_SIZE);
int off;
@@ -740,8 +734,6 @@ struct exfat_dentry *exfat_get_dentry(struct super_block *sb,
if (!*bh)
return NULL;
- if (sector)
- *sector = sec;
return (struct exfat_dentry *)((*bh)->b_data + off);
}
@@ -892,7 +884,7 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb,
es->bh[es->num_bh++] = bh;
}
- /* validiate cached dentries */
+ /* validate cached dentries */
for (i = 1; i < num_entries; i++) {
ep = exfat_get_dentry_cached(es, i);
if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode))
@@ -960,7 +952,7 @@ rewind:
if (rewind && dentry == end_eidx)
goto not_found;
- ep = exfat_get_dentry(sb, &clu, i, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
@@ -1145,7 +1137,7 @@ int exfat_count_ext_entries(struct super_block *sb, struct exfat_chain *p_dir,
struct buffer_head *bh;
for (i = 0, entry++; i < ep->dentry.file.num_ext; i++, entry++) {
- ext_ep = exfat_get_dentry(sb, p_dir, entry, &bh, NULL);
+ ext_ep = exfat_get_dentry(sb, p_dir, entry, &bh);
if (!ext_ep)
return -EIO;
@@ -1175,7 +1167,7 @@ int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir)
while (clu.dir != EXFAT_EOF_CLUSTER) {
for (i = 0; i < dentries_per_clu; i++) {
- ep = exfat_get_dentry(sb, &clu, i, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
entry_type = exfat_get_entry_type(ep);
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 1d6da61157c9..619e5b4bed10 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -10,7 +10,6 @@
#include <linux/ratelimit.h>
#include <linux/nls.h>
-#define EXFAT_SUPER_MAGIC 0x2011BAB0UL
#define EXFAT_ROOT_INO 1
#define EXFAT_CLUSTERS_UNTRACKED (~0u)
@@ -459,11 +458,8 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname,
int num_entries, unsigned int type, struct exfat_hint *hint_opt);
int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu);
-int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir,
- int entry, sector_t *sector, int *offset);
struct exfat_dentry *exfat_get_dentry(struct super_block *sb,
- struct exfat_chain *p_dir, int entry, struct buffer_head **bh,
- sector_t *sector);
+ struct exfat_chain *p_dir, int entry, struct buffer_head **bh);
struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es,
int num);
struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb,
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index e949e563443c..a3464e56a7e1 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -84,9 +84,7 @@ int exfat_ent_set(struct super_block *sb, unsigned int loc,
static inline bool is_valid_cluster(struct exfat_sb_info *sbi,
unsigned int clus)
{
- if (clus < EXFAT_FIRST_CLUSTER || sbi->num_clusters <= clus)
- return false;
- return true;
+ return clus >= EXFAT_FIRST_CLUSTER && clus < sbi->num_clusters;
}
int exfat_ent_get(struct super_block *sb, unsigned int loc,
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 6af0191b648f..d890fd34bb2d 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -110,8 +110,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size)
exfat_set_volume_dirty(sb);
num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi);
- num_clusters_phys =
- EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, sbi);
+ num_clusters_phys = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi);
exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags);
@@ -228,12 +227,13 @@ void exfat_truncate(struct inode *inode, loff_t size)
{
struct super_block *sb = inode->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
+ struct exfat_inode_info *ei = EXFAT_I(inode);
unsigned int blocksize = i_blocksize(inode);
loff_t aligned_size;
int err;
mutex_lock(&sbi->s_lock);
- if (EXFAT_I(inode)->start_clu == 0) {
+ if (ei->start_clu == 0) {
/*
* Empty start_clu != ~0 (not allocated)
*/
@@ -251,8 +251,8 @@ void exfat_truncate(struct inode *inode, loff_t size)
else
mark_inode_dirty(inode);
- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) &
- ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
+ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
+ inode->i_blkbits;
write_size:
aligned_size = i_size_read(inode);
if (aligned_size & (blocksize - 1)) {
@@ -260,11 +260,11 @@ write_size:
aligned_size++;
}
- if (EXFAT_I(inode)->i_size_ondisk > i_size_read(inode))
- EXFAT_I(inode)->i_size_ondisk = aligned_size;
+ if (ei->i_size_ondisk > i_size_read(inode))
+ ei->i_size_ondisk = aligned_size;
- if (EXFAT_I(inode)->i_size_aligned > i_size_read(inode))
- EXFAT_I(inode)->i_size_aligned = aligned_size;
+ if (ei->i_size_aligned > i_size_read(inode))
+ ei->i_size_aligned = aligned_size;
mutex_unlock(&sbi->s_lock);
}
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 1c7aa1ea4724..df805bd05508 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -31,7 +31,7 @@ static int __exfat_write_inode(struct inode *inode, int sync)
return 0;
/*
- * If the indode is already unlinked, there is no need for updating it.
+ * If the inode is already unlinked, there is no need for updating it.
*/
if (ei->dir.dir == DIR_DELETED)
return 0;
@@ -114,10 +114,9 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
unsigned int local_clu_offset = clu_offset;
unsigned int num_to_be_allocated = 0, num_clusters = 0;
- if (EXFAT_I(inode)->i_size_ondisk > 0)
+ if (ei->i_size_ondisk > 0)
num_clusters =
- EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk,
- sbi);
+ EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi);
if (clu_offset >= num_clusters)
num_to_be_allocated = clu_offset - num_clusters + 1;
@@ -416,10 +415,10 @@ static int exfat_write_end(struct file *file, struct address_space *mapping,
err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
- if (EXFAT_I(inode)->i_size_aligned < i_size_read(inode)) {
+ if (ei->i_size_aligned < i_size_read(inode)) {
exfat_fs_error(inode->i_sb,
"invalid size(size(%llu) > aligned(%llu)\n",
- i_size_read(inode), EXFAT_I(inode)->i_size_aligned);
+ i_size_read(inode), ei->i_size_aligned);
return -EIO;
}
@@ -603,8 +602,8 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
exfat_save_attr(inode, info->attr);
- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) &
- ~((loff_t)sbi->cluster_size - 1)) >> inode->i_blkbits;
+ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
+ inode->i_blkbits;
inode->i_mtime = info->mtime;
inode->i_ctime = info->mtime;
ei->i_crtime = info->crtime;
diff --git a/fs/exfat/misc.c b/fs/exfat/misc.c
index d34e6193258d..d5bd8e6d9741 100644
--- a/fs/exfat/misc.c
+++ b/fs/exfat/misc.c
@@ -10,6 +10,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
+#include <linux/blk_types.h>
#include "exfat_raw.h"
#include "exfat_fs.h"
@@ -180,7 +181,7 @@ int exfat_update_bhs(struct buffer_head **bhs, int nr_bhs, int sync)
set_buffer_uptodate(bhs[i]);
mark_buffer_dirty(bhs[i]);
if (sync)
- write_dirty_buffer(bhs[i], 0);
+ write_dirty_buffer(bhs[i], REQ_SYNC);
}
for (i = 0; i < nr_bhs && sync; i++) {
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 24b41103d1cc..af4eb39cc0c3 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -229,7 +229,7 @@ static int exfat_search_empty_slot(struct super_block *sb,
i = dentry & (dentries_per_clu - 1);
for (; i < dentries_per_clu; i++, dentry++) {
- ep = exfat_get_dentry(sb, &clu, i, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
type = exfat_get_entry_type(ep);
@@ -306,7 +306,6 @@ static int exfat_find_empty_entry(struct inode *inode,
{
int dentry;
unsigned int ret, last_clu;
- sector_t sector;
loff_t size = 0;
struct exfat_chain clu;
struct exfat_dentry *ep = NULL;
@@ -379,7 +378,7 @@ static int exfat_find_empty_entry(struct inode *inode,
struct buffer_head *bh;
ep = exfat_get_dentry(sb,
- &(ei->dir), ei->entry + 1, &bh, &sector);
+ &(ei->dir), ei->entry + 1, &bh);
if (!ep)
return -EIO;
@@ -395,9 +394,9 @@ static int exfat_find_empty_entry(struct inode *inode,
/* directory inode should be updated in here */
i_size_write(inode, size);
- EXFAT_I(inode)->i_size_ondisk += sbi->cluster_size;
- EXFAT_I(inode)->i_size_aligned += sbi->cluster_size;
- EXFAT_I(inode)->flags = p_dir->flags;
+ ei->i_size_ondisk += sbi->cluster_size;
+ ei->i_size_aligned += sbi->cluster_size;
+ ei->flags = p_dir->flags;
inode->i_blocks += 1 << sbi->sect_per_clus_bits;
}
@@ -779,7 +778,6 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
struct inode *inode = dentry->d_inode;
struct exfat_inode_info *ei = EXFAT_I(inode);
struct buffer_head *bh;
- sector_t sector;
int num_entries, entry, err = 0;
mutex_lock(&EXFAT_SB(sb)->s_lock);
@@ -791,7 +789,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
goto unlock;
}
- ep = exfat_get_dentry(sb, &cdir, entry, &bh, &sector);
+ ep = exfat_get_dentry(sb, &cdir, entry, &bh);
if (!ep) {
err = -EIO;
goto unlock;
@@ -895,7 +893,7 @@ static int exfat_check_dir_empty(struct super_block *sb,
while (clu.dir != EXFAT_EOF_CLUSTER) {
for (i = 0; i < dentries_per_clu; i++) {
- ep = exfat_get_dentry(sb, &clu, i, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
type = exfat_get_entry_type(ep);
@@ -932,7 +930,6 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
struct exfat_sb_info *sbi = EXFAT_SB(sb);
struct exfat_inode_info *ei = EXFAT_I(inode);
struct buffer_head *bh;
- sector_t sector;
int num_entries, entry, err;
mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock);
@@ -957,7 +954,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
goto unlock;
}
- ep = exfat_get_dentry(sb, &cdir, entry, &bh, &sector);
+ ep = exfat_get_dentry(sb, &cdir, entry, &bh);
if (!ep) {
err = -EIO;
goto unlock;
@@ -1005,13 +1002,12 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir,
struct exfat_inode_info *ei)
{
int ret, num_old_entries, num_new_entries;
- sector_t sector_old, sector_new;
struct exfat_dentry *epold, *epnew;
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh, *old_bh;
int sync = IS_DIRSYNC(inode);
- epold = exfat_get_dentry(sb, p_dir, oldentry, &old_bh, &sector_old);
+ epold = exfat_get_dentry(sb, p_dir, oldentry, &old_bh);
if (!epold)
return -EIO;
@@ -1032,8 +1028,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir,
if (newentry < 0)
return newentry; /* -EIO or -ENOSPC */
- epnew = exfat_get_dentry(sb, p_dir, newentry, &new_bh,
- &sector_new);
+ epnew = exfat_get_dentry(sb, p_dir, newentry, &new_bh);
if (!epnew)
return -EIO;
@@ -1046,12 +1041,10 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir,
brelse(old_bh);
brelse(new_bh);
- epold = exfat_get_dentry(sb, p_dir, oldentry + 1, &old_bh,
- &sector_old);
+ epold = exfat_get_dentry(sb, p_dir, oldentry + 1, &old_bh);
if (!epold)
return -EIO;
- epnew = exfat_get_dentry(sb, p_dir, newentry + 1, &new_bh,
- &sector_new);
+ epnew = exfat_get_dentry(sb, p_dir, newentry + 1, &new_bh);
if (!epnew) {
brelse(old_bh);
return -EIO;
@@ -1093,12 +1086,11 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir,
struct exfat_uni_name *p_uniname, struct exfat_inode_info *ei)
{
int ret, newentry, num_new_entries, num_old_entries;
- sector_t sector_mov, sector_new;
struct exfat_dentry *epmov, *epnew;
struct super_block *sb = inode->i_sb;
struct buffer_head *mov_bh, *new_bh;
- epmov = exfat_get_dentry(sb, p_olddir, oldentry, &mov_bh, &sector_mov);
+ epmov = exfat_get_dentry(sb, p_olddir, oldentry, &mov_bh);
if (!epmov)
return -EIO;
@@ -1116,7 +1108,7 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir,
if (newentry < 0)
return newentry; /* -EIO or -ENOSPC */
- epnew = exfat_get_dentry(sb, p_newdir, newentry, &new_bh, &sector_new);
+ epnew = exfat_get_dentry(sb, p_newdir, newentry, &new_bh);
if (!epnew)
return -EIO;
@@ -1129,12 +1121,10 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir,
brelse(mov_bh);
brelse(new_bh);
- epmov = exfat_get_dentry(sb, p_olddir, oldentry + 1, &mov_bh,
- &sector_mov);
+ epmov = exfat_get_dentry(sb, p_olddir, oldentry + 1, &mov_bh);
if (!epmov)
return -EIO;
- epnew = exfat_get_dentry(sb, p_newdir, newentry + 1, &new_bh,
- &sector_new);
+ epnew = exfat_get_dentry(sb, p_newdir, newentry + 1, &new_bh);
if (!epnew) {
brelse(mov_bh);
return -EIO;
@@ -1216,7 +1206,7 @@ static int __exfat_rename(struct inode *old_parent_inode,
exfat_chain_dup(&olddir, &ei->dir);
dentry = ei->entry;
- ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh, NULL);
+ ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh);
if (!ep) {
ret = -EIO;
goto out;
@@ -1237,7 +1227,7 @@ static int __exfat_rename(struct inode *old_parent_inode,
p_dir = &(new_ei->dir);
new_entry = new_ei->entry;
- ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh, NULL);
+ ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh);
if (!ep)
goto out;
@@ -1277,7 +1267,7 @@ static int __exfat_rename(struct inode *old_parent_inode,
if (!ret && new_inode) {
/* delete entries of new_dir */
- ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh, NULL);
+ ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh);
if (!ep) {
ret = -EIO;
goto del_out;
diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c
index 314d5407a1be..ef115e673406 100644
--- a/fs/exfat/nls.c
+++ b/fs/exfat/nls.c
@@ -761,7 +761,7 @@ int exfat_create_upcase_table(struct super_block *sb)
while (clu.dir != EXFAT_EOF_CLUSTER) {
for (i = 0; i < sbi->dentries_per_clu; i++) {
- ep = exfat_get_dentry(sb, &clu, i, &bh, NULL);
+ ep = exfat_get_dentry(sb, &clu, i, &bh);
if (!ep)
return -EIO;
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 5539ffc20d16..8c9fb7dcec16 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -17,6 +17,7 @@
#include <linux/iversion.h>
#include <linux/nls.h>
#include <linux/buffer_head.h>
+#include <linux/magic.h>
#include "exfat_raw.h"
#include "exfat_fs.h"
@@ -364,11 +365,11 @@ static int exfat_read_root(struct inode *inode)
inode->i_op = &exfat_dir_inode_operations;
inode->i_fop = &exfat_dir_operations;
- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1))
- & ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
- EXFAT_I(inode)->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
- EXFAT_I(inode)->i_size_aligned = i_size_read(inode);
- EXFAT_I(inode)->i_size_ondisk = i_size_read(inode);
+ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
+ inode->i_blkbits;
+ ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
+ ei->i_size_aligned = i_size_read(inode);
+ ei->i_size_ondisk = i_size_read(inode);
exfat_save_attr(inode, ATTR_SUBDIR);
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1077ce7e189f..74c91da585d7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -27,8 +27,8 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/fiemap.h>
-#include <linux/backing-dev.h>
#include <linux/iomap.h>
+#include <linux/sched/mm.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"
#include "xattr.h"
@@ -4404,8 +4404,7 @@ retry:
err = ext4_es_remove_extent(inode, last_block,
EXT_MAX_BLOCKS - last_block);
if (err == -ENOMEM) {
- cond_resched();
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ memalloc_retry_wait(GFP_ATOMIC);
goto retry;
}
if (err)
@@ -4413,8 +4412,7 @@ retry:
retry_remove_space:
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
if (err == -ENOMEM) {
- cond_resched();
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ memalloc_retry_wait(GFP_ATOMIC);
goto retry_remove_space;
}
return err;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 39a1ab129fdc..635bcf68a67e 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -7,7 +7,7 @@
#include <linux/iomap.h>
#include <linux/fiemap.h>
#include <linux/iversion.h>
-#include <linux/backing-dev.h>
+#include <linux/sched/mm.h>
#include "ext4_jbd2.h"
#include "ext4.h"
@@ -1929,8 +1929,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
retry:
err = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
if (err == -ENOMEM) {
- cond_resched();
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ memalloc_retry_wait(GFP_ATOMIC);
goto retry;
}
if (err)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index cf2fd9fc7d98..9f86dd947032 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2834,7 +2834,7 @@ out:
static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
ext4_group_t group;
if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
@@ -2845,7 +2845,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
ext4_group_t group;
++*pos;
@@ -2857,7 +2857,7 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
ext4_group_t group = (ext4_group_t) ((unsigned long) v);
int i;
int err, buddy_loaded = 0;
@@ -2985,7 +2985,7 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos)
__acquires(&EXT4_SB(sb)->s_mb_rb_lock)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
unsigned long position;
read_lock(&EXT4_SB(sb)->s_mb_rb_lock);
@@ -2998,7 +2998,7 @@ __acquires(&EXT4_SB(sb)->s_mb_rb_lock)
static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
unsigned long position;
++*pos;
@@ -3010,7 +3010,7 @@ static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, lof
static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned long position = ((unsigned long) v);
struct ext4_group_info *grp;
@@ -3058,7 +3058,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v)
__releases(&EXT4_SB(sb)->s_mb_rb_lock)
{
- struct super_block *sb = PDE_DATA(file_inode(seq->file));
+ struct super_block *sb = pde_data(file_inode(seq->file));
read_unlock(&EXT4_SB(sb)->s_mb_rb_lock);
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 9cb261714991..1d370364230e 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -24,7 +24,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/backing-dev.h>
+#include <linux/sched/mm.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -523,12 +523,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
ret = PTR_ERR(bounce_page);
if (ret == -ENOMEM &&
(io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) {
- gfp_flags = GFP_NOFS;
+ gfp_t new_gfp_flags = GFP_NOFS;
if (io->io_bio)
ext4_io_submit(io);
else
- gfp_flags |= __GFP_NOFAIL;
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ new_gfp_flags |= __GFP_NOFAIL;
+ memalloc_retry_wait(gfp_flags);
+ gfp_flags = new_gfp_flags;
goto retry_encrypt;
}
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 3db923403505..4cd62f1d848c 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -43,7 +43,6 @@
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
-#include <linux/cleancache.h>
#include "ext4.h"
@@ -350,11 +349,6 @@ int ext4_mpage_readpages(struct inode *inode,
} else if (fully_mapped) {
SetPageMappedToDisk(page);
}
- if (fully_mapped && blocks_per_page == 1 &&
- !PageUptodate(page) && cleancache_get_page(page) == 0) {
- SetPageUptodate(page);
- goto confused;
- }
/*
* This page will go to BIO. Do we need to send this
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0343f682504d..eee0d9ebfa6c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -39,7 +39,6 @@
#include <linux/log2.h>
#include <linux/crc16.h>
#include <linux/dax.h>
-#include <linux/cleancache.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
@@ -1966,29 +1965,22 @@ static const struct mount_opts {
static const struct ext4_sb_encodings {
__u16 magic;
char *name;
- char *version;
+ unsigned int version;
} ext4_sb_encoding_map[] = {
- {EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"},
+ {EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
};
-static int ext4_sb_read_encoding(const struct ext4_super_block *es,
- const struct ext4_sb_encodings **encoding,
- __u16 *flags)
+static const struct ext4_sb_encodings *
+ext4_sb_read_encoding(const struct ext4_super_block *es)
{
__u16 magic = le16_to_cpu(es->s_encoding);
int i;
for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
if (magic == ext4_sb_encoding_map[i].magic)
- break;
-
- if (i >= ARRAY_SIZE(ext4_sb_encoding_map))
- return -EINVAL;
-
- *encoding = &ext4_sb_encoding_map[i];
- *flags = le16_to_cpu(es->s_encoding_flags);
+ return &ext4_sb_encoding_map[i];
- return 0;
+ return NULL;
}
#endif
@@ -3156,8 +3148,6 @@ done:
EXT4_BLOCKS_PER_GROUP(sb),
EXT4_INODES_PER_GROUP(sb),
sbi->s_mount_opt, sbi->s_mount_opt2);
-
- cleancache_init_fs(sb);
return err;
}
@@ -4624,10 +4614,10 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
const struct ext4_sb_encodings *encoding_info;
struct unicode_map *encoding;
- __u16 encoding_flags;
+ __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
- if (ext4_sb_read_encoding(es, &encoding_info,
- &encoding_flags)) {
+ encoding_info = ext4_sb_read_encoding(es);
+ if (!encoding_info) {
ext4_msg(sb, KERN_ERR,
"Encoding requested by superblock is unknown");
goto failed_mount;
@@ -4636,15 +4626,21 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
encoding = utf8_load(encoding_info->version);
if (IS_ERR(encoding)) {
ext4_msg(sb, KERN_ERR,
- "can't mount with superblock charset: %s-%s "
+ "can't mount with superblock charset: %s-%u.%u.%u "
"not supported by the kernel. flags: 0x%x.",
- encoding_info->name, encoding_info->version,
+ encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
encoding_flags);
goto failed_mount;
}
ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
- "%s-%s with flags 0x%hx", encoding_info->name,
- encoding_info->version?:"\b", encoding_flags);
+ "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
+ encoding_flags);
sb->s_encoding = encoding;
sb->s_encoding_flags = encoding_flags;
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 7eea3cfd894d..f46a7339d6cf 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -7,6 +7,7 @@ config F2FS_FS
select CRYPTO_CRC32
select F2FS_FS_XATTR if FS_ENCRYPTION
select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
+ select FS_IOMAP
select LZ4_COMPRESS if F2FS_FS_LZ4
select LZ4_DECOMPRESS if F2FS_FS_LZ4
select LZ4HC_COMPRESS if F2FS_FS_LZ4HC
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f1693d45bb78..982f0170639f 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -664,7 +664,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
/* truncate all the data during iput */
iput(inode);
- err = f2fs_get_node_info(sbi, ino, &ni);
+ err = f2fs_get_node_info(sbi, ino, &ni, false);
if (err)
goto err_out;
@@ -1302,8 +1302,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unsigned long flags;
if (cpc->reason & CP_UMOUNT) {
- if (le32_to_cpu(ckpt->cp_pack_total_block_count) >
- sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) {
+ if (le32_to_cpu(ckpt->cp_pack_total_block_count) +
+ NM_I(sbi)->nat_bits_blocks > sbi->blocks_per_seg) {
clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
f2fs_notice(sbi, "Disable nat_bits due to no space");
} else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 49121a21f749..d0c3aeba5945 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -154,6 +154,7 @@ void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse)
cc->rpages = NULL;
cc->nr_rpages = 0;
cc->nr_cpages = 0;
+ cc->valid_nr_cpages = 0;
if (!reuse)
cc->cluster_idx = NULL_CLUSTER;
}
@@ -620,7 +621,6 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
const struct f2fs_compress_ops *cops =
f2fs_cops[fi->i_compress_algorithm];
unsigned int max_len, new_nr_cpages;
- struct page **new_cpages;
u32 chksum = 0;
int i, ret;
@@ -635,6 +635,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
max_len = COMPRESS_HEADER_SIZE + cc->clen;
cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE);
+ cc->valid_nr_cpages = cc->nr_cpages;
cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages);
if (!cc->cpages) {
@@ -685,13 +686,6 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
new_nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE);
- /* Now we're going to cut unnecessary tail pages */
- new_cpages = page_array_alloc(cc->inode, new_nr_cpages);
- if (!new_cpages) {
- ret = -ENOMEM;
- goto out_vunmap_cbuf;
- }
-
/* zero out any unused part of the last page */
memset(&cc->cbuf->cdata[cc->clen], 0,
(new_nr_cpages * PAGE_SIZE) -
@@ -701,10 +695,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
vm_unmap_ram(cc->rbuf, cc->cluster_size);
for (i = 0; i < cc->nr_cpages; i++) {
- if (i < new_nr_cpages) {
- new_cpages[i] = cc->cpages[i];
+ if (i < new_nr_cpages)
continue;
- }
f2fs_compress_free_page(cc->cpages[i]);
cc->cpages[i] = NULL;
}
@@ -712,9 +704,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
if (cops->destroy_compress_ctx)
cops->destroy_compress_ctx(cc);
- page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
- cc->cpages = new_cpages;
- cc->nr_cpages = new_nr_cpages;
+ cc->valid_nr_cpages = new_nr_cpages;
trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx,
cc->clen, ret);
@@ -1296,7 +1286,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT;
- err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false);
if (err)
goto out_put_dnode;
@@ -1308,14 +1298,14 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
cic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
cic->inode = inode;
- atomic_set(&cic->pending_pages, cc->nr_cpages);
+ atomic_set(&cic->pending_pages, cc->valid_nr_cpages);
cic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
if (!cic->rpages)
goto out_put_cic;
cic->nr_rpages = cc->cluster_size;
- for (i = 0; i < cc->nr_cpages; i++) {
+ for (i = 0; i < cc->valid_nr_cpages; i++) {
f2fs_set_compressed_page(cc->cpages[i], inode,
cc->rpages[i + 1]->index, cic);
fio.compressed_page = cc->cpages[i];
@@ -1360,7 +1350,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
if (fio.compr_blocks && __is_valid_data_blkaddr(blkaddr))
fio.compr_blocks++;
- if (i > cc->nr_cpages) {
+ if (i > cc->valid_nr_cpages) {
if (__is_valid_data_blkaddr(blkaddr)) {
f2fs_invalidate_blocks(sbi, blkaddr);
f2fs_update_data_blkaddr(&dn, NEW_ADDR);
@@ -1385,8 +1375,8 @@ unlock_continue:
if (fio.compr_blocks)
f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false);
- f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
- add_compr_block_stat(inode, cc->nr_cpages);
+ f2fs_i_compr_blocks_update(inode, cc->valid_nr_cpages, true);
+ add_compr_block_stat(inode, cc->valid_nr_cpages);
set_inode_flag(cc->inode, FI_APPEND_WRITE);
if (cc->cluster_idx == 0)
@@ -1424,9 +1414,7 @@ out_unlock_op:
else
f2fs_unlock_op(sbi);
out_free:
- for (i = 0; i < cc->nr_cpages; i++) {
- if (!cc->cpages[i])
- continue;
+ for (i = 0; i < cc->valid_nr_cpages; i++) {
f2fs_compress_free_page(cc->cpages[i]);
cc->cpages[i] = NULL;
}
@@ -1468,25 +1456,38 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
enum iostat_type io_type)
{
struct address_space *mapping = cc->inode->i_mapping;
- int _submitted, compr_blocks, ret;
- int i = -1, err = 0;
+ int _submitted, compr_blocks, ret, i;
compr_blocks = f2fs_compressed_blocks(cc);
- if (compr_blocks < 0) {
- err = compr_blocks;
- goto out_err;
+
+ for (i = 0; i < cc->cluster_size; i++) {
+ if (!cc->rpages[i])
+ continue;
+
+ redirty_page_for_writepage(wbc, cc->rpages[i]);
+ unlock_page(cc->rpages[i]);
}
+ if (compr_blocks < 0)
+ return compr_blocks;
+
for (i = 0; i < cc->cluster_size; i++) {
if (!cc->rpages[i])
continue;
retry_write:
+ lock_page(cc->rpages[i]);
+
if (cc->rpages[i]->mapping != mapping) {
+continue_unlock:
unlock_page(cc->rpages[i]);
continue;
}
- BUG_ON(!PageLocked(cc->rpages[i]));
+ if (!PageDirty(cc->rpages[i]))
+ goto continue_unlock;
+
+ if (!clear_page_dirty_for_io(cc->rpages[i]))
+ goto continue_unlock;
ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted,
NULL, NULL, wbc, io_type,
@@ -1501,26 +1502,15 @@ retry_write:
* avoid deadlock caused by cluster update race
* from foreground operation.
*/
- if (IS_NOQUOTA(cc->inode)) {
- err = 0;
- goto out_err;
- }
+ if (IS_NOQUOTA(cc->inode))
+ return 0;
ret = 0;
cond_resched();
congestion_wait(BLK_RW_ASYNC,
DEFAULT_IO_TIMEOUT);
- lock_page(cc->rpages[i]);
-
- if (!PageDirty(cc->rpages[i])) {
- unlock_page(cc->rpages[i]);
- continue;
- }
-
- clear_page_dirty_for_io(cc->rpages[i]);
goto retry_write;
}
- err = ret;
- goto out_err;
+ return ret;
}
*submitted += _submitted;
@@ -1529,14 +1519,6 @@ retry_write:
f2fs_balance_fs(F2FS_M_SB(mapping), true);
return 0;
-out_err:
- for (++i; i < cc->cluster_size; i++) {
- if (!cc->rpages[i])
- continue;
- redirty_page_for_writepage(wbc, cc->rpages[i]);
- unlock_page(cc->rpages[i]);
- }
- return err;
}
int f2fs_write_multi_pages(struct compress_ctx *cc,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9f754aaef558..8c417864c66a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -8,9 +8,9 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
+#include <linux/sched/mm.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
-#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
@@ -18,9 +18,9 @@
#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
-#include <linux/cleancache.h>
#include <linux/sched/signal.h>
#include <linux/fiemap.h>
+#include <linux/iomap.h>
#include "f2fs.h"
#include "node.h"
@@ -1354,7 +1354,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
- err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
if (err)
return err;
@@ -1376,61 +1376,9 @@ alloc:
f2fs_invalidate_compress_page(sbi, old_blkaddr);
}
f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
-
- /*
- * i_size will be updated by direct_IO. Otherwise, we'll get stale
- * data from unwritten block via dio_read.
- */
return 0;
}
-int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
-{
- struct inode *inode = file_inode(iocb->ki_filp);
- struct f2fs_map_blocks map;
- int flag;
- int err = 0;
- bool direct_io = iocb->ki_flags & IOCB_DIRECT;
-
- map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
- map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
- if (map.m_len > map.m_lblk)
- map.m_len -= map.m_lblk;
- else
- map.m_len = 0;
-
- map.m_next_pgofs = NULL;
- map.m_next_extent = NULL;
- map.m_seg_type = NO_CHECK_TYPE;
- map.m_may_create = true;
-
- if (direct_io) {
- map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
- flag = f2fs_force_buffered_io(inode, iocb, from) ?
- F2FS_GET_BLOCK_PRE_AIO :
- F2FS_GET_BLOCK_PRE_DIO;
- goto map_blocks;
- }
- if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
- err = f2fs_convert_inline_inode(inode);
- if (err)
- return err;
- }
- if (f2fs_has_inline_data(inode))
- return err;
-
- flag = F2FS_GET_BLOCK_PRE_AIO;
-
-map_blocks:
- err = f2fs_map_blocks(inode, &map, 1, flag);
- if (map.m_len > 0 && err == -ENOSPC) {
- if (!direct_io)
- set_inode_flag(inode, FI_NO_PREALLOC);
- err = 0;
- }
- return err;
-}
-
void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
@@ -1590,8 +1538,11 @@ next_block:
flag != F2FS_GET_BLOCK_DIO);
err = __allocate_data_block(&dn,
map->m_seg_type);
- if (!err)
+ if (!err) {
+ if (flag == F2FS_GET_BLOCK_PRE_DIO)
+ file_need_truncate(inode);
set_inode_flag(inode, FI_APPEND_WRITE);
+ }
}
if (err)
goto sync_out;
@@ -1786,50 +1737,6 @@ static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
return (blks << inode->i_blkbits);
}
-static int __get_data_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create, int flag,
- pgoff_t *next_pgofs, int seg_type, bool may_write)
-{
- struct f2fs_map_blocks map;
- int err;
-
- map.m_lblk = iblock;
- map.m_len = bytes_to_blks(inode, bh->b_size);
- map.m_next_pgofs = next_pgofs;
- map.m_next_extent = NULL;
- map.m_seg_type = seg_type;
- map.m_may_create = may_write;
-
- err = f2fs_map_blocks(inode, &map, create, flag);
- if (!err) {
- map_bh(bh, inode->i_sb, map.m_pblk);
- bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
- bh->b_size = blks_to_bytes(inode, map.m_len);
-
- if (map.m_multidev_dio)
- bh->b_bdev = map.m_bdev;
- }
- return err;
-}
-
-static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DIO, NULL,
- f2fs_rw_hint_to_seg_type(inode->i_write_hint),
- true);
-}
-
-static int get_data_block_dio(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DIO, NULL,
- f2fs_rw_hint_to_seg_type(inode->i_write_hint),
- false);
-}
-
static int f2fs_xattr_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo)
{
@@ -1849,7 +1756,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
- err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
if (err) {
f2fs_put_page(page, 1);
return err;
@@ -1881,7 +1788,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
- err = f2fs_get_node_info(sbi, xnid, &ni);
+ err = f2fs_get_node_info(sbi, xnid, &ni, false);
if (err) {
f2fs_put_page(page, 1);
return err;
@@ -2127,12 +2034,6 @@ got_it:
block_nr = map->m_pblk + block_in_file - map->m_lblk;
SetPageMappedToDisk(page);
- if (!PageUptodate(page) && (!PageSwapCache(page) &&
- !cleancache_get_page(page))) {
- SetPageUptodate(page);
- goto confused;
- }
-
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
DATA_GENERIC_ENHANCE_READ)) {
ret = -EFSCORRUPTED;
@@ -2188,12 +2089,6 @@ submit_and_realloc:
ClearPageError(page);
*last_block_in_bio = block_nr;
goto out;
-confused:
- if (bio) {
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
- bio = NULL;
- }
- unlock_page(page);
out:
*bio_ret = bio;
return ret;
@@ -2542,7 +2437,7 @@ retry_encrypt:
/* flush pending IOs and wait for a while in the ENOMEM case */
if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
f2fs_flush_merged_writes(fio->sbi);
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
gfp_flags |= __GFP_NOFAIL;
goto retry_encrypt;
}
@@ -2617,6 +2512,11 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ /* The below cases were checked when setting it. */
+ if (f2fs_is_pinned_file(inode))
+ return false;
+ if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ return true;
if (f2fs_lfs_mode(sbi))
return true;
if (S_ISDIR(inode->i_mode))
@@ -2625,8 +2525,6 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
return true;
if (f2fs_is_atomic_file(inode))
return true;
- if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
- return true;
/* swap file is migrating in aligned write mode */
if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
@@ -2738,7 +2636,7 @@ got_it:
fio->need_lock = LOCK_REQ;
}
- err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
if (err)
goto out_writepage;
@@ -2987,6 +2885,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
.rpages = NULL,
.nr_rpages = 0,
.cpages = NULL,
+ .valid_nr_cpages = 0,
.rbuf = NULL,
.cbuf = NULL,
.rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
@@ -3305,7 +3204,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
FS_CP_DATA_IO : FS_DATA_IO);
}
-static void f2fs_write_failed(struct inode *inode, loff_t to)
+void f2fs_write_failed(struct inode *inode, loff_t to)
{
loff_t i_size = i_size_read(inode);
@@ -3339,12 +3238,10 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
int flag;
/*
- * we already allocated all the blocks, so we don't need to get
- * the block addresses when there is no need to fill the page.
+ * If a whole page is being written and we already preallocated all the
+ * blocks, then there is no need to get a block address now.
*/
- if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
- !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
- !f2fs_verity_in_progress(inode))
+ if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
return 0;
/* f2fs_lock_op avoids race between write CP and convert_inline_page */
@@ -3595,158 +3492,6 @@ unlock_out:
return copied;
}
-static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
- loff_t offset)
-{
- unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
- unsigned blkbits = i_blkbits;
- unsigned blocksize_mask = (1 << blkbits) - 1;
- unsigned long align = offset | iov_iter_alignment(iter);
- struct block_device *bdev = inode->i_sb->s_bdev;
-
- if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
- return 1;
-
- if (align & blocksize_mask) {
- if (bdev)
- blkbits = blksize_bits(bdev_logical_block_size(bdev));
- blocksize_mask = (1 << blkbits) - 1;
- if (align & blocksize_mask)
- return -EINVAL;
- return 1;
- }
- return 0;
-}
-
-static void f2fs_dio_end_io(struct bio *bio)
-{
- struct f2fs_private_dio *dio = bio->bi_private;
-
- dec_page_count(F2FS_I_SB(dio->inode),
- dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
- bio->bi_private = dio->orig_private;
- bio->bi_end_io = dio->orig_end_io;
-
- kfree(dio);
-
- bio_endio(bio);
-}
-
-static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
- loff_t file_offset)
-{
- struct f2fs_private_dio *dio;
- bool write = (bio_op(bio) == REQ_OP_WRITE);
-
- dio = f2fs_kzalloc(F2FS_I_SB(inode),
- sizeof(struct f2fs_private_dio), GFP_NOFS);
- if (!dio)
- goto out;
-
- dio->inode = inode;
- dio->orig_end_io = bio->bi_end_io;
- dio->orig_private = bio->bi_private;
- dio->write = write;
-
- bio->bi_end_io = f2fs_dio_end_io;
- bio->bi_private = dio;
-
- inc_page_count(F2FS_I_SB(inode),
- write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
- submit_bio(bio);
- return;
-out:
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
-}
-
-static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
- struct address_space *mapping = iocb->ki_filp->f_mapping;
- struct inode *inode = mapping->host;
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_inode_info *fi = F2FS_I(inode);
- size_t count = iov_iter_count(iter);
- loff_t offset = iocb->ki_pos;
- int rw = iov_iter_rw(iter);
- int err;
- enum rw_hint hint = iocb->ki_hint;
- int whint_mode = F2FS_OPTION(sbi).whint_mode;
- bool do_opu;
-
- err = check_direct_IO(inode, iter, offset);
- if (err)
- return err < 0 ? err : 0;
-
- if (f2fs_force_buffered_io(inode, iocb, iter))
- return 0;
-
- do_opu = rw == WRITE && f2fs_lfs_mode(sbi);
-
- trace_f2fs_direct_IO_enter(inode, offset, count, rw);
-
- if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
- iocb->ki_hint = WRITE_LIFE_NOT_SET;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
- iocb->ki_hint = hint;
- err = -EAGAIN;
- goto out;
- }
- if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
- up_read(&fi->i_gc_rwsem[rw]);
- iocb->ki_hint = hint;
- err = -EAGAIN;
- goto out;
- }
- } else {
- down_read(&fi->i_gc_rwsem[rw]);
- if (do_opu)
- down_read(&fi->i_gc_rwsem[READ]);
- }
-
- err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
- iter, rw == WRITE ? get_data_block_dio_write :
- get_data_block_dio, NULL, f2fs_dio_submit_bio,
- rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
- DIO_SKIP_HOLES);
-
- if (do_opu)
- up_read(&fi->i_gc_rwsem[READ]);
-
- up_read(&fi->i_gc_rwsem[rw]);
-
- if (rw == WRITE) {
- if (whint_mode == WHINT_MODE_OFF)
- iocb->ki_hint = hint;
- if (err > 0) {
- f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
- err);
- if (!do_opu)
- set_inode_flag(inode, FI_UPDATE_WRITE);
- } else if (err == -EIOCBQUEUED) {
- f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
- count - iov_iter_count(iter));
- } else if (err < 0) {
- f2fs_write_failed(inode, offset + count);
- }
- } else {
- if (err > 0)
- f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
- else if (err == -EIOCBQUEUED)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
- count - iov_iter_count(iter));
- }
-
-out:
- trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
-
- return err;
-}
-
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length)
{
@@ -3770,12 +3515,9 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
clear_page_private_gcing(page);
- if (test_opt(sbi, COMPRESS_CACHE)) {
- if (f2fs_compressed_file(inode))
- f2fs_invalidate_compress_pages(sbi, inode->i_ino);
- if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
- clear_page_private_data(page);
- }
+ if (test_opt(sbi, COMPRESS_CACHE) &&
+ inode->i_ino == F2FS_COMPRESS_INO(sbi))
+ clear_page_private_data(page);
if (page_private_atomic(page))
return f2fs_drop_inmem_page(inode, page);
@@ -3795,12 +3537,9 @@ int f2fs_release_page(struct page *page, gfp_t wait)
return 0;
if (test_opt(F2FS_P_SB(page), COMPRESS_CACHE)) {
- struct f2fs_sb_info *sbi = F2FS_P_SB(page);
struct inode *inode = page->mapping->host;
- if (f2fs_compressed_file(inode))
- f2fs_invalidate_compress_pages(sbi, inode->i_ino);
- if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
+ if (inode->i_ino == F2FS_COMPRESS_INO(F2FS_I_SB(inode)))
clear_page_private_data(page);
}
@@ -4202,7 +3941,7 @@ const struct address_space_operations f2fs_dblock_aops = {
.set_page_dirty = f2fs_set_data_page_dirty,
.invalidatepage = f2fs_invalidate_page,
.releasepage = f2fs_release_page,
- .direct_IO = f2fs_direct_IO,
+ .direct_IO = noop_direct_IO,
.bmap = f2fs_bmap,
.swap_activate = f2fs_swap_activate,
.swap_deactivate = f2fs_swap_deactivate,
@@ -4282,3 +4021,58 @@ void f2fs_destroy_bio_entry_cache(void)
{
kmem_cache_destroy(bio_entry_slab);
}
+
+static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned int flags, struct iomap *iomap,
+ struct iomap *srcmap)
+{
+ struct f2fs_map_blocks map = {};
+ pgoff_t next_pgofs = 0;
+ int err;
+
+ map.m_lblk = bytes_to_blks(inode, offset);
+ map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
+ map.m_next_pgofs = &next_pgofs;
+ map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+ if (flags & IOMAP_WRITE)
+ map.m_may_create = true;
+
+ err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
+ F2FS_GET_BLOCK_DIO);
+ if (err)
+ return err;
+
+ iomap->offset = blks_to_bytes(inode, map.m_lblk);
+
+ if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
+ iomap->length = blks_to_bytes(inode, map.m_len);
+ if (map.m_flags & F2FS_MAP_MAPPED) {
+ iomap->type = IOMAP_MAPPED;
+ iomap->flags |= IOMAP_F_MERGED;
+ } else {
+ iomap->type = IOMAP_UNWRITTEN;
+ }
+ if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
+ return -EINVAL;
+
+ iomap->bdev = map.m_bdev;
+ iomap->addr = blks_to_bytes(inode, map.m_pblk);
+ } else {
+ iomap->length = blks_to_bytes(inode, next_pgofs) -
+ iomap->offset;
+ iomap->type = IOMAP_HOLE;
+ iomap->addr = IOMAP_NULL_ADDR;
+ }
+
+ if (map.m_flags & F2FS_MAP_NEW)
+ iomap->flags |= IOMAP_F_NEW;
+ if ((inode->i_state & I_DIRTY_DATASYNC) ||
+ offset + length > i_size_read(inode))
+ iomap->flags |= IOMAP_F_DIRTY;
+
+ return 0;
+}
+
+const struct iomap_ops f2fs_iomap_ops = {
+ .iomap_begin = f2fs_iomap_begin,
+};
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d0d603187171..eb22fa91c2b2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -58,6 +58,7 @@ enum {
FAULT_WRITE_IO,
FAULT_SLAB_ALLOC,
FAULT_DQUOT_INIT,
+ FAULT_LOCK_OP,
FAULT_MAX,
};
@@ -656,6 +657,7 @@ enum {
#define FADVISE_KEEP_SIZE_BIT 0x10
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40
+#define FADVISE_TRUNC_BIT 0x80
#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
@@ -683,6 +685,10 @@ enum {
#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
+#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
+#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
+#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
+
#define DEF_DIR_LEVEL 0
enum {
@@ -717,7 +723,7 @@ enum {
FI_INLINE_DOTS, /* indicate inline dot dentries */
FI_DO_DEFRAG, /* indicate defragment is running */
FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
- FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
+ FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */
FI_HOT_DATA, /* indicate file is hot */
FI_EXTRA_ATTR, /* indicate file has extra attribute */
FI_PROJ_INHERIT, /* indicate file inherits projectid */
@@ -1020,6 +1026,7 @@ struct f2fs_sm_info {
unsigned int segment_count; /* total # of segments */
unsigned int main_segments; /* # of segments in main area */
unsigned int reserved_segments; /* # of reserved segments */
+ unsigned int additional_reserved_segments;/* reserved segs for IO align feature */
unsigned int ovp_segments; /* # of overprovision segments */
/* a threshold to reclaim prefree segments */
@@ -1488,6 +1495,7 @@ struct compress_ctx {
unsigned int nr_rpages; /* total page number in rpages */
struct page **cpages; /* pages store compressed data in cluster */
unsigned int nr_cpages; /* total page number in cpages */
+ unsigned int valid_nr_cpages; /* valid page number in cpages */
void *rbuf; /* virtual mapped address on rpages */
struct compress_data *cbuf; /* virtual mapped address on cpages */
size_t rlen; /* valid data length in rbuf */
@@ -1679,6 +1687,9 @@ struct f2fs_sb_info {
unsigned int cur_victim_sec; /* current victim section num */
unsigned int gc_mode; /* current GC state */
unsigned int next_victim_seg[2]; /* next segment in victim section */
+ spinlock_t gc_urgent_high_lock;
+ bool gc_urgent_high_limited; /* indicates having limited trial count */
+ unsigned int gc_urgent_high_remaining; /* remaining trial count for GC_URGENT_HIGH */
/* for skip statistic */
unsigned int atomic_files; /* # of opened atomic file */
@@ -1803,13 +1814,6 @@ struct f2fs_sb_info {
#endif
};
-struct f2fs_private_dio {
- struct inode *inode;
- void *orig_private;
- bio_end_io_t *orig_end_io;
- bool write;
-};
-
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(sbi, type) \
printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \
@@ -2095,6 +2099,10 @@ static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
{
+ if (time_to_inject(sbi, FAULT_LOCK_OP)) {
+ f2fs_show_injection_info(sbi, FAULT_LOCK_OP);
+ return 0;
+ }
return down_read_trylock(&sbi->cp_rwsem);
}
@@ -2200,6 +2208,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
if (!__allow_reserved_blocks(sbi, inode, true))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (F2FS_IO_ALIGNED(sbi))
+ avail_user_block_count -= sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments;
+
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
if (avail_user_block_count > sbi->unusable_block_count)
avail_user_block_count -= sbi->unusable_block_count;
@@ -2446,6 +2459,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
if (!__allow_reserved_blocks(sbi, inode, false))
valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (F2FS_IO_ALIGNED(sbi))
+ valid_block_count += sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments;
+
user_block_count = sbi->user_block_count;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
user_block_count -= sbi->unusable_block_count;
@@ -3118,12 +3136,16 @@ static inline int is_file(struct inode *inode, int type)
static inline void set_file(struct inode *inode, int type)
{
+ if (is_file(inode, type))
+ return;
F2FS_I(inode)->i_advise |= type;
f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void clear_file(struct inode *inode, int type)
{
+ if (!is_file(inode, type))
+ return;
F2FS_I(inode)->i_advise &= ~type;
f2fs_mark_inode_dirty_sync(inode, true);
}
@@ -3408,7 +3430,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
- struct node_info *ni);
+ struct node_info *ni, bool checkpoint_context);
pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
@@ -3616,7 +3638,6 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
int f2fs_reserve_new_block(struct dnode_of_data *dn);
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
-int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
int op_flags, bool for_write);
@@ -3639,6 +3660,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
struct writeback_control *wbc,
enum iostat_type io_type,
int compr_blocks, bool allow_balance);
+void f2fs_write_failed(struct inode *inode, loff_t to);
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length);
int f2fs_release_page(struct page *page, gfp_t wait);
@@ -3652,6 +3674,7 @@ int f2fs_init_post_read_processing(void);
void f2fs_destroy_post_read_processing(void);
int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
+extern const struct iomap_ops f2fs_iomap_ops;
/*
* gc.c
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 92ec2699bc85..3c98ef6af97d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -24,6 +24,7 @@
#include <linux/sched/signal.h>
#include <linux/fileattr.h>
#include <linux/fadvise.h>
+#include <linux/iomap.h>
#include "f2fs.h"
#include "node.h"
@@ -1232,7 +1233,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
if (ret)
return ret;
- ret = f2fs_get_node_info(sbi, dn.nid, &ni);
+ ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
if (ret) {
f2fs_put_dnode(&dn);
return ret;
@@ -1687,6 +1688,7 @@ next_alloc:
map.m_seg_type = CURSEG_COLD_DATA_PINNED;
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+ file_dont_truncate(inode);
up_write(&sbi->pin_sem);
@@ -1748,7 +1750,11 @@ static long f2fs_fallocate(struct file *file, int mode,
(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
return -EOPNOTSUPP;
- if (f2fs_compressed_file(inode) &&
+ /*
+ * Pinned file should not support partial trucation since the block
+ * can be used by applications.
+ */
+ if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)))
return -EOPNOTSUPP;
@@ -3143,17 +3149,17 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
inode_lock(inode);
- if (f2fs_should_update_outplace(inode, NULL)) {
- ret = -EINVAL;
- goto out;
- }
-
if (!pin) {
clear_inode_flag(inode, FI_PIN_FILE);
f2fs_i_gc_failures_write(inode, 0);
goto done;
}
+ if (f2fs_should_update_outplace(inode, NULL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
if (f2fs_pin_file_control(inode, false)) {
ret = -EAGAIN;
goto out;
@@ -4218,27 +4224,385 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return __f2fs_ioctl(filp, cmd, arg);
}
-static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+/*
+ * Return %true if the given read or write request should use direct I/O, or
+ * %false if it should use buffered I/O.
+ */
+static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ unsigned int align;
+
+ if (!(iocb->ki_flags & IOCB_DIRECT))
+ return false;
+
+ if (f2fs_force_buffered_io(inode, iocb, iter))
+ return false;
+
+ /*
+ * Direct I/O not aligned to the disk's logical_block_size will be
+ * attempted, but will fail with -EINVAL.
+ *
+ * f2fs additionally requires that direct I/O be aligned to the
+ * filesystem block size, which is often a stricter requirement.
+ * However, f2fs traditionally falls back to buffered I/O on requests
+ * that are logical_block_size-aligned but not fs-block aligned.
+ *
+ * The below logic implements this behavior.
+ */
+ align = iocb->ki_pos | iov_iter_alignment(iter);
+ if (!IS_ALIGNED(align, i_blocksize(inode)) &&
+ IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
+ return false;
+
+ return true;
+}
+
+static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
+ unsigned int flags)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
+
+ dec_page_count(sbi, F2FS_DIO_READ);
+ if (error)
+ return error;
+ f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, size);
+ return 0;
+}
+
+static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
+ .end_io = f2fs_dio_read_end_io,
+};
+
+static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- int ret;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ const loff_t pos = iocb->ki_pos;
+ const size_t count = iov_iter_count(to);
+ struct iomap_dio *dio;
+ ssize_t ret;
+
+ if (count == 0)
+ return 0; /* skip atime update */
+
+ trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ } else {
+ down_read(&fi->i_gc_rwsem[READ]);
+ }
+
+ /*
+ * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
+ * the higher-level function iomap_dio_rw() in order to ensure that the
+ * F2FS_DIO_READ counter will be decremented correctly in all cases.
+ */
+ inc_page_count(sbi, F2FS_DIO_READ);
+ dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
+ &f2fs_iomap_dio_read_ops, 0, 0);
+ if (IS_ERR_OR_NULL(dio)) {
+ ret = PTR_ERR_OR_ZERO(dio);
+ if (ret != -EIOCBQUEUED)
+ dec_page_count(sbi, F2FS_DIO_READ);
+ } else {
+ ret = iomap_dio_complete(dio);
+ }
+
+ up_read(&fi->i_gc_rwsem[READ]);
+
+ file_accessed(file);
+out:
+ trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
+ return ret;
+}
+
+static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
if (!f2fs_is_compress_backend_ready(inode))
return -EOPNOTSUPP;
- ret = generic_file_read_iter(iocb, iter);
+ if (f2fs_should_use_dio(inode, iocb, to))
+ return f2fs_dio_read_iter(iocb, to);
+ ret = filemap_read(iocb, to, 0);
if (ret > 0)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret);
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
+ return ret;
+}
+
+static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ ssize_t count;
+ int err;
+
+ if (IS_IMMUTABLE(inode))
+ return -EPERM;
+
+ if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
+ return -EPERM;
+
+ count = generic_write_checks(iocb, from);
+ if (count <= 0)
+ return count;
+
+ err = file_modified(file);
+ if (err)
+ return err;
+ return count;
+}
+
+/*
+ * Preallocate blocks for a write request, if it is possible and helpful to do
+ * so. Returns a positive number if blocks may have been preallocated, 0 if no
+ * blocks were preallocated, or a negative errno value if something went
+ * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the
+ * requested blocks (not just some of them) have been allocated.
+ */
+static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
+ bool dio)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ const loff_t pos = iocb->ki_pos;
+ const size_t count = iov_iter_count(iter);
+ struct f2fs_map_blocks map = {};
+ int flag;
+ int ret;
+
+ /* If it will be an out-of-place direct write, don't bother. */
+ if (dio && f2fs_lfs_mode(sbi))
+ return 0;
+ /*
+ * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
+ * buffered IO, if DIO meets any holes.
+ */
+ if (dio && i_size_read(inode) &&
+ (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
+ return 0;
+
+ /* No-wait I/O can't allocate blocks. */
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return 0;
+
+ /* If it will be a short write, don't bother. */
+ if (fault_in_iov_iter_readable(iter, count))
+ return 0;
+
+ if (f2fs_has_inline_data(inode)) {
+ /* If the data will fit inline, don't bother. */
+ if (pos + count <= MAX_INLINE_DATA(inode))
+ return 0;
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+ }
+
+ /* Do not preallocate blocks that will be written partially in 4KB. */
+ map.m_lblk = F2FS_BLK_ALIGN(pos);
+ map.m_len = F2FS_BYTES_TO_BLK(pos + count);
+ if (map.m_len > map.m_lblk)
+ map.m_len -= map.m_lblk;
+ else
+ map.m_len = 0;
+ map.m_may_create = true;
+ if (dio) {
+ map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+ flag = F2FS_GET_BLOCK_PRE_DIO;
+ } else {
+ map.m_seg_type = NO_CHECK_TYPE;
+ flag = F2FS_GET_BLOCK_PRE_AIO;
+ }
+
+ ret = f2fs_map_blocks(inode, &map, 1, flag);
+ /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */
+ if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
+ return ret;
+ if (ret == 0)
+ set_inode_flag(inode, FI_PREALLOCATED_ALL);
+ return map.m_len;
+}
+
+static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
+ struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ ssize_t ret;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
+ current->backing_dev_info = inode_to_bdi(inode);
+ ret = generic_perform_write(file, from, iocb->ki_pos);
+ current->backing_dev_info = NULL;
+
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret);
+ }
return ret;
}
-static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
+ unsigned int flags)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
+
+ dec_page_count(sbi, F2FS_DIO_WRITE);
+ if (error)
+ return error;
+ f2fs_update_iostat(sbi, APP_DIRECT_IO, size);
+ return 0;
+}
+
+static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
+ .end_io = f2fs_dio_write_end_io,
+};
+
+static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
+ bool *may_need_sync)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ const bool do_opu = f2fs_lfs_mode(sbi);
+ const int whint_mode = F2FS_OPTION(sbi).whint_mode;
+ const loff_t pos = iocb->ki_pos;
+ const ssize_t count = iov_iter_count(from);
+ const enum rw_hint hint = iocb->ki_hint;
+ unsigned int dio_flags;
+ struct iomap_dio *dio;
+ ssize_t ret;
+
+ trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ /* f2fs_convert_inline_inode() and block allocation can block */
+ if (f2fs_has_inline_data(inode) ||
+ !f2fs_overwrite_io(inode, pos, count)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ up_read(&fi->i_gc_rwsem[WRITE]);
+ ret = -EAGAIN;
+ goto out;
+ }
+ } else {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ goto out;
+
+ down_read(&fi->i_gc_rwsem[WRITE]);
+ if (do_opu)
+ down_read(&fi->i_gc_rwsem[READ]);
+ }
+ if (whint_mode == WHINT_MODE_OFF)
+ iocb->ki_hint = WRITE_LIFE_NOT_SET;
+
+ /*
+ * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
+ * the higher-level function iomap_dio_rw() in order to ensure that the
+ * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
+ */
+ inc_page_count(sbi, F2FS_DIO_WRITE);
+ dio_flags = 0;
+ if (pos + count > inode->i_size)
+ dio_flags |= IOMAP_DIO_FORCE_WAIT;
+ dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
+ &f2fs_iomap_dio_write_ops, dio_flags, 0);
+ if (IS_ERR_OR_NULL(dio)) {
+ ret = PTR_ERR_OR_ZERO(dio);
+ if (ret == -ENOTBLK)
+ ret = 0;
+ if (ret != -EIOCBQUEUED)
+ dec_page_count(sbi, F2FS_DIO_WRITE);
+ } else {
+ ret = iomap_dio_complete(dio);
+ }
+
+ if (whint_mode == WHINT_MODE_OFF)
+ iocb->ki_hint = hint;
+ if (do_opu)
+ up_read(&fi->i_gc_rwsem[READ]);
+ up_read(&fi->i_gc_rwsem[WRITE]);
+
+ if (ret < 0)
+ goto out;
+ if (pos + ret > inode->i_size)
+ f2fs_i_size_write(inode, pos + ret);
+ if (!do_opu)
+ set_inode_flag(inode, FI_UPDATE_WRITE);
+
+ if (iov_iter_count(from)) {
+ ssize_t ret2;
+ loff_t bufio_start_pos = iocb->ki_pos;
+
+ /*
+ * The direct write was partial, so we need to fall back to a
+ * buffered write for the remainder.
+ */
+
+ ret2 = f2fs_buffered_write_iter(iocb, from);
+ if (iov_iter_count(from))
+ f2fs_write_failed(inode, iocb->ki_pos);
+ if (ret2 < 0)
+ goto out;
+
+ /*
+ * Ensure that the pagecache pages are written to disk and
+ * invalidated to preserve the expected O_DIRECT semantics.
+ */
+ if (ret2 > 0) {
+ loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
+
+ ret += ret2;
+
+ ret2 = filemap_write_and_wait_range(file->f_mapping,
+ bufio_start_pos,
+ bufio_end_pos);
+ if (ret2 < 0)
+ goto out;
+ invalidate_mapping_pages(file->f_mapping,
+ bufio_start_pos >> PAGE_SHIFT,
+ bufio_end_pos >> PAGE_SHIFT);
+ }
+ } else {
+ /* iomap_dio_rw() already handled the generic_write_sync(). */
+ *may_need_sync = false;
+ }
+out:
+ trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
+ return ret;
+}
+
+static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ const loff_t orig_pos = iocb->ki_pos;
+ const size_t orig_count = iov_iter_count(from);
+ loff_t target_size;
+ bool dio;
+ bool may_need_sync = true;
+ int preallocated;
ssize_t ret;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
@@ -4260,91 +4624,42 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
}
- if (unlikely(IS_IMMUTABLE(inode))) {
- ret = -EPERM;
- goto unlock;
- }
-
- if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
- ret = -EPERM;
- goto unlock;
- }
-
- ret = generic_write_checks(iocb, from);
- if (ret > 0) {
- bool preallocated = false;
- size_t target_size = 0;
- int err;
-
- if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
- set_inode_flag(inode, FI_NO_PREALLOC);
-
- if ((iocb->ki_flags & IOCB_NOWAIT)) {
- if (!f2fs_overwrite_io(inode, iocb->ki_pos,
- iov_iter_count(from)) ||
- f2fs_has_inline_data(inode) ||
- f2fs_force_buffered_io(inode, iocb, from)) {
- clear_inode_flag(inode, FI_NO_PREALLOC);
- inode_unlock(inode);
- ret = -EAGAIN;
- goto out;
- }
- goto write;
- }
-
- if (is_inode_flag_set(inode, FI_NO_PREALLOC))
- goto write;
-
- if (iocb->ki_flags & IOCB_DIRECT) {
- /*
- * Convert inline data for Direct I/O before entering
- * f2fs_direct_IO().
- */
- err = f2fs_convert_inline_inode(inode);
- if (err)
- goto out_err;
- /*
- * If force_buffere_io() is true, we have to allocate
- * blocks all the time, since f2fs_direct_IO will fall
- * back to buffered IO.
- */
- if (!f2fs_force_buffered_io(inode, iocb, from) &&
- f2fs_lfs_mode(F2FS_I_SB(inode)))
- goto write;
- }
- preallocated = true;
- target_size = iocb->ki_pos + iov_iter_count(from);
+ ret = f2fs_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out_unlock;
- err = f2fs_preallocate_blocks(iocb, from);
- if (err) {
-out_err:
- clear_inode_flag(inode, FI_NO_PREALLOC);
- inode_unlock(inode);
- ret = err;
- goto out;
- }
-write:
- ret = __generic_file_write_iter(iocb, from);
- clear_inode_flag(inode, FI_NO_PREALLOC);
+ /* Determine whether we will do a direct write or a buffered write. */
+ dio = f2fs_should_use_dio(inode, iocb, from);
- /* if we couldn't write data, we should deallocate blocks. */
- if (preallocated && i_size_read(inode) < target_size) {
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_truncate(inode);
- filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- }
+ /* Possibly preallocate the blocks for the write. */
+ target_size = iocb->ki_pos + iov_iter_count(from);
+ preallocated = f2fs_preallocate_blocks(iocb, from, dio);
+ if (preallocated < 0)
+ ret = preallocated;
+ else
+ /* Do the actual write. */
+ ret = dio ?
+ f2fs_dio_write_iter(iocb, from, &may_need_sync):
+ f2fs_buffered_write_iter(iocb, from);
- if (ret > 0)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
+ /* Don't leave any preallocated blocks around past i_size. */
+ if (preallocated && i_size_read(inode) < target_size) {
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ filemap_invalidate_lock(inode->i_mapping);
+ if (!f2fs_truncate(inode))
+ file_dont_truncate(inode);
+ filemap_invalidate_unlock(inode->i_mapping);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ } else {
+ file_dont_truncate(inode);
}
-unlock:
+
+ clear_inode_flag(inode, FI_PREALLOCATED_ALL);
+out_unlock:
inode_unlock(inode);
out:
- trace_f2fs_file_write_iter(inode, iocb->ki_pos,
- iov_iter_count(from), ret);
- if (ret > 0)
+ trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
+ if (ret > 0 && may_need_sync)
ret = generic_write_sync(iocb, ret);
return ret;
}
@@ -4352,12 +4667,12 @@ out:
static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
int advice)
{
- struct inode *inode;
struct address_space *mapping;
struct backing_dev_info *bdi;
+ struct inode *inode = file_inode(filp);
+ int err;
if (advice == POSIX_FADV_SEQUENTIAL) {
- inode = file_inode(filp);
if (S_ISFIFO(inode->i_mode))
return -ESPIPE;
@@ -4374,7 +4689,13 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
return 0;
}
- return generic_fadvise(filp, offset, len, advice);
+ err = generic_fadvise(filp, offset, len, advice);
+ if (!err && advice == POSIX_FADV_DONTNEED &&
+ test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
+ f2fs_compressed_file(inode))
+ f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
+
+ return err;
}
#ifdef CONFIG_COMPAT
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index a946ce0ead34..ee308a8de432 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -7,7 +7,6 @@
*/
#include <linux/fs.h>
#include <linux/module.h>
-#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/f2fs_fs.h>
#include <linux/kthread.h>
@@ -15,6 +14,7 @@
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>
+#include <linux/sched/mm.h>
#include "f2fs.h"
#include "node.h"
@@ -92,6 +92,18 @@ static int gc_thread_func(void *data)
* So, I'd like to wait some time to collect dirty segments.
*/
if (sbi->gc_mode == GC_URGENT_HIGH) {
+ spin_lock(&sbi->gc_urgent_high_lock);
+ if (sbi->gc_urgent_high_limited) {
+ if (!sbi->gc_urgent_high_remaining) {
+ sbi->gc_urgent_high_limited = false;
+ spin_unlock(&sbi->gc_urgent_high_lock);
+ sbi->gc_mode = GC_NORMAL;
+ continue;
+ }
+ sbi->gc_urgent_high_remaining--;
+ }
+ spin_unlock(&sbi->gc_urgent_high_lock);
+
wait_ms = gc_th->urgent_sleep_time;
down_write(&sbi->gc_lock);
goto do_gc;
@@ -947,7 +959,7 @@ next_step:
continue;
}
- if (f2fs_get_node_info(sbi, nid, &ni)) {
+ if (f2fs_get_node_info(sbi, nid, &ni, false)) {
f2fs_put_page(node_page, 1);
continue;
}
@@ -1015,7 +1027,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (IS_ERR(node_page))
return false;
- if (f2fs_get_node_info(sbi, nid, dni)) {
+ if (f2fs_get_node_info(sbi, nid, dni, false)) {
f2fs_put_page(node_page, 1);
return false;
}
@@ -1026,6 +1038,9 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
+ if (f2fs_check_nid_range(sbi, dni->ino))
+ return false;
+
*nofs = ofs_of_node(node_page);
source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node);
f2fs_put_page(node_page, 1);
@@ -1039,7 +1054,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) {
f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u",
blkaddr, source_blkaddr, segno);
- f2fs_bug_on(sbi, 1);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
}
}
#endif
@@ -1206,7 +1221,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
- err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false);
if (err)
goto put_out;
@@ -1375,8 +1390,7 @@ retry:
if (err) {
clear_page_private_gcing(page);
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
if (is_dirty)
@@ -1457,7 +1471,8 @@ next_step:
if (phase == 3) {
inode = f2fs_iget(sb, dni.ino);
- if (IS_ERR(inode) || is_bad_inode(inode))
+ if (IS_ERR(inode) || is_bad_inode(inode) ||
+ special_file(inode->i_mode))
continue;
if (!down_write_trylock(
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index ea08f0dfa1bd..4b5cefa3f90c 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -131,7 +131,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (err)
return err;
- err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(fio.sbi, dn->nid, &ni, false);
if (err) {
f2fs_truncate_data_blocks_range(dn, 1);
f2fs_put_dnode(dn);
@@ -786,7 +786,7 @@ int f2fs_inline_data_fiemap(struct inode *inode,
ilen = start + len;
ilen -= start;
- err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+ err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni, false);
if (err)
goto out;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 0f8b2df3e1e0..0ec8e32a00b4 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -8,8 +8,8 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
-#include <linux/backing-dev.h>
#include <linux/writeback.h>
+#include <linux/sched/mm.h>
#include "f2fs.h"
#include "node.h"
@@ -516,6 +516,11 @@ make_now:
} else if (ino == F2FS_COMPRESS_INO(sbi)) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
inode->i_mapping->a_ops = &f2fs_compress_aops;
+ /*
+ * generic_error_remove_page only truncates pages of regular
+ * inode
+ */
+ inode->i_mode |= S_IFREG;
#endif
mapping_set_gfp_mask(inode->i_mapping,
GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
@@ -544,6 +549,14 @@ make_now:
goto bad_inode;
}
f2fs_set_inode_flags(inode);
+
+ if (file_should_truncate(inode)) {
+ ret = f2fs_truncate(inode);
+ if (ret)
+ goto bad_inode;
+ file_dont_truncate(inode);
+ }
+
unlock_new_inode(inode);
trace_f2fs_iget(inode);
return inode;
@@ -562,7 +575,7 @@ retry:
inode = f2fs_iget(sb, ino);
if (IS_ERR(inode)) {
if (PTR_ERR(inode) == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
}
@@ -738,7 +751,8 @@ void f2fs_evict_inode(struct inode *inode)
trace_f2fs_evict_inode(inode);
truncate_inode_pages_final(&inode->i_data);
- if (test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode))
+ if ((inode->i_nlink || is_bad_inode(inode)) &&
+ test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode))
f2fs_invalidate_compress_pages(sbi, inode->i_ino);
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
@@ -868,7 +882,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
* so we can prevent losing this orphan when encoutering checkpoint
* and following suddenly power-off.
*/
- err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "May loss orphan inode, run fsck to fix.");
diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
index cdcf54ae0db8..be599f31d3c4 100644
--- a/fs/f2fs/iostat.c
+++ b/fs/f2fs/iostat.c
@@ -92,7 +92,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
- spin_lock_irq(&sbi->iostat_lat_lock);
+ spin_lock_bh(&sbi->iostat_lat_lock);
for (idx = 0; idx < MAX_IO_TYPE; idx++) {
for (io = 0; io < NR_PAGE_TYPE; io++) {
cnt = io_lat->bio_cnt[idx][io];
@@ -106,7 +106,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
io_lat->bio_cnt[idx][io] = 0;
}
}
- spin_unlock_irq(&sbi->iostat_lat_lock);
+ spin_unlock_bh(&sbi->iostat_lat_lock);
trace_f2fs_iostat_latency(sbi, iostat_lat);
}
@@ -120,9 +120,9 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
return;
/* Need double check under the lock */
- spin_lock(&sbi->iostat_lock);
+ spin_lock_bh(&sbi->iostat_lock);
if (time_is_after_jiffies(sbi->iostat_next_period)) {
- spin_unlock(&sbi->iostat_lock);
+ spin_unlock_bh(&sbi->iostat_lock);
return;
}
sbi->iostat_next_period = jiffies +
@@ -133,7 +133,7 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
sbi->prev_rw_iostat[i];
sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
}
- spin_unlock(&sbi->iostat_lock);
+ spin_unlock_bh(&sbi->iostat_lock);
trace_f2fs_iostat(sbi, iostat_diff);
@@ -145,16 +145,16 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
int i;
- spin_lock(&sbi->iostat_lock);
+ spin_lock_bh(&sbi->iostat_lock);
for (i = 0; i < NR_IO_TYPE; i++) {
sbi->rw_iostat[i] = 0;
sbi->prev_rw_iostat[i] = 0;
}
- spin_unlock(&sbi->iostat_lock);
+ spin_unlock_bh(&sbi->iostat_lock);
- spin_lock_irq(&sbi->iostat_lat_lock);
+ spin_lock_bh(&sbi->iostat_lat_lock);
memset(io_lat, 0, sizeof(struct iostat_lat_info));
- spin_unlock_irq(&sbi->iostat_lat_lock);
+ spin_unlock_bh(&sbi->iostat_lat_lock);
}
void f2fs_update_iostat(struct f2fs_sb_info *sbi,
@@ -163,19 +163,16 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi,
if (!sbi->iostat_enable)
return;
- spin_lock(&sbi->iostat_lock);
+ spin_lock_bh(&sbi->iostat_lock);
sbi->rw_iostat[type] += io_bytes;
- if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
- sbi->rw_iostat[APP_BUFFERED_IO] =
- sbi->rw_iostat[APP_WRITE_IO] -
- sbi->rw_iostat[APP_DIRECT_IO];
+ if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO)
+ sbi->rw_iostat[APP_WRITE_IO] += io_bytes;
- if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
- sbi->rw_iostat[APP_BUFFERED_READ_IO] =
- sbi->rw_iostat[APP_READ_IO] -
- sbi->rw_iostat[APP_DIRECT_READ_IO];
- spin_unlock(&sbi->iostat_lock);
+ if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO)
+ sbi->rw_iostat[APP_READ_IO] += io_bytes;
+
+ spin_unlock_bh(&sbi->iostat_lock);
f2fs_record_iostat(sbi);
}
@@ -185,7 +182,6 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
{
unsigned long ts_diff;
unsigned int iotype = iostat_ctx->type;
- unsigned long flags;
struct f2fs_sb_info *sbi = iostat_ctx->sbi;
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
int idx;
@@ -206,12 +202,12 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
idx = WRITE_ASYNC_IO;
}
- spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
+ spin_lock_bh(&sbi->iostat_lat_lock);
io_lat->sum_lat[idx][iotype] += ts_diff;
io_lat->bio_cnt[idx][iotype]++;
if (ts_diff > io_lat->peak_lat[idx][iotype])
io_lat->peak_lat[idx][iotype] = ts_diff;
- spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
+ spin_unlock_bh(&sbi->iostat_lat_lock);
}
void iostat_update_and_unbind_ctx(struct bio *bio, int rw)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 556fcd8457f3..50b2874e758c 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -8,7 +8,7 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/mpage.h>
-#include <linux/backing-dev.h>
+#include <linux/sched/mm.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/swap.h>
@@ -430,6 +430,10 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *new, *e;
+ /* Let's mitigate lock contention of nat_tree_lock during checkpoint */
+ if (rwsem_is_locked(&sbi->cp_global_sem))
+ return;
+
new = __alloc_nat_entry(sbi, nid, false);
if (!new)
return;
@@ -539,7 +543,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
}
int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
- struct node_info *ni)
+ struct node_info *ni, bool checkpoint_context)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -572,9 +576,10 @@ retry:
* nat_tree_lock. Therefore, we should retry, if we failed to grab here
* while not bothering checkpoint.
*/
- if (!rwsem_is_locked(&sbi->cp_global_sem)) {
+ if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
down_read(&curseg->journal_rwsem);
- } else if (!down_read_trylock(&curseg->journal_rwsem)) {
+ } else if (rwsem_is_contended(&nm_i->nat_tree_lock) ||
+ !down_read_trylock(&curseg->journal_rwsem)) {
up_read(&nm_i->nat_tree_lock);
goto retry;
}
@@ -887,7 +892,7 @@ static int truncate_node(struct dnode_of_data *dn)
int err;
pgoff_t index;
- err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
if (err)
return err;
@@ -1286,7 +1291,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
goto fail;
#ifdef CONFIG_F2FS_CHECK_FS
- err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false);
if (err) {
dec_valid_node_count(sbi, dn->inode, !ofs);
goto fail;
@@ -1348,7 +1353,7 @@ static int read_node_page(struct page *page, int op_flags)
return LOCKED_PAGE;
}
- err = f2fs_get_node_info(sbi, page->index, &ni);
+ err = f2fs_get_node_info(sbi, page->index, &ni, false);
if (err)
return err;
@@ -1600,7 +1605,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
nid = nid_of_node(page);
f2fs_bug_on(sbi, page->index != nid);
- if (f2fs_get_node_info(sbi, nid, &ni))
+ if (f2fs_get_node_info(sbi, nid, &ni, !do_balance))
goto redirty_out;
if (wbc->for_reclaim) {
@@ -2701,7 +2706,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
goto recover_xnid;
/* 1: invalidate the previous xattr nid */
- err = f2fs_get_node_info(sbi, prev_xnid, &ni);
+ err = f2fs_get_node_info(sbi, prev_xnid, &ni, false);
if (err)
return err;
@@ -2741,7 +2746,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
struct page *ipage;
int err;
- err = f2fs_get_node_info(sbi, ino, &old_ni);
+ err = f2fs_get_node_info(sbi, ino, &old_ni, false);
if (err)
return err;
@@ -2750,7 +2755,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
retry:
ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
if (!ipage) {
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 6a1b4668d933..9683c80ff8c2 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -8,6 +8,7 @@
#include <asm/unaligned.h>
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
+#include <linux/sched/mm.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"
@@ -587,7 +588,7 @@ retry_dn:
err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry_dn;
}
goto out;
@@ -595,7 +596,7 @@ retry_dn:
f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
- err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
if (err)
goto err;
@@ -670,8 +671,7 @@ retry_prev:
err = check_index_in_prev_nodes(sbi, dest, &dn);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto retry_prev;
}
goto err;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index df9ed75f0b7a..1dabc8244083 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -9,6 +9,7 @@
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/sched/mm.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
@@ -245,16 +246,14 @@ retry:
LOOKUP_NODE);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
- cond_resched();
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
err = -EAGAIN;
goto next;
}
- err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
if (err) {
f2fs_put_dnode(&dn);
return err;
@@ -424,9 +423,7 @@ retry:
err = f2fs_do_write_data_page(&fio);
if (err) {
if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
- cond_resched();
+ memalloc_retry_wait(GFP_NOFS);
goto retry;
}
unlock_page(page);
@@ -2558,8 +2555,8 @@ find_other_zone:
secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
if (secno >= MAIN_SECS(sbi)) {
if (dir == ALLOC_RIGHT) {
- secno = find_next_zero_bit(free_i->free_secmap,
- MAIN_SECS(sbi), 0);
+ secno = find_first_zero_bit(free_i->free_secmap,
+ MAIN_SECS(sbi));
f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
} else {
go_left = 1;
@@ -2574,8 +2571,8 @@ find_other_zone:
left_start--;
continue;
}
- left_start = find_next_zero_bit(free_i->free_secmap,
- MAIN_SECS(sbi), 0);
+ left_start = find_first_zero_bit(free_i->free_secmap,
+ MAIN_SECS(sbi));
f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
break;
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 46fde9f3f28e..0291cd55cf09 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -538,7 +538,8 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi)
{
- return SM_I(sbi)->reserved_segments;
+ return SM_I(sbi)->reserved_segments +
+ SM_I(sbi)->additional_reserved_segments;
}
static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 040b6d02e1d8..76e6a3df9aba 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -8,9 +8,9 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/sched/mm.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
-#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/parser.h>
#include <linux/mount.h>
@@ -59,6 +59,7 @@ const char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_WRITE_IO] = "write IO error",
[FAULT_SLAB_ALLOC] = "slab alloc",
[FAULT_DQUOT_INIT] = "dquot initialize",
+ [FAULT_LOCK_OP] = "lock_op",
};
void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -260,29 +261,22 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
static const struct f2fs_sb_encodings {
__u16 magic;
char *name;
- char *version;
+ unsigned int version;
} f2fs_sb_encoding_map[] = {
- {F2FS_ENC_UTF8_12_1, "utf8", "12.1.0"},
+ {F2FS_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
};
-static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb,
- const struct f2fs_sb_encodings **encoding,
- __u16 *flags)
+static const struct f2fs_sb_encodings *
+f2fs_sb_read_encoding(const struct f2fs_super_block *sb)
{
__u16 magic = le16_to_cpu(sb->s_encoding);
int i;
for (i = 0; i < ARRAY_SIZE(f2fs_sb_encoding_map); i++)
if (magic == f2fs_sb_encoding_map[i].magic)
- break;
-
- if (i >= ARRAY_SIZE(f2fs_sb_encoding_map))
- return -EINVAL;
-
- *encoding = &f2fs_sb_encoding_map[i];
- *flags = le16_to_cpu(sb->s_encoding_flags);
+ return &f2fs_sb_encoding_map[i];
- return 0;
+ return NULL;
}
struct kmem_cache *f2fs_cf_name_slab;
@@ -328,6 +322,46 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).s_resgid));
}
+static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi)
+{
+ unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec;
+ unsigned int avg_vblocks;
+ unsigned int wanted_reserved_segments;
+ block_t avail_user_block_count;
+
+ if (!F2FS_IO_ALIGNED(sbi))
+ return 0;
+
+ /* average valid block count in section in worst case */
+ avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi);
+
+ /*
+ * we need enough free space when migrating one section in worst case
+ */
+ wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) *
+ reserved_segments(sbi);
+ wanted_reserved_segments -= reserved_segments(sbi);
+
+ avail_user_block_count = sbi->user_block_count -
+ sbi->current_reserved_blocks -
+ F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (wanted_reserved_segments * sbi->blocks_per_seg >
+ avail_user_block_count) {
+ f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u",
+ wanted_reserved_segments,
+ avail_user_block_count >> sbi->log_blocks_per_seg);
+ return -ENOSPC;
+ }
+
+ SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments;
+
+ f2fs_info(sbi, "IO align feature needs additional reserved segment: %u",
+ wanted_reserved_segments);
+
+ return 0;
+}
+
static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
{
if (!F2FS_OPTION(sbi).unusable_cap_perc)
@@ -2415,8 +2449,7 @@ repeat:
page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS);
if (IS_ERR(page)) {
if (PTR_ERR(page) == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC,
- DEFAULT_IO_TIMEOUT);
+ memalloc_retry_wait(GFP_NOFS);
goto repeat;
}
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
@@ -3548,6 +3581,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->seq_file_ra_mul = MIN_RA_MUL;
sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
+ spin_lock_init(&sbi->gc_urgent_high_lock);
sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@ -3875,25 +3909,32 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
struct unicode_map *encoding;
__u16 encoding_flags;
- if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info,
- &encoding_flags)) {
+ encoding_info = f2fs_sb_read_encoding(sbi->raw_super);
+ if (!encoding_info) {
f2fs_err(sbi,
"Encoding requested by superblock is unknown");
return -EINVAL;
}
+ encoding_flags = le16_to_cpu(sbi->raw_super->s_encoding_flags);
encoding = utf8_load(encoding_info->version);
if (IS_ERR(encoding)) {
f2fs_err(sbi,
- "can't mount with superblock charset: %s-%s "
+ "can't mount with superblock charset: %s-%u.%u.%u "
"not supported by the kernel. flags: 0x%x.",
- encoding_info->name, encoding_info->version,
+ encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
encoding_flags);
return PTR_ERR(encoding);
}
f2fs_info(sbi, "Using encoding defined by superblock: "
- "%s-%s with flags 0x%hx", encoding_info->name,
- encoding_info->version?:"\b", encoding_flags);
+ "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
+ encoding_flags);
sbi->sb->s_encoding = encoding;
sbi->sb->s_encoding_flags = encoding_flags;
@@ -4180,6 +4221,10 @@ try_onemore:
goto free_nm;
}
+ err = adjust_reserved_segment(sbi);
+ if (err)
+ goto free_nm;
+
/* For write statistics */
sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 7d289249cd7e..df406c16b2eb 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -118,6 +118,15 @@ static ssize_t sb_status_show(struct f2fs_attr *a,
return sprintf(buf, "%lx\n", sbi->s_flag);
}
+static ssize_t pending_discard_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ if (!SM_I(sbi)->dcc_info)
+ return -EINVAL;
+ return sprintf(buf, "%llu\n", (unsigned long long)atomic_read(
+ &SM_I(sbi)->dcc_info->discard_cmd_cnt));
+}
+
static ssize_t features_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -196,8 +205,7 @@ static ssize_t encoding_show(struct f2fs_attr *a,
struct super_block *sb = sbi->sb;
if (f2fs_sb_has_casefold(sbi))
- return sysfs_emit(buf, "%s (%d.%d.%d)\n",
- sb->s_encoding->charset,
+ return sysfs_emit(buf, "UTF-8 (%d.%d.%d)\n",
(sb->s_encoding->version >> 16) & 0xff,
(sb->s_encoding->version >> 8) & 0xff,
sb->s_encoding->version & 0xff);
@@ -415,7 +423,9 @@ out:
if (a->struct_type == RESERVED_BLOCKS) {
spin_lock(&sbi->stat_lock);
if (t > (unsigned long)(sbi->user_block_count -
- F2FS_OPTION(sbi).root_reserved_blocks)) {
+ F2FS_OPTION(sbi).root_reserved_blocks -
+ sbi->blocks_per_seg *
+ SM_I(sbi)->additional_reserved_segments)) {
spin_unlock(&sbi->stat_lock);
return -EINVAL;
}
@@ -478,6 +488,15 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "gc_urgent_high_remaining")) {
+ spin_lock(&sbi->gc_urgent_high_lock);
+ sbi->gc_urgent_high_limited = t != 0;
+ sbi->gc_urgent_high_remaining = t;
+ spin_unlock(&sbi->gc_urgent_high_lock);
+
+ return count;
+ }
+
#ifdef CONFIG_F2FS_IOSTAT
if (!strcmp(a->attr.name, "iostat_enable")) {
sbi->iostat_enable = !!t;
@@ -733,6 +752,7 @@ F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
#endif
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent_high_remaining, gc_urgent_high_remaining);
F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio);
F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(free_segments);
@@ -744,6 +764,7 @@ F2FS_GENERAL_RO_ATTR(unusable);
F2FS_GENERAL_RO_ATTR(encoding);
F2FS_GENERAL_RO_ATTR(mounted_time_sec);
F2FS_GENERAL_RO_ATTR(main_blkaddr);
+F2FS_GENERAL_RO_ATTR(pending_discard);
#ifdef CONFIG_F2FS_STAT_FS
F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count);
F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count);
@@ -812,6 +833,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(main_blkaddr),
ATTR_LIST(max_small_discards),
ATTR_LIST(discard_granularity),
+ ATTR_LIST(pending_discard),
ATTR_LIST(batched_trim_sections),
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
@@ -844,6 +866,7 @@ static struct attribute *f2fs_attrs[] = {
#endif
ATTR_LIST(data_io_flag),
ATTR_LIST(node_io_flag),
+ ATTR_LIST(gc_urgent_high_remaining),
ATTR_LIST(ckpt_thread_ioprio),
ATTR_LIST(dirty_segments),
ATTR_LIST(free_segments),
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index e348f33bcb2b..8e5cd9c916ff 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -226,15 +226,18 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int index)
}
static struct f2fs_xattr_entry *__find_xattr(void *base_addr,
- void *last_base_addr, int index,
- size_t len, const char *name)
+ void *last_base_addr, void **last_addr,
+ int index, size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
list_for_each_xattr(entry, base_addr) {
if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
- (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr)
+ (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) {
+ if (last_addr)
+ *last_addr = entry;
return NULL;
+ }
if (entry->e_name_index != index)
continue;
@@ -254,19 +257,9 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
unsigned int inline_size = inline_xattr_size(inode);
void *max_addr = base_addr + inline_size;
- list_for_each_xattr(entry, base_addr) {
- if ((void *)entry + sizeof(__u32) > max_addr ||
- (void *)XATTR_NEXT_ENTRY(entry) > max_addr) {
- *last_addr = entry;
- return NULL;
- }
- if (entry->e_name_index != index)
- continue;
- if (entry->e_name_len != len)
- continue;
- if (!memcmp(entry->e_name, name, len))
- break;
- }
+ entry = __find_xattr(base_addr, max_addr, last_addr, index, len, name);
+ if (!entry)
+ return NULL;
/* inline xattr header or entry across max inline xattr size */
if (IS_XATTR_LAST_ENTRY(entry) &&
@@ -368,7 +361,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
else
cur_addr = txattr_addr;
- *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name);
+ *xe = __find_xattr(cur_addr, last_txattr_addr, NULL, index, len, name);
if (!*xe) {
f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
inode->i_ino);
@@ -659,7 +652,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
last_base_addr = (void *)base_addr + XATTR_SIZE(inode);
/* find entry with wanted name. */
- here = __find_xattr(base_addr, last_base_addr, index, len, name);
+ here = __find_xattr(base_addr, last_base_addr, NULL, index, len, name);
if (!here) {
f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
inode->i_ino);
@@ -684,8 +677,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,
}
last = here;
- while (!IS_XATTR_LAST_ENTRY(last))
+ while (!IS_XATTR_LAST_ENTRY(last)) {
+ if ((void *)(last) + sizeof(__u32) > last_base_addr ||
+ (void *)XATTR_NEXT_ENTRY(last) > last_base_addr) {
+ f2fs_err(F2FS_I_SB(inode), "inode (%lu) has invalid last xattr entry, entry_size: %zu",
+ inode->i_ino, ENTRY_SIZE(last));
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ error = -EFSCORRUPTED;
+ goto exit;
+ }
last = XATTR_NEXT_ENTRY(last);
+ }
newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 13855ba49cd9..a5a309fcc7fa 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -175,9 +175,10 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
static int fat_file_release(struct inode *inode, struct file *filp)
{
if ((filp->f_mode & FMODE_WRITE) &&
- MSDOS_SB(inode->i_sb)->options.flush) {
+ MSDOS_SB(inode->i_sb)->options.flush) {
fat_flush_inodes(inode->i_sb, inode, NULL);
- congestion_wait(BLK_RW_ASYNC, HZ/10);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ io_schedule_timeout(HZ/10);
}
return 0;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 45437f8e1003..57edef16dce4 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -33,7 +33,7 @@
#include "internal.h"
/* sysctl tunables... */
-struct files_stat_struct files_stat = {
+static struct files_stat_struct files_stat = {
.max_files = NR_FILE
};
@@ -75,22 +75,53 @@ unsigned long get_max_files(void)
}
EXPORT_SYMBOL_GPL(get_max_files);
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+
/*
* Handle nr_files sysctl
*/
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
-int proc_nr_files(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+static int proc_nr_files(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
files_stat.nr_files = get_nr_files();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
-#else
-int proc_nr_files(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+
+static struct ctl_table fs_stat_sysctls[] = {
+ {
+ .procname = "file-nr",
+ .data = &files_stat,
+ .maxlen = sizeof(files_stat),
+ .mode = 0444,
+ .proc_handler = proc_nr_files,
+ },
+ {
+ .procname = "file-max",
+ .data = &files_stat.max_files,
+ .maxlen = sizeof(files_stat.max_files),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = SYSCTL_LONG_ZERO,
+ .extra2 = SYSCTL_LONG_MAX,
+ },
+ {
+ .procname = "nr_open",
+ .data = &sysctl_nr_open,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &sysctl_nr_open_min,
+ .extra2 = &sysctl_nr_open_max,
+ },
+ { }
+};
+
+static int __init init_fs_stat_sysctls(void)
{
- return -ENOSYS;
+ register_sysctl_init("fs", fs_stat_sysctls);
+ return 0;
}
+fs_initcall(init_fs_stat_sysctls);
#endif
static struct file *__alloc_file(int flags, const struct cred *cred)
diff --git a/fs/fs_context.c b/fs/fs_context.c
index b7e43a780a62..24ce12f0db32 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -548,7 +548,7 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
param->key);
}
- if (len > PAGE_SIZE - 2 - size)
+ if (size + len + 2 > PAGE_SIZE)
return invalf(fc, "VFS: Legacy: Cumulative options too large");
if (strchr(param->key, ',') ||
(param->type == fs_value_is_string &&
diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c
index a57c6cbee858..f2aa7dbad766 100644
--- a/fs/fscache/volume.c
+++ b/fs/fscache/volume.c
@@ -142,12 +142,12 @@ static void fscache_wait_on_volume_collision(struct fscache_volume *candidate,
unsigned int collidee_debug_id)
{
wait_var_event_timeout(&candidate->flags,
- fscache_is_acquire_pending(candidate), 20 * HZ);
+ !fscache_is_acquire_pending(candidate), 20 * HZ);
if (!fscache_is_acquire_pending(candidate)) {
pr_notice("Potential volume collision new=%08x old=%08x",
candidate->debug_id, collidee_debug_id);
fscache_stat(&fscache_n_volumes_collision);
- wait_var_event(&candidate->flags, fscache_is_acquire_pending(candidate));
+ wait_var_event(&candidate->flags, !fscache_is_acquire_pending(candidate));
}
}
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 74e627109b79..9d737904d07c 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -891,7 +891,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
return 0;
out_vqs:
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
virtio_fs_cleanup_vqs(vdev, fs);
kfree(fs->vqs);
@@ -923,7 +923,7 @@ static void virtio_fs_remove(struct virtio_device *vdev)
list_del_init(&fs->list);
virtio_fs_stop_all_queues(fs);
virtio_fs_drain_all_queues_locked(fs);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
virtio_fs_cleanup_vqs(vdev, fs);
vdev->priv = NULL;
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 456e87aec7fd..68b4240c6191 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -260,8 +260,10 @@ struct hfsplus_cat_folder {
__be32 access_date;
__be32 backup_date;
struct hfsplus_perm permissions;
- struct DInfo user_info;
- struct DXInfo finder_info;
+ struct_group_attr(info, __packed,
+ struct DInfo user_info;
+ struct DXInfo finder_info;
+ );
__be32 text_encoding;
__be32 subfolders; /* Subfolder count in HFSX. Reserved in HFS+. */
} __packed;
@@ -294,8 +296,10 @@ struct hfsplus_cat_file {
__be32 access_date;
__be32 backup_date;
struct hfsplus_perm permissions;
- struct FInfo user_info;
- struct FXInfo finder_info;
+ struct_group_attr(info, __packed,
+ struct FInfo user_info;
+ struct FXInfo finder_info;
+ );
__be32 text_encoding;
u32 reserved2;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index e2855ceefd39..49891b12c415 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -296,7 +296,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name,
sizeof(hfsplus_cat_entry));
if (be16_to_cpu(entry.type) == HFSPLUS_FOLDER) {
if (size == folder_finderinfo_len) {
- memcpy(&entry.folder.user_info, value,
+ memcpy(&entry.folder.info, value,
folder_finderinfo_len);
hfs_bnode_write(cat_fd.bnode, &entry,
cat_fd.entryoffset,
@@ -309,7 +309,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name,
}
} else if (be16_to_cpu(entry.type) == HFSPLUS_FILE) {
if (size == file_finderinfo_len) {
- memcpy(&entry.file.user_info, value,
+ memcpy(&entry.file.info, value,
file_finderinfo_len);
hfs_bnode_write(cat_fd.bnode, &entry,
cat_fd.entryoffset,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 49d2e686be74..a7c6c7498be0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -409,10 +409,11 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
struct vm_area_struct *vma;
/*
- * end == 0 indicates that the entire range after
- * start should be unmapped.
+ * end == 0 indicates that the entire range after start should be
+ * unmapped. Note, end is exclusive, whereas the interval tree takes
+ * an inclusive "last".
*/
- vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) {
+ vma_interval_tree_foreach(vma, root, start, end ? end - 1 : ULONG_MAX) {
unsigned long v_offset;
unsigned long v_end;
diff --git a/fs/inode.c b/fs/inode.c
index 6b80a51129d5..63324df6fa27 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -67,11 +67,6 @@ const struct address_space_operations empty_aops = {
};
EXPORT_SYMBOL(empty_aops);
-/*
- * Statistics gathering..
- */
-struct inodes_stat_t inodes_stat;
-
static DEFINE_PER_CPU(unsigned long, nr_inodes);
static DEFINE_PER_CPU(unsigned long, nr_unused);
@@ -106,13 +101,43 @@ long get_nr_dirty_inodes(void)
* Handle nr_inode sysctl
*/
#ifdef CONFIG_SYSCTL
-int proc_nr_inodes(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+/*
+ * Statistics gathering..
+ */
+static struct inodes_stat_t inodes_stat;
+
+static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
inodes_stat.nr_inodes = get_nr_inodes();
inodes_stat.nr_unused = get_nr_inodes_unused();
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
+
+static struct ctl_table inodes_sysctls[] = {
+ {
+ .procname = "inode-nr",
+ .data = &inodes_stat,
+ .maxlen = 2*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_inodes,
+ },
+ {
+ .procname = "inode-state",
+ .data = &inodes_stat,
+ .maxlen = 7*sizeof(long),
+ .mode = 0444,
+ .proc_handler = proc_nr_inodes,
+ },
+ { }
+};
+
+static int __init init_fs_inode_sysctls(void)
+{
+ register_sysctl_init("fs", inodes_sysctls);
+ return 0;
+}
+early_initcall(init_fs_inode_sysctls);
#endif
static int no_open(struct inode *inode, struct file *file)
@@ -526,6 +551,55 @@ void __remove_inode_hash(struct inode *inode)
}
EXPORT_SYMBOL(__remove_inode_hash);
+void dump_mapping(const struct address_space *mapping)
+{
+ struct inode *host;
+ const struct address_space_operations *a_ops;
+ struct hlist_node *dentry_first;
+ struct dentry *dentry_ptr;
+ struct dentry dentry;
+ unsigned long ino;
+
+ /*
+ * If mapping is an invalid pointer, we don't want to crash
+ * accessing it, so probe everything depending on it carefully.
+ */
+ if (get_kernel_nofault(host, &mapping->host) ||
+ get_kernel_nofault(a_ops, &mapping->a_ops)) {
+ pr_warn("invalid mapping:%px\n", mapping);
+ return;
+ }
+
+ if (!host) {
+ pr_warn("aops:%ps\n", a_ops);
+ return;
+ }
+
+ if (get_kernel_nofault(dentry_first, &host->i_dentry.first) ||
+ get_kernel_nofault(ino, &host->i_ino)) {
+ pr_warn("aops:%ps invalid inode:%px\n", a_ops, host);
+ return;
+ }
+
+ if (!dentry_first) {
+ pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
+ return;
+ }
+
+ dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
+ if (get_kernel_nofault(dentry, dentry_ptr)) {
+ pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
+ a_ops, ino, dentry_ptr);
+ return;
+ }
+
+ /*
+ * if dentry is corrupted, the %pd handler may still crash,
+ * but it's unlikely that we reach here with a corrupt mapping
+ */
+ pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry);
+}
+
void clear_inode(struct inode *inode)
{
/*
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5c4f582d6549..bb7f161bb19c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -48,7 +48,8 @@ struct io_worker {
struct io_wqe *wqe;
struct io_wq_work *cur_work;
- spinlock_t lock;
+ struct io_wq_work *next_work;
+ raw_spinlock_t lock;
struct completion ref_done;
@@ -405,8 +406,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
* Worker will start processing some work. Move it to the busy list, if
* it's currently on the freelist
*/
-static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
- struct io_wq_work *work)
+static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker)
__must_hold(wqe->lock)
{
if (worker->flags & IO_WORKER_F_FREE) {
@@ -529,9 +529,10 @@ static void io_assign_current_work(struct io_worker *worker,
cond_resched();
}
- spin_lock(&worker->lock);
+ raw_spin_lock(&worker->lock);
worker->cur_work = work;
- spin_unlock(&worker->lock);
+ worker->next_work = NULL;
+ raw_spin_unlock(&worker->lock);
}
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
@@ -546,7 +547,7 @@ static void io_worker_handle_work(struct io_worker *worker)
do {
struct io_wq_work *work;
-get_next:
+
/*
* If we got some work, mark us as busy. If we didn't, but
* the list isn't empty, it means we stalled on hashed work.
@@ -555,9 +556,20 @@ get_next:
* clear the stalled flag.
*/
work = io_get_next_work(acct, worker);
- if (work)
- __io_worker_busy(wqe, worker, work);
-
+ if (work) {
+ __io_worker_busy(wqe, worker);
+
+ /*
+ * Make sure cancelation can find this, even before
+ * it becomes the active work. That avoids a window
+ * where the work has been removed from our general
+ * work list, but isn't yet discoverable as the
+ * current work item for this worker.
+ */
+ raw_spin_lock(&worker->lock);
+ worker->next_work = work;
+ raw_spin_unlock(&worker->lock);
+ }
raw_spin_unlock(&wqe->lock);
if (!work)
break;
@@ -594,11 +606,6 @@ get_next:
spin_unlock_irq(&wq->hash->wait.lock);
if (wq_has_sleeper(&wq->hash->wait))
wake_up(&wq->hash->wait);
- raw_spin_lock(&wqe->lock);
- /* skip unnecessary unlock-lock wqe->lock */
- if (!work)
- goto get_next;
- raw_spin_unlock(&wqe->lock);
}
} while (work);
@@ -670,7 +677,7 @@ loop:
*/
void io_wq_worker_running(struct task_struct *tsk)
{
- struct io_worker *worker = tsk->pf_io_worker;
+ struct io_worker *worker = tsk->worker_private;
if (!worker)
return;
@@ -688,7 +695,7 @@ void io_wq_worker_running(struct task_struct *tsk)
*/
void io_wq_worker_sleeping(struct task_struct *tsk)
{
- struct io_worker *worker = tsk->pf_io_worker;
+ struct io_worker *worker = tsk->worker_private;
if (!worker)
return;
@@ -707,7 +714,7 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
struct task_struct *tsk)
{
- tsk->pf_io_worker = worker;
+ tsk->worker_private = worker;
worker->task = tsk;
set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
tsk->flags |= PF_NO_SETAFFINITY;
@@ -815,7 +822,7 @@ fail:
refcount_set(&worker->ref, 1);
worker->wqe = wqe;
- spin_lock_init(&worker->lock);
+ raw_spin_lock_init(&worker->lock);
init_completion(&worker->ref_done);
if (index == IO_WQ_ACCT_BOUND)
@@ -973,6 +980,19 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
}
+static bool __io_wq_worker_cancel(struct io_worker *worker,
+ struct io_cb_cancel_data *match,
+ struct io_wq_work *work)
+{
+ if (work && match->fn(work, match->data)) {
+ work->flags |= IO_WQ_WORK_CANCEL;
+ set_notify_signal(worker->task);
+ return true;
+ }
+
+ return false;
+}
+
static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{
struct io_cb_cancel_data *match = data;
@@ -981,13 +1001,11 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
* Hold the lock to avoid ->cur_work going out of scope, caller
* may dereference the passed in work.
*/
- spin_lock(&worker->lock);
- if (worker->cur_work &&
- match->fn(worker->cur_work, match->data)) {
- set_notify_signal(worker->task);
+ raw_spin_lock(&worker->lock);
+ if (__io_wq_worker_cancel(worker, match, worker->cur_work) ||
+ __io_wq_worker_cancel(worker, match, worker->next_work))
match->nr_running++;
- }
- spin_unlock(&worker->lock);
+ raw_spin_unlock(&worker->lock);
return match->nr_running && !match->cancel_all;
}
@@ -1039,17 +1057,16 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
{
int i;
retry:
- raw_spin_lock(&wqe->lock);
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
if (io_acct_cancel_pending_work(wqe, acct, match)) {
+ raw_spin_lock(&wqe->lock);
if (match->cancel_all)
goto retry;
- return;
+ break;
}
}
- raw_spin_unlock(&wqe->lock);
}
static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -1074,25 +1091,27 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
* First check pending list, if we're lucky we can just remove it
* from there. CANCEL_OK means that the work is returned as-new,
* no completion will be posted for it.
- */
- for_each_node(node) {
- struct io_wqe *wqe = wq->wqes[node];
-
- io_wqe_cancel_pending_work(wqe, &match);
- if (match.nr_pending && !match.cancel_all)
- return IO_WQ_CANCEL_OK;
- }
-
- /*
- * Now check if a free (going busy) or busy worker has the work
+ *
+ * Then check if a free (going busy) or busy worker has the work
* currently running. If we find it there, we'll return CANCEL_RUNNING
* as an indication that we attempt to signal cancellation. The
* completion will run normally in this case.
+ *
+ * Do both of these while holding the wqe->lock, to ensure that
+ * we'll find a work item regardless of state.
*/
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
+ raw_spin_lock(&wqe->lock);
+ io_wqe_cancel_pending_work(wqe, &match);
+ if (match.nr_pending && !match.cancel_all) {
+ raw_spin_unlock(&wqe->lock);
+ return IO_WQ_CANCEL_OK;
+ }
+
io_wqe_cancel_running_work(wqe, &match);
+ raw_spin_unlock(&wqe->lock);
if (match.nr_running && !match.cancel_all)
return IO_WQ_CANCEL_RUNNING;
}
@@ -1263,7 +1282,9 @@ static void io_wq_destroy(struct io_wq *wq)
.fn = io_wq_work_match_all,
.cancel_all = true,
};
+ raw_spin_lock(&wqe->lock);
io_wqe_cancel_pending_work(wqe, &match);
+ raw_spin_unlock(&wqe->lock);
free_cpumask_var(wqe->cpu_mask);
kfree(wqe);
}
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 3709b7c5ec98..dbecd27656c7 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -222,6 +222,6 @@ static inline void io_wq_worker_running(struct task_struct *tsk)
static inline bool io_wq_current_is_worker(void)
{
return in_task() && (current->flags & PF_IO_WORKER) &&
- current->pf_io_worker;
+ current->worker_private;
}
#endif
diff --git a/fs/io_uring.c b/fs/io_uring.c
index de9c9de90655..e54c4127422e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1192,12 +1192,6 @@ static inline bool req_ref_put_and_test(struct io_kiocb *req)
return atomic_dec_and_test(&req->refs);
}
-static inline void req_ref_put(struct io_kiocb *req)
-{
- WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
- WARN_ON_ONCE(req_ref_put_and_test(req));
-}
-
static inline void req_ref_get(struct io_kiocb *req)
{
WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
@@ -5468,12 +5462,14 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
{
- struct wait_queue_head *head = poll->head;
+ struct wait_queue_head *head = smp_load_acquire(&poll->head);
- spin_lock_irq(&head->lock);
- list_del_init(&poll->wait.entry);
- poll->head = NULL;
- spin_unlock_irq(&head->lock);
+ if (head) {
+ spin_lock_irq(&head->lock);
+ list_del_init(&poll->wait.entry);
+ poll->head = NULL;
+ spin_unlock_irq(&head->lock);
+ }
}
static void io_poll_remove_entries(struct io_kiocb *req)
@@ -5481,10 +5477,26 @@ static void io_poll_remove_entries(struct io_kiocb *req)
struct io_poll_iocb *poll = io_poll_get_single(req);
struct io_poll_iocb *poll_double = io_poll_get_double(req);
- if (poll->head)
- io_poll_remove_entry(poll);
- if (poll_double && poll_double->head)
+ /*
+ * While we hold the waitqueue lock and the waitqueue is nonempty,
+ * wake_up_pollfree() will wait for us. However, taking the waitqueue
+ * lock in the first place can race with the waitqueue being freed.
+ *
+ * We solve this as eventpoll does: by taking advantage of the fact that
+ * all users of wake_up_pollfree() will RCU-delay the actual free. If
+ * we enter rcu_read_lock() and see that the pointer to the queue is
+ * non-NULL, we can then lock it without the memory being freed out from
+ * under us.
+ *
+ * Keep holding rcu_read_lock() as long as we hold the queue lock, in
+ * case the caller deletes the entry from the queue, leaving it empty.
+ * In that case, only RCU prevents the queue memory from being freed.
+ */
+ rcu_read_lock();
+ io_poll_remove_entry(poll);
+ if (poll_double)
io_poll_remove_entry(poll_double);
+ rcu_read_unlock();
}
/*
@@ -5624,6 +5636,30 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
wait);
__poll_t mask = key_to_poll(key);
+ if (unlikely(mask & POLLFREE)) {
+ io_poll_mark_cancelled(req);
+ /* we have to kick tw in case it's not already */
+ io_poll_execute(req, 0);
+
+ /*
+ * If the waitqueue is being freed early but someone is already
+ * holds ownership over it, we have to tear down the request as
+ * best we can. That means immediately removing the request from
+ * its waitqueue and preventing all further accesses to the
+ * waitqueue via the request.
+ */
+ list_del_init(&poll->wait.entry);
+
+ /*
+ * Careful: this *must* be the last step, since as soon
+ * as req->head is NULL'ed out, the request can be
+ * completed and freed, since aio_poll_complete_work()
+ * will no longer need to take the waitqueue lock.
+ */
+ smp_store_release(&poll->head, NULL);
+ return 1;
+ }
+
/* for instances that support it check for an event match first */
if (mask && !(mask & poll->events))
return 0;
@@ -6350,16 +6386,21 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
- if (ret != -ENOENT)
- return ret;
+ /*
+ * Fall-through even for -EALREADY, as we may have poll armed
+ * that need unarming.
+ */
+ if (!ret)
+ return 0;
spin_lock(&ctx->completion_lock);
+ ret = io_poll_cancel(ctx, sqe_addr, false);
+ if (ret != -ENOENT)
+ goto out;
+
spin_lock_irq(&ctx->timeout_lock);
ret = io_timeout_cancel(ctx, sqe_addr);
spin_unlock_irq(&ctx->timeout_lock);
- if (ret != -ENOENT)
- goto out;
- ret = io_poll_cancel(ctx, sqe_addr, false);
out:
spin_unlock(&ctx->completion_lock);
return ret;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 504e69578112..1ed097e94af2 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -430,7 +430,7 @@ static int ioctl_file_dedupe_range(struct file *file,
goto out;
}
- size = offsetof(struct file_dedupe_range __user, info[count]);
+ size = offsetof(struct file_dedupe_range, info[count]);
if (size > PAGE_SIZE) {
ret = -ENOMEM;
goto out;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0b86a4365b66..f13d548e4a7f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1212,7 +1212,7 @@ static const struct seq_operations jbd2_seq_info_ops = {
static int jbd2_seq_info_open(struct inode *inode, struct file *file)
{
- journal_t *journal = PDE_DATA(inode);
+ journal_t *journal = pde_data(inode);
struct jbd2_stats_proc_session *s;
int rc, size;
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 2b4d5013dc5d..6da92ecaf66d 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -161,5 +161,5 @@ static int jffs2_garbage_collect_thread(void *_c)
spin_lock(&c->erase_completion_lock);
c->gc_task = NULL;
spin_unlock(&c->erase_completion_lock);
- complete_and_exit(&c->gc_thread_exit, 0);
+ kthread_complete_and_exit(&c->gc_thread_exit, 0);
}
diff --git a/fs/ksmbd/asn1.c b/fs/ksmbd/asn1.c
index b014f4638610..c03eba090368 100644
--- a/fs/ksmbd/asn1.c
+++ b/fs/ksmbd/asn1.c
@@ -21,101 +21,11 @@
#include "ksmbd_spnego_negtokeninit.asn1.h"
#include "ksmbd_spnego_negtokentarg.asn1.h"
-#define SPNEGO_OID_LEN 7
#define NTLMSSP_OID_LEN 10
-#define KRB5_OID_LEN 7
-#define KRB5U2U_OID_LEN 8
-#define MSKRB5_OID_LEN 7
-static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 };
-static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 };
-static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 };
-static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 };
-static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 };
static char NTLMSSP_OID_STR[NTLMSSP_OID_LEN] = { 0x2b, 0x06, 0x01, 0x04, 0x01,
0x82, 0x37, 0x02, 0x02, 0x0a };
-static bool
-asn1_subid_decode(const unsigned char **begin, const unsigned char *end,
- unsigned long *subid)
-{
- const unsigned char *ptr = *begin;
- unsigned char ch;
-
- *subid = 0;
-
- do {
- if (ptr >= end)
- return false;
-
- ch = *ptr++;
- *subid <<= 7;
- *subid |= ch & 0x7F;
- } while ((ch & 0x80) == 0x80);
-
- *begin = ptr;
- return true;
-}
-
-static bool asn1_oid_decode(const unsigned char *value, size_t vlen,
- unsigned long **oid, size_t *oidlen)
-{
- const unsigned char *iptr = value, *end = value + vlen;
- unsigned long *optr;
- unsigned long subid;
-
- vlen += 1;
- if (vlen < 2 || vlen > UINT_MAX / sizeof(unsigned long))
- goto fail_nullify;
-
- *oid = kmalloc(vlen * sizeof(unsigned long), GFP_KERNEL);
- if (!*oid)
- return false;
-
- optr = *oid;
-
- if (!asn1_subid_decode(&iptr, end, &subid))
- goto fail;
-
- if (subid < 40) {
- optr[0] = 0;
- optr[1] = subid;
- } else if (subid < 80) {
- optr[0] = 1;
- optr[1] = subid - 40;
- } else {
- optr[0] = 2;
- optr[1] = subid - 80;
- }
-
- *oidlen = 2;
- optr += 2;
-
- while (iptr < end) {
- if (++(*oidlen) > vlen)
- goto fail;
-
- if (!asn1_subid_decode(&iptr, end, optr++))
- goto fail;
- }
- return true;
-
-fail:
- kfree(*oid);
-fail_nullify:
- *oid = NULL;
- return false;
-}
-
-static bool oid_eq(unsigned long *oid1, unsigned int oid1len,
- unsigned long *oid2, unsigned int oid2len)
-{
- if (oid1len != oid2len)
- return false;
-
- return memcmp(oid1, oid2, oid1len) == 0;
-}
-
int
ksmbd_decode_negTokenInit(unsigned char *security_blob, int length,
struct ksmbd_conn *conn)
@@ -252,26 +162,18 @@ int build_spnego_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
int ksmbd_gssapi_this_mech(void *context, size_t hdrlen, unsigned char tag,
const void *value, size_t vlen)
{
- unsigned long *oid;
- size_t oidlen;
- int err = 0;
-
- if (!asn1_oid_decode(value, vlen, &oid, &oidlen)) {
- err = -EBADMSG;
- goto out;
- }
+ enum OID oid;
- if (!oid_eq(oid, oidlen, SPNEGO_OID, SPNEGO_OID_LEN))
- err = -EBADMSG;
- kfree(oid);
-out:
- if (err) {
+ oid = look_up_OID(value, vlen);
+ if (oid != OID_spnego) {
char buf[50];
sprint_oid(value, vlen, buf, sizeof(buf));
ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
+ return -EBADMSG;
}
- return err;
+
+ return 0;
}
int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen,
@@ -279,37 +181,31 @@ int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen,
size_t vlen)
{
struct ksmbd_conn *conn = context;
- unsigned long *oid;
- size_t oidlen;
+ enum OID oid;
int mech_type;
- char buf[50];
- if (!asn1_oid_decode(value, vlen, &oid, &oidlen))
- goto fail;
-
- if (oid_eq(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN))
+ oid = look_up_OID(value, vlen);
+ if (oid == OID_ntlmssp) {
mech_type = KSMBD_AUTH_NTLMSSP;
- else if (oid_eq(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN))
+ } else if (oid == OID_mskrb5) {
mech_type = KSMBD_AUTH_MSKRB5;
- else if (oid_eq(oid, oidlen, KRB5_OID, KRB5_OID_LEN))
+ } else if (oid == OID_krb5) {
mech_type = KSMBD_AUTH_KRB5;
- else if (oid_eq(oid, oidlen, KRB5U2U_OID, KRB5U2U_OID_LEN))
+ } else if (oid == OID_krb5u2u) {
mech_type = KSMBD_AUTH_KRB5U2U;
- else
- goto fail;
+ } else {
+ char buf[50];
+
+ sprint_oid(value, vlen, buf, sizeof(buf));
+ ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
+ return -EBADMSG;
+ }
conn->auth_mechs |= mech_type;
if (conn->preferred_auth_mech == 0)
conn->preferred_auth_mech = mech_type;
- kfree(oid);
return 0;
-
-fail:
- kfree(oid);
- sprint_oid(value, vlen, buf, sizeof(buf));
- ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
- return -EBADMSG;
}
int ksmbd_neg_token_init_mech_token(void *context, size_t hdrlen,
diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c
index 3503b1c48cb4..dc3d061edda9 100644
--- a/fs/ksmbd/auth.c
+++ b/fs/ksmbd/auth.c
@@ -215,7 +215,7 @@ out:
* Return: 0 on success, error number on error
*/
int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
- int blen, char *domain_name)
+ int blen, char *domain_name, char *cryptkey)
{
char ntlmv2_hash[CIFS_ENCPWD_SIZE];
char ntlmv2_rsp[CIFS_HMAC_MD5_HASH_SIZE];
@@ -256,7 +256,7 @@ int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
goto out;
}
- memcpy(construct, sess->ntlmssp.cryptkey, CIFS_CRYPTO_KEY_SIZE);
+ memcpy(construct, cryptkey, CIFS_CRYPTO_KEY_SIZE);
memcpy(construct + CIFS_CRYPTO_KEY_SIZE, &ntlmv2->blob_signature, blen);
rc = crypto_shash_update(CRYPTO_HMACMD5(ctx), construct, len);
@@ -295,7 +295,8 @@ out:
* Return: 0 on success, error number on error
*/
int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
- int blob_len, struct ksmbd_session *sess)
+ int blob_len, struct ksmbd_conn *conn,
+ struct ksmbd_session *sess)
{
char *domain_name;
unsigned int nt_off, dn_off;
@@ -324,7 +325,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
/* TODO : use domain name that imported from configuration file */
domain_name = smb_strndup_from_utf16((const char *)authblob + dn_off,
- dn_len, true, sess->conn->local_nls);
+ dn_len, true, conn->local_nls);
if (IS_ERR(domain_name))
return PTR_ERR(domain_name);
@@ -333,7 +334,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
domain_name);
ret = ksmbd_auth_ntlmv2(sess, (struct ntlmv2_resp *)((char *)authblob + nt_off),
nt_len - CIFS_ENCPWD_SIZE,
- domain_name);
+ domain_name, conn->ntlmssp.cryptkey);
kfree(domain_name);
return ret;
}
@@ -347,7 +348,7 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
*
*/
int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
- int blob_len, struct ksmbd_session *sess)
+ int blob_len, struct ksmbd_conn *conn)
{
if (blob_len < sizeof(struct negotiate_message)) {
ksmbd_debug(AUTH, "negotiate blob len %d too small\n",
@@ -361,7 +362,7 @@ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
return -EINVAL;
}
- sess->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags);
+ conn->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags);
return 0;
}
@@ -375,14 +376,14 @@ int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
*/
unsigned int
ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
- struct ksmbd_session *sess)
+ struct ksmbd_conn *conn)
{
struct target_info *tinfo;
wchar_t *name;
__u8 *target_name;
unsigned int flags, blob_off, blob_len, type, target_info_len = 0;
int len, uni_len, conv_len;
- int cflags = sess->ntlmssp.client_flags;
+ int cflags = conn->ntlmssp.client_flags;
memcpy(chgblob->Signature, NTLMSSP_SIGNATURE, 8);
chgblob->MessageType = NtLmChallenge;
@@ -403,7 +404,7 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
if (cflags & NTLMSSP_REQUEST_TARGET)
flags |= NTLMSSP_REQUEST_TARGET;
- if (sess->conn->use_spnego &&
+ if (conn->use_spnego &&
(cflags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
flags |= NTLMSSP_NEGOTIATE_EXTENDED_SEC;
@@ -414,7 +415,7 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
return -ENOMEM;
conv_len = smb_strtoUTF16((__le16 *)name, ksmbd_netbios_name(), len,
- sess->conn->local_nls);
+ conn->local_nls);
if (conv_len < 0 || conv_len > len) {
kfree(name);
return -EINVAL;
@@ -430,8 +431,8 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
chgblob->TargetName.BufferOffset = cpu_to_le32(blob_off);
/* Initialize random conn challenge */
- get_random_bytes(sess->ntlmssp.cryptkey, sizeof(__u64));
- memcpy(chgblob->Challenge, sess->ntlmssp.cryptkey,
+ get_random_bytes(conn->ntlmssp.cryptkey, sizeof(__u64));
+ memcpy(chgblob->Challenge, conn->ntlmssp.cryptkey,
CIFS_CRYPTO_KEY_SIZE);
/* Add Target Information to security buffer */
diff --git a/fs/ksmbd/auth.h b/fs/ksmbd/auth.h
index 9c2d4badd05d..95629651cf26 100644
--- a/fs/ksmbd/auth.h
+++ b/fs/ksmbd/auth.h
@@ -38,16 +38,16 @@ struct kvec;
int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
unsigned int nvec, int enc);
void ksmbd_copy_gss_neg_header(void *buf);
-int ksmbd_auth_ntlm(struct ksmbd_session *sess, char *pw_buf);
int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
- int blen, char *domain_name);
+ int blen, char *domain_name, char *cryptkey);
int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
- int blob_len, struct ksmbd_session *sess);
+ int blob_len, struct ksmbd_conn *conn,
+ struct ksmbd_session *sess);
int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
- int blob_len, struct ksmbd_session *sess);
+ int blob_len, struct ksmbd_conn *conn);
unsigned int
ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
- struct ksmbd_session *sess);
+ struct ksmbd_conn *conn);
int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob,
int in_len, char *out_blob, int *out_len);
int ksmbd_sign_smb2_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
index 83a94d0bb480..208d2cff7bd3 100644
--- a/fs/ksmbd/connection.c
+++ b/fs/ksmbd/connection.c
@@ -62,6 +62,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
atomic_set(&conn->req_running, 0);
atomic_set(&conn->r_count, 0);
conn->total_credits = 1;
+ conn->outstanding_credits = 1;
init_waitqueue_head(&conn->req_running_q);
INIT_LIST_HEAD(&conn->conns_list);
@@ -386,17 +387,24 @@ out:
static void stop_sessions(void)
{
struct ksmbd_conn *conn;
+ struct ksmbd_transport *t;
again:
read_lock(&conn_list_lock);
list_for_each_entry(conn, &conn_list, conns_list) {
struct task_struct *task;
- task = conn->transport->handler;
+ t = conn->transport;
+ task = t->handler;
if (task)
ksmbd_debug(CONN, "Stop session handler %s/%d\n",
task->comm, task_pid_nr(task));
conn->status = KSMBD_SESS_EXITING;
+ if (t->ops->shutdown) {
+ read_unlock(&conn_list_lock);
+ t->ops->shutdown(t);
+ read_lock(&conn_list_lock);
+ }
}
read_unlock(&conn_list_lock);
diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
index e5403c587a58..7a59aacb5daa 100644
--- a/fs/ksmbd/connection.h
+++ b/fs/ksmbd/connection.h
@@ -61,8 +61,8 @@ struct ksmbd_conn {
atomic_t req_running;
/* References which are made for this Server object*/
atomic_t r_count;
- unsigned short total_credits;
- unsigned short max_credits;
+ unsigned int total_credits;
+ unsigned int outstanding_credits;
spinlock_t credits_lock;
wait_queue_head_t req_running_q;
/* Lock to protect requests list*/
@@ -72,12 +72,7 @@ struct ksmbd_conn {
int connection_type;
struct ksmbd_stats stats;
char ClientGUID[SMB2_CLIENT_GUID_SIZE];
- union {
- /* pending trans request table */
- struct trans_state *recent_trans;
- /* Used by ntlmssp */
- char *ntlmssp_cryptkey;
- };
+ struct ntlmssp_auth ntlmssp;
spinlock_t llist_lock;
struct list_head lock_list;
@@ -122,6 +117,7 @@ struct ksmbd_conn_ops {
struct ksmbd_transport_ops {
int (*prepare)(struct ksmbd_transport *t);
void (*disconnect)(struct ksmbd_transport *t);
+ void (*shutdown)(struct ksmbd_transport *t);
int (*read)(struct ksmbd_transport *t, char *buf, unsigned int size);
int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov,
int size, bool need_invalidate_rkey,
diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
index c6718a05d347..71bfb7de4472 100644
--- a/fs/ksmbd/ksmbd_netlink.h
+++ b/fs/ksmbd/ksmbd_netlink.h
@@ -103,6 +103,8 @@ struct ksmbd_startup_request {
* we set the SPARSE_FILES bit (0x40).
*/
__u32 sub_auth[3]; /* Subauth value for Security ID */
+ __u32 smb2_max_credits; /* MAX credits */
+ __u32 reserved[128]; /* Reserved room */
__u32 ifc_list_sz; /* interfaces list size */
__s8 ____payload[];
};
@@ -113,7 +115,7 @@ struct ksmbd_startup_request {
* IPC request to shutdown ksmbd server.
*/
struct ksmbd_shutdown_request {
- __s32 reserved;
+ __s32 reserved[16];
};
/*
@@ -122,6 +124,7 @@ struct ksmbd_shutdown_request {
struct ksmbd_login_request {
__u32 handle;
__s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -135,6 +138,7 @@ struct ksmbd_login_response {
__u16 status;
__u16 hash_sz; /* hash size */
__s8 hash[KSMBD_REQ_MAX_HASH_SZ]; /* password hash */
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -143,6 +147,7 @@ struct ksmbd_login_response {
struct ksmbd_share_config_request {
__u32 handle;
__s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; /* share name */
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -157,6 +162,7 @@ struct ksmbd_share_config_response {
__u16 force_directory_mode;
__u16 force_uid;
__u16 force_gid;
+ __u32 reserved[128]; /* Reserved room */
__u32 veto_list_sz;
__s8 ____payload[];
};
@@ -187,6 +193,7 @@ struct ksmbd_tree_connect_request {
__s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ];
__s8 share[KSMBD_REQ_MAX_SHARE_NAME];
__s8 peer_addr[64];
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -196,6 +203,7 @@ struct ksmbd_tree_connect_response {
__u32 handle;
__u16 status;
__u16 connection_flags;
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -204,6 +212,7 @@ struct ksmbd_tree_connect_response {
struct ksmbd_tree_disconnect_request {
__u64 session_id; /* session id */
__u64 connect_id; /* tree connection id */
+ __u32 reserved[16]; /* Reserved room */
};
/*
@@ -212,6 +221,7 @@ struct ksmbd_tree_disconnect_request {
struct ksmbd_logout_request {
__s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
__u32 account_flags;
+ __u32 reserved[16]; /* Reserved room */
};
/*
diff --git a/fs/ksmbd/mgmt/user_config.c b/fs/ksmbd/mgmt/user_config.c
index 1019d3677d55..279d00feff21 100644
--- a/fs/ksmbd/mgmt/user_config.c
+++ b/fs/ksmbd/mgmt/user_config.c
@@ -67,3 +67,13 @@ int ksmbd_anonymous_user(struct ksmbd_user *user)
return 1;
return 0;
}
+
+bool ksmbd_compare_user(struct ksmbd_user *u1, struct ksmbd_user *u2)
+{
+ if (strcmp(u1->name, u2->name))
+ return false;
+ if (memcmp(u1->passkey, u2->passkey, u1->passkey_sz))
+ return false;
+
+ return true;
+}
diff --git a/fs/ksmbd/mgmt/user_config.h b/fs/ksmbd/mgmt/user_config.h
index aff80b029579..6a44109617f1 100644
--- a/fs/ksmbd/mgmt/user_config.h
+++ b/fs/ksmbd/mgmt/user_config.h
@@ -64,4 +64,5 @@ struct ksmbd_user *ksmbd_login_user(const char *account);
struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp);
void ksmbd_free_user(struct ksmbd_user *user);
int ksmbd_anonymous_user(struct ksmbd_user *user);
+bool ksmbd_compare_user(struct ksmbd_user *u1, struct ksmbd_user *u2);
#endif /* __USER_CONFIG_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/user_session.h b/fs/ksmbd/mgmt/user_session.h
index 82289c3cbd2b..e241f16a3851 100644
--- a/fs/ksmbd/mgmt/user_session.h
+++ b/fs/ksmbd/mgmt/user_session.h
@@ -45,7 +45,6 @@ struct ksmbd_session {
int state;
__u8 *Preauth_HashValue;
- struct ntlmssp_auth ntlmssp;
char sess_key[CIFS_KEY_SIZE];
struct hlist_node hlist;
diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c
index 50d0b1022289..4a9460153b59 100644
--- a/fs/ksmbd/smb2misc.c
+++ b/fs/ksmbd/smb2misc.c
@@ -289,7 +289,7 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
unsigned int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len;
unsigned short credit_charge = le16_to_cpu(hdr->CreditCharge);
void *__hdr = hdr;
- int ret;
+ int ret = 0;
switch (hdr->Command) {
case SMB2_QUERY_INFO:
@@ -326,21 +326,27 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
ksmbd_debug(SMB, "Insufficient credit charge, given: %d, needed: %d\n",
credit_charge, calc_credit_num);
return 1;
- } else if (credit_charge > conn->max_credits) {
+ } else if (credit_charge > conn->vals->max_credits) {
ksmbd_debug(SMB, "Too large credit charge: %d\n", credit_charge);
return 1;
}
spin_lock(&conn->credits_lock);
- if (credit_charge <= conn->total_credits) {
- conn->total_credits -= credit_charge;
- ret = 0;
- } else {
+ if (credit_charge > conn->total_credits) {
ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n",
credit_charge, conn->total_credits);
ret = 1;
}
+
+ if ((u64)conn->outstanding_credits + credit_charge > conn->vals->max_credits) {
+ ksmbd_debug(SMB, "Limits exceeding the maximum allowable outstanding requests, given : %u, pending : %u\n",
+ credit_charge, conn->outstanding_credits);
+ ret = 1;
+ } else
+ conn->outstanding_credits += credit_charge;
+
spin_unlock(&conn->credits_lock);
+
return ret;
}
diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c
index 02a44d28bdaf..ab23da2120b9 100644
--- a/fs/ksmbd/smb2ops.c
+++ b/fs/ksmbd/smb2ops.c
@@ -19,6 +19,7 @@ static struct smb_version_values smb21_server_values = {
.max_read_size = SMB21_DEFAULT_IOSIZE,
.max_write_size = SMB21_DEFAULT_IOSIZE,
.max_trans_size = SMB21_DEFAULT_IOSIZE,
+ .max_credits = SMB2_MAX_CREDITS,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
.shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -44,6 +45,7 @@ static struct smb_version_values smb30_server_values = {
.max_read_size = SMB3_DEFAULT_IOSIZE,
.max_write_size = SMB3_DEFAULT_IOSIZE,
.max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .max_credits = SMB2_MAX_CREDITS,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
.shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -70,6 +72,7 @@ static struct smb_version_values smb302_server_values = {
.max_read_size = SMB3_DEFAULT_IOSIZE,
.max_write_size = SMB3_DEFAULT_IOSIZE,
.max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .max_credits = SMB2_MAX_CREDITS,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
.shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -96,6 +99,7 @@ static struct smb_version_values smb311_server_values = {
.max_read_size = SMB3_DEFAULT_IOSIZE,
.max_write_size = SMB3_DEFAULT_IOSIZE,
.max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .max_credits = SMB2_MAX_CREDITS,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
.shared_lock_type = SMB2_LOCKFLAG_SHARED,
@@ -197,7 +201,6 @@ void init_smb2_1_server(struct ksmbd_conn *conn)
conn->ops = &smb2_0_server_ops;
conn->cmds = smb2_0_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
- conn->max_credits = SMB2_MAX_CREDITS;
conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -215,7 +218,6 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
conn->ops = &smb3_0_server_ops;
conn->cmds = smb2_0_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
- conn->max_credits = SMB2_MAX_CREDITS;
conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -240,7 +242,6 @@ void init_smb3_02_server(struct ksmbd_conn *conn)
conn->ops = &smb3_0_server_ops;
conn->cmds = smb2_0_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
- conn->max_credits = SMB2_MAX_CREDITS;
conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -265,7 +266,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
conn->ops = &smb3_11_server_ops;
conn->cmds = smb2_0_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
- conn->max_credits = SMB2_MAX_CREDITS;
conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
@@ -304,3 +304,11 @@ void init_smb2_max_trans_size(unsigned int sz)
smb302_server_values.max_trans_size = sz;
smb311_server_values.max_trans_size = sz;
}
+
+void init_smb2_max_credits(unsigned int sz)
+{
+ smb21_server_values.max_credits = sz;
+ smb30_server_values.max_credits = sz;
+ smb302_server_values.max_credits = sz;
+ smb311_server_values.max_credits = sz;
+}
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index b8b3a4c28b74..1866c81c5c99 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -299,16 +299,15 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
struct smb2_hdr *hdr = ksmbd_resp_buf_next(work);
struct ksmbd_conn *conn = work->conn;
- unsigned short credits_requested;
+ unsigned short credits_requested, aux_max;
unsigned short credit_charge, credits_granted = 0;
- unsigned short aux_max, aux_credits;
if (work->send_no_response)
return 0;
hdr->CreditCharge = req_hdr->CreditCharge;
- if (conn->total_credits > conn->max_credits) {
+ if (conn->total_credits > conn->vals->max_credits) {
hdr->CreditRequest = 0;
pr_err("Total credits overflow: %d\n", conn->total_credits);
return -EINVAL;
@@ -316,6 +315,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
credit_charge = max_t(unsigned short,
le16_to_cpu(req_hdr->CreditCharge), 1);
+ if (credit_charge > conn->total_credits) {
+ ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n",
+ credit_charge, conn->total_credits);
+ return -EINVAL;
+ }
+
+ conn->total_credits -= credit_charge;
+ conn->outstanding_credits -= credit_charge;
credits_requested = max_t(unsigned short,
le16_to_cpu(req_hdr->CreditRequest), 1);
@@ -325,16 +332,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
* TODO: Need to adjuct CreditRequest value according to
* current cpu load
*/
- aux_credits = credits_requested - 1;
if (hdr->Command == SMB2_NEGOTIATE)
- aux_max = 0;
+ aux_max = 1;
else
- aux_max = conn->max_credits - credit_charge;
- aux_credits = min_t(unsigned short, aux_credits, aux_max);
- credits_granted = credit_charge + aux_credits;
+ aux_max = conn->vals->max_credits - credit_charge;
+ credits_granted = min_t(unsigned short, credits_requested, aux_max);
- if (conn->max_credits - conn->total_credits < credits_granted)
- credits_granted = conn->max_credits -
+ if (conn->vals->max_credits - conn->total_credits < credits_granted)
+ credits_granted = conn->vals->max_credits -
conn->total_credits;
conn->total_credits += credits_granted;
@@ -610,16 +615,14 @@ static void destroy_previous_session(struct ksmbd_user *user, u64 id)
/**
* smb2_get_name() - get filename string from on the wire smb format
- * @share: ksmbd_share_config pointer
* @src: source buffer
* @maxlen: maxlen of source string
- * @nls_table: nls_table pointer
+ * @local_nls: nls_table pointer
*
* Return: matching converted filename on success, otherwise error ptr
*/
static char *
-smb2_get_name(struct ksmbd_share_config *share, const char *src,
- const int maxlen, struct nls_table *local_nls)
+smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls)
{
char *name;
@@ -1303,7 +1306,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
int sz, rc;
ksmbd_debug(SMB, "negotiate phase\n");
- rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->sess);
+ rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->conn);
if (rc)
return rc;
@@ -1313,7 +1316,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
memset(chgblob, 0, sizeof(struct challenge_message));
if (!work->conn->use_spnego) {
- sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
+ sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->conn);
if (sz < 0)
return -ENOMEM;
@@ -1329,7 +1332,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
return -ENOMEM;
chgblob = (struct challenge_message *)neg_blob;
- sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
+ sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->conn);
if (sz < 0) {
rc = -ENOMEM;
goto out;
@@ -1450,60 +1453,62 @@ static int ntlm_authenticate(struct ksmbd_work *work)
ksmbd_free_user(user);
return 0;
}
- ksmbd_free_user(sess->user);
- }
- sess->user = user;
- if (user_guest(sess->user)) {
- if (conn->sign) {
- ksmbd_debug(SMB, "Guest login not allowed when signing enabled\n");
+ if (!ksmbd_compare_user(sess->user, user)) {
+ ksmbd_free_user(user);
return -EPERM;
}
+ ksmbd_free_user(user);
+ } else {
+ sess->user = user;
+ }
+ if (user_guest(sess->user)) {
rsp->SessionFlags = SMB2_SESSION_FLAG_IS_GUEST_LE;
} else {
struct authenticate_message *authblob;
authblob = user_authblob(conn, req);
sz = le16_to_cpu(req->SecurityBufferLength);
- rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, sess);
+ rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, conn, sess);
if (rc) {
set_user_flag(sess->user, KSMBD_USER_FLAG_BAD_PASSWORD);
ksmbd_debug(SMB, "authentication failed\n");
return -EPERM;
}
+ }
- /*
- * If session state is SMB2_SESSION_VALID, We can assume
- * that it is reauthentication. And the user/password
- * has been verified, so return it here.
- */
- if (sess->state == SMB2_SESSION_VALID) {
- if (conn->binding)
- goto binding_session;
- return 0;
- }
+ /*
+ * If session state is SMB2_SESSION_VALID, We can assume
+ * that it is reauthentication. And the user/password
+ * has been verified, so return it here.
+ */
+ if (sess->state == SMB2_SESSION_VALID) {
+ if (conn->binding)
+ goto binding_session;
+ return 0;
+ }
- if ((conn->sign || server_conf.enforced_signing) ||
- (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
- sess->sign = true;
+ if ((rsp->SessionFlags != SMB2_SESSION_FLAG_IS_GUEST_LE &&
+ (conn->sign || server_conf.enforced_signing)) ||
+ (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
+ sess->sign = true;
- if (smb3_encryption_negotiated(conn) &&
- !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
- rc = conn->ops->generate_encryptionkey(sess);
- if (rc) {
- ksmbd_debug(SMB,
- "SMB3 encryption key generation failed\n");
- return -EINVAL;
- }
- sess->enc = true;
- rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
- /*
- * signing is disable if encryption is enable
- * on this session
- */
- sess->sign = false;
+ if (smb3_encryption_negotiated(conn) &&
+ !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
+ rc = conn->ops->generate_encryptionkey(sess);
+ if (rc) {
+ ksmbd_debug(SMB,
+ "SMB3 encryption key generation failed\n");
+ return -EINVAL;
}
+ sess->enc = true;
+ rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+ /*
+ * signing is disable if encryption is enable
+ * on this session
+ */
+ sess->sign = false;
}
binding_session:
@@ -2057,9 +2062,6 @@ int smb2_session_logoff(struct ksmbd_work *work)
ksmbd_debug(SMB, "request\n");
- /* Got a valid session, set connection state */
- WARN_ON(sess->conn != conn);
-
/* setting CifsExiting here may race with start_tcp_sess */
ksmbd_conn_set_need_reconnect(work);
ksmbd_close_session_fds(work);
@@ -2530,8 +2532,7 @@ int smb2_open(struct ksmbd_work *work)
goto err_out1;
}
- name = smb2_get_name(share,
- req->Buffer,
+ name = smb2_get_name(req->Buffer,
le16_to_cpu(req->NameLength),
work->conn->local_nls);
if (IS_ERR(name)) {
@@ -3392,7 +3393,6 @@ static int dentry_name(struct ksmbd_dir_info *d_info, int info_level)
* @conn: connection instance
* @info_level: smb information level
* @d_info: structure included variables for query dir
- * @user_ns: user namespace
* @ksmbd_kstat: ksmbd wrapper of dirent stat information
*
* if directory has many entries, find first can't read it fully.
@@ -4018,6 +4018,7 @@ err_out2:
* buffer_check_err() - helper function to check buffer errors
* @reqOutputBufferLength: max buffer length expected in command response
* @rsp: query info response buffer contains output buffer length
+ * @rsp_org: base response buffer pointer in case of chained response
* @infoclass_size: query info class response buffer size
*
* Return: 0 on success, otherwise error
@@ -5398,8 +5399,7 @@ static int smb2_rename(struct ksmbd_work *work,
goto out;
}
- new_name = smb2_get_name(share,
- file_info->FileName,
+ new_name = smb2_get_name(file_info->FileName,
le32_to_cpu(file_info->FileNameLength),
local_nls);
if (IS_ERR(new_name)) {
@@ -5510,8 +5510,7 @@ static int smb2_create_link(struct ksmbd_work *work,
if (!pathname)
return -ENOMEM;
- link_name = smb2_get_name(share,
- file_info->FileName,
+ link_name = smb2_get_name(file_info->FileName,
le32_to_cpu(file_info->FileNameLength),
local_nls);
if (IS_ERR(link_name) || S_ISDIR(file_inode(filp)->i_mode)) {
@@ -5849,7 +5848,7 @@ static int set_file_mode_info(struct ksmbd_file *fp,
* smb2_set_info_file() - handler for smb2 set info command
* @work: smb work containing set info command buffer
* @fp: ksmbd_file pointer
- * @info_class: smb2 set info class
+ * @req: request buffer pointer
* @share: ksmbd_share_config pointer
*
* Return: 0 on success, otherwise error
@@ -6121,25 +6120,33 @@ out:
return err;
}
-static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
- struct smb2_read_req *req, void *data_buf,
- size_t length)
+static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work,
+ struct smb2_buffer_desc_v1 *desc,
+ __le32 Channel,
+ __le16 ChannelInfoOffset,
+ __le16 ChannelInfoLength)
{
- struct smb2_buffer_desc_v1 *desc =
- (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
- int err;
-
if (work->conn->dialect == SMB30_PROT_ID &&
- req->Channel != SMB2_CHANNEL_RDMA_V1)
+ Channel != SMB2_CHANNEL_RDMA_V1)
return -EINVAL;
- if (req->ReadChannelInfoOffset == 0 ||
- le16_to_cpu(req->ReadChannelInfoLength) < sizeof(*desc))
+ if (ChannelInfoOffset == 0 ||
+ le16_to_cpu(ChannelInfoLength) < sizeof(*desc))
return -EINVAL;
work->need_invalidate_rkey =
- (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
+ (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
work->remote_key = le32_to_cpu(desc->token);
+ return 0;
+}
+
+static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
+ struct smb2_read_req *req, void *data_buf,
+ size_t length)
+{
+ struct smb2_buffer_desc_v1 *desc =
+ (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
+ int err;
err = ksmbd_conn_rdma_write(work->conn, data_buf, length,
le32_to_cpu(desc->token),
@@ -6162,7 +6169,7 @@ int smb2_read(struct ksmbd_work *work)
struct ksmbd_conn *conn = work->conn;
struct smb2_read_req *req;
struct smb2_read_rsp *rsp;
- struct ksmbd_file *fp;
+ struct ksmbd_file *fp = NULL;
loff_t offset;
size_t length, mincount;
ssize_t nbytes = 0, remain_bytes = 0;
@@ -6176,6 +6183,18 @@ int smb2_read(struct ksmbd_work *work)
return smb2_read_pipe(work);
}
+ if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
+ req->Channel == SMB2_CHANNEL_RDMA_V1) {
+ err = smb2_set_remote_key_for_rdma(work,
+ (struct smb2_buffer_desc_v1 *)
+ &req->Buffer[0],
+ req->Channel,
+ req->ReadChannelInfoOffset,
+ req->ReadChannelInfoLength);
+ if (err)
+ goto out;
+ }
+
fp = ksmbd_lookup_fd_slow(work, le64_to_cpu(req->VolatileFileId),
le64_to_cpu(req->PersistentFileId));
if (!fp) {
@@ -6361,21 +6380,6 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work,
desc = (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
- if (work->conn->dialect == SMB30_PROT_ID &&
- req->Channel != SMB2_CHANNEL_RDMA_V1)
- return -EINVAL;
-
- if (req->Length != 0 || req->DataOffset != 0)
- return -EINVAL;
-
- if (req->WriteChannelInfoOffset == 0 ||
- le16_to_cpu(req->WriteChannelInfoLength) < sizeof(*desc))
- return -EINVAL;
-
- work->need_invalidate_rkey =
- (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
- work->remote_key = le32_to_cpu(desc->token);
-
data_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
if (!data_buf)
return -ENOMEM;
@@ -6422,6 +6426,20 @@ int smb2_write(struct ksmbd_work *work)
return smb2_write_pipe(work);
}
+ if (req->Channel == SMB2_CHANNEL_RDMA_V1 ||
+ req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
+ if (req->Length != 0 || req->DataOffset != 0)
+ return -EINVAL;
+ err = smb2_set_remote_key_for_rdma(work,
+ (struct smb2_buffer_desc_v1 *)
+ &req->Buffer[0],
+ req->Channel,
+ req->WriteChannelInfoOffset,
+ req->WriteChannelInfoLength);
+ if (err)
+ goto out;
+ }
+
if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
ksmbd_debug(SMB, "User does not have write permission\n");
err = -EACCES;
@@ -7243,15 +7261,10 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
struct sockaddr_storage_rsp *sockaddr_storage;
unsigned int flags;
unsigned long long speed;
- struct sockaddr_in6 *csin6 = (struct sockaddr_in6 *)&conn->peer_addr;
rtnl_lock();
for_each_netdev(&init_net, netdev) {
- if (out_buf_len <
- nbytes + sizeof(struct network_interface_info_ioctl_rsp)) {
- rtnl_unlock();
- return -ENOSPC;
- }
+ bool ipv4_set = false;
if (netdev->type == ARPHRD_LOOPBACK)
continue;
@@ -7259,12 +7272,20 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
flags = dev_get_flags(netdev);
if (!(flags & IFF_RUNNING))
continue;
+ipv6_retry:
+ if (out_buf_len <
+ nbytes + sizeof(struct network_interface_info_ioctl_rsp)) {
+ rtnl_unlock();
+ return -ENOSPC;
+ }
nii_rsp = (struct network_interface_info_ioctl_rsp *)
&rsp->Buffer[nbytes];
nii_rsp->IfIndex = cpu_to_le32(netdev->ifindex);
nii_rsp->Capability = 0;
+ if (netdev->real_num_tx_queues > 1)
+ nii_rsp->Capability |= cpu_to_le32(RSS_CAPABLE);
if (ksmbd_rdma_capable_netdev(netdev))
nii_rsp->Capability |= cpu_to_le32(RDMA_CAPABLE);
@@ -7289,8 +7310,7 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
nii_rsp->SockAddr_Storage;
memset(sockaddr_storage, 0, 128);
- if (conn->peer_addr.ss_family == PF_INET ||
- ipv6_addr_v4mapped(&csin6->sin6_addr)) {
+ if (!ipv4_set) {
struct in_device *idev;
sockaddr_storage->Family = cpu_to_le16(INTERNETWORK);
@@ -7301,6 +7321,9 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
continue;
sockaddr_storage->addr4.IPv4address =
idev_ipv4_address(idev);
+ nbytes += sizeof(struct network_interface_info_ioctl_rsp);
+ ipv4_set = true;
+ goto ipv6_retry;
} else {
struct inet6_dev *idev6;
struct inet6_ifaddr *ifa;
@@ -7322,9 +7345,8 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
break;
}
sockaddr_storage->addr6.ScopeId = 0;
+ nbytes += sizeof(struct network_interface_info_ioctl_rsp);
}
-
- nbytes += sizeof(struct network_interface_info_ioctl_rsp);
}
rtnl_unlock();
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
index 4a3e4339d4c4..725b800c29c8 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/ksmbd/smb2pdu.h
@@ -980,6 +980,7 @@ int init_smb3_11_server(struct ksmbd_conn *conn);
void init_smb2_max_read_size(unsigned int sz);
void init_smb2_max_write_size(unsigned int sz);
void init_smb2_max_trans_size(unsigned int sz);
+void init_smb2_max_credits(unsigned int sz);
bool is_smb2_neg_cmd(struct ksmbd_work *work);
bool is_smb2_rsp(struct ksmbd_work *work);
diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h
index 50590842b651..e1369b4345a9 100644
--- a/fs/ksmbd/smb_common.h
+++ b/fs/ksmbd/smb_common.h
@@ -365,6 +365,7 @@ struct smb_version_values {
__u32 max_read_size;
__u32 max_write_size;
__u32 max_trans_size;
+ __u32 max_credits;
__u32 large_lock_type;
__u32 exclusive_lock_type;
__u32 shared_lock_type;
diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c
index 1acf1892a466..3ad6881e0f7e 100644
--- a/fs/ksmbd/transport_ipc.c
+++ b/fs/ksmbd/transport_ipc.c
@@ -301,6 +301,8 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
init_smb2_max_write_size(req->smb2_max_write);
if (req->smb2_max_trans)
init_smb2_max_trans_size(req->smb2_max_trans);
+ if (req->smb2_max_credits)
+ init_smb2_max_credits(req->smb2_max_credits);
ret = ksmbd_set_netbios_name(req->netbios_name);
ret |= ksmbd_set_server_string(req->server_string);
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
index 7e57cbb0bb35..3c1ec1ac0b27 100644
--- a/fs/ksmbd/transport_rdma.c
+++ b/fs/ksmbd/transport_rdma.c
@@ -34,7 +34,8 @@
#include "smbstatus.h"
#include "transport_rdma.h"
-#define SMB_DIRECT_PORT 5445
+#define SMB_DIRECT_PORT_IWARP 5445
+#define SMB_DIRECT_PORT_INFINIBAND 445
#define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100)
@@ -60,6 +61,10 @@
* as defined in [MS-SMBD] 3.1.1.1
* Those may change after a SMB_DIRECT negotiation
*/
+
+/* Set 445 port to SMB Direct port by default */
+static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;
+
/* The local peer's maximum number of credits to grant to the peer */
static int smb_direct_receive_credit_max = 255;
@@ -75,10 +80,18 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
/* The maximum single-message size which can be received */
static int smb_direct_max_receive_size = 8192;
-static int smb_direct_max_read_write_size = 1024 * 1024;
+static int smb_direct_max_read_write_size = 1048512;
static int smb_direct_max_outstanding_rw_ops = 8;
+static LIST_HEAD(smb_direct_device_list);
+static DEFINE_RWLOCK(smb_direct_device_lock);
+
+struct smb_direct_device {
+ struct ib_device *ib_dev;
+ struct list_head list;
+};
+
static struct smb_direct_listener {
struct rdma_cm_id *cm_id;
} smb_direct_listener;
@@ -415,6 +428,7 @@ static void free_transport(struct smb_direct_transport *t)
if (t->qp) {
ib_drain_qp(t->qp);
+ ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
ib_destroy_qp(t->qp);
}
@@ -555,6 +569,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
}
t->negotiation_requested = true;
t->full_packet_received = true;
+ enqueue_reassembly(t, recvmsg, 0);
wake_up_interruptible(&t->wait_status);
break;
case SMB_DIRECT_MSG_DATA_TRANSFER: {
@@ -1438,6 +1453,15 @@ static void smb_direct_disconnect(struct ksmbd_transport *t)
free_transport(st);
}
+static void smb_direct_shutdown(struct ksmbd_transport *t)
+{
+ struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+
+ ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id);
+
+ smb_direct_disconnect_rdma_work(&st->disconnect_work);
+}
+
static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
@@ -1581,19 +1605,13 @@ static int smb_direct_accept_client(struct smb_direct_transport *t)
pr_err("error at rdma_accept: %d\n", ret);
return ret;
}
-
- wait_event_interruptible(t->wait_status,
- t->status != SMB_DIRECT_CS_NEW);
- if (t->status != SMB_DIRECT_CS_CONNECTED)
- return -ENOTCONN;
return 0;
}
-static int smb_direct_negotiate(struct smb_direct_transport *t)
+static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
{
int ret;
struct smb_direct_recvmsg *recvmsg;
- struct smb_direct_negotiate_req *req;
recvmsg = get_free_recvmsg(t);
if (!recvmsg)
@@ -1603,44 +1621,20 @@ static int smb_direct_negotiate(struct smb_direct_transport *t)
ret = smb_direct_post_recv(t, recvmsg);
if (ret) {
pr_err("Can't post recv: %d\n", ret);
- goto out;
+ goto out_err;
}
t->negotiation_requested = false;
ret = smb_direct_accept_client(t);
if (ret) {
pr_err("Can't accept client\n");
- goto out;
+ goto out_err;
}
smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
-
- ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
- ret = wait_event_interruptible_timeout(t->wait_status,
- t->negotiation_requested ||
- t->status == SMB_DIRECT_CS_DISCONNECTED,
- SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
- if (ret <= 0 || t->status == SMB_DIRECT_CS_DISCONNECTED) {
- ret = ret < 0 ? ret : -ETIMEDOUT;
- goto out;
- }
-
- ret = smb_direct_check_recvmsg(recvmsg);
- if (ret == -ECONNABORTED)
- goto out;
-
- req = (struct smb_direct_negotiate_req *)recvmsg->packet;
- t->max_recv_size = min_t(int, t->max_recv_size,
- le32_to_cpu(req->preferred_send_size));
- t->max_send_size = min_t(int, t->max_send_size,
- le32_to_cpu(req->max_receive_size));
- t->max_fragmented_send_size =
- le32_to_cpu(req->max_fragmented_size);
-
- ret = smb_direct_send_negotiate_response(t, ret);
-out:
- if (recvmsg)
- put_recvmsg(t, recvmsg);
+ return 0;
+out_err:
+ put_recvmsg(t, recvmsg);
return ret;
}
@@ -1724,7 +1718,9 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
cap->max_inline_data = 0;
- cap->max_rdma_ctxs = 0;
+ cap->max_rdma_ctxs =
+ rdma_rw_mr_factor(device, t->cm_id->port_num, max_pages) *
+ smb_direct_max_outstanding_rw_ops;
return 0;
}
@@ -1806,6 +1802,7 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
{
int ret;
struct ib_qp_init_attr qp_attr;
+ int pages_per_rw;
t->pd = ib_alloc_pd(t->cm_id->device, 0);
if (IS_ERR(t->pd)) {
@@ -1853,6 +1850,23 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
t->qp = t->cm_id->qp;
t->cm_id->event_handler = smb_direct_cm_handler;
+ pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
+ if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
+ int pages_per_mr, mr_count;
+
+ pages_per_mr = min_t(int, pages_per_rw,
+ t->cm_id->device->attrs.max_fast_reg_page_list_len);
+ mr_count = DIV_ROUND_UP(pages_per_rw, pages_per_mr) *
+ atomic_read(&t->rw_avail_ops);
+ ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, mr_count,
+ IB_MR_TYPE_MEM_REG, pages_per_mr, 0);
+ if (ret) {
+ pr_err("failed to init mr pool count %d pages %d\n",
+ mr_count, pages_per_mr);
+ goto err;
+ }
+ }
+
return 0;
err:
if (t->qp) {
@@ -1877,6 +1891,49 @@ err:
static int smb_direct_prepare(struct ksmbd_transport *t)
{
struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+ struct smb_direct_recvmsg *recvmsg;
+ struct smb_direct_negotiate_req *req;
+ int ret;
+
+ ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
+ ret = wait_event_interruptible_timeout(st->wait_status,
+ st->negotiation_requested ||
+ st->status == SMB_DIRECT_CS_DISCONNECTED,
+ SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
+ if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
+ return ret < 0 ? ret : -ETIMEDOUT;
+
+ recvmsg = get_first_reassembly(st);
+ if (!recvmsg)
+ return -ECONNABORTED;
+
+ ret = smb_direct_check_recvmsg(recvmsg);
+ if (ret == -ECONNABORTED)
+ goto out;
+
+ req = (struct smb_direct_negotiate_req *)recvmsg->packet;
+ st->max_recv_size = min_t(int, st->max_recv_size,
+ le32_to_cpu(req->preferred_send_size));
+ st->max_send_size = min_t(int, st->max_send_size,
+ le32_to_cpu(req->max_receive_size));
+ st->max_fragmented_send_size =
+ le32_to_cpu(req->max_fragmented_size);
+ st->max_fragmented_recv_size =
+ (st->recv_credit_max * st->max_recv_size) / 2;
+
+ ret = smb_direct_send_negotiate_response(st, ret);
+out:
+ spin_lock_irq(&st->reassembly_queue_lock);
+ st->reassembly_queue_length--;
+ list_del(&recvmsg->list);
+ spin_unlock_irq(&st->reassembly_queue_lock);
+ put_recvmsg(st, recvmsg);
+
+ return ret;
+}
+
+static int smb_direct_connect(struct smb_direct_transport *st)
+{
int ret;
struct ib_qp_cap qp_cap;
@@ -1898,13 +1955,11 @@ static int smb_direct_prepare(struct ksmbd_transport *t)
return ret;
}
- ret = smb_direct_negotiate(st);
+ ret = smb_direct_prepare_negotiation(st);
if (ret) {
pr_err("Can't negotiate: %d\n", ret);
return ret;
}
-
- st->status = SMB_DIRECT_CS_CONNECTED;
return 0;
}
@@ -1920,6 +1975,7 @@ static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
{
struct smb_direct_transport *t;
+ int ret;
if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
ksmbd_debug(RDMA,
@@ -1932,18 +1988,23 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
if (!t)
return -ENOMEM;
+ ret = smb_direct_connect(t);
+ if (ret)
+ goto out_err;
+
KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
KSMBD_TRANS(t)->conn, "ksmbd:r%u",
- SMB_DIRECT_PORT);
+ smb_direct_port);
if (IS_ERR(KSMBD_TRANS(t)->handler)) {
- int ret = PTR_ERR(KSMBD_TRANS(t)->handler);
-
+ ret = PTR_ERR(KSMBD_TRANS(t)->handler);
pr_err("Can't start thread\n");
- free_transport(t);
- return ret;
+ goto out_err;
}
return 0;
+out_err:
+ free_transport(t);
+ return ret;
}
static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
@@ -2007,12 +2068,65 @@ err:
return ret;
}
+static int smb_direct_ib_client_add(struct ib_device *ib_dev)
+{
+ struct smb_direct_device *smb_dev;
+
+ /* Set 5445 port if device type is iWARP(No IB) */
+ if (ib_dev->node_type != RDMA_NODE_IB_CA)
+ smb_direct_port = SMB_DIRECT_PORT_IWARP;
+
+ if (!ib_dev->ops.get_netdev ||
+ !rdma_frwr_is_supported(&ib_dev->attrs))
+ return 0;
+
+ smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
+ if (!smb_dev)
+ return -ENOMEM;
+ smb_dev->ib_dev = ib_dev;
+
+ write_lock(&smb_direct_device_lock);
+ list_add(&smb_dev->list, &smb_direct_device_list);
+ write_unlock(&smb_direct_device_lock);
+
+ ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
+ return 0;
+}
+
+static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
+ void *client_data)
+{
+ struct smb_direct_device *smb_dev, *tmp;
+
+ write_lock(&smb_direct_device_lock);
+ list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
+ if (smb_dev->ib_dev == ib_dev) {
+ list_del(&smb_dev->list);
+ kfree(smb_dev);
+ break;
+ }
+ }
+ write_unlock(&smb_direct_device_lock);
+}
+
+static struct ib_client smb_direct_ib_client = {
+ .name = "ksmbd_smb_direct_ib",
+ .add = smb_direct_ib_client_add,
+ .remove = smb_direct_ib_client_remove,
+};
+
int ksmbd_rdma_init(void)
{
int ret;
smb_direct_listener.cm_id = NULL;
+ ret = ib_register_client(&smb_direct_ib_client);
+ if (ret) {
+ pr_err("failed to ib_register_client\n");
+ return ret;
+ }
+
/* When a client is running out of send credits, the credits are
* granted by the server's sending a packet using this queue.
* This avoids the situation that a clients cannot send packets
@@ -2023,7 +2137,7 @@ int ksmbd_rdma_init(void)
if (!smb_direct_wq)
return -ENOMEM;
- ret = smb_direct_listen(SMB_DIRECT_PORT);
+ ret = smb_direct_listen(smb_direct_port);
if (ret) {
destroy_workqueue(smb_direct_wq);
smb_direct_wq = NULL;
@@ -2036,36 +2150,67 @@ int ksmbd_rdma_init(void)
return 0;
}
-int ksmbd_rdma_destroy(void)
+void ksmbd_rdma_destroy(void)
{
- if (smb_direct_listener.cm_id)
- rdma_destroy_id(smb_direct_listener.cm_id);
+ if (!smb_direct_listener.cm_id)
+ return;
+
+ ib_unregister_client(&smb_direct_ib_client);
+ rdma_destroy_id(smb_direct_listener.cm_id);
+
smb_direct_listener.cm_id = NULL;
if (smb_direct_wq) {
destroy_workqueue(smb_direct_wq);
smb_direct_wq = NULL;
}
- return 0;
}
bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
{
- struct ib_device *ibdev;
+ struct smb_direct_device *smb_dev;
+ int i;
bool rdma_capable = false;
- ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
- if (ibdev) {
- if (rdma_frwr_is_supported(&ibdev->attrs))
- rdma_capable = true;
- ib_device_put(ibdev);
+ read_lock(&smb_direct_device_lock);
+ list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
+ for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
+ struct net_device *ndev;
+
+ ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
+ i + 1);
+ if (!ndev)
+ continue;
+
+ if (ndev == netdev) {
+ dev_put(ndev);
+ rdma_capable = true;
+ goto out;
+ }
+ dev_put(ndev);
+ }
+ }
+out:
+ read_unlock(&smb_direct_device_lock);
+
+ if (rdma_capable == false) {
+ struct ib_device *ibdev;
+
+ ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
+ if (ibdev) {
+ if (rdma_frwr_is_supported(&ibdev->attrs))
+ rdma_capable = true;
+ ib_device_put(ibdev);
+ }
}
+
return rdma_capable;
}
static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
.prepare = smb_direct_prepare,
.disconnect = smb_direct_disconnect,
+ .shutdown = smb_direct_shutdown,
.writev = smb_direct_writev,
.read = smb_direct_read,
.rdma_read = smb_direct_rdma_read,
diff --git a/fs/ksmbd/transport_rdma.h b/fs/ksmbd/transport_rdma.h
index 0fa8adc0776f..5567d93a6f96 100644
--- a/fs/ksmbd/transport_rdma.h
+++ b/fs/ksmbd/transport_rdma.h
@@ -7,8 +7,6 @@
#ifndef __KSMBD_TRANSPORT_RDMA_H__
#define __KSMBD_TRANSPORT_RDMA_H__
-#define SMB_DIRECT_PORT 5445
-
/* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */
struct smb_direct_negotiate_req {
__le16 min_version;
@@ -52,7 +50,7 @@ struct smb_direct_data_transfer {
#ifdef CONFIG_SMB_SERVER_SMBDIRECT
int ksmbd_rdma_init(void);
-int ksmbd_rdma_destroy(void);
+void ksmbd_rdma_destroy(void);
bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
#else
static inline int ksmbd_rdma_init(void) { return 0; }
diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c
index c14320e03b69..82a1429bbe12 100644
--- a/fs/ksmbd/transport_tcp.c
+++ b/fs/ksmbd/transport_tcp.c
@@ -404,7 +404,7 @@ static int create_socket(struct interface *iface)
&ksmbd_socket);
if (ret) {
pr_err("Can't create socket for ipv4: %d\n", ret);
- goto out_error;
+ goto out_clear;
}
sin.sin_family = PF_INET;
@@ -462,6 +462,7 @@ static int create_socket(struct interface *iface)
out_error:
tcp_destroy_socket(ksmbd_socket);
+out_clear:
iface->ksmbd_socket = NULL;
return ret;
}
diff --git a/fs/ksmbd/vfs_cache.h b/fs/ksmbd/vfs_cache.h
index 448576fbe4b7..36239ce31afd 100644
--- a/fs/ksmbd/vfs_cache.h
+++ b/fs/ksmbd/vfs_cache.h
@@ -96,16 +96,6 @@ struct ksmbd_file {
int durable_timeout;
- /* for SMB1 */
- int pid;
-
- /* conflict lock fail count for SMB1 */
- unsigned int cflock_cnt;
- /* last lock failure start offset for SMB1 */
- unsigned long long llock_fstart;
-
- int dirent_offset;
-
/* if ls is happening on directory, below is valid*/
struct ksmbd_readdir_data readdir_data;
int dot_dotdot[2];
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b220e1b91726..0475c5a5d061 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -54,13 +54,9 @@ EXPORT_SYMBOL_GPL(nlmsvc_ops);
static DEFINE_MUTEX(nlmsvc_mutex);
static unsigned int nlmsvc_users;
-static struct task_struct *nlmsvc_task;
-static struct svc_rqst *nlmsvc_rqst;
+static struct svc_serv *nlmsvc_serv;
unsigned long nlmsvc_timeout;
-static atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0);
-static DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq);
-
unsigned int lockd_net_id;
/*
@@ -184,7 +180,12 @@ lockd(void *vrqstp)
nlm_shutdown_hosts();
cancel_delayed_work_sync(&ln->grace_period_end);
locks_end_grace(&ln->lockd_manager);
- return 0;
+
+ dprintk("lockd_down: service stopped\n");
+
+ svc_exit_thread(rqstp);
+
+ module_put_and_kthread_exit(0);
}
static int create_lockd_listener(struct svc_serv *serv, const char *name,
@@ -290,8 +291,8 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
__func__, net->ns.inum);
}
} else {
- pr_err("%s: no users! task=%p, net=%x\n",
- __func__, nlmsvc_task, net->ns.inum);
+ pr_err("%s: no users! net=%x\n",
+ __func__, net->ns.inum);
BUG();
}
}
@@ -302,20 +303,16 @@ static int lockd_inetaddr_event(struct notifier_block *this,
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
struct sockaddr_in sin;
- if ((event != NETDEV_DOWN) ||
- !atomic_inc_not_zero(&nlm_ntf_refcnt))
+ if (event != NETDEV_DOWN)
goto out;
- if (nlmsvc_rqst) {
+ if (nlmsvc_serv) {
dprintk("lockd_inetaddr_event: removed %pI4\n",
&ifa->ifa_local);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = ifa->ifa_local;
- svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
- (struct sockaddr *)&sin);
+ svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin);
}
- atomic_dec(&nlm_ntf_refcnt);
- wake_up(&nlm_ntf_wq);
out:
return NOTIFY_DONE;
@@ -332,21 +329,17 @@ static int lockd_inet6addr_event(struct notifier_block *this,
struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
struct sockaddr_in6 sin6;
- if ((event != NETDEV_DOWN) ||
- !atomic_inc_not_zero(&nlm_ntf_refcnt))
+ if (event != NETDEV_DOWN)
goto out;
- if (nlmsvc_rqst) {
+ if (nlmsvc_serv) {
dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin6.sin6_scope_id = ifa->idev->dev->ifindex;
- svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
- (struct sockaddr *)&sin6);
+ svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin6);
}
- atomic_dec(&nlm_ntf_refcnt);
- wake_up(&nlm_ntf_wq);
out:
return NOTIFY_DONE;
@@ -357,86 +350,22 @@ static struct notifier_block lockd_inet6addr_notifier = {
};
#endif
-static void lockd_unregister_notifiers(void)
-{
- unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
-#if IS_ENABLED(CONFIG_IPV6)
- unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
-#endif
- wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0);
-}
-
-static void lockd_svc_exit_thread(void)
-{
- atomic_dec(&nlm_ntf_refcnt);
- lockd_unregister_notifiers();
- svc_exit_thread(nlmsvc_rqst);
-}
-
-static int lockd_start_svc(struct svc_serv *serv)
-{
- int error;
-
- if (nlmsvc_rqst)
- return 0;
-
- /*
- * Create the kernel thread and wait for it to start.
- */
- nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
- if (IS_ERR(nlmsvc_rqst)) {
- error = PTR_ERR(nlmsvc_rqst);
- printk(KERN_WARNING
- "lockd_up: svc_rqst allocation failed, error=%d\n",
- error);
- lockd_unregister_notifiers();
- goto out_rqst;
- }
-
- atomic_inc(&nlm_ntf_refcnt);
- svc_sock_update_bufs(serv);
- serv->sv_maxconn = nlm_max_connections;
-
- nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name);
- if (IS_ERR(nlmsvc_task)) {
- error = PTR_ERR(nlmsvc_task);
- printk(KERN_WARNING
- "lockd_up: kthread_run failed, error=%d\n", error);
- goto out_task;
- }
- nlmsvc_rqst->rq_task = nlmsvc_task;
- wake_up_process(nlmsvc_task);
-
- dprintk("lockd_up: service started\n");
- return 0;
-
-out_task:
- lockd_svc_exit_thread();
- nlmsvc_task = NULL;
-out_rqst:
- nlmsvc_rqst = NULL;
- return error;
-}
-
static const struct svc_serv_ops lockd_sv_ops = {
.svo_shutdown = svc_rpcb_cleanup,
+ .svo_function = lockd,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
+ .svo_module = THIS_MODULE,
};
-static struct svc_serv *lockd_create_svc(void)
+static int lockd_get(void)
{
struct svc_serv *serv;
+ int error;
- /*
- * Check whether we're already up and running.
- */
- if (nlmsvc_rqst) {
- /*
- * Note: increase service usage, because later in case of error
- * svc_destroy() will be called.
- */
- svc_get(nlmsvc_rqst->rq_server);
- return nlmsvc_rqst->rq_server;
+ if (nlmsvc_serv) {
+ svc_get(nlmsvc_serv);
+ nlmsvc_users++;
+ return 0;
}
/*
@@ -454,14 +383,41 @@ static struct svc_serv *lockd_create_svc(void)
serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops);
if (!serv) {
printk(KERN_WARNING "lockd_up: create service failed\n");
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
+
+ serv->sv_maxconn = nlm_max_connections;
+ error = svc_set_num_threads(serv, NULL, 1);
+ /* The thread now holds the only reference */
+ svc_put(serv);
+ if (error < 0)
+ return error;
+
+ nlmsvc_serv = serv;
register_inetaddr_notifier(&lockd_inetaddr_notifier);
#if IS_ENABLED(CONFIG_IPV6)
register_inet6addr_notifier(&lockd_inet6addr_notifier);
#endif
dprintk("lockd_up: service created\n");
- return serv;
+ nlmsvc_users++;
+ return 0;
+}
+
+static void lockd_put(void)
+{
+ if (WARN(nlmsvc_users <= 0, "lockd_down: no users!\n"))
+ return;
+ if (--nlmsvc_users)
+ return;
+
+ unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
+ unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
+#endif
+
+ svc_set_num_threads(nlmsvc_serv, NULL, 0);
+ nlmsvc_serv = NULL;
+ dprintk("lockd_down: service destroyed\n");
}
/*
@@ -469,36 +425,21 @@ static struct svc_serv *lockd_create_svc(void)
*/
int lockd_up(struct net *net, const struct cred *cred)
{
- struct svc_serv *serv;
int error;
mutex_lock(&nlmsvc_mutex);
- serv = lockd_create_svc();
- if (IS_ERR(serv)) {
- error = PTR_ERR(serv);
- goto err_create;
- }
+ error = lockd_get();
+ if (error)
+ goto err;
- error = lockd_up_net(serv, net, cred);
+ error = lockd_up_net(nlmsvc_serv, net, cred);
if (error < 0) {
- lockd_unregister_notifiers();
- goto err_put;
+ lockd_put();
+ goto err;
}
- error = lockd_start_svc(serv);
- if (error < 0) {
- lockd_down_net(serv, net);
- goto err_put;
- }
- nlmsvc_users++;
- /*
- * Note: svc_serv structures have an initial use count of 1,
- * so we exit through here on both success and failure.
- */
-err_put:
- svc_destroy(serv);
-err_create:
+err:
mutex_unlock(&nlmsvc_mutex);
return error;
}
@@ -511,27 +452,8 @@ void
lockd_down(struct net *net)
{
mutex_lock(&nlmsvc_mutex);
- lockd_down_net(nlmsvc_rqst->rq_server, net);
- if (nlmsvc_users) {
- if (--nlmsvc_users)
- goto out;
- } else {
- printk(KERN_ERR "lockd_down: no users! task=%p\n",
- nlmsvc_task);
- BUG();
- }
-
- if (!nlmsvc_task) {
- printk(KERN_ERR "lockd_down: no lockd running.\n");
- BUG();
- }
- kthread_stop(nlmsvc_task);
- dprintk("lockd_down: service stopped\n");
- lockd_svc_exit_thread();
- dprintk("lockd_down: service destroyed\n");
- nlmsvc_task = NULL;
- nlmsvc_rqst = NULL;
-out:
+ lockd_down_net(nlmsvc_serv, net);
+ lockd_put();
mutex_unlock(&nlmsvc_mutex);
}
EXPORT_SYMBOL_GPL(lockd_down);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index e9b85d8fd5fe..cb3658ab9b7a 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -470,8 +470,10 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_host *host, struct nlm_lock *lock, int wait,
struct nlm_cookie *cookie, int reclaim)
{
- struct nlm_block *block = NULL;
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
struct inode *inode = nlmsvc_file_inode(file);
+#endif
+ struct nlm_block *block = NULL;
int error;
int mode;
int async_block = 0;
@@ -484,7 +486,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
(long long)lock->fl.fl_end,
wait);
- if (inode->i_sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS) {
+ if (nlmsvc_file_file(file)->f_op->lock) {
async_block = wait;
wait = 0;
}
diff --git a/fs/locks.c b/fs/locks.c
index 0fca9d680978..8c6df10cd9ed 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -62,6 +62,7 @@
#include <linux/pid_namespace.h>
#include <linux/hashtable.h>
#include <linux/percpu.h>
+#include <linux/sysctl.h>
#define CREATE_TRACE_POINTS
#include <trace/events/filelock.h>
@@ -88,8 +89,37 @@ static int target_leasetype(struct file_lock *fl)
return fl->fl_type;
}
-int leases_enable = 1;
-int lease_break_time = 45;
+static int leases_enable = 1;
+static int lease_break_time = 45;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table locks_sysctls[] = {
+ {
+ .procname = "leases-enable",
+ .data = &leases_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_MMU
+ {
+ .procname = "lease-break-time",
+ .data = &lease_break_time,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif /* CONFIG_MMU */
+ {}
+};
+
+static int __init init_fs_locks_sysctls(void)
+{
+ register_sysctl_init("fs", locks_sysctls);
+ return 0;
+}
+early_initcall(init_fs_locks_sysctls);
+#endif /* CONFIG_SYSCTL */
/*
* The global file_lock_list is only used for displaying /proc/locks, so we
diff --git a/fs/mpage.c b/fs/mpage.c
index 334e7d09aa65..87f5cfef6caa 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -29,7 +29,6 @@
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
-#include <linux/cleancache.h>
#include "internal.h"
/*
@@ -284,12 +283,6 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
SetPageMappedToDisk(page);
}
- if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) &&
- cleancache_get_page(page) == 0) {
- SetPageUptodate(page);
- goto confused;
- }
-
/*
* This page will go to BIO. Do we need to send this BIO off first?
*/
diff --git a/fs/namei.c b/fs/namei.c
index d81f04f8d818..b867a92c078e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1020,10 +1020,60 @@ static inline void put_link(struct nameidata *nd)
path_put(&last->link);
}
-int sysctl_protected_symlinks __read_mostly = 0;
-int sysctl_protected_hardlinks __read_mostly = 0;
-int sysctl_protected_fifos __read_mostly;
-int sysctl_protected_regular __read_mostly;
+static int sysctl_protected_symlinks __read_mostly;
+static int sysctl_protected_hardlinks __read_mostly;
+static int sysctl_protected_fifos __read_mostly;
+static int sysctl_protected_regular __read_mostly;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table namei_sysctls[] = {
+ {
+ .procname = "protected_symlinks",
+ .data = &sysctl_protected_symlinks,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "protected_hardlinks",
+ .data = &sysctl_protected_hardlinks,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "protected_fifos",
+ .data = &sysctl_protected_fifos,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "protected_regular",
+ .data = &sysctl_protected_regular,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ { }
+};
+
+static int __init init_fs_namei_sysctls(void)
+{
+ register_sysctl_init("fs", namei_sysctls);
+ return 0;
+}
+fs_initcall(init_fs_namei_sysctls);
+
+#endif /* CONFIG_SYSCTL */
/**
* may_follow_link - Check symlink following for unsafe situations
diff --git a/fs/namespace.c b/fs/namespace.c
index dc31ad6b370f..40b994a29e90 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -37,7 +37,7 @@
#include "internal.h"
/* Maximum number of mounts in a mount namespace */
-unsigned int sysctl_mount_max __read_mostly = 100000;
+static unsigned int sysctl_mount_max __read_mostly = 100000;
static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
@@ -4620,3 +4620,25 @@ const struct proc_ns_operations mntns_operations = {
.install = mntns_install,
.owner = mntns_owner,
};
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table fs_namespace_sysctls[] = {
+ {
+ .procname = "mount-max",
+ .data = &sysctl_mount_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
+ { }
+};
+
+static int __init init_fs_namespace_sysctls(void)
+{
+ register_sysctl_init("fs", fs_namespace_sysctls);
+ return 0;
+}
+fs_initcall(init_fs_namespace_sysctls);
+
+#endif /* CONFIG_SYSCTL */
diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 6169659857b3..501da990c259 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -55,7 +55,8 @@ static struct netfs_read_request *netfs_alloc_read_request(
INIT_WORK(&rreq->work, netfs_rreq_work);
refcount_set(&rreq->usage, 1);
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
- ops->init_rreq(rreq, file);
+ if (ops->init_rreq)
+ ops->init_rreq(rreq, file);
netfs_stat(&netfs_n_rh_rreq);
}
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 86d856de1389..054cc1255fac 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -93,7 +93,7 @@ nfs4_callback_svc(void *vrqstp)
svc_process(rqstp);
}
svc_exit_thread(rqstp);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
@@ -137,7 +137,7 @@ nfs41_callback_svc(void *vrqstp)
}
}
svc_exit_thread(rqstp);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
@@ -169,12 +169,12 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
if (nrservs < NFS4_MIN_NR_CALLBACK_THREADS)
nrservs = NFS4_MIN_NR_CALLBACK_THREADS;
- if (serv->sv_nrthreads-1 == nrservs)
+ if (serv->sv_nrthreads == nrservs)
return 0;
- ret = serv->sv_ops->svo_setup(serv, NULL, nrservs);
+ ret = svc_set_num_threads(serv, NULL, nrservs);
if (ret) {
- serv->sv_ops->svo_setup(serv, NULL, 0);
+ svc_set_num_threads(serv, NULL, 0);
return ret;
}
dprintk("nfs_callback_up: service started\n");
@@ -235,14 +235,12 @@ err_bind:
static const struct svc_serv_ops nfs40_cb_sv_ops = {
.svo_function = nfs4_callback_svc,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_setup = svc_set_num_threads_sync,
.svo_module = THIS_MODULE,
};
#if defined(CONFIG_NFS_V4_1)
static const struct svc_serv_ops nfs41_cb_sv_ops = {
.svo_function = nfs41_callback_svc,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_setup = svc_set_num_threads_sync,
.svo_module = THIS_MODULE,
};
@@ -266,14 +264,8 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
/*
* Check whether we're already up and running.
*/
- if (cb_info->serv) {
- /*
- * Note: increase service usage, because later in case of error
- * svc_destroy() will be called.
- */
- svc_get(cb_info->serv);
- return cb_info->serv;
- }
+ if (cb_info->serv)
+ return svc_get(cb_info->serv);
switch (minorversion) {
case 0:
@@ -294,7 +286,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
cb_info->users);
- serv = svc_create_pooled(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
+ serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
if (!serv) {
printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
return ERR_PTR(-ENOMEM);
@@ -335,16 +327,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
goto err_start;
cb_info->users++;
- /*
- * svc_create creates the svc_serv with sv_nrthreads == 1, and then
- * svc_prepare_thread increments that. So we need to call svc_destroy
- * on both success and failure so that the refcount is 1 when the
- * thread exits.
- */
err_net:
if (!cb_info->users)
cb_info->serv = NULL;
- svc_destroy(serv);
+ svc_put(serv);
err_create:
mutex_unlock(&nfs_callback_mutex);
return ret;
@@ -369,8 +355,8 @@ void nfs_callback_down(int minorversion, struct net *net)
cb_info->users--;
if (cb_info->users == 0) {
svc_get(serv);
- serv->sv_ops->svo_setup(serv, NULL, 0);
- svc_destroy(serv);
+ svc_set_num_threads(serv, NULL, 0);
+ svc_put(serv);
dprintk("nfs_callback_down: service destroyed\n");
cb_info->serv = NULL;
}
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 171c424cb6d5..01596f2d0a1e 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -158,5 +158,5 @@ const struct export_operations nfs_export_ops = {
.fetch_iversion = nfs_fetch_iversion,
.flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
- EXPORT_OP_NOATOMIC_ATTR|EXPORT_OP_SYNC_LOCKS,
+ EXPORT_OP_NOATOMIC_ATTR,
};
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f63dfa01001c..d88b779f9dd0 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2693,6 +2693,6 @@ static int nfs4_run_state_manager(void *ptr)
allow_signal(SIGKILL);
nfs4_state_manager(clp);
nfs_put_client(clp);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index fdf89fcf1a0c..8bc807c5fea4 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -44,12 +44,9 @@ struct nfsd_fcache_bucket {
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
struct nfsd_fcache_disposal {
- struct list_head list;
struct work_struct work;
- struct net *net;
spinlock_t lock;
struct list_head freeme;
- struct rcu_head rcu;
};
static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
@@ -62,8 +59,6 @@ static long nfsd_file_lru_flags;
static struct fsnotify_group *nfsd_file_fsnotify_group;
static atomic_long_t nfsd_filecache_count;
static struct delayed_work nfsd_filecache_laundrette;
-static DEFINE_SPINLOCK(laundrette_lock);
-static LIST_HEAD(laundrettes);
static void nfsd_file_gc(void);
@@ -194,7 +189,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
}
nf->nf_mark = NULL;
- init_rwsem(&nf->nf_rwsem);
trace_nfsd_file_alloc(nf);
}
return nf;
@@ -249,7 +243,7 @@ nfsd_file_do_unhash(struct nfsd_file *nf)
trace_nfsd_file_unhash(nf);
if (nfsd_file_check_write_error(nf))
- nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
hlist_del_rcu(&nf->nf_node);
atomic_long_dec(&nfsd_filecache_count);
@@ -367,19 +361,13 @@ nfsd_file_list_remove_disposal(struct list_head *dst,
static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
- struct nfsd_fcache_disposal *l;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
- rcu_read_lock();
- list_for_each_entry_rcu(l, &laundrettes, list) {
- if (l->net == net) {
- spin_lock(&l->lock);
- list_splice_tail_init(files, &l->freeme);
- spin_unlock(&l->lock);
- queue_work(nfsd_filecache_wq, &l->work);
- break;
- }
- }
- rcu_read_unlock();
+ spin_lock(&l->lock);
+ list_splice_tail_init(files, &l->freeme);
+ spin_unlock(&l->lock);
+ queue_work(nfsd_filecache_wq, &l->work);
}
static void
@@ -755,7 +743,7 @@ nfsd_file_cache_purge(struct net *net)
}
static struct nfsd_fcache_disposal *
-nfsd_alloc_fcache_disposal(struct net *net)
+nfsd_alloc_fcache_disposal(void)
{
struct nfsd_fcache_disposal *l;
@@ -763,7 +751,6 @@ nfsd_alloc_fcache_disposal(struct net *net)
if (!l)
return NULL;
INIT_WORK(&l->work, nfsd_file_delayed_close);
- l->net = net;
spin_lock_init(&l->lock);
INIT_LIST_HEAD(&l->freeme);
return l;
@@ -772,61 +759,27 @@ nfsd_alloc_fcache_disposal(struct net *net)
static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
- rcu_assign_pointer(l->net, NULL);
cancel_work_sync(&l->work);
nfsd_file_dispose_list(&l->freeme);
- kfree_rcu(l, rcu);
-}
-
-static void
-nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
-{
- spin_lock(&laundrette_lock);
- list_add_tail_rcu(&l->list, &laundrettes);
- spin_unlock(&laundrette_lock);
-}
-
-static void
-nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
-{
- spin_lock(&laundrette_lock);
- list_del_rcu(&l->list);
- spin_unlock(&laundrette_lock);
-}
-
-static int
-nfsd_alloc_fcache_disposal_net(struct net *net)
-{
- struct nfsd_fcache_disposal *l;
-
- l = nfsd_alloc_fcache_disposal(net);
- if (!l)
- return -ENOMEM;
- nfsd_add_fcache_disposal(l);
- return 0;
+ kfree(l);
}
static void
nfsd_free_fcache_disposal_net(struct net *net)
{
- struct nfsd_fcache_disposal *l;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
- rcu_read_lock();
- list_for_each_entry_rcu(l, &laundrettes, list) {
- if (l->net != net)
- continue;
- nfsd_del_fcache_disposal(l);
- rcu_read_unlock();
- nfsd_free_fcache_disposal(l);
- return;
- }
- rcu_read_unlock();
+ nfsd_free_fcache_disposal(l);
}
int
nfsd_file_cache_start_net(struct net *net)
{
- return nfsd_alloc_fcache_disposal_net(net);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nn->fcache_disposal = nfsd_alloc_fcache_disposal();
+ return nn->fcache_disposal ? 0 : -ENOMEM;
}
void
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 7872df5a0fe3..435ceab27897 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -46,7 +46,6 @@ struct nfsd_file {
refcount_t nf_ref;
unsigned char nf_may;
struct nfsd_file_mark *nf_mark;
- struct rw_semaphore nf_rwsem;
};
int nfsd_file_cache_init(void);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 935c1028c217..1b1a962a1804 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -11,6 +11,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/percpu_counter.h>
+#include <linux/siphash.h>
/* Hash tables for nfs4_clientid state */
#define CLIENT_HASH_BITS 4
@@ -108,9 +109,8 @@ struct nfsd_net {
bool nfsd_net_up;
bool lockd_up;
- /* Time of server startup */
- struct timespec64 nfssvc_boot;
- seqlock_t boot_lock;
+ seqlock_t writeverf_lock;
+ unsigned char writeverf[8];
/*
* Max number of connections this nfsd container will allow. Defaults
@@ -123,12 +123,13 @@ struct nfsd_net {
u32 clverifier_counter;
struct svc_serv *nfsd_serv;
-
- wait_queue_head_t ntf_wq;
- atomic_t ntf_refcnt;
-
- /* Allow umount to wait for nfsd state cleanup */
- struct completion nfsd_shutdown_complete;
+ /* When a listening socket is added to nfsd, keep_active is set
+ * and this justifies a reference on nfsd_serv. This stops
+ * nfsd_serv from being freed. When the number of threads is
+ * set, keep_active is cleared and the reference is dropped. So
+ * when the last thread exits, the service will be destroyed.
+ */
+ int keep_active;
/*
* clientid and stateid data for construction of net unique COPY
@@ -184,6 +185,10 @@ struct nfsd_net {
/* utsname taken from the process that starts the server */
char nfsd_name[UNX_MAXNODENAME+1];
+
+ struct nfsd_fcache_disposal *fcache_disposal;
+
+ siphash_key_t siphash_key;
};
/* Simple check to find out if a given net was properly initialized */
@@ -193,6 +198,6 @@ extern void nfsd_netns_free_versions(struct nfsd_net *nn);
extern unsigned int nfsd_net_id;
-void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
-void nfsd_reset_boot_verifier(struct nfsd_net *nn);
+void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
+void nfsd_reset_write_verifier(struct nfsd_net *nn);
#endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 15dac36ca852..8ef53f6726ec 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -202,15 +202,11 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
nvecs = svc_fill_write_vector(rqstp, &argp->payload);
- if (!nvecs) {
- resp->status = nfserr_io;
- goto out;
- }
+
resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
rqstp->rq_vec, nvecs, &cnt,
resp->committed, resp->verf);
resp->count = cnt;
-out:
return rpc_success;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index c3ac1b6aa3aa..7c45ba4db61b 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -487,71 +487,6 @@ neither:
return true;
}
-static bool fs_supports_change_attribute(struct super_block *sb)
-{
- return sb->s_flags & SB_I_VERSION || sb->s_export_op->fetch_iversion;
-}
-
-/*
- * Fill in the pre_op attr for the wcc data
- */
-void fill_pre_wcc(struct svc_fh *fhp)
-{
- struct inode *inode;
- struct kstat stat;
- bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
-
- if (fhp->fh_no_wcc || fhp->fh_pre_saved)
- return;
- inode = d_inode(fhp->fh_dentry);
- if (fs_supports_change_attribute(inode->i_sb) || !v4) {
- __be32 err = fh_getattr(fhp, &stat);
-
- if (err) {
- /* Grab the times from inode anyway */
- stat.mtime = inode->i_mtime;
- stat.ctime = inode->i_ctime;
- stat.size = inode->i_size;
- }
- fhp->fh_pre_mtime = stat.mtime;
- fhp->fh_pre_ctime = stat.ctime;
- fhp->fh_pre_size = stat.size;
- }
- if (v4)
- fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
-
- fhp->fh_pre_saved = true;
-}
-
-/*
- * Fill in the post_op attr for the wcc data
- */
-void fill_post_wcc(struct svc_fh *fhp)
-{
- bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
- struct inode *inode = d_inode(fhp->fh_dentry);
-
- if (fhp->fh_no_wcc)
- return;
-
- if (fhp->fh_post_saved)
- printk("nfsd: inode locked twice during operation.\n");
-
- fhp->fh_post_saved = true;
-
- if (fs_supports_change_attribute(inode->i_sb) || !v4) {
- __be32 err = fh_getattr(fhp, &fhp->fh_post_attr);
-
- if (err) {
- fhp->fh_post_saved = false;
- fhp->fh_post_attr.ctime = inode->i_ctime;
- }
- }
- if (v4)
- fhp->fh_post_change =
- nfsd4_change_attribute(&fhp->fh_post_attr, inode);
-}
-
/*
* XDR decode functions
*/
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a36261f89bdf..ed1ee25647be 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -598,7 +598,7 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
- nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
+ nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id));
}
static __be32
@@ -1101,7 +1101,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
- status = nfsd4_clone_file_range(src, clone->cl_src_pos,
+ status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos,
dst, clone->cl_dst_pos, clone->cl_count,
EX_ISSYNC(cstate->current_fh.fh_export));
@@ -1510,11 +1510,14 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
{
+ struct file *dst = copy->nf_dst->nf_file;
+ struct file *src = copy->nf_src->nf_file;
+ errseq_t since;
ssize_t bytes_copied = 0;
u64 bytes_total = copy->cp_count;
u64 src_pos = copy->cp_src_pos;
u64 dst_pos = copy->cp_dst_pos;
- __be32 status;
+ int status;
/* See RFC 7862 p.67: */
if (bytes_total == 0)
@@ -1522,9 +1525,8 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
do {
if (kthread_should_stop())
break;
- bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
- src_pos, copy->nf_dst->nf_file, dst_pos,
- bytes_total);
+ bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
+ bytes_total);
if (bytes_copied <= 0)
break;
bytes_total -= bytes_copied;
@@ -1534,11 +1536,11 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
} while (bytes_total > 0 && !copy->cp_synchronous);
/* for a non-zero asynchronous copy do a commit of data */
if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) {
- down_write(&copy->nf_dst->nf_rwsem);
- status = vfs_fsync_range(copy->nf_dst->nf_file,
- copy->cp_dst_pos,
+ since = READ_ONCE(dst->f_wb_err);
+ status = vfs_fsync_range(dst, copy->cp_dst_pos,
copy->cp_res.wr_bytes_written, 0);
- up_write(&copy->nf_dst->nf_rwsem);
+ if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
if (!status)
copy->committed = true;
}
@@ -2528,7 +2530,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
goto encode_op;
}
- fh_clear_wcc(current_fh);
+ fh_clear_pre_post_attrs(current_fh);
/* If op is non-idempotent */
if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1956d377d1a6..72900b89cf84 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -246,6 +246,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
if (fh_match(fh, &cur->nbl_fh)) {
list_del_init(&cur->nbl_list);
+ WARN_ON(list_empty(&cur->nbl_lru));
list_del_init(&cur->nbl_lru);
found = cur;
break;
@@ -271,6 +272,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
INIT_LIST_HEAD(&nbl->nbl_lru);
fh_copy_shallow(&nbl->nbl_fh, fh);
locks_init_lock(&nbl->nbl_lock);
+ kref_init(&nbl->nbl_kref);
nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
&nfsd4_cb_notify_lock_ops,
NFSPROC4_CLNT_CB_NOTIFY_LOCK);
@@ -280,11 +282,20 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
}
static void
+free_nbl(struct kref *kref)
+{
+ struct nfsd4_blocked_lock *nbl;
+
+ nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref);
+ kfree(nbl);
+}
+
+static void
free_blocked_lock(struct nfsd4_blocked_lock *nbl)
{
locks_delete_block(&nbl->nbl_lock);
locks_release_private(&nbl->nbl_lock);
- kfree(nbl);
+ kref_put(&nbl->nbl_kref, free_nbl);
}
static void
@@ -302,6 +313,7 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
struct nfsd4_blocked_lock,
nbl_list);
list_del_init(&nbl->nbl_list);
+ WARN_ON(list_empty(&nbl->nbl_lru));
list_move(&nbl->nbl_lru, &reaplist);
}
spin_unlock(&nn->blocked_locks_lock);
@@ -360,11 +372,13 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
* st_{access,deny}_bmap field of the stateid, in order to track not
* only what share bits are currently in force, but also what
* combinations of share bits previous opens have used. This allows us
- * to enforce the recommendation of rfc 3530 14.2.19 that the server
- * return an error if the client attempt to downgrade to a combination
- * of share bits not explicable by closing some of its previous opens.
+ * to enforce the recommendation in
+ * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that
+ * the server return an error if the client attempt to downgrade to a
+ * combination of share bits not explicable by closing some of its
+ * previous opens.
*
- * XXX: This enforcement is actually incomplete, since we don't keep
+ * This enforcement is arguably incomplete, since we don't keep
* track of access/deny bit combinations; so, e.g., we allow:
*
* OPEN allow read, deny write
@@ -372,6 +386,10 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
* DOWNGRADE allow read, deny none
*
* which we should reject.
+ *
+ * But you could also argue that our current code is already overkill,
+ * since it only exists to return NFS4ERR_INVAL on incorrect client
+ * behavior.
*/
static unsigned int
bmap_to_share_mode(unsigned long bmap)
@@ -6040,7 +6058,11 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
*nfp = NULL;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
- status = check_special_stateids(net, fhp, stateid, flags);
+ if (cstid)
+ status = nfserr_bad_stateid;
+ else
+ status = check_special_stateids(net, fhp, stateid,
+ flags);
goto done;
}
@@ -6836,7 +6858,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
- struct super_block *sb;
__be32 status = 0;
int lkflg;
int err;
@@ -6858,7 +6879,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: nfsd4_lock: permission denied!\n");
return status;
}
- sb = cstate->current_fh.fh_dentry->d_sb;
if (lock->lk_is_new) {
if (nfsd4_has_session(cstate))
@@ -6910,8 +6930,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
- if (nfsd4_has_session(cstate) &&
- !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
+ if (nfsd4_has_session(cstate))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
@@ -6923,8 +6942,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fl_type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
- if (nfsd4_has_session(cstate) &&
- !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
+ if (nfsd4_has_session(cstate))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
@@ -6945,6 +6963,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
+ /*
+ * Most filesystems with their own ->lock operations will block
+ * the nfsd thread waiting to acquire the lock. That leads to
+ * deadlocks (we don't want every nfsd thread tied up waiting
+ * for file locks), so don't attempt blocking lock notifications
+ * on those filesystems:
+ */
+ if (nf->nf_file->f_op->lock)
+ fl_flags &= ~FL_SLEEP;
+
nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
if (!nbl) {
dprintk("NFSD: %s: unable to allocate block!\n", __func__);
@@ -6975,6 +7003,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
spin_lock(&nn->blocked_locks_lock);
list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
+ kref_get(&nbl->nbl_kref);
spin_unlock(&nn->blocked_locks_lock);
}
@@ -6987,6 +7016,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nn->somebody_reclaimed = true;
break;
case FILE_LOCK_DEFERRED:
+ kref_put(&nbl->nbl_kref, free_nbl);
nbl = NULL;
fallthrough;
case -EAGAIN: /* conflock holds conflicting lock */
@@ -7007,8 +7037,13 @@ out:
/* dequeue it if we queued it before */
if (fl_flags & FL_SLEEP) {
spin_lock(&nn->blocked_locks_lock);
- list_del_init(&nbl->nbl_list);
- list_del_init(&nbl->nbl_lru);
+ if (!list_empty(&nbl->nbl_list) &&
+ !list_empty(&nbl->nbl_lru)) {
+ list_del_init(&nbl->nbl_list);
+ list_del_init(&nbl->nbl_lru);
+ kref_put(&nbl->nbl_kref, free_nbl);
+ }
+ /* nbl can use one of lists to be linked to reaplist */
spin_unlock(&nn->blocked_locks_lock);
}
free_blocked_lock(nbl);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5a93a5db4fb0..899de438e529 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -277,21 +277,10 @@ nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf)
static __be32
nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen)
{
- u32 i, count;
- __be32 *p;
-
- if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
- return nfserr_bad_xdr;
- /* request sanity */
- if (count > 1000)
- return nfserr_bad_xdr;
- p = xdr_inline_decode(argp->xdr, count << 2);
- if (!p)
- return nfserr_bad_xdr;
- for (i = 0; i < bmlen; i++)
- bmval[i] = (i < count) ? be32_to_cpup(p++) : 0;
+ ssize_t status;
- return nfs_ok;
+ status = xdr_stream_decode_uint32_array(argp->xdr, bmval, bmlen);
+ return status == -EBADMSG ? nfserr_bad_xdr : nfs_ok;
}
static __be32
@@ -4804,8 +4793,8 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
return nfserr_resource;
*p++ = htonl(NFS4_CONTENT_HOLE);
- p = xdr_encode_hyper(p, read->rd_offset);
- p = xdr_encode_hyper(p, count);
+ p = xdr_encode_hyper(p, read->rd_offset);
+ p = xdr_encode_hyper(p, count);
*eof = (read->rd_offset + count) >= f_size;
*maxcount = min_t(unsigned long, count, *maxcount);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 6e0b6f3148dc..a4a69ab6ab28 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -87,7 +87,7 @@ nfsd_hashsize(unsigned int limit)
static u32
nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
{
- return hash_32(be32_to_cpu(xid), nn->maskbits);
+ return hash_32((__force u32)xid, nn->maskbits);
}
static struct svc_cacherep *
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 51a49e0cfe37..b9f27fbcd768 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -742,13 +742,12 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
return err;
err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
- if (err < 0) {
- nfsd_destroy(net);
- return err;
- }
- /* Decrease the count, but don't shut down the service */
- nn->nfsd_serv->sv_nrthreads--;
+ if (err >= 0 &&
+ !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
+ svc_get(nn->nfsd_serv);
+
+ nfsd_put(net);
return err;
}
@@ -783,8 +782,10 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
if (err < 0 && err != -EAFNOSUPPORT)
goto out_close;
- /* Decrease the count, but don't shut down the service */
- nn->nfsd_serv->sv_nrthreads--;
+ if (!nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
+ svc_get(nn->nfsd_serv);
+
+ nfsd_put(net);
return 0;
out_close:
xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port);
@@ -793,10 +794,7 @@ out_close:
svc_xprt_put(xprt);
}
out_err:
- if (!list_empty(&nn->nfsd_serv->sv_permsocks))
- nn->nfsd_serv->sv_nrthreads--;
- else
- nfsd_destroy(net);
+ nfsd_put(net);
return err;
}
@@ -1485,9 +1483,8 @@ static __net_init int nfsd_init_net(struct net *net)
nn->clientid_counter = nn->clientid_base + 1;
nn->s2s_cp_cl_id = nn->clientid_counter++;
- atomic_set(&nn->ntf_refcnt, 0);
- init_waitqueue_head(&nn->ntf_wq);
- seqlock_init(&nn->boot_lock);
+ get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
+ seqlock_init(&nn->writeverf_lock);
return 0;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 498e5a489826..3e5008b475ff 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -97,7 +97,7 @@ int nfsd_pool_stats_open(struct inode *, struct file *);
int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_shutdown_threads(struct net *net);
-void nfsd_destroy(struct net *net);
+void nfsd_put(struct net *net);
bool i_am_nfsd(void);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index f3779fa72c89..145208bcb9bd 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -611,6 +611,70 @@ out_negative:
return nfserr_serverfault;
}
+#ifdef CONFIG_NFSD_V3
+
+/**
+ * fh_fill_pre_attrs - Fill in pre-op attributes
+ * @fhp: file handle to be updated
+ *
+ */
+void fh_fill_pre_attrs(struct svc_fh *fhp)
+{
+ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
+ struct inode *inode;
+ struct kstat stat;
+ __be32 err;
+
+ if (fhp->fh_no_wcc || fhp->fh_pre_saved)
+ return;
+
+ inode = d_inode(fhp->fh_dentry);
+ err = fh_getattr(fhp, &stat);
+ if (err) {
+ /* Grab the times from inode anyway */
+ stat.mtime = inode->i_mtime;
+ stat.ctime = inode->i_ctime;
+ stat.size = inode->i_size;
+ }
+ if (v4)
+ fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
+
+ fhp->fh_pre_mtime = stat.mtime;
+ fhp->fh_pre_ctime = stat.ctime;
+ fhp->fh_pre_size = stat.size;
+ fhp->fh_pre_saved = true;
+}
+
+/**
+ * fh_fill_post_attrs - Fill in post-op attributes
+ * @fhp: file handle to be updated
+ *
+ */
+void fh_fill_post_attrs(struct svc_fh *fhp)
+{
+ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
+ struct inode *inode = d_inode(fhp->fh_dentry);
+ __be32 err;
+
+ if (fhp->fh_no_wcc)
+ return;
+
+ if (fhp->fh_post_saved)
+ printk("nfsd: inode locked twice during operation.\n");
+
+ err = fh_getattr(fhp, &fhp->fh_post_attr);
+ if (err) {
+ fhp->fh_post_saved = false;
+ fhp->fh_post_attr.ctime = inode->i_ctime;
+ } else
+ fhp->fh_post_saved = true;
+ if (v4)
+ fhp->fh_post_change =
+ nfsd4_change_attribute(&fhp->fh_post_attr, inode);
+}
+
+#endif /* CONFIG_NFSD_V3 */
+
/*
* Release a file handle.
*/
@@ -623,7 +687,7 @@ fh_put(struct svc_fh *fhp)
fh_unlock(fhp);
fhp->fh_dentry = NULL;
dput(dentry);
- fh_clear_wcc(fhp);
+ fh_clear_pre_post_attrs(fhp);
}
fh_drop_write(fhp);
if (exp) {
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index d11e4b6870d6..434930d8a946 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -284,12 +284,13 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
#endif
#ifdef CONFIG_NFSD_V3
-/*
- * The wcc data stored in current_fh should be cleared
- * between compound ops.
+
+/**
+ * fh_clear_pre_post_attrs - Reset pre/post attributes
+ * @fhp: file handle to be updated
+ *
*/
-static inline void
-fh_clear_wcc(struct svc_fh *fhp)
+static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
{
fhp->fh_post_saved = false;
fhp->fh_pre_saved = false;
@@ -323,13 +324,24 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat,
return time_to_chattr(&stat->ctime);
}
-extern void fill_pre_wcc(struct svc_fh *fhp);
-extern void fill_post_wcc(struct svc_fh *fhp);
-#else
-#define fh_clear_wcc(ignored)
-#define fill_pre_wcc(ignored)
-#define fill_post_wcc(notused)
-#endif /* CONFIG_NFSD_V3 */
+extern void fh_fill_pre_attrs(struct svc_fh *fhp);
+extern void fh_fill_post_attrs(struct svc_fh *fhp);
+
+#else /* !CONFIG_NFSD_V3 */
+
+static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
+{
+}
+
+static inline void fh_fill_pre_attrs(struct svc_fh *fhp)
+{
+}
+
+static inline void fh_fill_post_attrs(struct svc_fh *fhp)
+{
+}
+
+#endif /* !CONFIG_NFSD_V3 */
/*
@@ -355,7 +367,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
inode = d_inode(dentry);
inode_lock_nested(inode, subclass);
- fill_pre_wcc(fhp);
+ fh_fill_pre_attrs(fhp);
fhp->fh_locked = true;
}
@@ -372,7 +384,7 @@ static inline void
fh_unlock(struct svc_fh *fhp)
{
if (fhp->fh_locked) {
- fill_post_wcc(fhp);
+ fh_fill_post_attrs(fhp);
inode_unlock(d_inode(fhp->fh_dentry));
fhp->fh_locked = false;
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index de282f3273c5..18b8eb43a19b 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -235,10 +235,6 @@ nfsd_proc_write(struct svc_rqst *rqstp)
argp->len, argp->offset);
nvecs = svc_fill_write_vector(rqstp, &argp->payload);
- if (!nvecs) {
- resp->status = nfserr_io;
- goto out;
- }
resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset, rqstp->rq_vec, nvecs,
@@ -247,7 +243,6 @@ nfsd_proc_write(struct svc_rqst *rqstp)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
return rpc_drop_reply;
-out:
return rpc_success;
}
@@ -850,6 +845,7 @@ nfserrno (int errno)
{ nfserr_io, -EIO },
{ nfserr_nxio, -ENXIO },
{ nfserr_fbig, -E2BIG },
+ { nfserr_stale, -EBADF },
{ nfserr_acces, -EACCES },
{ nfserr_exist, -EEXIST },
{ nfserr_xdev, -EXDEV },
@@ -878,6 +874,8 @@ nfserrno (int errno)
{ nfserr_toosmall, -ETOOSMALL },
{ nfserr_serverfault, -ESERVERFAULT },
{ nfserr_serverfault, -ENFILE },
+ { nfserr_io, -EREMOTEIO },
+ { nfserr_stale, -EOPENSTALE },
{ nfserr_io, -EUCLEAN },
{ nfserr_perm, -ENOKEY },
{ nfserr_no_grace, -ENOGRACE},
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 80431921e5d7..b8c682b62d29 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/fs_struct.h>
#include <linux/swap.h>
+#include <linux/siphash.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svcsock.h>
@@ -55,18 +56,17 @@ static __be32 nfsd_init_request(struct svc_rqst *,
struct svc_process_info *);
/*
- * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members
- * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
- * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
+ * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members
+ * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks.
*
* If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a
- * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
- * of nfsd threads must exist and each must listed in ->sp_all_threads in each
- * entry of ->sv_pools[].
+ * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless
+ * nn->keep_active is set). That number of nfsd threads must
+ * exist and each must be listed in ->sp_all_threads in some entry of
+ * ->sv_pools[].
*
- * Transitions of the thread count between zero and non-zero are of particular
- * interest since the svc_serv needs to be created and initialized at that
- * point, or freed.
+ * Each active thread holds a counted reference on nn->nfsd_serv, as does
+ * the nn->keep_active flag and various transient calls to svc_get().
*
* Finally, the nfsd_mutex also protects some of the global variables that are
* accessed when nfsd starts and that are settable via the write_* routines in
@@ -345,33 +345,57 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
}
-void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+/**
+ * nfsd_copy_write_verifier - Atomically copy a write verifier
+ * @verf: buffer in which to receive the verifier cookie
+ * @nn: NFS net namespace
+ *
+ * This function provides a wait-free mechanism for copying the
+ * namespace's write verifier without tearing it.
+ */
+void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn)
{
int seq = 0;
do {
- read_seqbegin_or_lock(&nn->boot_lock, &seq);
- /*
- * This is opaque to client, so no need to byte-swap. Use
- * __force to keep sparse happy. y2038 time_t overflow is
- * irrelevant in this usage
- */
- verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
- verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
- } while (need_seqretry(&nn->boot_lock, seq));
- done_seqretry(&nn->boot_lock, seq);
+ read_seqbegin_or_lock(&nn->writeverf_lock, &seq);
+ memcpy(verf, nn->writeverf, sizeof(*verf));
+ } while (need_seqretry(&nn->writeverf_lock, seq));
+ done_seqretry(&nn->writeverf_lock, seq);
}
-static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn)
{
- ktime_get_real_ts64(&nn->nfssvc_boot);
+ struct timespec64 now;
+ u64 verf;
+
+ /*
+ * Because the time value is hashed, y2038 time_t overflow
+ * is irrelevant in this usage.
+ */
+ ktime_get_raw_ts64(&now);
+ verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key);
+ memcpy(nn->writeverf, &verf, sizeof(nn->writeverf));
}
-void nfsd_reset_boot_verifier(struct nfsd_net *nn)
+/**
+ * nfsd_reset_write_verifier - Generate a new write verifier
+ * @nn: NFS net namespace
+ *
+ * This function updates the ->writeverf field of @nn. This field
+ * contains an opaque cookie that, according to Section 18.32.3 of
+ * RFC 8881, "the client can use to determine whether a server has
+ * changed instance state (e.g., server restart) between a call to
+ * WRITE and a subsequent call to either WRITE or COMMIT. This
+ * cookie MUST be unchanged during a single instance of the NFSv4.1
+ * server and MUST be unique between instances of the NFSv4.1
+ * server."
+ */
+void nfsd_reset_write_verifier(struct nfsd_net *nn)
{
- write_seqlock(&nn->boot_lock);
- nfsd_reset_boot_verifier_locked(nn);
- write_sequnlock(&nn->boot_lock);
+ write_seqlock(&nn->writeverf_lock);
+ nfsd_reset_write_verifier_locked(nn);
+ write_sequnlock(&nn->writeverf_lock);
}
static int nfsd_startup_net(struct net *net, const struct cred *cred)
@@ -435,6 +459,7 @@ static void nfsd_shutdown_net(struct net *net)
nfsd_shutdown_generic();
}
+static DEFINE_SPINLOCK(nfsd_notifier_lock);
static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
@@ -444,18 +469,17 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct sockaddr_in sin;
- if ((event != NETDEV_DOWN) ||
- !atomic_inc_not_zero(&nn->ntf_refcnt))
+ if (event != NETDEV_DOWN || !nn->nfsd_serv)
goto out;
+ spin_lock(&nfsd_notifier_lock);
if (nn->nfsd_serv) {
dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = ifa->ifa_local;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin);
}
- atomic_dec(&nn->ntf_refcnt);
- wake_up(&nn->ntf_wq);
+ spin_unlock(&nfsd_notifier_lock);
out:
return NOTIFY_DONE;
@@ -475,10 +499,10 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct sockaddr_in6 sin6;
- if ((event != NETDEV_DOWN) ||
- !atomic_inc_not_zero(&nn->ntf_refcnt))
+ if (event != NETDEV_DOWN || !nn->nfsd_serv)
goto out;
+ spin_lock(&nfsd_notifier_lock);
if (nn->nfsd_serv) {
dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
@@ -487,8 +511,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
}
- atomic_dec(&nn->ntf_refcnt);
- wake_up(&nn->ntf_wq);
+ spin_unlock(&nfsd_notifier_lock);
+
out:
return NOTIFY_DONE;
}
@@ -505,7 +529,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- atomic_dec(&nn->ntf_refcnt);
/* check if the notifier still has clients */
if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
@@ -513,7 +536,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
}
- wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0);
/*
* write_ports can create the server without actually starting
@@ -594,20 +616,9 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = {
.svo_shutdown = nfsd_last_thread,
.svo_function = nfsd,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_setup = svc_set_num_threads,
.svo_module = THIS_MODULE,
};
-static void nfsd_complete_shutdown(struct net *net)
-{
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
- WARN_ON(!mutex_is_locked(&nfsd_mutex));
-
- nn->nfsd_serv = NULL;
- complete(&nn->nfsd_shutdown_complete);
-}
-
void nfsd_shutdown_threads(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -622,11 +633,9 @@ void nfsd_shutdown_threads(struct net *net)
svc_get(serv);
/* Kill outstanding nfsd threads */
- serv->sv_ops->svo_setup(serv, NULL, 0);
- nfsd_destroy(net);
+ svc_set_num_threads(serv, NULL, 0);
+ nfsd_put(net);
mutex_unlock(&nfsd_mutex);
- /* Wait for shutdown of nfsd_serv to complete */
- wait_for_completion(&nn->nfsd_shutdown_complete);
}
bool i_am_nfsd(void)
@@ -638,6 +647,7 @@ int nfsd_create_serv(struct net *net)
{
int error;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv;
WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nn->nfsd_serv) {
@@ -647,19 +657,23 @@ int nfsd_create_serv(struct net *net)
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
nfsd_reset_versions(nn);
- nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
- &nfsd_thread_sv_ops);
- if (nn->nfsd_serv == NULL)
+ serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
+ &nfsd_thread_sv_ops);
+ if (serv == NULL)
return -ENOMEM;
- init_completion(&nn->nfsd_shutdown_complete);
- nn->nfsd_serv->sv_maxconn = nn->max_connections;
- error = svc_bind(nn->nfsd_serv, net);
+ serv->sv_maxconn = nn->max_connections;
+ error = svc_bind(serv, net);
if (error < 0) {
- svc_destroy(nn->nfsd_serv);
- nfsd_complete_shutdown(net);
+ /* NOT nfsd_put() as notifiers (see below) haven't
+ * been set up yet.
+ */
+ svc_put(serv);
return error;
}
+ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = serv;
+ spin_unlock(&nfsd_notifier_lock);
set_max_drc();
/* check if the notifier is already set */
@@ -669,8 +683,7 @@ int nfsd_create_serv(struct net *net)
register_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
}
- atomic_inc(&nn->ntf_refcnt);
- nfsd_reset_boot_verifier(nn);
+ nfsd_reset_write_verifier(nn);
return 0;
}
@@ -697,16 +710,26 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
return 0;
}
-void nfsd_destroy(struct net *net)
+/* This is the callback for kref_put() below.
+ * There is no code here as the first thing to be done is
+ * call svc_shutdown_net(), but we cannot get the 'net' from
+ * the kref. So do all the work when kref_put returns true.
+ */
+static void nfsd_noop(struct kref *ref)
+{
+}
+
+void nfsd_put(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- int destroy = (nn->nfsd_serv->sv_nrthreads == 1);
- if (destroy)
+ if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
svc_shutdown_net(nn->nfsd_serv, net);
- svc_destroy(nn->nfsd_serv);
- if (destroy)
- nfsd_complete_shutdown(net);
+ svc_destroy(&nn->nfsd_serv->sv_refcnt);
+ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = NULL;
+ spin_unlock(&nfsd_notifier_lock);
+ }
}
int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
@@ -733,7 +756,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
if (tot > NFSD_MAXSERVS) {
/* total too large: scale down requested numbers */
for (i = 0; i < n && tot > 0; i++) {
- int new = nthreads[i] * NFSD_MAXSERVS / tot;
+ int new = nthreads[i] * NFSD_MAXSERVS / tot;
tot -= (nthreads[i] - new);
nthreads[i] = new;
}
@@ -753,12 +776,13 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
/* apply the new numbers */
svc_get(nn->nfsd_serv);
for (i = 0; i < n; i++) {
- err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
- &nn->nfsd_serv->sv_pools[i], nthreads[i]);
+ err = svc_set_num_threads(nn->nfsd_serv,
+ &nn->nfsd_serv->sv_pools[i],
+ nthreads[i]);
if (err)
break;
}
- nfsd_destroy(net);
+ nfsd_put(net);
return err;
}
@@ -795,21 +819,19 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
error = nfsd_startup_net(net, cred);
if (error)
- goto out_destroy;
- error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
- NULL, nrservs);
+ goto out_put;
+ error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs);
if (error)
goto out_shutdown;
- /* We are holding a reference to nn->nfsd_serv which
- * we don't want to count in the return value,
- * so subtract 1
- */
- error = nn->nfsd_serv->sv_nrthreads - 1;
+ error = nn->nfsd_serv->sv_nrthreads;
out_shutdown:
if (error < 0 && !nfsd_up_before)
nfsd_shutdown_net(net);
-out_destroy:
- nfsd_destroy(net); /* Release server */
+out_put:
+ /* Threads now hold service active */
+ if (xchg(&nn->keep_active, 0))
+ nfsd_put(net);
+ nfsd_put(net);
out:
mutex_unlock(&nfsd_mutex);
return error;
@@ -923,9 +945,6 @@ nfsd(void *vrqstp)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int err;
- /* Lock module and set up kernel thread */
- mutex_lock(&nfsd_mutex);
-
/* At this point, the thread shares current->fs
* with the init process. We need to create files with the
* umask as defined by the client instead of init's umask. */
@@ -945,8 +964,7 @@ nfsd(void *vrqstp)
allow_signal(SIGINT);
allow_signal(SIGQUIT);
- nfsdstats.th_cnt++;
- mutex_unlock(&nfsd_mutex);
+ atomic_inc(&nfsdstats.th_cnt);
set_freezable();
@@ -973,20 +991,36 @@ nfsd(void *vrqstp)
/* Clear signals before calling svc_exit_thread() */
flush_signals(current);
- mutex_lock(&nfsd_mutex);
- nfsdstats.th_cnt --;
+ atomic_dec(&nfsdstats.th_cnt);
out:
- rqstp->rq_server = NULL;
+ /* Take an extra ref so that the svc_put in svc_exit_thread()
+ * doesn't call svc_destroy()
+ */
+ svc_get(nn->nfsd_serv);
/* Release the thread */
svc_exit_thread(rqstp);
- nfsd_destroy(net);
+ /* We need to drop a ref, but may not drop the last reference
+ * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that
+ * could deadlock with nfsd_shutdown_threads() waiting for us.
+ * So three options are:
+ * - drop a non-final reference,
+ * - get the mutex without waiting
+ * - sleep briefly andd try the above again
+ */
+ while (!svc_put_not_last(nn->nfsd_serv)) {
+ if (mutex_trylock(&nfsd_mutex)) {
+ nfsd_put(net);
+ mutex_unlock(&nfsd_mutex);
+ break;
+ }
+ msleep(20);
+ }
/* Release module */
- mutex_unlock(&nfsd_mutex);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
@@ -1096,7 +1130,6 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
mutex_unlock(&nfsd_mutex);
return -ENODEV;
}
- /* bump up the psudo refcount while traversing */
svc_get(nn->nfsd_serv);
ret = svc_pool_stats_open(nn->nfsd_serv, file);
mutex_unlock(&nfsd_mutex);
@@ -1109,8 +1142,7 @@ int nfsd_pool_stats_release(struct inode *inode, struct file *file)
struct net *net = inode->i_sb->s_fs_info;
mutex_lock(&nfsd_mutex);
- /* this function really, really should have been called svc_put() */
- nfsd_destroy(net);
+ nfsd_put(net);
mutex_unlock(&nfsd_mutex);
return ret;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e73bdbb1634a..95457cfd37fc 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -568,6 +568,10 @@ struct nfs4_ol_stateid {
struct list_head st_locks;
struct nfs4_stateowner *st_stateowner;
struct nfs4_clnt_odstate *st_clnt_odstate;
+/*
+ * These bitmasks use 3 separate bits for READ, ALLOW, and BOTH; see the
+ * comment above bmap_to_share_mode() for explanation:
+ */
unsigned char st_access_bmap;
unsigned char st_deny_bmap;
struct nfs4_ol_stateid *st_openstp;
@@ -629,6 +633,7 @@ struct nfsd4_blocked_lock {
struct file_lock nbl_lock;
struct knfsd_fh nbl_fh;
struct nfsd4_callback nbl_cb;
+ struct kref nbl_kref;
};
struct nfsd4_compound_state;
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 1d3b881e7382..a8c5a02a84f0 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -45,7 +45,7 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE]));
/* thread usage: */
- seq_printf(seq, "th %u 0", nfsdstats.th_cnt);
+ seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt));
/* deprecated thread usage histogram stats */
for (i = 0; i < 10; i++)
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index 51ecda852e23..9b43dc3d9991 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -29,11 +29,9 @@ enum {
struct nfsd_stats {
struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
- /* Protected by nfsd_mutex */
- unsigned int th_cnt; /* number of available threads */
+ atomic_t th_cnt; /* number of available threads */
};
-
extern struct nfsd_stats nfsdstats;
extern struct svc_stat nfsd_svcstats;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index f1e0d3c51bc2..c4cf56327843 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -47,7 +47,7 @@
rqstp->rq_xprt->xpt_remotelen); \
} while (0);
-TRACE_EVENT(nfsd_garbage_args_err,
+DECLARE_EVENT_CLASS(nfsd_xdr_err_class,
TP_PROTO(
const struct svc_rqst *rqstp
),
@@ -69,27 +69,13 @@ TRACE_EVENT(nfsd_garbage_args_err,
)
);
-TRACE_EVENT(nfsd_cant_encode_err,
- TP_PROTO(
- const struct svc_rqst *rqstp
- ),
- TP_ARGS(rqstp),
- TP_STRUCT__entry(
- NFSD_TRACE_PROC_ARG_FIELDS
-
- __field(u32, vers)
- __field(u32, proc)
- ),
- TP_fast_assign(
- NFSD_TRACE_PROC_ARG_ASSIGNMENTS
+#define DEFINE_NFSD_XDR_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_xdr_err_class, nfsd_##name##_err, \
+ TP_PROTO(const struct svc_rqst *rqstp), \
+ TP_ARGS(rqstp))
- __entry->vers = rqstp->rq_vers;
- __entry->proc = rqstp->rq_proc;
- ),
- TP_printk("xid=0x%08x vers=%u proc=%u",
- __entry->xid, __entry->vers, __entry->proc
- )
-);
+DEFINE_NFSD_XDR_ERR_EVENT(garbage_args);
+DEFINE_NFSD_XDR_ERR_EVENT(cant_encode);
#define show_nfsd_may_flags(x) \
__print_flags(x, "|", \
@@ -413,6 +399,56 @@ TRACE_EVENT(nfsd_dirent,
)
)
+DECLARE_EVENT_CLASS(nfsd_copy_err_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+ struct svc_fh *src_fhp,
+ loff_t src_offset,
+ struct svc_fh *dst_fhp,
+ loff_t dst_offset,
+ u64 count,
+ int status),
+ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, count, status),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, src_fh_hash)
+ __field(loff_t, src_offset)
+ __field(u32, dst_fh_hash)
+ __field(loff_t, dst_offset)
+ __field(u64, count)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->src_fh_hash = knfsd_fh_hash(&src_fhp->fh_handle);
+ __entry->src_offset = src_offset;
+ __entry->dst_fh_hash = knfsd_fh_hash(&dst_fhp->fh_handle);
+ __entry->dst_offset = dst_offset;
+ __entry->count = count;
+ __entry->status = status;
+ ),
+ TP_printk("xid=0x%08x src_fh_hash=0x%08x src_offset=%lld "
+ "dst_fh_hash=0x%08x dst_offset=%lld "
+ "count=%llu status=%d",
+ __entry->xid, __entry->src_fh_hash, __entry->src_offset,
+ __entry->dst_fh_hash, __entry->dst_offset,
+ (unsigned long long)__entry->count,
+ __entry->status)
+)
+
+#define DEFINE_NFSD_COPY_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_copy_err_class, nfsd_##name, \
+ TP_PROTO(struct svc_rqst *rqstp, \
+ struct svc_fh *src_fhp, \
+ loff_t src_offset, \
+ struct svc_fh *dst_fhp, \
+ loff_t dst_offset, \
+ u64 count, \
+ int status), \
+ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, \
+ count, status))
+
+DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
+
#include "state.h"
#include "filecache.h"
#include "vfs.h"
@@ -538,6 +574,34 @@ DEFINE_EVENT(nfsd_net_class, nfsd_##name, \
DEFINE_NET_EVENT(grace_start);
DEFINE_NET_EVENT(grace_complete);
+TRACE_EVENT(nfsd_writeverf_reset,
+ TP_PROTO(
+ const struct nfsd_net *nn,
+ const struct svc_rqst *rqstp,
+ int error
+ ),
+ TP_ARGS(nn, rqstp, error),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(u32, xid)
+ __field(int, error)
+ __array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->error = error;
+
+ /* avoid seqlock inside TP_fast_assign */
+ memcpy(__entry->verifier, nn->writeverf,
+ NFS4_VERIFIER_SIZE);
+ ),
+ TP_printk("boot_time=%16llx xid=0x%08x error=%d new verifier=0x%s",
+ __entry->boot_time, __entry->xid, __entry->error,
+ __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
+ )
+);
+
TRACE_EVENT(nfsd_clid_cred_mismatch,
TP_PROTO(
const struct nfs4_client *clp,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c99857689e2c..99c2b9dfbb10 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -40,6 +40,7 @@
#include "../internal.h"
#include "acl.h"
#include "idmap.h"
+#include "xdr4.h"
#endif /* CONFIG_NFSD_V4 */
#include "nfsd.h"
@@ -517,15 +518,23 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif
-__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
- struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
+static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp)
+{
+ return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate;
+}
+
+__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos,
+ u64 count, bool sync)
{
struct file *src = nf_src->nf_file;
struct file *dst = nf_dst->nf_file;
+ errseq_t since;
loff_t cloned;
__be32 ret = 0;
- down_write(&nf_dst->nf_rwsem);
+ since = READ_ONCE(dst->f_wb_err);
cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
if (cloned < 0) {
ret = nfserrno(cloned);
@@ -540,15 +549,25 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
+ if (!status)
status = commit_inode_metadata(file_inode(src));
if (status < 0) {
- nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
- nfsd_net_id));
+ struct nfsd_net *nn = net_generic(nf_dst->nf_net,
+ nfsd_net_id);
+
+ trace_nfsd_clone_file_range_err(rqstp,
+ &nfsd4_get_cstate(rqstp)->save_fh,
+ src_pos,
+ &nfsd4_get_cstate(rqstp)->current_fh,
+ dst_pos,
+ count, status);
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, status);
ret = nfserrno(status);
}
}
out_err:
- up_write(&nf_dst->nf_rwsem);
return ret;
}
@@ -777,6 +796,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
int may_flags, struct file **filp)
{
__be32 err;
+ bool retried = false;
validate_process_creds();
/*
@@ -792,9 +812,16 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
*/
if (type == S_IFREG)
may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+retry:
err = fh_verify(rqstp, fhp, type, may_flags);
- if (!err)
+ if (!err) {
err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ if (err == nfserr_stale && !retried) {
+ retried = true;
+ fh_put(fhp);
+ goto retry;
+ }
+ }
validate_process_creds();
return err;
}
@@ -944,10 +971,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
unsigned long *cnt, int stable,
__be32 *verf)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file *file = nf->nf_file;
struct super_block *sb = file_inode(file)->i_sb;
struct svc_export *exp;
struct iov_iter iter;
+ errseq_t since;
__be32 nfserr;
int host_err;
int use_wgather;
@@ -985,36 +1014,28 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
flags |= RWF_SYNC;
iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
- if (flags & RWF_SYNC) {
- down_write(&nf->nf_rwsem);
- host_err = vfs_iter_write(file, &iter, &pos, flags);
- if (host_err < 0)
- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
- nfsd_net_id));
- up_write(&nf->nf_rwsem);
- } else {
- down_read(&nf->nf_rwsem);
- if (verf)
- nfsd_copy_boot_verifier(verf,
- net_generic(SVC_NET(rqstp),
- nfsd_net_id));
- host_err = vfs_iter_write(file, &iter, &pos, flags);
- up_read(&nf->nf_rwsem);
- }
+ since = READ_ONCE(file->f_wb_err);
+ if (verf)
+ nfsd_copy_write_verifier(verf, nn);
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0) {
- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
- nfsd_net_id));
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, host_err);
goto out_nfserr;
}
*cnt = host_err;
nfsd_stats_io_write_add(exp, *cnt);
fsnotify_modify(file);
+ host_err = filemap_check_wb_err(file->f_mapping, since);
+ if (host_err < 0)
+ goto out_nfserr;
if (stable && use_wgather) {
host_err = wait_for_concurrent_writes(file);
- if (host_err < 0)
- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
- nfsd_net_id));
+ if (host_err < 0) {
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, host_err);
+ }
}
out_nfserr:
@@ -1089,19 +1110,6 @@ out:
}
#ifdef CONFIG_NFSD_V3
-static int
-nfsd_filemap_write_and_wait_range(struct nfsd_file *nf, loff_t offset,
- loff_t end)
-{
- struct address_space *mapping = nf->nf_file->f_mapping;
- int ret = filemap_fdatawrite_range(mapping, offset, end);
-
- if (ret)
- return ret;
- filemap_fdatawait_range_keep_errors(mapping, offset, end);
- return 0;
-}
-
/*
* Commit all pending writes to stable storage.
*
@@ -1115,6 +1123,7 @@ __be32
nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, unsigned long count, __be32 *verf)
{
+ struct nfsd_net *nn;
struct nfsd_file *nf;
loff_t end = LLONG_MAX;
__be32 err = nfserr_inval;
@@ -1131,29 +1140,28 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
+ nn = net_generic(nf->nf_net, nfsd_net_id);
if (EX_ISSYNC(fhp->fh_export)) {
- int err2 = nfsd_filemap_write_and_wait_range(nf, offset, end);
+ errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
+ int err2;
- down_write(&nf->nf_rwsem);
- if (!err2)
- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
switch (err2) {
case 0:
- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
- nfsd_net_id));
+ nfsd_copy_write_verifier(verf, nn);
+ err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
+ since);
break;
case -EINVAL:
err = nfserr_notsupp;
break;
default:
- err = nfserrno(err2);
- nfsd_reset_boot_verifier(net_generic(nf->nf_net,
- nfsd_net_id));
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, err2);
}
- up_write(&nf->nf_rwsem);
+ err = nfserrno(err2);
} else
- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
- nfsd_net_id));
+ nfsd_copy_write_verifier(verf, nn);
nfsd_file_put(nf);
out:
@@ -1747,8 +1755,8 @@ retry:
* so do it by hand */
trap = lock_rename(tdentry, fdentry);
ffhp->fh_locked = tfhp->fh_locked = true;
- fill_pre_wcc(ffhp);
- fill_pre_wcc(tfhp);
+ fh_fill_pre_attrs(ffhp);
+ fh_fill_pre_attrs(tfhp);
odentry = lookup_one_len(fname, fdentry, flen);
host_err = PTR_ERR(odentry);
@@ -1808,8 +1816,8 @@ retry:
* were the same, so again we do it by hand.
*/
if (!close_cached) {
- fill_post_wcc(ffhp);
- fill_post_wcc(tfhp);
+ fh_fill_post_attrs(ffhp);
+ fh_fill_post_attrs(tfhp);
}
unlock_rename(tdentry, fdentry);
ffhp->fh_locked = tfhp->fh_locked = false;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index b21b76e6b9a8..9f56dcb22ff7 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -57,7 +57,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
struct xdr_netobj *);
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
struct file *, loff_t, loff_t, int);
-__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ struct nfsd_file *nf_src, u64 src_pos,
struct nfsd_file *nf_dst, u64 dst_pos,
u64 count, bool sync);
#endif /* CONFIG_NFSD_V4 */
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index bc3e2cd4117f..063dd16d75b5 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -195,12 +195,12 @@ void nilfs_page_bug(struct page *page)
*/
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
- struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
+ struct buffer_head *dbh, *dbufs, *sbh;
unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
BUG_ON(PageWriteback(dst));
- sbh = sbufs = page_buffers(src);
+ sbh = page_buffers(src);
if (!page_has_buffers(dst))
create_empty_buffers(dst, sbh->b_size, 0);
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index d5ebebb034ff..829dd4a61b66 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -19,7 +19,25 @@
#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
-int dir_notify_enable __read_mostly = 1;
+static int dir_notify_enable __read_mostly = 1;
+#ifdef CONFIG_SYSCTL
+static struct ctl_table dnotify_sysctls[] = {
+ {
+ .procname = "dir-notify-enable",
+ .data = &dir_notify_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {}
+};
+static void __init dnotify_sysctl_init(void)
+{
+ register_sysctl_init("fs", dnotify_sysctls);
+}
+#else
+#define dnotify_sysctl_init() do { } while (0)
+#endif
static struct kmem_cache *dnotify_struct_cache __read_mostly;
static struct kmem_cache *dnotify_mark_cache __read_mostly;
@@ -386,6 +404,7 @@ static int __init dnotify_init(void)
dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
if (IS_ERR(dnotify_group))
panic("unable to allocate fsnotify group for dnotify\n");
+ dnotify_sysctl_init();
return 0;
}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 73a3e939c921..73b1615f9d96 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -59,7 +59,7 @@ static int fanotify_max_queued_events __read_mostly;
static long ft_zero = 0;
static long ft_int_max = INT_MAX;
-struct ctl_table fanotify_table[] = {
+static struct ctl_table fanotify_table[] = {
{
.procname = "max_user_groups",
.data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS],
@@ -88,6 +88,13 @@ struct ctl_table fanotify_table[] = {
},
{ }
};
+
+static void __init fanotify_sysctls_init(void)
+{
+ register_sysctl("fs/fanotify", fanotify_table);
+}
+#else
+#define fanotify_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
/*
@@ -1743,6 +1750,7 @@ static int __init fanotify_user_setup(void)
init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
FANOTIFY_DEFAULT_MAX_GROUPS;
init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS] = max_marks;
+ fanotify_sysctls_init();
return 0;
}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 29fca3284bb5..54583f62dc44 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -58,7 +58,7 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
static long it_zero = 0;
static long it_int_max = INT_MAX;
-struct ctl_table inotify_table[] = {
+static struct ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
.data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
@@ -87,6 +87,14 @@ struct ctl_table inotify_table[] = {
},
{ }
};
+
+static void __init inotify_sysctls_init(void)
+{
+ register_sysctl("fs/inotify", inotify_table);
+}
+
+#else
+#define inotify_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
@@ -849,6 +857,7 @@ static int __init inotify_user_setup(void)
inotify_max_queued_events = 16384;
init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max;
+ inotify_sysctls_init();
return 0;
}
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index d563abc3e136..2911c04a33e0 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
*
* Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 8aaec7e0804e..fb825059d488 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -11,7 +11,6 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
-#include <linux/cleancache.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index bb247bc349e4..bf9357123bc5 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2040,7 +2040,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
int i, idx;
struct ocfs2_extent_list *el, *left_el, *right_el;
struct ocfs2_extent_rec *left_rec, *right_rec;
- struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
+ struct buffer_head *root_bh;
/*
* Update the counts and position values within all the
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 68d11c295dd3..498da317580a 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1799,20 +1799,20 @@ try_again:
*/
ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
cluster_of_pages, mmap_page);
- if (ret && ret != -EAGAIN) {
- mlog_errno(ret);
- goto out_quota;
- }
+ if (ret) {
+ /*
+ * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
+ * the target page. In this case, we exit with no error and no target
+ * page. This will trigger the caller, page_mkwrite(), to re-try
+ * the operation.
+ */
+ if (type == OCFS2_WRITE_MMAP && ret == -EAGAIN) {
+ BUG_ON(wc->w_target_page);
+ ret = 0;
+ goto out_quota;
+ }
- /*
- * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
- * the target page. In this case, we exit with no error and no target
- * page. This will trigger the caller, page_mkwrite(), to re-try
- * the operation.
- */
- if (ret == -EAGAIN) {
- BUG_ON(wc->w_target_page);
- ret = 0;
+ mlog_errno(ret);
goto out_quota;
}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f89ffcbd585f..a17be1618bf7 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -379,7 +379,7 @@ static void o2hb_nego_timeout(struct work_struct *work)
o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
/* lowest node as master node to make negotiate decision. */
- master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0);
+ master_node = find_first_bit(live_node_bitmap, O2NM_MAX_NODES);
if (master_node == o2nm_this_node()) {
if (!test_bit(master_node, reg->hr_nego_node_bitmap)) {
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 810d32815593..563881ddbf00 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -120,7 +120,8 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
define_mask(KTHREAD),
};
-static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
+static struct attribute *mlog_default_attrs[MLOG_MAX_BITS] = {NULL, };
+ATTRIBUTE_GROUPS(mlog_default);
static ssize_t mlog_show(struct kobject *obj, struct attribute *attr,
char *buf)
@@ -144,8 +145,8 @@ static const struct sysfs_ops mlog_attr_ops = {
};
static struct kobj_type mlog_ktype = {
- .default_attrs = mlog_attr_ptrs,
- .sysfs_ops = &mlog_attr_ops,
+ .default_groups = mlog_default_groups,
+ .sysfs_ops = &mlog_attr_ops,
};
static struct kset mlog_kset = {
@@ -157,10 +158,10 @@ int mlog_sys_init(struct kset *o2cb_kset)
int i = 0;
while (mlog_attrs[i].attr.mode) {
- mlog_attr_ptrs[i] = &mlog_attrs[i].attr;
+ mlog_default_attrs[i] = &mlog_attrs[i].attr;
i++;
}
- mlog_attr_ptrs[i] = NULL;
+ mlog_default_attrs[i] = NULL;
kobject_set_name(&mlog_kset.kobj, "logmask");
mlog_kset.kobj.kset = o2cb_kset;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index bd8d534f11cb..f2cc1ff29e6d 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3343,7 +3343,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
struct ocfs2_dir_entry *de, *last_de = NULL;
char *de_buf, *limit;
unsigned long offset = 0;
- unsigned int rec_len, new_rec_len, free_space = dir->i_sb->s_blocksize;
+ unsigned int rec_len, new_rec_len, free_space;
/*
* This calculates how many free bytes we'd have in block zero, should
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 9f90fc9551e1..c4eccd499db8 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1045,7 +1045,7 @@ static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map)
int status, ret = 0, i;
char *p;
- if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
+ if (find_first_bit(node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES)
goto bail;
qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL);
@@ -1217,7 +1217,7 @@ static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map)
struct o2nm_node *node;
int ret = 0, status, count, i;
- if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
+ if (find_first_bit(node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES)
goto bail;
qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9b88219febb5..227da5b1b6ab 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -861,7 +861,7 @@ lookup:
* to see if there are any nodes that still need to be
* considered. these will not appear in the mle nodemap
* but they might own this lockres. wait on them. */
- bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES);
if (bit < O2NM_MAX_NODES) {
mlog(0, "%s: res %.*s, At least one node (%d) "
"to recover before lock mastery can begin\n",
@@ -912,7 +912,7 @@ redo_request:
dlm_wait_for_recovery(dlm);
spin_lock(&dlm->spinlock);
- bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES);
if (bit < O2NM_MAX_NODES) {
mlog(0, "%s: res %.*s, At least one node (%d) "
"to recover before lock mastery can begin\n",
@@ -1079,7 +1079,7 @@ recheck:
sleep = 1;
/* have all nodes responded? */
if (voting_done && !*blocked) {
- bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES);
if (dlm->node_num <= bit) {
/* my node number is lowest.
* now tell other nodes that I am
@@ -1234,8 +1234,8 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
} else {
mlog(ML_ERROR, "node down! %d\n", node);
if (blocked) {
- int lowest = find_next_bit(mle->maybe_map,
- O2NM_MAX_NODES, 0);
+ int lowest = find_first_bit(mle->maybe_map,
+ O2NM_MAX_NODES);
/* act like it was never there */
clear_bit(node, mle->maybe_map);
@@ -1795,7 +1795,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
"MLE for it! (%.*s)\n", assert->node_idx,
namelen, name);
} else {
- int bit = find_next_bit (mle->maybe_map, O2NM_MAX_NODES, 0);
+ int bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES);
if (bit >= O2NM_MAX_NODES) {
/* not necessarily an error, though less likely.
* could be master just re-asserting. */
@@ -2521,7 +2521,7 @@ static int dlm_is_lockres_migratable(struct dlm_ctxt *dlm,
}
if (!nonlocal) {
- node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+ node_ref = find_first_bit(res->refmap, O2NM_MAX_NODES);
if (node_ref >= O2NM_MAX_NODES)
return 0;
}
@@ -3303,7 +3303,7 @@ static void dlm_clean_block_mle(struct dlm_ctxt *dlm,
BUG_ON(mle->type != DLM_MLE_BLOCK);
spin_lock(&mle->spinlock);
- bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES);
if (bit != dead_node) {
mlog(0, "mle found, but dead node %u would not have been "
"master\n", dead_node);
@@ -3542,7 +3542,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm)
spin_lock(&dlm->master_lock);
BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING);
- BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES));
+ BUG_ON((find_first_bit(dlm->domain_map, O2NM_MAX_NODES) < O2NM_MAX_NODES));
for (i = 0; i < DLM_HASH_BUCKETS; i++) {
bucket = dlm_master_hash(dlm, i);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 5cd5f7511dac..52ad342fec3e 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -451,7 +451,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
int bit;
- bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES);
if (bit >= O2NM_MAX_NODES || bit < 0)
dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
else
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index c350bd4df770..eedf07ca23ca 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -92,7 +92,7 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res)
return 0;
/* Another node has this resource with this node as the master */
- bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+ bit = find_first_bit(res->refmap, O2NM_MAX_NODES);
if (bit < O2NM_MAX_NODES)
return 0;
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index de56e6231af8..1ad7106741f8 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -94,6 +94,7 @@ static struct attribute *ocfs2_filecheck_attrs[] = {
&ocfs2_filecheck_attr_set.attr,
NULL
};
+ATTRIBUTE_GROUPS(ocfs2_filecheck);
static void ocfs2_filecheck_release(struct kobject *kobj)
{
@@ -138,7 +139,7 @@ static const struct sysfs_ops ocfs2_filecheck_ops = {
};
static struct kobj_type ocfs2_ktype_filecheck = {
- .default_attrs = ocfs2_filecheck_attrs,
+ .default_groups = ocfs2_filecheck_groups,
.sysfs_ops = &ocfs2_filecheck_ops,
.release = ocfs2_filecheck_release,
};
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index dbf9b9e97d74..1887a2708709 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1669,8 +1669,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
status = jbd2_journal_load(journal);
if (status < 0) {
mlog_errno(status);
- if (!igrab(inode))
- BUG();
+ BUG_ON(!igrab(inode));
jbd2_journal_destroy(journal);
goto done;
}
@@ -1699,8 +1698,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
if (status < 0)
mlog_errno(status);
- if (!igrab(inode))
- BUG();
+ BUG_ON(!igrab(inode));
jbd2_journal_destroy(journal);
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 16f1bfc407f2..731558a6f27d 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -672,31 +672,8 @@ static struct ctl_table ocfs2_mod_table[] = {
{ }
};
-static struct ctl_table ocfs2_kern_table[] = {
- {
- .procname = "ocfs2",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = ocfs2_mod_table
- },
- { }
-};
-
-static struct ctl_table ocfs2_root_table[] = {
- {
- .procname = "fs",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = ocfs2_kern_table
- },
- { }
-};
-
static struct ctl_table_header *ocfs2_table_header;
-
/*
* Initialization
*/
@@ -705,7 +682,7 @@ static int __init ocfs2_stack_glue_init(void)
{
strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);
- ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
+ ocfs2_table_header = register_sysctl("fs/ocfs2", ocfs2_mod_table);
if (!ocfs2_table_header) {
printk(KERN_ERR
"ocfs2 stack glue: unable to register sysctl\n");
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 1286b88b6fa1..2772dec9dcea 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -25,7 +25,6 @@
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/quotaops.h>
-#include <linux/cleancache.h>
#include <linux/signal.h>
#define CREATE_TRACE_POINTS
@@ -2283,7 +2282,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
mlog_errno(status);
goto bail;
}
- cleancache_init_shared_fs(sb);
osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM);
if (!osb->ocfs2_wq) {
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 538e839590ef..b501dc07f922 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -176,7 +176,7 @@ orangefs_bufmap_free(struct orangefs_bufmap *bufmap)
{
kfree(bufmap->page_array);
kfree(bufmap->desc_array);
- kfree(bufmap->buffer_index_array);
+ bitmap_free(bufmap->buffer_index_array);
kfree(bufmap);
}
@@ -226,8 +226,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
bufmap->desc_size = user_desc->size;
bufmap->desc_shift = ilog2(bufmap->desc_size);
- bufmap->buffer_index_array =
- kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL);
+ bufmap->buffer_index_array = bitmap_zalloc(bufmap->desc_count, GFP_KERNEL);
if (!bufmap->buffer_index_array)
goto out_free_bufmap;
@@ -250,7 +249,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
out_free_desc_array:
kfree(bufmap->desc_array);
out_free_index_array:
- kfree(bufmap->buffer_index_array);
+ bitmap_free(bufmap->buffer_index_array);
out_free_bufmap:
kfree(bufmap);
out:
diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c
index 3627ea946402..de80b62553bb 100644
--- a/fs/orangefs/orangefs-sysfs.c
+++ b/fs/orangefs/orangefs-sysfs.c
@@ -894,10 +894,11 @@ static struct attribute *orangefs_default_attrs[] = {
&perf_time_interval_secs_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(orangefs_default);
static struct kobj_type orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = orangefs_default_attrs,
+ .default_groups = orangefs_default_groups,
};
static struct orangefs_attribute acache_hard_limit_attribute =
@@ -931,10 +932,11 @@ static struct attribute *acache_orangefs_default_attrs[] = {
&acache_timeout_msecs_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(acache_orangefs_default);
static struct kobj_type acache_orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = acache_orangefs_default_attrs,
+ .default_groups = acache_orangefs_default_groups,
};
static struct orangefs_attribute capcache_hard_limit_attribute =
@@ -968,10 +970,11 @@ static struct attribute *capcache_orangefs_default_attrs[] = {
&capcache_timeout_secs_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(capcache_orangefs_default);
static struct kobj_type capcache_orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = capcache_orangefs_default_attrs,
+ .default_groups = capcache_orangefs_default_groups,
};
static struct orangefs_attribute ccache_hard_limit_attribute =
@@ -1005,10 +1008,11 @@ static struct attribute *ccache_orangefs_default_attrs[] = {
&ccache_timeout_secs_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(ccache_orangefs_default);
static struct kobj_type ccache_orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = ccache_orangefs_default_attrs,
+ .default_groups = ccache_orangefs_default_groups,
};
static struct orangefs_attribute ncache_hard_limit_attribute =
@@ -1042,10 +1046,11 @@ static struct attribute *ncache_orangefs_default_attrs[] = {
&ncache_timeout_msecs_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(ncache_orangefs_default);
static struct kobj_type ncache_orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = ncache_orangefs_default_attrs,
+ .default_groups = ncache_orangefs_default_groups,
};
static struct orangefs_attribute pc_acache_attribute =
@@ -1072,10 +1077,11 @@ static struct attribute *pc_orangefs_default_attrs[] = {
&pc_ncache_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(pc_orangefs_default);
static struct kobj_type pc_orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = pc_orangefs_default_attrs,
+ .default_groups = pc_orangefs_default_groups,
};
static struct orangefs_attribute stats_reads_attribute =
@@ -1095,10 +1101,11 @@ static struct attribute *stats_orangefs_default_attrs[] = {
&stats_writes_attribute.attr,
NULL,
};
+ATTRIBUTE_GROUPS(stats_orangefs_default);
static struct kobj_type stats_orangefs_ktype = {
.sysfs_ops = &orangefs_sysfs_ops,
- .default_attrs = stats_orangefs_default_attrs,
+ .default_groups = stats_orangefs_default_groups,
};
static struct kobject *orangefs_obj;
diff --git a/fs/pipe.c b/fs/pipe.c
index 6d4342bad9f1..cc28623a67b6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -25,6 +25,7 @@
#include <linux/fcntl.h>
#include <linux/memcontrol.h>
#include <linux/watch_queue.h>
+#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
@@ -50,13 +51,13 @@
* The max size that a non-root user is allowed to grow the pipe. Can
* be set by root in /proc/sys/fs/pipe-max-size
*/
-unsigned int pipe_max_size = 1048576;
+static unsigned int pipe_max_size = 1048576;
/* Maximum allocatable pages per user. Hard limit is unset by default, soft
* matches default values.
*/
-unsigned long pipe_user_pages_hard;
-unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+static unsigned long pipe_user_pages_hard;
+static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
/*
* We use head and tail indices that aren't masked off, except at the point of
@@ -1428,6 +1429,60 @@ static struct file_system_type pipe_fs_type = {
.kill_sb = kill_anon_super,
};
+#ifdef CONFIG_SYSCTL
+static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data)
+{
+ if (write) {
+ unsigned int val;
+
+ val = round_pipe_size(*lvalp);
+ if (val == 0)
+ return -EINVAL;
+
+ *valp = val;
+ } else {
+ unsigned int val = *valp;
+ *lvalp = (unsigned long) val;
+ }
+
+ return 0;
+}
+
+static int proc_dopipe_max_size(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ return do_proc_douintvec(table, write, buffer, lenp, ppos,
+ do_proc_dopipe_max_size_conv, NULL);
+}
+
+static struct ctl_table fs_pipe_sysctls[] = {
+ {
+ .procname = "pipe-max-size",
+ .data = &pipe_max_size,
+ .maxlen = sizeof(pipe_max_size),
+ .mode = 0644,
+ .proc_handler = proc_dopipe_max_size,
+ },
+ {
+ .procname = "pipe-user-pages-hard",
+ .data = &pipe_user_pages_hard,
+ .maxlen = sizeof(pipe_user_pages_hard),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "pipe-user-pages-soft",
+ .data = &pipe_user_pages_soft,
+ .maxlen = sizeof(pipe_user_pages_soft),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ { }
+};
+#endif
+
static int __init init_pipe_fs(void)
{
int err = register_filesystem(&pipe_fs_type);
@@ -1439,6 +1494,9 @@ static int __init init_pipe_fs(void)
unregister_filesystem(&pipe_fs_type);
}
}
+#ifdef CONFIG_SYSCTL
+ register_sysctl_init("fs", fs_pipe_sysctls);
+#endif
return err;
}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ff869a66b34e..fd8b0c12b2cb 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -92,6 +92,7 @@
#include <linux/string_helpers.h>
#include <linux/user_namespace.h>
#include <linux/fs_struct.h>
+#include <linux/kthread.h>
#include <asm/processor.h>
#include "internal.h"
@@ -102,6 +103,8 @@ void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape)
if (p->flags & PF_WQ_WORKER)
wq_worker_comm(tcomm, sizeof(tcomm), p);
+ else if (p->flags & PF_KTHREAD)
+ get_kthread_comm(tcomm, sizeof(tcomm), p);
else
__get_task_comm(tcomm, sizeof(tcomm), p);
@@ -468,6 +471,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
u64 cgtime, gtime;
unsigned long rsslim = 0;
unsigned long flags;
+ int exit_code = task->exit_code;
state = *get_task_state(task);
vsize = eip = esp = 0;
@@ -531,6 +535,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
maj_flt += sig->maj_flt;
thread_group_cputime_adjusted(task, &utime, &stime);
gtime += sig->gtime;
+
+ if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
+ exit_code = sig->group_exit_code;
}
sid = task_session_nr_ns(task, ns);
@@ -630,7 +637,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_puts(m, " 0 0 0 0 0 0 0");
if (permitted)
- seq_put_decimal_ll(m, " ", task->exit_code);
+ seq_put_decimal_ll(m, " ", exit_code);
else
seq_puts(m, " 0");
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 13eda8de2998..d654ce7150fd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -670,10 +670,10 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
/************************************************************************/
/* permission checks */
-static int proc_fd_access_allowed(struct inode *inode)
+static bool proc_fd_access_allowed(struct inode *inode)
{
struct task_struct *task;
- int allowed = 0;
+ bool allowed = false;
/* Allow access to a task's file descriptors if it is us or we
* may use ptrace attach to the process and find out that
* information.
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 5b78739e60e4..f2132407e133 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -791,12 +791,6 @@ void proc_remove(struct proc_dir_entry *de)
}
EXPORT_SYMBOL(proc_remove);
-void *PDE_DATA(const struct inode *inode)
-{
- return __PDE_DATA(inode);
-}
-EXPORT_SYMBOL(PDE_DATA);
-
/*
* Pull a user buffer into memory and pass it to the file's write handler if
* one is supplied. The ->write() method is permitted to modify the
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 599eb724ff2d..f84355c5a36d 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -650,6 +650,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
return NULL;
}
+ inode->i_private = de->data;
inode->i_ino = de->low_ino;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
PROC_I(inode)->pde = de;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 03415f3fb3a8..06a80f78433d 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -115,11 +115,6 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode)
return PROC_I(inode)->pde;
}
-static inline void *__PDE_DATA(const struct inode *inode)
-{
- return PDE(inode)->data;
-}
-
static inline struct pid *proc_pid(const struct inode *inode)
{
return PROC_I(inode)->pid;
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 39b823ab2564..e1cfeda397f3 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -138,7 +138,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_data);
* @parent: The parent directory in which to create.
* @ops: The seq_file ops with which to read the file.
* @write: The write method with which to 'modify' the file.
- * @data: Data for retrieval by PDE_DATA().
+ * @data: Data for retrieval by pde_data().
*
* Create a network namespaced proc file in the @parent directory with the
* specified @name and @mode that allows reading of a file that displays a
@@ -153,7 +153,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_data);
* modified by the @write function. @write should return 0 on success.
*
* The @data value is accessible from the @show and @write functions by calling
- * PDE_DATA() on the file inode. The network namespace must be accessed by
+ * pde_data() on the file inode. The network namespace must be accessed by
* calling seq_file_net() on the seq_file struct.
*/
struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode,
@@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_single);
* @parent: The parent directory in which to create.
* @show: The seqfile show method with which to read the file.
* @write: The write method with which to 'modify' the file.
- * @data: Data for retrieval by PDE_DATA().
+ * @data: Data for retrieval by pde_data().
*
* Create a network-namespaced proc file in the @parent directory with the
* specified @name and @mode that allows reading of a file that displays a
@@ -245,7 +245,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_single);
* modified by the @write function. @write should return 0 on success.
*
* The @data value is accessible from the @show and @write functions by calling
- * PDE_DATA() on the file inode. The network namespace must be accessed by
+ * pde_data() on the file inode. The network namespace must be accessed by
* calling seq_file_single_net() on the seq_file struct.
*/
struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mode,
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5d66faecd4ef..7d9cfc730bd4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -16,6 +16,7 @@
#include <linux/module.h>
#include <linux/bpf-cgroup.h>
#include <linux/mount.h>
+#include <linux/kmemleak.h>
#include "internal.h"
static const struct dentry_operations proc_sys_dentry_operations;
@@ -25,15 +26,32 @@ static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;
/* shared constants to be used in various sysctls */
-const int sysctl_vals[] = { 0, 1, INT_MAX };
+const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, 3000, INT_MAX, 65535 };
EXPORT_SYMBOL(sysctl_vals);
+const unsigned long sysctl_long_vals[] = { 0, 1, LONG_MAX };
+EXPORT_SYMBOL_GPL(sysctl_long_vals);
+
/* Support for permanently empty directories */
struct ctl_table sysctl_mount_point[] = {
{ }
};
+/**
+ * register_sysctl_mount_point() - registers a sysctl mount point
+ * @path: path for the mount point
+ *
+ * Used to create a permanently empty directory to serve as mount point.
+ * There are some subtle but important permission checks this allows in the
+ * case of unprivileged mounts.
+ */
+struct ctl_table_header *register_sysctl_mount_point(const char *path)
+{
+ return register_sysctl(path, sysctl_mount_point);
+}
+EXPORT_SYMBOL(register_sysctl_mount_point);
+
static bool is_empty_dir(struct ctl_table_header *head)
{
return head->ctl_table[0].child == sysctl_mount_point;
@@ -163,7 +181,7 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
else {
pr_err("sysctl duplicate entry: ");
sysctl_print_dir(head->parent);
- pr_cont("/%s\n", entry->procname);
+ pr_cont("%s\n", entry->procname);
return -EEXIST;
}
}
@@ -1020,8 +1038,8 @@ failed:
if (IS_ERR(subdir)) {
pr_err("sysctl could not get directory: ");
sysctl_print_dir(dir);
- pr_cont("/%*.*s %ld\n",
- namelen, namelen, name, PTR_ERR(subdir));
+ pr_cont("%*.*s %ld\n", namelen, namelen, name,
+ PTR_ERR(subdir));
}
drop_sysctl_table(&dir->header);
if (new)
@@ -1053,7 +1071,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead,
struct ctl_dir *dir;
int ret;
- ret = 0;
spin_lock(&sysctl_lock);
root = (*pentry)->data;
set = lookup_header_set(root);
@@ -1384,6 +1401,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab
}
EXPORT_SYMBOL(register_sysctl);
+/**
+ * __register_sysctl_init() - register sysctl table to path
+ * @path: path name for sysctl base
+ * @table: This is the sysctl table that needs to be registered to the path
+ * @table_name: The name of sysctl table, only used for log printing when
+ * registration fails
+ *
+ * The sysctl interface is used by userspace to query or modify at runtime
+ * a predefined value set on a variable. These variables however have default
+ * values pre-set. Code which depends on these variables will always work even
+ * if register_sysctl() fails. If register_sysctl() fails you'd just loose the
+ * ability to query or modify the sysctls dynamically at run time. Chances of
+ * register_sysctl() failing on init are extremely low, and so for both reasons
+ * this function does not return any error as it is used by initialization code.
+ *
+ * Context: Can only be called after your respective sysctl base path has been
+ * registered. So for instance, most base directories are registered early on
+ * init before init levels are processed through proc_sys_init() and
+ * sysctl_init_bases().
+ */
+void __init __register_sysctl_init(const char *path, struct ctl_table *table,
+ const char *table_name)
+{
+ struct ctl_table_header *hdr = register_sysctl(path, table);
+
+ if (unlikely(!hdr)) {
+ pr_err("failed when register_sysctl %s to %s\n", table_name, path);
+ return;
+ }
+ kmemleak_not_leak(hdr);
+}
+
static char *append_path(const char *path, char *pos, const char *name)
{
int namelen;
@@ -1597,6 +1646,15 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
}
EXPORT_SYMBOL(register_sysctl_table);
+int __register_sysctl_base(struct ctl_table *base_table)
+{
+ struct ctl_table_header *hdr;
+
+ hdr = register_sysctl_table(base_table);
+ kmemleak_not_leak(hdr);
+ return 0;
+}
+
static void put_links(struct ctl_table_header *header)
{
struct ctl_table_set *root_set = &sysctl_table_root.default_set;
@@ -1626,7 +1684,7 @@ static void put_links(struct ctl_table_header *header)
else {
pr_err("sysctl link missing during unregister: ");
sysctl_print_dir(parent);
- pr_cont("/%s\n", name);
+ pr_cont("%s\n", name);
}
}
}
@@ -1710,7 +1768,7 @@ int __init proc_sys_init(void)
proc_sys_root->proc_dir_ops = &proc_sys_dir_file_operations;
proc_sys_root->nlink = 0;
- return sysctl_init();
+ return sysctl_init_bases();
}
struct sysctl_alias {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ad667dbc96f5..18f8c3acbb85 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
#include <linux/vmacache.h>
+#include <linux/mm_inline.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/mount.h>
@@ -308,6 +309,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
name = arch_vma_name(vma);
if (!name) {
+ const char *anon_name;
+
if (!mm) {
name = "[vdso]";
goto done;
@@ -319,8 +322,16 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
goto done;
}
- if (is_stack(vma))
+ if (is_stack(vma)) {
name = "[stack]";
+ goto done;
+ }
+
+ anon_name = vma_anon_name(vma);
+ if (anon_name) {
+ seq_pad(m, ' ');
+ seq_printf(m, "[anon:%s]", anon_name);
+ }
}
done:
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 509f85148fee..702754dd1daf 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -65,8 +65,6 @@ static size_t vmcoredd_orig_sz;
static DECLARE_RWSEM(vmcore_cb_rwsem);
/* List of registered vmcore callbacks. */
static LIST_HEAD(vmcore_cb_list);
-/* Whether we had a surprise unregistration of a callback. */
-static bool vmcore_cb_unstable;
/* Whether the vmcore has been opened once. */
static bool vmcore_opened;
@@ -94,10 +92,8 @@ void unregister_vmcore_cb(struct vmcore_cb *cb)
* very unusual (e.g., forced driver removal), but we cannot stop
* unregistering.
*/
- if (vmcore_opened) {
+ if (vmcore_opened)
pr_warn_once("Unexpected vmcore callback unregistration\n");
- vmcore_cb_unstable = true;
- }
up_write(&vmcore_cb_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_vmcore_cb);
@@ -108,8 +104,6 @@ static bool pfn_is_ram(unsigned long pfn)
bool ret = true;
lockdep_assert_held_read(&vmcore_cb_rwsem);
- if (unlikely(vmcore_cb_unstable))
- return false;
list_for_each_entry(cb, &vmcore_cb_list, next) {
if (unlikely(!cb->pfn_is_ram))
@@ -581,7 +575,7 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
* looping over all pages without a reason.
*/
down_read(&vmcore_cb_rwsem);
- if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable)
+ if (!list_empty(&vmcore_cb_list))
ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
else
ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 65ce0e72e7b9..e20d1484c663 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -155,11 +155,12 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info,
int nonblock)
{
+ enum pid_type type;
ssize_t ret;
DECLARE_WAITQUEUE(wait, current);
spin_lock_irq(&current->sighand->siglock);
- ret = dequeue_signal(current, &ctx->sigmask, info);
+ ret = dequeue_signal(current, &ctx->sigmask, info, &type);
switch (ret) {
case 0:
if (!nonblock)
@@ -174,7 +175,7 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
add_wait_queue(&current->sighand->signalfd_wqh, &wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
- ret = dequeue_signal(current, &ctx->sigmask, info);
+ ret = dequeue_signal(current, &ctx->sigmask, info, &type);
if (ret != 0)
break;
if (signal_pending(current)) {
diff --git a/fs/smbfs_common/smb2pdu.h b/fs/smbfs_common/smb2pdu.h
index 7ccadcbe684b..38b8fc514860 100644
--- a/fs/smbfs_common/smb2pdu.h
+++ b/fs/smbfs_common/smb2pdu.h
@@ -449,7 +449,7 @@ struct smb2_netname_neg_context {
*/
/* Flags */
-#define SMB2_ACCEPT_TRANSFORM_LEVEL_SECURITY 0x00000001
+#define SMB2_ACCEPT_TRANSPORT_LEVEL_SECURITY 0x00000001
struct smb2_transport_capabilities_context {
__le16 ContextType; /* 6 */
diff --git a/fs/smbfs_common/smbfsctl.h b/fs/smbfs_common/smbfsctl.h
index 926f87cd6af0..d51939c43ad7 100644
--- a/fs/smbfs_common/smbfsctl.h
+++ b/fs/smbfs_common/smbfsctl.h
@@ -95,8 +95,10 @@
#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
#define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C
#define FSCTL_GET_REFS_VOLUME_DATA 0x000902D8 /* See MS-FSCC 2.3.24 */
+#define FSCTL_SET_INTEGRITY_INFORMATION_EXT 0x00090380
#define FSCTL_GET_RETRIEVAL_POINTERS_AND_REFCOUNT 0x000903d3
#define FSCTL_GET_RETRIEVAL_POINTER_COUNT 0x0009042b
+#define FSCTL_REFS_STREAM_SNAPSHOT_MANAGEMENT 0x00090440
#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF
#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
#define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index bb44ff4c5cc6..b1b556dbce12 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/magic.h>
#include <linux/xattr.h>
+#include <linux/backing-dev.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -112,6 +113,24 @@ static const struct squashfs_decompressor *supported_squashfs_filesystem(
return decompressor;
}
+static int squashfs_bdi_init(struct super_block *sb)
+{
+ int err;
+ unsigned int major = MAJOR(sb->s_dev);
+ unsigned int minor = MINOR(sb->s_dev);
+
+ bdi_put(sb->s_bdi);
+ sb->s_bdi = &noop_backing_dev_info;
+
+ err = super_setup_bdi_name(sb, "squashfs_%u_%u", major, minor);
+ if (err)
+ return err;
+
+ sb->s_bdi->ra_pages = 0;
+ sb->s_bdi->io_pages = 0;
+
+ return 0;
+}
static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
@@ -127,6 +146,20 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
TRACE("Entered squashfs_fill_superblock\n");
+ /*
+ * squashfs provides 'backing_dev_info' in order to disable read-ahead. For
+ * squashfs, I/O is not deferred, it is done immediately in readpage,
+ * which means the user would always have to wait their own I/O. So the effect
+ * of readahead is very weak for squashfs. squashfs_bdi_init will set
+ * sb->s_bdi->ra_pages and sb->s_bdi->io_pages to 0 and close readahead for
+ * squashfs.
+ */
+ err = squashfs_bdi_init(sb);
+ if (err) {
+ errorf(fc, "squashfs init bdi failed");
+ return err;
+ }
+
sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL);
if (sb->s_fs_info == NULL) {
ERROR("Failed to allocate squashfs_sb_info\n");
diff --git a/fs/super.c b/fs/super.c
index 3bfc0f8fbd5b..7af820ba5ad5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -31,7 +31,6 @@
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
-#include <linux/cleancache.h>
#include <linux/fscrypt.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
@@ -260,7 +259,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
s->s_time_gran = 1000000000;
s->s_time_min = TIME64_MIN;
s->s_time_max = TIME64_MAX;
- s->cleancache_poolid = CLEANCACHE_NO_POOL;
s->s_shrink.seeks = DEFAULT_SEEKS;
s->s_shrink.scan_objects = super_cache_scan;
@@ -330,7 +328,6 @@ void deactivate_locked_super(struct super_block *s)
{
struct file_system_type *fs = s->s_type;
if (atomic_dec_and_test(&s->s_active)) {
- cleancache_invalidate_fs(s);
unregister_shrinker(&s->s_shrink);
fs->kill_sb(s);
@@ -1423,8 +1420,8 @@ struct dentry *mount_nodev(struct file_system_type *fs_type,
}
EXPORT_SYMBOL(mount_nodev);
-static int reconfigure_single(struct super_block *s,
- int flags, void *data)
+int reconfigure_single(struct super_block *s,
+ int flags, void *data)
{
struct fs_context *fc;
int ret;
diff --git a/fs/sysctls.c b/fs/sysctls.c
new file mode 100644
index 000000000000..c701273c9432
--- /dev/null
+++ b/fs/sysctls.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * /proc/sys/fs shared sysctls
+ *
+ * These sysctls are shared between different filesystems.
+ */
+#include <linux/init.h>
+#include <linux/sysctl.h>
+
+static struct ctl_table fs_shared_sysctls[] = {
+ {
+ .procname = "overflowuid",
+ .data = &fs_overflowuid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_MAXOLDUID,
+ },
+ {
+ .procname = "overflowgid",
+ .data = &fs_overflowgid,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_MAXOLDUID,
+ },
+ { }
+};
+
+DECLARE_SYSCTL_BASE(fs, fs_shared_sysctls);
+
+static int __init init_fs_sysctls(void)
+{
+ return register_sysctl_base(fs);
+}
+
+early_initcall(init_fs_sysctls);
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 3616839c5c4b..bafc02bf8220 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -109,12 +109,12 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
* also the directory that is being deleted.
*/
inode_unlock(inode);
- inode_unlock(dentry->d_inode);
+ inode_unlock(d_inode(dentry));
ret = tracefs_ops.rmdir(name);
inode_lock_nested(inode, I_MUTEX_PARENT);
- inode_lock(dentry->d_inode);
+ inode_lock(d_inode(dentry));
kfree(name);
@@ -284,7 +284,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
static int tracefs_apply_options(struct super_block *sb)
{
struct tracefs_fs_info *fsi = sb->s_fs_info;
- struct inode *inode = sb->s_root->d_inode;
+ struct inode *inode = d_inode(sb->s_root);
struct tracefs_mount_opts *opts = &fsi->mount_opts;
inode->i_mode &= ~S_IALLUGO;
@@ -403,18 +403,18 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
if (!parent)
parent = tracefs_mount->mnt_root;
- inode_lock(parent->d_inode);
- if (unlikely(IS_DEADDIR(parent->d_inode)))
+ inode_lock(d_inode(parent));
+ if (unlikely(IS_DEADDIR(d_inode(parent))))
dentry = ERR_PTR(-ENOENT);
else
dentry = lookup_one_len(name, parent, strlen(name));
- if (!IS_ERR(dentry) && dentry->d_inode) {
+ if (!IS_ERR(dentry) && d_inode(dentry)) {
dput(dentry);
dentry = ERR_PTR(-EEXIST);
}
if (IS_ERR(dentry)) {
- inode_unlock(parent->d_inode);
+ inode_unlock(d_inode(parent));
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
}
@@ -423,7 +423,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
static struct dentry *failed_creating(struct dentry *dentry)
{
- inode_unlock(dentry->d_parent->d_inode);
+ inode_unlock(d_inode(dentry->d_parent));
dput(dentry);
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
return NULL;
@@ -431,7 +431,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
static struct dentry *end_creating(struct dentry *dentry)
{
- inode_unlock(dentry->d_parent->d_inode);
+ inode_unlock(d_inode(dentry->d_parent));
return dentry;
}
@@ -489,7 +489,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
inode->i_uid = d_inode(dentry->d_parent)->i_uid;
inode->i_gid = d_inode(dentry->d_parent)->i_gid;
d_instantiate(dentry, inode);
- fsnotify_create(dentry->d_parent->d_inode, dentry);
+ fsnotify_create(d_inode(dentry->d_parent), dentry);
return end_creating(dentry);
}
@@ -516,8 +516,8 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
d_instantiate(dentry, inode);
- inc_nlink(dentry->d_parent->d_inode);
- fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+ inc_nlink(d_inode(dentry->d_parent));
+ fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
return end_creating(dentry);
}
diff --git a/fs/unicode/.gitignore b/fs/unicode/.gitignore
index 361294571ab0..51cdf3fb4dd4 100644
--- a/fs/unicode/.gitignore
+++ b/fs/unicode/.gitignore
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
/mkutf8data
-/utf8data.h
+/utf8data.c
diff --git a/fs/unicode/Kconfig b/fs/unicode/Kconfig
index 2c27b9a5cd6c..610d7bc05d6e 100644
--- a/fs/unicode/Kconfig
+++ b/fs/unicode/Kconfig
@@ -8,7 +8,16 @@ config UNICODE
Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding
support.
+config UNICODE_UTF8_DATA
+ tristate "UTF-8 normalization and casefolding tables"
+ depends on UNICODE
+ default UNICODE
+ help
+ This contains a large table of case foldings, which can be loaded as
+ a separate module if you say M here. To be on the safe side stick
+ to the default of Y. Saying N here makes no sense, if you do not want
+ utf8 casefolding support, disable CONFIG_UNICODE instead.
+
config UNICODE_NORMALIZATION_SELFTEST
tristate "Test UTF-8 normalization support"
- depends on UNICODE
- default n
+ depends on UNICODE_UTF8_DATA
diff --git a/fs/unicode/Makefile b/fs/unicode/Makefile
index b88aecc86550..2f9d9188852b 100644
--- a/fs/unicode/Makefile
+++ b/fs/unicode/Makefile
@@ -2,14 +2,15 @@
obj-$(CONFIG_UNICODE) += unicode.o
obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o
+obj-$(CONFIG_UNICODE_UTF8_DATA) += utf8data.o
unicode-y := utf8-norm.o utf8-core.o
-$(obj)/utf8-norm.o: $(obj)/utf8data.h
+$(obj)/utf8-data.o: $(obj)/utf8data.c
-# In the normal build, the checked-in utf8data.h is just shipped.
+# In the normal build, the checked-in utf8data.c is just shipped.
#
-# To generate utf8data.h from UCD, put *.txt files in this directory
+# To generate utf8data.c from UCD, put *.txt files in this directory
# and pass REGENERATE_UTF8DATA=1 from the command line.
ifdef REGENERATE_UTF8DATA
@@ -24,15 +25,15 @@ quiet_cmd_utf8data = GEN $@
-t $(srctree)/$(src)/NormalizationTest.txt \
-o $@
-$(obj)/utf8data.h: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE
+$(obj)/utf8data.c: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE
$(call if_changed,utf8data)
else
-$(obj)/utf8data.h: $(src)/utf8data.h_shipped FORCE
+$(obj)/utf8data.c: $(src)/utf8data.c_shipped FORCE
$(call if_changed,shipped)
endif
-targets += utf8data.h
+targets += utf8data.c
hostprogs += mkutf8data
diff --git a/fs/unicode/mkutf8data.c b/fs/unicode/mkutf8data.c
index ff2025ac5a32..bc1a7c8b5c8d 100644
--- a/fs/unicode/mkutf8data.c
+++ b/fs/unicode/mkutf8data.c
@@ -3287,12 +3287,10 @@ static void write_file(void)
open_fail(utf8_name, errno);
fprintf(file, "/* This file is generated code, do not edit. */\n");
- fprintf(file, "#ifndef __INCLUDED_FROM_UTF8NORM_C__\n");
- fprintf(file, "#error Only nls_utf8-norm.c should include this file.\n");
- fprintf(file, "#endif\n");
fprintf(file, "\n");
- fprintf(file, "static const unsigned int utf8vers = %#x;\n",
- unicode_maxage);
+ fprintf(file, "#include <linux/module.h>\n");
+ fprintf(file, "#include <linux/kernel.h>\n");
+ fprintf(file, "#include \"utf8n.h\"\n");
fprintf(file, "\n");
fprintf(file, "static const unsigned int utf8agetab[] = {\n");
for (i = 0; i != ages_count; i++)
@@ -3339,6 +3337,22 @@ static void write_file(void)
fprintf(file, "\n");
}
fprintf(file, "};\n");
+ fprintf(file, "\n");
+ fprintf(file, "struct utf8data_table utf8_data_table = {\n");
+ fprintf(file, "\t.utf8agetab = utf8agetab,\n");
+ fprintf(file, "\t.utf8agetab_size = ARRAY_SIZE(utf8agetab),\n");
+ fprintf(file, "\n");
+ fprintf(file, "\t.utf8nfdicfdata = utf8nfdicfdata,\n");
+ fprintf(file, "\t.utf8nfdicfdata_size = ARRAY_SIZE(utf8nfdicfdata),\n");
+ fprintf(file, "\n");
+ fprintf(file, "\t.utf8nfdidata = utf8nfdidata,\n");
+ fprintf(file, "\t.utf8nfdidata_size = ARRAY_SIZE(utf8nfdidata),\n");
+ fprintf(file, "\n");
+ fprintf(file, "\t.utf8data = utf8data,\n");
+ fprintf(file, "};\n");
+ fprintf(file, "EXPORT_SYMBOL_GPL(utf8_data_table);");
+ fprintf(file, "\n");
+ fprintf(file, "MODULE_LICENSE(\"GPL v2\");\n");
fclose(file);
}
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index dc25823bfed9..67aaadc3ab07 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -5,16 +5,13 @@
#include <linux/slab.h>
#include <linux/parser.h>
#include <linux/errno.h>
-#include <linux/unicode.h>
#include <linux/stringhash.h>
#include "utf8n.h"
int utf8_validate(const struct unicode_map *um, const struct qstr *str)
{
- const struct utf8data *data = utf8nfdi(um->version);
-
- if (utf8nlen(data, str->name, str->len) < 0)
+ if (utf8nlen(um, UTF8_NFDI, str->name, str->len) < 0)
return -1;
return 0;
}
@@ -23,14 +20,13 @@ EXPORT_SYMBOL(utf8_validate);
int utf8_strncmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2)
{
- const struct utf8data *data = utf8nfdi(um->version);
struct utf8cursor cur1, cur2;
int c1, c2;
- if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+ if (utf8ncursor(&cur1, um, UTF8_NFDI, s1->name, s1->len) < 0)
return -EINVAL;
- if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
+ if (utf8ncursor(&cur2, um, UTF8_NFDI, s2->name, s2->len) < 0)
return -EINVAL;
do {
@@ -50,14 +46,13 @@ EXPORT_SYMBOL(utf8_strncmp);
int utf8_strncasecmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2)
{
- const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur1, cur2;
int c1, c2;
- if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+ if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
return -EINVAL;
- if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
+ if (utf8ncursor(&cur2, um, UTF8_NFDICF, s2->name, s2->len) < 0)
return -EINVAL;
do {
@@ -81,12 +76,11 @@ int utf8_strncasecmp_folded(const struct unicode_map *um,
const struct qstr *cf,
const struct qstr *s1)
{
- const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur1;
int c1, c2;
int i = 0;
- if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
+ if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
return -EINVAL;
do {
@@ -105,11 +99,10 @@ EXPORT_SYMBOL(utf8_strncasecmp_folded);
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{
- const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur;
size_t nlen = 0;
- if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+ if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
return -EINVAL;
for (nlen = 0; nlen < dlen; nlen++) {
@@ -128,12 +121,11 @@ EXPORT_SYMBOL(utf8_casefold);
int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
struct qstr *str)
{
- const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur;
int c;
unsigned long hash = init_name_hash(salt);
- if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+ if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
return -EINVAL;
while ((c = utf8byte(&cur))) {
@@ -149,11 +141,10 @@ EXPORT_SYMBOL(utf8_casefold_hash);
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen)
{
- const struct utf8data *data = utf8nfdi(um->version);
struct utf8cursor cur;
ssize_t nlen = 0;
- if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+ if (utf8ncursor(&cur, um, UTF8_NFDI, str->name, str->len) < 0)
return -EINVAL;
for (nlen = 0; nlen < dlen; nlen++) {
@@ -167,69 +158,59 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
}
return -EINVAL;
}
-
EXPORT_SYMBOL(utf8_normalize);
-static int utf8_parse_version(const char *version, unsigned int *maj,
- unsigned int *min, unsigned int *rev)
+static const struct utf8data *find_table_version(const struct utf8data *table,
+ size_t nr_entries, unsigned int version)
{
- substring_t args[3];
- char version_string[12];
- static const struct match_token token[] = {
- {1, "%d.%d.%d"},
- {0, NULL}
- };
-
- strncpy(version_string, version, sizeof(version_string));
-
- if (match_token(version_string, token, args) != 1)
- return -EINVAL;
-
- if (match_int(&args[0], maj) || match_int(&args[1], min) ||
- match_int(&args[2], rev))
- return -EINVAL;
+ size_t i = nr_entries - 1;
- return 0;
+ while (version < table[i].maxage)
+ i--;
+ if (version > table[i].maxage)
+ return NULL;
+ return &table[i];
}
-struct unicode_map *utf8_load(const char *version)
+struct unicode_map *utf8_load(unsigned int version)
{
- struct unicode_map *um = NULL;
- int unicode_version;
-
- if (version) {
- unsigned int maj, min, rev;
-
- if (utf8_parse_version(version, &maj, &min, &rev) < 0)
- return ERR_PTR(-EINVAL);
-
- if (!utf8version_is_supported(maj, min, rev))
- return ERR_PTR(-EINVAL);
-
- unicode_version = UNICODE_AGE(maj, min, rev);
- } else {
- unicode_version = utf8version_latest();
- printk(KERN_WARNING"UTF-8 version not specified. "
- "Assuming latest supported version (%d.%d.%d).",
- (unicode_version >> 16) & 0xff,
- (unicode_version >> 8) & 0xff,
- (unicode_version & 0xff));
- }
+ struct unicode_map *um;
um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
if (!um)
return ERR_PTR(-ENOMEM);
-
- um->charset = "UTF-8";
- um->version = unicode_version;
-
+ um->version = version;
+
+ um->tables = symbol_request(utf8_data_table);
+ if (!um->tables)
+ goto out_free_um;
+
+ if (!utf8version_is_supported(um, version))
+ goto out_symbol_put;
+ um->ntab[UTF8_NFDI] = find_table_version(um->tables->utf8nfdidata,
+ um->tables->utf8nfdidata_size, um->version);
+ if (!um->ntab[UTF8_NFDI])
+ goto out_symbol_put;
+ um->ntab[UTF8_NFDICF] = find_table_version(um->tables->utf8nfdicfdata,
+ um->tables->utf8nfdicfdata_size, um->version);
+ if (!um->ntab[UTF8_NFDICF])
+ goto out_symbol_put;
return um;
+
+out_symbol_put:
+ symbol_put(um->tables);
+out_free_um:
+ kfree(um);
+ return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL(utf8_load);
void utf8_unload(struct unicode_map *um)
{
- kfree(um);
+ if (um) {
+ symbol_put(utf8_data_table);
+ kfree(um);
+ }
}
EXPORT_SYMBOL(utf8_unload);
diff --git a/fs/unicode/utf8-norm.c b/fs/unicode/utf8-norm.c
index 1d2d2e5b906a..768f8ab448b8 100644
--- a/fs/unicode/utf8-norm.c
+++ b/fs/unicode/utf8-norm.c
@@ -6,34 +6,17 @@
#include "utf8n.h"
-struct utf8data {
- unsigned int maxage;
- unsigned int offset;
-};
-
-#define __INCLUDED_FROM_UTF8NORM_C__
-#include "utf8data.h"
-#undef __INCLUDED_FROM_UTF8NORM_C__
-
-int utf8version_is_supported(u8 maj, u8 min, u8 rev)
+int utf8version_is_supported(const struct unicode_map *um, unsigned int version)
{
- int i = ARRAY_SIZE(utf8agetab) - 1;
- unsigned int sb_utf8version = UNICODE_AGE(maj, min, rev);
+ int i = um->tables->utf8agetab_size - 1;
- while (i >= 0 && utf8agetab[i] != 0) {
- if (sb_utf8version == utf8agetab[i])
+ while (i >= 0 && um->tables->utf8agetab[i] != 0) {
+ if (version == um->tables->utf8agetab[i])
return 1;
i--;
}
return 0;
}
-EXPORT_SYMBOL(utf8version_is_supported);
-
-int utf8version_latest(void)
-{
- return utf8vers;
-}
-EXPORT_SYMBOL(utf8version_latest);
/*
* UTF-8 valid ranges.
@@ -168,7 +151,7 @@ typedef const unsigned char utf8trie_t;
* underlying datatype: unsigned char.
*
* leaf[0]: The unicode version, stored as a generation number that is
- * an index into utf8agetab[]. With this we can filter code
+ * an index into ->utf8agetab[]. With this we can filter code
* points based on the unicode version in which they were
* defined. The CCC of a non-defined code point is 0.
* leaf[1]: Canonical Combining Class. During normalization, we need
@@ -316,21 +299,19 @@ utf8hangul(const char *str, unsigned char *hangul)
* is well-formed and corresponds to a known unicode code point. The
* shorthand for this will be "is valid UTF-8 unicode".
*/
-static utf8leaf_t *utf8nlookup(const struct utf8data *data,
- unsigned char *hangul, const char *s, size_t len)
+static utf8leaf_t *utf8nlookup(const struct unicode_map *um,
+ enum utf8_normalization n, unsigned char *hangul, const char *s,
+ size_t len)
{
- utf8trie_t *trie = NULL;
+ utf8trie_t *trie = um->tables->utf8data + um->ntab[n]->offset;
int offlen;
int offset;
int mask;
int node;
- if (!data)
- return NULL;
if (len == 0)
return NULL;
- trie = utf8data + data->offset;
node = 1;
while (node) {
offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT;
@@ -392,172 +373,29 @@ static utf8leaf_t *utf8nlookup(const struct utf8data *data,
*
* Forwards to utf8nlookup().
*/
-static utf8leaf_t *utf8lookup(const struct utf8data *data,
- unsigned char *hangul, const char *s)
+static utf8leaf_t *utf8lookup(const struct unicode_map *um,
+ enum utf8_normalization n, unsigned char *hangul, const char *s)
{
- return utf8nlookup(data, hangul, s, (size_t)-1);
-}
-
-/*
- * Maximum age of any character in s.
- * Return -1 if s is not valid UTF-8 unicode.
- * Return 0 if only non-assigned code points are used.
- */
-int utf8agemax(const struct utf8data *data, const char *s)
-{
- utf8leaf_t *leaf;
- int age = 0;
- int leaf_age;
- unsigned char hangul[UTF8HANGULLEAF];
-
- if (!data)
- return -1;
-
- while (*s) {
- leaf = utf8lookup(data, hangul, s);
- if (!leaf)
- return -1;
-
- leaf_age = utf8agetab[LEAF_GEN(leaf)];
- if (leaf_age <= data->maxage && leaf_age > age)
- age = leaf_age;
- s += utf8clen(s);
- }
- return age;
+ return utf8nlookup(um, n, hangul, s, (size_t)-1);
}
-EXPORT_SYMBOL(utf8agemax);
-
-/*
- * Minimum age of any character in s.
- * Return -1 if s is not valid UTF-8 unicode.
- * Return 0 if non-assigned code points are used.
- */
-int utf8agemin(const struct utf8data *data, const char *s)
-{
- utf8leaf_t *leaf;
- int age;
- int leaf_age;
- unsigned char hangul[UTF8HANGULLEAF];
-
- if (!data)
- return -1;
- age = data->maxage;
- while (*s) {
- leaf = utf8lookup(data, hangul, s);
- if (!leaf)
- return -1;
- leaf_age = utf8agetab[LEAF_GEN(leaf)];
- if (leaf_age <= data->maxage && leaf_age < age)
- age = leaf_age;
- s += utf8clen(s);
- }
- return age;
-}
-EXPORT_SYMBOL(utf8agemin);
-
-/*
- * Maximum age of any character in s, touch at most len bytes.
- * Return -1 if s is not valid UTF-8 unicode.
- */
-int utf8nagemax(const struct utf8data *data, const char *s, size_t len)
-{
- utf8leaf_t *leaf;
- int age = 0;
- int leaf_age;
- unsigned char hangul[UTF8HANGULLEAF];
-
- if (!data)
- return -1;
-
- while (len && *s) {
- leaf = utf8nlookup(data, hangul, s, len);
- if (!leaf)
- return -1;
- leaf_age = utf8agetab[LEAF_GEN(leaf)];
- if (leaf_age <= data->maxage && leaf_age > age)
- age = leaf_age;
- len -= utf8clen(s);
- s += utf8clen(s);
- }
- return age;
-}
-EXPORT_SYMBOL(utf8nagemax);
-
-/*
- * Maximum age of any character in s, touch at most len bytes.
- * Return -1 if s is not valid UTF-8 unicode.
- */
-int utf8nagemin(const struct utf8data *data, const char *s, size_t len)
-{
- utf8leaf_t *leaf;
- int leaf_age;
- int age;
- unsigned char hangul[UTF8HANGULLEAF];
-
- if (!data)
- return -1;
- age = data->maxage;
- while (len && *s) {
- leaf = utf8nlookup(data, hangul, s, len);
- if (!leaf)
- return -1;
- leaf_age = utf8agetab[LEAF_GEN(leaf)];
- if (leaf_age <= data->maxage && leaf_age < age)
- age = leaf_age;
- len -= utf8clen(s);
- s += utf8clen(s);
- }
- return age;
-}
-EXPORT_SYMBOL(utf8nagemin);
-
-/*
- * Length of the normalization of s.
- * Return -1 if s is not valid UTF-8 unicode.
- *
- * A string of Default_Ignorable_Code_Point has length 0.
- */
-ssize_t utf8len(const struct utf8data *data, const char *s)
-{
- utf8leaf_t *leaf;
- size_t ret = 0;
- unsigned char hangul[UTF8HANGULLEAF];
-
- if (!data)
- return -1;
- while (*s) {
- leaf = utf8lookup(data, hangul, s);
- if (!leaf)
- return -1;
- if (utf8agetab[LEAF_GEN(leaf)] > data->maxage)
- ret += utf8clen(s);
- else if (LEAF_CCC(leaf) == DECOMPOSE)
- ret += strlen(LEAF_STR(leaf));
- else
- ret += utf8clen(s);
- s += utf8clen(s);
- }
- return ret;
-}
-EXPORT_SYMBOL(utf8len);
/*
* Length of the normalization of s, touch at most len bytes.
* Return -1 if s is not valid UTF-8 unicode.
*/
-ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len)
+ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
+ const char *s, size_t len)
{
utf8leaf_t *leaf;
size_t ret = 0;
unsigned char hangul[UTF8HANGULLEAF];
- if (!data)
- return -1;
while (len && *s) {
- leaf = utf8nlookup(data, hangul, s, len);
+ leaf = utf8nlookup(um, n, hangul, s, len);
if (!leaf)
return -1;
- if (utf8agetab[LEAF_GEN(leaf)] > data->maxage)
+ if (um->tables->utf8agetab[LEAF_GEN(leaf)] >
+ um->ntab[n]->maxage)
ret += utf8clen(s);
else if (LEAF_CCC(leaf) == DECOMPOSE)
ret += strlen(LEAF_STR(leaf));
@@ -568,7 +406,6 @@ ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len)
}
return ret;
}
-EXPORT_SYMBOL(utf8nlen);
/*
* Set up an utf8cursor for use by utf8byte().
@@ -580,14 +417,13 @@ EXPORT_SYMBOL(utf8nlen);
*
* Returns -1 on error, 0 on success.
*/
-int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
- const char *s, size_t len)
+int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
+ enum utf8_normalization n, const char *s, size_t len)
{
- if (!data)
- return -1;
if (!s)
return -1;
- u8c->data = data;
+ u8c->um = um;
+ u8c->n = n;
u8c->s = s;
u8c->p = NULL;
u8c->ss = NULL;
@@ -604,23 +440,6 @@ int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
return -1;
return 0;
}
-EXPORT_SYMBOL(utf8ncursor);
-
-/*
- * Set up an utf8cursor for use by utf8byte().
- *
- * u8c : pointer to cursor.
- * data : const struct utf8data to use for normalization.
- * s : NUL-terminated string.
- *
- * Returns -1 on error, 0 on success.
- */
-int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
- const char *s)
-{
- return utf8ncursor(u8c, data, s, (unsigned int)-1);
-}
-EXPORT_SYMBOL(utf8cursor);
/*
* Get one byte from the normalized form of the string described by u8c.
@@ -678,9 +497,9 @@ int utf8byte(struct utf8cursor *u8c)
/* Look up the data for the current character. */
if (u8c->p) {
- leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
+ leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s);
} else {
- leaf = utf8nlookup(u8c->data, u8c->hangul,
+ leaf = utf8nlookup(u8c->um, u8c->n, u8c->hangul,
u8c->s, u8c->len);
}
@@ -690,7 +509,8 @@ int utf8byte(struct utf8cursor *u8c)
ccc = LEAF_CCC(leaf);
/* Characters that are too new have CCC 0. */
- if (utf8agetab[LEAF_GEN(leaf)] > u8c->data->maxage) {
+ if (u8c->um->tables->utf8agetab[LEAF_GEN(leaf)] >
+ u8c->um->ntab[u8c->n]->maxage) {
ccc = STOPPER;
} else if (ccc == DECOMPOSE) {
u8c->len -= utf8clen(u8c->s);
@@ -704,7 +524,7 @@ int utf8byte(struct utf8cursor *u8c)
goto ccc_mismatch;
}
- leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
+ leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s);
if (!leaf)
return -1;
ccc = LEAF_CCC(leaf);
@@ -765,28 +585,10 @@ ccc_mismatch:
}
}
}
-EXPORT_SYMBOL(utf8byte);
-
-const struct utf8data *utf8nfdi(unsigned int maxage)
-{
- int i = ARRAY_SIZE(utf8nfdidata) - 1;
-
- while (maxage < utf8nfdidata[i].maxage)
- i--;
- if (maxage > utf8nfdidata[i].maxage)
- return NULL;
- return &utf8nfdidata[i];
-}
-EXPORT_SYMBOL(utf8nfdi);
-
-const struct utf8data *utf8nfdicf(unsigned int maxage)
-{
- int i = ARRAY_SIZE(utf8nfdicfdata) - 1;
- while (maxage < utf8nfdicfdata[i].maxage)
- i--;
- if (maxage > utf8nfdicfdata[i].maxage)
- return NULL;
- return &utf8nfdicfdata[i];
-}
-EXPORT_SYMBOL(utf8nfdicf);
+#ifdef CONFIG_UNICODE_NORMALIZATION_SELFTEST_MODULE
+EXPORT_SYMBOL_GPL(utf8version_is_supported);
+EXPORT_SYMBOL_GPL(utf8nlen);
+EXPORT_SYMBOL_GPL(utf8ncursor);
+EXPORT_SYMBOL_GPL(utf8byte);
+#endif
diff --git a/fs/unicode/utf8-selftest.c b/fs/unicode/utf8-selftest.c
index 6fe8af7edccb..eb2bbdd688d7 100644
--- a/fs/unicode/utf8-selftest.c
+++ b/fs/unicode/utf8-selftest.c
@@ -18,9 +18,7 @@ unsigned int failed_tests;
unsigned int total_tests;
/* Tests will be based on this version. */
-#define latest_maj 12
-#define latest_min 1
-#define latest_rev 0
+#define UTF8_LATEST UNICODE_AGE(12, 1, 0)
#define _test(cond, func, line, fmt, ...) do { \
total_tests++; \
@@ -160,18 +158,22 @@ static const struct {
}
};
-static void check_utf8_nfdi(void)
+static ssize_t utf8len(const struct unicode_map *um, enum utf8_normalization n,
+ const char *s)
+{
+ return utf8nlen(um, n, s, (size_t)-1);
+}
+
+static int utf8cursor(struct utf8cursor *u8c, const struct unicode_map *um,
+ enum utf8_normalization n, const char *s)
+{
+ return utf8ncursor(u8c, um, n, s, (unsigned int)-1);
+}
+
+static void check_utf8_nfdi(struct unicode_map *um)
{
int i;
struct utf8cursor u8c;
- const struct utf8data *data;
-
- data = utf8nfdi(UNICODE_AGE(latest_maj, latest_min, latest_rev));
- if (!data) {
- pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
- __func__, latest_maj, latest_min, latest_rev);
- return;
- }
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
int len = strlen(nfdi_test_data[i].str);
@@ -179,10 +181,11 @@ static void check_utf8_nfdi(void)
int j = 0;
unsigned char c;
- test((utf8len(data, nfdi_test_data[i].str) == nlen));
- test((utf8nlen(data, nfdi_test_data[i].str, len) == nlen));
+ test((utf8len(um, UTF8_NFDI, nfdi_test_data[i].str) == nlen));
+ test((utf8nlen(um, UTF8_NFDI, nfdi_test_data[i].str, len) ==
+ nlen));
- if (utf8cursor(&u8c, data, nfdi_test_data[i].str) < 0)
+ if (utf8cursor(&u8c, um, UTF8_NFDI, nfdi_test_data[i].str) < 0)
pr_err("can't create cursor\n");
while ((c = utf8byte(&u8c)) > 0) {
@@ -196,18 +199,10 @@ static void check_utf8_nfdi(void)
}
}
-static void check_utf8_nfdicf(void)
+static void check_utf8_nfdicf(struct unicode_map *um)
{
int i;
struct utf8cursor u8c;
- const struct utf8data *data;
-
- data = utf8nfdicf(UNICODE_AGE(latest_maj, latest_min, latest_rev));
- if (!data) {
- pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
- __func__, latest_maj, latest_min, latest_rev);
- return;
- }
for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
int len = strlen(nfdicf_test_data[i].str);
@@ -215,10 +210,13 @@ static void check_utf8_nfdicf(void)
int j = 0;
unsigned char c;
- test((utf8len(data, nfdicf_test_data[i].str) == nlen));
- test((utf8nlen(data, nfdicf_test_data[i].str, len) == nlen));
+ test((utf8len(um, UTF8_NFDICF, nfdicf_test_data[i].str) ==
+ nlen));
+ test((utf8nlen(um, UTF8_NFDICF, nfdicf_test_data[i].str, len) ==
+ nlen));
- if (utf8cursor(&u8c, data, nfdicf_test_data[i].str) < 0)
+ if (utf8cursor(&u8c, um, UTF8_NFDICF,
+ nfdicf_test_data[i].str) < 0)
pr_err("can't create cursor\n");
while ((c = utf8byte(&u8c)) > 0) {
@@ -232,16 +230,9 @@ static void check_utf8_nfdicf(void)
}
}
-static void check_utf8_comparisons(void)
+static void check_utf8_comparisons(struct unicode_map *table)
{
int i;
- struct unicode_map *table = utf8_load("12.1.0");
-
- if (IS_ERR(table)) {
- pr_err("%s: Unable to load utf8 %d.%d.%d. Skipping.\n",
- __func__, latest_maj, latest_min, latest_rev);
- return;
- }
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
const struct qstr s1 = {.name = nfdi_test_data[i].str,
@@ -262,42 +253,49 @@ static void check_utf8_comparisons(void)
test_f(!utf8_strncasecmp(table, &s1, &s2),
"%s %s comparison mismatch\n", s1.name, s2.name);
}
-
- utf8_unload(table);
}
-static void check_supported_versions(void)
+static void check_supported_versions(struct unicode_map *um)
{
/* Unicode 7.0.0 should be supported. */
- test(utf8version_is_supported(7, 0, 0));
+ test(utf8version_is_supported(um, UNICODE_AGE(7, 0, 0)));
/* Unicode 9.0.0 should be supported. */
- test(utf8version_is_supported(9, 0, 0));
+ test(utf8version_is_supported(um, UNICODE_AGE(9, 0, 0)));
/* Unicode 1x.0.0 (the latest version) should be supported. */
- test(utf8version_is_supported(latest_maj, latest_min, latest_rev));
+ test(utf8version_is_supported(um, UTF8_LATEST));
/* Next versions don't exist. */
- test(!utf8version_is_supported(13, 0, 0));
- test(!utf8version_is_supported(0, 0, 0));
- test(!utf8version_is_supported(-1, -1, -1));
+ test(!utf8version_is_supported(um, UNICODE_AGE(13, 0, 0)));
+ test(!utf8version_is_supported(um, UNICODE_AGE(0, 0, 0)));
+ test(!utf8version_is_supported(um, UNICODE_AGE(-1, -1, -1)));
}
static int __init init_test_ucd(void)
{
+ struct unicode_map *um;
+
failed_tests = 0;
total_tests = 0;
- check_supported_versions();
- check_utf8_nfdi();
- check_utf8_nfdicf();
- check_utf8_comparisons();
+ um = utf8_load(UTF8_LATEST);
+ if (IS_ERR(um)) {
+ pr_err("%s: Unable to load utf8 table.\n", __func__);
+ return PTR_ERR(um);
+ }
+
+ check_supported_versions(um);
+ check_utf8_nfdi(um);
+ check_utf8_nfdicf(um);
+ check_utf8_comparisons(um);
if (!failed_tests)
pr_info("All %u tests passed\n", total_tests);
else
pr_err("%u out of %u tests failed\n", failed_tests,
total_tests);
+ utf8_unload(um);
return 0;
}
diff --git a/fs/unicode/utf8data.h_shipped b/fs/unicode/utf8data.c_shipped
index 76e4f0e1b089..d9b62901aa96 100644
--- a/fs/unicode/utf8data.h_shipped
+++ b/fs/unicode/utf8data.c_shipped
@@ -1,9 +1,8 @@
/* This file is generated code, do not edit. */
-#ifndef __INCLUDED_FROM_UTF8NORM_C__
-#error Only nls_utf8-norm.c should include this file.
-#endif
-static const unsigned int utf8vers = 0xc0100;
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include "utf8n.h"
static const unsigned int utf8agetab[] = {
0,
@@ -4107,3 +4106,18 @@ static const unsigned char utf8data[64256] = {
0x52,0x04,0x00,0x00,0x11,0x04,0x00,0x00,0x02,0x00,0xcf,0x86,0xcf,0x06,0x02,0x00,
0x81,0x80,0xcf,0x86,0x85,0x84,0xcf,0x86,0xcf,0x06,0x02,0x00,0x00,0x00,0x00,0x00
};
+
+struct utf8data_table utf8_data_table = {
+ .utf8agetab = utf8agetab,
+ .utf8agetab_size = ARRAY_SIZE(utf8agetab),
+
+ .utf8nfdicfdata = utf8nfdicfdata,
+ .utf8nfdicfdata_size = ARRAY_SIZE(utf8nfdicfdata),
+
+ .utf8nfdidata = utf8nfdidata,
+ .utf8nfdidata_size = ARRAY_SIZE(utf8nfdidata),
+
+ .utf8data = utf8data,
+};
+EXPORT_SYMBOL_GPL(utf8_data_table);
+MODULE_LICENSE("GPL v2");
diff --git a/fs/unicode/utf8n.h b/fs/unicode/utf8n.h
index 0acd530c2c79..bd00d587747a 100644
--- a/fs/unicode/utf8n.h
+++ b/fs/unicode/utf8n.h
@@ -11,53 +11,9 @@
#include <linux/export.h>
#include <linux/string.h>
#include <linux/module.h>
+#include <linux/unicode.h>
-/* Encoding a unicode version number as a single unsigned int. */
-#define UNICODE_MAJ_SHIFT (16)
-#define UNICODE_MIN_SHIFT (8)
-
-#define UNICODE_AGE(MAJ, MIN, REV) \
- (((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) | \
- ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \
- ((unsigned int)(REV)))
-
-/* Highest unicode version supported by the data tables. */
-extern int utf8version_is_supported(u8 maj, u8 min, u8 rev);
-extern int utf8version_latest(void);
-
-/*
- * Look for the correct const struct utf8data for a unicode version.
- * Returns NULL if the version requested is too new.
- *
- * Two normalization forms are supported: nfdi and nfdicf.
- *
- * nfdi:
- * - Apply unicode normalization form NFD.
- * - Remove any Default_Ignorable_Code_Point.
- *
- * nfdicf:
- * - Apply unicode normalization form NFD.
- * - Remove any Default_Ignorable_Code_Point.
- * - Apply a full casefold (C + F).
- */
-extern const struct utf8data *utf8nfdi(unsigned int maxage);
-extern const struct utf8data *utf8nfdicf(unsigned int maxage);
-
-/*
- * Determine the maximum age of any unicode character in the string.
- * Returns 0 if only unassigned code points are present.
- * Returns -1 if the input is not valid UTF-8.
- */
-extern int utf8agemax(const struct utf8data *data, const char *s);
-extern int utf8nagemax(const struct utf8data *data, const char *s, size_t len);
-
-/*
- * Determine the minimum age of any unicode character in the string.
- * Returns 0 if any unassigned code points are present.
- * Returns -1 if the input is not valid UTF-8.
- */
-extern int utf8agemin(const struct utf8data *data, const char *s);
-extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len);
+int utf8version_is_supported(const struct unicode_map *um, unsigned int version);
/*
* Determine the length of the normalized from of the string,
@@ -65,8 +21,8 @@ extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len);
* Returns 0 if only ignorable code points are present.
* Returns -1 if the input is not valid UTF-8.
*/
-extern ssize_t utf8len(const struct utf8data *data, const char *s);
-extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
+ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
+ const char *s, size_t len);
/* Needed in struct utf8cursor below. */
#define UTF8HANGULLEAF (12)
@@ -75,7 +31,8 @@ extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
* Cursor structure used by the normalizer.
*/
struct utf8cursor {
- const struct utf8data *data;
+ const struct unicode_map *um;
+ enum utf8_normalization n;
const char *s;
const char *p;
const char *ss;
@@ -92,10 +49,8 @@ struct utf8cursor {
* Returns 0 on success.
* Returns -1 on failure.
*/
-extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
- const char *s);
-extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
- const char *s, size_t len);
+int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
+ enum utf8_normalization n, const char *s, size_t len);
/*
* Get the next byte in the normalization.
@@ -105,4 +60,24 @@ extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
*/
extern int utf8byte(struct utf8cursor *u8c);
+struct utf8data {
+ unsigned int maxage;
+ unsigned int offset;
+};
+
+struct utf8data_table {
+ const unsigned int *utf8agetab;
+ int utf8agetab_size;
+
+ const struct utf8data *utf8nfdicfdata;
+ int utf8nfdicfdata_size;
+
+ const struct utf8data *utf8nfdidata;
+ int utf8nfdidata_size;
+
+ const unsigned char *utf8data;
+};
+
+extern struct utf8data_table utf8_data_table;
+
#endif /* UTF8NORM_H */
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 22bf14ab2d16..e26b10132d47 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -15,6 +15,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/mm.h>
+#include <linux/mm_inline.h>
#include <linux/mmu_notifier.h>
#include <linux/poll.h>
#include <linux/slab.h>
@@ -877,7 +878,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
new_flags, vma->anon_vma,
vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
- NULL_VM_UFFD_CTX);
+ NULL_VM_UFFD_CTX, vma_anon_name(vma));
if (prev)
vma = prev;
else
@@ -1436,7 +1437,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
- ((struct vm_userfaultfd_ctx){ ctx }));
+ ((struct vm_userfaultfd_ctx){ ctx }),
+ vma_anon_name(vma));
if (prev) {
vma = prev;
goto next;
@@ -1613,7 +1615,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
- NULL_VM_UFFD_CTX);
+ NULL_VM_UFFD_CTX, vma_anon_name(vma));
if (prev) {
vma = prev;
goto next;
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 6f49bf39183c..c557a030acfe 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -4,7 +4,6 @@
* All Rights Reserved.
*/
#include "xfs.h"
-#include <linux/backing-dev.h>
#include "xfs_message.h"
#include "xfs_trace.h"
@@ -26,6 +25,6 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
"%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)",
current->comm, current->pid,
(unsigned int)size, __func__, lflags);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ memalloc_retry_wait(lflags);
} while (1);
}
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index c43877c8a279..505533c43a92 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -93,21 +93,6 @@ struct getbmapx {
#define XFS_FMR_OWN_DEFECTIVE FMR_OWNER('X', 8) /* bad blocks */
/*
- * Structure for XFS_IOC_FSSETDM.
- * For use by backup and restore programs to set the XFS on-disk inode
- * fields di_dmevmask and di_dmstate. These must be set to exactly and
- * only values previously obtained via xfs_bulkstat! (Specifically the
- * struct xfs_bstat fields bs_dmevmask and bs_dmstate.)
- */
-#ifndef HAVE_FSDMIDATA
-struct fsdmidata {
- __u32 fsd_dmevmask; /* corresponds to di_dmevmask */
- __u16 fsd_padding;
- __u16 fsd_dmstate; /* corresponds to di_dmstate */
-};
-#endif
-
-/*
* File segment locking set data type for 64 bit access.
* Also used for all the RESV/FREE interfaces.
*/
@@ -562,16 +547,10 @@ typedef struct xfs_fsop_handlereq {
/*
* Compound structures for passing args through Handle Request interfaces
- * xfs_fssetdm_by_handle, xfs_attrlist_by_handle, xfs_attrmulti_by_handle
- * - ioctls: XFS_IOC_FSSETDM_BY_HANDLE, XFS_IOC_ATTRLIST_BY_HANDLE, and
- * XFS_IOC_ATTRMULTI_BY_HANDLE
+ * xfs_attrlist_by_handle, xfs_attrmulti_by_handle
+ * - ioctls: XFS_IOC_ATTRLIST_BY_HANDLE, and XFS_IOC_ATTRMULTI_BY_HANDLE
*/
-typedef struct xfs_fsop_setdm_handlereq {
- struct xfs_fsop_handlereq hreq; /* handle information */
- struct fsdmidata __user *data; /* DMAPI data */
-} xfs_fsop_setdm_handlereq_t;
-
/*
* Flags passed in xfs_attr_multiop.am_flags for the attr ioctl interface.
*
@@ -781,15 +760,15 @@ struct xfs_scrub_metadata {
* For 'documentation' purposed more than anything else,
* the "cmd #" field reflects the IRIX fcntl number.
*/
-#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64)
-#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64)
+/* XFS_IOC_ALLOCSP ------- deprecated 10 */
+/* XFS_IOC_FREESP -------- deprecated 11 */
#define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr)
#define XFS_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define XFS_IOC_FSSETXATTR FS_IOC_FSSETXATTR
-#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64)
-#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64)
+/* XFS_IOC_ALLOCSP64 ----- deprecated 36 */
+/* XFS_IOC_FREESP64 ------ deprecated 37 */
#define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap)
-#define XFS_IOC_FSSETDM _IOW ('X', 39, struct fsdmidata)
+/* XFS_IOC_FSSETDM ------- deprecated 39 */
#define XFS_IOC_RESVSP _IOW ('X', 40, struct xfs_flock64)
#define XFS_IOC_UNRESVSP _IOW ('X', 41, struct xfs_flock64)
#define XFS_IOC_RESVSP64 _IOW ('X', 42, struct xfs_flock64)
@@ -831,7 +810,7 @@ struct xfs_scrub_metadata {
#define XFS_IOC_FREEZE _IOWR('X', 119, int) /* aka FIFREEZE */
#define XFS_IOC_THAW _IOWR('X', 120, int) /* aka FITHAW */
-#define XFS_IOC_FSSETDM_BY_HANDLE _IOW ('X', 121, struct xfs_fsop_setdm_handlereq)
+/* XFS_IOC_FSSETDM_BY_HANDLE -- deprecated 121 */
#define XFS_IOC_ATTRLIST_BY_HANDLE _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq)
#define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
#define XFS_IOC_FSGEOMETRY_V4 _IOR ('X', 124, struct xfs_fsop_geom_v4)
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index bed798792226..90aebfe9dc5f 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -281,7 +281,7 @@ xchk_superblock(
features_mask = cpu_to_be32(XFS_SB_VERSION2_ATTR2BIT);
if ((sb->sb_features2 & features_mask) !=
(cpu_to_be32(mp->m_sb.sb_features2) & features_mask))
- xchk_block_set_corrupt(sc, bp);
+ xchk_block_set_preen(sc, bp);
if (!xfs_has_crc(mp)) {
/* all v5 fields must be zero */
@@ -290,39 +290,38 @@ xchk_superblock(
offsetof(struct xfs_dsb, sb_features_compat)))
xchk_block_set_corrupt(sc, bp);
} else {
- /* Check compat flags; all are set at mkfs time. */
- features_mask = cpu_to_be32(XFS_SB_FEAT_COMPAT_UNKNOWN);
- if ((sb->sb_features_compat & features_mask) !=
- (cpu_to_be32(mp->m_sb.sb_features_compat) & features_mask))
+ /* compat features must match */
+ if (sb->sb_features_compat !=
+ cpu_to_be32(mp->m_sb.sb_features_compat))
xchk_block_set_corrupt(sc, bp);
- /* Check ro compat flags; all are set at mkfs time. */
- features_mask = cpu_to_be32(XFS_SB_FEAT_RO_COMPAT_UNKNOWN |
- XFS_SB_FEAT_RO_COMPAT_FINOBT |
- XFS_SB_FEAT_RO_COMPAT_RMAPBT |
- XFS_SB_FEAT_RO_COMPAT_REFLINK);
- if ((sb->sb_features_ro_compat & features_mask) !=
- (cpu_to_be32(mp->m_sb.sb_features_ro_compat) &
- features_mask))
+ /* ro compat features must match */
+ if (sb->sb_features_ro_compat !=
+ cpu_to_be32(mp->m_sb.sb_features_ro_compat))
xchk_block_set_corrupt(sc, bp);
- /* Check incompat flags; all are set at mkfs time. */
- features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_UNKNOWN |
- XFS_SB_FEAT_INCOMPAT_FTYPE |
- XFS_SB_FEAT_INCOMPAT_SPINODES |
- XFS_SB_FEAT_INCOMPAT_META_UUID);
- if ((sb->sb_features_incompat & features_mask) !=
- (cpu_to_be32(mp->m_sb.sb_features_incompat) &
- features_mask))
- xchk_block_set_corrupt(sc, bp);
+ /*
+ * NEEDSREPAIR is ignored on a secondary super, so we should
+ * clear it when we find it, though it's not a corruption.
+ */
+ features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR);
+ if ((cpu_to_be32(mp->m_sb.sb_features_incompat) ^
+ sb->sb_features_incompat) & features_mask)
+ xchk_block_set_preen(sc, bp);
- /* Check log incompat flags; all are set at mkfs time. */
- features_mask = cpu_to_be32(XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN);
- if ((sb->sb_features_log_incompat & features_mask) !=
- (cpu_to_be32(mp->m_sb.sb_features_log_incompat) &
- features_mask))
+ /* all other incompat features must match */
+ if ((cpu_to_be32(mp->m_sb.sb_features_incompat) ^
+ sb->sb_features_incompat) & ~features_mask)
xchk_block_set_corrupt(sc, bp);
+ /*
+ * log incompat features protect newer log record types from
+ * older log recovery code. Log recovery doesn't check the
+ * secondary supers, so we can clear these if needed.
+ */
+ if (sb->sb_features_log_incompat)
+ xchk_block_set_preen(sc, bp);
+
/* Don't care about sb_crc */
if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align))
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index d7bfed52f4cd..6da7f2ca77de 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -52,6 +52,18 @@ xrep_superblock(
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
+ /*
+ * Don't write out a secondary super with NEEDSREPAIR or log incompat
+ * features set, since both are ignored when set on a secondary.
+ */
+ if (xfs_has_crc(mp)) {
+ struct xfs_dsb *sb = bp->b_addr;
+
+ sb->sb_features_incompat &=
+ ~cpu_to_be32(XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR);
+ sb->sb_features_log_incompat = 0;
+ }
+
/* Write this to disk. */
xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF);
xfs_trans_log_buf(sc->tp, bp, 0, BBTOB(bp->b_length) - 1);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 797ea0c8b14e..d4a387d3d0ce 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -771,8 +771,7 @@ int
xfs_alloc_file_space(
struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t len,
- int alloc_type)
+ xfs_off_t len)
{
xfs_mount_t *mp = ip->i_mount;
xfs_off_t count;
@@ -865,8 +864,8 @@ xfs_alloc_file_space(
goto error;
error = xfs_bmapi_write(tp, ip, startoffset_fsb,
- allocatesize_fsb, alloc_type, 0, imapp,
- &nimaps);
+ allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
+ &nimaps);
if (error)
goto error;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 9f993168b55b..24b37d211f1d 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -54,7 +54,7 @@ int xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip,
/* preallocation and hole punch interface */
int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t len, int alloc_type);
+ xfs_off_t len);
int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index bbb0fbd34e64..b45e0d50a405 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -394,7 +394,7 @@ xfs_buf_alloc_pages(
}
XFS_STATS_INC(bp->b_mount, xb_page_retries);
- congestion_wait(BLK_RW_ASYNC, HZ / 50);
+ memalloc_retry_wait(gfp_mask);
}
return 0;
}
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 8310005af00f..a7174a5b3203 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -138,7 +138,8 @@ xfs_dir2_sf_getdents(
STATIC int
xfs_dir2_block_getdents(
struct xfs_da_args *args,
- struct dir_context *ctx)
+ struct dir_context *ctx,
+ unsigned int *lock_mode)
{
struct xfs_inode *dp = args->dp; /* incore directory inode */
struct xfs_buf *bp; /* buffer for block */
@@ -146,7 +147,6 @@ xfs_dir2_block_getdents(
int wantoff; /* starting block offset */
xfs_off_t cook;
struct xfs_da_geometry *geo = args->geo;
- int lock_mode;
unsigned int offset, next_offset;
unsigned int end;
@@ -156,12 +156,13 @@ xfs_dir2_block_getdents(
if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
return 0;
- lock_mode = xfs_ilock_data_map_shared(dp);
error = xfs_dir3_block_read(args->trans, dp, &bp);
- xfs_iunlock(dp, lock_mode);
if (error)
return error;
+ xfs_iunlock(dp, *lock_mode);
+ *lock_mode = 0;
+
/*
* Extract the byte offset we start at from the seek pointer.
* We'll skip entries before this.
@@ -344,7 +345,8 @@ STATIC int
xfs_dir2_leaf_getdents(
struct xfs_da_args *args,
struct dir_context *ctx,
- size_t bufsize)
+ size_t bufsize,
+ unsigned int *lock_mode)
{
struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
@@ -356,7 +358,6 @@ xfs_dir2_leaf_getdents(
xfs_dir2_off_t curoff; /* current overall offset */
int length; /* temporary length value */
int byteoff; /* offset in current block */
- int lock_mode;
unsigned int offset = 0;
int error = 0; /* error return value */
@@ -390,13 +391,16 @@ xfs_dir2_leaf_getdents(
bp = NULL;
}
- lock_mode = xfs_ilock_data_map_shared(dp);
+ if (*lock_mode == 0)
+ *lock_mode = xfs_ilock_data_map_shared(dp);
error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff,
&rablk, &bp);
- xfs_iunlock(dp, lock_mode);
if (error || !bp)
break;
+ xfs_iunlock(dp, *lock_mode);
+ *lock_mode = 0;
+
xfs_dir3_data_check(dp, bp);
/*
* Find our position in the block.
@@ -496,7 +500,7 @@ xfs_dir2_leaf_getdents(
*
* If supplied, the transaction collects locked dir buffers to avoid
* nested buffer deadlocks. This function does not dirty the
- * transaction. The caller should ensure that the inode is locked
+ * transaction. The caller must hold the IOLOCK (shared or exclusive)
* before calling this function.
*/
int
@@ -507,8 +511,9 @@ xfs_readdir(
size_t bufsize)
{
struct xfs_da_args args = { NULL };
- int rval;
- int v;
+ unsigned int lock_mode;
+ int isblock;
+ int error;
trace_xfs_readdir(dp);
@@ -516,6 +521,7 @@ xfs_readdir(
return -EIO;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
+ ASSERT(xfs_isilocked(dp, XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
XFS_STATS_INC(dp->i_mount, xs_dir_getdents);
args.dp = dp;
@@ -523,13 +529,22 @@ xfs_readdir(
args.trans = tp;
if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_getdents(&args, ctx);
- else if ((rval = xfs_dir2_isblock(&args, &v)))
- ;
- else if (v)
- rval = xfs_dir2_block_getdents(&args, ctx);
- else
- rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
+ return xfs_dir2_sf_getdents(&args, ctx);
- return rval;
+ lock_mode = xfs_ilock_data_map_shared(dp);
+ error = xfs_dir2_isblock(&args, &isblock);
+ if (error)
+ goto out_unlock;
+
+ if (isblock) {
+ error = xfs_dir2_block_getdents(&args, ctx, &lock_mode);
+ goto out_unlock;
+ }
+
+ error = xfs_dir2_leaf_getdents(&args, ctx, bufsize, &lock_mode);
+
+out_unlock:
+ if (lock_mode)
+ xfs_iunlock(dp, lock_mode);
+ return error;
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8d4c5ca261bd..22ad207bedf4 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1051,8 +1051,7 @@ xfs_file_fallocate(
}
if (!xfs_is_always_cow_inode(ip)) {
- error = xfs_alloc_file_space(ip, offset, len,
- XFS_BMAPI_PREALLOC);
+ error = xfs_alloc_file_space(ip, offset, len);
if (error)
goto out_unlock;
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 2e718728986f..9644f938990c 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1854,28 +1854,20 @@ xfs_inodegc_worker(
}
/*
- * Force all currently queued inode inactivation work to run immediately, and
- * wait for the work to finish. Two pass - queue all the work first pass, wait
- * for it in a second pass.
+ * Force all currently queued inode inactivation work to run immediately and
+ * wait for the work to finish.
*/
void
xfs_inodegc_flush(
struct xfs_mount *mp)
{
- struct xfs_inodegc *gc;
- int cpu;
-
if (!xfs_is_inodegc_enabled(mp))
return;
trace_xfs_inodegc_flush(mp, __return_address);
xfs_inodegc_queue_all(mp);
-
- for_each_online_cpu(cpu) {
- gc = per_cpu_ptr(mp->m_inodegc, cpu);
- flush_work(&gc->work);
- }
+ flush_workqueue(mp->m_inodegc_wq);
}
/*
@@ -1886,18 +1878,12 @@ void
xfs_inodegc_stop(
struct xfs_mount *mp)
{
- struct xfs_inodegc *gc;
- int cpu;
-
if (!xfs_clear_inodegc_enabled(mp))
return;
xfs_inodegc_queue_all(mp);
+ drain_workqueue(mp->m_inodegc_wq);
- for_each_online_cpu(cpu) {
- gc = per_cpu_ptr(mp->m_inodegc, cpu);
- cancel_work_sync(&gc->work);
- }
trace_xfs_inodegc_stop(mp, __return_address);
}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 8ea47a9d5aad..03a6198c97f6 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -627,87 +627,6 @@ xfs_attrmulti_by_handle(
return error;
}
-int
-xfs_ioc_space(
- struct file *filp,
- xfs_flock64_t *bf)
-{
- struct inode *inode = file_inode(filp);
- struct xfs_inode *ip = XFS_I(inode);
- struct iattr iattr;
- enum xfs_prealloc_flags flags = XFS_PREALLOC_CLEAR;
- uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
- int error;
-
- if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
- return -EPERM;
-
- if (!(filp->f_mode & FMODE_WRITE))
- return -EBADF;
-
- if (!S_ISREG(inode->i_mode))
- return -EINVAL;
-
- if (xfs_is_always_cow_inode(ip))
- return -EOPNOTSUPP;
-
- if (filp->f_flags & O_DSYNC)
- flags |= XFS_PREALLOC_SYNC;
- if (filp->f_mode & FMODE_NOCMTIME)
- flags |= XFS_PREALLOC_INVISIBLE;
-
- error = mnt_want_write_file(filp);
- if (error)
- return error;
-
- xfs_ilock(ip, iolock);
- error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
- if (error)
- goto out_unlock;
- inode_dio_wait(inode);
-
- switch (bf->l_whence) {
- case 0: /*SEEK_SET*/
- break;
- case 1: /*SEEK_CUR*/
- bf->l_start += filp->f_pos;
- break;
- case 2: /*SEEK_END*/
- bf->l_start += XFS_ISIZE(ip);
- break;
- default:
- error = -EINVAL;
- goto out_unlock;
- }
-
- if (bf->l_start < 0 || bf->l_start > inode->i_sb->s_maxbytes) {
- error = -EINVAL;
- goto out_unlock;
- }
-
- if (bf->l_start > XFS_ISIZE(ip)) {
- error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
- bf->l_start - XFS_ISIZE(ip),
- XFS_BMAPI_PREALLOC);
- if (error)
- goto out_unlock;
- }
-
- iattr.ia_valid = ATTR_SIZE;
- iattr.ia_size = bf->l_start;
- error = xfs_vn_setattr_size(file_mnt_user_ns(filp), file_dentry(filp),
- &iattr);
- if (error)
- goto out_unlock;
-
- error = xfs_update_prealloc_flags(ip, flags);
-
-out_unlock:
- xfs_iunlock(ip, iolock);
- mnt_drop_write_file(filp);
- return error;
-}
-
/* Return 0 on success or positive error */
int
xfs_fsbulkstat_one_fmt(
@@ -1936,6 +1855,15 @@ xfs_fs_eofblocks_from_user(
}
/*
+ * These long-unused ioctls were removed from the official ioctl API in 5.17,
+ * but retain these definitions so that we can log warnings about them.
+ */
+#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64)
+#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64)
+#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64)
+#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64)
+
+/*
* Note: some of the ioctl's return positive numbers as a
* byte count indicating success, such as readlink_by_handle.
* So we don't "sign flip" like most other routines. This means
@@ -1965,13 +1893,11 @@ xfs_file_ioctl(
case XFS_IOC_ALLOCSP:
case XFS_IOC_FREESP:
case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP64: {
- xfs_flock64_t bf;
-
- if (copy_from_user(&bf, arg, sizeof(bf)))
- return -EFAULT;
- return xfs_ioc_space(filp, &bf);
- }
+ case XFS_IOC_FREESP64:
+ xfs_warn_once(mp,
+ "%s should use fallocate; XFS_IOC_{ALLOC,FREE}SP ioctl unsupported",
+ current->comm);
+ return -ENOTTY;
case XFS_IOC_DIOINFO: {
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
struct dioattr da;
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 845d3bcab74b..d4abba2c13c1 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -10,12 +10,6 @@ struct xfs_bstat;
struct xfs_ibulk;
struct xfs_inogrp;
-
-extern int
-xfs_ioc_space(
- struct file *filp,
- xfs_flock64_t *bf);
-
int
xfs_ioc_swapext(
xfs_swapext_t *sxp);
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 8783af203cfc..004ed2a251e8 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -28,22 +28,6 @@
#ifdef BROKEN_X86_ALIGNMENT
STATIC int
-xfs_compat_flock64_copyin(
- xfs_flock64_t *bf,
- compat_xfs_flock64_t __user *arg32)
-{
- if (get_user(bf->l_type, &arg32->l_type) ||
- get_user(bf->l_whence, &arg32->l_whence) ||
- get_user(bf->l_start, &arg32->l_start) ||
- get_user(bf->l_len, &arg32->l_len) ||
- get_user(bf->l_sysid, &arg32->l_sysid) ||
- get_user(bf->l_pid, &arg32->l_pid) ||
- copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32)))
- return -EFAULT;
- return 0;
-}
-
-STATIC int
xfs_compat_ioc_fsgeometry_v1(
struct xfs_mount *mp,
compat_xfs_fsop_geom_v1_t __user *arg32)
@@ -445,17 +429,6 @@ xfs_file_compat_ioctl(
switch (cmd) {
#if defined(BROKEN_X86_ALIGNMENT)
- case XFS_IOC_ALLOCSP_32:
- case XFS_IOC_FREESP_32:
- case XFS_IOC_ALLOCSP64_32:
- case XFS_IOC_FREESP64_32: {
- struct xfs_flock64 bf;
-
- if (xfs_compat_flock64_copyin(&bf, arg))
- return -EFAULT;
- cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
- return xfs_ioc_space(filp, &bf);
- }
case XFS_IOC_FSGEOMETRY_V1_32:
return xfs_compat_ioc_fsgeometry_v1(ip->i_mount, arg);
case XFS_IOC_FSGROWFSDATA_32: {
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index 9929482bf358..c14852362fce 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -142,28 +142,6 @@ typedef struct compat_xfs_fsop_attrmulti_handlereq {
_IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
#ifdef BROKEN_X86_ALIGNMENT
-/* on ia32 l_start is on a 32-bit boundary */
-typedef struct compat_xfs_flock64 {
- __s16 l_type;
- __s16 l_whence;
- __s64 l_start __attribute__((packed));
- /* len == 0 means until end of file */
- __s64 l_len __attribute__((packed));
- __s32 l_sysid;
- __u32 l_pid;
- __s32 l_pad[4]; /* reserve area */
-} compat_xfs_flock64_t;
-
-#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64)
-#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
-#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64)
-
typedef struct compat_xfs_fsop_geom_v1 {
__u32 blocksize; /* filesystem (data) block size */
__u32 rtextsize; /* realtime extent size */