Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_file.c | 20
-rw-r--r--  fs/Kconfig | 18
-rw-r--r--  fs/Makefile | 2
-rw-r--r--  fs/afs/flock.c | 4
-rw-r--r--  fs/aio.c | 2
-rw-r--r--  fs/block_dev.c | 263
-rw-r--r--  fs/btrfs/Makefile | 1
-rw-r--r--  fs/btrfs/acl.c | 11
-rw-r--r--  fs/btrfs/backref.c | 6
-rw-r--r--  fs/btrfs/backref.h | 2
-rw-r--r--  fs/btrfs/block-group.c | 114
-rw-r--r--  fs/btrfs/btrfs_inode.h | 27
-rw-r--r--  fs/btrfs/check-integrity.c | 60
-rw-r--r--  fs/btrfs/compression.c | 169
-rw-r--r--  fs/btrfs/compression.h | 5
-rw-r--r--  fs/btrfs/ctree.c | 62
-rw-r--r--  fs/btrfs/ctree.h | 94
-rw-r--r--  fs/btrfs/delayed-inode.c | 227
-rw-r--r--  fs/btrfs/dir-item.c | 76
-rw-r--r--  fs/btrfs/disk-io.c | 13
-rw-r--r--  fs/btrfs/extent-tree.c | 12
-rw-r--r--  fs/btrfs/extent_io.c | 318
-rw-r--r--  fs/btrfs/extent_io.h | 2
-rw-r--r--  fs/btrfs/file-item.c | 5
-rw-r--r--  fs/btrfs/file.c | 23
-rw-r--r--  fs/btrfs/free-space-cache.c | 26
-rw-r--r--  fs/btrfs/inode.c | 307
-rw-r--r--  fs/btrfs/ioctl.c | 188
-rw-r--r--  fs/btrfs/lzo.c | 236
-rw-r--r--  fs/btrfs/ordered-data.c | 5
-rw-r--r--  fs/btrfs/ordered-data.h | 2
-rw-r--r--  fs/btrfs/qgroup.c | 8
-rw-r--r--  fs/btrfs/raid56.c | 18
-rw-r--r--  fs/btrfs/ref-verify.c | 10
-rw-r--r--  fs/btrfs/relocation.c | 306
-rw-r--r--  fs/btrfs/send.c | 35
-rw-r--r--  fs/btrfs/space-info.c | 98
-rw-r--r--  fs/btrfs/struct-funcs.c | 8
-rw-r--r--  fs/btrfs/subpage.c | 24
-rw-r--r--  fs/btrfs/subpage.h | 3
-rw-r--r--  fs/btrfs/super.c | 56
-rw-r--r--  fs/btrfs/sysfs.c | 108
-rw-r--r--  fs/btrfs/tests/qgroup-tests.c | 30
-rw-r--r--  fs/btrfs/tree-checker.c | 38
-rw-r--r--  fs/btrfs/tree-log.c | 102
-rw-r--r--  fs/btrfs/verity.c | 811
-rw-r--r--  fs/btrfs/volumes.c | 234
-rw-r--r--  fs/btrfs/volumes.h | 29
-rw-r--r--  fs/btrfs/zlib.c | 54
-rw-r--r--  fs/btrfs/zoned.c | 22
-rw-r--r--  fs/btrfs/zoned.h | 1
-rw-r--r--  fs/btrfs/zstd.c | 39
-rw-r--r--  fs/ceph/addr.c | 9
-rw-r--r--  fs/ceph/caps.c | 38
-rw-r--r--  fs/ceph/file.c | 2
-rw-r--r--  fs/ceph/locks.c | 3
-rw-r--r--  fs/ceph/mds_client.c | 32
-rw-r--r--  fs/ceph/mdsmap.c | 8
-rw-r--r--  fs/ceph/snap.c | 37
-rw-r--r--  fs/ceph/super.h | 5
-rw-r--r--  fs/cifs/Kconfig | 30
-rw-r--r--  fs/cifs/cifs_debug.c | 11
-rw-r--r--  fs/cifs/cifs_swn.c | 2
-rw-r--r--  fs/cifs/cifs_unicode.c | 9
-rw-r--r--  fs/cifs/cifsencrypt.c | 89
-rw-r--r--  fs/cifs/cifsfs.c | 8
-rw-r--r--  fs/cifs/cifsglob.h | 37
-rw-r--r--  fs/cifs/cifspdu.h | 28
-rw-r--r--  fs/cifs/cifsproto.h | 10
-rw-r--r--  fs/cifs/cifssmb.c | 107
-rw-r--r--  fs/cifs/connect.c | 32
-rw-r--r--  fs/cifs/dir.c | 2
-rw-r--r--  fs/cifs/file.c | 50
-rw-r--r--  fs/cifs/fs_context.c | 25
-rw-r--r--  fs/cifs/fs_context.h | 3
-rw-r--r--  fs/cifs/fscache.c | 41
-rw-r--r--  fs/cifs/fscache.h | 23
-rw-r--r--  fs/cifs/inode.c | 25
-rw-r--r--  fs/cifs/misc.c | 50
-rw-r--r--  fs/cifs/readdir.c | 23
-rw-r--r--  fs/cifs/sess.c | 257
-rw-r--r--  fs/cifs/smb2maperror.c | 1
-rw-r--r--  fs/cifs/smb2ops.c | 2
-rw-r--r--  fs/cifs/smb2pdu.c | 2
-rw-r--r--  fs/cifs/smbencrypt.c | 139
-rw-r--r--  fs/cifs_common/Makefile | 7
-rw-r--r--  fs/cifs_common/arc4.h | 23
-rw-r--r--  fs/cifs_common/cifs_arc4.c | 87
-rw-r--r--  fs/cifs_common/cifs_md4.c | 197
-rw-r--r--  fs/cifs_common/md4.h | 27
-rw-r--r--  fs/configfs/file.c | 18
-rw-r--r--  fs/crypto/fname.c | 106
-rw-r--r--  fs/crypto/hooks.c | 44
-rw-r--r--  fs/dax.c | 2
-rw-r--r--  fs/eventfd.c | 12
-rw-r--r--  fs/exec.c | 8
-rw-r--r--  fs/ext2/Kconfig | 1
-rw-r--r--  fs/ext2/ext2.h | 11
-rw-r--r--  fs/ext2/file.c | 7
-rw-r--r--  fs/ext2/inode.c | 27
-rw-r--r--  fs/ext2/super.c | 3
-rw-r--r--  fs/ext4/ext4.h | 10
-rw-r--r--  fs/ext4/ext4_jbd2.c | 3
-rw-r--r--  fs/ext4/extents.c | 25
-rw-r--r--  fs/ext4/file.c | 13
-rw-r--r--  fs/ext4/inode.c | 47
-rw-r--r--  fs/ext4/ioctl.c | 4
-rw-r--r--  fs/ext4/mmp.c | 2
-rw-r--r--  fs/ext4/namei.c | 2
-rw-r--r--  fs/ext4/super.c | 13
-rw-r--r--  fs/ext4/symlink.c | 12
-rw-r--r--  fs/ext4/truncate.h | 8
-rw-r--r--  fs/f2fs/data.c | 8
-rw-r--r--  fs/f2fs/f2fs.h | 1
-rw-r--r--  fs/f2fs/file.c | 62
-rw-r--r--  fs/f2fs/namei.c | 12
-rw-r--r--  fs/f2fs/super.c | 1
-rw-r--r--  fs/f2fs/sysfs.c | 2
-rw-r--r--  fs/fat/fatent.c | 1
-rw-r--r--  fs/fcntl.c | 18
-rw-r--r--  fs/fs-writeback.c | 19
-rw-r--r--  fs/fuse/dax.c | 56
-rw-r--r--  fs/fuse/dir.c | 11
-rw-r--r--  fs/fuse/file.c | 10
-rw-r--r--  fs/fuse/fuse_i.h | 7
-rw-r--r--  fs/fuse/inode.c | 1
-rw-r--r--  fs/gfs2/aops.c | 9
-rw-r--r--  fs/gfs2/file.c | 3
-rw-r--r--  fs/gfs2/glock.c | 13
-rw-r--r--  fs/gfs2/glops.c | 27
-rw-r--r--  fs/gfs2/incore.h | 2
-rw-r--r--  fs/gfs2/lock_dlm.c | 5
-rw-r--r--  fs/gfs2/log.c | 2
-rw-r--r--  fs/gfs2/lops.c | 44
-rw-r--r--  fs/gfs2/meta_io.c | 7
-rw-r--r--  fs/gfs2/ops_fstype.c | 53
-rw-r--r--  fs/gfs2/super.c | 107
-rw-r--r--  fs/gfs2/super.h | 3
-rw-r--r--  fs/gfs2/util.c | 3
-rw-r--r--  fs/gfs2/util.h | 5
-rw-r--r--  fs/hpfs/Kconfig | 1
-rw-r--r--  fs/hpfs/file.c | 51
-rw-r--r--  fs/inode.c | 2
-rw-r--r--  fs/internal.h | 8
-rw-r--r--  fs/io-wq.c | 275
-rw-r--r--  fs/io-wq.h | 3
-rw-r--r--  fs/io_uring.c | 2003
-rw-r--r--  fs/ioctl.c | 211
-rw-r--r--  fs/isofs/inode.c | 27
-rw-r--r--  fs/isofs/isofs.h | 1
-rw-r--r--  fs/isofs/joliet.c | 4
-rw-r--r--  fs/ksmbd/Kconfig | 68
-rw-r--r--  fs/ksmbd/Makefile | 20
-rw-r--r--  fs/ksmbd/asn1.c | 343
-rw-r--r--  fs/ksmbd/asn1.h | 21
-rw-r--r--  fs/ksmbd/auth.c | 1364
-rw-r--r--  fs/ksmbd/auth.h | 67
-rw-r--r--  fs/ksmbd/connection.c | 413
-rw-r--r--  fs/ksmbd/connection.h | 213
-rw-r--r--  fs/ksmbd/crypto_ctx.c | 282
-rw-r--r--  fs/ksmbd/crypto_ctx.h | 74
-rw-r--r--  fs/ksmbd/glob.h | 49
-rw-r--r--  fs/ksmbd/ksmbd_netlink.h | 395
-rw-r--r--  fs/ksmbd/ksmbd_spnego_negtokeninit.asn1 | 31
-rw-r--r--  fs/ksmbd/ksmbd_spnego_negtokentarg.asn1 | 19
-rw-r--r--  fs/ksmbd/ksmbd_work.c | 80
-rw-r--r--  fs/ksmbd/ksmbd_work.h | 117
-rw-r--r--  fs/ksmbd/mgmt/ksmbd_ida.c | 46
-rw-r--r--  fs/ksmbd/mgmt/ksmbd_ida.h | 34
-rw-r--r--  fs/ksmbd/mgmt/share_config.c | 238
-rw-r--r--  fs/ksmbd/mgmt/share_config.h | 81
-rw-r--r--  fs/ksmbd/mgmt/tree_connect.c | 121
-rw-r--r--  fs/ksmbd/mgmt/tree_connect.h | 56
-rw-r--r--  fs/ksmbd/mgmt/user_config.c | 69
-rw-r--r--  fs/ksmbd/mgmt/user_config.h | 66
-rw-r--r--  fs/ksmbd/mgmt/user_session.c | 369
-rw-r--r--  fs/ksmbd/mgmt/user_session.h | 106
-rw-r--r--  fs/ksmbd/misc.c | 338
-rw-r--r--  fs/ksmbd/misc.h | 35
-rw-r--r--  fs/ksmbd/ndr.c | 345
-rw-r--r--  fs/ksmbd/ndr.h | 22
-rw-r--r--  fs/ksmbd/nterr.h | 543
-rw-r--r--  fs/ksmbd/ntlmssp.h | 169
-rw-r--r--  fs/ksmbd/oplock.c | 1709
-rw-r--r--  fs/ksmbd/oplock.h | 131
-rw-r--r--  fs/ksmbd/server.c | 633
-rw-r--r--  fs/ksmbd/server.h | 70
-rw-r--r--  fs/ksmbd/smb2misc.c | 438
-rw-r--r--  fs/ksmbd/smb2ops.c | 312
-rw-r--r--  fs/ksmbd/smb2pdu.c | 8373
-rw-r--r--  fs/ksmbd/smb2pdu.h | 1698
-rw-r--r--  fs/ksmbd/smb_common.c | 674
-rw-r--r--  fs/ksmbd/smb_common.h | 542
-rw-r--r--  fs/ksmbd/smbacl.c | 1366
-rw-r--r--  fs/ksmbd/smbacl.h | 212
-rw-r--r--  fs/ksmbd/smbfsctl.h | 91
-rw-r--r--  fs/ksmbd/smbstatus.h | 1822
-rw-r--r--  fs/ksmbd/transport_ipc.c | 874
-rw-r--r--  fs/ksmbd/transport_ipc.h | 47
-rw-r--r--  fs/ksmbd/transport_rdma.c | 2058
-rw-r--r--  fs/ksmbd/transport_rdma.h | 63
-rw-r--r--  fs/ksmbd/transport_tcp.c | 618
-rw-r--r--  fs/ksmbd/transport_tcp.h | 13
-rw-r--r--  fs/ksmbd/unicode.c | 384
-rw-r--r--  fs/ksmbd/unicode.h | 357
-rw-r--r--  fs/ksmbd/uniupr.h | 268
-rw-r--r--  fs/ksmbd/vfs.c | 1895
-rw-r--r--  fs/ksmbd/vfs.h | 197
-rw-r--r--  fs/ksmbd/vfs_cache.c | 725
-rw-r--r--  fs/ksmbd/vfs_cache.h | 178
-rw-r--r--  fs/ksmbd/xattr.h | 122
-rw-r--r--  fs/lockd/svc.c | 2
-rw-r--r--  fs/lockd/svc4proc.c | 7
-rw-r--r--  fs/lockd/svclock.c | 82
-rw-r--r--  fs/lockd/svcproc.c | 6
-rw-r--r--  fs/lockd/svcsubs.c | 114
-rw-r--r--  fs/locks.c | 117
-rw-r--r--  fs/namei.c | 286
-rw-r--r--  fs/namespace.c | 67
-rw-r--r--  fs/nfs/export.c | 2
-rw-r--r--  fs/nfs/file.c | 5
-rw-r--r--  fs/nfsd/lockd.c | 8
-rw-r--r--  fs/nfsd/nfs4state.c | 34
-rw-r--r--  fs/nfsd/nfsproc.c | 1
-rw-r--r--  fs/nfsd/trace.h | 17
-rw-r--r--  fs/nfsd/vfs.c | 44
-rw-r--r--  fs/nilfs2/super.c | 2
-rw-r--r--  fs/notify/fanotify/fanotify_user.c | 268
-rw-r--r--  fs/notify/fsnotify.c | 6
-rw-r--r--  fs/notify/fsnotify.h | 15
-rw-r--r--  fs/notify/inotify/inotify_user.c | 17
-rw-r--r--  fs/notify/mark.c | 52
-rw-r--r--  fs/ocfs2/locks.c | 4
-rw-r--r--  fs/open.c | 8
-rw-r--r--  fs/overlayfs/export.c | 2
-rw-r--r--  fs/overlayfs/file.c | 47
-rw-r--r--  fs/overlayfs/readdir.c | 5
-rw-r--r--  fs/pipe.c | 52
-rw-r--r--  fs/read_write.c | 17
-rw-r--r--  fs/remap_range.c | 12
-rw-r--r--  fs/squashfs/block.c | 7
-rw-r--r--  fs/squashfs/lz4_wrapper.c | 2
-rw-r--r--  fs/squashfs/lzo_wrapper.c | 2
-rw-r--r--  fs/squashfs/xz_wrapper.c | 2
-rw-r--r--  fs/squashfs/zlib_wrapper.c | 2
-rw-r--r--  fs/squashfs/zstd_wrapper.c | 2
-rw-r--r--  fs/super.c | 2
-rw-r--r--  fs/timerfd.c | 16
-rw-r--r--  fs/ubifs/file.c | 13
-rw-r--r--  fs/udf/dir.c | 5
-rw-r--r--  fs/udf/ecma_167.h | 44
-rw-r--r--  fs/udf/inode.c | 3
-rw-r--r--  fs/udf/misc.c | 13
-rw-r--r--  fs/udf/namei.c | 13
-rw-r--r--  fs/udf/osta_udf.h | 22
-rw-r--r--  fs/udf/super.c | 75
-rw-r--r--  fs/udf/udf_sb.h | 2
-rw-r--r--  fs/udf/udfdecl.h | 4
-rw-r--r--  fs/udf/unicode.c | 4
-rw-r--r--  fs/xfs/xfs_bmap_util.c | 15
-rw-r--r--  fs/xfs/xfs_buf.c | 2
-rw-r--r--  fs/xfs/xfs_file.c | 13
-rw-r--r--  fs/xfs/xfs_inode.c | 121
-rw-r--r--  fs/xfs/xfs_inode.h | 3
-rw-r--r--  fs/xfs/xfs_super.c | 2
-rw-r--r--  fs/xfs/xfs_zonefs... (see full list above) fs/zonefs/super.c | 23
-rw-r--r--  fs/zonefs/zonefs.h | 7
267 files changed, 38302 insertions, 4751 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 59c32c9b799f..aab5e6538660 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -121,10 +121,6 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
- /* No mandatory locks */
- if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
- return -ENOLCK;
-
if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
filemap_write_and_wait(inode->i_mapping);
invalidate_mapping_pages(&inode->i_data, 0, -1);
@@ -312,10 +308,6 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %pD\n",
filp, cmd, fl, filp);
- /* No mandatory locks */
- if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
- goto out_err;
-
if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
filemap_write_and_wait(inode->i_mapping);
invalidate_mapping_pages(&inode->i_data, 0, -1);
@@ -327,7 +319,6 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
ret = v9fs_file_getlock(filp, fl);
else
ret = -EINVAL;
-out_err:
return ret;
}
@@ -348,10 +339,6 @@ static int v9fs_file_flock_dotl(struct file *filp, int cmd,
p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %pD\n",
filp, cmd, fl, filp);
- /* No mandatory locks */
- if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
- goto out_err;
-
if (!(fl->fl_flags & FL_FLOCK))
goto out_err;
@@ -625,12 +612,7 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma);
inode = file_inode(vma->vm_file);
-
- if (!mapping_can_writeback(inode->i_mapping))
- wbc.nr_to_write = 0;
-
- might_sleep();
- sync_inode(inode, &wbc);
+ filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
}
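
Note on the v9fs_mmap_vm_close() hunk above: the removed mapping_can_writeback() special case and the sync_inode() call are folded into the new filemap_fdatawrite_wbc() helper introduced in this series. A rough sketch of what that helper does, reconstructed for context (not part of this diff):

	int filemap_fdatawrite_wbc(struct address_space *mapping,
				   struct writeback_control *wbc)
	{
		int ret;

		/* Nothing to do for mappings that cannot write back. */
		if (!mapping_can_writeback(mapping) ||
		    !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
			return 0;

		wbc_attach_fdatawrite_inode(wbc, mapping->host);
		ret = do_writepages(mapping, wbc);
		wbc_detach_inode(wbc);
		return ret;
	}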
diff --git a/fs/Kconfig b/fs/Kconfig
index a7749c126b8e..b11bd4b387e1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -101,16 +101,6 @@ config FILE_LOCKING
for filesystems like NFS and for the flock() system
call. Disabling this option saves about 11k.
-config MANDATORY_FILE_LOCKING
- bool "Enable Mandatory file locking"
- depends on FILE_LOCKING
- default y
- help
- This option enables files appropriately marked files on appropriely
- mounted filesystems to support mandatory locking.
-
- To the best of my knowledge this is dead code that no one cares about.
-
source "fs/crypto/Kconfig"
source "fs/verity/Kconfig"
@@ -358,7 +348,15 @@ config NFS_V4_2_SSC_HELPER
source "net/sunrpc/Kconfig"
source "fs/ceph/Kconfig"
+
source "fs/cifs/Kconfig"
+source "fs/ksmbd/Kconfig"
+
+config CIFS_COMMON
+ tristate
+ default y if CIFS=y
+ default m if CIFS=m
+
source "fs/coda/Kconfig"
source "fs/afs/Kconfig"
source "fs/9p/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index f98f3e691c37..354e2ba3ee67 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -96,7 +96,9 @@ obj-$(CONFIG_LOCKD) += lockd/
obj-$(CONFIG_NLS) += nls/
obj-$(CONFIG_UNICODE) += unicode/
obj-$(CONFIG_SYSV_FS) += sysv/
+obj-$(CONFIG_CIFS_COMMON) += cifs_common/
obj-$(CONFIG_CIFS) += cifs/
+obj-$(CONFIG_SMB_SERVER) += ksmbd/
obj-$(CONFIG_HPFS_FS) += hpfs/
obj-$(CONFIG_NTFS_FS) += ntfs/
obj-$(CONFIG_UFS_FS) += ufs/
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index cb3054c7843e..c4210a3964d8 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -772,10 +772,6 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
fl->fl_type, fl->fl_flags,
(long long) fl->fl_start, (long long) fl->fl_end);
- /* AFS doesn't support mandatory locks */
- if (__mandatory_lock(&vnode->vfs_inode) && fl->fl_type != F_UNLCK)
- return -ENOLCK;
-
if (IS_GETLK(cmd))
return afs_do_getlk(file, fl);
diff --git a/fs/aio.c b/fs/aio.c
index 76ce0cc3ee4e..51b08ab01dff 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1695,7 +1695,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
list_del(&iocb->ki_list);
iocb->ki_res.res = mangle_poll(mask);
req->done = true;
- if (iocb->ki_eventfd && eventfd_signal_count()) {
+ if (iocb->ki_eventfd && eventfd_signal_allowed()) {
iocb = NULL;
INIT_WORK(&req->work, aio_poll_put_work);
schedule_work(&req->work);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9ef4f1fc2cb0..45df6cbccf12 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -35,6 +35,7 @@
#include <linux/uaccess.h>
#include <linux/suspend.h>
#include "internal.h"
+#include "../block/blk.h"
struct bdev_inode {
struct block_device bdev;
@@ -385,7 +386,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
+ bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
dio->is_sync = is_sync = is_sync_kiocb(iocb);
@@ -513,7 +514,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static __init int blkdev_init(void)
{
- return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
+ return bioset_init(&blkdev_dio_pool, 4,
+ offsetof(struct blkdev_dio, bio),
+ BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
}
module_init(blkdev_init);
@@ -686,7 +689,8 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
return retval;
}
-int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
+static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct inode *bd_inode = bdev_file_inode(filp);
struct block_device *bdev = I_BDEV(bd_inode);
@@ -707,7 +711,6 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
return error;
}
-EXPORT_SYMBOL(blkdev_fsync);
/**
* bdev_read_page() - Start reading a page from a block device
@@ -801,7 +804,6 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
if (!ei)
return NULL;
memset(&ei->bdev, 0, sizeof(ei->bdev));
- ei->bdev.bd_bdi = &noop_backing_dev_info;
return &ei->vfs_inode;
}
@@ -812,8 +814,15 @@ static void bdev_free_inode(struct inode *inode)
free_percpu(bdev->bd_stats);
kfree(bdev->bd_meta_info);
- if (!bdev_is_partition(bdev))
+ if (!bdev_is_partition(bdev)) {
+ if (bdev->bd_disk && bdev->bd_disk->bdi)
+ bdi_put(bdev->bd_disk->bdi);
kfree(bdev->bd_disk);
+ }
+
+ if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
+ blk_free_ext_minor(MINOR(bdev->bd_dev));
+
kmem_cache_free(bdev_cachep, BDEV_I(inode));
}
@@ -826,16 +835,9 @@ static void init_once(void *data)
static void bdev_evict_inode(struct inode *inode)
{
- struct block_device *bdev = &BDEV_I(inode)->bdev;
truncate_inode_pages_final(&inode->i_data);
invalidate_inode_buffers(inode); /* is it needed here? */
clear_inode(inode);
- /* Detach inode from wb early as bdi_put() may free bdi->wb */
- inode_detach_wb(inode);
- if (bdev->bd_bdi != &noop_backing_dev_info) {
- bdi_put(bdev->bd_bdi);
- bdev->bd_bdi = &noop_backing_dev_info;
- }
}
static const struct super_operations bdev_sops = {
@@ -902,9 +904,6 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
bdev->bd_disk = disk;
bdev->bd_partno = partno;
bdev->bd_inode = inode;
-#ifdef CONFIG_SYSFS
- INIT_LIST_HEAD(&bdev->bd_holder_disks);
-#endif
bdev->bd_stats = alloc_percpu(struct disk_stats);
if (!bdev->bd_stats) {
iput(inode);
@@ -921,31 +920,6 @@ void bdev_add(struct block_device *bdev, dev_t dev)
insert_inode_hash(bdev->bd_inode);
}
-static struct block_device *bdget(dev_t dev)
-{
- struct inode *inode;
-
- inode = ilookup(blockdev_superblock, dev);
- if (!inode)
- return NULL;
- return &BDEV_I(inode)->bdev;
-}
-
-/**
- * bdgrab -- Grab a reference to an already referenced block device
- * @bdev: Block device to grab a reference to.
- *
- * Returns the block_device with an additional reference when successful,
- * or NULL if the inode is already beeing freed.
- */
-struct block_device *bdgrab(struct block_device *bdev)
-{
- if (!igrab(bdev->bd_inode))
- return NULL;
- return bdev;
-}
-EXPORT_SYMBOL(bdgrab);
-
long nr_blockdev_pages(void)
{
struct inode *inode;
@@ -959,12 +933,6 @@ long nr_blockdev_pages(void)
return ret;
}
-void bdput(struct block_device *bdev)
-{
- iput(bdev->bd_inode);
-}
-EXPORT_SYMBOL(bdput);
-
/**
* bd_may_claim - test whether a block device can be claimed
* @bdev: block device of interest
@@ -1094,148 +1062,6 @@ void bd_abort_claiming(struct block_device *bdev, void *holder)
}
EXPORT_SYMBOL(bd_abort_claiming);
-#ifdef CONFIG_SYSFS
-struct bd_holder_disk {
- struct list_head list;
- struct gendisk *disk;
- int refcnt;
-};
-
-static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
- struct gendisk *disk)
-{
- struct bd_holder_disk *holder;
-
- list_for_each_entry(holder, &bdev->bd_holder_disks, list)
- if (holder->disk == disk)
- return holder;
- return NULL;
-}
-
-static int add_symlink(struct kobject *from, struct kobject *to)
-{
- return sysfs_create_link(from, to, kobject_name(to));
-}
-
-static void del_symlink(struct kobject *from, struct kobject *to)
-{
- sysfs_remove_link(from, kobject_name(to));
-}
-
-/**
- * bd_link_disk_holder - create symlinks between holding disk and slave bdev
- * @bdev: the claimed slave bdev
- * @disk: the holding disk
- *
- * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
- *
- * This functions creates the following sysfs symlinks.
- *
- * - from "slaves" directory of the holder @disk to the claimed @bdev
- * - from "holders" directory of the @bdev to the holder @disk
- *
- * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
- * passed to bd_link_disk_holder(), then:
- *
- * /sys/block/dm-0/slaves/sda --> /sys/block/sda
- * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
- *
- * The caller must have claimed @bdev before calling this function and
- * ensure that both @bdev and @disk are valid during the creation and
- * lifetime of these symlinks.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
-{
- struct bd_holder_disk *holder;
- int ret = 0;
-
- mutex_lock(&bdev->bd_disk->open_mutex);
-
- WARN_ON_ONCE(!bdev->bd_holder);
-
- /* FIXME: remove the following once add_disk() handles errors */
- if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir))
- goto out_unlock;
-
- holder = bd_find_holder_disk(bdev, disk);
- if (holder) {
- holder->refcnt++;
- goto out_unlock;
- }
-
- holder = kzalloc(sizeof(*holder), GFP_KERNEL);
- if (!holder) {
- ret = -ENOMEM;
- goto out_unlock;
- }
-
- INIT_LIST_HEAD(&holder->list);
- holder->disk = disk;
- holder->refcnt = 1;
-
- ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
- if (ret)
- goto out_free;
-
- ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
- if (ret)
- goto out_del;
- /*
- * bdev could be deleted beneath us which would implicitly destroy
- * the holder directory. Hold on to it.
- */
- kobject_get(bdev->bd_holder_dir);
-
- list_add(&holder->list, &bdev->bd_holder_disks);
- goto out_unlock;
-
-out_del:
- del_symlink(disk->slave_dir, bdev_kobj(bdev));
-out_free:
- kfree(holder);
-out_unlock:
- mutex_unlock(&bdev->bd_disk->open_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(bd_link_disk_holder);
-
-/**
- * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
- * @bdev: the calimed slave bdev
- * @disk: the holding disk
- *
- * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
- *
- * CONTEXT:
- * Might sleep.
- */
-void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
-{
- struct bd_holder_disk *holder;
-
- mutex_lock(&bdev->bd_disk->open_mutex);
-
- holder = bd_find_holder_disk(bdev, disk);
-
- if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
- del_symlink(disk->slave_dir, bdev_kobj(bdev));
- del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
- kobject_put(bdev->bd_holder_dir);
- list_del_init(&holder->list);
- kfree(holder);
- }
-
- mutex_unlock(&bdev->bd_disk->open_mutex);
-}
-EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
-#endif
-
static void blkdev_flush_mapping(struct block_device *bdev)
{
WARN_ON_ONCE(bdev->bd_holders);
@@ -1260,11 +1086,8 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
}
}
- if (!bdev->bd_openers) {
+ if (!bdev->bd_openers)
set_init_blocksize(bdev);
- if (bdev->bd_bdi == &noop_backing_dev_info)
- bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
- }
if (test_bit(GD_NEED_PART_SCAN, &disk->state))
bdev_disk_changed(disk, false);
bdev->bd_openers++;
@@ -1282,16 +1105,14 @@ static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
static int blkdev_get_part(struct block_device *part, fmode_t mode)
{
struct gendisk *disk = part->bd_disk;
- struct block_device *whole;
int ret;
if (part->bd_openers)
goto done;
- whole = bdgrab(disk->part0);
- ret = blkdev_get_whole(whole, mode);
+ ret = blkdev_get_whole(bdev_whole(part), mode);
if (ret)
- goto out_put_whole;
+ return ret;
ret = -ENXIO;
if (!bdev_nr_sectors(part))
@@ -1299,16 +1120,12 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
disk->open_partitions++;
set_init_blocksize(part);
- if (part->bd_bdi == &noop_backing_dev_info)
- part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
done:
part->bd_openers++;
return 0;
out_blkdev_put:
- blkdev_put_whole(whole, mode);
-out_put_whole:
- bdput(whole);
+ blkdev_put_whole(bdev_whole(part), mode);
return ret;
}
@@ -1321,42 +1138,42 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
blkdev_flush_mapping(part);
whole->bd_disk->open_partitions--;
blkdev_put_whole(whole, mode);
- bdput(whole);
}
struct block_device *blkdev_get_no_open(dev_t dev)
{
struct block_device *bdev;
- struct gendisk *disk;
+ struct inode *inode;
- bdev = bdget(dev);
- if (!bdev) {
+ inode = ilookup(blockdev_superblock, dev);
+ if (!inode) {
blk_request_module(dev);
- bdev = bdget(dev);
- if (!bdev)
+ inode = ilookup(blockdev_superblock, dev);
+ if (!inode)
return NULL;
}
- disk = bdev->bd_disk;
- if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
- goto bdput;
- if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
- goto put_disk;
- if (!try_module_get(bdev->bd_disk->fops->owner))
- goto put_disk;
+ /* switch from the inode reference to a device mode one: */
+ bdev = &BDEV_I(inode)->bdev;
+ if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
+ bdev = NULL;
+ iput(inode);
+
+ if (!bdev)
+ return NULL;
+ if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) ||
+ !try_module_get(bdev->bd_disk->fops->owner)) {
+ put_device(&bdev->bd_device);
+ return NULL;
+ }
+
return bdev;
-put_disk:
- put_disk(disk);
-bdput:
- bdput(bdev);
- return NULL;
}
void blkdev_put_no_open(struct block_device *bdev)
{
module_put(bdev->bd_disk->fops->owner);
- put_disk(bdev->bd_disk);
- bdput(bdev);
+ put_device(&bdev->bd_device);
}
/**
@@ -1409,7 +1226,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
mutex_lock(&disk->open_mutex);
ret = -ENXIO;
- if (!(disk->flags & GENHD_FL_UP))
+ if (!disk_live(disk))
goto abort_claiming;
if (bdev_is_partition(bdev))
ret = blkdev_get_part(bdev, mode);
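
With bdget(), bdgrab() and bdput() removed above, code outside the block layer is expected to go through the opener API, which manages the underlying reference itself. A minimal, hedged usage sketch (the function name and error handling are illustrative only):

	#include <linux/blkdev.h>

	static int example_open_and_sync(dev_t devt)
	{
		struct block_device *bdev;

		/* Takes and holds the device reference internally. */
		bdev = blkdev_get_by_dev(devt, FMODE_READ | FMODE_WRITE, NULL);
		if (IS_ERR(bdev))
			return PTR_ERR(bdev);

		sync_blockdev(bdev);

		/* Drops the reference taken by blkdev_get_by_dev(). */
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE);
		return 0;
	}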
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index cec88a66bd6c..3dcf9bcc2326 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -36,6 +36,7 @@ btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
btrfs-$(CONFIG_BLK_DEV_ZONED) += zoned.o
+btrfs-$(CONFIG_FS_VERITY) += verity.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index d95eb5c8cb37..c9f9789e828f 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -53,7 +53,8 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
}
static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
- struct inode *inode, struct posix_acl *acl, int type)
+ struct user_namespace *mnt_userns,
+ struct inode *inode, struct posix_acl *acl, int type)
{
int ret, size = 0;
const char *name;
@@ -114,12 +115,12 @@ int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
umode_t old_mode = inode->i_mode;
if (type == ACL_TYPE_ACCESS && acl) {
- ret = posix_acl_update_mode(&init_user_ns, inode,
+ ret = posix_acl_update_mode(mnt_userns, inode,
&inode->i_mode, &acl);
if (ret)
return ret;
}
- ret = __btrfs_set_acl(NULL, inode, acl, type);
+ ret = __btrfs_set_acl(NULL, mnt_userns, inode, acl, type);
if (ret)
inode->i_mode = old_mode;
return ret;
@@ -140,14 +141,14 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
return ret;
if (default_acl) {
- ret = __btrfs_set_acl(trans, inode, default_acl,
+ ret = __btrfs_set_acl(trans, &init_user_ns, inode, default_acl,
ACL_TYPE_DEFAULT);
posix_acl_release(default_acl);
}
if (acl) {
if (!ret)
- ret = __btrfs_set_acl(trans, inode, acl,
+ ret = __btrfs_set_acl(trans, &init_user_ns, inode, acl,
ACL_TYPE_ACCESS);
posix_acl_release(acl);
}
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 78b202d198b8..f735b8798ba1 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1211,7 +1211,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
again:
head = NULL;
- ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
BUG_ON(ret == 0);
@@ -1488,14 +1488,14 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots,
- bool ignore_offset, bool skip_commit_root_sem)
+ bool skip_commit_root_sem)
{
int ret;
if (!trans && !skip_commit_root_sem)
down_read(&fs_info->commit_root_sem);
ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
- time_seq, roots, ignore_offset);
+ time_seq, roots, false);
if (!trans && !skip_commit_root_sem)
up_read(&fs_info->commit_root_sem);
return ret;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index ff5f07f9940b..ba454032dbe2 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -47,7 +47,7 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
const u64 *extent_item_pos, bool ignore_offset);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 time_seq, struct ulist **roots, bool ignore_offset,
+ u64 time_seq, struct ulist **roots,
bool skip_commit_root_sem);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 9e7d9d0c763d..a3b830b8410a 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1561,7 +1561,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
div64_u64(zone_unusable * 100, bg->length));
trace_btrfs_reclaim_block_group(bg);
ret = btrfs_relocate_chunk(fs_info, bg->start);
- if (ret)
+ if (ret && ret != -EAGAIN)
btrfs_err(fs_info, "error relocating chunk %llu",
bg->start);
@@ -2105,11 +2105,22 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
bg->used = em->len;
bg->flags = map->type;
ret = btrfs_add_block_group_cache(fs_info, bg);
+ /*
+ * We may have some valid block group cache added already, in
+ * that case we skip to the next one.
+ */
+ if (ret == -EEXIST) {
+ ret = 0;
+ btrfs_put_block_group(bg);
+ continue;
+ }
+
if (ret) {
btrfs_remove_free_space_cache(bg);
btrfs_put_block_group(bg);
break;
}
+
btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
0, 0, &space_info);
bg->space_info = space_info;
@@ -2212,6 +2223,14 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
ret = check_chunk_block_group_mappings(info);
error:
btrfs_free_path(path);
+ /*
+ * We've hit some error while reading the extent tree, and have
+ * rescue=ibadroots mount option.
+ * Try to fill the tree using dummy block groups so that the user can
+ * continue to mount and grab their data.
+ */
+ if (ret && btrfs_test_opt(info, IGNOREBADROOTS))
+ ret = fill_dummy_bgs(info);
return ret;
}
@@ -2244,6 +2263,95 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
}
+static int insert_dev_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device, u64 chunk_offset,
+ u64 start, u64 num_bytes)
+{
+ struct btrfs_fs_info *fs_info = device->fs_info;
+ struct btrfs_root *root = fs_info->dev_root;
+ struct btrfs_path *path;
+ struct btrfs_dev_extent *extent;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ int ret;
+
+ WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
+ WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = device->devid;
+ key.type = BTRFS_DEV_EXTENT_KEY;
+ key.offset = start;
+ ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent));
+ if (ret)
+ goto out;
+
+ leaf = path->nodes[0];
+ extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent);
+ btrfs_set_dev_extent_chunk_tree(leaf, extent, BTRFS_CHUNK_TREE_OBJECTID);
+ btrfs_set_dev_extent_chunk_objectid(leaf, extent,
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
+
+ btrfs_set_dev_extent_length(leaf, extent, num_bytes);
+ btrfs_mark_buffer_dirty(leaf);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * This function belongs to phase 2.
+ *
+ * See the comment at btrfs_chunk_alloc() for details about the chunk allocation
+ * phases.
+ */
+static int insert_dev_extents(struct btrfs_trans_handle *trans,
+ u64 chunk_offset, u64 chunk_size)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_device *device;
+ struct extent_map *em;
+ struct map_lookup *map;
+ u64 dev_offset;
+ u64 stripe_size;
+ int i;
+ int ret = 0;
+
+ em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
+ if (IS_ERR(em))
+ return PTR_ERR(em);
+
+ map = em->map_lookup;
+ stripe_size = em->orig_block_len;
+
+ /*
+ * Take the device list mutex to prevent races with the final phase of
+ * a device replace operation that replaces the device object associated
+ * with the map's stripes, because the device object's id can change
+ * at any time during that final phase of the device replace operation
+ * (dev-replace.c:btrfs_dev_replace_finishing()), so we could grab the
+ * replaced device and then see it with an ID of BTRFS_DEV_REPLACE_DEVID,
+ * resulting in persisting a device extent item with such ID.
+ */
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ for (i = 0; i < map->num_stripes; i++) {
+ device = map->stripes[i].dev;
+ dev_offset = map->stripes[i].physical;
+
+ ret = insert_dev_extent(trans, device, chunk_offset, dev_offset,
+ stripe_size);
+ if (ret)
+ break;
+ }
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+
+ free_extent_map(em);
+ return ret;
+}
+
/*
* This function, btrfs_create_pending_block_groups(), belongs to the phase 2 of
* chunk allocation.
@@ -2278,8 +2386,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
if (ret)
btrfs_abort_transaction(trans, ret);
}
- ret = btrfs_finish_chunk_alloc(trans, block_group->start,
- block_group->length);
+ ret = insert_dev_extents(trans, block_group->start,
+ block_group->length);
if (ret)
btrfs_abort_transaction(trans, ret);
add_block_group_free_space(trans, block_group);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index c652e19ad74e..76ee1452c57b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -51,6 +51,13 @@ enum {
* the file range, inode's io_tree).
*/
BTRFS_INODE_NO_DELALLOC_FLUSH,
+ /*
+ * Set when we are working on enabling verity for a file. Computing and
+ * writing the whole Merkle tree can take a while so we want to prevent
+ * races where two separate tasks attempt to simultaneously start verity
+ * on the same file.
+ */
+ BTRFS_INODE_VERITY_IN_PROGRESS,
};
/* in memory btrfs inode */
@@ -189,8 +196,10 @@ struct btrfs_inode {
*/
u64 csum_bytes;
- /* flags field from the on disk inode */
+ /* Backwards incompatible flags, lower half of inode_item::flags */
u32 flags;
+ /* Read-only compatibility flags, upper half of inode_item::flags */
+ u32 ro_flags;
/*
* Counters to keep track of the number of extent item's we may use due
@@ -348,6 +357,22 @@ struct btrfs_dio_private {
u8 csums[];
};
+/*
+ * btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
+ * separate u32s. These two functions convert between the two representations.
+ */
+static inline u64 btrfs_inode_combine_flags(u32 flags, u32 ro_flags)
+{
+ return (flags | ((u64)ro_flags << 32));
+}
+
+static inline void btrfs_inode_split_flags(u64 inode_item_flags,
+ u32 *flags, u32 *ro_flags)
+{
+ *flags = (u32)inode_item_flags;
+ *ro_flags = (u32)(inode_item_flags >> 32);
+}
+
/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
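
A quick round trip through the two helpers added above (illustrative values; BTRFS_INODE_RO_VERITY is introduced by the ctree.h hunk later in this diff):

	static void example_flags_roundtrip(void)
	{
		u32 flags = BTRFS_INODE_NOATIME;	/* lower 32 bits */
		u32 ro_flags = BTRFS_INODE_RO_VERITY;	/* upper 32 bits */
		u64 on_disk = btrfs_inode_combine_flags(flags, ro_flags);

		/* on_disk == flags | ((u64)ro_flags << 32) */
		btrfs_inode_split_flags(on_disk, &flags, &ro_flags);
		/* flags and ro_flags hold their original values again. */
	}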
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 169508609324..86816088927f 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -243,47 +243,6 @@ struct btrfsic_state {
u32 datablock_size;
};
-static void btrfsic_block_init(struct btrfsic_block *b);
-static struct btrfsic_block *btrfsic_block_alloc(void);
-static void btrfsic_block_free(struct btrfsic_block *b);
-static void btrfsic_block_link_init(struct btrfsic_block_link *n);
-static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
-static void btrfsic_block_link_free(struct btrfsic_block_link *n);
-static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
-static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
-static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
-static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
-static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
- struct btrfsic_block_hashtable *h);
-static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
-static struct btrfsic_block *btrfsic_block_hashtable_lookup(
- struct block_device *bdev,
- u64 dev_bytenr,
- struct btrfsic_block_hashtable *h);
-static void btrfsic_block_link_hashtable_init(
- struct btrfsic_block_link_hashtable *h);
-static void btrfsic_block_link_hashtable_add(
- struct btrfsic_block_link *l,
- struct btrfsic_block_link_hashtable *h);
-static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
-static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
- struct block_device *bdev_ref_to,
- u64 dev_bytenr_ref_to,
- struct block_device *bdev_ref_from,
- u64 dev_bytenr_ref_from,
- struct btrfsic_block_link_hashtable *h);
-static void btrfsic_dev_state_hashtable_init(
- struct btrfsic_dev_state_hashtable *h);
-static void btrfsic_dev_state_hashtable_add(
- struct btrfsic_dev_state *ds,
- struct btrfsic_dev_state_hashtable *h);
-static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
-static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
- struct btrfsic_dev_state_hashtable *h);
-static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
-static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
-static int btrfsic_process_superblock(struct btrfsic_state *state,
- struct btrfs_fs_devices *fs_devices);
static int btrfsic_process_metablock(struct btrfsic_state *state,
struct btrfsic_block *block,
struct btrfsic_block_data_ctx *block_ctx,
@@ -313,14 +272,6 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
static int btrfsic_read_block(struct btrfsic_state *state,
struct btrfsic_block_data_ctx *block_ctx);
-static void btrfsic_dump_database(struct btrfsic_state *state);
-static int btrfsic_test_for_metadata(struct btrfsic_state *state,
- char **datav, unsigned int num_pages);
-static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
- u64 dev_bytenr, char **mapped_datav,
- unsigned int num_pages,
- struct bio *bio, int *bio_is_patched,
- int submit_bio_bh_rw);
static int btrfsic_process_written_superblock(
struct btrfsic_state *state,
struct btrfsic_block *const block,
@@ -1558,10 +1509,8 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
/* Pages must be unmapped in reverse order */
while (num_pages > 0) {
num_pages--;
- if (block_ctx->datav[num_pages]) {
- kunmap_local(block_ctx->datav[num_pages]);
+ if (block_ctx->datav[num_pages])
block_ctx->datav[num_pages] = NULL;
- }
if (block_ctx->pagev[num_pages]) {
__free_page(block_ctx->pagev[num_pages]);
block_ctx->pagev[num_pages] = NULL;
@@ -1638,7 +1587,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
i = j;
}
for (i = 0; i < num_pages; i++)
- block_ctx->datav[i] = kmap_local_page(block_ctx->pagev[i]);
+ block_ctx->datav[i] = page_address(block_ctx->pagev[i]);
return block_ctx->len;
}
@@ -2703,7 +2652,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
bio_for_each_segment(bvec, bio, iter) {
BUG_ON(bvec.bv_len != PAGE_SIZE);
- mapped_datav[i] = kmap_local_page(bvec.bv_page);
+ mapped_datav[i] = page_address(bvec.bv_page);
i++;
if (dev_state->state->print_mask &
@@ -2716,9 +2665,6 @@ static void __btrfsic_submit_bio(struct bio *bio)
mapped_datav, segs,
bio, &bio_is_patched,
bio->bi_opf);
- /* Unmap in reverse order */
- for (--i; i >= 0; i--)
- kunmap_local(mapped_datav[i]);
kfree(mapped_datav);
} else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) {
if (dev_state->state->print_mask &
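
The kmap_local_page()/kunmap_local() pairs dropped in the check-integrity hunks above are replaced with plain page_address() calls; presumably this is safe because the pages involved are never allocated from highmem (note the matching removal of __GFP_HIGHMEM in the compression.c hunk below).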
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 30d82cdf128c..7869ad12bc6e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -172,10 +172,9 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
/* Hash through the page sector by sector */
for (pg_offset = 0; pg_offset < bytes_left;
pg_offset += sectorsize) {
- kaddr = kmap_atomic(page);
+ kaddr = page_address(page);
crypto_shash_digest(shash, kaddr + pg_offset,
sectorsize, csum);
- kunmap_atomic(kaddr);
if (memcmp(&csum, cb_sum, csum_size) != 0) {
btrfs_print_data_csum_error(inode, disk_start,
@@ -565,6 +564,16 @@ static noinline int add_ra_bio_pages(struct inode *inode,
if (isize == 0)
return 0;
+ /*
+ * For current subpage support, we only support 64K page size,
+ * which means maximum compressed extent size (128K) is just 2x page
+ * size.
+ * This makes readahead less effective, so here disable readahead for
+ * subpage for now, until full compressed write is supported.
+ */
+ if (btrfs_sb(inode->i_sb)->sectorsize < PAGE_SIZE)
+ return 0;
+
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
while (last_offset < compressed_end) {
@@ -673,6 +682,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
struct page *page;
struct bio *comp_bio;
u64 cur_disk_byte = bio->bi_iter.bi_sector << 9;
+ u64 file_offset;
u64 em_len;
u64 em_start;
struct extent_map *em;
@@ -682,15 +692,17 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
em_tree = &BTRFS_I(inode)->extent_tree;
+ file_offset = bio_first_bvec_all(bio)->bv_offset +
+ page_offset(bio_first_page_all(bio));
+
/* we need the actual starting offset of this extent in the file */
read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree,
- page_offset(bio_first_page_all(bio)),
- fs_info->sectorsize);
+ em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
read_unlock(&em_tree->lock);
if (!em)
return BLK_STS_IOERR;
+ ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
if (!cb)
@@ -721,8 +733,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
goto fail1;
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
- cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
- __GFP_HIGHMEM);
+ cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS);
if (!cb->compressed_pages[pg_index]) {
faili = pg_index - 1;
ret = BLK_STS_RESOURCE;
@@ -1261,96 +1272,82 @@ void __cold btrfs_exit_compress(void)
}
/*
- * Copy uncompressed data from working buffer to pages.
+ * Copy decompressed data from working buffer to pages.
+ *
+ * @buf: The decompressed data buffer
+ * @buf_len: The decompressed data length
+ * @decompressed: Number of bytes that are already decompressed inside the
+ * compressed extent
+ * @cb: The compressed extent descriptor
+ * @orig_bio: The original bio that the caller wants to read for
+ *
+ * An easier to understand graph is like below:
+ *
+ * |<- orig_bio ->| |<- orig_bio->|
+ * |<------- full decompressed extent ----->|
+ * |<----------- @cb range ---->|
+ * | |<-- @buf_len -->|
+ * |<--- @decompressed --->|
+ *
+ * Note that, @cb can be a subpage of the full decompressed extent, but
+ * @cb->start always has the same as the orig_file_offset value of the full
+ * decompressed extent.
*
- * buf_start is the byte offset we're of the start of our workspace buffer.
+ * When reading compressed extent, we have to read the full compressed extent,
+ * while @orig_bio may only want part of the range.
+ * Thus this function will ensure only data covered by @orig_bio will be copied
+ * to.
*
- * total_out is the last byte of the buffer
+ * Return 0 if we have copied all needed contents for @orig_bio.
+ * Return >0 if we need continue decompress.
*/
-int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
- unsigned long total_out, u64 disk_start,
- struct bio *bio)
+int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
+ struct compressed_bio *cb, u32 decompressed)
{
- unsigned long buf_offset;
- unsigned long current_buf_start;
- unsigned long start_byte;
- unsigned long prev_start_byte;
- unsigned long working_bytes = total_out - buf_start;
- unsigned long bytes;
- struct bio_vec bvec = bio_iter_iovec(bio, bio->bi_iter);
-
- /*
- * start byte is the first byte of the page we're currently
- * copying into relative to the start of the compressed data.
- */
- start_byte = page_offset(bvec.bv_page) - disk_start;
-
- /* we haven't yet hit data corresponding to this page */
- if (total_out <= start_byte)
- return 1;
-
- /*
- * the start of the data we care about is offset into
- * the middle of our working buffer
- */
- if (total_out > start_byte && buf_start < start_byte) {
- buf_offset = start_byte - buf_start;
- working_bytes -= buf_offset;
- } else {
- buf_offset = 0;
- }
- current_buf_start = buf_start;
-
- /* copy bytes from the working buffer into the pages */
- while (working_bytes > 0) {
- bytes = min_t(unsigned long, bvec.bv_len,
- PAGE_SIZE - (buf_offset % PAGE_SIZE));
- bytes = min(bytes, working_bytes);
-
- memcpy_to_page(bvec.bv_page, bvec.bv_offset, buf + buf_offset,
- bytes);
- flush_dcache_page(bvec.bv_page);
+ struct bio *orig_bio = cb->orig_bio;
+ /* Offset inside the full decompressed extent */
+ u32 cur_offset;
+
+ cur_offset = decompressed;
+ /* The main loop to do the copy */
+ while (cur_offset < decompressed + buf_len) {
+ struct bio_vec bvec;
+ size_t copy_len;
+ u32 copy_start;
+ /* Offset inside the full decompressed extent */
+ u32 bvec_offset;
+
+ bvec = bio_iter_iovec(orig_bio, orig_bio->bi_iter);
+ /*
+ * cb->start may underflow, but subtracting that value can still
+ * give us correct offset inside the full decompressed extent.
+ */
+ bvec_offset = page_offset(bvec.bv_page) + bvec.bv_offset - cb->start;
- buf_offset += bytes;
- working_bytes -= bytes;
- current_buf_start += bytes;
+ /* Haven't reached the bvec range, exit */
+ if (decompressed + buf_len <= bvec_offset)
+ return 1;
- /* check if we need to pick another page */
- bio_advance(bio, bytes);
- if (!bio->bi_iter.bi_size)
- return 0;
- bvec = bio_iter_iovec(bio, bio->bi_iter);
- prev_start_byte = start_byte;
- start_byte = page_offset(bvec.bv_page) - disk_start;
+ copy_start = max(cur_offset, bvec_offset);
+ copy_len = min(bvec_offset + bvec.bv_len,
+ decompressed + buf_len) - copy_start;
+ ASSERT(copy_len);
/*
- * We need to make sure we're only adjusting
- * our offset into compression working buffer when
- * we're switching pages. Otherwise we can incorrectly
- * keep copying when we were actually done.
+ * Extra range check to ensure we didn't go beyond
+ * @buf + @buf_len.
*/
- if (start_byte != prev_start_byte) {
- /*
- * make sure our new page is covered by this
- * working buffer
- */
- if (total_out <= start_byte)
- return 1;
+ ASSERT(copy_start - decompressed < buf_len);
+ memcpy_to_page(bvec.bv_page, bvec.bv_offset,
+ buf + copy_start - decompressed, copy_len);
+ flush_dcache_page(bvec.bv_page);
+ cur_offset += copy_len;
- /*
- * the next page in the biovec might not be adjacent
- * to the last page, but it might still be found
- * inside this working buffer. bump our offset pointer
- */
- if (total_out > start_byte &&
- current_buf_start < start_byte) {
- buf_offset = start_byte - buf_start;
- working_bytes = total_out - start_byte;
- current_buf_start = buf_start + buf_offset;
- }
- }
+ bio_advance(orig_bio, copy_len);
+ /* Finished the bio */
+ if (!orig_bio->bi_iter.bi_size)
+ return 0;
}
-
return 1;
}
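
To make the new offset arithmetic in btrfs_decompress_buf2page() concrete, a hypothetical walk-through (numbers invented for illustration): say orig_bio wants the second 4K page of the decompressed extent (bvec_offset = 4096, bv_len = 4096) and the first call arrives with decompressed = 0 and buf_len = 8192. Then copy_start = max(0, 4096) = 4096, copy_len = min(4096 + 4096, 0 + 8192) - 4096 = 4096, and the copy source is buf + (4096 - 0); after bio_advance() the bio is finished, so the function returns 0.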
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index c359f20920d0..399be0b435bf 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -86,9 +86,8 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
unsigned long *total_out);
int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
unsigned long start_byte, size_t srclen, size_t destlen);
-int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
- unsigned long total_out, u64 disk_start,
- struct bio *bio);
+int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
+ struct compressed_bio *cb, u32 decompressed);
blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned int len, u64 disk_start,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c5c08c87e130..84627cbd5b5b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -726,21 +726,21 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
/*
* search for key in the extent_buffer. The items start at offset p,
- * and they are item_size apart. There are 'max' items in p.
+ * and they are item_size apart.
*
* the slot in the array is returned via slot, and it points to
* the place where you would insert key if it is not found in
* the array.
*
- * slot may point to max if the key is bigger than all of the keys
+ * Slot may point to total number of items if the key is bigger than
+ * all of the keys
*/
static noinline int generic_bin_search(struct extent_buffer *eb,
unsigned long p, int item_size,
- const struct btrfs_key *key,
- int max, int *slot)
+ const struct btrfs_key *key, int *slot)
{
int low = 0;
- int high = max;
+ int high = btrfs_header_nritems(eb);
int ret;
const int key_size = sizeof(struct btrfs_disk_key);
@@ -799,15 +799,11 @@ int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
if (btrfs_header_level(eb) == 0)
return generic_bin_search(eb,
offsetof(struct btrfs_leaf, items),
- sizeof(struct btrfs_item),
- key, btrfs_header_nritems(eb),
- slot);
+ sizeof(struct btrfs_item), key, slot);
else
return generic_bin_search(eb,
offsetof(struct btrfs_node, ptrs),
- sizeof(struct btrfs_key_ptr),
- key, btrfs_header_nritems(eb),
- slot);
+ sizeof(struct btrfs_key_ptr), key, slot);
}
static void root_add_used(struct btrfs_root *root, u32 size)
@@ -1237,7 +1233,6 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
u64 target;
u64 nread = 0;
u64 nread_max;
- struct extent_buffer *eb;
u32 nr;
u32 blocksize;
u32 nscan = 0;
@@ -1266,10 +1261,14 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
search = btrfs_node_blockptr(node, slot);
blocksize = fs_info->nodesize;
- eb = find_extent_buffer(fs_info, search);
- if (eb) {
- free_extent_buffer(eb);
- return;
+ if (path->reada != READA_FORWARD_ALWAYS) {
+ struct extent_buffer *eb;
+
+ eb = find_extent_buffer(fs_info, search);
+ if (eb) {
+ free_extent_buffer(eb);
+ return;
+ }
}
target = search;
@@ -2103,6 +2102,27 @@ again:
}
/*
+ * Execute search and call btrfs_previous_item to traverse backwards if the item
+ * was not found.
+ *
+ * Return 0 if found, 1 if not found and < 0 if error.
+ */
+int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
+ struct btrfs_path *path)
+{
+ int ret;
+
+ ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+ if (ret > 0)
+ ret = btrfs_previous_item(root, path, key->objectid, key->type);
+
+ if (ret == 0)
+ btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]);
+
+ return ret;
+}
+
+/*
* adjust the pointers going up the tree, starting at level
* making sure the right key of each node is points to 'key'.
* This is used after shifting pointers to the left, so it stops
@@ -4358,16 +4378,6 @@ next:
return 1;
}
-/*
- * search the tree again to find a leaf with greater keys
- * returns 0 if it found something or 1 if there are no greater leaves.
- * returns < 0 on io errors.
- */
-int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
-{
- return btrfs_next_old_leaf(root, path, 0);
-}
-
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq)
{
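
A hedged usage sketch of the btrfs_search_backwards() helper added above (the caller, key values and surrounding function are hypothetical; the pattern mirrors the open-coded search-slot-then-previous-item sequence it replaces):

	/* Find the last EXTENT_DATA item of inode 'ino', if any. */
	static int example_find_last_extent(struct btrfs_root *root,
					    struct btrfs_path *path, u64 ino)
	{
		struct btrfs_key key;
		int ret;

		key.objectid = ino;
		key.type = BTRFS_EXTENT_DATA_KEY;
		key.offset = (u64)-1;	/* past any real file offset */

		ret = btrfs_search_backwards(root, &key, path);
		if (ret == 0)
			/* 'key' now holds the key of the item found. */
			return 0;
		return ret < 0 ? ret : -ENOENT;
	}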
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e5e53e592d4f..f07c82fafa04 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -281,7 +281,8 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_COMPAT_RO_SUPP \
(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \
- BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
+ BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \
+ BTRFS_FEATURE_COMPAT_RO_VERITY)
#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
@@ -1012,8 +1013,6 @@ struct btrfs_fs_info {
u64 zoned;
};
- /* Max size to emit ZONE_APPEND write command */
- u64 max_zone_append_size;
struct mutex zoned_meta_io_lock;
spinlock_t treelog_bg_lock;
u64 treelog_bg;
@@ -1484,20 +1483,20 @@ do { \
/*
* Inode flags
*/
-#define BTRFS_INODE_NODATASUM (1 << 0)
-#define BTRFS_INODE_NODATACOW (1 << 1)
-#define BTRFS_INODE_READONLY (1 << 2)
-#define BTRFS_INODE_NOCOMPRESS (1 << 3)
-#define BTRFS_INODE_PREALLOC (1 << 4)
-#define BTRFS_INODE_SYNC (1 << 5)
-#define BTRFS_INODE_IMMUTABLE (1 << 6)
-#define BTRFS_INODE_APPEND (1 << 7)
-#define BTRFS_INODE_NODUMP (1 << 8)
-#define BTRFS_INODE_NOATIME (1 << 9)
-#define BTRFS_INODE_DIRSYNC (1 << 10)
-#define BTRFS_INODE_COMPRESS (1 << 11)
-
-#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
+#define BTRFS_INODE_NODATASUM (1U << 0)
+#define BTRFS_INODE_NODATACOW (1U << 1)
+#define BTRFS_INODE_READONLY (1U << 2)
+#define BTRFS_INODE_NOCOMPRESS (1U << 3)
+#define BTRFS_INODE_PREALLOC (1U << 4)
+#define BTRFS_INODE_SYNC (1U << 5)
+#define BTRFS_INODE_IMMUTABLE (1U << 6)
+#define BTRFS_INODE_APPEND (1U << 7)
+#define BTRFS_INODE_NODUMP (1U << 8)
+#define BTRFS_INODE_NOATIME (1U << 9)
+#define BTRFS_INODE_DIRSYNC (1U << 10)
+#define BTRFS_INODE_COMPRESS (1U << 11)
+
+#define BTRFS_INODE_ROOT_ITEM_INIT (1U << 31)
#define BTRFS_INODE_FLAG_MASK \
(BTRFS_INODE_NODATASUM | \
@@ -1514,6 +1513,10 @@ do { \
BTRFS_INODE_COMPRESS | \
BTRFS_INODE_ROOT_ITEM_INIT)
+#define BTRFS_INODE_RO_VERITY (1U << 0)
+
+#define BTRFS_INODE_RO_FLAG_MASK (BTRFS_INODE_RO_VERITY)
+
struct btrfs_map_token {
struct extent_buffer *eb;
char *kaddr;
@@ -2781,10 +2784,11 @@ enum btrfs_flush_state {
FLUSH_DELAYED_REFS = 4,
FLUSH_DELALLOC = 5,
FLUSH_DELALLOC_WAIT = 6,
- ALLOC_CHUNK = 7,
- ALLOC_CHUNK_FORCE = 8,
- RUN_DELAYED_IPUTS = 9,
- COMMIT_TRANS = 10,
+ FLUSH_DELALLOC_FULL = 7,
+ ALLOC_CHUNK = 8,
+ ALLOC_CHUNK_FORCE = 9,
+ RUN_DELAYED_IPUTS = 10,
+ COMMIT_TRANS = 11,
};
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
@@ -2901,10 +2905,13 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
}
-int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq);
+
+int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
+ struct btrfs_path *path);
+
static inline int btrfs_next_old_item(struct btrfs_root *root,
struct btrfs_path *p, u64 time_seq)
{
@@ -2913,6 +2920,18 @@ static inline int btrfs_next_old_item(struct btrfs_root *root,
return btrfs_next_old_leaf(root, p, time_seq);
return 0;
}
+
+/*
+ * Search the tree again to find a leaf with greater keys.
+ *
+ * Returns 0 if it found something or 1 if there are no greater leaves.
+ * Returns < 0 on error.
+ */
+static inline int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+ return btrfs_next_old_leaf(root, path, 0);
+}
+
static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
{
return btrfs_next_old_item(root, p, 0);
@@ -3145,7 +3164,8 @@ int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root,
- struct btrfs_root *parent_root);
+ struct btrfs_root *parent_root,
+ struct user_namespace *mnt_userns);
void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
unsigned *bits);
void btrfs_clear_delalloc_extent(struct inode *inode,
@@ -3194,10 +3214,10 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started, unsigned long *nr_written,
struct writeback_control *wbc);
-int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
+int btrfs_writepage_cow_fixup(struct page *page);
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
- u64 end, int uptodate);
+ u64 end, bool uptodate);
extern const struct dentry_operations btrfs_dentry_operations;
extern const struct iomap_ops btrfs_dio_iomap_ops;
extern const struct iomap_dio_ops btrfs_dio_ops;
@@ -3779,6 +3799,30 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
return signal_pending(current);
}
+/* verity.c */
+#ifdef CONFIG_FS_VERITY
+
+extern const struct fsverity_operations btrfs_verityops;
+int btrfs_drop_verity_items(struct btrfs_inode *inode);
+
+BTRFS_SETGET_FUNCS(verity_descriptor_encryption, struct btrfs_verity_descriptor_item,
+ encryption, 8);
+BTRFS_SETGET_FUNCS(verity_descriptor_size, struct btrfs_verity_descriptor_item,
+ size, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_encryption,
+ struct btrfs_verity_descriptor_item, encryption, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_size,
+ struct btrfs_verity_descriptor_item, size, 64);
+
+#else
+
+static inline int btrfs_drop_verity_items(struct btrfs_inode *inode)
+{
+ return 0;
+}
+
+#endif
+
/* Sanity test specific functions */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
void btrfs_test_destroy_inode(struct inode *inode);
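
For reference, each BTRFS_SETGET_FUNCS(name, type, member, bits) line in the verity block above generates a pair of endian-safe accessors that operate on an extent buffer; for the two verity fields the generated prototypes look roughly like this sketch:

	u8 btrfs_verity_descriptor_encryption(const struct extent_buffer *eb,
					      struct btrfs_verity_descriptor_item *s);
	void btrfs_set_verity_descriptor_encryption(struct extent_buffer *eb,
					      struct btrfs_verity_descriptor_item *s,
					      u8 val);
	u64 btrfs_verity_descriptor_size(const struct extent_buffer *eb,
					 struct btrfs_verity_descriptor_item *s);
	void btrfs_set_verity_descriptor_size(struct extent_buffer *eb,
					      struct btrfs_verity_descriptor_item *s,
					      u64 val);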
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 257c1e18abd4..1e08eb2b27f0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -6,7 +6,6 @@
#include <linux/slab.h>
#include <linux/iversion.h>
-#include <linux/sched/mm.h>
#include "misc.h"
#include "delayed-inode.h"
#include "disk-io.h"
@@ -672,176 +671,119 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
}
/*
- * This helper will insert some continuous items into the same leaf according
- * to the free space of the leaf.
+ * Insert a single delayed item or a batch of delayed items that have consecutive
+ * keys if they exist.
*/
-static int btrfs_batch_insert_items(struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_delayed_item *item)
+static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_delayed_item *first_item)
{
- struct btrfs_delayed_item *curr, *next;
- int free_space;
- int total_size = 0;
- struct extent_buffer *leaf;
- char *data_ptr;
- struct btrfs_key *keys;
- u32 *data_size;
- struct list_head head;
- int slot;
+ LIST_HEAD(batch);
+ struct btrfs_delayed_item *curr;
+ struct btrfs_delayed_item *next;
+ const int max_size = BTRFS_LEAF_DATA_SIZE(root->fs_info);
+ int total_size;
int nitems;
- int i;
- int ret = 0;
-
- BUG_ON(!path->nodes[0]);
+ char *ins_data = NULL;
+ struct btrfs_key *ins_keys;
+ u32 *ins_sizes;
+ int ret;
- leaf = path->nodes[0];
- free_space = btrfs_leaf_free_space(leaf);
- INIT_LIST_HEAD(&head);
+ list_add_tail(&first_item->tree_list, &batch);
+ nitems = 1;
+ total_size = first_item->data_len + sizeof(struct btrfs_item);
+ curr = first_item;
- next = item;
- nitems = 0;
+ while (true) {
+ int next_size;
- /*
- * count the number of the continuous items that we can insert in batch
- */
- while (total_size + next->data_len + sizeof(struct btrfs_item) <=
- free_space) {
- total_size += next->data_len + sizeof(struct btrfs_item);
- list_add_tail(&next->tree_list, &head);
- nitems++;
-
- curr = next;
next = __btrfs_next_delayed_item(curr);
- if (!next)
+ if (!next || !btrfs_is_continuous_delayed_item(curr, next))
break;
- if (!btrfs_is_continuous_delayed_item(curr, next))
+ next_size = next->data_len + sizeof(struct btrfs_item);
+ if (total_size + next_size > max_size)
break;
- }
- if (!nitems) {
- ret = 0;
- goto out;
+ list_add_tail(&next->tree_list, &batch);
+ nitems++;
+ total_size += next_size;
+ curr = next;
}
- keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
- if (!keys) {
- ret = -ENOMEM;
- goto out;
- }
+ if (nitems == 1) {
+ ins_keys = &first_item->key;
+ ins_sizes = &first_item->data_len;
+ } else {
+ int i = 0;
- data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
- if (!data_size) {
- ret = -ENOMEM;
- goto error;
+ ins_data = kmalloc(nitems * sizeof(u32) +
+ nitems * sizeof(struct btrfs_key), GFP_NOFS);
+ if (!ins_data) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ins_sizes = (u32 *)ins_data;
+ ins_keys = (struct btrfs_key *)(ins_data + nitems * sizeof(u32));
+ list_for_each_entry(curr, &batch, tree_list) {
+ ins_keys[i] = curr->key;
+ ins_sizes[i] = curr->data_len;
+ i++;
+ }
}
- /* get keys of all the delayed items */
- i = 0;
- list_for_each_entry(next, &head, tree_list) {
- keys[i] = next->key;
- data_size[i] = next->data_len;
- i++;
- }
+ ret = btrfs_insert_empty_items(trans, root, path, ins_keys, ins_sizes,
+ nitems);
+ if (ret)
+ goto out;
- /* insert the keys of the items */
- setup_items_for_insert(root, path, keys, data_size, nitems);
+ list_for_each_entry(curr, &batch, tree_list) {
+ char *data_ptr;
- /* insert the dir index items */
- slot = path->slots[0];
- list_for_each_entry_safe(curr, next, &head, tree_list) {
- data_ptr = btrfs_item_ptr(leaf, slot, char);
- write_extent_buffer(leaf, &curr->data,
- (unsigned long)data_ptr,
- curr->data_len);
- slot++;
+ data_ptr = btrfs_item_ptr(path->nodes[0], path->slots[0], char);
+ write_extent_buffer(path->nodes[0], &curr->data,
+ (unsigned long)data_ptr, curr->data_len);
+ path->slots[0]++;
+ }
- btrfs_delayed_item_release_metadata(root, curr);
+ /*
+ * Now release our path before releasing the delayed items and their
+ * metadata reservations, so that we don't block other tasks for more
+ * time than needed.
+ */
+ btrfs_release_path(path);
+ list_for_each_entry_safe(curr, next, &batch, tree_list) {
list_del(&curr->tree_list);
+ btrfs_delayed_item_release_metadata(root, curr);
btrfs_release_delayed_item(curr);
}
-
-error:
- kfree(data_size);
- kfree(keys);
out:
+ kfree(ins_data);
return ret;
}
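The rewritten btrfs_insert_delayed_item() above greedily grows a batch while the keys stay consecutive and the running size still fits in one leaf. A self-contained sketch of that greedy pattern, with all names hypothetical:

#include <stddef.h>

struct item { int key; size_t len; struct item *next; };

/* Walk a sorted list, extending the batch while keys are consecutive
 * and the accumulated size stays within the budget (one leaf). */
static size_t batch_consecutive(struct item *first, size_t max_size,
                                struct item **batch_end)
{
        size_t total = first->len;
        struct item *curr = first;

        while (curr->next &&
               curr->next->key == curr->key + 1 &&
               total + curr->next->len <= max_size) {
                total += curr->next->len;
                curr = curr->next;
        }
        *batch_end = curr;
        return total;
}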
-/*
- * This helper can just do simple insertion that needn't extend item for new
- * data, such as directory name index insertion, inode insertion.
- */
-static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_delayed_item *delayed_item)
-{
- struct extent_buffer *leaf;
- unsigned int nofs_flag;
- char *ptr;
- int ret;
-
- nofs_flag = memalloc_nofs_save();
- ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
- delayed_item->data_len);
- memalloc_nofs_restore(nofs_flag);
- if (ret < 0 && ret != -EEXIST)
- return ret;
-
- leaf = path->nodes[0];
-
- ptr = btrfs_item_ptr(leaf, path->slots[0], char);
-
- write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
- delayed_item->data_len);
- btrfs_mark_buffer_dirty(leaf);
-
- btrfs_delayed_item_release_metadata(root, delayed_item);
- return 0;
-}
-
-/*
- * we insert an item first, then if there are some continuous items, we try
- * to insert those items into the same leaf.
- */
static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_root *root,
struct btrfs_delayed_node *node)
{
- struct btrfs_delayed_item *curr, *prev;
int ret = 0;
-do_again:
- mutex_lock(&node->mutex);
- curr = __btrfs_first_delayed_insertion_item(node);
- if (!curr)
- goto insert_end;
-
- ret = btrfs_insert_delayed_item(trans, root, path, curr);
- if (ret < 0) {
- btrfs_release_path(path);
- goto insert_end;
- }
+ while (ret == 0) {
+ struct btrfs_delayed_item *curr;
- prev = curr;
- curr = __btrfs_next_delayed_item(prev);
- if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
- /* insert the continuous items into the same leaf */
- path->slots[0]++;
- btrfs_batch_insert_items(root, path, curr);
+ mutex_lock(&node->mutex);
+ curr = __btrfs_first_delayed_insertion_item(node);
+ if (!curr) {
+ mutex_unlock(&node->mutex);
+ break;
+ }
+ ret = btrfs_insert_delayed_item(trans, root, path, curr);
+ mutex_unlock(&node->mutex);
}
- btrfs_release_delayed_item(prev);
- btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_release_path(path);
- mutex_unlock(&node->mutex);
- goto do_again;
-
-insert_end:
- mutex_unlock(&node->mutex);
return ret;
}
@@ -914,7 +856,6 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
struct btrfs_delayed_item *curr, *prev;
- unsigned int nofs_flag;
int ret = 0;
do_again:
@@ -923,9 +864,7 @@ do_again:
if (!curr)
goto delete_fail;
- nofs_flag = memalloc_nofs_save();
ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
- memalloc_nofs_restore(nofs_flag);
if (ret < 0)
goto delete_fail;
else if (ret > 0) {
@@ -994,7 +933,6 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_key key;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
- unsigned int nofs_flag;
int mod;
int ret;
@@ -1007,9 +945,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
else
mod = 1;
- nofs_flag = memalloc_nofs_save();
ret = btrfs_lookup_inode(trans, root, path, &key, mod);
- memalloc_nofs_restore(nofs_flag);
if (ret > 0)
ret = -ENOENT;
if (ret < 0)
@@ -1066,9 +1002,7 @@ search:
key.type = BTRFS_INODE_EXTREF_KEY;
key.offset = -1;
- nofs_flag = memalloc_nofs_save();
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- memalloc_nofs_restore(nofs_flag);
if (ret < 0)
goto err_out;
ASSERT(ret);
@@ -1711,6 +1645,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *inode_item,
struct inode *inode)
{
+ u64 flags;
+
btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
@@ -1723,7 +1659,9 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
inode_peek_iversion(inode));
btrfs_set_stack_inode_transid(inode_item, trans->transid);
btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
- btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
+ flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
+ BTRFS_I(inode)->ro_flags);
+ btrfs_set_stack_inode_flags(inode_item, flags);
btrfs_set_stack_inode_block_group(inode_item, 0);
btrfs_set_stack_timespec_sec(&inode_item->atime,
@@ -1781,7 +1719,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
btrfs_stack_inode_sequence(inode_item));
inode->i_rdev = 0;
*rdev = btrfs_stack_inode_rdev(inode_item);
- BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
+ btrfs_inode_split_flags(btrfs_stack_inode_flags(inode_item),
+ &BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
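fill_stack_inode_item() and btrfs_fill_inode() now route the read-write flags and the read-only flags (which carry the verity bit) through btrfs_inode_combine_flags()/btrfs_inode_split_flags() when touching the on-disk item. A sketch of one plausible packing — the diff does not show the helpers, so the layout below (ro flags in the upper 32 bits) is an assumption:

#include <stdint.h>

/* Assumed layout: ro flags in the high half of the on-disk u64. */
static uint64_t combine_flags(uint32_t flags, uint32_t ro_flags)
{
        return ((uint64_t)ro_flags << 32) | flags;
}

static void split_flags(uint64_t inode_item_flags,
                        uint32_t *flags, uint32_t *ro_flags)
{
        *flags = (uint32_t)inode_item_flags;
        *ro_flags = (uint32_t)(inode_item_flags >> 32);
}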
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 98b63ebed539..f1274d5c3805 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -170,6 +170,25 @@ out_free:
return 0;
}
+static struct btrfs_dir_item *btrfs_lookup_match_dir(
+ struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *key, const char *name,
+ int name_len, int mod)
+{
+ const int ins_len = (mod < 0 ? -1 : 0);
+ const int cow = (mod != 0);
+ int ret;
+
+ ret = btrfs_search_slot(trans, root, key, path, ins_len, cow);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0)
+ return ERR_PTR(-ENOENT);
+
+ return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
+}
+
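btrfs_lookup_match_dir() folds the three-way btrfs_search_slot() result into a single pointer: negative returns become ERR_PTR(ret) and "key not found" becomes ERR_PTR(-ENOENT), which callers such as btrfs_lookup_dir_item() below translate back to NULL. A minimal sketch of the encoding, with the kernel's <linux/err.h> helpers re-declared for illustration:

#include <errno.h>
#include <stdint.h>

#define MAX_ERRNO 4095
#define ERR_PTR(err) ((void *)(intptr_t)(err))
#define PTR_ERR(ptr) ((long)(intptr_t)(ptr))
#define IS_ERR(ptr)  ((uintptr_t)(ptr) >= (uintptr_t)-MAX_ERRNO)

/* Hypothetical helper: map a search result to the pointer convention. */
static void *lookup_or_errptr(int search_ret, void *match)
{
        if (search_ret < 0)
                return ERR_PTR(search_ret);     /* hard error */
        if (search_ret > 0)
                return ERR_PTR(-ENOENT);        /* key not found */
        return match;                           /* may itself be NULL */
}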
/*
* lookup a directory item based on name. 'dir' is the objectid
* we're searching in, and 'mod' tells us if you plan on deleting the
@@ -181,23 +200,18 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
const char *name, int name_len,
int mod)
{
- int ret;
struct btrfs_key key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
+ struct btrfs_dir_item *di;
key.objectid = dir;
key.type = BTRFS_DIR_ITEM_KEY;
-
key.offset = btrfs_name_hash(name, name_len);
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
+ di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
+ if (IS_ERR(di) && PTR_ERR(di) == -ENOENT)
return NULL;
- return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
+ return di;
}
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
@@ -211,7 +225,6 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
int slot;
struct btrfs_path *path;
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -220,20 +233,20 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-
- /* return back any errors */
- if (ret < 0)
- goto out;
+ di = btrfs_lookup_match_dir(NULL, root, path, &key, name, name_len, 0);
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ /* Nothing found, we're safe */
+ if (ret == -ENOENT) {
+ ret = 0;
+ goto out;
+ }
- /* nothing found, we're safe */
- if (ret > 0) {
- ret = 0;
- goto out;
+ if (ret < 0)
+ goto out;
}
/* we found an item, look for our name in the item */
- di = btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
if (di) {
/* our exact name was found */
ret = -EEXIST;
@@ -274,21 +287,13 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
u64 objectid, const char *name, int name_len,
int mod)
{
- int ret;
struct btrfs_key key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
key.objectid = dir;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = objectid;
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
- return ERR_PTR(-ENOENT);
- return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
+ return btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
}
struct btrfs_dir_item *
@@ -345,21 +350,18 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
const char *name, u16 name_len,
int mod)
{
- int ret;
struct btrfs_key key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
+ struct btrfs_dir_item *di;
key.objectid = dir;
key.type = BTRFS_XATTR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
+
+ di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
+ if (IS_ERR(di) && PTR_ERR(di) == -ENOENT)
return NULL;
- return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
+ return di;
}
/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a59ab7b9aea0..2f9515dccce0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3392,11 +3392,16 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_alloc;
}
- /* For 4K sector size support, it's only read-only */
- if (PAGE_SIZE == SZ_64K && sectorsize == SZ_4K) {
- if (!sb_rdonly(sb) || btrfs_super_log_root(disk_super)) {
+ if (sectorsize != PAGE_SIZE) {
+ btrfs_warn(fs_info,
+ "read-write for sector size %u with page size %lu is experimental",
+ sectorsize, PAGE_SIZE);
+ }
+ if (sectorsize != PAGE_SIZE) {
+ if (btrfs_super_incompat_flags(fs_info->super_copy) &
+ BTRFS_FEATURE_INCOMPAT_RAID56) {
btrfs_err(fs_info,
- "subpage sectorsize %u only supported read-only for page size %lu",
+ "RAID56 is not yet supported for sector size %u with page size %lu",
sectorsize, PAGE_SIZE);
err = -EINVAL;
goto fail_alloc;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 268ce58d4569..fc3da7585fb7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -153,7 +153,7 @@ search_again:
else
key.type = BTRFS_EXTENT_ITEM_KEY;
- ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
goto out_free;
@@ -5950,9 +5950,9 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
*/
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
{
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_block_group *cache = NULL;
struct btrfs_device *device;
- struct list_head *devices;
u64 group_trimmed;
u64 range_end = U64_MAX;
u64 start;
@@ -6016,9 +6016,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
btrfs_warn(fs_info,
"failed to trim %llu block group(s), last error %d",
bg_failed, bg_ret);
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- devices = &fs_info->fs_devices->devices;
- list_for_each_entry(device, devices, dev_list) {
+
+ mutex_lock(&fs_devices->device_list_mutex);
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
continue;
@@ -6031,7 +6031,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
trimmed += group_trimmed;
}
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
if (dev_failed)
btrfs_warn(fs_info,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9e81d25dea70..aaddd7225348 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -13,6 +13,7 @@
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/cleancache.h>
+#include <linux/fsverity.h>
#include "misc.h"
#include "extent_io.h"
#include "extent-io-tree.h"
@@ -172,6 +173,8 @@ int __must_check submit_one_bio(struct bio *bio, int mirror_num,
bio->bi_private = NULL;
+ /* Caller should ensure the bio has at least some range added */
+ ASSERT(bio->bi_iter.bi_size);
if (is_data_inode(tree->private_data))
ret = btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
bio_flags);
@@ -2245,18 +2248,6 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
return bitset;
}
-/*
- * helper function to set a given page up to date if all the
- * extents in the tree for that page are up to date
- */
-static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
-{
- u64 start = page_offset(page);
- u64 end = start + PAGE_SIZE - 1;
- if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
- SetPageUptodate(page);
-}
-
int free_io_failure(struct extent_io_tree *failure_tree,
struct extent_io_tree *io_tree,
struct io_failure_record *rec)
@@ -2688,7 +2679,15 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
start + len <= page_offset(page) + PAGE_SIZE);
if (uptodate) {
- btrfs_page_set_uptodate(fs_info, page, start, len);
+ if (fsverity_active(page->mapping->host) &&
+ !PageError(page) &&
+ !PageUptodate(page) &&
+ start < i_size_read(page->mapping->host) &&
+ !fsverity_verify_page(page)) {
+ btrfs_page_set_error(fs_info, page, start, len);
+ } else {
+ btrfs_page_set_uptodate(fs_info, page, start, len);
+ }
} else {
btrfs_page_clear_uptodate(fs_info, page, start, len);
btrfs_page_set_error(fs_info, page, start, len);
@@ -2779,7 +2778,7 @@ next:
void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
{
struct btrfs_inode *inode;
- int uptodate = (err == 0);
+ const bool uptodate = (err == 0);
int ret = 0;
ASSERT(page && page->mapping);
@@ -2787,8 +2786,14 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
btrfs_writepage_endio_finish_ordered(inode, page, start, end, uptodate);
if (!uptodate) {
- ClearPageUptodate(page);
- SetPageError(page);
+ const struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ u32 len;
+
+ ASSERT(end + 1 - start <= U32_MAX);
+ len = end + 1 - start;
+
+ btrfs_page_clear_uptodate(fs_info, page, start, len);
+ btrfs_page_set_error(fs_info, page, start, len);
ret = err < 0 ? err : -EIO;
mapping_set_error(page->mapping, ret);
}
@@ -3097,7 +3102,7 @@ readpage_ok:
/* Update page status and unlock */
end_page_read(page, uptodate, start, len);
endio_readpage_release_extent(&processed, BTRFS_I(inode),
- start, end, uptodate);
+ start, end, PageUptodate(page));
}
/* Release the last extent */
endio_readpage_release_extent(&processed, NULL, 0, 0, false);
@@ -3153,11 +3158,13 @@ struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
return bio;
}
-struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
+struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
{
struct bio *bio;
struct btrfs_io_bio *btrfs_bio;
+ ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
+
/* this will never fail when it's backed by a bioset */
bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
ASSERT(bio);
@@ -3181,20 +3188,22 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
* @size: portion of page that we want to write
* @prev_bio_flags: flags of previous bio to see if we can merge the current one
* @bio_flags: flags of the current bio to see if we can merge them
- * @return: true if page was added, false otherwise
*
* Attempt to add a page to bio considering stripe alignment etc.
*
- * Return true if successfully page added. Otherwise, return false.
+ * Return >= 0 for the number of bytes added to the bio.
+ * Can return 0 if the current bio is already at a stripe/zone boundary.
+ * Return <0 for error.
*/
-static bool btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
- struct page *page,
- u64 disk_bytenr, unsigned int size,
- unsigned int pg_offset,
- unsigned long bio_flags)
+static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
+ struct page *page,
+ u64 disk_bytenr, unsigned int size,
+ unsigned int pg_offset,
+ unsigned long bio_flags)
{
struct bio *bio = bio_ctrl->bio;
u32 bio_size = bio->bi_iter.bi_size;
+ u32 real_size;
const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
bool contig;
int ret;
@@ -3203,29 +3212,36 @@ static bool btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
/* The limit should be calculated when bio_ctrl->bio is allocated */
ASSERT(bio_ctrl->len_to_oe_boundary && bio_ctrl->len_to_stripe_boundary);
if (bio_ctrl->bio_flags != bio_flags)
- return false;
+ return 0;
if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED)
contig = bio->bi_iter.bi_sector == sector;
else
contig = bio_end_sector(bio) == sector;
if (!contig)
- return false;
+ return 0;
- if (bio_size + size > bio_ctrl->len_to_oe_boundary ||
- bio_size + size > bio_ctrl->len_to_stripe_boundary)
- return false;
+ real_size = min(bio_ctrl->len_to_oe_boundary,
+ bio_ctrl->len_to_stripe_boundary) - bio_size;
+ real_size = min(real_size, size);
+
+ /*
+ * If real_size is 0, never call bio_add_*_page(): even with a size
+ * of 0, the bio would still execute its endio function on the page!
+ */
+ if (real_size == 0)
+ return 0;
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
- ret = bio_add_zone_append_page(bio, page, size, pg_offset);
+ ret = bio_add_zone_append_page(bio, page, real_size, pg_offset);
else
- ret = bio_add_page(bio, page, size, pg_offset);
+ ret = bio_add_page(bio, page, real_size, pg_offset);
- return ret == size;
+ return ret;
}
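The clamping above means btrfs_bio_add_page() never lets a bio cross the nearer of the stripe and ordered-extent boundaries, and its return value tells the caller how far it got. A sketch of just the arithmetic, assuming u32-sized limits as in the function:

#include <stdint.h>

/* Usable room is whatever remains below the nearer boundary; the
 * requested size is then clamped to that room (possibly to zero). */
static uint32_t clamp_to_boundaries(uint32_t bio_size, uint32_t size,
                                    uint32_t len_to_oe_boundary,
                                    uint32_t len_to_stripe_boundary)
{
        uint32_t limit = len_to_oe_boundary < len_to_stripe_boundary ?
                         len_to_oe_boundary : len_to_stripe_boundary;
        uint32_t real_size = limit - bio_size;

        return real_size < size ? real_size : size;
}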
static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
- struct btrfs_inode *inode)
+ struct btrfs_inode *inode, u64 file_offset)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_io_geometry geom;
@@ -3266,9 +3282,8 @@ static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
return 0;
}
- ASSERT(fs_info->max_zone_append_size > 0);
/* Ordered extent not yet created, so we're good */
- ordered = btrfs_lookup_ordered_extent(inode, logical);
+ ordered = btrfs_lookup_ordered_extent(inode, file_offset);
if (!ordered) {
bio_ctrl->len_to_oe_boundary = U32_MAX;
return 0;
@@ -3280,6 +3295,62 @@ static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
return 0;
}
+static int alloc_new_bio(struct btrfs_inode *inode,
+ struct btrfs_bio_ctrl *bio_ctrl,
+ struct writeback_control *wbc,
+ unsigned int opf,
+ bio_end_io_t end_io_func,
+ u64 disk_bytenr, u32 offset, u64 file_offset,
+ unsigned long bio_flags)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct bio *bio;
+ int ret;
+
+ /*
+ * For a compressed page range, its disk_bytenr is always the
+ * @disk_bytenr passed in, no matter whether we have added any range
+ * to a previous bio.
+ */
+ if (bio_flags & EXTENT_BIO_COMPRESSED)
+ bio = btrfs_bio_alloc(disk_bytenr);
+ else
+ bio = btrfs_bio_alloc(disk_bytenr + offset);
+ bio_ctrl->bio = bio;
+ bio_ctrl->bio_flags = bio_flags;
+ bio->bi_end_io = end_io_func;
+ bio->bi_private = &inode->io_tree;
+ bio->bi_write_hint = inode->vfs_inode.i_write_hint;
+ bio->bi_opf = opf;
+ ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
+ if (ret < 0)
+ goto error;
+ if (wbc) {
+ struct block_device *bdev;
+
+ bdev = fs_info->fs_devices->latest_bdev;
+ bio_set_dev(bio, bdev);
+ wbc_init_bio(wbc, bio);
+ }
+ if (btrfs_is_zoned(fs_info) && bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ struct btrfs_device *device;
+
+ device = btrfs_zoned_get_device(fs_info, disk_bytenr,
+ fs_info->sectorsize);
+ if (IS_ERR(device)) {
+ ret = PTR_ERR(device);
+ goto error;
+ }
+
+ btrfs_io_bio(bio)->device = device;
+ }
+ return 0;
+error:
+ bio_ctrl->bio = NULL;
+ bio->bi_status = errno_to_blk_status(ret);
+ bio_endio(bio);
+ return ret;
+}
+
/*
* @opf: bio REQ_OP_* and REQ_* flags as one value
* @wbc: optional writeback control for io accounting
@@ -3305,61 +3376,67 @@ static int submit_extent_page(unsigned int opf,
bool force_bio_submit)
{
int ret = 0;
- struct bio *bio;
- size_t io_size = min_t(size_t, size, PAGE_SIZE);
struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
- struct extent_io_tree *tree = &inode->io_tree;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ unsigned int cur = pg_offset;
ASSERT(bio_ctrl);
ASSERT(pg_offset < PAGE_SIZE && size <= PAGE_SIZE &&
pg_offset + size <= PAGE_SIZE);
- if (bio_ctrl->bio) {
- bio = bio_ctrl->bio;
- if (force_bio_submit ||
- !btrfs_bio_add_page(bio_ctrl, page, disk_bytenr, io_size,
- pg_offset, bio_flags)) {
- ret = submit_one_bio(bio, mirror_num, bio_ctrl->bio_flags);
+ if (force_bio_submit && bio_ctrl->bio) {
+ ret = submit_one_bio(bio_ctrl->bio, mirror_num, bio_ctrl->bio_flags);
+ bio_ctrl->bio = NULL;
+ if (ret < 0)
+ return ret;
+ }
+
+ while (cur < pg_offset + size) {
+ u32 offset = cur - pg_offset;
+ int added;
+
+ /* Allocate new bio if needed */
+ if (!bio_ctrl->bio) {
+ ret = alloc_new_bio(inode, bio_ctrl, wbc, opf,
+ end_io_func, disk_bytenr, offset,
+ page_offset(page) + cur,
+ bio_flags);
+ if (ret < 0)
+ return ret;
+ }
+ /*
+ * We must go through btrfs_bio_add_page() to ensure each
+ * page range won't cross various boundaries.
+ */
+ if (bio_flags & EXTENT_BIO_COMPRESSED)
+ added = btrfs_bio_add_page(bio_ctrl, page, disk_bytenr,
+ size - offset, pg_offset + offset,
+ bio_flags);
+ else
+ added = btrfs_bio_add_page(bio_ctrl, page,
+ disk_bytenr + offset, size - offset,
+ pg_offset + offset, bio_flags);
+
+ /* Metadata page range should never be split */
+ if (!is_data_inode(&inode->vfs_inode))
+ ASSERT(added == 0 || added == size - offset);
+
+ /* We added at least part of the page, update the accounting */
+ if (wbc && added)
+ wbc_account_cgroup_owner(wbc, page, added);
+
+ /* We have reached boundary, submit right now */
+ if (added < size - offset) {
+ /* The bio should contain some page(s) */
+ ASSERT(bio_ctrl->bio->bi_iter.bi_size);
+ ret = submit_one_bio(bio_ctrl->bio, mirror_num,
+ bio_ctrl->bio_flags);
bio_ctrl->bio = NULL;
if (ret < 0)
return ret;
- } else {
- if (wbc)
- wbc_account_cgroup_owner(wbc, page, io_size);
- return 0;
}
+ cur += added;
}
-
- bio = btrfs_bio_alloc(disk_bytenr);
- bio_add_page(bio, page, io_size, pg_offset);
- bio->bi_end_io = end_io_func;
- bio->bi_private = tree;
- bio->bi_write_hint = page->mapping->host->i_write_hint;
- bio->bi_opf = opf;
- if (wbc) {
- struct block_device *bdev;
-
- bdev = fs_info->fs_devices->latest_bdev;
- bio_set_dev(bio, bdev);
- wbc_init_bio(wbc, bio);
- wbc_account_cgroup_owner(wbc, page, io_size);
- }
- if (btrfs_is_zoned(fs_info) && bio_op(bio) == REQ_OP_ZONE_APPEND) {
- struct btrfs_device *device;
-
- device = btrfs_zoned_get_device(fs_info, disk_bytenr, io_size);
- if (IS_ERR(device))
- return PTR_ERR(device);
-
- btrfs_io_bio(bio)->device = device;
- }
-
- bio_ctrl->bio = bio;
- bio_ctrl->bio_flags = bio_flags;
- ret = calc_bio_boundaries(bio_ctrl, inode);
-
- return ret;
+ return 0;
}
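The loop in submit_extent_page() above advances a cursor by however many bytes btrfs_bio_add_page() accepted, submitting and reopening the bio whenever a boundary cuts the add short. A self-contained model of that control flow; every name here is illustrative, not a btrfs API:

#include <stdio.h>

#define BOUNDARY 64     /* stand-in for the stripe/ordered-extent limit */

static int bio_size = -1;       /* -1 means no bio is currently open */

static unsigned int add_to_bio(unsigned int len)
{
        unsigned int room = BOUNDARY - bio_size;
        unsigned int added = len < room ? len : room;

        bio_size += added;
        return added;
}

static void submit_range(unsigned int size)
{
        unsigned int cur = 0;

        while (cur < size) {
                unsigned int added;

                if (bio_size < 0)
                        bio_size = 0;           /* alloc a new bio */
                added = add_to_bio(size - cur);
                if (added < size - cur) {       /* hit the boundary */
                        printf("submit bio of %d bytes\n", bio_size);
                        bio_size = -1;
                }
                cur += added;
        }
        /* As in the code above, a final partially filled bio is left
         * open for the caller to submit. */
}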
static int attach_extent_buffer_page(struct extent_buffer *eb,
@@ -3488,7 +3565,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
size_t pg_offset = 0;
size_t iosize;
size_t blocksize = inode->i_sb->s_blocksize;
- unsigned long this_bio_flag = 0;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
ret = set_page_extent_mapped(page);
@@ -3519,6 +3595,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
}
begin_page_read(fs_info, page);
while (cur <= end) {
+ unsigned long this_bio_flag = 0;
bool force_bio_submit = false;
u64 disk_bytenr;
@@ -3627,7 +3704,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
/* the get_extent function already copied into the page */
if (test_range_bit(tree, cur, cur_end,
EXTENT_UPTODATE, 1, NULL)) {
- check_page_uptodate(tree, page);
unlock_extent(tree, cur, cur + iosize - 1);
end_page_read(page, true, cur, iosize);
cur = cur + iosize;
@@ -3722,14 +3798,9 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
delalloc_end, &page_started, nr_written, wbc);
if (ret) {
- SetPageError(page);
- /*
- * btrfs_run_delalloc_range should return < 0 for error
- * but just in case, we use > 0 here meaning the IO is
- * started, so we don't want to return > 0 unless
- * things are going well.
- */
- return ret < 0 ? ret : -EIO;
+ btrfs_page_set_error(inode->root->fs_info, page,
+ page_offset(page), PAGE_SIZE);
+ return ret;
}
/*
* delalloc_end is already one less than the total length, so
@@ -3829,9 +3900,8 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
int *nr_ret)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- u64 start = page_offset(page);
- u64 end = start + PAGE_SIZE - 1;
- u64 cur = start;
+ u64 cur = page_offset(page);
+ u64 end = cur + PAGE_SIZE - 1;
u64 extent_offset;
u64 block_start;
struct extent_map *em;
@@ -3841,7 +3911,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
const unsigned int write_flags = wbc_to_write_flags(wbc);
bool compressed;
- ret = btrfs_writepage_cow_fixup(page, start, end);
+ ret = btrfs_writepage_cow_fixup(page);
if (ret) {
/* Fixup worker will requeue */
redirty_page_for_writepage(wbc, page);
@@ -3865,7 +3935,16 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
if (cur >= i_size) {
btrfs_writepage_endio_finish_ordered(inode, page, cur,
- end, 1);
+ end, true);
+ /*
+ * This range is beyond i_size, thus we don't need to
+ * bother writing back.
+ * But we still need to clear the dirty subpage bit, or
+ * the next time the page gets dirtied, we will try to
+ * write back the sectors with subpage dirty bits set,
+ * causing writeback without an ordered extent.
+ */
+ btrfs_page_clear_dirty(fs_info, page, cur, end + 1 - cur);
break;
}
@@ -3915,7 +3994,8 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
nr++;
else
btrfs_writepage_endio_finish_ordered(inode,
- page, cur, cur + iosize - 1, 1);
+ page, cur, cur + iosize - 1, true);
+ btrfs_page_clear_dirty(fs_info, page, cur, iosize);
cur += iosize;
continue;
}
@@ -3951,6 +4031,12 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
cur += iosize;
nr++;
}
+ /*
+ * If we finish without a problem, we should not only clear the page
+ * dirty flag but also empty the subpage dirty bits.
+ */
+ if (!ret)
+ btrfs_page_assert_not_dirty(fs_info, page);
*nr_ret = nr;
return ret;
}
@@ -3981,7 +4067,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
WARN_ON(!PageLocked(page));
- ClearPageError(page);
+ btrfs_page_clear_error(btrfs_sb(inode->i_sb), page,
+ page_offset(page), PAGE_SIZE);
pg_offset = offset_in_page(i_size);
if (page->index > end_index ||
@@ -4022,10 +4109,39 @@ done:
set_page_writeback(page);
end_page_writeback(page);
}
- if (PageError(page)) {
- ret = ret < 0 ? ret : -EIO;
+ /*
+ * Here we used to have a check for PageError() and then set @ret and
+ * call end_extent_writepage().
+ *
+ * But in fact setting @ret here will cause different error paths
+ * between subpage and regular sectorsize.
+ *
+ * For regular page size, we never submit the current page, but only
+ * add it to the current bio; the bio submission can only happen when
+ * processing the next page. Thus if we hit the PageError() branch,
+ * @ret is already set to a non-zero value and will not get updated
+ * for regular sectorsize.
+ *
+ * But for the subpage case, it's possible we submit part of the
+ * current page, and thus can get PageError() set by a submitted bio
+ * of the same page, while our @ret is still 0.
+ *
+ * So here we unify the behavior and don't set @ret.
+ * The error can still be passed properly to the higher layer as the
+ * page will have its error bit set; here we just don't handle the
+ * IO failure.
+ *
+ * NOTE: This is just a hotfix for subpage.
+ * The root fix will be properly ending ordered extent when we hit
+ * an error during writeback.
+ *
+ * But that needs a bigger refactoring, as we not only need to grab the
+ * submitted OE, but also need to know exactly at which bytenr we hit
+ * the error.
+ * Currently the full page based __extent_writepage_io() is not
+ * capable of that.
+ */
+ if (PageError(page))
end_extent_writepage(page, ret, start, page_end);
- }
unlock_page(page);
ASSERT(ret <= 0);
return ret;
@@ -4984,7 +5100,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
ret = __extent_writepage(page, &wbc_writepages, &epd);
else {
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode),
- page, start, start + PAGE_SIZE - 1, 1);
+ page, start, start + PAGE_SIZE - 1, true);
unlock_page(page);
}
put_page(page);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 62027f551b44..53abdc280451 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -280,7 +280,7 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct bio *btrfs_bio_alloc(u64 first_byte);
struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
struct bio *btrfs_bio_clone(struct bio *bio);
-struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size);
+struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index df6631eefc65..2673c6ba7a4e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -233,7 +233,6 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_path *path, u64 objectid,
u64 offset, int mod)
{
- int ret;
struct btrfs_key file_key;
int ins_len = mod < 0 ? -1 : 0;
int cow = mod != 0;
@@ -241,8 +240,8 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
file_key.objectid = objectid;
file_key.offset = offset;
file_key.type = BTRFS_EXTENT_DATA_KEY;
- ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
- return ret;
+
+ return btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
}
/*
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ee34497500e1..7ff577005d0f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -16,6 +16,7 @@
#include <linux/btrfs.h>
#include <linux/uio.h>
#include <linux/iversion.h>
+#include <linux/fsverity.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -1340,7 +1341,18 @@ static int prepare_uptodate_page(struct inode *inode,
unlock_page(page);
return -EIO;
}
- if (page->mapping != inode->i_mapping) {
+
+ /*
+ * Since btrfs_readpage() will unlock the page before it
+ * returns, there is a window where btrfs_releasepage() can be
+ * called to release the page. Here we check both inode
+ * mapping and PagePrivate() to make sure the page was not
+ * released.
+ *
+ * The private flag check is essential for subpage as we need
+ * to store extra bitmap using page->private.
+ */
+ if (page->mapping != inode->i_mapping || !PagePrivate(page)) {
unlock_page(page);
return -EAGAIN;
}
@@ -3604,7 +3616,13 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
static int btrfs_file_open(struct inode *inode, struct file *filp)
{
+ int ret;
+
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+
+ ret = fsverity_file_open(inode, filp);
+ if (ret)
+ return ret;
return generic_file_open(inode, filp);
}
@@ -3633,6 +3651,9 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
+ if (fsverity_active(inode))
+ return 0;
+
if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos))
return 0;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 2131ae5b9ed7..da0eee7c9e5f 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -344,19 +344,13 @@ fail:
static void readahead_cache(struct inode *inode)
{
- struct file_ra_state *ra;
+ struct file_ra_state ra;
unsigned long last_index;
- ra = kzalloc(sizeof(*ra), GFP_NOFS);
- if (!ra)
- return;
-
- file_ra_state_init(ra, inode->i_mapping);
+ file_ra_state_init(&ra, inode->i_mapping);
last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
- page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
-
- kfree(ra);
+ page_cache_sync_readahead(inode->i_mapping, &ra, NULL, 0, last_index);
}
static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
@@ -2544,6 +2538,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
u64 offset = bytenr - block_group->start;
u64 to_free, to_unusable;
+ const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold);
spin_lock(&ctl->tree_lock);
if (!used)
@@ -2573,9 +2568,9 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
/* All the region is now unusable. Mark it as unused and reclaim */
if (block_group->zone_unusable == block_group->length) {
btrfs_mark_bg_unused(block_group);
- } else if (block_group->zone_unusable >=
- div_factor_fine(block_group->length,
- fs_info->bg_reclaim_threshold)) {
+ } else if (bg_reclaim_threshold &&
+ block_group->zone_unusable >=
+ div_factor_fine(block_group->length, bg_reclaim_threshold)) {
btrfs_mark_bg_to_reclaim(block_group);
}
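bg_reclaim_threshold is snapshotted once with READ_ONCE() so a concurrent sysfs update cannot change the value between the zero test and the div_factor_fine() computation. A sketch of the pattern, with READ_ONCE() re-declared for illustration (it lives in <linux/compiler.h>) and div_factor_fine() modeled as a simple percentage:

#include <stdint.h>

#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))

int bg_reclaim_threshold;       /* updated concurrently, e.g. via sysfs */

static int should_reclaim(uint64_t zone_unusable, uint64_t length)
{
        const int threshold = READ_ONCE(bg_reclaim_threshold);

        /* Both the zero check and the math see the same snapshot. */
        return threshold && zone_unusable >= length * threshold / 100;
}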
@@ -2652,8 +2647,11 @@ int btrfs_remove_free_space(struct btrfs_block_group *block_group,
* btrfs_pin_extent_for_log_replay() when replaying the log.
* Advance the pointer not to overwrite the tree-log nodes.
*/
- if (block_group->alloc_offset < offset + bytes)
- block_group->alloc_offset = offset + bytes;
+ if (block_group->start + block_group->alloc_offset <
+ offset + bytes) {
+ block_group->alloc_offset =
+ offset + bytes - block_group->start;
+ }
return 0;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3b0595e8bdd9..487533c35ddb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,6 +32,7 @@
#include <linux/sched/mm.h>
#include <linux/iomap.h>
#include <asm/unaligned.h>
+#include <linux/fsverity.h>
#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
@@ -286,9 +287,8 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
cur_size = min_t(unsigned long, compressed_size,
PAGE_SIZE);
- kaddr = kmap_atomic(cpage);
+ kaddr = page_address(cpage);
write_extent_buffer(leaf, kaddr, ptr, cur_size);
- kunmap_atomic(kaddr);
i++;
ptr += cur_size;
@@ -490,6 +490,9 @@ static noinline int add_async_extent(struct async_chunk *cow,
*/
static inline bool inode_can_compress(struct btrfs_inode *inode)
{
+ /* Subpage doesn't support compression yet */
+ if (inode->root->fs_info->sectorsize < PAGE_SIZE)
+ return false;
if (inode->flags & BTRFS_INODE_NODATACOW ||
inode->flags & BTRFS_INODE_NODATASUM)
return false;
@@ -629,7 +632,7 @@ again:
* inode has not been flagged as nocompress. This flag can
* change at any time if we discover bad compression ratios.
*/
- if (nr_pages > 1 && inode_need_compress(BTRFS_I(inode), start, end)) {
+ if (inode_need_compress(BTRFS_I(inode), start, end)) {
WARN_ON(pages);
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
if (!pages) {
@@ -682,7 +685,11 @@ again:
}
}
cont:
- if (start == 0) {
+ /*
+ * Check cow_file_range() for why we don't even try to create inline
+ * extent for subpage case.
+ */
+ if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
/* lets try to make an inline extent */
if (ret || total_in < actual_end) {
/* we didn't compress the entire range, try
@@ -973,7 +980,7 @@ retry:
p->mapping = inode->vfs_inode.i_mapping;
btrfs_writepage_endio_finish_ordered(inode, p, start,
- end, 0);
+ end, false);
p->mapping = NULL;
extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
@@ -1080,7 +1087,17 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
- if (start == 0) {
+ /*
+ * Due to the page size limit, for subpage we can only trigger
+ * writeback for the dirty sectors of a page, which means data
+ * writeback may write back more than the range we want.
+ *
+ * This is especially unexpected for some call sites like fallocate,
+ * where we only increase i_size after everything is done.
+ * This means we can trigger inline extent even if we didn't want to.
+ * So here we skip inline extent creation completely.
+ */
+ if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
/* lets try to make an inline extent */
ret = cow_file_range_inline(inode, start, end, 0,
BTRFS_COMPRESS_NONE, NULL);
@@ -1290,11 +1307,6 @@ static noinline void async_cow_submit(struct btrfs_work *work)
nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
PAGE_SHIFT;
- /* atomic_sub_return implies a barrier */
- if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
- 5 * SZ_1M)
- cond_wake_up_nomb(&fs_info->async_submit_wait);
-
/*
* ->inode could be NULL if async_chunk_start has failed to compress,
* in which case we don't have anything to submit, yet we need to
@@ -1303,6 +1315,11 @@ static noinline void async_cow_submit(struct btrfs_work *work)
*/
if (async_chunk->inode)
submit_compressed_extents(async_chunk);
+
+ /* atomic_sub_return implies a barrier */
+ if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
+ 5 * SZ_1M)
+ cond_wake_up_nomb(&fs_info->async_submit_wait);
}
static noinline void async_cow_free(struct btrfs_work *work)
@@ -1946,6 +1963,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
ret = cow_file_range_async(inode, wbc, locked_page, start, end,
page_started, nr_written);
}
+ ASSERT(ret <= 0);
if (ret)
btrfs_cleanup_ordered_extents(inode, locked_page, start,
end - start + 1);
@@ -2285,7 +2303,6 @@ static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
struct extent_map *split_mid = NULL;
struct extent_map *split_post = NULL;
int ret = 0;
- int modified;
unsigned long flags;
/* Sanity check */
@@ -2315,11 +2332,12 @@ static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
ASSERT(em->len == len);
ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
+ ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
+ ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
+ ASSERT(!list_empty(&em->list));
flags = em->flags;
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
- clear_bit(EXTENT_FLAG_LOGGING, &flags);
- modified = !list_empty(&em->list);
/* First, replace the em with a new extent_map starting from em->start */
split_pre->start = em->start;
@@ -2333,7 +2351,7 @@ static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
split_pre->compress_type = em->compress_type;
split_pre->generation = em->generation;
- replace_extent_mapping(em_tree, em, split_pre, modified);
+ replace_extent_mapping(em_tree, em, split_pre, 1);
/*
* Now we only have an extent_map at:
@@ -2353,7 +2371,7 @@ static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
split_mid->flags = flags;
split_mid->compress_type = em->compress_type;
split_mid->generation = em->generation;
- add_extent_mapping(em_tree, split_mid, modified);
+ add_extent_mapping(em_tree, split_mid, 1);
}
if (post) {
@@ -2367,7 +2385,7 @@ static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
split_post->flags = flags;
split_post->compress_type = em->compress_type;
split_post->generation = em->generation;
- add_extent_mapping(em_tree, split_post, modified);
+ add_extent_mapping(em_tree, split_post, 1);
}
/* Once for us */
@@ -2770,7 +2788,7 @@ out_page:
* to fix it up. The async helper will wait for ordered extents, set
* the delalloc bit and make it safe to write the page.
*/
-int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
+int btrfs_writepage_cow_fixup(struct page *page)
{
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -3171,7 +3189,7 @@ static void finish_ordered_fn(struct btrfs_work *work)
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
- u64 end, int uptodate)
+ u64 end, bool uptodate)
{
trace_btrfs_writepage_end_io_hook(inode, start, end, uptodate);
@@ -3257,25 +3275,44 @@ unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
return 0;
}
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+ /*
+ * For the subpage case, the PageChecked flag above is not safe as it
+ * is not subpage compatible.
+ * But for now only cow fixup and compressed read utilize PageChecked
+ * flag, while in this context we can easily use io_bio->csum to
+ * determine if we really need to do csum verification.
+ *
+ * So for now, just exit if io_bio->csum is NULL, as it means it's a
+ * compressed read, and its compressed data csum has already been
+ * verified.
+ */
+ if (io_bio->csum == NULL)
return 0;
- if (!root->fs_info->csum_root)
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
return 0;
- if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
- test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
- clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
+ if (!root->fs_info->csum_root)
return 0;
- }
ASSERT(page_offset(page) <= start &&
end <= page_offset(page) + PAGE_SIZE - 1);
for (pg_off = offset_in_page(start);
pg_off < offset_in_page(end);
pg_off += sectorsize, bio_offset += sectorsize) {
+ u64 file_offset = pg_off + page_offset(page);
int ret;
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
+ test_range_bit(io_tree, file_offset,
+ file_offset + sectorsize - 1,
+ EXTENT_NODATASUM, 1, NULL)) {
+ /* Skip the range without csum for data reloc inode */
+ clear_extent_bits(io_tree, file_offset,
+ file_offset + sectorsize - 1,
+ EXTENT_NODATASUM);
+ continue;
+ }
ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off,
page_offset(page) + pg_off);
if (ret < 0) {
@@ -3520,7 +3557,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
/*
* If we have an inode with links, there are a couple of
- * possibilities. Old kernels (before v3.12) used to create an
+ * possibilities:
+ *
+ * 1. We were halfway through creating fsverity metadata for the
+ * file. In that case, the orphan item represents incomplete
+ * fsverity metadata, which must be cleaned up by calling
+ * btrfs_drop_verity_items and deleting the orphan item.
+ *
+ * 2. Old kernels (before v3.12) used to create an
* orphan item for truncate indicating that there were possibly
* extent items past i_size that needed to be deleted. In v3.12,
* truncate was changed to update i_size in sync with the extent
@@ -3538,8 +3582,12 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
* but either way, we can delete the orphan item.
*/
if (ret == -ENOENT || inode->i_nlink) {
- if (!ret)
+ if (!ret) {
+ ret = btrfs_drop_verity_items(BTRFS_I(inode));
iput(inode);
+ if (ret)
+ goto out;
+ }
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -3728,7 +3776,8 @@ static int btrfs_read_locked_inode(struct inode *inode,
rdev = btrfs_inode_rdev(leaf, inode_item);
BTRFS_I(inode)->index_cnt = (u64)-1;
- BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+ btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item),
+ &BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
cache_index:
/*
@@ -3859,6 +3908,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct inode *inode)
{
struct btrfs_map_token token;
+ u64 flags;
btrfs_init_map_token(&token, leaf);
@@ -3894,7 +3944,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
btrfs_set_token_inode_transid(&token, item, trans->transid);
btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
- btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags);
+ flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
+ BTRFS_I(inode)->ro_flags);
+ btrfs_set_token_inode_flags(&token, item, flags);
btrfs_set_token_inode_block_group(&token, item, 0);
}
@@ -5088,15 +5140,13 @@ static int maybe_insert_hole(struct btrfs_root *root, struct btrfs_inode *inode,
int ret;
/*
- * Still need to make sure the inode looks like it's been updated so
- * that any holes get logged if we fsync.
+ * If NO_HOLES is enabled, we don't need to do anything.
+ * Later, up in the call chain, either btrfs_set_inode_last_sub_trans()
+ * or btrfs_update_inode() will be called, which guarantees that the next
+ * fsync will know this inode was changed and needs to be logged.
*/
- if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
- inode->last_trans = fs_info->generation;
- inode->last_sub_trans = root->log_transid;
- inode->last_log_commit = root->last_log_commit;
+ if (btrfs_fs_incompat(fs_info, NO_HOLES))
return 0;
- }
/*
* 1 - for the one we're dropping
@@ -5342,7 +5392,7 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr
if (btrfs_root_readonly(root))
return -EROFS;
- err = setattr_prepare(&init_user_ns, dentry, attr);
+ err = setattr_prepare(mnt_userns, dentry, attr);
if (err)
return err;
@@ -5353,13 +5403,12 @@ static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentr
}
if (attr->ia_valid) {
- setattr_copy(&init_user_ns, inode, attr);
+ setattr_copy(mnt_userns, inode, attr);
inode_inc_iversion(inode);
err = btrfs_dirty_inode(inode);
if (!err && attr->ia_valid & ATTR_MODE)
- err = posix_acl_chmod(&init_user_ns, inode,
- inode->i_mode);
+ err = posix_acl_chmod(mnt_userns, inode, inode->i_mode);
}
return err;
@@ -5522,6 +5571,7 @@ void btrfs_evict_inode(struct inode *inode)
trace_btrfs_inode_evict(inode);
if (!root) {
+ fsverity_cleanup_inode(inode);
clear_inode(inode);
return;
}
@@ -5604,6 +5654,7 @@ no_delete:
* to retry these periodically in the future.
*/
btrfs_remove_delayed_node(BTRFS_I(inode));
+ fsverity_cleanup_inode(inode);
clear_inode(inode);
}
@@ -6370,6 +6421,7 @@ static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
+ struct user_namespace *mnt_userns,
struct inode *dir,
const char *name, int name_len,
u64 ref_objectid, u64 objectid,
@@ -6479,7 +6531,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
if (ret != 0)
goto fail_unlock;
- inode_init_owner(&init_user_ns, inode, dir, mode);
+ inode_init_owner(mnt_userns, inode, dir, mode);
inode_set_bytes(inode, 0);
inode->i_mtime = current_time(inode);
@@ -6664,9 +6716,9 @@ static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
goto out_unlock;
- inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
- mode, &index);
+ inode = btrfs_new_inode(trans, root, mnt_userns, dir,
+ dentry->d_name.name, dentry->d_name.len,
+ btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
inode = NULL;
@@ -6728,9 +6780,9 @@ static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
goto out_unlock;
- inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
- mode, &index);
+ inode = btrfs_new_inode(trans, root, mnt_userns, dir,
+ dentry->d_name.name, dentry->d_name.len,
+ btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
inode = NULL;
@@ -6873,8 +6925,9 @@ static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
goto out_fail;
- inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
+ inode = btrfs_new_inode(trans, root, mnt_userns, dir,
+ dentry->d_name.name, dentry->d_name.len,
+ btrfs_ino(BTRFS_I(dir)), objectid,
S_IFDIR | mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
@@ -8207,8 +8260,8 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter,
u64 start_sector;
int async_submit = 0;
u64 submit_len;
- int clone_offset = 0;
- int clone_len;
+ u64 clone_offset = 0;
+ u64 clone_len;
u64 logical;
int ret;
blk_status_t status;
@@ -8256,9 +8309,9 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter,
status = errno_to_blk_status(ret);
goto out_err_em;
}
- ASSERT(geom.len <= INT_MAX);
- clone_len = min_t(int, submit_len, geom.len);
+ clone_len = min(submit_len, geom.len);
+ ASSERT(clone_len <= UINT_MAX);
/*
* This will never fail as it's passing GPF_NOFS and
@@ -8402,11 +8455,47 @@ static void btrfs_readahead(struct readahead_control *rac)
extent_readahead(rac);
}
+/*
+ * For releasepage() and invalidatepage() we have a race window where
+ * end_page_writeback() is called but the subpage spinlock is not yet released.
+ * If we continue to release/invalidate the page, we could cause a
+ * use-after-free of the subpage spinlock. So this function spins and
+ * waits for the subpage spinlock to be released.
+ */
+static void wait_subpage_spinlock(struct page *page)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+ struct btrfs_subpage *subpage;
+
+ if (fs_info->sectorsize == PAGE_SIZE)
+ return;
+
+ ASSERT(PagePrivate(page) && page->private);
+ subpage = (struct btrfs_subpage *)page->private;
+
+ /*
+ * This may look insane as we just acquire the spinlock and release it,
+ * without doing anything. But we just want to make sure no one is
+ * still holding the subpage spinlock.
+ * And since the page is neither dirty nor under writeback, and we
+ * hold the page lock, the only possible way anyone can hold the
+ * spinlock is from the endio function clearing page writeback.
+ *
+ * Here we just acquire the spinlock so that all existing holders
+ * have exited and we're safe to release/invalidate the page.
+ */
+ spin_lock_irq(&subpage->lock);
+ spin_unlock_irq(&subpage->lock);
+}
+
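wait_subpage_spinlock() uses an empty acquire/release pair purely as a barrier: once the lock has been taken and dropped, any endio path that held it must have finished. The same drain idiom sketched with POSIX threads (the btrfs code uses spin_lock_irq() on the per-page subpage lock):

#include <pthread.h>

static pthread_mutex_t subpage_lock = PTHREAD_MUTEX_INITIALIZER;

static void wait_for_lock_holders(void)
{
        pthread_mutex_lock(&subpage_lock);
        /* Nothing to do: reaching this point proves every earlier
         * critical section under subpage_lock has completed. */
        pthread_mutex_unlock(&subpage_lock);
}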
static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
int ret = try_release_extent_mapping(page, gfp_flags);
- if (ret == 1)
+
+ if (ret == 1) {
+ wait_subpage_spinlock(page);
clear_page_extent_mapped(page);
+ }
return ret;
}
@@ -8470,6 +8559,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
* do double ordered extent accounting on the same page.
*/
wait_on_page_writeback(page);
+ wait_subpage_spinlock(page);
/*
* For subpage case, we have call sites like
@@ -8558,7 +8648,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
spin_unlock_irq(&inode->ordered_tree.lock);
if (btrfs_dec_test_ordered_pending(inode, &ordered,
- cur, range_end + 1 - cur, 1)) {
+ cur, range_end + 1 - cur)) {
btrfs_finish_ordered_io(ordered);
/*
* The ordered extent has finished, now we're again
@@ -8939,7 +9029,8 @@ out:
*/
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root,
- struct btrfs_root *parent_root)
+ struct btrfs_root *parent_root,
+ struct user_namespace *mnt_userns)
{
struct inode *inode;
int err;
@@ -8950,7 +9041,8 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
if (err < 0)
return err;
- inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, ino, ino,
+ inode = btrfs_new_inode(trans, new_root, mnt_userns, NULL, "..", 2,
+ ino, ino,
S_IFDIR | (~current_umask() & S_IRWXUGO),
&index);
if (IS_ERR(inode))
@@ -8994,6 +9086,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->defrag_bytes = 0;
ei->disk_i_size = 0;
ei->flags = 0;
+ ei->ro_flags = 0;
ei->csum_bytes = 0;
ei->index_cnt = (u64)-1;
ei->dir_index = 0;
@@ -9175,6 +9268,7 @@ static int btrfs_getattr(struct user_namespace *mnt_userns,
struct inode *inode = d_inode(path->dentry);
u32 blocksize = inode->i_sb->s_blocksize;
u32 bi_flags = BTRFS_I(inode)->flags;
+ u32 bi_ro_flags = BTRFS_I(inode)->ro_flags;
stat->result_mask |= STATX_BTIME;
stat->btime.tv_sec = BTRFS_I(inode)->i_otime.tv_sec;
@@ -9187,13 +9281,15 @@ static int btrfs_getattr(struct user_namespace *mnt_userns,
stat->attributes |= STATX_ATTR_IMMUTABLE;
if (bi_flags & BTRFS_INODE_NODUMP)
stat->attributes |= STATX_ATTR_NODUMP;
+ if (bi_ro_flags & BTRFS_INODE_RO_VERITY)
+ stat->attributes |= STATX_ATTR_VERITY;
stat->attributes_mask |= (STATX_ATTR_APPEND |
STATX_ATTR_COMPRESSED |
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(mnt_userns, inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
spin_lock(&BTRFS_I(inode)->lock);
@@ -9227,8 +9323,14 @@ static int btrfs_rename_exchange(struct inode *old_dir,
bool dest_log_pinned = false;
bool need_abort = false;
- /* we only allow rename subvolume link between subvolumes */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
+ /*
+ * For non-subvolumes allow exchange only within one subvolume, in the
+ * same inode namespace. Two subvolumes (represented as directories)
+ * can be exchanged as they're a logical link and have a fixed inode
+ * number.
+ */
+ if (root != dest &&
+ (old_ino != BTRFS_FIRST_FREE_OBJECTID ||
+ new_ino != BTRFS_FIRST_FREE_OBJECTID))
return -EXDEV;
/* close the race window with snapshot create/destroy ioctl */
@@ -9275,8 +9377,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(trans);
} else {
- btrfs_pin_log_trans(root);
- root_log_pinned = true;
ret = btrfs_insert_inode_ref(trans, dest,
new_dentry->d_name.name,
new_dentry->d_name.len,
@@ -9293,8 +9393,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(trans);
} else {
- btrfs_pin_log_trans(dest);
- dest_log_pinned = true;
ret = btrfs_insert_inode_ref(trans, root,
old_dentry->d_name.name,
old_dentry->d_name.len,
@@ -9325,6 +9423,29 @@ static int btrfs_rename_exchange(struct inode *old_dir,
BTRFS_I(new_inode), 1);
}
+ /*
+ * Now pin the logs of the roots. We do it to ensure that no other task
+ * can sync the logs while we are in progress with the rename, because
+ * that could result in an inconsistency in case any of the inodes that
+ * are part of this rename operation were logged before.
+ *
+ * We pin the logs even if at this precise moment none of the inodes was
+ * logged before. This is because right after we checked for that, some
+ * other task fsyncing some other inode not involved with this rename
+ * operation could log that one of our inodes exists.
+ *
+ * We don't need to pin the logs before the above calls to
+ * btrfs_insert_inode_ref(), since those don't ever need to change a log.
+ */
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ btrfs_pin_log_trans(root);
+ root_log_pinned = true;
+ }
+ if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ btrfs_pin_log_trans(dest);
+ dest_log_pinned = true;
+ }
+
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
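For context, the pairing the moved pin relies on, as a minimal sketch (illustrative helper, not part of the patch): every btrfs_pin_log_trans() must be matched by a btrfs_end_log_trans() once the operation completes or aborts, which is what the out_fail hunk below does.

/* Illustrative only: the pin/unpin window used by the rename paths. */
static void example_log_pin_window(struct btrfs_root *root, u64 ino)
{
	bool pinned = false;

	/* Subvolumes force a full log commit instead of being pinned */
	if (ino != BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_pin_log_trans(root);
		pinned = true;
	}

	/* ... unlink/link steps run while log syncing is held off ... */

	if (pinned)
		btrfs_end_log_trans(root);
}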
@@ -9406,8 +9527,7 @@ out_fail:
if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
- (new_inode &&
- btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
+ btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))
btrfs_set_log_full_commit(trans);
if (root_log_pinned) {
@@ -9431,6 +9551,7 @@ out_notrans:
static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
+ struct user_namespace *mnt_userns,
struct inode *dir,
struct dentry *dentry)
{
@@ -9443,7 +9564,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
if (ret)
return ret;
- inode = btrfs_new_inode(trans, root, dir,
+ inode = btrfs_new_inode(trans, root, mnt_userns, dir,
dentry->d_name.name,
dentry->d_name.len,
btrfs_ino(BTRFS_I(dir)),
@@ -9480,9 +9601,10 @@ out:
return ret;
}
-static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry,
- unsigned int flags)
+static int btrfs_rename(struct user_namespace *mnt_userns,
+ struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
struct btrfs_trans_handle *trans;
@@ -9577,8 +9699,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(trans);
} else {
- btrfs_pin_log_trans(root);
- log_pinned = true;
ret = btrfs_insert_inode_ref(trans, dest,
new_dentry->d_name.name,
new_dentry->d_name.len,
@@ -9602,6 +9722,25 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
} else {
+ /*
+ * Now pin the log. We do it to ensure that no other task can
+ * sync the log while we are in progress with the rename, as
+ * that could result in an inconsistency in case any of the
+ * inodes that are part of this rename operation were logged
+ * before.
+ *
+ * We pin the log even if at this precise moment none of the
+ * inodes was logged before. This is because right after we
+ * checked for that, some other task fsyncing some other inode
+ * not involved with this rename operation could log that one of
+ * our inodes exists.
+ *
+ * We don't need to pin the logs before the above call to
+ * btrfs_insert_inode_ref(), since that does not need to change
+ * a log.
+ */
+ btrfs_pin_log_trans(root);
+ log_pinned = true;
ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
BTRFS_I(d_inode(old_dentry)),
old_dentry->d_name.name,
@@ -9655,8 +9794,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
if (flags & RENAME_WHITEOUT) {
- ret = btrfs_whiteout_for_rename(trans, root, old_dir,
- old_dentry);
+ ret = btrfs_whiteout_for_rename(trans, root, mnt_userns,
+ old_dir, old_dentry);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -9706,7 +9845,8 @@ static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_di
return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
new_dentry);
- return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
+ return btrfs_rename(mnt_userns, old_dir, old_dentry, new_dir,
+ new_dentry, flags);
}
struct btrfs_delalloc_work {
@@ -9803,11 +9943,7 @@ static int start_delalloc_inodes(struct btrfs_root *root,
btrfs_queue_work(root->fs_info->flush_workers,
&work->work);
} else {
- ret = sync_inode(inode, wbc);
- if (!ret &&
- test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags))
- ret = sync_inode(inode, wbc);
+ ret = filemap_fdatawrite_wbc(inode->i_mapping, wbc);
btrfs_add_delayed_iput(inode);
if (ret || wbc->nr_to_write <= 0)
goto out;
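filemap_fdatawrite_wbc() (added alongside this series) starts writeback for a mapping under a caller-supplied writeback_control, which is what lets the sync_inode() retry for HAS_ASYNC_EXTENT go away. A sketch of a caller, under the assumption of a plain non-integrity flush:

/* Sketch: flush one mapping under our own writeback_control. */
static int example_flush_mapping(struct address_space *mapping)
{
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_NONE,
		.nr_to_write	= LONG_MAX,
		.range_start	= 0,
		.range_end	= LLONG_MAX,
	};

	return filemap_fdatawrite_wbc(mapping, &wbc);
}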
@@ -9942,9 +10078,10 @@ static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
goto out_unlock;
- inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(BTRFS_I(dir)),
- objectid, S_IFLNK|S_IRWXUGO, &index);
+ inode = btrfs_new_inode(trans, root, mnt_userns, dir,
+ dentry->d_name.name, dentry->d_name.len,
+ btrfs_ino(BTRFS_I(dir)), objectid,
+ S_IFLNK | S_IRWXUGO, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
inode = NULL;
@@ -10268,7 +10405,7 @@ static int btrfs_permission(struct user_namespace *mnt_userns,
if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
return -EACCES;
}
- return generic_permission(&init_user_ns, inode, mask);
+ return generic_permission(mnt_userns, inode, mask);
}
static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
@@ -10293,7 +10430,7 @@ static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
if (ret)
goto out;
- inode = btrfs_new_inode(trans, root, dir, NULL, 0,
+ inode = btrfs_new_inode(trans, root, mnt_userns, dir, NULL, 0,
btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0ba98e08a029..41524f9aeac3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -27,6 +27,7 @@
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/fileattr.h>
+#include <linux/fsverity.h>
#include "ctree.h"
#include "disk-io.h"
#include "export.h"
@@ -103,9 +104,11 @@ static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
* Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
* ioctl.
*/
-static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
+static unsigned int btrfs_inode_flags_to_fsflags(struct btrfs_inode *binode)
{
unsigned int iflags = 0;
+ u32 flags = binode->flags;
+ u32 ro_flags = binode->ro_flags;
if (flags & BTRFS_INODE_SYNC)
iflags |= FS_SYNC_FL;
@@ -121,6 +124,8 @@ static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
iflags |= FS_DIRSYNC_FL;
if (flags & BTRFS_INODE_NODATACOW)
iflags |= FS_NOCOW_FL;
+ if (ro_flags & BTRFS_INODE_RO_VERITY)
+ iflags |= FS_VERITY_FL;
if (flags & BTRFS_INODE_NOCOMPRESS)
iflags |= FS_NOCOMP_FL;
@@ -148,10 +153,12 @@ void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (binode->flags & BTRFS_INODE_DIRSYNC)
new_fl |= S_DIRSYNC;
+ if (binode->ro_flags & BTRFS_INODE_RO_VERITY)
+ new_fl |= S_VERITY;
set_mask_bits(&inode->i_flags,
- S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
- new_fl);
+ S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC |
+ S_VERITY, new_fl);
}
/*
@@ -200,7 +207,7 @@ int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
struct btrfs_inode *binode = BTRFS_I(d_inode(dentry));
- fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(binode->flags));
+ fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(binode));
return 0;
}
@@ -224,7 +231,7 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns,
return -EOPNOTSUPP;
fsflags = btrfs_mask_fsflags_for_type(inode, fa->flags);
- old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+ old_fsflags = btrfs_inode_flags_to_fsflags(binode);
ret = check_fsflags(old_fsflags, fsflags);
if (ret)
return ret;
@@ -492,8 +499,8 @@ int __pure btrfs_is_empty_uuid(u8 *uuid)
return 1;
}
-static noinline int create_subvol(struct inode *dir,
- struct dentry *dentry,
+static noinline int create_subvol(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *dentry,
const char *name, int namelen,
struct btrfs_qgroup_inherit *inherit)
{
@@ -638,7 +645,7 @@ static noinline int create_subvol(struct inode *dir,
goto fail;
}
- ret = btrfs_create_subvol_root(trans, new_root, root);
+ ret = btrfs_create_subvol_root(trans, new_root, root, mnt_userns);
btrfs_put_root(new_root);
if (ret) {
/* We potentially lose an unused inode item here */
@@ -830,7 +837,8 @@ free_pending:
* nfs_async_unlink().
*/
-static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
+static int btrfs_may_delete(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *victim, int isdir)
{
int error;
@@ -840,12 +848,12 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
BUG_ON(d_inode(victim->d_parent) != dir);
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
- error = inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
+ error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
if (error)
return error;
if (IS_APPEND(dir))
return -EPERM;
- if (check_sticky(&init_user_ns, dir, d_inode(victim)) ||
+ if (check_sticky(mnt_userns, dir, d_inode(victim)) ||
IS_APPEND(d_inode(victim)) || IS_IMMUTABLE(d_inode(victim)) ||
IS_SWAPFILE(d_inode(victim)))
return -EPERM;
@@ -864,13 +872,16 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
}
/* copy of may_create() in fs/namei.c */
-static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
+static inline int btrfs_may_create(struct user_namespace *mnt_userns,
+ struct inode *dir, struct dentry *child)
{
if (d_really_is_positive(child))
return -EEXIST;
if (IS_DEADDIR(dir))
return -ENOENT;
- return inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
+ if (!fsuidgid_has_mapping(dir->i_sb, mnt_userns))
+ return -EOVERFLOW;
+ return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
}
/*
@@ -879,6 +890,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
* inside this filesystem so it's quite a bit simpler.
*/
static noinline int btrfs_mksubvol(const struct path *parent,
+ struct user_namespace *mnt_userns,
const char *name, int namelen,
struct btrfs_root *snap_src,
bool readonly,
@@ -893,12 +905,12 @@ static noinline int btrfs_mksubvol(const struct path *parent,
if (error == -EINTR)
return error;
- dentry = lookup_one_len(name, parent->dentry, namelen);
+ dentry = lookup_one(mnt_userns, name, parent->dentry, namelen);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_unlock;
- error = btrfs_may_create(dir, dentry);
+ error = btrfs_may_create(mnt_userns, dir, dentry);
if (error)
goto out_dput;
@@ -920,7 +932,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
if (snap_src)
error = create_snapshot(snap_src, dir, dentry, readonly, inherit);
else
- error = create_subvol(dir, dentry, name, namelen, inherit);
+ error = create_subvol(mnt_userns, dir, dentry, name, namelen, inherit);
if (!error)
fsnotify_mkdir(dir, dentry);
@@ -934,6 +946,7 @@ out_unlock:
}
static noinline int btrfs_mksnapshot(const struct path *parent,
+ struct user_namespace *mnt_userns,
const char *name, int namelen,
struct btrfs_root *root,
bool readonly,
@@ -963,7 +976,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
- ret = btrfs_mksubvol(parent, name, namelen,
+ ret = btrfs_mksubvol(parent, mnt_userns, name, namelen,
root, readonly, inherit);
out:
if (snapshot_force_cow)
@@ -1792,6 +1805,7 @@ out_drop:
}
static noinline int __btrfs_ioctl_snap_create(struct file *file,
+ struct user_namespace *mnt_userns,
const char *name, unsigned long fd, int subvol,
bool readonly,
struct btrfs_qgroup_inherit *inherit)
@@ -1819,8 +1833,8 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
}
if (subvol) {
- ret = btrfs_mksubvol(&file->f_path, name, namelen,
- NULL, readonly, inherit);
+ ret = btrfs_mksubvol(&file->f_path, mnt_userns, name,
+ namelen, NULL, readonly, inherit);
} else {
struct fd src = fdget(fd);
struct inode *src_inode;
@@ -1834,16 +1848,17 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
"Snapshot src from another FS");
ret = -EXDEV;
- } else if (!inode_owner_or_capable(&init_user_ns, src_inode)) {
+ } else if (!inode_owner_or_capable(mnt_userns, src_inode)) {
/*
* Subvolume creation is not restricted, but snapshots
* are limited to own subvolumes only
*/
ret = -EPERM;
} else {
- ret = btrfs_mksnapshot(&file->f_path, name, namelen,
- BTRFS_I(src_inode)->root,
- readonly, inherit);
+ ret = btrfs_mksnapshot(&file->f_path, mnt_userns,
+ name, namelen,
+ BTRFS_I(src_inode)->root,
+ readonly, inherit);
}
fdput(src);
}
@@ -1867,8 +1882,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
- subvol, false, NULL);
+ ret = __btrfs_ioctl_snap_create(file, file_mnt_user_ns(file),
+ vol_args->name, vol_args->fd, subvol,
+ false, NULL);
kfree(vol_args);
return ret;
@@ -1926,8 +1942,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
}
}
- ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
- subvol, readonly, inherit);
+ ret = __btrfs_ioctl_snap_create(file, file_mnt_user_ns(file),
+ vol_args->name, vol_args->fd, subvol,
+ readonly, inherit);
if (ret)
goto free_inherit;
free_inherit:
@@ -1971,7 +1988,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
u64 flags;
int ret = 0;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(file_mnt_user_ns(file), inode))
return -EPERM;
ret = mnt_want_write_file(file);
@@ -2382,23 +2399,16 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
key.offset = (u64)-1;
while (1) {
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ ret = btrfs_search_backwards(root, &key, path);
if (ret < 0)
goto out;
else if (ret > 0) {
- ret = btrfs_previous_item(root, path, dirid,
- BTRFS_INODE_REF_KEY);
- if (ret < 0)
- goto out;
- else if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
+ ret = -ENOENT;
+ goto out;
}
l = path->nodes[0];
slot = path->slots[0];
- btrfs_item_key_to_cpu(l, &key, slot);
iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
len = btrfs_inode_ref_name_len(l, iref);
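btrfs_search_backwards() is a new ctree.c helper introduced by this series; paraphrased approximately (see the helper's actual definition), it explains why the btrfs_previous_item()/btrfs_item_key_to_cpu() calls could be dropped from both loops:

/* Approximate shape of the helper the two lookup loops now rely on. */
int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
			   struct btrfs_path *path)
{
	int ret;

	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
	if (ret < 0)
		return ret;
	if (ret > 0)
		ret = btrfs_previous_item(root, path, key->objectid, key->type);
	if (ret == 0)
		btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]);

	return ret;
}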
@@ -2429,7 +2439,8 @@ out:
return ret;
}
-static int btrfs_search_path_in_tree_user(struct inode *inode,
+static int btrfs_search_path_in_tree_user(struct user_namespace *mnt_userns,
+ struct inode *inode,
struct btrfs_ioctl_ino_lookup_user_args *args)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
@@ -2473,23 +2484,16 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
while (1) {
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0) {
+ ret = btrfs_search_backwards(root, &key, path);
+ if (ret < 0)
+ goto out_put;
+ else if (ret > 0) {
+ ret = -ENOENT;
goto out_put;
- } else if (ret > 0) {
- ret = btrfs_previous_item(root, path, dirid,
- BTRFS_INODE_REF_KEY);
- if (ret < 0) {
- goto out_put;
- } else if (ret > 0) {
- ret = -ENOENT;
- goto out_put;
- }
}
leaf = path->nodes[0];
slot = path->slots[0];
- btrfs_item_key_to_cpu(leaf, &key, slot);
iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref);
len = btrfs_inode_ref_name_len(leaf, iref);
@@ -2527,7 +2531,7 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
ret = PTR_ERR(temp_inode);
goto out_put;
}
- ret = inode_permission(&init_user_ns, temp_inode,
+ ret = inode_permission(mnt_userns, temp_inode,
MAY_READ | MAY_EXEC);
iput(temp_inode);
if (ret) {
@@ -2669,7 +2673,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
return -EACCES;
}
- ret = btrfs_search_path_in_tree_user(inode, args);
+ ret = btrfs_search_path_in_tree_user(file_mnt_user_ns(file), inode, args);
if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
ret = -EFAULT;
@@ -2905,6 +2909,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
struct btrfs_root *dest = NULL;
struct btrfs_ioctl_vol_args *vol_args = NULL;
struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
+ struct user_namespace *mnt_userns = file_mnt_user_ns(file);
char *subvol_name, *subvol_name_ptr = NULL;
int subvol_namelen;
int err = 0;
@@ -2932,6 +2937,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (err)
goto out;
} else {
+ struct inode *old_dir;
+
if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) {
err = -EINVAL;
goto out;
@@ -2968,6 +2975,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = PTR_ERR(parent);
goto out_drop_write;
}
+ old_dir = dir;
dir = d_inode(parent);
/*
@@ -2978,6 +2986,20 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
*/
destroy_parent = true;
+ /*
+ * On idmapped mounts, deletion via subvolid is
+ * restricted to subvolumes that are immediate
+ * children of the directory referenced by the file
+ * descriptor in the ioctl. Otherwise the idmapping
+ * could potentially be abused to delete subvolumes
+ * anywhere in the filesystem the user wouldn't be able
+ * to delete without an idmapped mount.
+ */
+ if (old_dir != dir && mnt_userns != &init_user_ns) {
+ err = -EOPNOTSUPP;
+ goto free_parent;
+ }
+
subvol_name_ptr = btrfs_get_subvol_name_from_objectid(
fs_info, vol_args2->subvolid);
if (IS_ERR(subvol_name_ptr)) {
@@ -3016,7 +3038,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
if (err == -EINTR)
goto free_subvol_name;
- dentry = lookup_one_len(subvol_name, parent, subvol_namelen);
+ dentry = lookup_one(mnt_userns, subvol_name, parent, subvol_namelen);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
goto out_unlock_dir;
@@ -3058,14 +3080,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (root == dest)
goto out_dput;
- err = inode_permission(&init_user_ns, inode,
- MAY_WRITE | MAY_EXEC);
+ err = inode_permission(mnt_userns, inode, MAY_WRITE | MAY_EXEC);
if (err)
goto out_dput;
}
/* check if subvolume may be deleted by a user */
- err = btrfs_may_delete(dir, dentry, 1);
+ err = btrfs_may_delete(mnt_userns, dir, dentry, 1);
if (err)
goto out_dput;
@@ -3103,7 +3124,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
{
struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_ioctl_defrag_range_args *range;
+ struct btrfs_ioctl_defrag_range_args range = {0};
int ret;
ret = mnt_want_write_file(file);
@@ -3115,6 +3136,12 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
goto out;
}
+ /* Subpage defrag will be supported in later commits */
+ if (root->fs_info->sectorsize < PAGE_SIZE) {
+ ret = -ENOTTY;
+ goto out;
+ }
+
switch (inode->i_mode & S_IFMT) {
case S_IFDIR:
if (!capable(CAP_SYS_ADMIN)) {
@@ -3135,33 +3162,24 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
goto out;
}
- range = kzalloc(sizeof(*range), GFP_KERNEL);
- if (!range) {
- ret = -ENOMEM;
- goto out;
- }
-
if (argp) {
- if (copy_from_user(range, argp,
- sizeof(*range))) {
+ if (copy_from_user(&range, argp, sizeof(range))) {
ret = -EFAULT;
- kfree(range);
goto out;
}
/* compression requires us to start the IO */
- if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
- range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
- range->extent_thresh = (u32)-1;
+ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
+ range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
+ range.extent_thresh = (u32)-1;
}
} else {
/* the rest are all set to zero by the = {0} initializer */
- range->len = (u64)-1;
+ range.len = (u64)-1;
}
ret = btrfs_defrag_file(file_inode(file), file,
- range, BTRFS_OLDEST_GENERATION, 0);
+ &range, BTRFS_OLDEST_GENERATION, 0);
if (ret > 0)
ret = 0;
- kfree(range);
break;
default:
ret = -EINVAL;
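From userspace the same args struct drives BTRFS_IOC_DEFRAG_RANGE; a minimal, illustrative whole-file request:

#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* Illustrative: request a defrag of the whole file behind fd. */
static int defrag_whole_file(int fd)
{
	struct btrfs_ioctl_defrag_range_args range = {0};

	range.len = (__u64)-1;		  /* to the end of the file */
	range.extent_thresh = 256 * 1024; /* leave larger extents alone */

	return ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &range);
}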
@@ -4404,25 +4422,20 @@ drop_write:
static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
void __user *arg)
{
- struct btrfs_ioctl_quota_rescan_args *qsa;
+ struct btrfs_ioctl_quota_rescan_args qsa = {0};
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- qsa = kzalloc(sizeof(*qsa), GFP_KERNEL);
- if (!qsa)
- return -ENOMEM;
-
if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
- qsa->flags = 1;
- qsa->progress = fs_info->qgroup_rescan_progress.objectid;
+ qsa.flags = 1;
+ qsa.progress = fs_info->qgroup_rescan_progress.objectid;
}
- if (copy_to_user(arg, qsa, sizeof(*qsa)))
+ if (copy_to_user(arg, &qsa, sizeof(qsa)))
ret = -EFAULT;
- kfree(qsa);
return ret;
}
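The same conversion pattern in miniature (hypothetical helper): a small fixed-size ioctl reply needs no heap allocation, only a zero-initialized stack object and one copy_to_user().

/* Hypothetical helper showing the kzalloc()-to-stack conversion. */
static long example_small_reply(void __user *arg)
{
	struct btrfs_ioctl_quota_rescan_args qsa = {0};

	qsa.flags = 1;	/* fill in whatever state applies */

	return copy_to_user(arg, &qsa, sizeof(qsa)) ? -EFAULT : 0;
}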
@@ -4436,6 +4449,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info,
}
static long _btrfs_ioctl_set_received_subvol(struct file *file,
+ struct user_namespace *mnt_userns,
struct btrfs_ioctl_received_subvol_args *sa)
{
struct inode *inode = file_inode(file);
@@ -4447,7 +4461,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
int ret = 0;
int received_uuid_changed;
- if (!inode_owner_or_capable(&init_user_ns, inode))
+ if (!inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
ret = mnt_want_write_file(file);
@@ -4552,7 +4566,7 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file,
args64->rtime.nsec = args32->rtime.nsec;
args64->flags = args32->flags;
- ret = _btrfs_ioctl_set_received_subvol(file, args64);
+ ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_user_ns(file), args64);
if (ret)
goto out;
@@ -4586,7 +4600,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
if (IS_ERR(sa))
return PTR_ERR(sa);
- ret = _btrfs_ioctl_set_received_subvol(file, sa);
+ ret = _btrfs_ioctl_set_received_subvol(file, file_mnt_user_ns(file), sa);
if (ret)
goto out;
@@ -5013,6 +5027,10 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_get_subvol_rootref(file, argp);
case BTRFS_IOC_INO_LOOKUP_USER:
return btrfs_ioctl_ino_lookup_user(file, argp);
+ case FS_IOC_ENABLE_VERITY:
+ return fsverity_ioctl_enable(file, (const void __user *)argp);
+ case FS_IOC_MEASURE_VERITY:
+ return fsverity_ioctl_measure(file, argp);
}
return -ENOTTY;
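These two cases route the generic fs-verity ioctls into fs/verity/. From userspace, enabling verity on a fully written file looks roughly like this (illustrative; fsverity-utils wraps the same call):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/fsverity.h>

/* Enable fs-verity on fd; the file must be closed for writing. */
static int enable_verity(int fd)
{
	struct fsverity_enable_arg arg;

	memset(&arg, 0, sizeof(arg));
	arg.version = 1;
	arg.hash_algorithm = FS_VERITY_HASH_ALG_SHA256;
	arg.block_size = 4096;

	return ioctl(fd, FS_IOC_ENABLE_VERITY, &arg);
}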
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cd042c7567a4..c25dfd1a8a54 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -14,6 +14,7 @@
#include <linux/lzo.h>
#include <linux/refcount.h>
#include "compression.h"
+#include "ctree.h"
#define LZO_LEN 4
@@ -140,18 +141,18 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
*total_in = 0;
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
- data_in = kmap(in_page);
+ data_in = page_address(in_page);
/*
* store the size of all chunks of compressed data in
* the first 4 bytes
*/
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
- cpage_out = kmap(out_page);
+ cpage_out = page_address(out_page);
out_offset = LZO_LEN;
tot_out = LZO_LEN;
pages[0] = out_page;
@@ -209,19 +210,18 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
if (out_len == 0 && tot_in >= len)
break;
- kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
- cpage_out = kmap(out_page);
+ cpage_out = page_address(out_page);
pages[nr_pages++] = out_page;
pg_bytes_left = PAGE_SIZE;
@@ -243,12 +243,11 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
break;
bytes_left = len - tot_in;
- kunmap(in_page);
put_page(in_page);
start += PAGE_SIZE;
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
- data_in = kmap(in_page);
+ data_in = page_address(in_page);
in_len = min(bytes_left, PAGE_SIZE);
}
@@ -258,164 +257,130 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
}
/* store the size of all chunks of compressed data */
- sizes_ptr = kmap_local_page(pages[0]);
+ sizes_ptr = page_address(pages[0]);
write_compress_length(sizes_ptr, tot_out);
- kunmap_local(sizes_ptr);
ret = 0;
*total_out = tot_out;
*total_in = tot_in;
out:
*out_pages = nr_pages;
- if (out_page)
- kunmap(out_page);
- if (in_page) {
- kunmap(in_page);
+ if (in_page)
put_page(in_page);
- }
return ret;
}
+/*
+ * Copy the compressed segment payload into @dest.
+ *
+ * The payload has no padding; we only need to switch source pages as we go.
+ */
+static void copy_compressed_segment(struct compressed_bio *cb,
+ char *dest, u32 len, u32 *cur_in)
+{
+ u32 orig_in = *cur_in;
+
+ while (*cur_in < orig_in + len) {
+ struct page *cur_page;
+ u32 copy_len = min_t(u32, PAGE_SIZE - offset_in_page(*cur_in),
+ orig_in + len - *cur_in);
+
+ ASSERT(copy_len);
+ cur_page = cb->compressed_pages[*cur_in / PAGE_SIZE];
+
+ memcpy(dest + *cur_in - orig_in,
+ page_address(cur_page) + offset_in_page(*cur_in),
+ copy_len);
+
+ *cur_in += copy_len;
+ }
+}
+
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
- int ret = 0, ret2;
- char *data_in;
- unsigned long page_in_index = 0;
- size_t srclen = cb->compressed_len;
- unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
- unsigned long buf_start;
- unsigned long buf_offset = 0;
- unsigned long bytes;
- unsigned long working_bytes;
- size_t in_len;
- size_t out_len;
- const size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
- unsigned long in_offset;
- unsigned long in_page_bytes_left;
- unsigned long tot_in;
- unsigned long tot_out;
- unsigned long tot_len;
- char *buf;
- bool may_late_unmap, need_unmap;
- struct page **pages_in = cb->compressed_pages;
- u64 disk_start = cb->start;
- struct bio *orig_bio = cb->orig_bio;
+ const struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
+ const u32 sectorsize = fs_info->sectorsize;
+ int ret;
+ /* Compressed data length, can be unaligned */
+ u32 len_in;
+ /* Offset inside the compressed data */
+ u32 cur_in = 0;
+ /* Bytes decompressed so far */
+ u32 cur_out = 0;
+
+ len_in = read_compress_length(page_address(cb->compressed_pages[0]));
+ cur_in += LZO_LEN;
- data_in = kmap(pages_in[0]);
- tot_len = read_compress_length(data_in);
/*
- * Compressed data header check.
+ * LZO header length check
*
- * The real compressed size can't exceed the maximum extent length, and
- * all pages should be used (whole unused page with just the segment
- * header is not possible). If this happens it means the compressed
- * extent is corrupted.
+ * The total length must not exceed the maximum extent length, and
+ * all sectors must be used. If either check fails, the compressed
+ * extent is corrupted.
*/
- if (tot_len > min_t(size_t, BTRFS_MAX_COMPRESSED, srclen) ||
- tot_len < srclen - PAGE_SIZE) {
- ret = -EUCLEAN;
- goto done;
+ if (len_in > min_t(size_t, BTRFS_MAX_COMPRESSED, cb->compressed_len) ||
+ round_up(len_in, sectorsize) < cb->compressed_len) {
+ btrfs_err(fs_info,
+ "invalid lzo header, lzo len %u compressed len %u",
+ len_in, cb->compressed_len);
+ return -EUCLEAN;
}
- tot_in = LZO_LEN;
- in_offset = LZO_LEN;
- in_page_bytes_left = PAGE_SIZE - LZO_LEN;
-
- tot_out = 0;
-
- while (tot_in < tot_len) {
- in_len = read_compress_length(data_in + in_offset);
- in_page_bytes_left -= LZO_LEN;
- in_offset += LZO_LEN;
- tot_in += LZO_LEN;
+ /* Go through each lzo segment */
+ while (cur_in < len_in) {
+ struct page *cur_page;
+ /* Length of the compressed segment */
+ u32 seg_len;
+ u32 sector_bytes_left;
+ size_t out_len = lzo1x_worst_compress(sectorsize);
/*
- * Segment header check.
- *
- * The segment length must not exceed the maximum LZO
- * compression size, nor the total compressed size.
+ * We should always have enough space for one segment header
+ * inside the current sector.
*/
- if (in_len > max_segment_len || tot_in + in_len > tot_len) {
- ret = -EUCLEAN;
- goto done;
- }
-
- tot_in += in_len;
- working_bytes = in_len;
- may_late_unmap = need_unmap = false;
-
- /* fast path: avoid using the working buffer */
- if (in_page_bytes_left >= in_len) {
- buf = data_in + in_offset;
- bytes = in_len;
- may_late_unmap = true;
- goto cont;
- }
-
- /* copy bytes from the pages into the working buffer */
- buf = workspace->cbuf;
- buf_offset = 0;
- while (working_bytes) {
- bytes = min(working_bytes, in_page_bytes_left);
-
- memcpy(buf + buf_offset, data_in + in_offset, bytes);
- buf_offset += bytes;
-cont:
- working_bytes -= bytes;
- in_page_bytes_left -= bytes;
- in_offset += bytes;
-
- /* check if we need to pick another page */
- if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN)
- || in_page_bytes_left == 0) {
- tot_in += in_page_bytes_left;
-
- if (working_bytes == 0 && tot_in >= tot_len)
- break;
-
- if (page_in_index + 1 >= total_pages_in) {
- ret = -EIO;
- goto done;
- }
-
- if (may_late_unmap)
- need_unmap = true;
- else
- kunmap(pages_in[page_in_index]);
-
- data_in = kmap(pages_in[++page_in_index]);
-
- in_page_bytes_left = PAGE_SIZE;
- in_offset = 0;
- }
- }
-
- out_len = max_segment_len;
- ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
- &out_len);
- if (need_unmap)
- kunmap(pages_in[page_in_index - 1]);
+ ASSERT(cur_in / sectorsize ==
+ (cur_in + LZO_LEN - 1) / sectorsize);
+ cur_page = cb->compressed_pages[cur_in / PAGE_SIZE];
+ ASSERT(cur_page);
+ seg_len = read_compress_length(page_address(cur_page) +
+ offset_in_page(cur_in));
+ cur_in += LZO_LEN;
+
+ /* Copy the compressed segment payload into workspace */
+ copy_compressed_segment(cb, workspace->cbuf, seg_len, &cur_in);
+
+ /* Decompress the data */
+ ret = lzo1x_decompress_safe(workspace->cbuf, seg_len,
+ workspace->buf, &out_len);
if (ret != LZO_E_OK) {
- pr_warn("BTRFS: decompress failed\n");
+ btrfs_err(fs_info, "failed to decompress");
ret = -EIO;
- break;
+ goto out;
}
- buf_start = tot_out;
- tot_out += out_len;
+ /* Copy the data into inode pages */
+ ret = btrfs_decompress_buf2page(workspace->buf, out_len, cb, cur_out);
+ cur_out += out_len;
- ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
- tot_out, disk_start, orig_bio);
- if (ret2 == 0)
- break;
+ /* All data read, exit */
+ if (ret == 0)
+ goto out;
+ ret = 0;
+
+ /* Check if the sector has enough space for a segment header */
+ sector_bytes_left = sectorsize - (cur_in % sectorsize);
+ if (sector_bytes_left >= LZO_LEN)
+ continue;
+
+ /* Skip the padding zeros */
+ cur_in += sector_bytes_left;
}
-done:
- kunmap(pages_in[page_in_index]);
+out:
if (!ret)
- zero_fill_bio(orig_bio);
+ zero_fill_bio(cb->orig_bio);
return ret;
}
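The rewritten loop assumes the sector-aligned segment format: after the 4-byte total length, each segment is a 4-byte header plus payload, and a header never straddles a sector boundary; a sector tail shorter than LZO_LEN is zero padding. The header-advance rule in isolation (illustrative helper):

/* Illustrative: where the next segment header starts, given cur_in. */
static u32 example_next_header(u32 cur_in, u32 sectorsize)
{
	u32 sector_bytes_left = sectorsize - (cur_in % sectorsize);

	if (sector_bytes_left < LZO_LEN)
		cur_in += sector_bytes_left;	/* skip padding zeros */

	return cur_in;
}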
@@ -466,7 +431,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
destlen = min_t(unsigned long, destlen, PAGE_SIZE);
bytes = min_t(unsigned long, destlen, out_len - start_byte);
- kaddr = kmap_local_page(dest_page);
+ kaddr = page_address(dest_page);
memcpy(kaddr, workspace->buf + start_byte, bytes);
/*
@@ -476,7 +441,6 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
*/
if (bytes < destlen)
memset(kaddr+bytes, 0, destlen-bytes);
- kunmap_local(kaddr);
out:
return ret;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5c0f8481e25e..edb65abf0393 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -446,7 +446,6 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
* Will be also used to store the finished ordered extent.
* @file_offset: File offset for the finished IO
* @io_size: Length of the finish IO range
- * @uptodate: If the IO finishes without problem
*
* Return true if the ordered extent is finished in the range, and update
* @cached.
@@ -457,7 +456,7 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
*/
bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
struct btrfs_ordered_extent **cached,
- u64 file_offset, u64 io_size, int uptodate)
+ u64 file_offset, u64 io_size)
{
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
struct rb_node *node;
@@ -486,8 +485,6 @@ have_entry:
entry->bytes_left, io_size);
entry->bytes_left -= io_size;
- if (!uptodate)
- set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
if (entry->bytes_left == 0) {
/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index b2d88aba8420..4194e960ff61 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -177,7 +177,7 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
bool uptodate);
bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
struct btrfs_ordered_extent **cached,
- u64 file_offset, u64 io_size, int uptodate);
+ u64 file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
int type);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 0fa121171ca1..db680f5be745 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1733,7 +1733,7 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
ASSERT(trans != NULL);
ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root,
- false, true);
+ true);
if (ret < 0) {
trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
btrfs_warn(trans->fs_info,
@@ -2651,7 +2651,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
/* Search commit root to find old_roots */
ret = btrfs_find_all_roots(NULL, fs_info,
record->bytenr, 0,
- &record->old_roots, false, false);
+ &record->old_roots, false);
if (ret < 0)
goto cleanup;
}
@@ -2667,7 +2667,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
* current root. It's safe inside commit_transaction().
*/
ret = btrfs_find_all_roots(trans, fs_info,
- record->bytenr, BTRFS_SEQ_LAST, &new_roots, false, false);
+ record->bytenr, BTRFS_SEQ_LAST, &new_roots, false);
if (ret < 0)
goto cleanup;
if (qgroup_to_skip) {
@@ -3201,7 +3201,7 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
num_bytes = found.offset;
ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
- &roots, false, false);
+ &roots, false);
if (ret < 0)
goto out;
/* For rescan, just pass old_roots as NULL */
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 244d499ebc72..d8d268ca8aa7 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1035,7 +1035,7 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
for (i = 0; i < rbio->nr_pages; i++) {
if (rbio->stripe_pages[i])
continue;
- page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ page = alloc_page(GFP_NOFS);
if (!page)
return -ENOMEM;
rbio->stripe_pages[i] = page;
@@ -1054,7 +1054,7 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
for (; i < rbio->nr_pages; i++) {
if (rbio->stripe_pages[i])
continue;
- page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ page = alloc_page(GFP_NOFS);
if (!page)
return -ENOMEM;
rbio->stripe_pages[i] = page;
@@ -1636,10 +1636,10 @@ struct btrfs_plug_cb {
static int plug_cmp(void *priv, const struct list_head *a,
const struct list_head *b)
{
- struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
- plug_list);
- struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
- plug_list);
+ const struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
+ plug_list);
+ const struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
+ plug_list);
u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
@@ -2300,7 +2300,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
if (rbio->stripe_pages[index])
continue;
- page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ page = alloc_page(GFP_NOFS);
if (!page)
return -ENOMEM;
rbio->stripe_pages[index] = page;
@@ -2350,14 +2350,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
if (!need_check)
goto writeback;
- p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ p_page = alloc_page(GFP_NOFS);
if (!p_page)
goto cleanup;
SetPageUptodate(p_page);
if (has_qstripe) {
/* RAID6, allocate and map temp space for the Q stripe */
- q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ q_page = alloc_page(GFP_NOFS);
if (!q_page) {
__free_page(p_page);
goto cleanup;
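The __GFP_HIGHMEM drops here mirror the kmap() removals in lzo.c above: a page allocated from lowmem always has a direct kernel mapping, so page_address() is valid for its whole lifetime. A sketch, assuming the caller frees the page:

/* Sketch: lowmem pages need no kmap()/kunmap() around access. */
static char *example_lowmem_buffer(struct page **ret_page)
{
	struct page *page = alloc_page(GFP_NOFS);	/* never highmem */

	if (!page)
		return NULL;
	*ret_page = page;	/* caller must __free_page() it */

	return page_address(page);
}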
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 8e026de74c44..d2062d5f71dd 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -264,8 +264,8 @@ static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
struct block_entry *be = NULL, *exist;
struct root_entry *re = NULL;
- re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
- be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
+ re = kzalloc(sizeof(struct root_entry), GFP_NOFS);
+ be = kzalloc(sizeof(struct block_entry), GFP_NOFS);
if (!be || !re) {
kfree(re);
kfree(be);
@@ -313,7 +313,7 @@ static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
struct root_entry *re;
struct ref_entry *ref = NULL, *exist;
- ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
+ ref = kmalloc(sizeof(struct ref_entry), GFP_NOFS);
if (!ref)
return -ENOMEM;
@@ -358,7 +358,7 @@ static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
struct block_entry *be;
struct ref_entry *ref;
- ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
+ ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
if (!ref)
return -ENOMEM;
be = add_block_entry(fs_info, bytenr, num_bytes, 0);
@@ -393,7 +393,7 @@ static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);
- ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
+ ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
if (!ref)
return -ENOMEM;
be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index fc831597cb22..914d403b4415 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -24,6 +24,7 @@
#include "block-group.h"
#include "backref.h"
#include "misc.h"
+#include "subpage.h"
/*
* Relocation overview
@@ -2781,10 +2782,70 @@ static noinline_for_stack int prealloc_file_extent_cluster(
u64 num_bytes;
int nr;
int ret = 0;
+ u64 i_size = i_size_read(&inode->vfs_inode);
u64 prealloc_start = cluster->start - offset;
u64 prealloc_end = cluster->end - offset;
u64 cur_offset = prealloc_start;
+ /*
+ * In the subpage case, the previous i_size may not be aligned to PAGE_SIZE.
+ * This means the range [i_size, PAGE_END + 1) was filled with zeros by the
+ * btrfs_do_readpage() call for the previously relocated file cluster.
+ *
+ * If the current cluster starts in the above range, btrfs_do_readpage()
+ * will skip the read, and relocate_one_page() will later write back
+ * the padding zeros as new data, causing data corruption.
+ *
+ * Here we have to manually invalidate the range [i_size, PAGE_END + 1).
+ */
+ if (!IS_ALIGNED(i_size, PAGE_SIZE)) {
+ struct address_space *mapping = inode->vfs_inode.i_mapping;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ const u32 sectorsize = fs_info->sectorsize;
+ struct page *page;
+
+ ASSERT(sectorsize < PAGE_SIZE);
+ ASSERT(IS_ALIGNED(i_size, sectorsize));
+
+ /*
+ * Subpage can't handle page with DIRTY but without UPTODATE
+ * bit as it can lead to the following deadlock:
+ *
+ * btrfs_readpage()
+ * | Page already *locked*
+ * |- btrfs_lock_and_flush_ordered_range()
+ * |- btrfs_start_ordered_extent()
+ * |- extent_write_cache_pages()
+ * |- lock_page()
+ * We try to lock the page we already hold.
+ *
+ * Here we just write back the whole data reloc inode, which
+ * ensures the page has no dirty range left and makes it safe to
+ * clear the uptodate bits.
+ *
+ * This shouldn't cause too much overhead, as we need to write
+ * the data back anyway.
+ */
+ ret = filemap_write_and_wait(mapping);
+ if (ret < 0)
+ return ret;
+
+ clear_extent_bits(&inode->io_tree, i_size,
+ round_up(i_size, PAGE_SIZE) - 1,
+ EXTENT_UPTODATE);
+ page = find_lock_page(mapping, i_size >> PAGE_SHIFT);
+ /*
+ * If page is freed we don't need to do anything then, as we
+ * will re-read the whole page anyway.
+ */
+ if (page) {
+ btrfs_subpage_clear_uptodate(fs_info, page, i_size,
+ round_up(i_size, PAGE_SIZE) - i_size);
+ unlock_page(page);
+ put_page(page);
+ }
+ }
+
BUG_ON(cluster->start != cluster->boundary[0]);
ret = btrfs_alloc_data_chunk_ondemand(inode,
prealloc_end + 1 - prealloc_start);
@@ -2886,19 +2947,149 @@ noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
}
ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
-static int relocate_file_extent_cluster(struct inode *inode,
- struct file_extent_cluster *cluster)
+static u64 get_cluster_boundary_end(struct file_extent_cluster *cluster,
+ int cluster_nr)
+{
+ /* Last extent, use cluster end directly */
+ if (cluster_nr >= cluster->nr - 1)
+ return cluster->end;
+
+ /* Use the next boundary start */
+ return cluster->boundary[cluster_nr + 1] - 1;
+}
+
+static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
+ struct file_extent_cluster *cluster,
+ int *cluster_nr, unsigned long page_index)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ u64 offset = BTRFS_I(inode)->index_cnt;
+ const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT;
+ gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
+ struct page *page;
u64 page_start;
u64 page_end;
+ u64 cur;
+ int ret;
+
+ ASSERT(page_index <= last_index);
+ page = find_lock_page(inode->i_mapping, page_index);
+ if (!page) {
+ page_cache_sync_readahead(inode->i_mapping, ra, NULL,
+ page_index, last_index + 1 - page_index);
+ page = find_or_create_page(inode->i_mapping, page_index, mask);
+ if (!page)
+ return -ENOMEM;
+ }
+ ret = set_page_extent_mapped(page);
+ if (ret < 0)
+ goto release_page;
+
+ if (PageReadahead(page))
+ page_cache_async_readahead(inode->i_mapping, ra, NULL, page,
+ page_index, last_index + 1 - page_index);
+
+ if (!PageUptodate(page)) {
+ btrfs_readpage(NULL, page);
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ ret = -EIO;
+ goto release_page;
+ }
+ }
+
+ page_start = page_offset(page);
+ page_end = page_start + PAGE_SIZE - 1;
+
+ /*
+ * Start from the cluster boundary, as in the subpage case the cluster
+ * can start inside the page.
+ */
+ cur = max(page_start, cluster->boundary[*cluster_nr] - offset);
+ while (cur <= page_end) {
+ u64 extent_start = cluster->boundary[*cluster_nr] - offset;
+ u64 extent_end = get_cluster_boundary_end(cluster,
+ *cluster_nr) - offset;
+ u64 clamped_start = max(page_start, extent_start);
+ u64 clamped_end = min(page_end, extent_end);
+ u32 clamped_len = clamped_end + 1 - clamped_start;
+
+ /* Reserve metadata for this range */
+ ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
+ clamped_len);
+ if (ret)
+ goto release_page;
+
+ /* Mark the range delalloc and dirty for later writeback */
+ lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
+ ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start,
+ clamped_end, 0, NULL);
+ if (ret) {
+ clear_extent_bits(&BTRFS_I(inode)->io_tree,
+ clamped_start, clamped_end,
+ EXTENT_LOCKED | EXTENT_BOUNDARY);
+ btrfs_delalloc_release_metadata(BTRFS_I(inode),
+ clamped_len, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode),
+ clamped_len);
+ goto release_page;
+ }
+ btrfs_page_set_dirty(fs_info, page, clamped_start, clamped_len);
+
+ /*
+ * Set the boundary if it's inside the page.
+ * Data relocation requires the destination extents to have the
+ * same size as the source.
+ * The EXTENT_BOUNDARY bit prevents the current extent from being
+ * merged with the previous extent.
+ */
+ if (in_range(cluster->boundary[*cluster_nr] - offset,
+ page_start, PAGE_SIZE)) {
+ u64 boundary_start = cluster->boundary[*cluster_nr] -
+ offset;
+ u64 boundary_end = boundary_start +
+ fs_info->sectorsize - 1;
+
+ set_extent_bits(&BTRFS_I(inode)->io_tree,
+ boundary_start, boundary_end,
+ EXTENT_BOUNDARY);
+ }
+ unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len);
+ cur += clamped_len;
+
+ /* Crossed extent end, go to next extent */
+ if (cur >= extent_end) {
+ (*cluster_nr)++;
+ /* Just finished the last extent of the cluster, exit. */
+ if (*cluster_nr >= cluster->nr)
+ break;
+ }
+ }
+ unlock_page(page);
+ put_page(page);
+
+ balance_dirty_pages_ratelimited(inode->i_mapping);
+ btrfs_throttle(fs_info);
+ if (btrfs_should_cancel_balance(fs_info))
+ ret = -ECANCELED;
+ return ret;
+
+release_page:
+ unlock_page(page);
+ put_page(page);
+ return ret;
+}
+
+static int relocate_file_extent_cluster(struct inode *inode,
+ struct file_extent_cluster *cluster)
+{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 offset = BTRFS_I(inode)->index_cnt;
unsigned long index;
unsigned long last_index;
- struct page *page;
struct file_ra_state *ra;
- gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
- int nr = 0;
+ int cluster_nr = 0;
int ret = 0;
if (!cluster->nr)
@@ -2919,109 +3110,14 @@ static int relocate_file_extent_cluster(struct inode *inode,
if (ret)
goto out;
- index = (cluster->start - offset) >> PAGE_SHIFT;
last_index = (cluster->end - offset) >> PAGE_SHIFT;
- while (index <= last_index) {
- ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
- PAGE_SIZE);
- if (ret)
- goto out;
-
- page = find_lock_page(inode->i_mapping, index);
- if (!page) {
- page_cache_sync_readahead(inode->i_mapping,
- ra, NULL, index,
- last_index + 1 - index);
- page = find_or_create_page(inode->i_mapping, index,
- mask);
- if (!page) {
- btrfs_delalloc_release_metadata(BTRFS_I(inode),
- PAGE_SIZE, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode),
- PAGE_SIZE);
- ret = -ENOMEM;
- goto out;
- }
- }
- ret = set_page_extent_mapped(page);
- if (ret < 0) {
- btrfs_delalloc_release_metadata(BTRFS_I(inode),
- PAGE_SIZE, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
- unlock_page(page);
- put_page(page);
- goto out;
- }
-
- if (PageReadahead(page)) {
- page_cache_async_readahead(inode->i_mapping,
- ra, NULL, page, index,
- last_index + 1 - index);
- }
-
- if (!PageUptodate(page)) {
- btrfs_readpage(NULL, page);
- lock_page(page);
- if (!PageUptodate(page)) {
- unlock_page(page);
- put_page(page);
- btrfs_delalloc_release_metadata(BTRFS_I(inode),
- PAGE_SIZE, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode),
- PAGE_SIZE);
- ret = -EIO;
- goto out;
- }
- }
-
- page_start = page_offset(page);
- page_end = page_start + PAGE_SIZE - 1;
-
- lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
-
- if (nr < cluster->nr &&
- page_start + offset == cluster->boundary[nr]) {
- set_extent_bits(&BTRFS_I(inode)->io_tree,
- page_start, page_end,
- EXTENT_BOUNDARY);
- nr++;
- }
-
- ret = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start,
- page_end, 0, NULL);
- if (ret) {
- unlock_page(page);
- put_page(page);
- btrfs_delalloc_release_metadata(BTRFS_I(inode),
- PAGE_SIZE, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode),
- PAGE_SIZE);
-
- clear_extent_bits(&BTRFS_I(inode)->io_tree,
- page_start, page_end,
- EXTENT_LOCKED | EXTENT_BOUNDARY);
- goto out;
-
- }
- set_page_dirty(page);
-
- unlock_extent(&BTRFS_I(inode)->io_tree,
- page_start, page_end);
- unlock_page(page);
- put_page(page);
-
- index++;
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
- balance_dirty_pages_ratelimited(inode->i_mapping);
- btrfs_throttle(fs_info);
- if (btrfs_should_cancel_balance(fs_info)) {
- ret = -ECANCELED;
- goto out;
- }
- }
- WARN_ON(nr != cluster->nr);
+ for (index = (cluster->start - offset) >> PAGE_SHIFT;
+ index <= last_index && !ret; index++)
+ ret = relocate_one_page(inode, ra, cluster, &cluster_nr, index);
if (btrfs_is_zoned(fs_info) && !ret)
ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (ret == 0)
+ WARN_ON(cluster_nr != cluster->nr);
out:
kfree(ra);
return ret;
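The per-page loop in relocate_one_page() clamps each extent to the page it is processing; the arithmetic in isolation (illustrative, both range ends inclusive):

/* Illustrative: bytes an extent contributes to the current page. */
static u32 example_clamped_len(u64 page_start, u64 page_end,
			       u64 extent_start, u64 extent_end)
{
	u64 clamped_start = max(page_start, extent_start);
	u64 clamped_end = min(page_end, extent_end);

	return clamped_end + 1 - clamped_start;
}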
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 6ac37ae6c811..72f9b865e847 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1198,7 +1198,7 @@ struct backref_ctx {
static int __clone_root_cmp_bsearch(const void *key, const void *elt)
{
u64 root = (u64)(uintptr_t)key;
- struct clone_root *cr = (struct clone_root *)elt;
+ const struct clone_root *cr = elt;
if (root < cr->root->root_key.objectid)
return -1;
@@ -1209,8 +1209,8 @@ static int __clone_root_cmp_bsearch(const void *key, const void *elt)
static int __clone_root_cmp_sort(const void *e1, const void *e2)
{
- struct clone_root *cr1 = (struct clone_root *)e1;
- struct clone_root *cr2 = (struct clone_root *)e2;
+ const struct clone_root *cr1 = e1;
+ const struct clone_root *cr2 = e2;
if (cr1->root->root_key.objectid < cr2->root->root_key.objectid)
return -1;
@@ -1307,7 +1307,7 @@ static int find_extent_clone(struct send_ctx *sctx,
u64 flags = 0;
struct btrfs_file_extent_item *fi;
struct extent_buffer *eb = path->nodes[0];
- struct backref_ctx *backref_ctx = NULL;
+ struct backref_ctx backref_ctx = {0};
struct clone_root *cur_clone_root;
struct btrfs_key found_key;
struct btrfs_path *tmp_path;
@@ -1322,12 +1322,6 @@ static int find_extent_clone(struct send_ctx *sctx,
/* We only use this path under the commit sem */
tmp_path->need_commit_sem = 0;
- backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL);
- if (!backref_ctx) {
- ret = -ENOMEM;
- goto out;
- }
-
if (data_offset >= ino_size) {
/*
* There may be extents that lie behind the file's size.
@@ -1392,12 +1386,12 @@ static int find_extent_clone(struct send_ctx *sctx,
cur_clone_root->found_refs = 0;
}
- backref_ctx->sctx = sctx;
- backref_ctx->found = 0;
- backref_ctx->cur_objectid = ino;
- backref_ctx->cur_offset = data_offset;
- backref_ctx->found_itself = 0;
- backref_ctx->extent_len = num_bytes;
+ backref_ctx.sctx = sctx;
+ backref_ctx.found = 0;
+ backref_ctx.cur_objectid = ino;
+ backref_ctx.cur_offset = data_offset;
+ backref_ctx.found_itself = 0;
+ backref_ctx.extent_len = num_bytes;
/*
* The last extent of a file may be too large due to page alignment.
@@ -1405,7 +1399,7 @@ static int find_extent_clone(struct send_ctx *sctx,
* __iterate_backrefs work.
*/
if (data_offset + num_bytes >= ino_size)
- backref_ctx->extent_len = ino_size - data_offset;
+ backref_ctx.extent_len = ino_size - data_offset;
/*
* Now collect all backrefs.
@@ -1416,12 +1410,12 @@ static int find_extent_clone(struct send_ctx *sctx,
extent_item_pos = 0;
ret = iterate_extent_inodes(fs_info, found_key.objectid,
extent_item_pos, 1, __iterate_backrefs,
- backref_ctx, false);
+ &backref_ctx, false);
if (ret < 0)
goto out;
- if (!backref_ctx->found_itself) {
+ if (!backref_ctx.found_itself) {
/* found a bug in backref code? */
ret = -EIO;
btrfs_err(fs_info,
@@ -1434,7 +1428,7 @@ static int find_extent_clone(struct send_ctx *sctx,
"find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
data_offset, ino, num_bytes, logical);
- if (!backref_ctx->found)
+ if (!backref_ctx.found)
btrfs_debug(fs_info, "no clones found");
cur_clone_root = NULL;
@@ -1458,7 +1452,6 @@ static int find_extent_clone(struct send_ctx *sctx,
out:
btrfs_free_path(tmp_path);
- kfree(backref_ctx);
return ret;
}
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index f79bf85f2439..5ada02e0e629 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -493,6 +493,11 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
long time_left;
int loops;
+ delalloc_bytes = percpu_counter_sum_positive(&fs_info->delalloc_bytes);
+ ordered_bytes = percpu_counter_sum_positive(&fs_info->ordered_bytes);
+ if (delalloc_bytes == 0 && ordered_bytes == 0)
+ return;
+
/* Calc the number of the pages we need flush for space reservation */
if (to_reclaim == U64_MAX) {
items = U64_MAX;
@@ -500,22 +505,21 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
/*
* to_reclaim is set to however much metadata we need to
* reclaim, but reclaiming that much data doesn't really track
- * exactly, so increase the amount to reclaim by 2x in order to
- * make sure we're flushing enough delalloc to hopefully reclaim
- * some metadata reservations.
+ * exactly. What we really want to do is reclaim a full inode's
+ * worth of reservations, but that information isn't available
+ * here. We take a fraction of the delalloc bytes for our
+ * flushing loops and hope for the best. Delalloc will expand
+ * the amount we write to cover an entire dirty extent, which
+ * will reclaim the metadata reservation for that range. If
+ * it's not enough subsequent flush stages will be more
+ * aggressive.
*/
+ to_reclaim = max(to_reclaim, delalloc_bytes >> 3);
items = calc_reclaim_items_nr(fs_info, to_reclaim) * 2;
- to_reclaim = items * EXTENT_SIZE_PER_ITEM;
}
trans = (struct btrfs_trans_handle *)current->journal_info;
- delalloc_bytes = percpu_counter_sum_positive(
- &fs_info->delalloc_bytes);
- ordered_bytes = percpu_counter_sum_positive(&fs_info->ordered_bytes);
- if (delalloc_bytes == 0 && ordered_bytes == 0)
- return;
-
/*
* If we are doing more ordered than delalloc we need to just wait on
* ordered extents, otherwise we'll waste time trying to flush delalloc
@@ -528,9 +532,49 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
while ((delalloc_bytes || ordered_bytes) && loops < 3) {
u64 temp = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
long nr_pages = min_t(u64, temp, LONG_MAX);
+ int async_pages;
btrfs_start_delalloc_roots(fs_info, nr_pages, true);
+ /*
+ * We need to make sure any outstanding async pages are now
+ * processed before we continue. This is because things like
+ * sync_inode() try to be smart and skip writing if the inode is
+ * marked clean. We don't use filemap_fdatawrite() for flushing
+ * because we want to control how many pages we write out at a
+ * time, thus this is the only safe way to make sure we've
+ * waited for outstanding compressed workers to have started
+ * their jobs and thus have ordered extents set up properly.
+ *
+ * This exists because we do not want to wait for each
+ * individual inode to finish its async work; we simply want to
+ * start the IO on everybody, and then come back here and wait
+ * for all of the async work to catch up. Once we're done with
+ * that we know we'll have ordered extents for everything and we
+ * can decide if we wait for that or not.
+ *
+ * If we choose to replace this in the future, make absolutely
+ * sure that the proper waiting is being done in the async case,
+ * as there have been bugs in that area before.
+ */
+ async_pages = atomic_read(&fs_info->async_delalloc_pages);
+ if (!async_pages)
+ goto skip_async;
+
+ /*
+ * We don't want to wait forever. If we wrote fewer pages in this
+ * loop than we have outstanding, only wait for that number of
+ * pages; otherwise we can wait for all async pages to finish
+ * before continuing.
+ */
+ if (async_pages > nr_pages)
+ async_pages -= nr_pages;
+ else
+ async_pages = 0;
+ wait_event(fs_info->async_submit_wait,
+ atomic_read(&fs_info->async_delalloc_pages) <=
+ async_pages);
+skip_async:
loops++;
if (wait_ordered && !trans) {
btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
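The throttling added above, reduced to its core (illustrative): after kicking nr_pages of writeback, sleep until the async-delalloc backlog has shrunk by at least that much, or drained entirely.

/* Illustrative: wait for the async delalloc workers to catch up. */
static void example_wait_async(struct btrfs_fs_info *fs_info, long nr_pages)
{
	int async_pages = atomic_read(&fs_info->async_delalloc_pages);

	if (!async_pages)
		return;

	if (async_pages > nr_pages)
		async_pages -= nr_pages;
	else
		async_pages = 0;

	wait_event(fs_info->async_submit_wait,
		   atomic_read(&fs_info->async_delalloc_pages) <= async_pages);
}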
@@ -595,8 +639,11 @@ static void flush_space(struct btrfs_fs_info *fs_info,
break;
case FLUSH_DELALLOC:
case FLUSH_DELALLOC_WAIT:
+ case FLUSH_DELALLOC_FULL:
+ if (state == FLUSH_DELALLOC_FULL)
+ num_bytes = U64_MAX;
shrink_delalloc(fs_info, space_info, num_bytes,
- state == FLUSH_DELALLOC_WAIT, for_preempt);
+ state != FLUSH_DELALLOC, for_preempt);
break;
case FLUSH_DELAYED_REFS_NR:
case FLUSH_DELAYED_REFS:
@@ -686,7 +733,7 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
{
u64 global_rsv_size = fs_info->global_block_rsv.reserved;
u64 ordered, delalloc;
- u64 thresh = div_factor_fine(space_info->total_bytes, 98);
+ u64 thresh = div_factor_fine(space_info->total_bytes, 90);
u64 used;
/* If we're just plain full then async reclaim just slows us down. */
@@ -694,6 +741,20 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
global_rsv_size) >= thresh)
return false;
+ used = space_info->bytes_may_use + space_info->bytes_pinned;
+
+ /* The total flushable belongs to the global rsv, don't flush. */
+ if (global_rsv_size >= used)
+ return false;
+
+ /*
+ * 128MiB is 1/4 of the maximum global rsv size. If we have less than
+ * that devoted to other reservations then there's no sense in flushing;
+ * we don't have much that needs flushing.
+ */
+ if (used - global_rsv_size <= SZ_128M)
+ return false;
+
/*
* We have tickets queued, bail so we don't compete with the async
* flushers.
@@ -824,6 +885,8 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
struct reserve_ticket *ticket;
u64 tickets_id = space_info->tickets_id;
+ trace_btrfs_fail_all_tickets(fs_info, space_info);
+
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
btrfs_info(fs_info, "cannot satisfy tickets, dumping space info");
__btrfs_dump_space_info(fs_info, space_info);
@@ -905,6 +968,14 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
}
/*
+ * We do not want to empty the system of delalloc unless we're
+ * under heavy pressure, so allow one trip through the flushing
+ * logic before we start doing a FLUSH_DELALLOC_FULL.
+ */
+ if (flush_state == FLUSH_DELALLOC_FULL && !commit_cycles)
+ flush_state++;
+
+ /*
* We don't want to force a chunk allocation until we've tried
* pretty hard to reclaim space. Think of the case where we
* freed up a bunch of space and so have a lot of pinned space
@@ -1067,7 +1138,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
* so if we now have space to allocate do the force chunk allocation.
*/
static const enum btrfs_flush_state data_flush_states[] = {
- FLUSH_DELALLOC_WAIT,
+ FLUSH_DELALLOC_FULL,
RUN_DELAYED_IPUTS,
COMMIT_TRANS,
ALLOC_CHUNK_FORCE,
@@ -1156,6 +1227,7 @@ static const enum btrfs_flush_state evict_flush_states[] = {
FLUSH_DELAYED_REFS,
FLUSH_DELALLOC,
FLUSH_DELALLOC_WAIT,
+ FLUSH_DELALLOC_FULL,
ALLOC_CHUNK,
COMMIT_TRANS,
};
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index 8260f8bb3ff0..f429256f56db 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -73,7 +73,7 @@ u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
} \
token->kaddr = page_address(token->eb->pages[idx]); \
token->offset = idx << PAGE_SHIFT; \
- if (oip + size <= PAGE_SIZE) \
+	if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) \
return get_unaligned_le##bits(token->kaddr + oip); \
\
memcpy(lebytes, token->kaddr + oip, part); \
@@ -94,7 +94,7 @@ u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
u8 lebytes[sizeof(u##bits)]; \
\
ASSERT(check_setget_bounds(eb, ptr, off, size)); \
- if (oip + size <= PAGE_SIZE) \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) \
return get_unaligned_le##bits(kaddr + oip); \
\
memcpy(lebytes, kaddr + oip, part); \
@@ -124,7 +124,7 @@ void btrfs_set_token_##bits(struct btrfs_map_token *token, \
} \
token->kaddr = page_address(token->eb->pages[idx]); \
token->offset = idx << PAGE_SHIFT; \
- if (oip + size <= PAGE_SIZE) { \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) { \
put_unaligned_le##bits(val, token->kaddr + oip); \
return; \
} \
@@ -146,7 +146,7 @@ void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
u8 lebytes[sizeof(u##bits)]; \
\
ASSERT(check_setget_bounds(eb, ptr, off, size)); \
- if (oip + size <= PAGE_SIZE) { \
+ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) { \
put_unaligned_le##bits(val, kaddr + oip); \
return; \
} \
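Context for the repeated change above: INLINE_EXTENT_BUFFER_PAGES is the page count of the largest metadata block (64K), so it is 1 on 64K-page systems, where an extent buffer always fits in a single page, a field can never straddle a page boundary, and the cross-page copy path after each check is dead code the compiler can eliminate. A rough userspace model of that reasoning (sizes and names are assumptions for illustration, not kernel code; host-endian for brevity):

#include <string.h>
#include <stdint.h>

#define MODEL_PAGE_SIZE    65536UL /* e.g. a 64K-page system */
#define MODEL_MAX_NODESIZE 65536UL /* largest btrfs metadata block */
/* 1 on 64K-page systems: an extent buffer never spans two pages. */
#define MODEL_EB_PAGES (MODEL_MAX_NODESIZE / MODEL_PAGE_SIZE)

/* Read a 4-byte field at byte offset @off of a paged buffer. */
static uint32_t model_get32(char * const pages[], unsigned long off)
{
	unsigned long oip = off & (MODEL_PAGE_SIZE - 1); /* offset in page */
	uint32_t v;

	if (MODEL_EB_PAGES == 1 || oip + sizeof(v) <= MODEL_PAGE_SIZE) {
		/* Fast path: the whole field lives in one page. */
		memcpy(&v, pages[off / MODEL_PAGE_SIZE] + oip, sizeof(v));
		return v;
	}
	/* Cross-page path: unreachable when MODEL_EB_PAGES == 1, so the
	 * compiler can drop it entirely, which is the point of the change. */
	{
		unsigned long part = MODEL_PAGE_SIZE - oip;

		memcpy(&v, pages[off / MODEL_PAGE_SIZE] + oip, part);
		memcpy((char *)&v + part, pages[off / MODEL_PAGE_SIZE + 1],
		       sizeof(v) - part);
	}
	return v;
}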
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 640bcd21bf28..cb10e56ee31e 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -435,8 +435,10 @@ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
spin_lock_irqsave(&subpage->lock, flags);
subpage->writeback_bitmap &= ~tmp;
- if (subpage->writeback_bitmap == 0)
+ if (subpage->writeback_bitmap == 0) {
+ ASSERT(PageWriteback(page));
end_page_writeback(page);
+ }
spin_unlock_irqrestore(&subpage->lock, flags);
}
@@ -559,3 +561,23 @@ IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback,
PageWriteback);
IMPLEMENT_BTRFS_PAGE_OPS(ordered, SetPageOrdered, ClearPageOrdered,
PageOrdered);
+
+/*
+ * Make sure that not only the page dirty bit is cleared, but also the
+ * subpage dirty bit is cleared.
+ */
+void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
+ struct page *page)
+{
+ struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+
+ if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
+ return;
+
+ ASSERT(!PageDirty(page));
+ if (fs_info->sectorsize == PAGE_SIZE)
+ return;
+
+ ASSERT(PagePrivate(page) && page->private);
+ ASSERT(subpage->dirty_bitmap == 0);
+}
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 4d7aca85d915..0120948f37a1 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -126,4 +126,7 @@ DECLARE_BTRFS_SUBPAGE_OPS(ordered);
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len);
+void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
+ struct page *page);
+
#endif
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d07b18b2b250..537d90bf5d84 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1201,21 +1201,14 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = (u64)-1;
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ ret = btrfs_search_backwards(root, &key, path);
if (ret < 0) {
goto err;
} else if (ret > 0) {
- ret = btrfs_previous_item(root, path, subvol_objectid,
- BTRFS_ROOT_BACKREF_KEY);
- if (ret < 0) {
- goto err;
- } else if (ret > 0) {
- ret = -ENOENT;
- goto err;
- }
+ ret = -ENOENT;
+ goto err;
}
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
subvol_objectid = key.offset;
root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -1248,21 +1241,14 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
- ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+ ret = btrfs_search_backwards(fs_root, &key, path);
if (ret < 0) {
goto err;
} else if (ret > 0) {
- ret = btrfs_previous_item(fs_root, path, dirid,
- BTRFS_INODE_REF_KEY);
- if (ret < 0) {
- goto err;
- } else if (ret > 0) {
- ret = -ENOENT;
- goto err;
- }
+ ret = -ENOENT;
+ goto err;
}
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
dirid = key.offset;
inode_ref = btrfs_item_ptr(path->nodes[0],
@@ -1353,6 +1339,9 @@ static int btrfs_fill_super(struct super_block *sb,
sb->s_op = &btrfs_super_ops;
sb->s_d_op = &btrfs_dentry_operations;
sb->s_export_op = &btrfs_export_ops;
+#ifdef CONFIG_FS_VERITY
+ sb->s_vop = &btrfs_verityops;
+#endif
sb->s_xattr = btrfs_xattr_handlers;
sb->s_time_gran = 1;
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
@@ -2041,13 +2030,6 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
ret = -EINVAL;
goto restore;
}
- if (fs_info->sectorsize < PAGE_SIZE) {
- btrfs_warn(fs_info,
- "read-write mount is not yet allowed for sectorsize %u page size %lu",
- fs_info->sectorsize, PAGE_SIZE);
- ret = -EINVAL;
- goto restore;
- }
/*
* NOTE: when remounting with a change that does writes, don't
@@ -2096,16 +2078,15 @@ restore:
}
/* Used to sort the devices by max_avail(descending sort) */
-static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
- const void *dev_info2)
+static int btrfs_cmp_device_free_bytes(const void *a, const void *b)
{
- if (((struct btrfs_device_info *)dev_info1)->max_avail >
- ((struct btrfs_device_info *)dev_info2)->max_avail)
+ const struct btrfs_device_info *dev_info1 = a;
+ const struct btrfs_device_info *dev_info2 = b;
+
+ if (dev_info1->max_avail > dev_info2->max_avail)
return -1;
- else if (((struct btrfs_device_info *)dev_info1)->max_avail <
- ((struct btrfs_device_info *)dev_info2)->max_avail)
+ else if (dev_info1->max_avail < dev_info2->max_avail)
return 1;
- else
return 0;
}
@@ -2381,7 +2362,7 @@ static struct file_system_type btrfs_root_fs_type = {
.name = "btrfs",
.mount = btrfs_mount_root,
.kill_sb = btrfs_kill_super,
- .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("btrfs");
@@ -2572,6 +2553,11 @@ static void __init btrfs_print_mod_info(void)
#else
", zoned=no"
#endif
+#ifdef CONFIG_FS_VERITY
+ ", fsverity=yes"
+#else
+ ", fsverity=no"
+#endif
;
pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 9d1d140118ff..25a6f587852b 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -22,6 +22,26 @@
#include "block-group.h"
#include "qgroup.h"
+/*
+ * Structure name Path
+ * --------------------------------------------------------------------------
+ * btrfs_supported_static_feature_attrs /sys/fs/btrfs/features
+ * btrfs_supported_feature_attrs /sys/fs/btrfs/features and
+ * /sys/fs/btrfs/<uuid>/features
+ * btrfs_attrs /sys/fs/btrfs/<uuid>
+ * devid_attrs /sys/fs/btrfs/<uuid>/devinfo/<devid>
+ * allocation_attrs /sys/fs/btrfs/<uuid>/allocation
+ * qgroup_attrs /sys/fs/btrfs/<uuid>/qgroups/<level>_<qgroupid>
+ * space_info_attrs /sys/fs/btrfs/<uuid>/allocation/<bg-type>
+ * raid_attrs /sys/fs/btrfs/<uuid>/allocation/<bg-type>/<bg-profile>
+ *
+ * When built with BTRFS_CONFIG_DEBUG:
+ *
+ * btrfs_debug_feature_attrs /sys/fs/btrfs/debug
+ * btrfs_debug_mount_attrs /sys/fs/btrfs/<uuid>/debug
+ * discard_debug_attrs /sys/fs/btrfs/<uuid>/debug/discard
+ */
+
struct btrfs_feature_attr {
struct kobj_attribute kobj_attr;
enum btrfs_feature_set feature_set;
@@ -267,7 +287,17 @@ BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
#ifdef CONFIG_BTRFS_DEBUG
BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
#endif
+#ifdef CONFIG_FS_VERITY
+BTRFS_FEAT_ATTR_COMPAT_RO(verity, VERITY);
+#endif
+/*
+ * Features which depend on feature bits and may differ between each fs.
+ *
+ * /sys/fs/btrfs/features - all available features implemented by this version
+ * /sys/fs/btrfs/UUID/features - features of the fs which are enabled or
+ * can be changed on a mounted filesystem.
+ */
static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(mixed_backref),
BTRFS_FEAT_ATTR_PTR(default_subvol),
@@ -285,16 +315,12 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
#ifdef CONFIG_BTRFS_DEBUG
BTRFS_FEAT_ATTR_PTR(zoned),
#endif
+#ifdef CONFIG_FS_VERITY
+ BTRFS_FEAT_ATTR_PTR(verity),
+#endif
NULL
};
-/*
- * Features which depend on feature bits and may differ between each fs.
- *
- * /sys/fs/btrfs/features lists all available features of this kernel while
- * /sys/fs/btrfs/UUID/features shows features of the fs which are enabled or
- * can be changed online.
- */
static const struct attribute_group btrfs_feature_attr_group = {
.name = "features",
.is_visible = btrfs_feature_visible,
@@ -366,6 +392,10 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
{
ssize_t ret = 0;
+ /* 4K sector size is also supported with 64K page size */
+ if (PAGE_SIZE == SZ_64K)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%u ", SZ_4K);
+
/* Only sectorsize == PAGE_SIZE is now supported */
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%lu\n", PAGE_SIZE);
@@ -374,6 +404,12 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
BTRFS_ATTR(static_feature, supported_sectorsizes,
supported_sectorsizes_show);
+/*
+ * Features which only depend on kernel version.
+ *
+ * These are listed in /sys/fs/btrfs/features along with
+ * btrfs_supported_feature_attrs.
+ */
static struct attribute *btrfs_supported_static_feature_attrs[] = {
BTRFS_ATTR_PTR(static_feature, rmdir_subvol),
BTRFS_ATTR_PTR(static_feature, supported_checksums),
@@ -383,12 +419,6 @@ static struct attribute *btrfs_supported_static_feature_attrs[] = {
NULL
};
-/*
- * Features which only depend on kernel version.
- *
- * These are listed in /sys/fs/btrfs/features along with
- * btrfs_feature_attr_group
- */
static const struct attribute_group btrfs_static_feature_attr_group = {
.name = "features",
.attrs = btrfs_supported_static_feature_attrs,
@@ -547,6 +577,11 @@ static ssize_t btrfs_discard_max_discard_size_store(struct kobject *kobj,
BTRFS_ATTR_RW(discard, max_discard_size, btrfs_discard_max_discard_size_show,
btrfs_discard_max_discard_size_store);
+/*
+ * Per-filesystem debugging of discard (when mounted with discard=async).
+ *
+ * Path: /sys/fs/btrfs/<uuid>/debug/discard/
+ */
static const struct attribute *discard_debug_attrs[] = {
BTRFS_ATTR_PTR(discard, discardable_bytes),
BTRFS_ATTR_PTR(discard, discardable_extents),
@@ -560,15 +595,19 @@ static const struct attribute *discard_debug_attrs[] = {
};
/*
- * Runtime debugging exported via sysfs
+ * Per-filesystem runtime debugging exported via sysfs.
*
- * /sys/fs/btrfs/debug - applies to module or all filesystems
- * /sys/fs/btrfs/UUID - applies only to the given filesystem
+ * Path: /sys/fs/btrfs/UUID/debug/
*/
static const struct attribute *btrfs_debug_mount_attrs[] = {
NULL,
};
+/*
+ * Runtime debugging exported via sysfs, applies to all mounted filesystems.
+ *
+ * Path: /sys/fs/btrfs/debug
+ */
static struct attribute *btrfs_debug_feature_attrs[] = {
NULL
};
@@ -637,6 +676,11 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
+/*
+ * Allocation information about block group profiles.
+ *
+ * Path: /sys/fs/btrfs/<uuid>/allocation/<bg-type>/<bg-profile>/
+ */
static struct attribute *raid_attrs[] = {
BTRFS_ATTR_PTR(raid, total_bytes),
BTRFS_ATTR_PTR(raid, used_bytes),
@@ -676,6 +720,11 @@ SPACE_INFO_ATTR(bytes_zone_unusable);
SPACE_INFO_ATTR(disk_used);
SPACE_INFO_ATTR(disk_total);
+/*
+ * Allocation information about block group types.
+ *
+ * Path: /sys/fs/btrfs/<uuid>/allocation/<bg-type>/
+ */
static struct attribute *space_info_attrs[] = {
BTRFS_ATTR_PTR(space_info, flags),
BTRFS_ATTR_PTR(space_info, total_bytes),
@@ -703,6 +752,11 @@ static struct kobj_type space_info_ktype = {
.default_groups = space_info_groups,
};
+/*
+ * Allocation information about block groups.
+ *
+ * Path: /sys/fs/btrfs/<uuid>/allocation/
+ */
static const struct attribute *allocation_attrs[] = {
BTRFS_ATTR_PTR(allocation, global_rsv_reserved),
BTRFS_ATTR_PTR(allocation, global_rsv_size),
@@ -974,7 +1028,8 @@ static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj,
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
ssize_t ret;
- ret = scnprintf(buf, PAGE_SIZE, "%d\n", fs_info->bg_reclaim_threshold);
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+ READ_ONCE(fs_info->bg_reclaim_threshold));
return ret;
}
@@ -991,16 +1046,21 @@ static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
if (ret)
return ret;
- if (thresh <= 50 || thresh > 100)
+ if (thresh != 0 && (thresh <= 50 || thresh > 100))
return -EINVAL;
- fs_info->bg_reclaim_threshold = thresh;
+ WRITE_ONCE(fs_info->bg_reclaim_threshold, thresh);
return len;
}
BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
btrfs_bg_reclaim_threshold_store);
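Background for the READ_ONCE()/WRITE_ONCE() pairing above: the store runs from the sysfs path while the reclaim code reads the threshold with no lock held in between, so the accesses are annotated to prevent load/store tearing and refetching. In portable C the same publish/consume pattern looks roughly like this (userspace sketch with invented names; relaxed atomics stand in for the kernel macros):

#include <stdatomic.h>

static _Atomic int bg_reclaim_threshold_model;

/* sysfs store path: publish the new value with a single untorn write */
static void model_store_threshold(int thresh)
{
	atomic_store_explicit(&bg_reclaim_threshold_model, thresh,
			      memory_order_relaxed);
}

/* reclaim worker: sample the value once per decision */
static int model_load_threshold(void)
{
	return atomic_load_explicit(&bg_reclaim_threshold_model,
				    memory_order_relaxed);
}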
+/*
+ * Per-filesystem information and stats.
+ *
+ * Path: /sys/fs/btrfs/<uuid>/
+ */
static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, label),
BTRFS_ATTR_PTR(, nodesize),
@@ -1510,6 +1570,11 @@ static ssize_t btrfs_devinfo_error_stats_show(struct kobject *kobj,
}
BTRFS_ATTR(devid, error_stats, btrfs_devinfo_error_stats_show);
+/*
+ * Information about one device.
+ *
+ * Path: /sys/fs/btrfs/<uuid>/devinfo/<devid>/
+ */
static struct attribute *devid_attrs[] = {
BTRFS_ATTR_PTR(devid, error_stats),
BTRFS_ATTR_PTR(devid, in_fs_metadata),
@@ -1799,6 +1864,11 @@ QGROUP_RSV_ATTR(data, BTRFS_QGROUP_RSV_DATA);
QGROUP_RSV_ATTR(meta_pertrans, BTRFS_QGROUP_RSV_META_PERTRANS);
QGROUP_RSV_ATTR(meta_prealloc, BTRFS_QGROUP_RSV_META_PREALLOC);
+/*
+ * Qgroup information.
+ *
+ * Path: /sys/fs/btrfs/<uuid>/qgroups/<level>_<qgroupid>/
+ */
static struct attribute *qgroup_attrs[] = {
BTRFS_ATTR_PTR(qgroup, referenced),
BTRFS_ATTR_PTR(qgroup, exclusive),
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 98b5aaba46f1..19ba7d5b7d8f 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -223,8 +223,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
* we can only call btrfs_qgroup_account_extent() directly to test
* quota.
*/
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -236,8 +235,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -260,8 +258,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
old_roots = NULL;
new_roots = NULL;
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -272,8 +269,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
if (ret)
return -EINVAL;
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -324,8 +320,7 @@ static int test_multiple_refs(struct btrfs_root *root,
return ret;
}
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -337,8 +332,7 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -359,8 +353,7 @@ static int test_multiple_refs(struct btrfs_root *root,
return -EINVAL;
}
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -372,8 +365,7 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -400,8 +392,7 @@ static int test_multiple_refs(struct btrfs_root *root,
return -EINVAL;
}
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -413,8 +404,7 @@ static int test_multiple_refs(struct btrfs_root *root,
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false, false);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index a8b2e0d2c025..7733e8ac0a69 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -24,6 +24,7 @@
#include "compression.h"
#include "volumes.h"
#include "misc.h"
+#include "btrfs_inode.h"
/*
* Error message should follow the following format:
@@ -873,13 +874,22 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
}
}
- if (unlikely((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
- (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
- (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
- (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
- (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
+ if (unlikely((type & BTRFS_BLOCK_GROUP_RAID10 &&
+ sub_stripes != btrfs_raid_array[BTRFS_RAID_RAID10].sub_stripes) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1 &&
+ num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1].devs_min) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1C3 &&
+ num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1C3].devs_min) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1C4 &&
+ num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1C4].devs_min) ||
+ (type & BTRFS_BLOCK_GROUP_RAID5 &&
+ num_stripes < btrfs_raid_array[BTRFS_RAID_RAID5].devs_min) ||
+ (type & BTRFS_BLOCK_GROUP_RAID6 &&
+ num_stripes < btrfs_raid_array[BTRFS_RAID_RAID6].devs_min) ||
+ (type & BTRFS_BLOCK_GROUP_DUP &&
+ num_stripes != btrfs_raid_array[BTRFS_RAID_DUP].dev_stripes) ||
((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
- num_stripes != 1))) {
+ num_stripes != btrfs_raid_array[BTRFS_RAID_SINGLE].dev_stripes))) {
chunk_err(leaf, chunk, logical,
"invalid num_stripes:sub_stripes %u:%u for profile %llu",
num_stripes, sub_stripes,
@@ -999,6 +1009,8 @@ static int check_inode_item(struct extent_buffer *leaf,
u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
u32 mode;
int ret;
+ u32 flags;
+ u32 ro_flags;
ret = check_inode_key(leaf, key, slot);
if (unlikely(ret < 0))
@@ -1054,11 +1066,17 @@ static int check_inode_item(struct extent_buffer *leaf,
btrfs_inode_nlink(leaf, iitem));
return -EUCLEAN;
}
- if (unlikely(btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK)) {
+ btrfs_inode_split_flags(btrfs_inode_flags(leaf, iitem), &flags, &ro_flags);
+ if (unlikely(flags & ~BTRFS_INODE_FLAG_MASK)) {
inode_item_err(leaf, slot,
- "unknown flags detected: 0x%llx",
- btrfs_inode_flags(leaf, iitem) &
- ~BTRFS_INODE_FLAG_MASK);
+ "unknown incompat flags detected: 0x%x", flags);
+ return -EUCLEAN;
+ }
+ if (unlikely(!sb_rdonly(fs_info->sb) &&
+ (ro_flags & ~BTRFS_INODE_RO_FLAG_MASK))) {
+ inode_item_err(leaf, slot,
+ "unknown ro-compat flags detected on writeable mount: 0x%x",
+ ro_flags);
return -EUCLEAN;
}
return 0;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e6430ac9bbe8..f7efc26aa82a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -753,7 +753,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
*/
ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
ins.offset);
- if (ret == 0) {
+ if (ret < 0) {
+ goto out;
+ } else if (ret == 0) {
btrfs_init_generic_ref(&ref,
BTRFS_ADD_DELAYED_REF,
ins.objectid, ins.offset, 0);
@@ -3039,8 +3041,6 @@ static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
list_del_init(&ctx->list);
ctx->log_ret = error;
}
-
- INIT_LIST_HEAD(&root->log_ctxs[index]);
}
/*
@@ -3328,10 +3328,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out_wake_log_root;
}
- mutex_lock(&root->log_mutex);
- if (root->last_log_commit < log_transid)
- root->last_log_commit = log_transid;
- mutex_unlock(&root->log_mutex);
+ /*
+ * We know there can only be one task here, since we have not yet set
+ * root->log_commit[index1] to 0 and any task attempting to sync the
+ * log must wait for the previous log transaction to commit if it's
+ * still in progress or wait for the current log transaction commit if
+ * someone else already started it. We use <= and not < because the
+ * first log transaction has an ID of 0.
+ */
+ ASSERT(root->last_log_commit <= log_transid);
+ root->last_log_commit = log_transid;
out_wake_log_root:
mutex_lock(&log_root_tree->log_mutex);
@@ -3417,14 +3423,10 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
}
/*
- * Check if an inode was logged in the current transaction. We can't always rely
- * on an inode's logged_trans value, because it's an in-memory only field and
- * therefore not persisted. This means that its value is lost if the inode gets
- * evicted and loaded again from disk (in which case it has a value of 0, and
- * certainly it is smaller then any possible transaction ID), when that happens
- * the full_sync flag is set in the inode's runtime flags, so on that case we
- * assume eviction happened and ignore the logged_trans value, assuming the
- * worst case, that the inode was logged before in the current transaction.
+ * Check if an inode was logged in the current transaction. This may often
+ * return some false positives, because logged_trans is an in memory only field,
+ * not persisted anywhere. This is meant to be used in contexts where a false
+ * positive has no functional consequences.
*/
static bool inode_logged(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
@@ -3432,8 +3434,17 @@ static bool inode_logged(struct btrfs_trans_handle *trans,
if (inode->logged_trans == trans->transid)
return true;
- if (inode->last_trans == trans->transid &&
- test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) &&
+ /*
+ * The inode's logged_trans is always 0 when we load it (because it is
+	 * not persisted in the inode item or elsewhere). So if it is 0 and the
+	 * inode was last modified in the current transaction, the inode may have
+	 * been logged before in the current transaction and then evicted and
+	 * loaded again - or it may never have been logged in the current
+	 * transaction at all. Since we can not be sure, we have to assume it was,
+	 * otherwise our callers can leave an inconsistent log.
+ */
+ if (inode->logged_trans == 0 &&
+ inode->last_trans == trans->transid &&
!test_bit(BTRFS_FS_LOG_RECOVERING, &trans->fs_info->flags))
return true;
@@ -3913,6 +3924,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
u64 logged_isize)
{
struct btrfs_map_token token;
+ u64 flags;
btrfs_init_map_token(&token, leaf);
@@ -3962,20 +3974,49 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
btrfs_set_token_inode_transid(&token, item, trans->transid);
btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
- btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags);
+ flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
+ BTRFS_I(inode)->ro_flags);
+ btrfs_set_token_inode_flags(&token, item, flags);
btrfs_set_token_inode_block_group(&token, item, 0);
}
static int log_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_root *log, struct btrfs_path *path,
- struct btrfs_inode *inode)
+ struct btrfs_inode *inode, bool inode_item_dropped)
{
struct btrfs_inode_item *inode_item;
int ret;
- ret = btrfs_insert_empty_item(trans, log, path,
- &inode->location, sizeof(*inode_item));
- if (ret && ret != -EEXIST)
+ /*
+ * If we are doing a fast fsync and the inode was logged before in the
+ * current transaction, then we know the inode was previously logged and
+ * it exists in the log tree. For performance reasons, in this case use
+ * btrfs_search_slot() directly with ins_len set to 0 so that we never
+ * attempt a write lock on the leaf's parent, which adds unnecessary lock
+ * contention in case there are concurrent fsyncs for other inodes of the
+ * same subvolume. Using btrfs_insert_empty_item() when the inode item
+ * already exists can also result in unnecessarily splitting a leaf.
+ */
+ if (!inode_item_dropped && inode->logged_trans == trans->transid) {
+ ret = btrfs_search_slot(trans, log, &inode->location, path, 0, 1);
+ ASSERT(ret <= 0);
+ if (ret > 0)
+ ret = -ENOENT;
+ } else {
+ /*
+ * This means it is the first fsync in the current transaction,
+ * so the inode item is not in the log and we need to insert it.
+		 * We can never get -EEXIST because we are only called for a
+		 * fast fsync, and if an inode eviction happens after the inode
+		 * was logged in the current transaction, then when we load the
+		 * inode again we set BTRFS_INODE_NEEDS_FULL_SYNC on its runtime
+		 * flags and set ->logged_trans to 0.
+ */
+ ret = btrfs_insert_empty_item(trans, log, path, &inode->location,
+ sizeof(*inode_item));
+ ASSERT(ret != -EEXIST);
+ }
+ if (ret)
return ret;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
@@ -4160,7 +4201,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
static int extent_cmp(void *priv, const struct list_head *a,
const struct list_head *b)
{
- struct extent_map *em1, *em2;
+ const struct extent_map *em1, *em2;
em1 = list_entry(a, struct extent_map, list);
em2 = list_entry(b, struct extent_map, list);
@@ -5053,8 +5094,8 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
/*
* Check the inode's logged_trans only instead of
* btrfs_inode_in_log(). This is because the last_log_commit of
- * the inode is not updated when we only log that it exists and
- * it has the full sync bit set (see btrfs_log_inode()).
+ * the inode is not updated when we only log that it exists (see
+ * btrfs_log_inode()).
*/
if (BTRFS_I(inode)->logged_trans == trans->transid) {
spin_unlock(&BTRFS_I(inode)->lock);
@@ -5299,6 +5340,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
bool need_log_inode_item = true;
bool xattrs_logged = false;
bool recursive_logging = false;
+ bool inode_item_dropped = true;
path = btrfs_alloc_path();
if (!path)
@@ -5433,6 +5475,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
} else {
if (inode_only == LOG_INODE_ALL)
fast_search = true;
+ inode_item_dropped = false;
goto log_extents;
}
@@ -5466,7 +5509,7 @@ log_extents:
btrfs_release_path(path);
btrfs_release_path(dst_path);
if (need_log_inode_item) {
- err = log_inode_item(trans, log, dst_path, inode);
+ err = log_inode_item(trans, log, dst_path, inode, inode_item_dropped);
if (err)
goto out_unlock;
/*
@@ -5573,6 +5616,13 @@ static bool need_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
/*
+ * If a directory was not modified, no dentries added or removed, we can
+ * and should avoid logging it.
+ */
+ if (S_ISDIR(inode->vfs_inode.i_mode) && inode->last_trans < trans->transid)
+ return false;
+
+ /*
* If this inode does not have new/updated/deleted xattrs since the last
* time it was logged and is flagged as logged in the current transaction,
* we can skip logging it. As for new/deleted names, those are updated in
diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c
new file mode 100644
index 000000000000..28d443d3ef93
--- /dev/null
+++ b/fs/btrfs/verity.c
@@ -0,0 +1,811 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/iversion.h>
+#include <linux/fsverity.h>
+#include <linux/sched/mm.h>
+#include "ctree.h"
+#include "btrfs_inode.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "locking.h"
+
+/*
+ * Implementation of the interface defined in struct fsverity_operations.
+ *
+ * The main question is how and where to store the verity descriptor and the
+ * Merkle tree. We store both in dedicated btree items in the filesystem tree,
+ * together with the rest of the inode metadata. This means we'll need to do
+ * extra work to encrypt them once encryption is supported in btrfs, but btrfs
+ * has a lot of careful code around i_size and it seems better to make a new key
+ * type than try and adjust all of our expectations for i_size.
+ *
+ * Note that this differs from the implementation in ext4 and f2fs, where
+ * this data is stored as if it were in the file, but past EOF. However, btrfs
+ * does not have a widespread mechanism for caching opaque metadata pages, so we
+ * do pretend that the Merkle tree pages themselves are past EOF for the
+ * purposes of caching them (as opposed to creating a virtual inode).
+ *
+ * fs verity items are stored under two different key types on disk.
+ * The descriptor items:
+ * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ]
+ *
+ * At offset 0, we store a btrfs_verity_descriptor_item which tracks the
+ * size of the descriptor item and some extra data for encryption.
+ * Starting at offset 1, these hold the generic fs verity descriptor.
+ * The latter are opaque to btrfs, we just read and write them as a blob for
+ * the higher level verity code. The most common descriptor size is 256 bytes.
+ *
+ * The merkle tree items:
+ * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ]
+ *
+ * These also start at offset 0, and correspond to the merkle tree bytes.
+ * So when fsverity asks for page 0 of the merkle tree, we pull up one page
+ * starting at offset 0 for this key type. These are also opaque to btrfs,
+ * we're blindly storing whatever fsverity sends down.
+ *
+ * Another important consideration is the fact that the Merkle tree data scales
+ * linearly with the size of the file (with 4K pages/blocks and SHA-256, it's
+ * ~1/127th the size) so for large files, writing the tree can be a lengthy
+ * operation. For that reason, we guard the whole enable verity operation
+ * (between begin_enable_verity and end_enable_verity) with an orphan item.
+ * Again, because the data can be pretty large, it's quite possible that we
+ * could run out of space writing it, so we try our best to handle errors by
+ * stopping and rolling back rather than aborting the victim transaction.
+ */
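As a concrete sketch of the key layout described above, a hypothetical helper that forms a descriptor key would look like the following (a fragment assuming the kernel context of this file; the read/write helpers below build exactly these triples inline rather than through such a helper):

/* Hypothetical, for illustration: descriptor stream keys for this inode. */
static void init_verity_desc_key(struct btrfs_inode *inode, u64 desc_offset,
				 struct btrfs_key *key)
{
	key->objectid = btrfs_ino(inode);
	key->type = BTRFS_VERITY_DESC_ITEM_KEY;
	/* offset 0 holds the btrfs_verity_descriptor_item header; the
	 * opaque fsverity descriptor bytes start at offset 1. */
	key->offset = desc_offset;
}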
+
+#define MERKLE_START_ALIGN 65536
+
+/*
+ * Compute the logical file offset where we cache the Merkle tree.
+ *
+ * @inode: inode of the verity file
+ *
+ * For the purposes of caching the Merkle tree pages, as required by
+ * fs-verity, it is convenient to do size computations in terms of a file
+ * offset, rather than in terms of page indices.
+ *
+ * Use 64K to be sure it's past the last page in the file, even with 64K pages.
+ * That rounding operation itself can overflow loff_t, so we do it in u64 and
+ * check.
+ *
+ * Returns the file offset on success, negative error code on failure.
+ */
+static loff_t merkle_file_pos(const struct inode *inode)
+{
+ u64 sz = inode->i_size;
+ u64 rounded = round_up(sz, MERKLE_START_ALIGN);
+
+ if (rounded > inode->i_sb->s_maxbytes)
+ return -EFBIG;
+
+ return rounded;
+}
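A quick standalone check of the rounding above (plain C, not kernel code):

#include <stdio.h>
#include <stdint.h>

#define MERKLE_START_ALIGN 65536ULL

static uint64_t round_up_u64(uint64_t x, uint64_t align)
{
	return (x + align - 1) / align * align;
}

int main(void)
{
	/* i_size of 100 MiB + 1 byte: the tree is cached from the next
	 * 64K boundary, which is always at or past EOF. */
	uint64_t sz = 100ULL * 1024 * 1024 + 1;

	printf("%llu\n", (unsigned long long)round_up_u64(sz, MERKLE_START_ALIGN));
	/* prints 104923136, i.e. 100 MiB + 64K */
	return 0;
}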
+
+/*
+ * Drop all the items for this inode with this key_type.
+ *
+ * @inode: inode to drop items for
+ * @key_type: type of items to drop (BTRFS_VERITY_DESC_ITEM or
+ * BTRFS_VERITY_MERKLE_ITEM)
+ *
+ * Before doing a verity enable we cleanup any existing verity items.
+ * This is also used to clean up if a verity enable failed half way through.
+ *
+ * Returns number of dropped items on success, negative error code on failure.
+ */
+static int drop_verity_items(struct btrfs_inode *inode, u8 key_type)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int count = 0;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ while (1) {
+ /* 1 for the item being dropped */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
+ /*
+ * Walk backwards through all the items until we find one that
+ * isn't from our key type or objectid
+ */
+ key.objectid = btrfs_ino(inode);
+ key.type = key_type;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret > 0) {
+ ret = 0;
+ /* No more keys of this type, we're done */
+ if (path->slots[0] == 0)
+ break;
+ path->slots[0]--;
+ } else if (ret < 0) {
+ btrfs_end_transaction(trans);
+ goto out;
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ /* No more keys of this type, we're done */
+ if (key.objectid != btrfs_ino(inode) || key.type != key_type)
+ break;
+
+ /*
+ * This shouldn't be a performance sensitive function because
+ * it's not used as part of truncate. If it ever becomes
+ * perf sensitive, change this to walk forward and bulk delete
+ * items
+ */
+ ret = btrfs_del_items(trans, root, path, path->slots[0], 1);
+ if (ret) {
+ btrfs_end_transaction(trans);
+ goto out;
+ }
+ count++;
+ btrfs_release_path(path);
+ btrfs_end_transaction(trans);
+ }
+ ret = count;
+ btrfs_end_transaction(trans);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Drop all verity items
+ *
+ * @inode: inode to drop verity items for
+ *
+ * In most contexts where we are dropping verity items, we want to do it for all
+ * the types of verity items, not a particular one.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int btrfs_drop_verity_items(struct btrfs_inode *inode)
+{
+ int ret;
+
+ ret = drop_verity_items(inode, BTRFS_VERITY_DESC_ITEM_KEY);
+ if (ret < 0)
+ return ret;
+ ret = drop_verity_items(inode, BTRFS_VERITY_MERKLE_ITEM_KEY);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/*
+ * Insert and write inode items with a given key type and offset.
+ *
+ * @inode: inode to insert for
+ * @key_type: key type to insert
+ * @offset: item offset to insert at
+ * @src: source data to write
+ * @len: length of source data to write
+ *
+ * Write len bytes from src into items of up to 2K length.
+ * The inserted items will have key (ino, key_type, offset + off) where off is
+ * consecutively increasing from 0 up to the last item ending at offset + len.
+ *
+ * Returns 0 on success and a negative error code on failure.
+ */
+static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
+ const char *src, u64 len)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_path *path;
+ struct btrfs_root *root = inode->root;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ unsigned long copy_bytes;
+ unsigned long src_offset = 0;
+ void *data;
+ int ret = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ while (len > 0) {
+ /* 1 for the new item being inserted */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
+
+ key.objectid = btrfs_ino(inode);
+ key.type = key_type;
+ key.offset = offset;
+
+ /*
+ * Insert 2K at a time mostly to be friendly for smaller leaf
+ * size filesystems
+ */
+ copy_bytes = min_t(u64, len, 2048);
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key, copy_bytes);
+ if (ret) {
+ btrfs_end_transaction(trans);
+ break;
+ }
+
+ leaf = path->nodes[0];
+
+ data = btrfs_item_ptr(leaf, path->slots[0], void);
+ write_extent_buffer(leaf, src + src_offset,
+ (unsigned long)data, copy_bytes);
+ offset += copy_bytes;
+ src_offset += copy_bytes;
+ len -= copy_bytes;
+
+ btrfs_release_path(path);
+ btrfs_end_transaction(trans);
+ }
+
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Read inode items of the given key type and offset from the btree.
+ *
+ * @inode: inode to read items of
+ * @key_type: key type to read
+ * @offset: item offset to read from
+ * @dest: Buffer to read into. This parameter has slightly tricky
+ * semantics. If it is NULL, the function will not do any copying
+ * and will just return the size of all the items up to len bytes.
+ * If dest_page is passed, then the function will kmap_local the
+ * page and ignore dest, but it must still be non-NULL to avoid the
+ * counting-only behavior.
+ * @len: length in bytes to read
+ * @dest_page: copy into this page instead of the dest buffer
+ *
+ * Helper function to read items from the btree. This returns the number of
+ * bytes read or < 0 for errors. We can return short reads if the items don't
+ * exist on disk or aren't big enough to fill the desired length. Supports
+ * reading into a provided buffer (dest) or into the page cache.
+ *
+ * Returns number of bytes read or a negative error code on failure.
+ */
+static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
+ char *dest, u64 len, struct page *dest_page)
+{
+ struct btrfs_path *path;
+ struct btrfs_root *root = inode->root;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ u64 item_end;
+ u64 copy_end;
+ int copied = 0;
+ u32 copy_offset;
+ unsigned long copy_bytes;
+ unsigned long dest_offset = 0;
+ void *data;
+ char *kaddr = dest;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ if (dest_page)
+ path->reada = READA_FORWARD;
+
+ key.objectid = btrfs_ino(inode);
+ key.type = key_type;
+ key.offset = offset;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = 0;
+ if (path->slots[0] == 0)
+ goto out;
+ path->slots[0]--;
+ }
+
+ while (len > 0) {
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+ if (key.objectid != btrfs_ino(inode) || key.type != key_type)
+ break;
+
+ item_end = btrfs_item_size_nr(leaf, path->slots[0]) + key.offset;
+
+ if (copied > 0) {
+ /*
+ * Once we've copied something, we want all of the items
+ * to be sequential
+ */
+ if (key.offset != offset)
+ break;
+ } else {
+ /*
+ * Our initial offset might be in the middle of an
+ * item. Make sure it all makes sense.
+ */
+ if (key.offset > offset)
+ break;
+ if (item_end <= offset)
+ break;
+ }
+
+ /* desc = NULL to just sum all the item lengths */
+ if (!dest)
+ copy_end = item_end;
+ else
+ copy_end = min(offset + len, item_end);
+
+ /* Number of bytes in this item we want to copy */
+ copy_bytes = copy_end - offset;
+
+ /* Offset from the start of item for copying */
+ copy_offset = offset - key.offset;
+
+ if (dest) {
+ if (dest_page)
+ kaddr = kmap_local_page(dest_page);
+
+ data = btrfs_item_ptr(leaf, path->slots[0], void);
+ read_extent_buffer(leaf, kaddr + dest_offset,
+ (unsigned long)data + copy_offset,
+ copy_bytes);
+
+ if (dest_page)
+ kunmap_local(kaddr);
+ }
+
+ offset += copy_bytes;
+ dest_offset += copy_bytes;
+ len -= copy_bytes;
+ copied += copy_bytes;
+
+ path->slots[0]++;
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ /*
+ * We've reached the last slot in this leaf and we need
+ * to go to the next leaf.
+ */
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ break;
+ } else if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ }
+ }
+out:
+ btrfs_free_path(path);
+ if (!ret)
+ ret = copied;
+ return ret;
+}
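The three calling modes described in the @dest documentation above look like this from a caller's perspective (hypothetical fragments that assume the surrounding kernel context; modes 2 and 3 match how this file actually calls the helper):

/* 1) Size query: dest == NULL only sums the item sizes, up to len bytes. */
ret = read_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, 1, NULL, INT_MAX, NULL);

/* 2) Buffer read: copy item payloads into a supplied buffer. */
ret = read_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, 1, buf, buf_size, NULL);

/* 3) Page read: dest must be non-NULL to avoid mode 1, but the copy is
 *    done through a kmap of dest_page rather than through dest itself. */
ret = read_key_bytes(inode, BTRFS_VERITY_MERKLE_ITEM_KEY, off,
		     page_address(page), PAGE_SIZE, page);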
+
+/*
+ * Delete an fsverity orphan
+ *
+ * @trans: transaction to do the delete in
+ * @inode: inode to orphan
+ *
+ * Capture verity orphan specific logic that is repeated in the couple places
+ * we delete verity orphans. Specifically, handling ENOENT and ignoring inodes
+ * with 0 links.
+ *
+ * Returns zero on success or a negative error code on failure.
+ */
+static int del_orphan(struct btrfs_trans_handle *trans, struct btrfs_inode *inode)
+{
+ struct btrfs_root *root = inode->root;
+ int ret;
+
+ /*
+ * If the inode has no links, it is either already unlinked, or was
+ * created with O_TMPFILE. In either case, it should have an orphan from
+ * that other operation. Rather than reference count the orphans, we
+ * simply ignore them here, because we only invoke the verity path in
+ * the orphan logic when i_nlink is 1.
+ */
+ if (!inode->vfs_inode.i_nlink)
+ return 0;
+
+ ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
+ if (ret == -ENOENT)
+ ret = 0;
+ return ret;
+}
+
+/*
+ * Rollback in-progress verity if we encounter an error.
+ *
+ * @inode: inode verity had an error for
+ *
+ * We try to handle recoverable errors while enabling verity by rolling it back
+ * and just failing the operation, rather than having an fs level error no
+ * matter what. However, any error in rollback is unrecoverable.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int rollback_verity(struct btrfs_inode *inode)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = inode->root;
+ int ret;
+
+ ASSERT(inode_is_locked(&inode->vfs_inode));
+ truncate_inode_pages(inode->vfs_inode.i_mapping, inode->vfs_inode.i_size);
+ clear_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
+ ret = btrfs_drop_verity_items(inode);
+ if (ret) {
+ btrfs_handle_fs_error(root->fs_info, ret,
+ "failed to drop verity items in rollback %llu",
+ (u64)inode->vfs_inode.i_ino);
+ goto out;
+ }
+
+ /*
+ * 1 for updating the inode flag
+ * 1 for deleting the orphan
+ */
+ trans = btrfs_start_transaction(root, 2);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ btrfs_handle_fs_error(root->fs_info, ret,
+ "failed to start transaction in verity rollback %llu",
+ (u64)inode->vfs_inode.i_ino);
+ goto out;
+ }
+ inode->ro_flags &= ~BTRFS_INODE_RO_VERITY;
+ btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode);
+ ret = btrfs_update_inode(trans, root, inode);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+ ret = del_orphan(trans, inode);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+ btrfs_end_transaction(trans);
+out:
+ return ret;
+}
+
+/*
+ * Finalize making the file a valid verity file
+ *
+ * @inode: inode to be marked as verity
+ * @desc: contents of the verity descriptor to write (not NULL)
+ * @desc_size: size of the verity descriptor
+ *
+ * Do the actual work of finalizing verity after successfully writing the Merkle
+ * tree:
+ *
+ * - write out the descriptor items
+ * - mark the inode with the verity flag
+ * - delete the orphan item
+ * - mark the ro compat bit
+ * - clear the in progress bit
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int finish_verity(struct btrfs_inode *inode, const void *desc,
+ size_t desc_size)
+{
+ struct btrfs_trans_handle *trans = NULL;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_verity_descriptor_item item;
+ int ret;
+
+ /* Write out the descriptor item */
+ memset(&item, 0, sizeof(item));
+ btrfs_set_stack_verity_descriptor_size(&item, desc_size);
+ ret = write_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, 0,
+ (const char *)&item, sizeof(item));
+ if (ret)
+ goto out;
+
+ /* Write out the descriptor itself */
+ ret = write_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, 1,
+ desc, desc_size);
+ if (ret)
+ goto out;
+
+ /*
+ * 1 for updating the inode flag
+ * 1 for deleting the orphan
+ */
+ trans = btrfs_start_transaction(root, 2);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+ inode->ro_flags |= BTRFS_INODE_RO_VERITY;
+ btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode);
+ ret = btrfs_update_inode(trans, root, inode);
+ if (ret)
+ goto end_trans;
+ ret = del_orphan(trans, inode);
+ if (ret)
+ goto end_trans;
+ clear_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
+ btrfs_set_fs_compat_ro(root->fs_info, VERITY);
+end_trans:
+ btrfs_end_transaction(trans);
+out:
+ return ret;
+}
+
+/*
+ * fsverity op that begins enabling verity.
+ *
+ * @filp: file to enable verity on
+ *
+ * Begin enabling fsverity for the file. We drop any existing verity items, add
+ * an orphan and set the in progress bit.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int btrfs_begin_enable_verity(struct file *filp)
+{
+ struct btrfs_inode *inode = BTRFS_I(file_inode(filp));
+ struct btrfs_root *root = inode->root;
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ ASSERT(inode_is_locked(file_inode(filp)));
+
+ if (test_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags))
+ return -EBUSY;
+
+ /*
+ * This should almost never do anything, but theoretically, it's
+ * possible that we failed to enable verity on a file, then were
+ * interrupted or failed while rolling back, failed to cleanup the
+ * orphan, and finally attempt to enable verity again.
+ */
+ ret = btrfs_drop_verity_items(inode);
+ if (ret)
+ return ret;
+
+ /* 1 for the orphan item */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ ret = btrfs_orphan_add(trans, inode);
+ if (!ret)
+ set_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
+ btrfs_end_transaction(trans);
+
+	return ret;
+}
+
+/*
+ * fsverity op that ends enabling verity.
+ *
+ * @filp: file we are finishing enabling verity on
+ * @desc: verity descriptor to write out (NULL in error conditions)
+ * @desc_size: size of the verity descriptor (variable with signatures)
+ * @merkle_tree_size: size of the merkle tree in bytes
+ *
+ * If desc is null, then VFS is signaling an error occurred during verity
+ * enable, and we should try to rollback. Otherwise, attempt to finish verity.
+ *
+ * Returns 0 on success, negative error code on error.
+ */
+static int btrfs_end_enable_verity(struct file *filp, const void *desc,
+ size_t desc_size, u64 merkle_tree_size)
+{
+ struct btrfs_inode *inode = BTRFS_I(file_inode(filp));
+ int ret = 0;
+ int rollback_ret;
+
+ ASSERT(inode_is_locked(file_inode(filp)));
+
+ if (desc == NULL)
+ goto rollback;
+
+ ret = finish_verity(inode, desc, desc_size);
+ if (ret)
+ goto rollback;
+ return ret;
+
+rollback:
+ rollback_ret = rollback_verity(inode);
+ if (rollback_ret)
+ btrfs_err(inode->root->fs_info,
+ "failed to rollback verity items: %d", rollback_ret);
+ return ret;
+}
+
+/*
+ * fsverity op that gets the struct fsverity_descriptor.
+ *
+ * @inode: inode to get the descriptor of
+ * @buf: output buffer for the descriptor contents
+ * @buf_size: size of the output buffer. 0 to query the size
+ *
+ * fsverity does a two pass setup for reading the descriptor, in the first pass
+ * it calls with buf_size = 0 to query the size of the descriptor, and then in
+ * the second pass it actually reads the descriptor off disk.
+ *
+ * Returns the size on success or a negative error code on failure.
+ */
+static int btrfs_get_verity_descriptor(struct inode *inode, void *buf,
+ size_t buf_size)
+{
+ u64 true_size;
+ int ret = 0;
+ struct btrfs_verity_descriptor_item item;
+
+ memset(&item, 0, sizeof(item));
+ ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_DESC_ITEM_KEY, 0,
+ (char *)&item, sizeof(item), NULL);
+ if (ret < 0)
+ return ret;
+
+ if (item.reserved[0] != 0 || item.reserved[1] != 0)
+ return -EUCLEAN;
+
+ true_size = btrfs_stack_verity_descriptor_size(&item);
+ if (true_size > INT_MAX)
+ return -EUCLEAN;
+
+ if (buf_size == 0)
+ return true_size;
+ if (buf_size < true_size)
+ return -ERANGE;
+
+ ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_DESC_ITEM_KEY, 1,
+ buf, buf_size, NULL);
+ if (ret < 0)
+ return ret;
+ if (ret != true_size)
+ return -EIO;
+
+ return true_size;
+}
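From the caller's side, the two-pass protocol amounts to something like this (hypothetical fragment, kernel context assumed):

/* Pass 1: buf_size == 0 returns the descriptor size without copying. */
int size = btrfs_get_verity_descriptor(inode, NULL, 0);

if (size > 0) {
	void *buf = kmalloc(size, GFP_KERNEL);

	/* Pass 2: read the descriptor itself into the sized buffer. */
	if (buf)
		size = btrfs_get_verity_descriptor(inode, buf, size);
}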
+
+/*
+ * fsverity op that reads and caches a merkle tree page.
+ *
+ * @inode: inode to read a merkle tree page for
+ * @index: page index relative to the start of the merkle tree
+ * @num_ra_pages: number of pages to readahead. Optional, we ignore it
+ *
+ * The Merkle tree is stored in the filesystem btree, but its pages are cached
+ * with a logical position past EOF in the inode's mapping.
+ *
+ * Returns the page we read, or an ERR_PTR on error.
+ */
+static struct page *btrfs_read_merkle_tree_page(struct inode *inode,
+ pgoff_t index,
+ unsigned long num_ra_pages)
+{
+ struct page *page;
+ u64 off = (u64)index << PAGE_SHIFT;
+ loff_t merkle_pos = merkle_file_pos(inode);
+ int ret;
+
+ if (merkle_pos < 0)
+ return ERR_PTR(merkle_pos);
+ if (merkle_pos > inode->i_sb->s_maxbytes - off - PAGE_SIZE)
+ return ERR_PTR(-EFBIG);
+ index += merkle_pos >> PAGE_SHIFT;
+again:
+ page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
+ if (page) {
+ if (PageUptodate(page))
+ return page;
+
+ lock_page(page);
+ /*
+ * We only insert uptodate pages, so !Uptodate has to be
+ * an error
+ */
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ put_page(page);
+ return ERR_PTR(-EIO);
+ }
+ unlock_page(page);
+ return page;
+ }
+
+ page = __page_cache_alloc(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Merkle item keys are indexed from byte 0 in the merkle tree.
+ * They have the form:
+ *
+ * [ inode objectid, BTRFS_MERKLE_ITEM_KEY, offset in bytes ]
+ */
+ ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY, off,
+ page_address(page), PAGE_SIZE, page);
+ if (ret < 0) {
+ put_page(page);
+ return ERR_PTR(ret);
+ }
+ if (ret < PAGE_SIZE)
+ memzero_page(page, ret, PAGE_SIZE - ret);
+
+ SetPageUptodate(page);
+ ret = add_to_page_cache_lru(page, inode->i_mapping, index, GFP_NOFS);
+
+ if (!ret) {
+ /* Inserted and ready for fsverity */
+ unlock_page(page);
+ } else {
+ put_page(page);
+ /* Did someone race us into inserting this page? */
+ if (ret == -EEXIST)
+ goto again;
+ page = ERR_PTR(ret);
+ }
+ return page;
+}
+
+/*
+ * fsverity op that writes a Merkle tree block into the btree.
+ *
+ * @inode: inode to write a Merkle tree block for
+ * @buf: Merkle tree data block to write
+ * @index: index of the block in the Merkle tree
+ * @log_blocksize: log base 2 of the Merkle tree block size
+ *
+ * Note that the block size could be different from the page size, so it is not
+ * safe to assume that index is a page index.
+ *
+ * Returns 0 on success or negative error code on failure
+ */
+static int btrfs_write_merkle_tree_block(struct inode *inode, const void *buf,
+ u64 index, int log_blocksize)
+{
+ u64 off = index << log_blocksize;
+ u64 len = 1ULL << log_blocksize;
+ loff_t merkle_pos = merkle_file_pos(inode);
+
+ if (merkle_pos < 0)
+ return merkle_pos;
+ if (merkle_pos > inode->i_sb->s_maxbytes - off - len)
+ return -EFBIG;
+
+ return write_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY,
+ off, buf, len);
+}
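A standalone check of the offset arithmetic above (plain C, not kernel code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int log_blocksize = 12;                 /* 4K Merkle tree blocks */
	uint64_t index = 3;                     /* fourth block of the tree */
	uint64_t off = index << log_blocksize;  /* 12288 */
	uint64_t len = 1ULL << log_blocksize;   /* 4096 */

	printf("off=%llu len=%llu\n",
	       (unsigned long long)off, (unsigned long long)len);
	return 0;
}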
+
+const struct fsverity_operations btrfs_verityops = {
+ .begin_enable_verity = btrfs_begin_enable_verity,
+ .end_enable_verity = btrfs_end_enable_verity,
+ .get_verity_descriptor = btrfs_get_verity_descriptor,
+ .read_merkle_tree_page = btrfs_read_merkle_tree_page,
+ .write_merkle_tree_block = btrfs_write_merkle_tree_block,
+};
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 70f94b75f25a..ec3a874165de 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -38,7 +38,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.sub_stripes = 2,
.dev_stripes = 1,
.devs_max = 0, /* 0 == as many as possible */
- .devs_min = 4,
+ .devs_min = 2,
.tolerated_failures = 1,
.devs_increment = 2,
.ncopies = 2,
@@ -103,7 +103,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.sub_stripes = 1,
.dev_stripes = 1,
.devs_max = 0,
- .devs_min = 2,
+ .devs_min = 1,
.tolerated_failures = 0,
.devs_increment = 1,
.ncopies = 1,
@@ -153,6 +153,32 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
},
};
+/*
+ * Convert block group flags (BTRFS_BLOCK_GROUP_*) to btrfs_raid_types, which
+ * can be used as index to access btrfs_raid_array[].
+ */
+enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags)
+{
+ if (flags & BTRFS_BLOCK_GROUP_RAID10)
+ return BTRFS_RAID_RAID10;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1)
+ return BTRFS_RAID_RAID1;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
+ return BTRFS_RAID_RAID1C3;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
+ return BTRFS_RAID_RAID1C4;
+ else if (flags & BTRFS_BLOCK_GROUP_DUP)
+ return BTRFS_RAID_DUP;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID0)
+ return BTRFS_RAID_RAID0;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID5)
+ return BTRFS_RAID_RAID5;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID6)
+ return BTRFS_RAID_RAID6;
+
+ return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
+}
+
const char *btrfs_bg_type_to_raid_name(u64 flags)
{
const int index = btrfs_bg_flags_to_raid_index(flags);
@@ -404,44 +430,6 @@ void __exit btrfs_cleanup_fs_uuids(void)
}
}
-/*
- * Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
- * Returned struct is not linked onto any lists and must be destroyed using
- * btrfs_free_device.
- */
-static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_device *dev;
-
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (!dev)
- return ERR_PTR(-ENOMEM);
-
- /*
- * Preallocate a bio that's always going to be used for flushing device
- * barriers and matches the device lifespan
- */
- dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
- if (!dev->flush_bio) {
- kfree(dev);
- return ERR_PTR(-ENOMEM);
- }
-
- INIT_LIST_HEAD(&dev->dev_list);
- INIT_LIST_HEAD(&dev->dev_alloc_list);
- INIT_LIST_HEAD(&dev->post_commit_list);
-
- atomic_set(&dev->reada_in_flight, 0);
- atomic_set(&dev->dev_stats_ccnt, 0);
- btrfs_device_data_ordered_init(dev);
- INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
- INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
- extent_io_tree_init(fs_info, &dev->alloc_state,
- IO_TREE_DEVICE_ALLOC_STATE, NULL);
-
- return dev;
-}
-
static noinline struct btrfs_fs_devices *find_fsid(
const u8 *fsid, const u8 *metadata_fsid)
{
@@ -1130,6 +1118,9 @@ static void btrfs_close_one_device(struct btrfs_device *device)
fs_devices->rw_devices--;
}
+ if (device->devid == BTRFS_DEV_REPLACE_DEVID)
+ clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
+
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
fs_devices->missing_devices--;
@@ -1228,7 +1219,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
static int devid_cmp(void *priv, const struct list_head *a,
const struct list_head *b)
{
- struct btrfs_device *dev1, *dev2;
+ const struct btrfs_device *dev1, *dev2;
dev1 = list_entry(a, struct btrfs_device, dev_list);
dev2 = list_entry(b, struct btrfs_device, dev_list);
@@ -1598,14 +1589,9 @@ again:
key.offset = search_start;
key.type = BTRFS_DEV_EXTENT_KEY;
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ ret = btrfs_search_backwards(root, &key, path);
if (ret < 0)
goto out;
- if (ret > 0) {
- ret = btrfs_previous_item(root, path, key.objectid, key.type);
- if (ret < 0)
- goto out;
- }
while (1) {
l = path->nodes[0];
@@ -1759,48 +1745,6 @@ out:
return ret;
}
-static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device,
- u64 chunk_offset, u64 start, u64 num_bytes)
-{
- int ret;
- struct btrfs_path *path;
- struct btrfs_fs_info *fs_info = device->fs_info;
- struct btrfs_root *root = fs_info->dev_root;
- struct btrfs_dev_extent *extent;
- struct extent_buffer *leaf;
- struct btrfs_key key;
-
- WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
- WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = device->devid;
- key.offset = start;
- key.type = BTRFS_DEV_EXTENT_KEY;
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- sizeof(*extent));
- if (ret)
- goto out;
-
- leaf = path->nodes[0];
- extent = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_dev_extent);
- btrfs_set_dev_extent_chunk_tree(leaf, extent,
- BTRFS_CHUNK_TREE_OBJECTID);
- btrfs_set_dev_extent_chunk_objectid(leaf, extent,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID);
- btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
-
- btrfs_set_dev_extent_length(leaf, extent, num_bytes);
- btrfs_mark_buffer_dirty(leaf);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
{
struct extent_map_tree *em_tree;
@@ -2003,12 +1947,8 @@ static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
if (!(all_avail & btrfs_raid_array[i].bg_flag))
continue;
- if (num_devices < btrfs_raid_array[i].devs_min) {
- int ret = btrfs_raid_array[i].mindev_error;
-
- if (ret)
- return ret;
- }
+ if (num_devices < btrfs_raid_array[i].devs_min)
+ return btrfs_raid_array[i].mindev_error;
}
return 0;
@@ -2137,7 +2077,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
if (IS_ERR(device)) {
if (PTR_ERR(device) == -ENOENT &&
- strcmp(device_path, "missing") == 0)
+ device_path && strcmp(device_path, "missing") == 0)
ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
else
ret = PTR_ERR(device);
@@ -3622,10 +3562,7 @@ static u64 calc_data_stripes(u64 type, int num_stripes)
const int ncopies = btrfs_raid_array[index].ncopies;
const int nparity = btrfs_raid_array[index].nparity;
- if (nparity)
- return num_stripes - nparity;
- else
- return num_stripes / ncopies;
+ return (num_stripes - nparity) / ncopies;
}
/* [pstart, pend) */
@@ -4025,6 +3962,13 @@ static inline int validate_convert_profile(struct btrfs_fs_info *fs_info,
if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
return true;
+ if (fs_info->sectorsize < PAGE_SIZE &&
+ bargs->target & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ btrfs_err(fs_info,
+ "RAID56 is not yet supported for sectorsize %u with page size %lu",
+ fs_info->sectorsize, PAGE_SIZE);
+ return false;
+ }
/* Profile is valid and does not have bits outside of the allowed set */
if (alloc_profile_is_valid(bargs->target, 1) &&
(bargs->target & ~allowed) == 0)
@@ -5464,56 +5408,6 @@ out:
}
/*
- * This function, btrfs_finish_chunk_alloc(), belongs to phase 2.
- *
- * See the comment at btrfs_chunk_alloc() for details about the chunk allocation
- * phases.
- */
-int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
- u64 chunk_offset, u64 chunk_size)
-{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_device *device;
- struct extent_map *em;
- struct map_lookup *map;
- u64 dev_offset;
- u64 stripe_size;
- int i;
- int ret = 0;
-
- em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
- if (IS_ERR(em))
- return PTR_ERR(em);
-
- map = em->map_lookup;
- stripe_size = em->orig_block_len;
-
- /*
- * Take the device list mutex to prevent races with the final phase of
- * a device replace operation that replaces the device object associated
- * with the map's stripes, because the device object's id can change
- * at any time during that final phase of the device replace operation
- * (dev-replace.c:btrfs_dev_replace_finishing()), so we could grab the
- * replaced device and then see it with an ID of BTRFS_DEV_REPLACE_DEVID,
- * resulting in persisting a device extent item with such ID.
- */
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- for (i = 0; i < map->num_stripes; i++) {
- device = map->stripes[i].dev;
- dev_offset = map->stripes[i].physical;
-
- ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
- dev_offset, stripe_size);
- if (ret)
- break;
- }
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-
- free_extent_map(em);
- return ret;
-}
-
-/*
* This function, btrfs_chunk_alloc_add_chunk_item(), typically belongs to the
* phase 1 of chunk allocation. It belongs to phase 2 only when allocating system
* chunks.
@@ -6923,9 +6817,31 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
if (WARN_ON(!devid && !fs_info))
return ERR_PTR(-EINVAL);
- dev = __alloc_device(fs_info);
- if (IS_ERR(dev))
- return dev;
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Preallocate a bio that's always going to be used for flushing device
+ * barriers and whose lifetime matches that of the device
+ */
+ dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
+ if (!dev->flush_bio) {
+ kfree(dev);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ INIT_LIST_HEAD(&dev->dev_list);
+ INIT_LIST_HEAD(&dev->dev_alloc_list);
+ INIT_LIST_HEAD(&dev->post_commit_list);
+
+ atomic_set(&dev->reada_in_flight, 0);
+ atomic_set(&dev->dev_stats_ccnt, 0);
+ btrfs_device_data_ordered_init(dev);
+ INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+ INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+ extent_io_tree_init(fs_info, &dev->alloc_state,
+ IO_TREE_DEVICE_ALLOC_STATE, NULL);
if (devid)
tmp = *devid;
@@ -6961,15 +6877,7 @@ static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
{
- int index = btrfs_bg_flags_to_raid_index(type);
- int ncopies = btrfs_raid_array[index].ncopies;
- const int nparity = btrfs_raid_array[index].nparity;
- int data_stripes;
-
- if (nparity)
- data_stripes = num_stripes - nparity;
- else
- data_stripes = num_stripes / ncopies;
+ const int data_stripes = calc_data_stripes(type, num_stripes);
return div_u64(chunk_len, data_stripes);
}
@@ -8144,7 +8052,7 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
goto out;
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
- ret = btrfs_next_item(root, path);
+ ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
/* No dev extents at all? Not good */
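The calc_data_stripes() simplification above works because of an invariant in the raid
attribute table: profiles with parity (RAID5/6) always have ncopies == 1, and the
mirrored/striped profiles always have nparity == 0, so the two old branches collapse
into one expression. A standalone sketch checking that equivalence (the profile tuples
are illustrative values chosen to match those semantics, not copied from the kernel
table):

#include <assert.h>
#include <stdio.h>

static int old_calc(int num_stripes, int ncopies, int nparity)
{
	if (nparity)
		return num_stripes - nparity;
	return num_stripes / ncopies;
}

static int new_calc(int num_stripes, int ncopies, int nparity)
{
	return (num_stripes - nparity) / ncopies;
}

int main(void)
{
	/* { num_stripes, ncopies, nparity } */
	const int cases[][3] = {
		{ 4, 2, 0 },	/* RAID10-like */
		{ 2, 2, 0 },	/* RAID1-like */
		{ 3, 1, 0 },	/* RAID0-like */
		{ 4, 1, 1 },	/* RAID5-like: parity implies ncopies == 1 */
		{ 5, 1, 2 },	/* RAID6-like */
		{ 1, 1, 0 },	/* SINGLE */
	};
	for (unsigned i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
		const int *c = cases[i];
		assert(old_calc(c[0], c[1], c[2]) == new_calc(c[0], c[1], c[2]));
	}
	printf("formulas agree\n");
	return 0;
}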
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 55a8ba244716..b082250b42e0 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -508,8 +508,6 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
u64 logical, u64 len);
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
u64 logical);
-int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
- u64 chunk_offset, u64 chunk_size);
int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans,
struct btrfs_block_group *bg);
int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset);
@@ -568,32 +566,6 @@ static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
atomic_inc(&dev->dev_stats_ccnt);
}
-/*
- * Convert block group flags (BTRFS_BLOCK_GROUP_*) to btrfs_raid_types, which
- * can be used as index to access btrfs_raid_array[].
- */
-static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
-{
- if (flags & BTRFS_BLOCK_GROUP_RAID10)
- return BTRFS_RAID_RAID10;
- else if (flags & BTRFS_BLOCK_GROUP_RAID1)
- return BTRFS_RAID_RAID1;
- else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
- return BTRFS_RAID_RAID1C3;
- else if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
- return BTRFS_RAID_RAID1C4;
- else if (flags & BTRFS_BLOCK_GROUP_DUP)
- return BTRFS_RAID_DUP;
- else if (flags & BTRFS_BLOCK_GROUP_RAID0)
- return BTRFS_RAID_RAID0;
- else if (flags & BTRFS_BLOCK_GROUP_RAID5)
- return BTRFS_RAID_RAID5;
- else if (flags & BTRFS_BLOCK_GROUP_RAID6)
- return BTRFS_RAID_RAID6;
-
- return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
-}
-
void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
struct list_head * __attribute_const__ btrfs_get_fs_uuids(void);
@@ -603,6 +575,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
struct block_device *bdev,
const char *device_path);
+enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags);
int btrfs_bg_type_to_factor(u64 flags);
const char *btrfs_bg_type_to_raid_name(u64 flags);
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
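Moving btrfs_bg_flags_to_raid_index() out of line and declaring it __attribute_const__
keeps an optimization the inline version got for free: a const function's result depends
only on its arguments, so the compiler may merge repeated calls with the same flags.
A minimal sketch of the attribute's effect, using a hypothetical mapping function:

/* __attribute__((const)) promises: no side effects, no reads of mutable
 * global state; the result depends only on the arguments. */
__attribute__((const))
static int flags_to_index(unsigned long long flags)
{
	return flags & 1 ? 1 : 0;	/* hypothetical placeholder mapping */
}

int sum_twice(unsigned long long flags)
{
	/* The compiler may fold these two calls into one. */
	return flags_to_index(flags) + flags_to_index(flags);
}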
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index c3fa7d3fa770..8afa90074891 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -121,12 +121,12 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
workspace->strm.total_in = 0;
workspace->strm.total_out = 0;
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
- cpage_out = kmap(out_page);
+ cpage_out = page_address(out_page);
pages[0] = out_page;
nr_pages = 1;
@@ -148,26 +148,22 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
int i;
for (i = 0; i < in_buf_pages; i++) {
- if (in_page) {
- kunmap(in_page);
+ if (in_page)
put_page(in_page);
- }
in_page = find_get_page(mapping,
start >> PAGE_SHIFT);
- data_in = kmap(in_page);
+ data_in = page_address(in_page);
memcpy(workspace->buf + i * PAGE_SIZE,
data_in, PAGE_SIZE);
start += PAGE_SIZE;
}
workspace->strm.next_in = workspace->buf;
} else {
- if (in_page) {
- kunmap(in_page);
+ if (in_page)
put_page(in_page);
- }
in_page = find_get_page(mapping,
start >> PAGE_SHIFT);
- data_in = kmap(in_page);
+ data_in = page_address(in_page);
start += PAGE_SIZE;
workspace->strm.next_in = data_in;
}
@@ -196,18 +192,17 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
* the stream end if required
*/
if (workspace->strm.avail_out == 0) {
- kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
- cpage_out = kmap(out_page);
+ cpage_out = page_address(out_page);
pages[nr_pages] = out_page;
nr_pages++;
workspace->strm.avail_out = PAGE_SIZE;
@@ -234,18 +229,17 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
goto out;
} else if (workspace->strm.avail_out == 0) {
/* get another page for the stream end */
- kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
- cpage_out = kmap(out_page);
+ cpage_out = page_address(out_page);
pages[nr_pages] = out_page;
nr_pages++;
workspace->strm.avail_out = PAGE_SIZE;
@@ -264,13 +258,8 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
*total_in = workspace->strm.total_in;
out:
*out_pages = nr_pages;
- if (out_page)
- kunmap(out_page);
-
- if (in_page) {
- kunmap(in_page);
+ if (in_page)
put_page(in_page);
- }
return ret;
}
@@ -286,10 +275,8 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
unsigned long buf_start;
struct page **pages_in = cb->compressed_pages;
- u64 disk_start = cb->start;
- struct bio *orig_bio = cb->orig_bio;
- data_in = kmap(pages_in[page_in_index]);
+ data_in = page_address(pages_in[page_in_index]);
workspace->strm.next_in = data_in;
workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE);
workspace->strm.total_in = 0;
@@ -311,7 +298,6 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) {
pr_warn("BTRFS: inflateInit failed\n");
- kunmap(pages_in[page_in_index]);
return -EIO;
}
while (workspace->strm.total_in < srclen) {
@@ -326,9 +312,8 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
if (buf_start == total_out)
break;
- ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
- total_out, disk_start,
- orig_bio);
+ ret2 = btrfs_decompress_buf2page(workspace->buf,
+ total_out - buf_start, cb, buf_start);
if (ret2 == 0) {
ret = 0;
goto done;
@@ -339,17 +324,16 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
if (workspace->strm.avail_in == 0) {
unsigned long tmp;
- kunmap(pages_in[page_in_index]);
+
page_in_index++;
if (page_in_index >= total_pages_in) {
data_in = NULL;
break;
}
- data_in = kmap(pages_in[page_in_index]);
+ data_in = page_address(pages_in[page_in_index]);
workspace->strm.next_in = data_in;
tmp = srclen - workspace->strm.total_in;
- workspace->strm.avail_in = min(tmp,
- PAGE_SIZE);
+ workspace->strm.avail_in = min(tmp, PAGE_SIZE);
}
}
if (ret != Z_STREAM_END)
@@ -358,10 +342,8 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
ret = 0;
done:
zlib_inflateEnd(&workspace->strm);
- if (data_in)
- kunmap(pages_in[page_in_index]);
if (!ret)
- zero_fill_bio(orig_bio);
+ zero_fill_bio(cb->orig_bio);
return ret;
}
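The kmap()/kunmap() pairs in zlib_compress_pages() could only be dropped because the
output pages are no longer allocated with __GFP_HIGHMEM: a lowmem page sits in the
kernel's linear mapping, so page_address() stays valid for the page's whole lifetime
and there is nothing to unmap. A kernel-style sketch of the pattern (a reduced sketch
assuming the usual lowmem semantics, not a complete example):

static char *alloc_mapped_buf(struct page **pagep)
{
	struct page *page = alloc_page(GFP_NOFS);	/* lowmem, never highmem */

	if (!page)
		return NULL;
	*pagep = page;
	/*
	 * No kmap(): a lowmem page is permanently mapped, and this
	 * pointer remains valid until the page is freed.
	 */
	return page_address(page);
}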
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 907c2cc45c9c..47af1ab3bf12 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -245,7 +245,7 @@ static int calculate_emulated_zone_size(struct btrfs_fs_info *fs_info)
goto out;
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
- ret = btrfs_next_item(root, path);
+ ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
/* No dev extents at all? Not good */
@@ -296,7 +296,6 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_zoned_device_info *zone_info = NULL;
struct block_device *bdev = device->bdev;
- struct request_queue *queue = bdev_get_queue(bdev);
sector_t nr_sectors;
sector_t sector = 0;
struct blk_zone *zones = NULL;
@@ -348,19 +347,10 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
nr_sectors = bdev_nr_sectors(bdev);
zone_info->zone_size_shift = ilog2(zone_info->zone_size);
- zone_info->max_zone_append_size =
- (u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT;
zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
- if (bdev_is_zoned(bdev) && zone_info->max_zone_append_size == 0) {
- btrfs_err(fs_info, "zoned: device %pg does not support zone append",
- bdev);
- ret = -EINVAL;
- goto out;
- }
-
zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
if (!zone_info->seq_zones) {
ret = -ENOMEM;
@@ -529,7 +519,6 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
u64 zoned_devices = 0;
u64 nr_devices = 0;
u64 zone_size = 0;
- u64 max_zone_append_size = 0;
const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED);
int ret = 0;
@@ -565,11 +554,6 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
ret = -EINVAL;
goto out;
}
- if (!max_zone_append_size ||
- (zone_info->max_zone_append_size &&
- zone_info->max_zone_append_size < max_zone_append_size))
- max_zone_append_size =
- zone_info->max_zone_append_size;
}
nr_devices++;
}
@@ -619,7 +603,6 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
}
fs_info->zone_size = zone_size;
- fs_info->max_zone_append_size = max_zone_append_size;
fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
/*
@@ -1318,9 +1301,6 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
if (!btrfs_is_zoned(fs_info))
return false;
- if (!fs_info->max_zone_append_size)
- return false;
-
if (!is_data_inode(&inode->vfs_inode))
return false;
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index b0ae2608cb6b..4b299705bb12 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -22,7 +22,6 @@ struct btrfs_zoned_device_info {
*/
u64 zone_size;
u8 zone_size_shift;
- u64 max_zone_append_size;
u32 nr_zones;
unsigned long *seq_zones;
unsigned long *empty_zones;
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 3e26b466476a..56dce9f00988 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -399,19 +399,19 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
/* map in the first page of input data */
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
- workspace->in_buf.src = kmap(in_page);
+ workspace->in_buf.src = page_address(in_page);
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
/* Allocate and map in the output buffer */
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
pages[nr_pages++] = out_page;
- workspace->out_buf.dst = kmap(out_page);
+ workspace->out_buf.dst = page_address(out_page);
workspace->out_buf.pos = 0;
workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
@@ -446,19 +446,18 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
if (workspace->out_buf.pos == workspace->out_buf.size) {
tot_out += PAGE_SIZE;
max_out -= PAGE_SIZE;
- kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
pages[nr_pages++] = out_page;
- workspace->out_buf.dst = kmap(out_page);
+ workspace->out_buf.dst = page_address(out_page);
workspace->out_buf.pos = 0;
workspace->out_buf.size = min_t(size_t, max_out,
PAGE_SIZE);
@@ -473,13 +472,12 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
/* Check if we need more input */
if (workspace->in_buf.pos == workspace->in_buf.size) {
tot_in += PAGE_SIZE;
- kunmap(in_page);
put_page(in_page);
start += PAGE_SIZE;
len -= PAGE_SIZE;
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
- workspace->in_buf.src = kmap(in_page);
+ workspace->in_buf.src = page_address(in_page);
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
}
@@ -506,19 +504,18 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
tot_out += PAGE_SIZE;
max_out -= PAGE_SIZE;
- kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
pages[nr_pages++] = out_page;
- workspace->out_buf.dst = kmap(out_page);
+ workspace->out_buf.dst = page_address(out_page);
workspace->out_buf.pos = 0;
workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
}
@@ -534,12 +531,8 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
out:
*out_pages = nr_pages;
/* Cleanup */
- if (in_page) {
- kunmap(in_page);
+ if (in_page)
put_page(in_page);
- }
- if (out_page)
- kunmap(out_page);
return ret;
}
@@ -547,8 +540,6 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
struct page **pages_in = cb->compressed_pages;
- u64 disk_start = cb->start;
- struct bio *orig_bio = cb->orig_bio;
size_t srclen = cb->compressed_len;
ZSTD_DStream *stream;
int ret = 0;
@@ -565,7 +556,7 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
goto done;
}
- workspace->in_buf.src = kmap(pages_in[page_in_index]);
+ workspace->in_buf.src = page_address(pages_in[page_in_index]);
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
@@ -589,7 +580,7 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
workspace->out_buf.pos = 0;
ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
- buf_start, total_out, disk_start, orig_bio);
+ total_out - buf_start, cb, buf_start);
if (ret == 0)
break;
@@ -601,23 +592,21 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
break;
if (workspace->in_buf.pos == workspace->in_buf.size) {
- kunmap(pages_in[page_in_index++]);
+ page_in_index++;
if (page_in_index >= total_pages_in) {
workspace->in_buf.src = NULL;
ret = -EIO;
goto done;
}
srclen -= PAGE_SIZE;
- workspace->in_buf.src = kmap(pages_in[page_in_index]);
+ workspace->in_buf.src = page_address(pages_in[page_in_index]);
workspace->in_buf.pos = 0;
workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
}
}
ret = 0;
- zero_fill_bio(orig_bio);
+ zero_fill_bio(cb->orig_bio);
done:
- if (workspace->in_buf.src)
- kunmap(pages_in[page_in_index]);
return ret;
}
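Both decompressors now call btrfs_decompress_buf2page() with the same shape of
arguments; judging purely from these call sites, the helper takes the buffer, the
number of bytes produced in this round, the compressed_bio, and the running
decompressed offset, rather than recomputing positions from disk_start and orig_bio.
A caller-side sketch of that bookkeeping (argument meanings inferred from this diff,
not from the header):

/* buf_start: bytes decompressed before this iteration
 * total_out: bytes decompressed after it (from the stream state)
 */
u32 chunk_len = total_out - buf_start;		/* fresh bytes this round */

ret = btrfs_decompress_buf2page(workspace->out_buf.dst, chunk_len,
				cb, buf_start);
if (ret == 0)
	break;					/* original bio fully filled */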
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index a1e2813731d1..7e7a897ae0d3 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1395,9 +1395,11 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS;
} else {
struct address_space *mapping = inode->i_mapping;
- struct page *page = find_or_create_page(mapping, 0,
- mapping_gfp_constraint(mapping,
- ~__GFP_FS));
+ struct page *page;
+
+ filemap_invalidate_lock_shared(mapping);
+ page = find_or_create_page(mapping, 0,
+ mapping_gfp_constraint(mapping, ~__GFP_FS));
if (!page) {
ret = VM_FAULT_OOM;
goto out_inline;
@@ -1418,6 +1420,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
vmf->page = page;
ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
out_inline:
+ filemap_invalidate_unlock_shared(mapping);
dout("filemap_fault %p %llu read inline data ret %x\n",
inode, off, ret);
}
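The fault-path fix follows the filemap invalidate-lock convention for page-cache vs.
truncate races: the inline-data reader instantiates the page under the shared lock,
while hole punching and invalidation take it exclusively. A reduced sketch of the
shared side (hypothetical fill_page() stands in for the inline-data copy; the real
fault path keeps the page locked and hands it to the VM instead):

static int read_under_invalidate_lock(struct address_space *mapping,
				      pgoff_t index, gfp_t gfp)
{
	struct page *page;
	int ret = 0;

	filemap_invalidate_lock_shared(mapping);
	page = find_or_create_page(mapping, index, gfp);
	if (!page) {
		ret = -ENOMEM;
		goto out;
	}
	/* fill_page(page) would copy the inline data here (hypothetical) */
	unlock_page(page);
	put_page(page);
out:
	filemap_invalidate_unlock_shared(mapping);
	return ret;
}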
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7bdefd0c789a..39db97f149b9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1743,7 +1743,11 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
struct ceph_cap_flush *ceph_alloc_cap_flush(void)
{
- return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
+ struct ceph_cap_flush *cf;
+
+ cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
+ cf->is_capsnap = false;
+ return cf;
}
void ceph_free_cap_flush(struct ceph_cap_flush *cf)
@@ -1778,7 +1782,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
prev->wake = true;
wake = false;
}
- list_del(&cf->g_list);
+ list_del_init(&cf->g_list);
return wake;
}
@@ -1793,7 +1797,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
prev->wake = true;
wake = false;
}
- list_del(&cf->i_list);
+ list_del_init(&cf->i_list);
return wake;
}
@@ -2352,7 +2356,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
- if (!cf->caps) {
+ if (cf->is_capsnap) {
last_snap_flush = cf->tid;
break;
}
@@ -2371,7 +2375,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
first_tid = cf->tid + 1;
- if (cf->caps) {
+ if (!cf->is_capsnap) {
struct cap_msg_args arg;
dout("kick_flushing_caps %p cap %p tid %llu %s\n",
@@ -3516,7 +3520,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
cleaned = cf->caps;
/* Is this a capsnap? */
- if (cf->caps == 0)
+ if (cf->is_capsnap)
continue;
if (cf->tid <= flush_tid) {
@@ -3589,8 +3593,9 @@ out:
while (!list_empty(&to_remove)) {
cf = list_first_entry(&to_remove,
struct ceph_cap_flush, i_list);
- list_del(&cf->i_list);
- ceph_free_cap_flush(cf);
+ list_del_init(&cf->i_list);
+ if (!cf->is_capsnap)
+ ceph_free_cap_flush(cf);
}
if (wake_ci)
@@ -4150,11 +4155,19 @@ bad:
/*
* Delayed work handler to process end of delayed cap release LRU list.
+ *
+ * If new caps are added to the list while it is being processed, they
+ * won't be processed in this run. In that case, ci->i_hold_caps_max is
+ * returned so that the work can be rescheduled accordingly.
*/
-void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
+unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
{
struct inode *inode;
struct ceph_inode_info *ci;
+ struct ceph_mount_options *opt = mdsc->fsc->mount_options;
+ unsigned long delay_max = opt->caps_wanted_delay_max * HZ;
+ unsigned long loop_start = jiffies;
+ unsigned long delay = 0;
dout("check_delayed_caps\n");
spin_lock(&mdsc->cap_delay_lock);
@@ -4162,6 +4175,11 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
ci = list_first_entry(&mdsc->cap_delay_list,
struct ceph_inode_info,
i_cap_delay_list);
+ if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) {
+ dout("%s caps added recently. Exiting loop", __func__);
+ delay = ci->i_hold_caps_max;
+ break;
+ }
if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 &&
time_before(jiffies, ci->i_hold_caps_max))
break;
@@ -4177,6 +4195,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
}
}
spin_unlock(&mdsc->cap_delay_lock);
+
+ return delay;
}
/*
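The new early exit leans on how i_hold_caps_max is assigned: when a cap is queued for
delayed processing it is set to roughly jiffies + caps_wanted_delay_max, so subtracting
delay_max recovers the queueing time, and anything queued after the scan began is left
for the next run instead of letting other contexts keep the loop spinning. A sketch of
the two sides of that contract (the queue-side assignment is an assumption about ceph's
timeout setup, not shown in this diff):

/* queue side (assumed): record "expire at now + max delay" */
ci->i_hold_caps_max = round_jiffies(jiffies + opt->caps_wanted_delay_max * HZ);

/* scan side (from this patch): entry queued after loop_start? stop. */
if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) {
	delay = ci->i_hold_caps_max;	/* reschedule for this entry */
	break;
}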
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d1755ac1d964..e1d605a02d4a 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2088,6 +2088,7 @@ static long ceph_fallocate(struct file *file, int mode,
if (ret < 0)
goto unlock;
+ filemap_invalidate_lock(inode->i_mapping);
ceph_zero_pagecache_range(inode, offset, length);
ret = ceph_zero_objects(inode, offset, length);
@@ -2100,6 +2101,7 @@ static long ceph_fallocate(struct file *file, int mode,
if (dirty)
__mark_inode_dirty(inode, dirty);
}
+ filemap_invalidate_unlock(inode->i_mapping);
ceph_put_cap_refs(ci, got);
unlock:
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index fa8a847743d0..bdeb271f47d9 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -240,9 +240,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
- /* No mandatory locks */
- if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
- return -ENOLCK;
dout("ceph_lock, fl_owner: %p\n", fl->fl_owner);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9db1b39df773..0b69aec23e5c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1616,7 +1616,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
spin_lock(&mdsc->cap_dirty_lock);
list_for_each_entry(cf, &to_remove, i_list)
- list_del(&cf->g_list);
+ list_del_init(&cf->g_list);
if (!list_empty(&ci->i_dirty_item)) {
pr_warn_ratelimited(
@@ -1668,8 +1668,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_cap_flush *cf;
cf = list_first_entry(&to_remove,
struct ceph_cap_flush, i_list);
- list_del(&cf->i_list);
- ceph_free_cap_flush(cf);
+ list_del_init(&cf->i_list);
+ if (!cf->is_capsnap)
+ ceph_free_cap_flush(cf);
}
wake_up_all(&ci->i_cap_wq);
@@ -4490,22 +4491,29 @@ void inc_session_sequence(struct ceph_mds_session *s)
}
/*
- * delayed work -- periodically trim expired leases, renew caps with mds
+ * delayed work -- periodically trim expired leases, renew caps with mds. If
+ * the @delay parameter is set to 0 or if it's more than 5 secs, the default
+ * workqueue delay value of 5 secs will be used.
*/
-static void schedule_delayed(struct ceph_mds_client *mdsc)
+static void schedule_delayed(struct ceph_mds_client *mdsc, unsigned long delay)
{
- int delay = 5;
- unsigned hz = round_jiffies_relative(HZ * delay);
- schedule_delayed_work(&mdsc->delayed_work, hz);
+ unsigned long max_delay = HZ * 5;
+
+ /* 5 secs default delay */
+ if (!delay || (delay > max_delay))
+ delay = max_delay;
+ schedule_delayed_work(&mdsc->delayed_work,
+ round_jiffies_relative(delay));
}
static void delayed_work(struct work_struct *work)
{
- int i;
struct ceph_mds_client *mdsc =
container_of(work, struct ceph_mds_client, delayed_work.work);
+ unsigned long delay;
int renew_interval;
int renew_caps;
+ int i;
dout("mdsc delayed_work\n");
@@ -4545,7 +4553,7 @@ static void delayed_work(struct work_struct *work)
}
mutex_unlock(&mdsc->mutex);
- ceph_check_delayed_caps(mdsc);
+ delay = ceph_check_delayed_caps(mdsc);
ceph_queue_cap_reclaim_work(mdsc);
@@ -4553,7 +4561,7 @@ static void delayed_work(struct work_struct *work)
maybe_recover_session(mdsc);
- schedule_delayed(mdsc);
+ schedule_delayed(mdsc, delay);
}
int ceph_mdsc_init(struct ceph_fs_client *fsc)
@@ -5030,7 +5038,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
mdsc->mdsmap->m_epoch);
mutex_unlock(&mdsc->mutex);
- schedule_delayed(mdsc);
+ schedule_delayed(mdsc, 0);
return;
bad_unlock:
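schedule_delayed() now accepts the value that ceph_check_delayed_caps() returns,
clamping zero or anything above the old fixed period back to 5 seconds, so callers
that pass 0 keep the previous behavior. The clamp in isolation, as a standalone
sketch (tick rate is illustrative):

#include <stdio.h>

/* Model of the clamp: 0 or an oversized request falls back to the default. */
static unsigned long clamp_delay(unsigned long delay, unsigned long hz)
{
	unsigned long max_delay = hz * 5;	/* 5 second default period */

	if (!delay || delay > max_delay)
		delay = max_delay;
	return delay;
}

int main(void)
{
	const unsigned long hz = 250;		/* illustrative tick rate */

	printf("%lu %lu %lu\n",
	       clamp_delay(0, hz),		/* -> 1250 (default) */
	       clamp_delay(500, hz),		/* -> 500 (passed through) */
	       clamp_delay(10000, hz));		/* -> 1250 (clamped) */
	return 0;
}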
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index abd9af7727ad..3c444b9cb17b 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -394,9 +394,11 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
{
int i;
- for (i = 0; i < m->possible_max_rank; i++)
- kfree(m->m_info[i].export_targets);
- kfree(m->m_info);
+ if (m->m_info) {
+ for (i = 0; i < m->possible_max_rank; i++)
+ kfree(m->m_info[i].export_targets);
+ kfree(m->m_info);
+ }
kfree(m->m_data_pg_pools);
kfree(m);
}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4ac0606dcbd4..15105f9da3fd 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -67,19 +67,19 @@ void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
{
lockdep_assert_held(&mdsc->snap_rwsem);
- dout("get_realm %p %d -> %d\n", realm,
- atomic_read(&realm->nref), atomic_read(&realm->nref)+1);
/*
- * since we _only_ increment realm refs or empty the empty
- * list with snap_rwsem held, adjusting the empty list here is
- * safe. we do need to protect against concurrent empty list
- * additions, however.
+ * The 0->1 and 1->0 transitions must take the snap_empty_lock
+ * atomically with the refcount change. Go ahead and bump the
+ * nref here, unless it's 0, in which case we take the spinlock
+ * and then do the increment and remove it from the list.
*/
- if (atomic_inc_return(&realm->nref) == 1) {
- spin_lock(&mdsc->snap_empty_lock);
+ if (atomic_inc_not_zero(&realm->nref))
+ return;
+
+ spin_lock(&mdsc->snap_empty_lock);
+ if (atomic_inc_return(&realm->nref) == 1)
list_del_init(&realm->empty_item);
- spin_unlock(&mdsc->snap_empty_lock);
- }
+ spin_unlock(&mdsc->snap_empty_lock);
}
static void __insert_snap_realm(struct rb_root *root,
@@ -208,28 +208,28 @@ static void __put_snap_realm(struct ceph_mds_client *mdsc,
{
lockdep_assert_held_write(&mdsc->snap_rwsem);
- dout("__put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
- atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
+ /*
+ * We do not require the snap_empty_lock here, as any caller that
+ * increments the value must hold the snap_rwsem.
+ */
if (atomic_dec_and_test(&realm->nref))
__destroy_snap_realm(mdsc, realm);
}
/*
- * caller needn't hold any locks
+ * See comments in ceph_get_snap_realm. Caller needn't hold any locks.
*/
void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
struct ceph_snap_realm *realm)
{
- dout("put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
- atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
- if (!atomic_dec_and_test(&realm->nref))
+ if (!atomic_dec_and_lock(&realm->nref, &mdsc->snap_empty_lock))
return;
if (down_write_trylock(&mdsc->snap_rwsem)) {
+ spin_unlock(&mdsc->snap_empty_lock);
__destroy_snap_realm(mdsc, realm);
up_write(&mdsc->snap_rwsem);
} else {
- spin_lock(&mdsc->snap_empty_lock);
list_add(&realm->empty_item, &mdsc->snap_empty);
spin_unlock(&mdsc->snap_empty_lock);
}
@@ -487,6 +487,9 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci)
pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
return;
}
+ capsnap->cap_flush.is_capsnap = true;
+ INIT_LIST_HEAD(&capsnap->cap_flush.i_list);
+ INIT_LIST_HEAD(&capsnap->cap_flush.g_list);
spin_lock(&ci->i_ceph_lock);
used = __ceph_caps_used(ci);
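The snap-realm refcounting rework is an instance of a general lockless pattern: keep
the hot get/put paths free of the lock and take the spinlock only around the 0 <-> 1
transitions, so the refcount and the "empty" list membership change atomically
together. Reduced to its skeleton, with a hypothetical struct obj standing in for
ceph_snap_realm:

static DEFINE_SPINLOCK(empty_lock);
static LIST_HEAD(empty_list);

struct obj {
	atomic_t nref;
	struct list_head empty_item;
};

static void obj_get(struct obj *o)
{
	/* fast path: already referenced, no lock needed */
	if (atomic_inc_not_zero(&o->nref))
		return;
	/* possible 0 -> 1 transition: do it under the lock */
	spin_lock(&empty_lock);
	if (atomic_inc_return(&o->nref) == 1)
		list_del_init(&o->empty_item);	/* leave the empty list */
	spin_unlock(&empty_lock);
}

static void obj_put(struct obj *o)
{
	/* returns with the lock held only on the 1 -> 0 transition */
	if (!atomic_dec_and_lock(&o->nref, &empty_lock))
		return;
	list_add(&o->empty_item, &empty_list);
	spin_unlock(&empty_lock);
}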
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 6b6332a5c113..b1a363641beb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -182,8 +182,9 @@ struct ceph_cap {
struct ceph_cap_flush {
u64 tid;
- int caps; /* 0 means capsnap */
+ int caps;
bool wake; /* wake up flush waiters when finish ? */
+ bool is_capsnap; /* true means capsnap */
struct list_head g_list; // global
struct list_head i_list; // per inode
};
@@ -1167,7 +1168,7 @@ extern void ceph_flush_snaps(struct ceph_inode_info *ci,
extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session);
-extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
+extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
extern int ceph_drop_caps_for_unlink(struct inode *inode);
extern int ceph_encode_inode_release(void **p, struct inode *inode,
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 7364950a9ef4..3b7e3b9e4fd2 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -4,19 +4,16 @@ config CIFS
depends on INET
select NLS
select CRYPTO
- select CRYPTO_MD4
select CRYPTO_MD5
select CRYPTO_SHA256
select CRYPTO_SHA512
select CRYPTO_CMAC
select CRYPTO_HMAC
- select CRYPTO_LIB_ARC4
select CRYPTO_AEAD2
select CRYPTO_CCM
select CRYPTO_GCM
select CRYPTO_ECB
select CRYPTO_AES
- select CRYPTO_LIB_DES
select KEYS
select DNS_RESOLVER
select ASN1
@@ -85,33 +82,6 @@ config CIFS_ALLOW_INSECURE_LEGACY
If unsure, say Y.
-config CIFS_WEAK_PW_HASH
- bool "Support legacy servers which use weaker LANMAN security"
- depends on CIFS && CIFS_ALLOW_INSECURE_LEGACY
- help
- Modern CIFS servers including Samba and most Windows versions
- (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
- security mechanisms. These hash the password more securely
- than the mechanisms used in the older LANMAN version of the
- SMB protocol but LANMAN based authentication is needed to
- establish sessions with some old SMB servers.
-
- Enabling this option allows the cifs module to mount to older
- LANMAN based servers such as OS/2 and Windows 95, but such
- mounts may be less secure than mounts using NTLM or more recent
- security mechanisms if you are on a public network. Unless you
- have a need to access old SMB servers (and are on a private
- network) you probably want to say N. Even if this support
- is enabled in the kernel build, LANMAN authentication will not be
- used automatically. At runtime LANMAN mounts are disabled but
- can be set to required (or optional) either in
- /proc/fs/cifs (see Documentation/admin-guide/cifs/usage.rst for
- more detail) or via an option on the mount command. This support
- is disabled by default in order to reduce the possibility of a
- downgrade attack.
-
- If unsure, say N.
-
config CIFS_UPCALL
bool "Kerberos/SPNEGO advanced session setup"
depends on CIFS
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 8857ac7e7a14..51a824fc926a 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -250,9 +250,6 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
seq_printf(m, ",ALLOW_INSECURE_LEGACY");
#endif
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- seq_printf(m, ",WEAK_PW_HASH");
-#endif
#ifdef CONFIG_CIFS_POSIX
seq_printf(m, ",CIFS_POSIX");
#endif
@@ -929,14 +926,6 @@ cifs_security_flags_handle_must_flags(unsigned int *flags)
*flags = CIFSSEC_MUST_NTLMSSP;
else if ((*flags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2)
*flags = CIFSSEC_MUST_NTLMV2;
- else if ((*flags & CIFSSEC_MUST_NTLM) == CIFSSEC_MUST_NTLM)
- *flags = CIFSSEC_MUST_NTLM;
- else if (CIFSSEC_MUST_LANMAN &&
- (*flags & CIFSSEC_MUST_LANMAN) == CIFSSEC_MUST_LANMAN)
- *flags = CIFSSEC_MUST_LANMAN;
- else if (CIFSSEC_MUST_PLNTXT &&
- (*flags & CIFSSEC_MUST_PLNTXT) == CIFSSEC_MUST_PLNTXT)
- *flags = CIFSSEC_MUST_PLNTXT;
*flags |= signflags;
}
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index 93b47818c6c2..12bde7bfda86 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -147,8 +147,6 @@ static int cifs_swn_send_register_message(struct cifs_swn_reg *swnreg)
goto nlmsg_fail;
}
break;
- case LANMAN:
- case NTLM:
case NTLMv2:
case RawNTLMSSP:
ret = cifs_swn_auth_info_ntlm(swnreg->tcon, skb);
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 9bd03a231032..171ad8b42107 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -358,14 +358,9 @@ cifs_strndup_from_utf16(const char *src, const int maxlen,
if (!dst)
return NULL;
cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
- NO_MAP_UNI_RSVD);
+ NO_MAP_UNI_RSVD);
} else {
- len = strnlen(src, maxlen);
- len++;
- dst = kmalloc(len, GFP_KERNEL);
- if (!dst)
- return NULL;
- strlcpy(dst, src, len);
+ dst = kstrndup(src, maxlen, GFP_KERNEL);
}
return dst;
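kstrndup() folds away exactly what the removed lines did by hand: probe the bounded
length, allocate, copy, NUL-terminate. A userspace model of the same equivalence using
POSIX strndup(), which has matching semantics for this use (copy at most maxlen bytes,
always terminate):

#include <stdlib.h>
#include <string.h>

static char *dup_bounded_open_coded(const char *src, size_t maxlen)
{
	size_t len = strnlen(src, maxlen) + 1;
	char *dst = malloc(len);

	if (!dst)
		return NULL;
	memcpy(dst, src, len - 1);	/* bounded copy, like strlcpy did */
	dst[len - 1] = '\0';
	return dst;
}

/* ...is equivalent to a single call: strndup(src, maxlen). */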
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index ecf15d845dbd..6679e07e533e 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -22,7 +22,7 @@
#include <linux/random.h>
#include <linux/highmem.h>
#include <linux/fips.h>
-#include <crypto/arc4.h>
+#include "../cifs_common/arc4.h"
#include <crypto/aead.h>
int __cifs_calc_signature(struct smb_rqst *rqst,
@@ -250,87 +250,6 @@ int cifs_verify_signature(struct smb_rqst *rqst,
}
-/* first calculate 24 bytes ntlm response and then 16 byte session key */
-int setup_ntlm_response(struct cifs_ses *ses, const struct nls_table *nls_cp)
-{
- int rc = 0;
- unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
- char temp_key[CIFS_SESS_KEY_SIZE];
-
- if (!ses)
- return -EINVAL;
-
- ses->auth_key.response = kmalloc(temp_len, GFP_KERNEL);
- if (!ses->auth_key.response)
- return -ENOMEM;
-
- ses->auth_key.len = temp_len;
-
- rc = SMBNTencrypt(ses->password, ses->server->cryptkey,
- ses->auth_key.response + CIFS_SESS_KEY_SIZE, nls_cp);
- if (rc) {
- cifs_dbg(FYI, "%s Can't generate NTLM response, error: %d\n",
- __func__, rc);
- return rc;
- }
-
- rc = E_md4hash(ses->password, temp_key, nls_cp);
- if (rc) {
- cifs_dbg(FYI, "%s Can't generate NT hash, error: %d\n",
- __func__, rc);
- return rc;
- }
-
- rc = mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
- if (rc)
- cifs_dbg(FYI, "%s Can't generate NTLM session key, error: %d\n",
- __func__, rc);
-
- return rc;
-}
-
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
- char *lnm_session_key)
-{
- int i, len;
- int rc;
- char password_with_pad[CIFS_ENCPWD_SIZE] = {0};
-
- if (password) {
- for (len = 0; len < CIFS_ENCPWD_SIZE; len++)
- if (!password[len])
- break;
-
- memcpy(password_with_pad, password, len);
- }
-
- if (!encrypt && global_secflags & CIFSSEC_MAY_PLNTXT) {
- memcpy(lnm_session_key, password_with_pad,
- CIFS_ENCPWD_SIZE);
- return 0;
- }
-
- /* calculate old style session key */
- /* calling toupper is less broken than repeatedly
- calling nls_toupper would be since that will never
- work for UTF8, but neither handles multibyte code pages
- but the only alternative would be converting to UCS-16 (Unicode)
- (using a routine something like UniStrupr) then
- uppercasing and then converting back from Unicode - which
- would only worth doing it if we knew it were utf8. Basically
- utf8 and other multibyte codepages each need their own strupper
- function since a byte at a time will not work. */
-
- for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
- password_with_pad[i] = toupper(password_with_pad[i]);
-
- rc = SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
-
- return rc;
-}
-#endif /* CIFS_WEAK_PW_HASH */
-
/* Build a proper attribute value/target info pairs blob.
* Fill in netbios and dns domain name and workstation name
* and client time (total five av pairs plus one end-of-fields indicator).
@@ -780,9 +699,9 @@ calc_seckey(struct cifs_ses *ses)
return -ENOMEM;
}
- arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE);
- arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key,
- CIFS_CPHTXT_SIZE);
+ cifs_arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE);
+ cifs_arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key,
+ CIFS_CPHTXT_SIZE);
/* make secondary_key/nonce as session key */
memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE);
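calc_seckey() now uses the ARC4 implementation carried in fs/cifs_common instead of the
crypto library one, which is what lets Kconfig drop the CRYPTO_LIB_ARC4 dependency. A
sketch of the sealing step assembled from the call sites above (context allocation and
constants as in calc_seckey; treat the exact signatures as inferred from this diff):

struct arc4_ctx *ctx_arc4 = kmalloc(sizeof(*ctx_arc4), GFP_KERNEL);

if (!ctx_arc4)
	return -ENOMEM;
/* key with the NTLM session key, then encrypt the random secondary key */
cifs_arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE);
cifs_arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key, CIFS_CPHTXT_SIZE);
kfree_sensitive(ctx_arc4);	/* zero the cipher state before freeing */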
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 64b71c4e2a9d..8c20bfa187ac 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -399,7 +399,6 @@ cifs_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
- cifs_fscache_release_inode_cookie(inode);
}
static void
@@ -438,15 +437,9 @@ cifs_show_security(struct seq_file *s, struct cifs_ses *ses)
seq_puts(s, ",sec=");
switch (ses->sectype) {
- case LANMAN:
- seq_puts(s, "lanman");
- break;
case NTLMv2:
seq_puts(s, "ntlmv2");
break;
- case NTLM:
- seq_puts(s, "ntlm");
- break;
case Kerberos:
seq_puts(s, "krb5");
break;
@@ -1755,7 +1748,6 @@ MODULE_DESCRIPTION
MODULE_VERSION(CIFS_VERSION);
MODULE_SOFTDEP("ecb");
MODULE_SOFTDEP("hmac");
-MODULE_SOFTDEP("md4");
MODULE_SOFTDEP("md5");
MODULE_SOFTDEP("nls");
MODULE_SOFTDEP("aes");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c0bfc2f01030..c068f7d8d879 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -114,8 +114,6 @@ enum statusEnum {
enum securityEnum {
Unspecified = 0, /* not specified */
- LANMAN, /* Legacy LANMAN auth */
- NTLM, /* Legacy NTLM012 auth with NTLM hash */
NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
Kerberos, /* Kerberos via SPNEGO */
@@ -634,7 +632,6 @@ struct TCP_Server_Info {
struct session_key session_key;
unsigned long lstrp; /* when we got last response from this server */
struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
-#define CIFS_NEGFLAVOR_LANMAN 0 /* wct == 13, LANMAN */
#define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */
#define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */
char negflavor; /* NEGOTIATE response flavor */
@@ -1611,6 +1608,11 @@ struct dfs_info3_param {
int ttl;
};
+struct file_list {
+ struct list_head list;
+ struct cifsFileInfo *cfile;
+};
+
/*
* common struct for holding inode info when searching for or updating an
* inode with new info
@@ -1729,16 +1731,8 @@ static inline bool is_retryable_error(int error)
/* Security Flags: indicate type of session setup needed */
#define CIFSSEC_MAY_SIGN 0x00001
-#define CIFSSEC_MAY_NTLM 0x00002
#define CIFSSEC_MAY_NTLMV2 0x00004
#define CIFSSEC_MAY_KRB5 0x00008
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-#define CIFSSEC_MAY_LANMAN 0x00010
-#define CIFSSEC_MAY_PLNTXT 0x00020
-#else
-#define CIFSSEC_MAY_LANMAN 0
-#define CIFSSEC_MAY_PLNTXT 0
-#endif /* weak passwords */
#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */
#define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */
@@ -1746,32 +1740,19 @@ static inline bool is_retryable_error(int error)
/* note that only one of the following can be set so the
result of setting MUST flags more than once will be to
require use of the stronger protocol */
-#define CIFSSEC_MUST_NTLM 0x02002
#define CIFSSEC_MUST_NTLMV2 0x04004
#define CIFSSEC_MUST_KRB5 0x08008
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-#define CIFSSEC_MUST_LANMAN 0x10010
-#define CIFSSEC_MUST_PLNTXT 0x20020
-#ifdef CONFIG_CIFS_UPCALL
-#define CIFSSEC_MASK 0xBF0BF /* allows weak security but also krb5 */
-#else
-#define CIFSSEC_MASK 0xB70B7 /* current flags supported if weak */
-#endif /* UPCALL */
-#else /* do not allow weak pw hash */
-#define CIFSSEC_MUST_LANMAN 0
-#define CIFSSEC_MUST_PLNTXT 0
#ifdef CONFIG_CIFS_UPCALL
#define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */
#else
#define CIFSSEC_MASK 0x87087 /* flags supported if no weak allowed */
#endif /* UPCALL */
-#endif /* WEAK_PW_HASH */
#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
#define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */
#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP)
-#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2)
-#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)
+#define CIFSSEC_MAX (CIFSSEC_MUST_NTLMV2)
+#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)
/*
*****************************************************************
* All constants go here
@@ -1935,10 +1916,6 @@ static inline char *get_security_type_str(enum securityEnum sectype)
return "Kerberos";
case NTLMv2:
return "NTLMv2";
- case NTLM:
- return "NTLM";
- case LANMAN:
- return "LANMAN";
default:
return "Unknown";
}
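The security-flag values that survive the cleanup keep their long-standing encoding:
every MUST flag is its MAY bit plus the same bit shifted left by 12, so a MUST value
masked against the MAY set still enables the mechanism, and a test of the form
(*flags & MUST) == MUST checks both halves at once. A standalone check of that
invariant for the remaining mechanisms:

#include <assert.h>

#define MAY_NTLMV2	0x00004
#define MAY_KRB5	0x00008
#define MAY_NTLMSSP	0x00080

int main(void)
{
	/* MUST == MAY | (MAY << 12) for every mechanism */
	assert((MAY_NTLMV2 | (MAY_NTLMV2 << 12)) == 0x04004);
	assert((MAY_KRB5 | (MAY_KRB5 << 12)) == 0x08008);
	assert((MAY_NTLMSSP | (MAY_NTLMSSP << 12)) == 0x80080);
	return 0;
}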
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index f6e235001358..dc920e206336 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -14,13 +14,7 @@
#include <asm/unaligned.h>
#include "smbfsctl.h"
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-#define LANMAN_PROT 0
-#define LANMAN2_PROT 1
-#define CIFS_PROT 2
-#else
#define CIFS_PROT 0
-#endif
#define POSIX_PROT (CIFS_PROT+1)
#define BAD_PROT 0xFFFF
@@ -505,30 +499,8 @@ typedef struct negotiate_req {
unsigned char DialectsArray[1];
} __attribute__((packed)) NEGOTIATE_REQ;
-/* Dialect index is 13 for LANMAN */
-
#define MIN_TZ_ADJ (15 * 60) /* minimum grid for timezones in seconds */
-typedef struct lanman_neg_rsp {
- struct smb_hdr hdr; /* wct = 13 */
- __le16 DialectIndex;
- __le16 SecurityMode;
- __le16 MaxBufSize;
- __le16 MaxMpxCount;
- __le16 MaxNumberVcs;
- __le16 RawMode;
- __le32 SessionKey;
- struct {
- __le16 Time;
- __le16 Date;
- } __attribute__((packed)) SrvTime;
- __le16 ServerTimeZone;
- __le16 EncryptionKeyLength;
- __le16 Reserved;
- __u16 ByteCount;
- unsigned char EncryptionKey[1];
-} __attribute__((packed)) LANMAN_NEG_RSP;
-
#define READ_RAW_ENABLE 1
#define WRITE_RAW_ENABLE 2
#define RAW_ENABLE (READ_RAW_ENABLE | WRITE_RAW_ENABLE)
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e0def0f0714b..f9740c21ca3d 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -498,19 +498,12 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
extern int cifs_verify_signature(struct smb_rqst *rqst,
struct TCP_Server_Info *server,
__u32 expected_sequence_number);
-extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *,
- const struct nls_table *);
-extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *);
extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
extern void cifs_crypto_secmech_release(struct TCP_Server_Info *server);
extern int calc_seckey(struct cifs_ses *);
extern int generate_smb30signingkey(struct cifs_ses *);
extern int generate_smb311signingkey(struct cifs_ses *);
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-extern int calc_lanman_hash(const char *password, const char *cryptkey,
- bool encrypt, char *lnm_session_key);
-#endif /* CIFS_WEAK_PW_HASH */
extern int CIFSSMBCopy(unsigned int xid,
struct cifs_tcon *source_tcon,
const char *fromName,
@@ -547,11 +540,8 @@ extern int check_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
struct cifs_fattr *fattr,
const unsigned char *path);
-extern int mdfour(unsigned char *, unsigned char *, int);
extern int E_md4hash(const unsigned char *passwd, unsigned char *p16,
const struct nls_table *codepage);
-extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
- unsigned char *p24);
extern int
cifs_setup_volume_info(struct smb3_fs_context *ctx, const char *mntopts, const char *devname);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 65d1a65bfc37..a8e41c1e80ca 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -42,10 +42,6 @@ static struct {
int index;
char *name;
} protocols[] = {
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- {LANMAN_PROT, "\2LM1.2X002"},
- {LANMAN2_PROT, "\2LANMAN2.1"},
-#endif /* weak password hashing for legacy clients */
{CIFS_PROT, "\2NT LM 0.12"},
{POSIX_PROT, "\2POSIX 2"},
{BAD_PROT, "\2"}
@@ -55,10 +51,6 @@ static struct {
int index;
char *name;
} protocols[] = {
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- {LANMAN_PROT, "\2LM1.2X002"},
- {LANMAN2_PROT, "\2LANMAN2.1"},
-#endif /* weak password hashing for legacy clients */
{CIFS_PROT, "\2NT LM 0.12"},
{BAD_PROT, "\2"}
};
@@ -66,17 +58,9 @@ static struct {
/* define the number of elements in the cifs dialect array */
#ifdef CONFIG_CIFS_POSIX
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-#define CIFS_NUM_PROT 4
-#else
#define CIFS_NUM_PROT 2
-#endif /* CIFS_WEAK_PW_HASH */
#else /* not posix */
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-#define CIFS_NUM_PROT 3
-#else
#define CIFS_NUM_PROT 1
-#endif /* CONFIG_CIFS_WEAK_PW_HASH */
#endif /* CIFS_POSIX */
/*
@@ -475,89 +459,6 @@ cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required)
return 0;
}
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-static int
-decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr)
-{
- __s16 tmp;
- struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr;
-
- if (server->dialect != LANMAN_PROT && server->dialect != LANMAN2_PROT)
- return -EOPNOTSUPP;
-
- server->sec_mode = le16_to_cpu(rsp->SecurityMode);
- server->maxReq = min_t(unsigned int,
- le16_to_cpu(rsp->MaxMpxCount),
- cifs_max_pending);
- set_credits(server, server->maxReq);
- server->maxBuf = le16_to_cpu(rsp->MaxBufSize);
- /* set up max_read for readpages check */
- server->max_read = server->maxBuf;
- /* even though we do not use raw we might as well set this
- accurately, in case we ever find a need for it */
- if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) {
- server->max_rw = 0xFF00;
- server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE;
- } else {
- server->max_rw = 0;/* do not need to use raw anyway */
- server->capabilities = CAP_MPX_MODE;
- }
- tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone);
- if (tmp == -1) {
- /* OS/2 often does not set timezone therefore
- * we must use server time to calc time zone.
- * Could deviate slightly from the right zone.
- * Smallest defined timezone difference is 15 minutes
- * (i.e. Nepal). Rounding up/down is done to match
- * this requirement.
- */
- int val, seconds, remain, result;
- struct timespec64 ts;
- time64_t utc = ktime_get_real_seconds();
- ts = cnvrtDosUnixTm(rsp->SrvTime.Date,
- rsp->SrvTime.Time, 0);
- cifs_dbg(FYI, "SrvTime %lld sec since 1970 (utc: %lld) diff: %lld\n",
- ts.tv_sec, utc,
- utc - ts.tv_sec);
- val = (int)(utc - ts.tv_sec);
- seconds = abs(val);
- result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
- remain = seconds % MIN_TZ_ADJ;
- if (remain >= (MIN_TZ_ADJ / 2))
- result += MIN_TZ_ADJ;
- if (val < 0)
- result = -result;
- server->timeAdj = result;
- } else {
- server->timeAdj = (int)tmp;
- server->timeAdj *= 60; /* also in seconds */
- }
- cifs_dbg(FYI, "server->timeAdj: %d seconds\n", server->timeAdj);
-
-
- /* BB get server time for time conversions and add
- code to use it and timezone since this is not UTC */
-
- if (rsp->EncryptionKeyLength ==
- cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
- memcpy(server->cryptkey, rsp->EncryptionKey,
- CIFS_CRYPTO_KEY_SIZE);
- } else if (server->sec_mode & SECMODE_PW_ENCRYPT) {
- return -EIO; /* need cryptkey unless plain text */
- }
-
- cifs_dbg(FYI, "LANMAN negotiated\n");
- return 0;
-}
-#else
-static inline int
-decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr)
-{
- cifs_dbg(VFS, "mount failed, cifs module not built with CIFS_WEAK_PW_HASH support\n");
- return -EOPNOTSUPP;
-}
-#endif
-
static bool
should_set_ext_sec_flag(enum securityEnum sectype)
{
@@ -626,16 +527,12 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
server->dialect = le16_to_cpu(pSMBr->DialectIndex);
cifs_dbg(FYI, "Dialect: %d\n", server->dialect);
/* Check wct = 1 error case */
- if ((pSMBr->hdr.WordCount < 13) || (server->dialect == BAD_PROT)) {
+ if ((pSMBr->hdr.WordCount <= 13) || (server->dialect == BAD_PROT)) {
/* core returns wct = 1, but we do not ask for core - otherwise
small wct just comes when dialect index is -1 indicating we
could not negotiate a common dialect */
rc = -EOPNOTSUPP;
goto neg_err_exit;
- } else if (pSMBr->hdr.WordCount == 13) {
- server->negflavor = CIFS_NEGFLAVOR_LANMAN;
- rc = decode_lanman_negprot_rsp(server, pSMBr);
- goto signing_check;
} else if (pSMBr->hdr.WordCount != 17) {
/* unknown wct */
rc = -EOPNOTSUPP;
@@ -677,7 +574,6 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
server->capabilities &= ~CAP_EXTENDED_SECURITY;
}
-signing_check:
if (!rc)
rc = cifs_enable_signing(server, ses->sign);
neg_err_exit:
@@ -2101,6 +1997,7 @@ cifs_writev_complete(struct work_struct *work)
else if (wdata->result < 0)
SetPageError(page);
end_page_writeback(page);
+ cifs_readpage_to_fscache(inode, page);
put_page(page);
}
if (wdata->result != -EAGAIN)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 3781eee9360a..0db344807ef1 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3684,38 +3684,6 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
*bcc_ptr = 0; /* password is null byte */
bcc_ptr++; /* skip password */
/* already aligned so no need to do it below */
- } else {
- pSMB->PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
- /* BB FIXME add code to fail this if NTLMv2 or Kerberos
- specified as required (when that support is added to
- the vfs in the future) as only NTLM or the much
- weaker LANMAN (which we do not send by default) is accepted
- by Samba (not sure whether other servers allow
- NTLMv2 password here) */
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
- (ses->sectype == LANMAN))
- calc_lanman_hash(tcon->password, ses->server->cryptkey,
- ses->server->sec_mode &
- SECMODE_PW_ENCRYPT ? true : false,
- bcc_ptr);
- else
-#endif /* CIFS_WEAK_PW_HASH */
- rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
- bcc_ptr, nls_codepage);
- if (rc) {
- cifs_dbg(FYI, "%s Can't generate NTLM rsp. Error: %d\n",
- __func__, rc);
- cifs_buf_release(smb_buffer);
- return rc;
- }
-
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
- if (ses->capabilities & CAP_UNICODE) {
- /* must align unicode strings */
- *bcc_ptr = 0; /* null byte password */
- bcc_ptr++;
- }
}
if (ses->server->sign)
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 79402ca0ddfa..5f8a302ffcb2 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -100,7 +100,7 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page,
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
pplen = cifs_sb->prepath ? strlen(cifs_sb->prepath) + 1 : 0;
- s = dentry_path_raw(direntry, page, PAGE_SIZE);
+ s = dentry_path_raw(direntry, page, PATH_MAX);
if (IS_ERR(s))
return s;
if (!s[1]) // for root we want "", not "/"
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0a72840a88f1..d0216472f1c6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -377,6 +377,8 @@ static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
struct cifsLockInfo *li, *tmp;
struct super_block *sb = inode->i_sb;
+ cifs_fscache_release_inode_cookie(inode);
+
/*
* Delete any outstanding lock records. We'll lose them when the file
* is closed anyway.
@@ -882,8 +884,10 @@ int cifs_close(struct inode *inode, struct file *file)
if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
cinode->lease_granted &&
dclose) {
- if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
+ if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
inode->i_ctime = inode->i_mtime = current_time(inode);
+ cifs_fscache_update_inode_cookie(inode);
+ }
spin_lock(&cinode->deferred_lock);
cifs_add_deferred_close(cfile, dclose);
if (cfile->deferred_close_scheduled &&
@@ -4170,6 +4174,10 @@ static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
+ struct file *file = vmf->vma->vm_file;
+ struct inode *inode = file_inode(file);
+
+ cifs_fscache_wait_on_page_write(inode, page);
lock_page(page);
return VM_FAULT_LOCKED;
@@ -4235,13 +4243,16 @@ cifs_readv_complete(struct work_struct *work)
(rdata->result == -EAGAIN && got_bytes)) {
flush_dcache_page(page);
SetPageUptodate(page);
- }
+ } else
+ SetPageError(page);
unlock_page(page);
if (rdata->result == 0 ||
(rdata->result == -EAGAIN && got_bytes))
cifs_readpage_to_fscache(rdata->mapping->host, page);
+ else
+ cifs_fscache_uncache_page(rdata->mapping->host, page);
got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
@@ -4848,34 +4859,33 @@ void cifs_oplock_break(struct work_struct *work)
oplock_break_ack:
/*
- * releasing stale oplock after recent reconnect of smb session using
- * a now incorrect file handle is not a data integrity issue but do
- * not bother sending an oplock release if session to server still is
- * disconnected since oplock already released by the server
- */
- if (!cfile->oplock_break_cancelled) {
- rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
- cinode);
- cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
- }
- /*
* When oplock break is received and there are no active
* file handles but cached, then schedule deferred close immediately.
* So, new open will not use cached handle.
*/
spin_lock(&CIFS_I(inode)->deferred_lock);
is_deferred = cifs_is_deferred_close(cfile, &dclose);
+ spin_unlock(&CIFS_I(inode)->deferred_lock);
if (is_deferred &&
cfile->deferred_close_scheduled &&
delayed_work_pending(&cfile->deferred)) {
- /*
- * If there is no pending work, mod_delayed_work queues new work.
- * So, Increase the ref count to avoid use-after-free.
- */
- if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
- cifsFileInfo_get(cfile);
+ if (cancel_delayed_work(&cfile->deferred)) {
+ _cifsFileInfo_put(cfile, false, false);
+ goto oplock_break_done;
+ }
}
- spin_unlock(&CIFS_I(inode)->deferred_lock);
+ /*
+ * Releasing a stale oplock after a recent reconnect of the SMB session,
+ * using a now-incorrect file handle, is not a data integrity issue; but
+ * do not bother sending an oplock release if the session to the server
+ * is still disconnected, since the oplock has already been released by
+ * the server.
+ */
+ if (!cfile->oplock_break_cancelled) {
+ rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
+ cinode);
+ cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
+ }
+oplock_break_done:
_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
cifs_done_oplock_break(cinode);
}
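
The reworked oplock-break path above swaps mod_delayed_work() for cancel_delayed_work(): when the cancel succeeds, the queued worker will never run, so the reference that work owned must be dropped explicitly (the _cifsFileInfo_put() call here). The pattern, distilled with hypothetical names:

	/* Sketch: cancelling queued delayed work that owns a reference.
	 * struct obj and obj_put() are hypothetical stand-ins. */
	#include <linux/workqueue.h>

	struct obj {
		struct delayed_work dwork;
	};

	extern void obj_put(struct obj *o);	/* hypothetical ref drop */

	static void obj_cancel_deferred(struct obj *o)
	{
		if (!delayed_work_pending(&o->dwork))
			return;
		/*
		 * cancel_delayed_work() returns true only if the work was
		 * queued and is now cancelled; the worker will not run, so
		 * the reference it would have dropped is dropped here.
		 */
		if (cancel_delayed_work(&o->dwork))
			obj_put(o);
	}
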
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index eed59bc1d913..3109def8e199 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -57,12 +57,9 @@ static const match_table_t cifs_secflavor_tokens = {
{ Opt_sec_krb5p, "krb5p" },
{ Opt_sec_ntlmsspi, "ntlmsspi" },
{ Opt_sec_ntlmssp, "ntlmssp" },
- { Opt_ntlm, "ntlm" },
- { Opt_sec_ntlmi, "ntlmi" },
{ Opt_sec_ntlmv2, "nontlm" },
{ Opt_sec_ntlmv2, "ntlmv2" },
{ Opt_sec_ntlmv2i, "ntlmv2i" },
- { Opt_sec_lanman, "lanman" },
{ Opt_sec_none, "none" },
{ Opt_sec_err, NULL }
@@ -221,23 +218,12 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c
case Opt_sec_ntlmssp:
ctx->sectype = RawNTLMSSP;
break;
- case Opt_sec_ntlmi:
- ctx->sign = true;
- fallthrough;
- case Opt_ntlm:
- ctx->sectype = NTLM;
- break;
case Opt_sec_ntlmv2i:
ctx->sign = true;
fallthrough;
case Opt_sec_ntlmv2:
ctx->sectype = NTLMv2;
break;
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- case Opt_sec_lanman:
- ctx->sectype = LANMAN;
- break;
-#endif
case Opt_sec_none:
ctx->nullauth = 1;
break;
@@ -1266,10 +1252,17 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->posix_paths = 1;
break;
case Opt_unix:
- if (result.negated)
+ if (result.negated) {
+ if (ctx->linux_ext == 1)
+ pr_warn_once("conflicting posix mount options specified\n");
ctx->linux_ext = 0;
- else
ctx->no_linux_ext = 1;
+ } else {
+ if (ctx->no_linux_ext == 1)
+ pr_warn_once("conflicting posix mount options specified\n");
+ ctx->linux_ext = 1;
+ ctx->no_linux_ext = 0;
+ }
break;
case Opt_nocase:
ctx->nocase = 1;
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
index b6243972edf3..a42ba71d7a81 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/cifs/fs_context.h
@@ -47,11 +47,8 @@ enum cifs_sec_param {
Opt_sec_krb5p,
Opt_sec_ntlmsspi,
Opt_sec_ntlmssp,
- Opt_ntlm,
- Opt_sec_ntlmi,
Opt_sec_ntlmv2,
Opt_sec_ntlmv2i,
- Opt_sec_lanman,
Opt_sec_none,
Opt_sec_err
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index dd625033cd6b..fab47fa7df74 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -176,29 +176,34 @@ void cifs_fscache_release_inode_cookie(struct inode *inode)
auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
+ /* fscache_relinquish_cookie does not seem to update auxdata */
+ fscache_update_cookie(cifsi->fscache, &auxdata);
fscache_relinquish_cookie(cifsi->fscache, &auxdata, false);
cifsi->fscache = NULL;
}
}
-static void cifs_fscache_disable_inode_cookie(struct inode *inode)
+void cifs_fscache_update_inode_cookie(struct inode *inode)
{
+ struct cifs_fscache_inode_auxdata auxdata;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
if (cifsi->fscache) {
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.eof = cifsi->server_eof;
+ auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
+ auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
+ auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
+ auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
+
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
- fscache_uncache_all_inode_pages(cifsi->fscache, inode);
- fscache_relinquish_cookie(cifsi->fscache, NULL, true);
- cifsi->fscache = NULL;
+ fscache_update_cookie(cifsi->fscache, &auxdata);
}
}
void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
{
- if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
- cifs_fscache_disable_inode_cookie(inode);
- else
- cifs_fscache_enable_inode_cookie(inode);
+ cifs_fscache_enable_inode_cookie(inode);
}
void cifs_fscache_reset_inode_cookie(struct inode *inode)
@@ -310,6 +315,8 @@ void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
struct cifsInodeInfo *cifsi = CIFS_I(inode);
int ret;
+ WARN_ON(!cifsi->fscache);
+
cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n",
__func__, cifsi->fscache, page, inode);
ret = fscache_write_page(cifsi->fscache, page,
@@ -334,3 +341,21 @@ void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
fscache_wait_on_page_write(cookie, page);
fscache_uncache_page(cookie, page);
}
+
+void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct fscache_cookie *cookie = cifsi->fscache;
+
+ cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
+ fscache_wait_on_page_write(cookie, page);
+}
+
+void __cifs_fscache_uncache_page(struct inode *inode, struct page *page)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct fscache_cookie *cookie = cifsi->fscache;
+
+ cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie);
+ fscache_uncache_page(cookie, page);
+}
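
Both fscache.c hunks above share one shape: populate a cifs_fscache_inode_auxdata from the inode's current EOF and timestamps, then push it to the cookie with fscache_update_cookie() so the coherency data compared on the next cookie acquisition is fresh. Condensed, with error handling and debug output elided:

	/* Sketch: refreshing fscache cookie auxdata from inode state,
	 * mirroring the hunks above. */
	static void sketch_update_cookie(struct cifsInodeInfo *cifsi)
	{
		struct cifs_fscache_inode_auxdata auxdata;

		if (!cifsi->fscache)
			return;

		memset(&auxdata, 0, sizeof(auxdata));
		auxdata.eof = cifsi->server_eof;
		auxdata.last_write_time_sec   = cifsi->vfs_inode.i_mtime.tv_sec;
		auxdata.last_write_time_nsec  = cifsi->vfs_inode.i_mtime.tv_nsec;
		auxdata.last_change_time_sec  = cifsi->vfs_inode.i_ctime.tv_sec;
		auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;

		/* Compared against stored coherency data on next use. */
		fscache_update_cookie(cifsi->fscache, &auxdata);
	}
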
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 3d55cb2ef055..82e856b9cf89 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -55,10 +55,13 @@ extern void cifs_fscache_get_super_cookie(struct cifs_tcon *);
extern void cifs_fscache_release_super_cookie(struct cifs_tcon *);
extern void cifs_fscache_release_inode_cookie(struct inode *);
+extern void cifs_fscache_update_inode_cookie(struct inode *inode);
extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *);
extern void cifs_fscache_reset_inode_cookie(struct inode *);
extern void __cifs_fscache_invalidate_page(struct page *, struct inode *);
+extern void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page);
+extern void __cifs_fscache_uncache_page(struct inode *inode, struct page *page);
extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
extern int __cifs_readpages_from_fscache(struct inode *,
@@ -76,6 +79,20 @@ static inline void cifs_fscache_invalidate_page(struct page *page,
__cifs_fscache_invalidate_page(page, inode);
}
+static inline void cifs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page)
+{
+ if (PageFsCache(page))
+ __cifs_fscache_wait_on_page_write(inode, page);
+}
+
+static inline void cifs_fscache_uncache_page(struct inode *inode,
+ struct page *page)
+{
+ if (PageFsCache(page))
+ __cifs_fscache_uncache_page(inode, page);
+}
+
static inline int cifs_readpage_from_fscache(struct inode *inode,
struct page *page)
{
@@ -123,6 +140,7 @@ static inline void
cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) {}
static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
+static inline void cifs_fscache_update_inode_cookie(struct inode *inode) {}
static inline void cifs_fscache_set_inode_cookie(struct inode *inode,
struct file *filp) {}
static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {}
@@ -133,6 +151,11 @@ static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
static inline void cifs_fscache_invalidate_page(struct page *page,
struct inode *inode) {}
+static inline void cifs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page) {}
+static inline void cifs_fscache_uncache_page(struct inode *inode,
+ struct page *page) {}
+
static inline int
cifs_readpage_from_fscache(struct inode *inode, struct page *page)
{
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b96b253e7635..50c01cff4c84 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1625,7 +1625,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
goto unlink_out;
}
- cifs_close_all_deferred_files(tcon);
+ cifs_close_deferred_file(CIFS_I(inode));
if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = CIFSPOSIXDelFile(xid, tcon, full_path,
@@ -2084,6 +2084,7 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir,
FILE_UNIX_BASIC_INFO *info_buf_target;
unsigned int xid;
int rc, tmprc;
+ int retry_count = 0;
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
@@ -2113,10 +2114,24 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir,
goto cifs_rename_exit;
}
- cifs_close_all_deferred_files(tcon);
+ cifs_close_deferred_file(CIFS_I(d_inode(source_dentry)));
+ if (d_inode(target_dentry) != NULL)
+ cifs_close_deferred_file(CIFS_I(d_inode(target_dentry)));
+
rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
to_name);
+ if (rc == -EACCES) {
+ while (retry_count < 3) {
+ cifs_close_all_deferred_files(tcon);
+ rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
+ to_name);
+ if (rc != -EACCES)
+ break;
+ retry_count++;
+ }
+ }
+
/*
* No-replace is the natural behavior for CIFS, so skip unlink hacks.
*/
@@ -2282,6 +2297,7 @@ cifs_revalidate_mapping(struct inode *inode)
{
int rc;
unsigned long *flags = &CIFS_I(inode)->flags;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
/* swapfiles are not supposed to be shared */
if (IS_SWAPFILE(inode))
@@ -2293,11 +2309,16 @@ cifs_revalidate_mapping(struct inode *inode)
return rc;
if (test_and_clear_bit(CIFS_INO_INVALID_MAPPING, flags)) {
+ /* for cache=singleclient, do not invalidate */
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE)
+ goto skip_invalidate;
+
rc = cifs_invalidate_mapping(inode);
if (rc)
set_bit(CIFS_INO_INVALID_MAPPING, flags);
}
+skip_invalidate:
clear_bit_unlock(CIFS_INO_LOCK, flags);
smp_mb__after_atomic();
wake_up_bit(flags, CIFS_INO_LOCK);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 844abeb2b48f..9469f1cf0b46 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -723,13 +723,31 @@ void
cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode)
{
struct cifsFileInfo *cfile = NULL;
- struct cifs_deferred_close *dclose;
+ struct file_list *tmp_list, *tmp_next_list;
+ struct list_head file_head;
+
+ if (cifs_inode == NULL)
+ return;
+ INIT_LIST_HEAD(&file_head);
+ spin_lock(&cifs_inode->open_file_lock);
list_for_each_entry(cfile, &cifs_inode->openFileList, flist) {
- spin_lock(&cifs_inode->deferred_lock);
- if (cifs_is_deferred_close(cfile, &dclose))
- mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
- spin_unlock(&cifs_inode->deferred_lock);
+ if (delayed_work_pending(&cfile->deferred)) {
+ if (cancel_delayed_work(&cfile->deferred)) {
+ tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC);
+ if (tmp_list == NULL)
+ continue;
+ tmp_list->cfile = cfile;
+ list_add_tail(&tmp_list->list, &file_head);
+ }
+ }
+ }
+ spin_unlock(&cifs_inode->open_file_lock);
+
+ list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) {
+ _cifsFileInfo_put(tmp_list->cfile, true, false);
+ list_del(&tmp_list->list);
+ kfree(tmp_list);
}
}
@@ -738,20 +756,30 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon)
{
struct cifsFileInfo *cfile;
struct list_head *tmp;
+ struct file_list *tmp_list, *tmp_next_list;
+ struct list_head file_head;
+ INIT_LIST_HEAD(&file_head);
spin_lock(&tcon->open_file_lock);
list_for_each(tmp, &tcon->openFileList) {
cfile = list_entry(tmp, struct cifsFileInfo, tlist);
if (delayed_work_pending(&cfile->deferred)) {
- /*
- * If there is no pending work, mod_delayed_work queues new work.
- * So, Increase the ref count to avoid use-after-free.
- */
- if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
- cifsFileInfo_get(cfile);
+ if (cancel_delayed_work(&cfile->deferred)) {
+ tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC);
+ if (tmp_list == NULL)
+ continue;
+ tmp_list->cfile = cfile;
+ list_add_tail(&tmp_list->list, &file_head);
+ }
}
}
spin_unlock(&tcon->open_file_lock);
+
+ list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) {
+ _cifsFileInfo_put(tmp_list->cfile, true, false);
+ list_del(&tmp_list->list);
+ kfree(tmp_list);
+ }
}
/* parses DFS referral V3 structure
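
Both rewritten helpers above avoid calling _cifsFileInfo_put(), which can sleep, while holding a spinlock: they snapshot the cancelled handles onto a private list under the lock and drop the references only after unlocking. The general two-phase pattern, reduced to hypothetical names:

	/* Sketch: defer sleeping cleanup out of a spinlock by
	 * snapshotting items onto a private list. */
	#include <linux/list.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct thing {				/* hypothetical item */
		struct list_head node;
	};

	struct thing_container {
		spinlock_t lock;
		struct list_head things;
	};

	struct snap {
		struct list_head list;
		struct thing *t;
	};

	extern void thing_put(struct thing *t);	/* hypothetical, may sleep */

	static void drain_things(struct thing_container *c)
	{
		struct snap *s, *next;
		struct thing *t;
		LIST_HEAD(head);

		/* Phase 1: only collect under the spinlock; GFP_ATOMIC
		 * because sleeping is forbidden here. */
		spin_lock(&c->lock);
		list_for_each_entry(t, &c->things, node) {
			s = kmalloc(sizeof(*s), GFP_ATOMIC);
			if (!s)
				continue;	/* best effort, as above */
			s->t = t;
			list_add_tail(&s->list, &head);
		}
		spin_unlock(&c->lock);

		/* Phase 2: the lock is dropped; the sleeping put is safe. */
		list_for_each_entry_safe(s, next, &head, list) {
			thing_put(s->t);
			list_del(&s->list);
			kfree(s);
		}
	}
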
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index bfee176b901d..54d77c99e21c 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -369,7 +369,7 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
*/
static int
-initiate_cifs_search(const unsigned int xid, struct file *file,
+_initiate_cifs_search(const unsigned int xid, struct file *file,
const char *full_path)
{
__u16 search_flags;
@@ -451,6 +451,27 @@ error_exit:
return rc;
}
+static int
+initiate_cifs_search(const unsigned int xid, struct file *file,
+ const char *full_path)
+{
+ int rc, retry_count = 0;
+
+ do {
+ rc = _initiate_cifs_search(xid, file, full_path);
+ /*
+ * If we don't have enough credits to start reading the
+ * directory, just try again after a short wait.
+ */
+ if (rc != -EDEADLK)
+ break;
+
+ usleep_range(512, 2048);
+ } while (retry_count++ < 5);
+
+ return rc;
+}
+
/* return length of unicode string in bytes */
static int cifs_unicode_bytelen(const char *str)
{
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index c5785fd3f52e..118403fbeda2 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -799,30 +799,16 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
}
case CIFS_NEGFLAVOR_UNENCAP:
switch (requested) {
- case NTLM:
case NTLMv2:
return requested;
case Unspecified:
if (global_secflags & CIFSSEC_MAY_NTLMV2)
return NTLMv2;
- if (global_secflags & CIFSSEC_MAY_NTLM)
- return NTLM;
break;
default:
break;
}
- fallthrough; /* to attempt LANMAN authentication next */
- case CIFS_NEGFLAVOR_LANMAN:
- switch (requested) {
- case LANMAN:
- return requested;
- case Unspecified:
- if (global_secflags & CIFSSEC_MAY_LANMAN)
- return LANMAN;
- fallthrough;
- default:
- return Unspecified;
- }
+ fallthrough;
default:
return Unspecified;
}
@@ -877,7 +863,7 @@ sess_alloc_buffer(struct sess_data *sess_data, int wct)
return 0;
out_free_smb_buf:
- kfree(smb_buf);
+ cifs_small_buf_release(smb_buf);
sess_data->iov[0].iov_base = NULL;
sess_data->iov[0].iov_len = 0;
sess_data->buf0_type = CIFS_NO_BUFFER;
@@ -947,230 +933,6 @@ sess_sendreceive(struct sess_data *sess_data)
return rc;
}
-/*
- * LANMAN and plaintext are less secure and off by default.
- * So we make this explicitly be turned on in kconfig (in the
- * build) and turned on at runtime (changed from the default)
- * in proc/fs/cifs or via mount parm. Unfortunately this is
- * needed for old Win (e.g. Win95), some obscure NAS and OS/2
- */
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
-static void
-sess_auth_lanman(struct sess_data *sess_data)
-{
- int rc = 0;
- struct smb_hdr *smb_buf;
- SESSION_SETUP_ANDX *pSMB;
- char *bcc_ptr;
- struct cifs_ses *ses = sess_data->ses;
- char lnm_session_key[CIFS_AUTH_RESP_SIZE];
- __u16 bytes_remaining;
-
- /* lanman 2 style sessionsetup */
- /* wct = 10 */
- rc = sess_alloc_buffer(sess_data, 10);
- if (rc)
- goto out;
-
- pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
- bcc_ptr = sess_data->iov[2].iov_base;
- (void)cifs_ssetup_hdr(ses, pSMB);
-
- pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
-
- if (ses->user_name != NULL) {
- /* no capabilities flags in old lanman negotiation */
- pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
-
- /* Calculate hash with password and copy into bcc_ptr.
- * Encryption Key (stored as in cryptkey) gets used if the
- * security mode bit in Negotiate Protocol response states
- * to use challenge/response method (i.e. Password bit is 1).
- */
- rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
- ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
- true : false, lnm_session_key);
- if (rc)
- goto out;
-
- memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
- } else {
- pSMB->old_req.PasswordLength = 0;
- }
-
- /*
- * can not sign if LANMAN negotiated so no need
- * to calculate signing key? but what if server
- * changed to do higher than lanman dialect and
- * we reconnected would we ever calc signing_key?
- */
-
- cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n");
- /* Unicode not allowed for LANMAN dialects */
- ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
-
- sess_data->iov[2].iov_len = (long) bcc_ptr -
- (long) sess_data->iov[2].iov_base;
-
- rc = sess_sendreceive(sess_data);
- if (rc)
- goto out;
-
- pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
- smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
-
- /* lanman response has a word count of 3 */
- if (smb_buf->WordCount != 3) {
- rc = -EIO;
- cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
- goto out;
- }
-
- if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
- cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
-
- ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
- cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
-
- bytes_remaining = get_bcc(smb_buf);
- bcc_ptr = pByteArea(smb_buf);
-
- /* BB check if Unicode and decode strings */
- if (bytes_remaining == 0) {
- /* no string area to decode, do nothing */
- } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
- /* unicode string area must be word-aligned */
- if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
- ++bcc_ptr;
- --bytes_remaining;
- }
- decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
- sess_data->nls_cp);
- } else {
- decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
- sess_data->nls_cp);
- }
-
- rc = sess_establish_session(sess_data);
-out:
- sess_data->result = rc;
- sess_data->func = NULL;
- sess_free_buffer(sess_data);
-}
-
-#endif
-
-static void
-sess_auth_ntlm(struct sess_data *sess_data)
-{
- int rc = 0;
- struct smb_hdr *smb_buf;
- SESSION_SETUP_ANDX *pSMB;
- char *bcc_ptr;
- struct cifs_ses *ses = sess_data->ses;
- __u32 capabilities;
- __u16 bytes_remaining;
-
- /* old style NTLM sessionsetup */
- /* wct = 13 */
- rc = sess_alloc_buffer(sess_data, 13);
- if (rc)
- goto out;
-
- pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
- bcc_ptr = sess_data->iov[2].iov_base;
- capabilities = cifs_ssetup_hdr(ses, pSMB);
-
- pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
- if (ses->user_name != NULL) {
- pSMB->req_no_secext.CaseInsensitivePasswordLength =
- cpu_to_le16(CIFS_AUTH_RESP_SIZE);
- pSMB->req_no_secext.CaseSensitivePasswordLength =
- cpu_to_le16(CIFS_AUTH_RESP_SIZE);
-
- /* calculate ntlm response and session key */
- rc = setup_ntlm_response(ses, sess_data->nls_cp);
- if (rc) {
- cifs_dbg(VFS, "Error %d during NTLM authentication\n",
- rc);
- goto out;
- }
-
- /* copy ntlm response */
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
- } else {
- pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
- pSMB->req_no_secext.CaseSensitivePasswordLength = 0;
- }
-
- if (ses->capabilities & CAP_UNICODE) {
- /* unicode strings must be word aligned */
- if (sess_data->iov[0].iov_len % 2) {
- *bcc_ptr = 0;
- bcc_ptr++;
- }
- unicode_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
- } else {
- ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
- }
-
-
- sess_data->iov[2].iov_len = (long) bcc_ptr -
- (long) sess_data->iov[2].iov_base;
-
- rc = sess_sendreceive(sess_data);
- if (rc)
- goto out;
-
- pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
- smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
-
- if (smb_buf->WordCount != 3) {
- rc = -EIO;
- cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
- goto out;
- }
-
- if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
- cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
-
- ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
- cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
-
- bytes_remaining = get_bcc(smb_buf);
- bcc_ptr = pByteArea(smb_buf);
-
- /* BB check if Unicode and decode strings */
- if (bytes_remaining == 0) {
- /* no string area to decode, do nothing */
- } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
- /* unicode string area must be word-aligned */
- if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
- ++bcc_ptr;
- --bytes_remaining;
- }
- decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
- sess_data->nls_cp);
- } else {
- decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
- sess_data->nls_cp);
- }
-
- rc = sess_establish_session(sess_data);
-out:
- sess_data->result = rc;
- sess_data->func = NULL;
- sess_free_buffer(sess_data);
- kfree(ses->auth_key.response);
- ses->auth_key.response = NULL;
-}
-
static void
sess_auth_ntlmv2(struct sess_data *sess_data)
{
@@ -1675,21 +1437,6 @@ static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data)
}
switch (type) {
- case LANMAN:
- /* LANMAN and plaintext are less secure and off by default.
- * So we make this explicitly be turned on in kconfig (in the
- * build) and turned on at runtime (changed from the default)
- * in proc/fs/cifs or via mount parm. Unfortunately this is
- * needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
-#ifdef CONFIG_CIFS_WEAK_PW_HASH
- sess_data->func = sess_auth_lanman;
- break;
-#else
- return -EOPNOTSUPP;
-#endif
- case NTLM:
- sess_data->func = sess_auth_ntlm;
- break;
case NTLMv2:
sess_data->func = sess_auth_ntlmv2;
break;
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index cea39bcecbab..181514b8770d 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: LGPL-2.1
/*
- * fs/smb2/smb2maperror.c
*
* Functions which do error mapping of SMB2 status codes to POSIX errors
*
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 2dfd0d8297eb..ddc0e8f97872 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3590,6 +3590,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
return rc;
}
+ filemap_invalidate_lock(inode->i_mapping);
/*
* We implement the punch hole through ioctl, so we need remove the page
* caches first, otherwise the data may be inconsistent with the server.
@@ -3607,6 +3608,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
sizeof(struct file_zero_data_information),
CIFSMaxBufSize, NULL, NULL);
free_xid(xid);
+ filemap_invalidate_unlock(inode->i_mapping);
return rc;
}
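
filemap_invalidate_lock() serializes page-cache invalidation against page faults via the new mapping->invalidate_lock; the same primitive replaces ext2's dax_sem and ext4's i_mmap_sem later in this series. The bracketing is always the same. A minimal sketch, where do_remote_zero() is a hypothetical stand-in for the server-side FSCTL:

	/* Sketch: guarding page-cache invalidation against faults. */
	#include <linux/fs.h>
	#include <linux/mm.h>

	extern int do_remote_zero(struct inode *inode, loff_t off,
				  loff_t len);	/* hypothetical */

	static int punch_hole_sketch(struct inode *inode, loff_t off,
				     loff_t len)
	{
		int rc;

		filemap_invalidate_lock(inode->i_mapping);

		/* No new pages can be faulted into this mapping now. */
		truncate_pagecache_range(inode, off, off + len - 1);
		rc = do_remote_zero(inode, off, len);

		filemap_invalidate_unlock(inode->i_mapping);
		return rc;
	}
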
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 781d14e5f2af..b6d2e3591927 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2426,7 +2426,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
memcpy(aclptr, &acl, sizeof(struct cifs_acl));
buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd);
- *len = ptr - (__u8 *)buf;
+ *len = roundup(ptr - (__u8 *)buf, 8);
return buf;
}
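
SMB2 create contexts are chained at 8-byte-aligned offsets, so the length reported for the variable-size security-descriptor context is now rounded up instead of being the raw byte count. The arithmetic, in isolation:

	/* Sketch: 8-byte alignment of a variable-length create context. */
	#include <linux/kernel.h>	/* roundup() */
	#include <linux/types.h>

	static unsigned int ctx_len_sketch(const u8 *start, const u8 *end)
	{
		/*
		 * roundup(x, 8) pads x up to the next multiple of 8; a
		 * 93-byte context is reported as 96 so the next context
		 * in the chain stays 8-byte aligned.
		 */
		return roundup(end - start, 8);
	}
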
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 39a938443e3e..10047cc55286 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -18,13 +18,13 @@
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/random.h>
-#include <crypto/des.h>
#include "cifs_fs_sb.h"
#include "cifs_unicode.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifs_debug.h"
#include "cifsproto.h"
+#include "../cifs_common/md4.h"
#ifndef false
#define false 0
@@ -38,126 +38,29 @@
#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
-static void
-str_to_key(unsigned char *str, unsigned char *key)
-{
- int i;
-
- key[0] = str[0] >> 1;
- key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
- key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
- key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
- key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
- key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
- key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
- key[7] = str[6] & 0x7F;
- for (i = 0; i < 8; i++)
- key[i] = (key[i] << 1);
-}
-
-static int
-smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
-{
- unsigned char key2[8];
- struct des_ctx ctx;
-
- str_to_key(key, key2);
-
- if (fips_enabled) {
- cifs_dbg(VFS, "FIPS compliance enabled: DES not permitted\n");
- return -ENOENT;
- }
-
- des_expand_key(&ctx, key2, DES_KEY_SIZE);
- des_encrypt(&ctx, out, in);
- memzero_explicit(&ctx, sizeof(ctx));
-
- return 0;
-}
-
-static int
-E_P16(unsigned char *p14, unsigned char *p16)
-{
- int rc;
- unsigned char sp8[8] =
- { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
-
- rc = smbhash(p16, sp8, p14);
- if (rc)
- return rc;
- rc = smbhash(p16 + 8, sp8, p14 + 7);
- return rc;
-}
-
-static int
-E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
-{
- int rc;
-
- rc = smbhash(p24, c8, p21);
- if (rc)
- return rc;
- rc = smbhash(p24 + 8, c8, p21 + 7);
- if (rc)
- return rc;
- rc = smbhash(p24 + 16, c8, p21 + 14);
- return rc;
-}
-
/* produce a md4 message digest from data of length n bytes */
-int
+static int
mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
{
int rc;
- struct crypto_shash *md4 = NULL;
- struct sdesc *sdescmd4 = NULL;
-
- rc = cifs_alloc_hash("md4", &md4, &sdescmd4);
- if (rc)
- goto mdfour_err;
+ struct md4_ctx mctx;
- rc = crypto_shash_init(&sdescmd4->shash);
+ rc = cifs_md4_init(&mctx);
if (rc) {
- cifs_dbg(VFS, "%s: Could not init md4 shash\n", __func__);
+ cifs_dbg(VFS, "%s: Could not init MD4\n", __func__);
goto mdfour_err;
}
- rc = crypto_shash_update(&sdescmd4->shash, link_str, link_len);
+ rc = cifs_md4_update(&mctx, link_str, link_len);
if (rc) {
- cifs_dbg(VFS, "%s: Could not update with link_str\n", __func__);
+ cifs_dbg(VFS, "%s: Could not update MD4\n", __func__);
goto mdfour_err;
}
- rc = crypto_shash_final(&sdescmd4->shash, md4_hash);
+ rc = cifs_md4_final(&mctx, md4_hash);
if (rc)
- cifs_dbg(VFS, "%s: Could not generate md4 hash\n", __func__);
-
-mdfour_err:
- cifs_free_hash(&md4, &sdescmd4);
- return rc;
-}
-
-/*
- This implements the X/Open SMB password encryption
- It takes a password, a 8 byte "crypt key" and puts 24 bytes of
- encrypted password into p24 */
-/* Note that password must be uppercased and null terminated */
-int
-SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
-{
- int rc;
- unsigned char p14[14], p16[16], p21[21];
+ cifs_dbg(VFS, "%s: Could not finalize MD4\n", __func__);
- memset(p14, '\0', 14);
- memset(p16, '\0', 16);
- memset(p21, '\0', 21);
-
- memcpy(p14, passwd, 14);
- rc = E_P16(p14, p16);
- if (rc)
- return rc;
-
- memcpy(p21, p16, 16);
- rc = E_P24(p21, c8, p24);
+mdfour_err:
return rc;
}
@@ -186,25 +89,3 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16,
return rc;
}
-
-/* Does the NT MD4 hash then des encryption. */
-int
-SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24,
- const struct nls_table *codepage)
-{
- int rc;
- unsigned char p16[16], p21[21];
-
- memset(p16, '\0', 16);
- memset(p21, '\0', 21);
-
- rc = E_md4hash(passwd, p16, codepage);
- if (rc) {
- cifs_dbg(FYI, "%s Can't generate NT hash, error: %d\n",
- __func__, rc);
- return rc;
- }
- memcpy(p21, p16, 16);
- rc = E_P24(p21, c8, p24);
- return rc;
-}
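
With the crypto-API MD4 dependency dropped, mdfour() drives the forked implementation directly; E_md4hash() (unchanged above this hunk) still derives the NT hash as MD4 over the UTF-16LE password. A hedged sketch of that derivation using the new helpers, assuming the little-endian UTF-16 buffer is already prepared:

	/* Sketch: NT hash = MD4(UTF-16LE(password)), via the forked MD4. */
	#include <linux/types.h>
	#include "../cifs_common/md4.h"

	static int nt_hash_sketch(const u8 *utf16le_pw, int pw_bytes,
				  u8 out[16])
	{
		struct md4_ctx mctx;
		int rc;

		rc = cifs_md4_init(&mctx);
		if (rc)
			return rc;
		rc = cifs_md4_update(&mctx, utf16le_pw, pw_bytes);
		if (rc)
			return rc;
		/* cifs_md4_final() also wipes the context. */
		return cifs_md4_final(&mctx, out);
	}
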
diff --git a/fs/cifs_common/Makefile b/fs/cifs_common/Makefile
new file mode 100644
index 000000000000..6fedd2f88a25
--- /dev/null
+++ b/fs/cifs_common/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for Linux filesystem routines that are shared by client and server.
+#
+
+obj-$(CONFIG_CIFS_COMMON) += cifs_arc4.o
+obj-$(CONFIG_CIFS_COMMON) += cifs_md4.o
diff --git a/fs/cifs_common/arc4.h b/fs/cifs_common/arc4.h
new file mode 100644
index 000000000000..12e71ec033a1
--- /dev/null
+++ b/fs/cifs_common/arc4.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Common values for ARC4 Cipher Algorithm
+ */
+
+#ifndef _CRYPTO_ARC4_H
+#define _CRYPTO_ARC4_H
+
+#include <linux/types.h>
+
+#define ARC4_MIN_KEY_SIZE 1
+#define ARC4_MAX_KEY_SIZE 256
+#define ARC4_BLOCK_SIZE 1
+
+struct arc4_ctx {
+ u32 S[256];
+ u32 x, y;
+};
+
+int cifs_arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len);
+void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len);
+
+#endif /* _CRYPTO_ARC4_H */
diff --git a/fs/cifs_common/cifs_arc4.c b/fs/cifs_common/cifs_arc4.c
new file mode 100644
index 000000000000..b964cc682944
--- /dev/null
+++ b/fs/cifs_common/cifs_arc4.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cryptographic API
+ *
+ * ARC4 Cipher Algorithm
+ *
+ * Jon Oberheide <jon@oberheide.org>
+ */
+
+#include <linux/module.h>
+#include "arc4.h"
+
+MODULE_LICENSE("GPL");
+
+int cifs_arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len)
+{
+ int i, j = 0, k = 0;
+
+ ctx->x = 1;
+ ctx->y = 0;
+
+ for (i = 0; i < 256; i++)
+ ctx->S[i] = i;
+
+ for (i = 0; i < 256; i++) {
+ u32 a = ctx->S[i];
+
+ j = (j + in_key[k] + a) & 0xff;
+ ctx->S[i] = ctx->S[j];
+ ctx->S[j] = a;
+ if (++k >= key_len)
+ k = 0;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cifs_arc4_setkey);
+
+void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len)
+{
+ u32 *const S = ctx->S;
+ u32 x, y, a, b;
+ u32 ty, ta, tb;
+
+ if (len == 0)
+ return;
+
+ x = ctx->x;
+ y = ctx->y;
+
+ a = S[x];
+ y = (y + a) & 0xff;
+ b = S[y];
+
+ do {
+ S[y] = a;
+ a = (a + b) & 0xff;
+ S[x] = b;
+ x = (x + 1) & 0xff;
+ ta = S[x];
+ ty = (y + ta) & 0xff;
+ tb = S[ty];
+ *out++ = *in++ ^ S[a];
+ if (--len == 0)
+ break;
+ y = ty;
+ a = ta;
+ b = tb;
+ } while (true);
+
+ ctx->x = x;
+ ctx->y = y;
+}
+EXPORT_SYMBOL_GPL(cifs_arc4_crypt);
+
+static int __init
+init_cifs_common(void)
+{
+ return 0;
+}
+static void __exit
+exit_cifs_common(void)
+{
+}
+
+module_init(init_cifs_common)
+module_exit(exit_cifs_common)
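
For reference, the forked cipher is used as a plain key-then-crypt sequence; RC4 is a stream cipher, so the same call both encrypts and decrypts, and in-place operation (out == in) is fine. A minimal usage sketch with hypothetical caller-supplied buffers:

	/* Sketch: one-shot ARC4 with the forked helpers. */
	#include <linux/string.h>
	#include "arc4.h"

	static void arc4_oneshot_sketch(const u8 *key, unsigned int keylen,
					u8 *buf, unsigned int len)
	{
		struct arc4_ctx ctx;

		cifs_arc4_setkey(&ctx, key, keylen);
		/* In-place: out and in may be the same buffer. */
		cifs_arc4_crypt(&ctx, buf, buf, len);
		memzero_explicit(&ctx, sizeof(ctx));	/* drop key schedule */
	}
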
diff --git a/fs/cifs_common/cifs_md4.c b/fs/cifs_common/cifs_md4.c
new file mode 100644
index 000000000000..50f78cfc6ce9
--- /dev/null
+++ b/fs/cifs_common/cifs_md4.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cryptographic API.
+ *
+ * MD4 Message Digest Algorithm (RFC1320).
+ *
+ * Implementation derived from Andrew Tridgell and Steve French's
+ * CIFS MD4 implementation, and the cryptoapi implementation
+ * originally based on the public domain implementation written
+ * by Colin Plumb in 1993.
+ *
+ * Copyright (c) Andrew Tridgell 1997-1998.
+ * Modified by Steve French (sfrench@us.ibm.com) 2002
+ * Copyright (c) Cryptoapi developers.
+ * Copyright (c) 2002 David S. Miller (davem@redhat.com)
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ *
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include "md4.h"
+
+MODULE_LICENSE("GPL");
+
+static inline u32 lshift(u32 x, unsigned int s)
+{
+ x &= 0xFFFFFFFF;
+ return ((x << s) & 0xFFFFFFFF) | (x >> (32 - s));
+}
+
+static inline u32 F(u32 x, u32 y, u32 z)
+{
+ return (x & y) | ((~x) & z);
+}
+
+static inline u32 G(u32 x, u32 y, u32 z)
+{
+ return (x & y) | (x & z) | (y & z);
+}
+
+static inline u32 H(u32 x, u32 y, u32 z)
+{
+ return x ^ y ^ z;
+}
+
+#define ROUND1(a,b,c,d,k,s) (a = lshift(a + F(b,c,d) + k, s))
+#define ROUND2(a,b,c,d,k,s) (a = lshift(a + G(b,c,d) + k + (u32)0x5A827999,s))
+#define ROUND3(a,b,c,d,k,s) (a = lshift(a + H(b,c,d) + k + (u32)0x6ED9EBA1,s))
+
+static void md4_transform(u32 *hash, u32 const *in)
+{
+ u32 a, b, c, d;
+
+ a = hash[0];
+ b = hash[1];
+ c = hash[2];
+ d = hash[3];
+
+ ROUND1(a, b, c, d, in[0], 3);
+ ROUND1(d, a, b, c, in[1], 7);
+ ROUND1(c, d, a, b, in[2], 11);
+ ROUND1(b, c, d, a, in[3], 19);
+ ROUND1(a, b, c, d, in[4], 3);
+ ROUND1(d, a, b, c, in[5], 7);
+ ROUND1(c, d, a, b, in[6], 11);
+ ROUND1(b, c, d, a, in[7], 19);
+ ROUND1(a, b, c, d, in[8], 3);
+ ROUND1(d, a, b, c, in[9], 7);
+ ROUND1(c, d, a, b, in[10], 11);
+ ROUND1(b, c, d, a, in[11], 19);
+ ROUND1(a, b, c, d, in[12], 3);
+ ROUND1(d, a, b, c, in[13], 7);
+ ROUND1(c, d, a, b, in[14], 11);
+ ROUND1(b, c, d, a, in[15], 19);
+
+ ROUND2(a, b, c, d, in[0], 3);
+ ROUND2(d, a, b, c, in[4], 5);
+ ROUND2(c, d, a, b, in[8], 9);
+ ROUND2(b, c, d, a, in[12], 13);
+ ROUND2(a, b, c, d, in[1], 3);
+ ROUND2(d, a, b, c, in[5], 5);
+ ROUND2(c, d, a, b, in[9], 9);
+ ROUND2(b, c, d, a, in[13], 13);
+ ROUND2(a, b, c, d, in[2], 3);
+ ROUND2(d, a, b, c, in[6], 5);
+ ROUND2(c, d, a, b, in[10], 9);
+ ROUND2(b, c, d, a, in[14], 13);
+ ROUND2(a, b, c, d, in[3], 3);
+ ROUND2(d, a, b, c, in[7], 5);
+ ROUND2(c, d, a, b, in[11], 9);
+ ROUND2(b, c, d, a, in[15], 13);
+
+ ROUND3(a, b, c, d, in[0], 3);
+ ROUND3(d, a, b, c, in[8], 9);
+ ROUND3(c, d, a, b, in[4], 11);
+ ROUND3(b, c, d, a, in[12], 15);
+ ROUND3(a, b, c, d, in[2], 3);
+ ROUND3(d, a, b, c, in[10], 9);
+ ROUND3(c, d, a, b, in[6], 11);
+ ROUND3(b, c, d, a, in[14], 15);
+ ROUND3(a, b, c, d, in[1], 3);
+ ROUND3(d, a, b, c, in[9], 9);
+ ROUND3(c, d, a, b, in[5], 11);
+ ROUND3(b, c, d, a, in[13], 15);
+ ROUND3(a, b, c, d, in[3], 3);
+ ROUND3(d, a, b, c, in[11], 9);
+ ROUND3(c, d, a, b, in[7], 11);
+ ROUND3(b, c, d, a, in[15], 15);
+
+ hash[0] += a;
+ hash[1] += b;
+ hash[2] += c;
+ hash[3] += d;
+}
+
+static inline void md4_transform_helper(struct md4_ctx *ctx)
+{
+ le32_to_cpu_array(ctx->block, ARRAY_SIZE(ctx->block));
+ md4_transform(ctx->hash, ctx->block);
+}
+
+int cifs_md4_init(struct md4_ctx *mctx)
+{
+ memset(mctx, 0, sizeof(struct md4_ctx));
+ mctx->hash[0] = 0x67452301;
+ mctx->hash[1] = 0xefcdab89;
+ mctx->hash[2] = 0x98badcfe;
+ mctx->hash[3] = 0x10325476;
+ mctx->byte_count = 0;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cifs_md4_init);
+
+int cifs_md4_update(struct md4_ctx *mctx, const u8 *data, unsigned int len)
+{
+ const u32 avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f);
+
+ mctx->byte_count += len;
+
+ if (avail > len) {
+ memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
+ data, len);
+ return 0;
+ }
+
+ memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
+ data, avail);
+
+ md4_transform_helper(mctx);
+ data += avail;
+ len -= avail;
+
+ while (len >= sizeof(mctx->block)) {
+ memcpy(mctx->block, data, sizeof(mctx->block));
+ md4_transform_helper(mctx);
+ data += sizeof(mctx->block);
+ len -= sizeof(mctx->block);
+ }
+
+ memcpy(mctx->block, data, len);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cifs_md4_update);
+
+int cifs_md4_final(struct md4_ctx *mctx, u8 *out)
+{
+ const unsigned int offset = mctx->byte_count & 0x3f;
+ char *p = (char *)mctx->block + offset;
+ int padding = 56 - (offset + 1);
+
+ *p++ = 0x80;
+ if (padding < 0) {
+ memset(p, 0x00, padding + sizeof(u64));
+ md4_transform_helper(mctx);
+ p = (char *)mctx->block;
+ padding = 56;
+ }
+
+ memset(p, 0, padding);
+ mctx->block[14] = mctx->byte_count << 3;
+ mctx->block[15] = mctx->byte_count >> 29;
+ le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
+ sizeof(u64)) / sizeof(u32));
+ md4_transform(mctx->hash, mctx->block);
+ cpu_to_le32_array(mctx->hash, ARRAY_SIZE(mctx->hash));
+ memcpy(out, mctx->hash, sizeof(mctx->hash));
+ memset(mctx, 0, sizeof(*mctx));
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cifs_md4_final);
diff --git a/fs/cifs_common/md4.h b/fs/cifs_common/md4.h
new file mode 100644
index 000000000000..5337becc699a
--- /dev/null
+++ b/fs/cifs_common/md4.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Common values for the MD4 Message Digest Algorithm
+ */
+
+#ifndef _CIFS_MD4_H
+#define _CIFS_MD4_H
+
+#include <linux/types.h>
+
+#define MD4_DIGEST_SIZE 16
+#define MD4_HMAC_BLOCK_SIZE 64
+#define MD4_BLOCK_WORDS 16
+#define MD4_HASH_WORDS 4
+
+struct md4_ctx {
+ u32 hash[MD4_HASH_WORDS];
+ u32 block[MD4_BLOCK_WORDS];
+ u64 byte_count;
+};
+
+
+int cifs_md4_init(struct md4_ctx *mctx);
+int cifs_md4_update(struct md4_ctx *mctx, const u8 *data, unsigned int len);
+int cifs_md4_final(struct md4_ctx *mctx, u8 *out);
+
+#endif /* _CIFS_MD4_H */
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 5a0be9985bae..0ad32150611e 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -177,28 +177,22 @@ out:
return retval;
}
-/* Fill [buffer, buffer + pos) with data coming from @from. */
-static int fill_write_buffer(struct configfs_buffer *buffer, loff_t pos,
+/* Fill @buffer with data coming from @from. */
+static int fill_write_buffer(struct configfs_buffer *buffer,
struct iov_iter *from)
{
- loff_t to_copy;
int copied;
- u8 *to;
if (!buffer->page)
buffer->page = (char *)__get_free_pages(GFP_KERNEL, 0);
if (!buffer->page)
return -ENOMEM;
- to_copy = SIMPLE_ATTR_SIZE - 1 - pos;
- if (to_copy <= 0)
- return 0;
- to = buffer->page + pos;
- copied = copy_from_iter(to, to_copy, from);
+ copied = copy_from_iter(buffer->page, SIMPLE_ATTR_SIZE - 1, from);
buffer->needs_read_fill = 1;
/* if buf is assumed to contain a string, terminate it by \0,
* so e.g. sscanf() can scan the string easily */
- to[copied] = 0;
+ buffer->page[copied] = 0;
return copied ? : -EFAULT;
}
@@ -227,10 +221,10 @@ static ssize_t configfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct configfs_buffer *buffer = file->private_data;
- ssize_t len;
+ int len;
mutex_lock(&buffer->mutex);
- len = fill_write_buffer(buffer, iocb->ki_pos, from);
+ len = fill_write_buffer(buffer, from);
if (len > 0)
len = flush_write_buffer(file, buffer, len);
if (len > 0)
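
After this fix, fill_write_buffer() always copies from the start of its one-page buffer and NUL-terminates at the copied length, ignoring the file position entirely. The core contract, sketched with the buffer assumed to be one page (the configfs code actually caps the copy at SIMPLE_ATTR_SIZE - 1):

	/* Sketch: copying a write payload into a NUL-terminated page. */
	#include <linux/mm.h>
	#include <linux/uio.h>

	static int fill_page_sketch(char *page, struct iov_iter *from)
	{
		size_t copied;

		/* Leave one byte for the terminating NUL. */
		copied = copy_from_iter(page, PAGE_SIZE - 1, from);
		page[copied] = '\0';	/* lets sscanf() parse safely */
		return copied ? copied : -EFAULT;
	}
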
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index d00455440d08..eb538c28df94 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -26,7 +26,7 @@
* it to find the directory entry again if requested. Naively, that would just
* mean using the ciphertext filenames. However, since the ciphertext filenames
* can contain illegal characters ('\0' and '/'), they must be encoded in some
- * way. We use base64. But that can cause names to exceed NAME_MAX (255
+ * way. We use base64url. But that can cause names to exceed NAME_MAX (255
* bytes), so we also need to use a strong hash to abbreviate long names.
*
* The filesystem may also need another kind of hash, the "dirhash", to quickly
@@ -38,7 +38,7 @@
* casefolded directories use this type of dirhash. At least in these cases,
* each no-key name must include the name's dirhash too.
*
- * To meet all these requirements, we base64-encode the following
+ * To meet all these requirements, we base64url-encode the following
* variable-length structure. It contains the dirhash, or 0's if the filesystem
* didn't provide one; up to 149 bytes of the ciphertext name; and for
* ciphertexts longer than 149 bytes, also the SHA-256 of the remaining bytes.
@@ -52,15 +52,19 @@ struct fscrypt_nokey_name {
u32 dirhash[2];
u8 bytes[149];
u8 sha256[SHA256_DIGEST_SIZE];
-}; /* 189 bytes => 252 bytes base64-encoded, which is <= NAME_MAX (255) */
+}; /* 189 bytes => 252 bytes base64url-encoded, which is <= NAME_MAX (255) */
/*
- * Decoded size of max-size nokey name, i.e. a name that was abbreviated using
+ * Decoded size of max-size no-key name, i.e. a name that was abbreviated using
* the strong hash and thus includes the 'sha256' field. This isn't simply
* sizeof(struct fscrypt_nokey_name), as the padding at the end isn't included.
*/
#define FSCRYPT_NOKEY_NAME_MAX offsetofend(struct fscrypt_nokey_name, sha256)
+/* Encoded size of max-size no-key name */
+#define FSCRYPT_NOKEY_NAME_MAX_ENCODED \
+ FSCRYPT_BASE64URL_CHARS(FSCRYPT_NOKEY_NAME_MAX)
+
static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
{
if (str->len == 1 && str->name[0] == '.')
@@ -175,62 +179,82 @@ static int fname_decrypt(const struct inode *inode,
return 0;
}
-static const char lookup_table[65] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+static const char base64url_table[65] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
-#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
+#define FSCRYPT_BASE64URL_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
/**
- * base64_encode() - base64-encode some bytes
- * @src: the bytes to encode
- * @len: number of bytes to encode
- * @dst: (output) the base64-encoded string. Not NUL-terminated.
+ * fscrypt_base64url_encode() - base64url-encode some binary data
+ * @src: the binary data to encode
+ * @srclen: the length of @src in bytes
+ * @dst: (output) the base64url-encoded string. Not NUL-terminated.
*
- * Encodes the input string using characters from the set [A-Za-z0-9+,].
- * The encoded string is roughly 4/3 times the size of the input string.
+ * Encodes data using base64url encoding, i.e. the "Base 64 Encoding with URL
+ * and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't used,
+ * as it's unneeded and not required by the RFC. base64url is used instead of
+ * base64 to avoid the '/' character, which isn't allowed in filenames.
*
- * Return: length of the encoded string
+ * Return: the length of the resulting base64url-encoded string in bytes.
+ * This will be equal to FSCRYPT_BASE64URL_CHARS(srclen).
*/
-static int base64_encode(const u8 *src, int len, char *dst)
+static int fscrypt_base64url_encode(const u8 *src, int srclen, char *dst)
{
- int i, bits = 0, ac = 0;
+ u32 ac = 0;
+ int bits = 0;
+ int i;
char *cp = dst;
- for (i = 0; i < len; i++) {
- ac += src[i] << bits;
+ for (i = 0; i < srclen; i++) {
+ ac = (ac << 8) | src[i];
bits += 8;
do {
- *cp++ = lookup_table[ac & 0x3f];
- ac >>= 6;
bits -= 6;
+ *cp++ = base64url_table[(ac >> bits) & 0x3f];
} while (bits >= 6);
}
if (bits)
- *cp++ = lookup_table[ac & 0x3f];
+ *cp++ = base64url_table[(ac << (6 - bits)) & 0x3f];
return cp - dst;
}
-static int base64_decode(const char *src, int len, u8 *dst)
+/**
+ * fscrypt_base64url_decode() - base64url-decode a string
+ * @src: the string to decode. Doesn't need to be NUL-terminated.
+ * @srclen: the length of @src in bytes
+ * @dst: (output) the decoded binary data
+ *
+ * Decodes a string using base64url encoding, i.e. the "Base 64 Encoding with
+ * URL and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't
+ * accepted, nor are non-encoding characters such as whitespace.
+ *
+ * This implementation hasn't been optimized for performance.
+ *
+ * Return: the length of the resulting decoded binary data in bytes,
+ * or -1 if the string isn't a valid base64url string.
+ */
+static int fscrypt_base64url_decode(const char *src, int srclen, u8 *dst)
{
- int i, bits = 0, ac = 0;
- const char *p;
- u8 *cp = dst;
+ u32 ac = 0;
+ int bits = 0;
+ int i;
+ u8 *bp = dst;
+
+ for (i = 0; i < srclen; i++) {
+ const char *p = strchr(base64url_table, src[i]);
- for (i = 0; i < len; i++) {
- p = strchr(lookup_table, src[i]);
if (p == NULL || src[i] == 0)
- return -2;
- ac += (p - lookup_table) << bits;
+ return -1;
+ ac = (ac << 6) | (p - base64url_table);
bits += 6;
if (bits >= 8) {
- *cp++ = ac & 0xff;
- ac >>= 8;
bits -= 8;
+ *bp++ = (u8)(ac >> bits);
}
}
- if (ac)
+ if (ac & ((1 << bits) - 1))
return -1;
- return cp - dst;
+ return bp - dst;
}
bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
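
The encoded-length macro is the usual ceil(4n/3) with no '='-padding; the sizes quoted in the comments above fall straight out of it. A self-contained userspace check:

	/* Sketch: the encoded-size arithmetic behind
	 * FSCRYPT_BASE64URL_CHARS(), with DIV_ROUND_UP expanded. */
	#include <stdio.h>

	#define BASE64URL_CHARS(nbytes) (((nbytes) * 4 + 2) / 3)

	int main(void)
	{
		/* 189-byte max no-key name => 252 chars, <= NAME_MAX (255). */
		printf("189 -> %d\n", BASE64URL_CHARS(189));
		/* 149 ciphertext bytes alone => 199 chars. */
		printf("149 -> %d\n", BASE64URL_CHARS(149));
		return 0;
	}
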
@@ -263,10 +287,8 @@ bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
int fscrypt_fname_alloc_buffer(u32 max_encrypted_len,
struct fscrypt_str *crypto_str)
{
- const u32 max_encoded_len = BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX);
- u32 max_presented_len;
-
- max_presented_len = max(max_encoded_len, max_encrypted_len);
+ u32 max_presented_len = max_t(u32, FSCRYPT_NOKEY_NAME_MAX_ENCODED,
+ max_encrypted_len);
crypto_str->name = kmalloc(max_presented_len + 1, GFP_NOFS);
if (!crypto_str->name)
@@ -342,7 +364,7 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
offsetof(struct fscrypt_nokey_name, bytes));
BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, bytes) !=
offsetof(struct fscrypt_nokey_name, sha256));
- BUILD_BUG_ON(BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX) > NAME_MAX);
+ BUILD_BUG_ON(FSCRYPT_NOKEY_NAME_MAX_ENCODED > NAME_MAX);
nokey_name.dirhash[0] = hash;
nokey_name.dirhash[1] = minor_hash;
@@ -358,7 +380,8 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
nokey_name.sha256);
size = FSCRYPT_NOKEY_NAME_MAX;
}
- oname->len = base64_encode((const u8 *)&nokey_name, size, oname->name);
+ oname->len = fscrypt_base64url_encode((const u8 *)&nokey_name, size,
+ oname->name);
return 0;
}
EXPORT_SYMBOL(fscrypt_fname_disk_to_usr);
@@ -432,14 +455,15 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
* user-supplied name
*/
- if (iname->len > BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX))
+ if (iname->len > FSCRYPT_NOKEY_NAME_MAX_ENCODED)
return -ENOENT;
fname->crypto_buf.name = kmalloc(FSCRYPT_NOKEY_NAME_MAX, GFP_KERNEL);
if (fname->crypto_buf.name == NULL)
return -ENOMEM;
- ret = base64_decode(iname->name, iname->len, fname->crypto_buf.name);
+ ret = fscrypt_base64url_decode(iname->name, iname->len,
+ fname->crypto_buf.name);
if (ret < (int)offsetof(struct fscrypt_nokey_name, bytes[1]) ||
(ret > offsetof(struct fscrypt_nokey_name, sha256) &&
ret != FSCRYPT_NOKEY_NAME_MAX)) {
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index a73b0376e6f3..af74599ae1cf 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -384,3 +384,47 @@ err_kfree:
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(fscrypt_get_symlink);
+
+/**
+ * fscrypt_symlink_getattr() - set the correct st_size for encrypted symlinks
+ * @path: the path for the encrypted symlink being queried
+ * @stat: the struct being filled with the symlink's attributes
+ *
+ * Override st_size of encrypted symlinks to be the length of the decrypted
+ * symlink target (or the no-key encoded symlink target, if the key is
+ * unavailable) rather than the length of the encrypted symlink target. This is
+ * necessary for st_size to match the symlink target that userspace actually
+ * sees. POSIX requires this, and some userspace programs depend on it.
+ *
+ * This requires reading the symlink target from disk if needed, setting up the
+ * inode's encryption key if possible, and then decrypting or encoding the
+ * symlink target. This makes lstat() more heavyweight than is normally the
+ * case. However, decrypted symlink targets will be cached in ->i_link, so
+ * usually the symlink won't have to be read and decrypted again later if/when
+ * it is actually followed, readlink() is called, or lstat() is called again.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat)
+{
+ struct dentry *dentry = path->dentry;
+ struct inode *inode = d_inode(dentry);
+ const char *link;
+ DEFINE_DELAYED_CALL(done);
+
+ /*
+ * To get the symlink target that userspace will see (whether it's the
+ * decrypted target or the no-key encoded target), we can just get it in
+ * the same way the VFS does during path resolution and readlink().
+ */
+ link = READ_ONCE(inode->i_link);
+ if (!link) {
+ link = inode->i_op->get_link(dentry, inode, &done);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+ }
+ stat->size = strlen(link);
+ do_delayed_call(&done);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fscrypt_symlink_getattr);
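
A filesystem adopting the new helper would call it from its symlink ->getattr() after filling the generic attributes, so st_size matches what readlink() returns. A hedged sketch of such a caller (the actual ext4/f2fs/ubifs wiring is not part of this hunk):

	/* Sketch: wiring fscrypt_symlink_getattr() into a symlink
	 * getattr op of a hypothetical filesystem. */
	#include <linux/fs.h>
	#include <linux/fscrypt.h>

	static int fs_symlink_getattr_sketch(struct user_namespace *mnt_userns,
					     const struct path *path,
					     struct kstat *stat,
					     u32 request_mask,
					     unsigned int flags)
	{
		struct inode *inode = d_inode(path->dentry);

		generic_fillattr(mnt_userns, inode, stat);
		/* Override st_size with the presented-target length. */
		return fscrypt_symlink_getattr(path, stat);
	}
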
diff --git a/fs/dax.c b/fs/dax.c
index 118c9e2923f5..4e3e5a283a91 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -722,7 +722,7 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
return rc;
id = dax_read_lock();
- rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(PAGE_SIZE), &kaddr, NULL);
+ rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index e265b6dd4f34..3627dd7d25db 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -25,8 +25,6 @@
#include <linux/idr.h>
#include <linux/uio.h>
-DEFINE_PER_CPU(int, eventfd_wake_count);
-
static DEFINE_IDA(eventfd_ida);
struct eventfd_ctx {
@@ -67,21 +65,21 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
* Deadlock or stack overflow issues can happen if we recurse here
* through waitqueue wakeup handlers. If the caller uses potentially
* nested waitqueues with custom wakeup handlers, then it should
- * check eventfd_signal_count() before calling this function. If
- * it returns true, the eventfd_signal() call should be deferred to a
+ * check eventfd_signal_allowed() before calling this function. If
+ * it returns false, the eventfd_signal() call should be deferred to a
* safe context.
*/
- if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
+ if (WARN_ON_ONCE(current->in_eventfd_signal))
return 0;
spin_lock_irqsave(&ctx->wqh.lock, flags);
- this_cpu_inc(eventfd_wake_count);
+ current->in_eventfd_signal = 1;
if (ULLONG_MAX - ctx->count < n)
n = ULLONG_MAX - ctx->count;
ctx->count += n;
if (waitqueue_active(&ctx->wqh))
wake_up_locked_poll(&ctx->wqh, EPOLLIN);
- this_cpu_dec(eventfd_wake_count);
+ current->in_eventfd_signal = 0;
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
return n;
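
Callers that may already be inside a wakeup handler are expected to test the new per-task flag before signalling; the replacement helper, eventfd_signal_allowed(), presumably just checks current->in_eventfd_signal. The deferral pattern the comment describes, sketched with a hypothetical fallback:

	/* Sketch: deferring eventfd_signal() when recursion is possible.
	 * defer_to_safe_context() is hypothetical (e.g. schedule_work()). */
	#include <linux/eventfd.h>

	extern void defer_to_safe_context(struct eventfd_ctx *ctx);

	static void signal_or_defer(struct eventfd_ctx *ctx)
	{
		if (eventfd_signal_allowed())
			eventfd_signal(ctx, 1);
		else
			defer_to_safe_context(ctx);
	}
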
diff --git a/fs/exec.c b/fs/exec.c
index 38f63451b928..3b78b22addfb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2070,10 +2070,8 @@ SYSCALL_DEFINE5(execveat,
const char __user *const __user *, envp,
int, flags)
{
- int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
-
return do_execveat(fd,
- getname_flags(filename, lookup_flags, NULL),
+ getname_uflags(filename, flags),
argv, envp, flags);
}
@@ -2091,10 +2089,8 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
const compat_uptr_t __user *, envp,
int, flags)
{
- int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
-
return compat_do_execveat(fd,
- getname_flags(filename, lookup_flags, NULL),
+ getname_uflags(filename, flags),
argv, envp, flags);
}
#endif
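
getname_uflags() centralizes the AT_* to LOOKUP_* translation that both execveat() wrappers used to open-code; presumably it reduces to the mapping the removed lines performed. A hedged sketch of that mapping:

	/* Sketch: the flag translation the execveat() wrappers used to
	 * open-code, assumed centralized in getname_uflags(). */
	#include <linux/fcntl.h>
	#include <linux/fs.h>
	#include <linux/namei.h>

	static struct filename *getname_uflags_sketch(
			const char __user *filename, int uflags)
	{
		int lookup_flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;

		return getname_flags(filename, lookup_flags, NULL);
	}
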
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index 54eec9185627..1248ff4ef562 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config EXT2_FS
tristate "Second extended fs support"
+ select FS_IOMAP
help
Ext2 is a standard Linux file system for hard disks.
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index e512630cb63e..3be9dd6412b7 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -667,9 +667,6 @@ struct ext2_inode_info {
struct rw_semaphore xattr_sem;
#endif
rwlock_t i_meta_lock;
-#ifdef CONFIG_FS_DAX
- struct rw_semaphore dax_sem;
-#endif
/*
* truncate_mutex is for serialising ext2_truncate() against
@@ -685,14 +682,6 @@ struct ext2_inode_info {
#endif
};
-#ifdef CONFIG_FS_DAX
-#define dax_sem_down_write(ext2_inode) down_write(&(ext2_inode)->dax_sem)
-#define dax_sem_up_write(ext2_inode) up_write(&(ext2_inode)->dax_sem)
-#else
-#define dax_sem_down_write(ext2_inode)
-#define dax_sem_up_write(ext2_inode)
-#endif
-
/*
* Inode dynamic state flags
*/
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index f98466acc672..eb97aa3d700e 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -81,7 +81,7 @@ out_unlock:
*
* mmap_lock (MM)
* sb_start_pagefault (vfs, freeze)
- * ext2_inode_info->dax_sem
+ * address_space->invalidate_lock
* address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
* ext2_inode_info->truncate_mutex
*
@@ -91,7 +91,6 @@ out_unlock:
static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
- struct ext2_inode_info *ei = EXT2_I(inode);
vm_fault_t ret;
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
(vmf->vma->vm_flags & VM_SHARED);
@@ -100,11 +99,11 @@ static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
file_update_time(vmf->vma->vm_file);
}
- down_read(&ei->dax_sem);
+ filemap_invalidate_lock_shared(inode->i_mapping);
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops);
- up_read(&ei->dax_sem);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
if (write)
sb_end_pagefault(inode->i_sb);
return ret;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index dadb121beb22..333fa62661d5 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -799,7 +799,6 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
}
-#ifdef CONFIG_FS_DAX
static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned flags, struct iomap *iomap, struct iomap *srcmap)
{
@@ -852,16 +851,18 @@ const struct iomap_ops ext2_iomap_ops = {
.iomap_begin = ext2_iomap_begin,
.iomap_end = ext2_iomap_end,
};
-#else
-/* Define empty ops for !CONFIG_FS_DAX case to avoid ugly ifdefs */
-const struct iomap_ops ext2_iomap_ops;
-#endif /* CONFIG_FS_DAX */
int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
- return generic_block_fiemap(inode, fieinfo, start, len,
- ext2_get_block);
+ int ret;
+
+ inode_lock(inode);
+ len = min_t(u64, len, i_size_read(inode));
+ ret = iomap_fiemap(inode, fieinfo, start, len, &ext2_iomap_ops);
+ inode_unlock(inode);
+
+ return ret;
}
static int ext2_writepage(struct page *page, struct writeback_control *wbc)
@@ -1177,7 +1178,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de
ext2_free_data(inode, p, q);
}
-/* dax_sem must be held when calling this function */
+/* mapping->invalidate_lock must be held when calling this function */
static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
{
__le32 *i_data = EXT2_I(inode)->i_data;
@@ -1194,7 +1195,7 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
#ifdef CONFIG_FS_DAX
- WARN_ON(!rwsem_is_locked(&ei->dax_sem));
+ WARN_ON(!rwsem_is_locked(&inode->i_mapping->invalidate_lock));
#endif
n = ext2_block_to_path(inode, iblock, offsets, NULL);
@@ -1276,9 +1277,9 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
if (ext2_inode_is_fast_symlink(inode))
return;
- dax_sem_down_write(EXT2_I(inode));
+ filemap_invalidate_lock(inode->i_mapping);
__ext2_truncate_blocks(inode, offset);
- dax_sem_up_write(EXT2_I(inode));
+ filemap_invalidate_unlock(inode->i_mapping);
}
static int ext2_setsize(struct inode *inode, loff_t newsize)
@@ -1308,10 +1309,10 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
if (error)
return error;
- dax_sem_down_write(EXT2_I(inode));
+ filemap_invalidate_lock(inode->i_mapping);
truncate_setsize(inode, newsize);
__ext2_truncate_blocks(inode, newsize);
- dax_sem_up_write(EXT2_I(inode));
+ filemap_invalidate_unlock(inode->i_mapping);
inode->i_mtime = inode->i_ctime = current_time(inode);
if (inode_needs_sync(inode)) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 21e09fbaa46f..987bcf32ed46 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -206,9 +206,6 @@ static void init_once(void *foo)
init_rwsem(&ei->xattr_sem);
#endif
mutex_init(&ei->truncate_mutex);
-#ifdef CONFIG_FS_DAX
- init_rwsem(&ei->dax_sem);
-#endif
inode_init_once(&ei->vfs_inode);
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3c51e243450d..7ebaf66b6e31 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1086,15 +1086,6 @@ struct ext4_inode_info {
* by other means, so we have i_data_sem.
*/
struct rw_semaphore i_data_sem;
- /*
- * i_mmap_sem is for serializing page faults with truncate / punch hole
- * operations. We have to make sure that new page cannot be faulted in
- * a section of the inode that is being punched. We cannot easily use
- * i_data_sem for this since we need protection for the whole punch
- * operation and i_data_sem ranks below transaction start so we have
- * to occasionally drop it.
- */
- struct rw_semaphore i_mmap_sem;
struct inode vfs_inode;
struct jbd2_inode *jinode;
@@ -2972,7 +2963,6 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
loff_t lstart, loff_t lend);
extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
-extern vm_fault_t ext4_filemap_fault(struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_release_space(struct inode *inode, int to_free);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index b96ecba91899..b60f0152ea57 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -244,9 +244,6 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
* "bh" may be NULL: a metadata block may have been freed from memory
* but there may still be a record of it in the journal, and that record
* still needs to be revoked.
- *
- * If the handle isn't valid we're not journaling, but we still need to
- * call into ext4_journal_revoke() to put the buffer head.
*/
int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
int is_metadata, struct inode *inode,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 92ad64b89d9b..c33e0a2cb6c3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4474,6 +4474,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
loff_t len, int mode)
{
struct inode *inode = file_inode(file);
+ struct address_space *mapping = file->f_mapping;
handle_t *handle = NULL;
unsigned int max_blocks;
loff_t new_size = 0;
@@ -4560,17 +4561,17 @@ static long ext4_zero_range(struct file *file, loff_t offset,
* Prevent page faults from reinstantiating pages we have
* released from page cache.
*/
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = ext4_break_layouts(inode);
if (ret) {
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
goto out_mutex;
}
ret = ext4_update_disksize_before_punch(inode, offset, len);
if (ret) {
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
goto out_mutex;
}
/* Now release the pages and zero block aligned part of pages */
@@ -4579,7 +4580,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags);
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
if (ret)
goto out_mutex;
}
@@ -5221,6 +5222,7 @@ out:
static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
struct super_block *sb = inode->i_sb;
+ struct address_space *mapping = inode->i_mapping;
ext4_lblk_t punch_start, punch_stop;
handle_t *handle;
unsigned int credits;
@@ -5274,7 +5276,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
* Prevent page faults from reinstantiating pages we have released from
* page cache.
*/
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = ext4_break_layouts(inode);
if (ret)
@@ -5289,15 +5291,15 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
* Write tail of the last page before removed range since it will get
* removed from the page cache below.
*/
- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+ ret = filemap_write_and_wait_range(mapping, ioffset, offset);
if (ret)
goto out_mmap;
/*
* Write data that will be shifted to preserve them when discarding
* page cache below. We are also protected from pages becoming dirty
- * by i_mmap_sem.
+ * by i_rwsem and invalidate_lock.
*/
- ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+ ret = filemap_write_and_wait_range(mapping, offset + len,
LLONG_MAX);
if (ret)
goto out_mmap;
@@ -5350,7 +5352,7 @@ out_stop:
ext4_journal_stop(handle);
ext4_fc_stop_ineligible(sb);
out_mmap:
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
out_mutex:
inode_unlock(inode);
return ret;
@@ -5367,6 +5369,7 @@ out_mutex:
static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
struct super_block *sb = inode->i_sb;
+ struct address_space *mapping = inode->i_mapping;
handle_t *handle;
struct ext4_ext_path *path;
struct ext4_extent *extent;
@@ -5425,7 +5428,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
* Prevent page faults from reinstantiating pages we have released from
* page cache.
*/
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = ext4_break_layouts(inode);
if (ret)
@@ -5526,7 +5529,7 @@ out_stop:
ext4_journal_stop(handle);
ext4_fc_stop_ineligible(sb);
out_mmap:
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
out_mutex:
inode_unlock(inode);
return ret;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 816dedcbd541..d3b4ed91aa68 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -704,22 +704,23 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
*/
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
(vmf->vma->vm_flags & VM_SHARED);
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
pfn_t pfn;
if (write) {
sb_start_pagefault(sb);
file_update_time(vmf->vma->vm_file);
- down_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock_shared(mapping);
retry:
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
EXT4_DATA_TRANS_BLOCKS(sb));
if (IS_ERR(handle)) {
- up_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(mapping);
sb_end_pagefault(sb);
return VM_FAULT_SIGBUS;
}
} else {
- down_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock_shared(mapping);
}
result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
if (write) {
@@ -731,10 +732,10 @@ retry:
/* Handling synchronous page fault? */
if (result & VM_FAULT_NEEDDSYNC)
result = dax_finish_sync_fault(vmf, pe_size, pfn);
- up_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(mapping);
sb_end_pagefault(sb);
} else {
- up_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(mapping);
}
return result;
@@ -756,7 +757,7 @@ static const struct vm_operations_struct ext4_dax_vm_ops = {
#endif
static const struct vm_operations_struct ext4_file_vm_ops = {
- .fault = ext4_filemap_fault,
+ .fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = ext4_page_mkwrite,
};
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d8de607849df..325c038e7b23 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3950,20 +3950,19 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
return ret;
}
-static void ext4_wait_dax_page(struct ext4_inode_info *ei)
+static void ext4_wait_dax_page(struct inode *inode)
{
- up_write(&ei->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
schedule();
- down_write(&ei->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
}
int ext4_break_layouts(struct inode *inode)
{
- struct ext4_inode_info *ei = EXT4_I(inode);
struct page *page;
int error;
- if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem)))
+ if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock)))
return -EINVAL;
do {
@@ -3974,7 +3973,7 @@ int ext4_break_layouts(struct inode *inode)
error = ___wait_var_event(&page->_refcount,
atomic_read(&page->_refcount) == 1,
TASK_INTERRUPTIBLE, 0, 0,
- ext4_wait_dax_page(ei));
+ ext4_wait_dax_page(inode));
} while (error == 0);
return error;
@@ -4005,9 +4004,9 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
if (ext4_has_inline_data(inode)) {
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = ext4_convert_inline_data(inode);
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
if (ret)
return ret;
}
@@ -4058,7 +4057,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
* Prevent page faults from reinstantiating pages we have released from
* page cache.
*/
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = ext4_break_layouts(inode);
if (ret)
@@ -4131,7 +4130,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
out_stop:
ext4_journal_stop(handle);
out_dio:
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
out_mutex:
inode_unlock(inode);
return ret;
@@ -5426,11 +5425,11 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
inode_dio_wait(inode);
}
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
rc = ext4_break_layouts(inode);
if (rc) {
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
goto err_out;
}
@@ -5506,7 +5505,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
error = rc;
}
out_mmap_sem:
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
}
if (!error) {
@@ -5983,10 +5982,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
* data (and journalled aops don't know how to handle these cases).
*/
if (val) {
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
err = filemap_write_and_wait(inode->i_mapping);
if (err < 0) {
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
return err;
}
}
@@ -6019,7 +6018,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
percpu_up_write(&sbi->s_writepages_rwsem);
if (val)
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
/* Finally we can mark the inode as dirty. */
@@ -6063,7 +6062,7 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
- down_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock_shared(mapping);
err = ext4_convert_inline_data(inode);
if (err)
@@ -6176,7 +6175,7 @@ retry_alloc:
out_ret:
ret = block_page_mkwrite_return(err);
out:
- up_read(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(mapping);
sb_end_pagefault(inode->i_sb);
return ret;
out_error:
@@ -6184,15 +6183,3 @@ out_error:
ext4_journal_stop(handle);
goto out;
}
-
-vm_fault_t ext4_filemap_fault(struct vm_fault *vmf)
-{
- struct inode *inode = file_inode(vmf->vma->vm_file);
- vm_fault_t ret;
-
- down_read(&EXT4_I(inode)->i_mmap_sem);
- ret = filemap_fault(vmf);
- up_read(&EXT4_I(inode)->i_mmap_sem);
-
- return ret;
-}
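
The ext4_break_layouts() rework above is one instance of a recurring DAX pattern: before freeing blocks, wait until no page in the mapping is still pinned (page refcount above one, e.g. by get_user_pages()), and drop invalidate_lock while sleeping so whoever holds the pin can make progress and release it. Sketched generically, with illustrative myfs_* names:

static void myfs_wait_dax_page(struct inode *inode)
{
	/* Must not sleep holding the lock the pin holder may need. */
	filemap_invalidate_unlock(inode->i_mapping);
	schedule();
	filemap_invalidate_lock(inode->i_mapping);
}

static int myfs_break_layouts(struct inode *inode)
{
	struct page *page;
	int error = 0;

	do {
		page = dax_layout_busy_page(inode->i_mapping);
		if (!page)
			break;
		/* Sleep until the last extra reference is dropped. */
		error = ___wait_var_event(&page->_refcount,
				atomic_read(&page->_refcount) == 1,
				TASK_INTERRUPTIBLE, 0, 0,
				myfs_wait_dax_page(inode));
	} while (error == 0);

	return error;
}
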
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 6eed6170aded..4fb5fe083c2b 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -148,7 +148,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
goto journal_err_out;
}
- down_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
err = filemap_write_and_wait(inode->i_mapping);
if (err)
goto err_out;
@@ -256,7 +256,7 @@ err_out1:
ext4_double_up_write_data_sem(inode, inode_bl);
err_out:
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
journal_err_out:
unlock_two_nondirectories(inode, inode_bl);
iput(inode_bl);
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index bc364c119af6..cebea4270817 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -138,7 +138,7 @@ static int kmmpd(void *data)
unsigned mmp_check_interval;
unsigned long last_update_time;
unsigned long diff;
- int retval;
+ int retval = 0;
mmp_block = le64_to_cpu(es->s_mmp_block);
mmp = (struct mmp_struct *)(bh->b_data);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5fd56f616cf0..f3bbcd4efb56 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2517,7 +2517,7 @@ again:
goto journal_error;
err = ext4_handle_dirty_dx_node(handle, dir,
frame->bh);
- if (err)
+ if (restart || err)
goto journal_error;
} else {
struct dx_root *dxroot;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dfa09a277b56..d6df62fc810c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -90,12 +90,9 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
/*
* Lock ordering
*
- * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
- * i_mmap_rwsem (inode->i_mmap_rwsem)!
- *
* page fault path:
- * mmap_lock -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
- * page lock -> i_data_sem (rw)
+ * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
+ * -> page lock -> i_data_sem (rw)
*
* buffered write path:
* sb_start_write -> i_mutex -> mmap_lock
@@ -103,8 +100,9 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
* i_data_sem (rw)
*
* truncate:
- * sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
- * sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
+ * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
+ * page lock
+ * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
* i_data_sem (rw)
*
* direct IO:
@@ -1360,7 +1358,6 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&ei->i_orphan);
init_rwsem(&ei->xattr_sem);
init_rwsem(&ei->i_data_sem);
- init_rwsem(&ei->i_mmap_sem);
inode_init_once(&ei->vfs_inode);
ext4_fc_init_inode(&ei->vfs_inode);
}
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index dd05af983092..69109746e6e2 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -52,10 +52,20 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
return paddr;
}
+static int ext4_encrypted_symlink_getattr(struct user_namespace *mnt_userns,
+ const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+{
+ ext4_getattr(mnt_userns, path, stat, request_mask, query_flags);
+
+ return fscrypt_symlink_getattr(path, stat);
+}
+
const struct inode_operations ext4_encrypted_symlink_inode_operations = {
.get_link = ext4_encrypted_get_link,
.setattr = ext4_setattr,
- .getattr = ext4_getattr,
+ .getattr = ext4_encrypted_symlink_getattr,
.listxattr = ext4_listxattr,
};
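
The new ->getattr wrapper is needed because an encrypted symlink's i_size is the length of the on-disk ciphertext, not of the target that ->get_link() presents to the user; fscrypt_symlink_getattr() overrides stat->size with the length of the target as it will actually be read back. f2fs receives the identical fix later in this diff. The generic shape for an fscrypt-capable filesystem, sketched with hypothetical myfs_* names:

static int myfs_encrypted_symlink_getattr(struct user_namespace *mnt_userns,
					  const struct path *path,
					  struct kstat *stat, u32 request_mask,
					  unsigned int query_flags)
{
	myfs_getattr(mnt_userns, path, stat, request_mask, query_flags);

	/* Fix up stat->size to the decrypted symlink target's length. */
	return fscrypt_symlink_getattr(path, stat);
}
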
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index bcbe3668c1d4..ce84aa2786c7 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -11,14 +11,16 @@
*/
static inline void ext4_truncate_failed_write(struct inode *inode)
{
+ struct address_space *mapping = inode->i_mapping;
+
/*
* We don't need to call ext4_break_layouts() because the blocks we
* are truncating were never visible to userspace.
*/
- down_write(&EXT4_I(inode)->i_mmap_sem);
- truncate_inode_pages(inode->i_mapping, inode->i_size);
+ filemap_invalidate_lock(mapping);
+ truncate_inode_pages(mapping, inode->i_size);
ext4_truncate(inode);
- up_write(&EXT4_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
}
/*
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d2cf48c5a2e4..eb222b35edef 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3187,12 +3187,12 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
if (to > i_size && !f2fs_verity_in_progress(inode)) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -3852,7 +3852,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
int ret = 0;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
set_inode_flag(inode, FI_ALIGNED_WRITE);
@@ -3894,7 +3894,7 @@ done:
clear_inode_flag(inode, FI_DO_DEFRAG);
clear_inode_flag(inode, FI_ALIGNED_WRITE);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ee8eb33e2c25..906b2c4b50e7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -754,7 +754,6 @@ struct f2fs_inode_info {
/* avoid racing between foreground op and gc */
struct rw_semaphore i_gc_rwsem[2];
- struct rw_semaphore i_mmap_sem;
struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */
int i_extra_isize; /* size of extra space located in i_addr */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6afd4562335f..1ff333755721 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -38,10 +38,7 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
struct inode *inode = file_inode(vmf->vma->vm_file);
vm_fault_t ret;
- down_read(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_fault(vmf);
- up_read(&F2FS_I(inode)->i_mmap_sem);
-
if (!ret)
f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO,
F2FS_BLKSIZE);
@@ -101,7 +98,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
file_update_time(vmf->vma->vm_file);
- down_read(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock_shared(inode->i_mapping);
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping ||
page_offset(page) > i_size_read(inode) ||
@@ -159,7 +156,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
trace_f2fs_vm_page_mkwrite(page, DATA);
out_sem:
- up_read(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
sb_end_pagefault(inode->i_sb);
err:
@@ -940,7 +937,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
truncate_setsize(inode, attr->ia_size);
@@ -950,7 +947,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
* do not trim all blocks after i_size if target size is
* larger than i_size.
*/
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (err)
return err;
@@ -1095,7 +1092,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_end = (loff_t)pg_end << PAGE_SHIFT;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
@@ -1104,7 +1101,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
ret = f2fs_truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -1339,7 +1336,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
@@ -1347,7 +1344,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
f2fs_unlock_op(sbi);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
@@ -1378,13 +1375,13 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
/* write out all moved pages, if possible */
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
new_size = i_size_read(inode) - len;
ret = f2fs_truncate_blocks(inode, new_size, true);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
if (!ret)
f2fs_i_size_write(inode, new_size);
return ret;
@@ -1484,7 +1481,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
pgoff_t end;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
truncate_pagecache_range(inode,
(loff_t)index << PAGE_SHIFT,
@@ -1496,7 +1493,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
if (ret) {
f2fs_unlock_op(sbi);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
@@ -1508,7 +1505,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_balance_fs(sbi, dn.node_changed);
@@ -1543,6 +1540,7 @@ out:
static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct address_space *mapping = inode->i_mapping;
pgoff_t nr, pg_start, pg_end, delta, idx;
loff_t new_size;
int ret = 0;
@@ -1565,14 +1563,14 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
if (ret)
return ret;
/* write out all dirty pages from offset */
- ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
+ ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
if (ret)
return ret;
@@ -1583,7 +1581,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
@@ -1599,14 +1597,14 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
idx + delta, nr, false);
f2fs_unlock_op(sbi);
}
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/* write out all moved pages, if possible */
- down_write(&F2FS_I(inode)->i_mmap_sem);
- filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
+ filemap_invalidate_lock(mapping);
+ filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
if (!ret)
f2fs_i_size_write(inode, new_size);
@@ -3440,7 +3438,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
goto out;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3476,7 +3474,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
}
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
out:
inode_unlock(inode);
@@ -3593,7 +3591,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
}
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3629,7 +3627,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
}
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
if (ret >= 0) {
clear_inode_flag(inode, FI_COMPRESS_RELEASED);
@@ -3748,7 +3746,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
goto err;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
ret = filemap_write_and_wait_range(mapping, range.start,
to_end ? LLONG_MAX : end_addr - 1);
@@ -3835,7 +3833,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
prev_block, len, range.flags);
out:
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
err:
inode_unlock(inode);
@@ -4313,9 +4311,9 @@ write:
/* if we couldn't write data, we should deallocate blocks. */
if (preallocated && i_size_read(inode) < target_size) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- down_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
f2fs_truncate(inode);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
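
Note the consistent lock ordering throughout the f2fs conversion: i_gc_rwsem[WRITE] is always taken before mapping->invalidate_lock, exactly where i_mmap_sem used to sit. The nesting used by the truncate-style paths above, as a sketch:

	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);	/* keep GC away */
	filemap_invalidate_lock(inode->i_mapping);	/* block page faults */

	truncate_pagecache(inode, offset);
	/* ... punch the hole / move the blocks ... */

	filemap_invalidate_unlock(inode->i_mapping);
	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
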
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e149c8c66a71..9c528e583c9d 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1323,9 +1323,19 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
return target;
}
+static int f2fs_encrypted_symlink_getattr(struct user_namespace *mnt_userns,
+ const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+{
+ f2fs_getattr(mnt_userns, path, stat, request_mask, query_flags);
+
+ return fscrypt_symlink_getattr(path, stat);
+}
+
const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
.get_link = f2fs_encrypted_get_link,
- .getattr = f2fs_getattr,
+ .getattr = f2fs_encrypted_symlink_getattr,
.setattr = f2fs_setattr,
.listxattr = f2fs_listxattr,
};
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8fecd3050ccd..ce2ab1b85c11 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1289,7 +1289,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
mutex_init(&fi->inmem_lock);
init_rwsem(&fi->i_gc_rwsem[READ]);
init_rwsem(&fi->i_gc_rwsem[WRITE]);
- init_rwsem(&fi->i_mmap_sem);
init_rwsem(&fi->i_xattr_sem);
/* Will be used by directory only */
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 6642246206bd..daad532a4e2b 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -378,7 +378,7 @@ out:
ret = kstrtol(name, 10, &data);
if (ret)
return ret;
- if (data >= IOPRIO_BE_NR || data < 0)
+ if (data >= IOPRIO_NR_LEVELS || data < 0)
return -EINVAL;
cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, data);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 860e884e56e8..978ac6751aeb 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -5,6 +5,7 @@
#include <linux/blkdev.h>
#include <linux/sched/signal.h>
+#include <linux/backing-dev-defs.h>
#include "fat.h"
struct fatent_operations {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f946bec8f1f1..68added37c15 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -150,7 +150,8 @@ void f_delown(struct file *filp)
pid_t f_getown(struct file *filp)
{
pid_t pid = 0;
- read_lock(&filp->f_owner.lock);
+
+ read_lock_irq(&filp->f_owner.lock);
rcu_read_lock();
if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
pid = pid_vnr(filp->f_owner.pid);
@@ -158,7 +159,7 @@ pid_t f_getown(struct file *filp)
pid = -pid;
}
rcu_read_unlock();
- read_unlock(&filp->f_owner.lock);
+ read_unlock_irq(&filp->f_owner.lock);
return pid;
}
@@ -208,7 +209,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
struct f_owner_ex owner = {};
int ret = 0;
- read_lock(&filp->f_owner.lock);
+ read_lock_irq(&filp->f_owner.lock);
rcu_read_lock();
if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
owner.pid = pid_vnr(filp->f_owner.pid);
@@ -231,7 +232,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
ret = -EINVAL;
break;
}
- read_unlock(&filp->f_owner.lock);
+ read_unlock_irq(&filp->f_owner.lock);
if (!ret) {
ret = copy_to_user(owner_p, &owner, sizeof(owner));
@@ -249,10 +250,10 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
uid_t src[2];
int err;
- read_lock(&filp->f_owner.lock);
+ read_lock_irq(&filp->f_owner.lock);
src[0] = from_kuid(user_ns, filp->f_owner.uid);
src[1] = from_kuid(user_ns, filp->f_owner.euid);
- read_unlock(&filp->f_owner.lock);
+ read_unlock_irq(&filp->f_owner.lock);
err = put_user(src[0], &dst[0]);
err |= put_user(src[1], &dst[1]);
@@ -1003,13 +1004,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
while (fa) {
struct fown_struct *fown;
+ unsigned long flags;
if (fa->magic != FASYNC_MAGIC) {
printk(KERN_ERR "kill_fasync: bad magic number in "
"fasync_struct!\n");
return;
}
- read_lock(&fa->fa_lock);
+ read_lock_irqsave(&fa->fa_lock, flags);
if (fa->fa_file) {
fown = &fa->fa_file->f_owner;
/* Don't send SIGURG to processes which have not set a
@@ -1018,7 +1020,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
if (!(sig == SIGURG && fown->signum == 0))
send_sigio(fown, fa->fa_fd, band);
}
- read_unlock(&fa->fa_lock);
+ read_unlock_irqrestore(&fa->fa_lock, flags);
fa = rcu_dereference(fa->fa_next);
}
}
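
The fcntl changes exist because kill_fasync() runs in interrupt context and takes fa_lock and f_owner.lock as a reader, placing both locks in a hierarchy with HARDIRQ-safe locks; holding either with interrupts enabled in process context can therefore deadlock when an interrupt re-enters the chain on the same CPU. Hence every process-context reader now disables interrupts too. The two variants used above, sketched (filp and fa stand for any struct file * and struct fasync_struct *):

	unsigned long flags;

	/* Process context where IRQs are known to be enabled: _irq. */
	read_lock_irq(&filp->f_owner.lock);
	/* ... read owner fields ... */
	read_unlock_irq(&filp->f_owner.lock);

	/* Callers whose IRQ state is unknown: save and restore. */
	read_lock_irqsave(&fa->fa_lock, flags);
	/* ... */
	read_unlock_irqrestore(&fa->fa_lock, flags);
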
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4c3370548982..eb57dade6076 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2730,23 +2730,6 @@ int write_inode_now(struct inode *inode, int sync)
EXPORT_SYMBOL(write_inode_now);
/**
- * sync_inode - write an inode and its pages to disk.
- * @inode: the inode to sync
- * @wbc: controls the writeback mode
- *
- * sync_inode() will write an inode and its pages to disk. It will also
- * correctly update the inode on its superblock's dirty inode lists and will
- * update inode->i_state.
- *
- * The caller must have a ref on the inode.
- */
-int sync_inode(struct inode *inode, struct writeback_control *wbc)
-{
- return writeback_single_inode(inode, wbc);
-}
-EXPORT_SYMBOL(sync_inode);
-
-/**
* sync_inode_metadata - write an inode to disk
* @inode: the inode to sync
* @wait: wait for I/O to complete.
@@ -2762,6 +2745,6 @@ int sync_inode_metadata(struct inode *inode, int wait)
.nr_to_write = 0, /* metadata-only */
};
- return sync_inode(inode, &wbc);
+ return writeback_single_inode(inode, &wbc);
}
EXPORT_SYMBOL(sync_inode_metadata);
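
With sync_inode() removed (apparently after its remaining callers were converted this cycle), code wanting the old "write one inode's metadata and wait" behaviour goes through the exported helper instead. A sketch, assuming the caller holds a reference on the inode:

	/* Flush the inode's metadata (no data pages) and wait for I/O,
	 * equivalent to the removed sync_inode() with nr_to_write == 0. */
	int err = sync_inode_metadata(inode, 1);
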
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index e55723744f58..281d79f8b3d3 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -444,12 +444,12 @@ static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos,
/*
* Can't do inline reclaim in fault path. We call
* dax_layout_busy_page() before we free a range. And
- * fuse_wait_dax_page() drops fi->i_mmap_sem lock and requires it.
- * In fault path we enter with fi->i_mmap_sem held and can't drop
- * it. Also in fault path we hold fi->i_mmap_sem shared and not
- * exclusive, so that creates further issues with fuse_wait_dax_page().
- * Hence return -EAGAIN and fuse_dax_fault() will wait for a memory
- * range to become free and retry.
+	 * fuse_wait_dax_page() drops mapping->invalidate_lock and then
+	 * reacquires it. In the fault path we enter with the lock held and
+	 * can't drop it. Also, in the fault path we hold the lock shared,
+	 * not exclusive, which creates further issues with
+	 * fuse_wait_dax_page(). Hence return -EAGAIN and fuse_dax_fault()
+	 * will wait for a memory range to become free and retry.
*/
if (flags & IOMAP_FAULT) {
alloc_dmap = alloc_dax_mapping(fcd);
@@ -513,7 +513,7 @@ static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos,
down_write(&fi->dax->sem);
node = interval_tree_iter_first(&fi->dax->tree, idx, idx);
- /* We are holding either inode lock or i_mmap_sem, and that should
+ /* We are holding either inode lock or invalidate_lock, and that should
* ensure that dmap can't be truncated. We are holding a reference
* on dmap and that should make sure it can't be reclaimed. So dmap
* should still be there in tree despite the fact we dropped and
@@ -660,14 +660,12 @@ static const struct iomap_ops fuse_iomap_ops = {
static void fuse_wait_dax_page(struct inode *inode)
{
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
schedule();
- down_write(&fi->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
}
-/* Should be called with fi->i_mmap_sem lock held exclusively */
+/* Should be called with mapping->invalidate_lock held exclusively */
static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
loff_t start, loff_t end)
{
@@ -813,18 +811,18 @@ retry:
* we do not want any read/write/mmap to make progress and try
* to populate page cache or access memory we are trying to free.
*/
- down_read(&get_fuse_inode(inode)->i_mmap_sem);
+ filemap_invalidate_lock_shared(inode->i_mapping);
ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops);
if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) {
error = 0;
retry = true;
- up_read(&get_fuse_inode(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
goto retry;
}
if (ret & VM_FAULT_NEEDDSYNC)
ret = dax_finish_sync_fault(vmf, pe_size, pfn);
- up_read(&get_fuse_inode(inode)->i_mmap_sem);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
if (write)
sb_end_pagefault(sb);
@@ -960,7 +958,7 @@ inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode,
int ret;
struct interval_tree_node *node;
- down_write(&fi->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
/* Lookup a dmap and corresponding file offset to reclaim. */
down_read(&fi->dax->sem);
@@ -1021,7 +1019,7 @@ inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode,
out_write_dmap_sem:
up_write(&fi->dax->sem);
out_mmap_sem:
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
return dmap;
}
@@ -1050,10 +1048,10 @@ alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode)
* had a reference or some other temporary failure,
* Try again. We want to give up inline reclaim only
* if there is no range assigned to this node. Otherwise
- * if a deadlock is possible if we sleep with fi->i_mmap_sem
- * held and worker to free memory can't make progress due
- * to unavailability of fi->i_mmap_sem lock. So sleep
- * only if fi->dax->nr=0
+	 * a deadlock is possible if we sleep with
+	 * mapping->invalidate_lock held while the worker that frees
+	 * memory can't make progress because it needs
+	 * mapping->invalidate_lock. So sleep only if fi->dax->nr=0
*/
if (retry)
continue;
@@ -1061,8 +1059,8 @@ alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode)
* There are no mappings which can be reclaimed. Wait for one.
* We are not holding fi->dax->sem. So it is possible
* that range gets added now. But as we are not holding
- * fi->i_mmap_sem, worker should still be able to free up
- * a range and wake us up.
+ * mapping->invalidate_lock, worker should still be able to
+ * free up a range and wake us up.
*/
if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) {
if (wait_event_killable_exclusive(fcd->range_waitq,
@@ -1108,7 +1106,7 @@ static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd,
/*
* Free a range of memory.
* Locking:
- * 1. Take fi->i_mmap_sem to block dax faults.
+ * 1. Take mapping->invalidate_lock to block dax faults.
* 2. Take fi->dax->sem to protect interval tree and also to make sure
* read/write can not reuse a dmap which we might be freeing.
*/
@@ -1122,7 +1120,7 @@ static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd,
loff_t dmap_start = start_idx << FUSE_DAX_SHIFT;
loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1;
- down_write(&fi->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end);
if (ret) {
pr_debug("virtio_fs: fuse_dax_break_layouts() failed. err=%d\n",
@@ -1134,7 +1132,7 @@ static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd,
ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx);
up_write(&fi->dax->sem);
out_mmap_sem:
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
return ret;
}
@@ -1235,8 +1233,6 @@ void fuse_dax_conn_free(struct fuse_conn *fc)
static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
{
long nr_pages, nr_ranges;
- void *kaddr;
- pfn_t pfn;
struct fuse_dax_mapping *range;
int ret, id;
size_t dax_size = -1;
@@ -1248,8 +1244,8 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker);
id = dax_read_lock();
- nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr,
- &pfn);
+ nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), NULL,
+ NULL);
dax_read_unlock(id);
if (nr_pages < 0) {
pr_debug("dax_direct_access() returned %ld\n", nr_pages);
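
The fuse_dax_mem_range_init() hunk relies on dax_direct_access() treating NULL kaddr/pfn output pointers as "not requested", so a caller probing only the size of a DAX window no longer needs dummy variables. Sketch (dax_dev stands for any struct dax_device *, dax_size for the byte size being probed):

	long nr_pages;
	int id;

	id = dax_read_lock();
	/* NULL kaddr/pfn: only the addressable page count is wanted. */
	nr_pages = dax_direct_access(dax_dev, 0, PHYS_PFN(dax_size),
				     NULL, NULL);
	dax_read_unlock(id);
	if (nr_pages < 0)
		return nr_pages;
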
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index eade6f965b2e..d9b977c0f38d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1556,6 +1556,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_conn *fc = fm->fc;
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct address_space *mapping = inode->i_mapping;
FUSE_ARGS(args);
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
@@ -1580,11 +1581,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
}
if (FUSE_IS_DAX(inode) && is_truncate) {
- down_write(&fi->i_mmap_sem);
+ filemap_invalidate_lock(mapping);
fault_blocked = true;
err = fuse_dax_break_layouts(inode, 0, 0);
if (err) {
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
return err;
}
}
@@ -1694,13 +1695,13 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
if ((is_truncate || !is_wb) &&
S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
truncate_pagecache(inode, outarg.attr.size);
- invalidate_inode_pages2(inode->i_mapping);
+ invalidate_inode_pages2(mapping);
}
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
out:
if (fault_blocked)
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
return 0;
@@ -1711,7 +1712,7 @@ error:
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
if (fault_blocked)
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(mapping);
return err;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 97f860cfc195..621a662c19fb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -243,7 +243,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
}
if (dax_truncate) {
- down_write(&get_fuse_inode(inode)->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
err = fuse_dax_break_layouts(inode, 0, 0);
if (err)
goto out;
@@ -255,7 +255,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
out:
if (dax_truncate)
- up_write(&get_fuse_inode(inode)->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
if (is_wb_truncate | dax_truncate) {
fuse_release_nowrite(inode);
@@ -2920,7 +2920,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (lock_inode) {
inode_lock(inode);
if (block_faults) {
- down_write(&fi->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
err = fuse_dax_break_layouts(inode, 0, 0);
if (err)
goto out;
@@ -2976,7 +2976,7 @@ out:
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
if (block_faults)
- up_write(&fi->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
if (lock_inode)
inode_unlock(inode);
@@ -3045,7 +3045,7 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
* modifications. Yet this does give less guarantees than if the
* copying was performed with write(2).
*
- * To fix this a i_mmap_sem style lock could be used to prevent new
+ * To fix this a mapping->invalidate_lock could be used to prevent new
* faults while the copy is ongoing.
*/
err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 07829ce78695..6fb639b97ea8 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -149,13 +149,6 @@ struct fuse_inode {
/** Lock to protect write related fields */
spinlock_t lock;
- /**
- * Can't take inode lock in fault path (leads to circular dependency).
- * Introduce another semaphore which can be taken in fault path and
- * then other filesystem paths can take this to block faults.
- */
- struct rw_semaphore i_mmap_sem;
-
#ifdef CONFIG_FUSE_DAX
/*
* Dax specific inode data
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index b9beb39a4a18..e07e429f32e1 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -85,7 +85,6 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->orig_ino = 0;
fi->state = 0;
mutex_init(&fi->mutex);
- init_rwsem(&fi->i_mmap_sem);
spin_lock_init(&fi->lock);
fi->forget = fuse_alloc_forget();
if (!fi->forget)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 81d8f064126e..005e920f5d4a 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -574,10 +574,9 @@ void adjust_fs_space(struct inode *inode)
{
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
- struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
- struct buffer_head *m_bh, *l_bh;
+ struct buffer_head *m_bh;
u64 fs_total, new_free;
if (gfs2_trans_begin(sdp, 2 * RES_STATFS, 0) != 0)
@@ -600,11 +599,7 @@ void adjust_fs_space(struct inode *inode)
(unsigned long long)new_free);
gfs2_statfs_change(sdp, new_free, new_free, 0);
- if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0)
- goto out2;
- update_statfs(sdp, m_bh, l_bh);
- brelse(l_bh);
-out2:
+ update_statfs(sdp, m_bh);
brelse(m_bh);
out:
sdp->sd_rindex_uptodate = 0;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 84ec053d43b4..c559827cb6f9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1237,9 +1237,6 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
- if (__mandatory_lock(&ip->i_inode) && fl->fl_type != F_UNLCK)
- return -ENOLCK;
-
if (cmd == F_CANCELLK) {
/* Hack: */
cmd = F_SETLK;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1f3902ecdded..e0eaa9cf9fb6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1494,12 +1494,11 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
list_del_init(&gh->gh_list);
clear_bit(HIF_HOLDER, &gh->gh_iflags);
- if (find_first_holder(gl) == NULL) {
- if (list_empty(&gl->gl_holders) &&
- !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
- !test_bit(GLF_DEMOTE, &gl->gl_flags))
- fast_path = 1;
- }
+ if (list_empty(&gl->gl_holders) &&
+ !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
+ !test_bit(GLF_DEMOTE, &gl->gl_flags))
+ fast_path = 1;
+
if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
gfs2_glock_add_to_lru(gl);
@@ -2077,8 +2076,6 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
*p++ = 'H';
if (test_bit(HIF_WAIT, &iflags))
*p++ = 'W';
- if (test_bit(HIF_FIRST, &iflags))
- *p++ = 'F';
*p = 0;
return buf;
}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 54d3fbeb3002..79c621c7863d 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -33,16 +33,18 @@ extern struct workqueue_struct *gfs2_control_wq;
static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
{
- fs_err(gl->gl_name.ln_sbd,
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ fs_err(sdp,
"AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page "
"state 0x%lx\n",
bh, (unsigned long long)bh->b_blocknr, bh->b_state,
bh->b_page->mapping, bh->b_page->flags);
- fs_err(gl->gl_name.ln_sbd, "AIL glock %u:%llu mapping %p\n",
+ fs_err(sdp, "AIL glock %u:%llu mapping %p\n",
gl->gl_name.ln_type, gl->gl_name.ln_number,
gfs2_glock2aspace(gl));
- gfs2_lm(gl->gl_name.ln_sbd, "AIL error\n");
- gfs2_withdraw(gl->gl_name.ln_sbd);
+ gfs2_lm(sdp, "AIL error\n");
+ gfs2_withdraw_delayed(sdp);
}
/**
@@ -610,16 +612,13 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl)
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
- if (error)
- gfs2_consist(sdp);
- if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
- gfs2_consist(sdp);
-
- /* Initialize some head of the log stuff */
- if (!gfs2_withdrawn(sdp)) {
- sdp->sd_log_sequence = head.lh_sequence + 1;
- gfs2_log_pointers_init(sdp, head.lh_blkno);
- }
+ if (gfs2_assert_withdraw_delayed(sdp, !error))
+ return error;
+ if (gfs2_assert_withdraw_delayed(sdp, head.lh_flags &
+ GFS2_LOG_HEAD_UNMOUNT))
+ return -EIO;
+ sdp->sd_log_sequence = head.lh_sequence + 1;
+ gfs2_log_pointers_init(sdp, head.lh_blkno);
}
return 0;
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e6f820f146cb..0fe49770166e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -253,7 +253,6 @@ struct gfs2_lkstats {
enum {
/* States */
HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
- HIF_FIRST = 7,
HIF_WAIT = 10,
};
@@ -768,6 +767,7 @@ struct gfs2_sbd {
struct gfs2_glock *sd_jinode_gl;
struct gfs2_holder sd_sc_gh;
+ struct buffer_head *sd_sc_bh;
struct gfs2_holder sd_qc_gh;
struct completion sd_journal_ready;
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index dac040162ecc..50578f881e6d 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -299,6 +299,11 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_update_request_times(gl);
+ /* don't want to call dlm if we've unmounted the lock protocol */
+ if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
+ gfs2_glock_free(gl);
+ return;
+ }
/* don't want to skip dlm_unlock writing the lvb when lock has one */
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 42c15cfc0821..f0ee3ff6f9a8 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -594,7 +594,7 @@ void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
{
unsigned int blks = tr->tr_reserved;
unsigned int revokes = tr->tr_revokes;
- unsigned int revoke_blks = 0;
+ unsigned int revoke_blks;
*extra_revokes = 0;
if (revokes) {
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 8ee05d25dfa6..ca0bb3a73912 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -761,6 +761,32 @@ static void buf_lo_before_scan(struct gfs2_jdesc *jd,
jd->jd_replayed_blocks = 0;
}
+#define obsolete_rgrp_replay \
+"Replaying 0x%llx from jid=%d/0x%llx but we already have a bh!\n"
+#define obsolete_rgrp_replay2 \
+"busy:%d, pinned:%d rg_gen:0x%llx, j_gen:0x%llx\n"
+
+static void obsolete_rgrp(struct gfs2_jdesc *jd, struct buffer_head *bh_log,
+ u64 blkno)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_rgrp *jrgd = (struct gfs2_rgrp *)bh_log->b_data;
+
+ rgd = gfs2_blk2rgrpd(sdp, blkno, false);
+ if (rgd && rgd->rd_addr == blkno &&
+ rgd->rd_bits && rgd->rd_bits->bi_bh) {
+ fs_info(sdp, obsolete_rgrp_replay, (unsigned long long)blkno,
+ jd->jd_jid, bh_log->b_blocknr);
+ fs_info(sdp, obsolete_rgrp_replay2,
+ buffer_busy(rgd->rd_bits->bi_bh) ? 1 : 0,
+ buffer_pinned(rgd->rd_bits->bi_bh),
+ rgd->rd_igeneration,
+ be64_to_cpu(jrgd->rg_igeneration));
+ gfs2_dump_glock(NULL, rgd->rd_gl, true);
+ }
+}
+
static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
struct gfs2_log_descriptor *ld, __be64 *ptr,
int pass)
@@ -799,21 +825,9 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
struct gfs2_meta_header *mh =
(struct gfs2_meta_header *)bh_ip->b_data;
- if (mh->mh_type == cpu_to_be32(GFS2_METATYPE_RG)) {
- struct gfs2_rgrpd *rgd;
-
- rgd = gfs2_blk2rgrpd(sdp, blkno, false);
- if (rgd && rgd->rd_addr == blkno &&
- rgd->rd_bits && rgd->rd_bits->bi_bh) {
- fs_info(sdp, "Replaying 0x%llx but we "
- "already have a bh!\n",
- (unsigned long long)blkno);
- fs_info(sdp, "busy:%d, pinned:%d\n",
- buffer_busy(rgd->rd_bits->bi_bh) ? 1 : 0,
- buffer_pinned(rgd->rd_bits->bi_bh));
- gfs2_dump_glock(NULL, rgd->rd_gl, true);
- }
- }
+ if (mh->mh_type == cpu_to_be32(GFS2_METATYPE_RG))
+ obsolete_rgrp(jd, bh_log, blkno);
+
mark_buffer_dirty(bh_ip);
}
brelse(bh_log);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 7c9619997355..72d30a682ece 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -258,8 +258,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
struct buffer_head *bh, *bhs[2];
int num = 0;
- if (unlikely(gfs2_withdrawn(sdp)) &&
- (!sdp->sd_jdesc || gl != sdp->sd_jinode_gl)) {
+ if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp)) {
*bhp = NULL;
return -EIO;
}
@@ -317,7 +316,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
- if (unlikely(gfs2_withdrawn(sdp)))
+ if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp))
return -EIO;
wait_on_buffer(bh);
@@ -328,7 +327,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
gfs2_io_error_bh_wd(sdp, bh);
return -EIO;
}
- if (unlikely(gfs2_withdrawn(sdp)))
+ if (unlikely(gfs2_withdrawn(sdp)) && !gfs2_withdraw_in_prog(sdp))
return -EIO;
return 0;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 5f4504dd0875..7f8410d8fdc1 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -614,6 +614,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
break;
}
+ d_mark_dontcache(jd->jd_inode);
spin_lock(&sdp->sd_jindex_spin);
jd->jd_jid = sdp->sd_journals++;
jip = GFS2_I(jd->jd_inode);
@@ -677,6 +678,7 @@ static int init_statfs(struct gfs2_sbd *sdp)
error = PTR_ERR(lsi->si_sc_inode);
fs_err(sdp, "can't find local \"sc\" file#%u: %d\n",
jd->jd_jid, error);
+ kfree(lsi);
goto free_local;
}
lsi->si_jid = jd->jd_jid;
@@ -695,8 +697,16 @@ static int init_statfs(struct gfs2_sbd *sdp)
fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
goto free_local;
}
+ /* read in the local statfs buffer - other nodes don't change it. */
+ error = gfs2_meta_inode_buffer(ip, &sdp->sd_sc_bh);
+ if (error) {
+ fs_err(sdp, "Cannot read in local statfs: %d\n", error);
+ goto unlock_sd_gh;
+ }
return 0;
+unlock_sd_gh:
+ gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
free_local:
free_local_statfs_inodes(sdp);
iput(pn);
@@ -710,6 +720,7 @@ out:
static void uninit_statfs(struct gfs2_sbd *sdp)
{
if (!sdp->sd_args.ar_spectator) {
+ brelse(sdp->sd_sc_bh);
gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
free_local_statfs_inodes(sdp);
}
@@ -1088,6 +1099,34 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp)
kobject_uevent_env(&sdp->sd_kobj, KOBJ_ONLINE, envp);
}
+static int init_threads(struct gfs2_sbd *sdp)
+{
+ struct task_struct *p;
+ int error = 0;
+
+ p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
+ if (IS_ERR(p)) {
+ error = PTR_ERR(p);
+ fs_err(sdp, "can't start logd thread: %d\n", error);
+ return error;
+ }
+ sdp->sd_logd_process = p;
+
+ p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
+ if (IS_ERR(p)) {
+ error = PTR_ERR(p);
+ fs_err(sdp, "can't start quotad thread: %d\n", error);
+ goto fail;
+ }
+ sdp->sd_quotad_process = p;
+ return 0;
+
+fail:
+ kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
+ return error;
+}
+
/**
* gfs2_fill_super - Read in superblock
* @sb: The VFS superblock
@@ -1216,6 +1255,14 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
goto fail_per_node;
}
+ if (!sb_rdonly(sb)) {
+ error = init_threads(sdp);
+ if (error) {
+ gfs2_withdraw_delayed(sdp);
+ goto fail_per_node;
+ }
+ }
+
error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
if (error)
goto fail_per_node;
@@ -1225,6 +1272,12 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
gfs2_freeze_unlock(&freeze_gh);
if (error) {
+ if (sdp->sd_quotad_process)
+ kthread_stop(sdp->sd_quotad_process);
+ sdp->sd_quotad_process = NULL;
+ if (sdp->sd_logd_process)
+ kthread_stop(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
fs_err(sdp, "can't make FS RW: %d\n", error);
goto fail_per_node;
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4d4ceb0b6903..6e00d15ef0a8 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -119,34 +119,6 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
return 0;
}
-static int init_threads(struct gfs2_sbd *sdp)
-{
- struct task_struct *p;
- int error = 0;
-
- p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
- if (IS_ERR(p)) {
- error = PTR_ERR(p);
- fs_err(sdp, "can't start logd thread: %d\n", error);
- return error;
- }
- sdp->sd_logd_process = p;
-
- p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
- if (IS_ERR(p)) {
- error = PTR_ERR(p);
- fs_err(sdp, "can't start quotad thread: %d\n", error);
- goto fail;
- }
- sdp->sd_quotad_process = p;
- return 0;
-
-fail:
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
- return error;
-}
-
/**
* gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
* @sdp: the filesystem
@@ -161,26 +133,17 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
struct gfs2_log_header_host head;
int error;
- error = init_threads(sdp);
- if (error) {
- gfs2_withdraw_delayed(sdp);
- return error;
- }
-
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
- if (gfs2_withdrawn(sdp)) {
- error = -EIO;
- goto fail;
- }
+ if (gfs2_withdrawn(sdp))
+ return -EIO;
error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
if (error || gfs2_withdrawn(sdp))
- goto fail;
+ return error;
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
gfs2_consist(sdp);
- error = -EIO;
- goto fail;
+ return -EIO;
}
/* Initialize some head of the log stuff */
@@ -188,20 +151,8 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
gfs2_log_pointers_init(sdp, head.lh_blkno);
error = gfs2_quota_init(sdp);
- if (error || gfs2_withdrawn(sdp))
- goto fail;
-
- set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
-
- return 0;
-
-fail:
- if (sdp->sd_quotad_process)
- kthread_stop(sdp->sd_quotad_process);
- sdp->sd_quotad_process = NULL;
- if (sdp->sd_logd_process)
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
+ if (!error && !gfs2_withdrawn(sdp))
+ set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
return error;
}
@@ -227,9 +178,8 @@ int gfs2_statfs_init(struct gfs2_sbd *sdp)
{
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
- struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
- struct buffer_head *m_bh, *l_bh;
+ struct buffer_head *m_bh;
struct gfs2_holder gh;
int error;
@@ -248,21 +198,15 @@ int gfs2_statfs_init(struct gfs2_sbd *sdp)
sizeof(struct gfs2_dinode));
spin_unlock(&sdp->sd_statfs_spin);
} else {
- error = gfs2_meta_inode_buffer(l_ip, &l_bh);
- if (error)
- goto out_m_bh;
-
spin_lock(&sdp->sd_statfs_spin);
gfs2_statfs_change_in(m_sc, m_bh->b_data +
sizeof(struct gfs2_dinode));
- gfs2_statfs_change_in(l_sc, l_bh->b_data +
+ gfs2_statfs_change_in(l_sc, sdp->sd_sc_bh->b_data +
sizeof(struct gfs2_dinode));
spin_unlock(&sdp->sd_statfs_spin);
- brelse(l_bh);
}
-out_m_bh:
brelse(m_bh);
out:
gfs2_glock_dq_uninit(&gh);
@@ -275,22 +219,17 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
- struct buffer_head *l_bh;
s64 x, y;
int need_sync = 0;
- int error;
-
- error = gfs2_meta_inode_buffer(l_ip, &l_bh);
- if (error)
- return;
- gfs2_trans_add_meta(l_ip->i_gl, l_bh);
+ gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh);
spin_lock(&sdp->sd_statfs_spin);
l_sc->sc_total += total;
l_sc->sc_free += free;
l_sc->sc_dinodes += dinodes;
- gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode));
+ gfs2_statfs_change_out(l_sc, sdp->sd_sc_bh->b_data +
+ sizeof(struct gfs2_dinode));
if (sdp->sd_args.ar_statfs_percent) {
x = 100 * l_sc->sc_free;
y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent;
@@ -299,20 +238,18 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
}
spin_unlock(&sdp->sd_statfs_spin);
- brelse(l_bh);
if (need_sync)
gfs2_wake_up_statfs(sdp);
}
-void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
- struct buffer_head *l_bh)
+void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh)
{
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
- gfs2_trans_add_meta(l_ip->i_gl, l_bh);
+ gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh);
gfs2_trans_add_meta(m_ip->i_gl, m_bh);
spin_lock(&sdp->sd_statfs_spin);
@@ -320,7 +257,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
m_sc->sc_free += l_sc->sc_free;
m_sc->sc_dinodes += l_sc->sc_dinodes;
memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
- memset(l_bh->b_data + sizeof(struct gfs2_dinode),
+ memset(sdp->sd_sc_bh->b_data + sizeof(struct gfs2_dinode),
0, sizeof(struct gfs2_statfs_change));
gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
spin_unlock(&sdp->sd_statfs_spin);
@@ -330,11 +267,10 @@ int gfs2_statfs_sync(struct super_block *sb, int type)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
- struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
struct gfs2_holder gh;
- struct buffer_head *m_bh, *l_bh;
+ struct buffer_head *m_bh;
int error;
error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
@@ -355,21 +291,15 @@ int gfs2_statfs_sync(struct super_block *sb, int type)
}
spin_unlock(&sdp->sd_statfs_spin);
- error = gfs2_meta_inode_buffer(l_ip, &l_bh);
- if (error)
- goto out_bh;
-
error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
if (error)
- goto out_bh2;
+ goto out_bh;
- update_statfs(sdp, m_bh, l_bh);
+ update_statfs(sdp, m_bh);
sdp->sd_statfs_force_sync = 0;
gfs2_trans_end(sdp);
-out_bh2:
- brelse(l_bh);
out_bh:
brelse(m_bh);
out_unlock:
@@ -675,6 +605,7 @@ restart:
gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
if (gfs2_holder_initialized(&sdp->sd_jinode_gh))
gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
+ brelse(sdp->sd_sc_bh);
gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
free_local_statfs_inodes(sdp);
@@ -1016,7 +947,7 @@ static int gfs2_drop_inode(struct inode *inode)
gfs2_glock_hold(gl);
if (!gfs2_queue_delete_work(gl, 0))
gfs2_glock_queue_put(gl);
- return false;
+ return 0;
}
return generic_drop_inode(inode);
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index ec4affb33ed5..58d13fd77aed 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -43,8 +43,7 @@ extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
const void *buf);
extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc,
void *buf);
-extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
- struct buffer_head *l_bh);
+extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh);
extern int gfs2_statfs_sync(struct super_block *sb, int type);
extern void gfs2_freeze_func(struct work_struct *work);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index f4325b44956d..cf345a86ef67 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -278,6 +278,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
goto skip_recovery;
}
sdp->sd_jdesc->jd_inode = inode;
+ d_mark_dontcache(inode);
/*
* Now wait until recovery is complete.
@@ -295,7 +296,7 @@ skip_recovery:
fs_warn(sdp, "Journal recovery complete for jid %d.\n",
sdp->sd_lockstruct.ls_jid);
else
- fs_warn(sdp, "Journal recovery skipped for %d until next "
+ fs_warn(sdp, "Journal recovery skipped for jid %d until next "
"mount.\n", sdp->sd_lockstruct.ls_jid);
fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
sdp->sd_glock_dqs_held = 0;
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 69e1a0ae5a4d..78ec190f4155 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -218,6 +218,11 @@ static inline bool gfs2_withdrawing(struct gfs2_sbd *sdp)
!test_bit(SDF_WITHDRAWN, &sdp->sd_flags);
}
+static inline bool gfs2_withdraw_in_prog(struct gfs2_sbd *sdp)
+{
+ return test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
+}
+
#define gfs2_tune_get(sdp, field) \
gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig
index 2b36dc6f0a10..ec975f466877 100644
--- a/fs/hpfs/Kconfig
+++ b/fs/hpfs/Kconfig
@@ -2,6 +2,7 @@
config HPFS_FS
tristate "OS/2 HPFS file system support"
depends on BLOCK
+ select FS_IOMAP
help
OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
is the file system used for organizing files on OS/2 hard disk
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index c3a49aacf20a..fb37f57130aa 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -9,6 +9,7 @@
#include "hpfs_fn.h"
#include <linux/mpage.h>
+#include <linux/iomap.h>
#include <linux/fiemap.h>
#define BLOCKS(size) (((size) + 511) >> 9)
@@ -116,6 +117,47 @@ static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_he
return r;
}
+static int hpfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned flags, struct iomap *iomap, struct iomap *srcmap)
+{
+ struct super_block *sb = inode->i_sb;
+ unsigned int blkbits = inode->i_blkbits;
+ unsigned int n_secs;
+ secno s;
+
+ if (WARN_ON_ONCE(flags & (IOMAP_WRITE | IOMAP_ZERO)))
+ return -EINVAL;
+
+ iomap->bdev = inode->i_sb->s_bdev;
+ iomap->offset = offset;
+
+ hpfs_lock(sb);
+ s = hpfs_bmap(inode, offset >> blkbits, &n_secs);
+ if (s) {
+ n_secs = hpfs_search_hotfix_map_for_range(sb, s,
+ min_t(loff_t, n_secs, length));
+ if (unlikely(!n_secs)) {
+ s = hpfs_search_hotfix_map(sb, s);
+ n_secs = 1;
+ }
+ iomap->type = IOMAP_MAPPED;
+ iomap->flags = IOMAP_F_MERGED;
+ iomap->addr = (u64)s << blkbits;
+ iomap->length = (u64)n_secs << blkbits;
+ } else {
+ iomap->type = IOMAP_HOLE;
+ iomap->addr = IOMAP_NULL_ADDR;
+ iomap->length = 1 << blkbits;
+ }
+
+ hpfs_unlock(sb);
+ return 0;
+}
+
+static const struct iomap_ops hpfs_iomap_ops = {
+ .iomap_begin = hpfs_iomap_begin,
+};
+
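For orientation, the iomap core drives ->iomap_begin roughly as in this pseudocode sketch (simplified; the real loop lives in fs/iomap/ and also invokes an optional ->iomap_end):

	/* pseudocode, not the actual iomap implementation */
	while (length > 0) {
		ops->iomap_begin(inode, pos, length, flags, &iomap, &srcmap);
		done = process(pos, min(length, iomap.length), &iomap);
		pos += done;
		length -= done;
	}

Each call maps at most one extent, which is why hpfs_iomap_begin() above only needs to describe the first mapping (or hole) at the given offset.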
static int hpfs_readpage(struct file *file, struct page *page)
{
return mpage_readpage(page, hpfs_get_block);
@@ -192,7 +234,14 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
static int hpfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len)
{
- return generic_block_fiemap(inode, fieinfo, start, len, hpfs_get_block);
+ int ret;
+
+ inode_lock(inode);
+ len = min_t(u64, len, i_size_read(inode));
+ ret = iomap_fiemap(inode, fieinfo, start, len, &hpfs_iomap_ops);
+ inode_unlock(inode);
+
+ return ret;
}
const struct address_space_operations hpfs_aops = {
diff --git a/fs/inode.c b/fs/inode.c
index c93500d84264..84c528cd1955 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -190,6 +190,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->private_data = NULL;
mapping->writeback_index = 0;
+ __init_rwsem(&mapping->invalidate_lock, "mapping.invalidate_lock",
+ &sb->s_type->invalidate_lock_key);
inode->i_private = NULL;
inode->i_mapping = mapping;
INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */
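A minimal usage sketch, assuming the filemap_invalidate_lock()/filemap_invalidate_unlock() wrappers added alongside this field: a filesystem holds the rwsem in write mode while invalidating page cache, so concurrent faults (read-side holders) cannot repopulate the range mid-operation.

	filemap_invalidate_lock(inode->i_mapping);
	truncate_pagecache_range(inode, start, end);	/* start/end hypothetical */
	filemap_invalidate_unlock(inode->i_mapping);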
diff --git a/fs/internal.h b/fs/internal.h
index 54c2928d39ec..68a2ae029a27 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -71,11 +71,15 @@ extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
-long do_rmdir(int dfd, struct filename *name);
-long do_unlinkat(int dfd, struct filename *name);
+int do_rmdir(int dfd, struct filename *name);
+int do_unlinkat(int dfd, struct filename *name);
int may_linkat(struct user_namespace *mnt_userns, struct path *link);
int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
struct filename *newname, unsigned int flags);
+int do_mkdirat(int dfd, struct filename *name, umode_t mode);
+int do_symlinkat(struct filename *from, int newdfd, struct filename *to);
+int do_linkat(int olddfd, struct filename *old, int newdfd,
+ struct filename *new, int flags);
/*
* namespace.c
diff --git a/fs/io-wq.c b/fs/io-wq.c
index cf086b01c6c6..cd9bd095fb1b 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -51,6 +51,10 @@ struct io_worker {
struct completion ref_done;
+ unsigned long create_state;
+ struct callback_head create_work;
+ int create_index;
+
struct rcu_head rcu;
};
@@ -129,7 +133,8 @@ struct io_cb_cancel_data {
bool cancel_all;
};
-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
+static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first);
+static void io_wqe_dec_running(struct io_worker *worker);
static bool io_worker_get(struct io_worker *worker)
{
@@ -168,27 +173,22 @@ static void io_worker_exit(struct io_worker *worker)
{
struct io_wqe *wqe = worker->wqe;
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
- unsigned flags;
if (refcount_dec_and_test(&worker->ref))
complete(&worker->ref_done);
wait_for_completion(&worker->ref_done);
- preempt_disable();
- current->flags &= ~PF_IO_WORKER;
- flags = worker->flags;
- worker->flags = 0;
- if (flags & IO_WORKER_F_RUNNING)
- atomic_dec(&acct->nr_running);
- worker->flags = 0;
- preempt_enable();
-
- raw_spin_lock_irq(&wqe->lock);
- if (flags & IO_WORKER_F_FREE)
+ raw_spin_lock(&wqe->lock);
+ if (worker->flags & IO_WORKER_F_FREE)
hlist_nulls_del_rcu(&worker->nulls_node);
list_del_rcu(&worker->all_list);
acct->nr_workers--;
- raw_spin_unlock_irq(&wqe->lock);
+ preempt_disable();
+ io_wqe_dec_running(worker);
+ worker->flags = 0;
+ current->flags &= ~PF_IO_WORKER;
+ preempt_enable();
+ raw_spin_unlock(&wqe->lock);
kfree_rcu(worker, rcu);
io_worker_ref_put(wqe->wq);
@@ -214,15 +214,19 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
struct hlist_nulls_node *n;
struct io_worker *worker;
- n = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list));
- if (is_a_nulls(n))
- return false;
-
- worker = hlist_nulls_entry(n, struct io_worker, nulls_node);
- if (io_worker_get(worker)) {
- wake_up_process(worker->task);
+ /*
+ * Iterate free_list and see if we can find an idle worker to
+ * activate. If a given worker is on the free_list but in the process
+ * of exiting, keep trying.
+ */
+ hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
+ if (!io_worker_get(worker))
+ continue;
+ if (wake_up_process(worker->task)) {
+ io_worker_release(worker);
+ return true;
+ }
io_worker_release(worker);
- return true;
}
return false;
@@ -247,10 +251,22 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
ret = io_wqe_activate_free_worker(wqe);
rcu_read_unlock();
- if (!ret && acct->nr_workers < acct->max_workers) {
- atomic_inc(&acct->nr_running);
- atomic_inc(&wqe->wq->worker_refs);
- create_io_worker(wqe->wq, wqe, acct->index);
+ if (!ret) {
+ bool do_create = false, first = false;
+
+ raw_spin_lock(&wqe->lock);
+ if (acct->nr_workers < acct->max_workers) {
+ if (!acct->nr_workers)
+ first = true;
+ acct->nr_workers++;
+ do_create = true;
+ }
+ raw_spin_unlock(&wqe->lock);
+ if (do_create) {
+ atomic_inc(&acct->nr_running);
+ atomic_inc(&wqe->wq->worker_refs);
+ create_io_worker(wqe->wq, wqe, acct->index, first);
+ }
}
}
@@ -261,42 +277,63 @@ static void io_wqe_inc_running(struct io_worker *worker)
atomic_inc(&acct->nr_running);
}
-struct create_worker_data {
- struct callback_head work;
- struct io_wqe *wqe;
- int index;
-};
-
static void create_worker_cb(struct callback_head *cb)
{
- struct create_worker_data *cwd;
+ struct io_worker *worker;
struct io_wq *wq;
-
- cwd = container_of(cb, struct create_worker_data, work);
- wq = cwd->wqe->wq;
- create_io_worker(wq, cwd->wqe, cwd->index);
- kfree(cwd);
+ struct io_wqe *wqe;
+ struct io_wqe_acct *acct;
+ bool do_create = false, first = false;
+
+ worker = container_of(cb, struct io_worker, create_work);
+ wqe = worker->wqe;
+ wq = wqe->wq;
+ acct = &wqe->acct[worker->create_index];
+ raw_spin_lock(&wqe->lock);
+ if (acct->nr_workers < acct->max_workers) {
+ if (!acct->nr_workers)
+ first = true;
+ acct->nr_workers++;
+ do_create = true;
+ }
+ raw_spin_unlock(&wqe->lock);
+ if (do_create) {
+ create_io_worker(wq, wqe, worker->create_index, first);
+ } else {
+ atomic_dec(&acct->nr_running);
+ io_worker_ref_put(wq);
+ }
+ clear_bit_unlock(0, &worker->create_state);
+ io_worker_release(worker);
}
-static void io_queue_worker_create(struct io_wqe *wqe, struct io_wqe_acct *acct)
+static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker,
+ struct io_wqe_acct *acct)
{
- struct create_worker_data *cwd;
struct io_wq *wq = wqe->wq;
/* raced with exit, just ignore create call */
if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
goto fail;
+ if (!io_worker_get(worker))
+ goto fail;
+ /*
+ * create_state manages ownership of create_work/index. We should
+ * only need one entry per worker, as the worker going to sleep
+ * will trigger the condition, and waking will clear it once it
+ * runs the task_work.
+ */
+ if (test_bit(0, &worker->create_state) ||
+ test_and_set_bit_lock(0, &worker->create_state))
+ goto fail_release;
- cwd = kmalloc(sizeof(*cwd), GFP_ATOMIC);
- if (cwd) {
- init_task_work(&cwd->work, create_worker_cb);
- cwd->wqe = wqe;
- cwd->index = acct->index;
- if (!task_work_add(wq->task, &cwd->work, TWA_SIGNAL))
- return;
-
- kfree(cwd);
- }
+ init_task_work(&worker->create_work, create_worker_cb);
+ worker->create_index = acct->index;
+ if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL))
+ return;
+ clear_bit_unlock(0, &worker->create_state);
+fail_release:
+ io_worker_release(worker);
fail:
atomic_dec(&acct->nr_running);
io_worker_ref_put(wq);
@@ -314,7 +351,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) {
atomic_inc(&acct->nr_running);
atomic_inc(&wqe->wq->worker_refs);
- io_queue_worker_create(wqe, acct);
+ io_queue_worker_create(wqe, worker, acct);
}
}
@@ -387,7 +424,28 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
spin_unlock(&wq->hash->wait.lock);
}
-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
+/*
+ * We can always run the work if the worker is currently the same type as
+ * the work (eg both are bound, or both are unbound). If they are not the
+ * same, only allow it if incrementing the worker count would be allowed.
+ */
+static bool io_worker_can_run_work(struct io_worker *worker,
+ struct io_wq_work *work)
+{
+ struct io_wqe_acct *acct;
+
+ if (!(worker->flags & IO_WORKER_F_BOUND) !=
+ !(work->flags & IO_WQ_WORK_UNBOUND))
+ return true;
+
+ /* not the same type, check if we'd go over the limit */
+ acct = io_work_get_acct(worker->wqe, work);
+ return acct->nr_workers < acct->max_workers;
+}
+
+static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
+ struct io_worker *worker,
+ bool *stalled)
__must_hold(wqe->lock)
{
struct io_wq_work_node *node, *prev;
@@ -399,6 +457,9 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
work = container_of(node, struct io_wq_work, list);
+ if (!io_worker_can_run_work(worker, work))
+ break;
+
/* not hashed, can run anytime */
if (!io_wq_is_hashed(work)) {
wq_list_del(&wqe->work_list, node, prev);
@@ -425,6 +486,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
raw_spin_unlock(&wqe->lock);
io_wait_on_hash(wqe, stall_hash);
raw_spin_lock(&wqe->lock);
+ *stalled = true;
}
return NULL;
@@ -448,9 +510,9 @@ static void io_assign_current_work(struct io_worker *worker,
cond_resched();
}
- spin_lock_irq(&worker->lock);
+ spin_lock(&worker->lock);
worker->cur_work = work;
- spin_unlock_irq(&worker->lock);
+ spin_unlock(&worker->lock);
}
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
@@ -464,6 +526,7 @@ static void io_worker_handle_work(struct io_worker *worker)
do {
struct io_wq_work *work;
+ bool stalled;
get_next:
/*
* If we got some work, mark us as busy. If we didn't, but
@@ -472,13 +535,14 @@ get_next:
* can't make progress, any work completion or insertion will
* clear the stalled flag.
*/
- work = io_get_next_work(wqe);
+ stalled = false;
+ work = io_get_next_work(wqe, worker, &stalled);
if (work)
__io_worker_busy(wqe, worker, work);
- else if (!wq_list_empty(&wqe->work_list))
+ else if (stalled)
wqe->flags |= IO_WQE_FLAG_STALLED;
- raw_spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock(&wqe->lock);
if (!work)
break;
io_assign_current_work(worker, work);
@@ -510,16 +574,16 @@ get_next:
clear_bit(hash, &wq->hash->map);
if (wq_has_sleeper(&wq->hash->wait))
wake_up(&wq->hash->wait);
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
/* skip unnecessary unlock-lock wqe->lock */
if (!work)
goto get_next;
- raw_spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock(&wqe->lock);
}
} while (work);
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
} while (1);
}
@@ -540,13 +604,13 @@ static int io_wqe_worker(void *data)
set_current_state(TASK_INTERRUPTIBLE);
loop:
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
if (io_wqe_run_queue(wqe)) {
io_worker_handle_work(worker);
goto loop;
}
__io_worker_idle(wqe, worker);
- raw_spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock(&wqe->lock);
if (io_flush_signals())
continue;
ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
@@ -565,7 +629,7 @@ loop:
}
if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
io_worker_handle_work(worker);
}
@@ -607,12 +671,12 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
worker->flags &= ~IO_WORKER_F_RUNNING;
- raw_spin_lock_irq(&worker->wqe->lock);
+ raw_spin_lock(&worker->wqe->lock);
io_wqe_dec_running(worker);
- raw_spin_unlock_irq(&worker->wqe->lock);
+ raw_spin_unlock(&worker->wqe->lock);
}
-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first)
{
struct io_wqe_acct *acct = &wqe->acct[index];
struct io_worker *worker;
@@ -635,6 +699,9 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
kfree(worker);
fail:
atomic_dec(&acct->nr_running);
+ raw_spin_lock(&wqe->lock);
+ acct->nr_workers--;
+ raw_spin_unlock(&wqe->lock);
io_worker_ref_put(wq);
return;
}
@@ -644,16 +711,15 @@ fail:
set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
tsk->flags |= PF_NO_SETAFFINITY;
- raw_spin_lock_irq(&wqe->lock);
+ raw_spin_lock(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
list_add_tail_rcu(&worker->all_list, &wqe->all_list);
worker->flags |= IO_WORKER_F_FREE;
if (index == IO_WQ_ACCT_BOUND)
worker->flags |= IO_WORKER_F_BOUND;
- if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
+ if (first && (worker->flags & IO_WORKER_F_BOUND))
worker->flags |= IO_WORKER_F_FIXED;
- acct->nr_workers++;
- raw_spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock(&wqe->lock);
wake_up_new_task(tsk);
}
@@ -728,8 +794,7 @@ append:
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
- int work_flags;
- unsigned long flags;
+ bool do_wake;
/*
* If io-wq is exiting for this task, or if the request has explicitly
@@ -741,14 +806,14 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
return;
}
- work_flags = work->flags;
- raw_spin_lock_irqsave(&wqe->lock, flags);
+ raw_spin_lock(&wqe->lock);
io_wqe_insert_work(wqe, work);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
- raw_spin_unlock_irqrestore(&wqe->lock, flags);
+ do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) ||
+ !atomic_read(&acct->nr_running);
+ raw_spin_unlock(&wqe->lock);
- if ((work_flags & IO_WQ_WORK_CONCURRENT) ||
- !atomic_read(&acct->nr_running))
+ if (do_wake)
io_wqe_wake_worker(wqe, acct);
}
@@ -774,19 +839,18 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{
struct io_cb_cancel_data *match = data;
- unsigned long flags;
/*
* Hold the lock to avoid ->cur_work going out of scope, caller
* may dereference the passed in work.
*/
- spin_lock_irqsave(&worker->lock, flags);
+ spin_lock(&worker->lock);
if (worker->cur_work &&
match->fn(worker->cur_work, match->data)) {
set_notify_signal(worker->task);
match->nr_running++;
}
- spin_unlock_irqrestore(&worker->lock, flags);
+ spin_unlock(&worker->lock);
return match->nr_running && !match->cancel_all;
}
@@ -814,16 +878,15 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
{
struct io_wq_work_node *node, *prev;
struct io_wq_work *work;
- unsigned long flags;
retry:
- raw_spin_lock_irqsave(&wqe->lock, flags);
+ raw_spin_lock(&wqe->lock);
wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list);
if (!match->fn(work, match->data))
continue;
io_wqe_remove_pending(wqe, work, prev);
- raw_spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock(&wqe->lock);
io_run_cancel(work, wqe);
match->nr_pending++;
if (!match->cancel_all)
@@ -832,7 +895,7 @@ retry:
/* not safe to continue after unlock */
goto retry;
}
- raw_spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock(&wqe->lock);
}
static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -973,12 +1036,12 @@ err_wq:
static bool io_task_work_match(struct callback_head *cb, void *data)
{
- struct create_worker_data *cwd;
+ struct io_worker *worker;
if (cb->func != create_worker_cb)
return false;
- cwd = container_of(cb, struct create_worker_data, work);
- return cwd->wqe->wq == data;
+ worker = container_of(cb, struct io_worker, create_work);
+ return worker->wqe->wq == data;
}
void io_wq_exit_start(struct io_wq *wq)
@@ -995,12 +1058,13 @@ static void io_wq_exit_workers(struct io_wq *wq)
return;
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
- struct create_worker_data *cwd;
+ struct io_worker *worker;
- cwd = container_of(cb, struct create_worker_data, work);
- atomic_dec(&cwd->wqe->acct[cwd->index].nr_running);
+ worker = container_of(cb, struct io_worker, create_work);
+ atomic_dec(&worker->wqe->acct[worker->create_index].nr_running);
io_worker_ref_put(wq);
- kfree(cwd);
+ clear_bit_unlock(0, &worker->create_state);
+ io_worker_release(worker);
}
rcu_read_lock();
@@ -1112,6 +1176,35 @@ int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
return 0;
}
+/*
+ * Set the max number of bound/unbound workers and return the old values
+ * through new_count. If a new_count entry is 0, that limit is left
+ * unchanged and only the old value is reported.
+ */
+int io_wq_max_workers(struct io_wq *wq, int *new_count)
+{
+ int i, node, prev = 0;
+
+ for (i = 0; i < 2; i++) {
+ if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
+ new_count[i] = task_rlimit(current, RLIMIT_NPROC);
+ }
+
+ rcu_read_lock();
+ for_each_node(node) {
+ struct io_wqe_acct *acct;
+
+ for (i = 0; i < 2; i++) {
+ acct = &wq->wqes[node]->acct[i];
+ prev = max_t(int, acct->max_workers, prev);
+ if (new_count[i])
+ acct->max_workers = new_count[i];
+ new_count[i] = prev;
+ }
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
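A hypothetical caller sketch; index 0/1 are assumed to be the bound/unbound accounting slots, matching the acct[i] indexing above:

	int counts[2] = { 8, 0 };	/* cap slot 0 at 8, leave slot 1 unchanged */

	io_wq_max_workers(wq, counts);	/* counts[] now holds the previous limits */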
static __init int io_wq_init(void)
{
int ret;
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 3999ee58ff26..bf5c4c533760 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -44,6 +44,7 @@ static inline void wq_list_add_after(struct io_wq_work_node *node,
static inline void wq_list_add_tail(struct io_wq_work_node *node,
struct io_wq_work_list *list)
{
+ node->next = NULL;
if (!list->first) {
list->last = node;
WRITE_ONCE(list->first, node);
@@ -51,7 +52,6 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
list->last->next = node;
list->last = node;
}
- node->next = NULL;
}
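The move matters because a concurrent reader may follow the node as soon as it is published; sketched with only what this hunk shows:

	node->next = NULL;		/* initialize first ... */
	WRITE_ONCE(list->first, node);	/* ... then publish to readers */

Setting node->next only after the WRITE_ONCE left a window where a reader could chase a stale next pointer.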
static inline void wq_list_cut(struct io_wq_work_list *list,
@@ -128,6 +128,7 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val);
int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
+int io_wq_max_workers(struct io_wq *wq, int *new_count);
static inline bool io_wq_is_hashed(struct io_wq_work *work)
{
diff --git a/fs/io_uring.c b/fs/io_uring.c
index bf548af0426c..6f35b1285865 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -78,6 +78,7 @@
#include <linux/task_work.h>
#include <linux/pagemap.h>
#include <linux/io_uring.h>
+#include <linux/tracehook.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -91,17 +92,12 @@
#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
-/*
- * Shift of 9 is 512 entries, or exactly one page on 64-bit archs
- */
-#define IORING_FILE_TABLE_SHIFT 9
-#define IORING_MAX_FILES_TABLE (1U << IORING_FILE_TABLE_SHIFT)
-#define IORING_FILE_TABLE_MASK (IORING_MAX_FILES_TABLE - 1)
-#define IORING_MAX_FIXED_FILES (64 * IORING_MAX_FILES_TABLE)
+/* only define max */
+#define IORING_MAX_FIXED_FILES (1U << 15)
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST)
-#define IO_RSRC_TAG_TABLE_SHIFT 9
+#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3)
#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT)
#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1)
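With 4 KiB pages this leaves the value unchanged (PAGE_SHIFT - 3 = 12 - 3 = 9, i.e. 512 entries), but it now sizes one table level to exactly one page of 8-byte u64 tags on any page size: 2^(PAGE_SHIFT-3) * 8 = PAGE_SIZE.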
@@ -234,8 +230,7 @@ struct io_rsrc_put {
};
struct io_file_table {
- /* two level table */
- struct io_fixed_file **files;
+ struct io_fixed_file *files;
};
struct io_rsrc_node {
@@ -300,18 +295,10 @@ struct io_sq_data {
struct completion exited;
};
-#define IO_IOPOLL_BATCH 8
#define IO_COMPL_BATCH 32
#define IO_REQ_CACHE_SIZE 32
#define IO_REQ_ALLOC_BATCH 8
-struct io_comp_state {
- struct io_kiocb *reqs[IO_COMPL_BATCH];
- unsigned int nr;
- /* inline/task_work completion list, under ->uring_lock */
- struct list_head free_list;
-};
-
struct io_submit_link {
struct io_kiocb *head;
struct io_kiocb *last;
@@ -332,14 +319,11 @@ struct io_submit_state {
/*
* Batch completion logic
*/
- struct io_comp_state comp;
+ struct io_kiocb *compl_reqs[IO_COMPL_BATCH];
+ unsigned int compl_nr;
+ /* inline/task_work completion list, under ->uring_lock */
+ struct list_head free_list;
- /*
- * File reference cache
- */
- struct file *file;
- unsigned int fd;
- unsigned int file_refs;
unsigned int ios_left;
};
@@ -391,6 +375,7 @@ struct io_ring_ctx {
struct io_submit_state submit_state;
struct list_head timeout_list;
+ struct list_head ltimeout_list;
struct list_head cq_overflow_list;
struct xarray io_buffers;
struct xarray personalities;
@@ -425,6 +410,8 @@ struct io_ring_ctx {
struct {
spinlock_t completion_lock;
+ spinlock_t timeout_lock;
+
/*
* ->iopoll_list is protected by the ctx->uring_lock for
* io_uring instances that don't use IORING_SETUP_SQPOLL.
@@ -486,8 +473,8 @@ struct io_uring_task {
spinlock_t task_lock;
struct io_wq_work_list task_list;
- unsigned long task_state;
struct callback_head task_work;
+ bool task_running;
};
/*
@@ -522,6 +509,7 @@ struct io_timeout_data {
struct hrtimer timer;
struct timespec64 ts;
enum hrtimer_mode mode;
+ u32 flags;
};
struct io_accept {
@@ -529,6 +517,7 @@ struct io_accept {
struct sockaddr __user *addr;
int __user *addr_len;
int flags;
+ u32 file_slot;
unsigned long nofile;
};
@@ -552,6 +541,8 @@ struct io_timeout {
struct list_head list;
/* head of the link, used by linked timeouts only */
struct io_kiocb *head;
+ /* for linked completions */
+ struct io_kiocb *prev;
};
struct io_timeout_rem {
@@ -561,6 +552,7 @@ struct io_timeout_rem {
/* timeout update */
struct timespec64 ts;
u32 flags;
+ bool ltimeout;
};
struct io_rw {
@@ -592,6 +584,7 @@ struct io_sr_msg {
struct io_open {
struct file *file;
int dfd;
+ u32 file_slot;
struct filename *filename;
struct open_how how;
unsigned long nofile;
@@ -674,9 +667,31 @@ struct io_unlink {
struct filename *filename;
};
+struct io_mkdir {
+ struct file *file;
+ int dfd;
+ umode_t mode;
+ struct filename *filename;
+};
+
+struct io_symlink {
+ struct file *file;
+ int new_dfd;
+ struct filename *oldpath;
+ struct filename *newpath;
+};
+
+struct io_hardlink {
+ struct file *file;
+ int old_dfd;
+ int new_dfd;
+ struct filename *oldpath;
+ struct filename *newpath;
+ int flags;
+};
+
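A hypothetical prep sketch for the new mkdir op, using the struct io_mkdir fields above (the real prep/issue handlers appear later in the full patch and are not shown in this excerpt):

	req->mkdir.dfd = READ_ONCE(sqe->fd);
	req->mkdir.mode = READ_ONCE(sqe->len);
	req->mkdir.filename = getname(u64_to_user_ptr(READ_ONCE(sqe->addr)));
	if (IS_ERR(req->mkdir.filename))
		return PTR_ERR(req->mkdir.filename);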
struct io_completion {
struct file *file;
- struct list_head list;
u32 cflags;
};
@@ -718,14 +733,15 @@ enum {
REQ_F_NEED_CLEANUP_BIT,
REQ_F_POLLED_BIT,
REQ_F_BUFFER_SELECTED_BIT,
- REQ_F_LTIMEOUT_ACTIVE_BIT,
REQ_F_COMPLETE_INLINE_BIT,
REQ_F_REISSUE_BIT,
REQ_F_DONT_REISSUE_BIT,
REQ_F_CREDS_BIT,
+ REQ_F_REFCOUNT_BIT,
+ REQ_F_ARM_LTIMEOUT_BIT,
/* keep async read/write and isreg together and in order */
- REQ_F_ASYNC_READ_BIT,
- REQ_F_ASYNC_WRITE_BIT,
+ REQ_F_NOWAIT_READ_BIT,
+ REQ_F_NOWAIT_WRITE_BIT,
REQ_F_ISREG_BIT,
/* not a real bit, just to check we're not overflowing the space */
@@ -762,8 +778,6 @@ enum {
REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
/* buffer already selected */
REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
- /* linked timeout is active, i.e. prepared by link's head */
- REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
/* completion is deferred through io_comp_state */
REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT),
/* caller should reissue async */
@@ -771,13 +785,17 @@ enum {
/* don't attempt request reissue, see io_rw_reissue() */
REQ_F_DONT_REISSUE = BIT(REQ_F_DONT_REISSUE_BIT),
/* supports async reads */
- REQ_F_ASYNC_READ = BIT(REQ_F_ASYNC_READ_BIT),
+ REQ_F_NOWAIT_READ = BIT(REQ_F_NOWAIT_READ_BIT),
/* supports async writes */
- REQ_F_ASYNC_WRITE = BIT(REQ_F_ASYNC_WRITE_BIT),
+ REQ_F_NOWAIT_WRITE = BIT(REQ_F_NOWAIT_WRITE_BIT),
/* regular file */
REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
/* has creds assigned */
REQ_F_CREDS = BIT(REQ_F_CREDS_BIT),
+ /* skip refcounting if not set */
+ REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT),
+ /* there is a linked timeout that has to be armed */
+ REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT),
};
struct async_poll {
@@ -785,7 +803,7 @@ struct async_poll {
struct io_poll_iocb *double_poll;
};
-typedef void (*io_req_tw_func_t)(struct io_kiocb *req);
+typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked);
struct io_task_work {
union {
@@ -831,6 +849,9 @@ struct io_kiocb {
struct io_shutdown shutdown;
struct io_rename rename;
struct io_unlink unlink;
+ struct io_mkdir mkdir;
+ struct io_symlink symlink;
+ struct io_hardlink hardlink;
/* use only after cleaning per-op data, see io_clean_op() */
struct io_completion compl;
};
@@ -1042,39 +1063,43 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_RENAMEAT] = {},
[IORING_OP_UNLINKAT] = {},
+ [IORING_OP_MKDIRAT] = {},
+ [IORING_OP_SYMLINKAT] = {},
+ [IORING_OP_LINKAT] = {},
};
+/* requests with any of those set should undergo io_disarm_next() */
+#define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL)
+
static bool io_disarm_next(struct io_kiocb *req);
static void io_uring_del_tctx_node(unsigned long index);
static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
struct task_struct *task,
bool cancel_all);
static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
-static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
static bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
long res, unsigned int cflags);
static void io_put_req(struct io_kiocb *req);
-static void io_put_req_deferred(struct io_kiocb *req, int nr);
+static void io_put_req_deferred(struct io_kiocb *req);
static void io_dismantle_req(struct io_kiocb *req);
-static void io_put_task(struct task_struct *task, int nr);
-static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
static void io_queue_linked_timeout(struct io_kiocb *req);
static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
struct io_uring_rsrc_update2 *up,
unsigned nr_args);
static void io_clean_op(struct io_kiocb *req);
-static struct file *io_file_get(struct io_submit_state *state,
+static struct file *io_file_get(struct io_ring_ctx *ctx,
struct io_kiocb *req, int fd, bool fixed);
static void __io_queue_sqe(struct io_kiocb *req);
static void io_rsrc_put_work(struct work_struct *work);
static void io_req_task_queue(struct io_kiocb *req);
static void io_submit_flush_completions(struct io_ring_ctx *ctx);
-static bool io_poll_remove_waitqs(struct io_kiocb *req);
static int io_req_prep_async(struct io_kiocb *req);
-static void io_fallback_req_func(struct work_struct *unused);
+static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+ unsigned int issue_flags, u32 slot_index);
+static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
static struct kmem_cache *req_cachep;
@@ -1093,9 +1118,65 @@ struct sock *io_uring_get_socket(struct file *file)
}
EXPORT_SYMBOL(io_uring_get_socket);
+static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
+{
+ if (!*locked) {
+ mutex_lock(&ctx->uring_lock);
+ *locked = true;
+ }
+}
+
#define io_for_each_link(pos, head) \
for (pos = (head); pos; pos = pos->link)
+/*
+ * Shamelessly stolen from the mm implementation of page reference checking,
+ * see commit f958d7b528b1 for details.
+ */
+#define req_ref_zero_or_close_to_overflow(req) \
+ ((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u)
+
+static inline bool req_ref_inc_not_zero(struct io_kiocb *req)
+{
+ WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
+ return atomic_inc_not_zero(&req->refs);
+}
+
+static inline bool req_ref_put_and_test(struct io_kiocb *req)
+{
+ if (likely(!(req->flags & REQ_F_REFCOUNT)))
+ return true;
+
+ WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
+ return atomic_dec_and_test(&req->refs);
+}
+
+static inline void req_ref_put(struct io_kiocb *req)
+{
+ WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
+ WARN_ON_ONCE(req_ref_put_and_test(req));
+}
+
+static inline void req_ref_get(struct io_kiocb *req)
+{
+ WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
+ WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
+ atomic_inc(&req->refs);
+}
+
+static inline void __io_req_set_refcount(struct io_kiocb *req, int nr)
+{
+ if (!(req->flags & REQ_F_REFCOUNT)) {
+ req->flags |= REQ_F_REFCOUNT;
+ atomic_set(&req->refs, nr);
+ }
+}
+
+static inline void io_req_set_refcount(struct io_kiocb *req)
+{
+ __io_req_set_refcount(req, 1);
+}
+
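Sketch of the resulting scheme: refcounting stays off until a path that needs it arms the request (linked timeouts do this below; other arming sites are elsewhere in the patch), and an unarmed put succeeds immediately:

	io_req_set_refcount(req);	/* flags |= REQ_F_REFCOUNT, refs = 1 */
	req_ref_get(req);		/* e.g. an extra reference for poll */
	/* ... */
	if (req_ref_put_and_test(req))	/* true immediately if never armed */
		io_free_req(req);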
static inline void io_req_set_rsrc_node(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -1140,6 +1221,12 @@ static inline void req_set_fail(struct io_kiocb *req)
req->flags |= REQ_F_FAIL;
}
+static inline void req_fail_link_node(struct io_kiocb *req, int res)
+{
+ req_set_fail(req);
+ req->result = res;
+}
+
static void io_ring_ctx_ref_free(struct percpu_ref *ref)
{
struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
@@ -1152,6 +1239,27 @@ static inline bool io_is_timeout_noseq(struct io_kiocb *req)
return !req->timeout.off;
}
+static void io_fallback_req_func(struct work_struct *work)
+{
+ struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
+ fallback_work.work);
+ struct llist_node *node = llist_del_all(&ctx->fallback_llist);
+ struct io_kiocb *req, *tmp;
+ bool locked = false;
+
+ percpu_ref_get(&ctx->refs);
+ llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node)
+ req->io_task_work.func(req, &locked);
+
+ if (locked) {
+ if (ctx->submit_state.compl_nr)
+ io_submit_flush_completions(ctx);
+ mutex_unlock(&ctx->uring_lock);
+ }
+ percpu_ref_put(&ctx->refs);
+
+}
+
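The bool *locked convention used here applies to every io_req_tw_func_t after this patch: a callback may take the ring lock via io_tw_lock(), leaves *locked set, and the caller unlocks (and flushes completions) once at the end. A hypothetical callback, for illustration only:

	static void example_tw_cb(struct io_kiocb *req, bool *locked)
	{
		io_tw_lock(req->ctx, locked);	/* no-op if already held */
		/* ... work done under ->uring_lock ... */
	}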
static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
{
struct io_ring_ctx *ctx;
@@ -1197,15 +1305,17 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->cq_wait);
spin_lock_init(&ctx->completion_lock);
+ spin_lock_init(&ctx->timeout_lock);
INIT_LIST_HEAD(&ctx->iopoll_list);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
+ INIT_LIST_HEAD(&ctx->ltimeout_list);
spin_lock_init(&ctx->rsrc_ref_lock);
INIT_LIST_HEAD(&ctx->rsrc_ref_list);
INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
init_llist_head(&ctx->rsrc_put_llist);
INIT_LIST_HEAD(&ctx->tctx_list);
- INIT_LIST_HEAD(&ctx->submit_state.comp.free_list);
+ INIT_LIST_HEAD(&ctx->submit_state.free_list);
INIT_LIST_HEAD(&ctx->locked_free_list);
INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
return ctx;
@@ -1235,6 +1345,20 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
return false;
}
+#define FFS_ASYNC_READ 0x1UL
+#define FFS_ASYNC_WRITE 0x2UL
+#ifdef CONFIG_64BIT
+#define FFS_ISREG 0x4UL
+#else
+#define FFS_ISREG 0x0UL
+#endif
+#define FFS_MASK ~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG)
+
+static inline bool io_req_ffs_set(struct io_kiocb *req)
+{
+ return IS_ENABLED(CONFIG_64BIT) && (req->flags & REQ_F_FIXED_FILE);
+}
+
static void io_req_track_inflight(struct io_kiocb *req)
{
if (!(req->flags & REQ_F_INFLIGHT)) {
@@ -1243,6 +1367,32 @@ static void io_req_track_inflight(struct io_kiocb *req)
}
}
+static inline void io_unprep_linked_timeout(struct io_kiocb *req)
+{
+ req->flags &= ~REQ_F_LINK_TIMEOUT;
+}
+
+static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
+{
+ if (WARN_ON_ONCE(!req->link))
+ return NULL;
+
+ req->flags &= ~REQ_F_ARM_LTIMEOUT;
+ req->flags |= REQ_F_LINK_TIMEOUT;
+
+ /* linked timeouts should have two refs once prep'ed */
+ io_req_set_refcount(req);
+ __io_req_set_refcount(req->link, 2);
+ return req->link;
+}
+
+static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
+{
+ if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT)))
+ return NULL;
+ return __io_prep_linked_timeout(req);
+}
+
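Sketch of the arming sequence, assembled from the helpers above and the io_queue_linked_timeout() declaration earlier in the diff:

	struct io_kiocb *timeout = io_prep_linked_timeout(req);	/* NULL unless REQ_F_ARM_LTIMEOUT */

	/* ... issue req ... */
	if (timeout)
		io_queue_linked_timeout(timeout);	/* arms the hrtimer */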
static void io_prep_async_work(struct io_kiocb *req)
{
const struct io_op_def *def = &io_op_defs[req->opcode];
@@ -1282,22 +1432,25 @@ static void io_prep_async_link(struct io_kiocb *req)
if (req->flags & REQ_F_LINK_TIMEOUT) {
struct io_ring_ctx *ctx = req->ctx;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
io_for_each_link(cur, req)
io_prep_async_work(cur);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
} else {
io_for_each_link(cur, req)
io_prep_async_work(cur);
}
}
-static void io_queue_async_work(struct io_kiocb *req)
+static void io_queue_async_work(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *link = io_prep_linked_timeout(req);
struct io_uring_task *tctx = req->task->io_uring;
+ /* must not take the lock, NULL it as a precaution */
+ locked = NULL;
+
BUG_ON(!tctx);
BUG_ON(!tctx->io_wq);
@@ -1323,6 +1476,7 @@ static void io_queue_async_work(struct io_kiocb *req)
static void io_kill_timeout(struct io_kiocb *req, int status)
__must_hold(&req->ctx->completion_lock)
+ __must_hold(&req->ctx->timeout_lock)
{
struct io_timeout_data *io = req->async_data;
@@ -1331,7 +1485,7 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
io_cqring_fill_event(req->ctx, req->user_data, status, 0);
- io_put_req_deferred(req, 1);
+ io_put_req_deferred(req);
}
}
@@ -1350,9 +1504,11 @@ static void io_queue_deferred(struct io_ring_ctx *ctx)
}
static void io_flush_timeouts(struct io_ring_ctx *ctx)
+ __must_hold(&ctx->completion_lock)
{
u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+ spin_lock_irq(&ctx->timeout_lock);
while (!list_empty(&ctx->timeout_list)) {
u32 events_needed, events_got;
struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
@@ -1377,6 +1533,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
io_kill_timeout(req, 0);
}
ctx->cq_last_tm_flush = seq;
+ spin_unlock_irq(&ctx->timeout_lock);
}
static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
@@ -1433,13 +1590,22 @@ static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
return !ctx->eventfd_async || io_wq_current_is_worker();
}
+/*
+ * This should only get called when at least one event has been posted.
+ * Some applications rely on the eventfd notification count only changing
+ * IFF a new CQE has been added to the CQ ring. There's no dependency on
+ * a 1:1 relationship between how many times this function is called (and
+ * hence the eventfd count) and the number of CQEs posted to the CQ ring.
+ */
static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
{
- /* see waitqueue_active() comment */
- smp_mb();
-
- if (waitqueue_active(&ctx->cq_wait))
- wake_up(&ctx->cq_wait);
+ /*
+ * wake_up_all() may seem excessive, but io_wake_function() and
+ * io_should_wake() handle the termination of the loop and only
+ * wake as many waiters as we need to.
+ */
+ if (wq_has_sleeper(&ctx->cq_wait))
+ wake_up_all(&ctx->cq_wait);
if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
wake_up(&ctx->sq_data->wait);
if (io_should_trigger_evfd(ctx))
@@ -1452,12 +1618,9 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
{
- /* see waitqueue_active() comment */
- smp_mb();
-
if (ctx->flags & IORING_SETUP_SQPOLL) {
- if (waitqueue_active(&ctx->cq_wait))
- wake_up(&ctx->cq_wait);
+ if (wq_has_sleeper(&ctx->cq_wait))
+ wake_up_all(&ctx->cq_wait);
}
if (io_should_trigger_evfd(ctx))
eventfd_signal(ctx->cq_ev_fd, 1);
@@ -1470,14 +1633,13 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
/* Returns true if there are no backlogged entries after the flush */
static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
{
- unsigned long flags;
bool all_flushed, posted;
if (!force && __io_cqring_events(ctx) == ctx->cq_entries)
return false;
posted = false;
- spin_lock_irqsave(&ctx->completion_lock, flags);
+ spin_lock(&ctx->completion_lock);
while (!list_empty(&ctx->cq_overflow_list)) {
struct io_uring_cqe *cqe = io_get_cqe(ctx);
struct io_overflow_cqe *ocqe;
@@ -1499,18 +1661,19 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
all_flushed = list_empty(&ctx->cq_overflow_list);
if (all_flushed) {
clear_bit(0, &ctx->check_cq_overflow);
- ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
+ WRITE_ONCE(ctx->rings->sq_flags,
+ ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW);
}
if (posted)
io_commit_cqring(ctx);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ spin_unlock(&ctx->completion_lock);
if (posted)
io_cqring_ev_posted(ctx);
return all_flushed;
}
-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
+static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx)
{
bool ret = true;
@@ -1518,7 +1681,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
/* iopoll syncs against uring_lock, not completion_lock */
if (ctx->flags & IORING_SETUP_IOPOLL)
mutex_lock(&ctx->uring_lock);
- ret = __io_cqring_overflow_flush(ctx, force);
+ ret = __io_cqring_overflow_flush(ctx, false);
if (ctx->flags & IORING_SETUP_IOPOLL)
mutex_unlock(&ctx->uring_lock);
}
@@ -1526,39 +1689,37 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
return ret;
}
-/*
- * Shamelessly stolen from the mm implementation of page reference checking,
- * see commit f958d7b528b1 for details.
- */
-#define req_ref_zero_or_close_to_overflow(req) \
- ((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u)
-
-static inline bool req_ref_inc_not_zero(struct io_kiocb *req)
+/* must be called shortly after putting a request */
+static inline void io_put_task(struct task_struct *task, int nr)
{
- return atomic_inc_not_zero(&req->refs);
-}
+ struct io_uring_task *tctx = task->io_uring;
-static inline bool req_ref_sub_and_test(struct io_kiocb *req, int refs)
-{
- WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
- return atomic_sub_and_test(refs, &req->refs);
+ if (likely(task == current)) {
+ tctx->cached_refs += nr;
+ } else {
+ percpu_counter_sub(&tctx->inflight, nr);
+ if (unlikely(atomic_read(&tctx->in_idle)))
+ wake_up(&tctx->wait);
+ put_task_struct_many(task, nr);
+ }
}
-static inline bool req_ref_put_and_test(struct io_kiocb *req)
+static void io_task_refs_refill(struct io_uring_task *tctx)
{
- WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
- return atomic_dec_and_test(&req->refs);
-}
+ unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR;
-static inline void req_ref_put(struct io_kiocb *req)
-{
- WARN_ON_ONCE(req_ref_put_and_test(req));
+ percpu_counter_add(&tctx->inflight, refill);
+ refcount_add(refill, &current->usage);
+ tctx->cached_refs += refill;
}
-static inline void req_ref_get(struct io_kiocb *req)
+static inline void io_get_task_refs(int nr)
{
- WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
- atomic_inc(&req->refs);
+ struct io_uring_task *tctx = current->io_uring;
+
+ tctx->cached_refs -= nr;
+ if (unlikely(tctx->cached_refs < 0))
+ io_task_refs_refill(tctx);
}
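Paired with the io_put_task() above, the cached-reference flow for a submission batch is roughly:

	io_get_task_refs(nr);		/* cached_refs -= nr, bulk refill if negative */
	/* ... submit nr requests ... */
	io_put_task(current, nr);	/* task == current: refs go back to the cache */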
static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
@@ -1578,7 +1739,9 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
}
if (list_empty(&ctx->cq_overflow_list)) {
set_bit(0, &ctx->check_cq_overflow);
- ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
+ WRITE_ONCE(ctx->rings->sq_flags,
+ ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW);
+
}
ocqe->cqe.user_data = user_data;
ocqe->cqe.res = res;
@@ -1620,9 +1783,8 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
unsigned int cflags)
{
struct io_ring_ctx *ctx = req->ctx;
- unsigned long flags;
- spin_lock_irqsave(&ctx->completion_lock, flags);
+ spin_lock(&ctx->completion_lock);
__io_cqring_fill_event(ctx, req->user_data, res, cflags);
/*
* If we're the last reference to this request, add to our locked
@@ -1630,7 +1792,7 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
*/
if (req_ref_put_and_test(req)) {
if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
- if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL))
+ if (req->flags & IO_DISARM_MASK)
io_disarm_next(req);
if (req->link) {
io_req_task_queue(req->link);
@@ -1639,14 +1801,14 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
}
io_dismantle_req(req);
io_put_task(req->task, 1);
- list_add(&req->compl.list, &ctx->locked_free_list);
+ list_add(&req->inflight_entry, &ctx->locked_free_list);
ctx->locked_free_nr++;
} else {
if (!percpu_ref_tryget(&ctx->refs))
req = NULL;
}
io_commit_cqring(ctx);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ spin_unlock(&ctx->completion_lock);
if (req) {
io_cqring_ev_posted(ctx);
@@ -1686,24 +1848,35 @@ static inline void io_req_complete(struct io_kiocb *req, long res)
static void io_req_complete_failed(struct io_kiocb *req, long res)
{
req_set_fail(req);
- io_put_req(req);
io_req_complete_post(req, res, 0);
}
+/*
+ * Don't initialise the fields below on every allocation, but do that in
+ * advance and keep them valid across allocations.
+ */
+static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
+{
+ req->ctx = ctx;
+ req->link = NULL;
+ req->async_data = NULL;
+ /* not necessary, but safer to zero */
+ req->result = 0;
+}
+
static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
- struct io_comp_state *cs)
+ struct io_submit_state *state)
{
- spin_lock_irq(&ctx->completion_lock);
- list_splice_init(&ctx->locked_free_list, &cs->free_list);
+ spin_lock(&ctx->completion_lock);
+ list_splice_init(&ctx->locked_free_list, &state->free_list);
ctx->locked_free_nr = 0;
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
}
/* Returns true IFF there are requests in the cache */
static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
{
struct io_submit_state *state = &ctx->submit_state;
- struct io_comp_state *cs = &state->comp;
int nr;
/*
@@ -1712,14 +1885,14 @@ static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
* side cache.
*/
if (READ_ONCE(ctx->locked_free_nr) > IO_COMPL_BATCH)
- io_flush_cached_locked_reqs(ctx, cs);
+ io_flush_cached_locked_reqs(ctx, state);
nr = state->free_reqs;
- while (!list_empty(&cs->free_list)) {
- struct io_kiocb *req = list_first_entry(&cs->free_list,
- struct io_kiocb, compl.list);
+ while (!list_empty(&state->free_list)) {
+ struct io_kiocb *req = list_first_entry(&state->free_list,
+ struct io_kiocb, inflight_entry);
- list_del(&req->compl.list);
+ list_del(&req->inflight_entry);
state->reqs[nr++] = req;
if (nr == ARRAY_SIZE(state->reqs))
break;
@@ -1729,48 +1902,41 @@ static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
return nr != 0;
}
+/*
+ * A request might get retired back into the request caches even before opcode
+ * handlers and io_issue_sqe() are done with it, e.g. via the inline
+ * completion path. Because of that, io_alloc_req() should be called only
+ * under ->uring_lock and with extra care not to hand out a request that
+ * is still being worked on.
+ */
static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx)
+ __must_hold(&ctx->uring_lock)
{
struct io_submit_state *state = &ctx->submit_state;
+ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
+ int ret, i;
BUILD_BUG_ON(ARRAY_SIZE(state->reqs) < IO_REQ_ALLOC_BATCH);
- if (!state->free_reqs) {
- gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
- int ret, i;
-
- if (io_flush_cached_reqs(ctx))
- goto got_req;
-
- ret = kmem_cache_alloc_bulk(req_cachep, gfp, IO_REQ_ALLOC_BATCH,
- state->reqs);
+ if (likely(state->free_reqs || io_flush_cached_reqs(ctx)))
+ goto got_req;
- /*
- * Bulk alloc is all-or-nothing. If we fail to get a batch,
- * retry single alloc to be on the safe side.
- */
- if (unlikely(ret <= 0)) {
- state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
- if (!state->reqs[0])
- return NULL;
- ret = 1;
- }
+ ret = kmem_cache_alloc_bulk(req_cachep, gfp, IO_REQ_ALLOC_BATCH,
+ state->reqs);
- /*
- * Don't initialise the fields below on every allocation, but
- * do that in advance and keep valid on free.
- */
- for (i = 0; i < ret; i++) {
- struct io_kiocb *req = state->reqs[i];
-
- req->ctx = ctx;
- req->link = NULL;
- req->async_data = NULL;
- /* not necessary, but safer to zero */
- req->result = 0;
- }
- state->free_reqs = ret;
+ /*
+ * Bulk alloc is all-or-nothing. If we fail to get a batch,
+ * retry single alloc to be on the safe side.
+ */
+ if (unlikely(ret <= 0)) {
+ state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
+ if (!state->reqs[0])
+ return NULL;
+ ret = 1;
}
+
+ for (i = 0; i < ret; i++)
+ io_preinit_req(state->reqs[i], ctx);
+ state->free_reqs = ret;
got_req:
state->free_reqs--;
return state->reqs[state->free_reqs];
@@ -1798,17 +1964,6 @@ static void io_dismantle_req(struct io_kiocb *req)
}
}
-/* must to be called somewhat shortly after putting a request */
-static inline void io_put_task(struct task_struct *task, int nr)
-{
- struct io_uring_task *tctx = task->io_uring;
-
- percpu_counter_sub(&tctx->inflight, nr);
- if (unlikely(atomic_read(&tctx->in_idle)))
- wake_up(&tctx->wait);
- put_task_struct_many(task, nr);
-}
-
static void __io_free_req(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -1816,7 +1971,11 @@ static void __io_free_req(struct io_kiocb *req)
io_dismantle_req(req);
io_put_task(req->task, 1);
- kmem_cache_free(req_cachep, req);
+ spin_lock(&ctx->completion_lock);
+ list_add(&req->inflight_entry, &ctx->locked_free_list);
+ ctx->locked_free_nr++;
+ spin_unlock(&ctx->completion_lock);
+
percpu_ref_put(&ctx->refs);
}
@@ -1830,22 +1989,20 @@ static inline void io_remove_next_linked(struct io_kiocb *req)
static bool io_kill_linked_timeout(struct io_kiocb *req)
__must_hold(&req->ctx->completion_lock)
+ __must_hold(&req->ctx->timeout_lock)
{
struct io_kiocb *link = req->link;
- /*
- * Can happen if a linked timeout fired and link had been like
- * req -> link t-out -> link t-out [-> ...]
- */
- if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) {
+ if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
struct io_timeout_data *io = link->async_data;
io_remove_next_linked(req);
link->timeout.head = NULL;
if (hrtimer_try_to_cancel(&io->timer) != -1) {
+ list_del(&link->timeout.list);
io_cqring_fill_event(link->ctx, link->user_data,
-ECANCELED, 0);
- io_put_req_deferred(link, 1);
+ io_put_req_deferred(link);
return true;
}
}
@@ -1859,12 +2016,17 @@ static void io_fail_links(struct io_kiocb *req)
req->link = NULL;
while (link) {
+ long res = -ECANCELED;
+
+ if (link->flags & REQ_F_FAIL)
+ res = link->result;
+
nxt = link->link;
link->link = NULL;
trace_io_uring_fail_link(req, link);
- io_cqring_fill_event(link->ctx, link->user_data, -ECANCELED, 0);
- io_put_req_deferred(link, 2);
+ io_cqring_fill_event(link->ctx, link->user_data, res, 0);
+ io_put_req_deferred(link);
link = nxt;
}
}
@@ -1874,8 +2036,24 @@ static bool io_disarm_next(struct io_kiocb *req)
{
bool posted = false;
- if (likely(req->flags & REQ_F_LINK_TIMEOUT))
+ if (req->flags & REQ_F_ARM_LTIMEOUT) {
+ struct io_kiocb *link = req->link;
+
+ req->flags &= ~REQ_F_ARM_LTIMEOUT;
+ if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
+ io_remove_next_linked(req);
+ io_cqring_fill_event(link->ctx, link->user_data,
+ -ECANCELED, 0);
+ io_put_req_deferred(link);
+ posted = true;
+ }
+ } else if (req->flags & REQ_F_LINK_TIMEOUT) {
+ struct io_ring_ctx *ctx = req->ctx;
+
+ spin_lock_irq(&ctx->timeout_lock);
posted = io_kill_linked_timeout(req);
+ spin_unlock_irq(&ctx->timeout_lock);
+ }
if (unlikely((req->flags & REQ_F_FAIL) &&
!(req->flags & REQ_F_HARDLINK))) {
posted |= (req->link != NULL);
@@ -1894,16 +2072,15 @@ static struct io_kiocb *__io_req_find_next(struct io_kiocb *req)
* dependencies to the next request. In case of failure, fail the rest
* of the chain.
*/
- if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL)) {
+ if (req->flags & IO_DISARM_MASK) {
struct io_ring_ctx *ctx = req->ctx;
- unsigned long flags;
bool posted;
- spin_lock_irqsave(&ctx->completion_lock, flags);
+ spin_lock(&ctx->completion_lock);
posted = io_disarm_next(req);
if (posted)
io_commit_cqring(req->ctx);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ spin_unlock(&ctx->completion_lock);
if (posted)
io_cqring_ev_posted(ctx);
}
@@ -1919,20 +2096,22 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
return __io_req_find_next(req);
}
-static void ctx_flush_and_put(struct io_ring_ctx *ctx)
+static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
{
if (!ctx)
return;
- if (ctx->submit_state.comp.nr) {
- mutex_lock(&ctx->uring_lock);
- io_submit_flush_completions(ctx);
+ if (*locked) {
+ if (ctx->submit_state.compl_nr)
+ io_submit_flush_completions(ctx);
mutex_unlock(&ctx->uring_lock);
+ *locked = false;
}
percpu_ref_put(&ctx->refs);
}
static void tctx_task_work(struct callback_head *cb)
{
+ bool locked = false;
struct io_ring_ctx *ctx = NULL;
struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
task_work);
@@ -1943,37 +2122,32 @@ static void tctx_task_work(struct callback_head *cb)
spin_lock_irq(&tctx->task_lock);
node = tctx->task_list.first;
INIT_WQ_LIST(&tctx->task_list);
+ if (!node)
+ tctx->task_running = false;
spin_unlock_irq(&tctx->task_lock);
+ if (!node)
+ break;
- while (node) {
+ do {
struct io_wq_work_node *next = node->next;
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
if (req->ctx != ctx) {
- ctx_flush_and_put(ctx);
+ ctx_flush_and_put(ctx, &locked);
ctx = req->ctx;
+ /* if not contended, grab and improve batching */
+ locked = mutex_trylock(&ctx->uring_lock);
percpu_ref_get(&ctx->refs);
}
- req->io_task_work.func(req);
+ req->io_task_work.func(req, &locked);
node = next;
- }
- if (wq_list_empty(&tctx->task_list)) {
- spin_lock_irq(&tctx->task_lock);
- clear_bit(0, &tctx->task_state);
- if (wq_list_empty(&tctx->task_list)) {
- spin_unlock_irq(&tctx->task_lock);
- break;
- }
- spin_unlock_irq(&tctx->task_lock);
- /* another tctx_task_work() is enqueued, yield */
- if (test_and_set_bit(0, &tctx->task_state))
- break;
- }
+ } while (node);
+
cond_resched();
}
- ctx_flush_and_put(ctx);
+ ctx_flush_and_put(ctx, &locked);
}
static void io_req_task_work_add(struct io_kiocb *req)
@@ -1983,19 +2157,20 @@ static void io_req_task_work_add(struct io_kiocb *req)
enum task_work_notify_mode notify;
struct io_wq_work_node *node;
unsigned long flags;
+ bool running;
WARN_ON_ONCE(!tctx);
spin_lock_irqsave(&tctx->task_lock, flags);
wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
+ running = tctx->task_running;
+ if (!running)
+ tctx->task_running = true;
spin_unlock_irqrestore(&tctx->task_lock, flags);
/* task_work already pending, we're done */
- if (test_bit(0, &tctx->task_state) ||
- test_and_set_bit(0, &tctx->task_state))
+ if (running)
return;
- if (unlikely(tsk->flags & PF_EXITING))
- goto fail;
/*
* SQPOLL kernel thread doesn't need notification, just a wakeup. For
@@ -2008,9 +2183,9 @@ static void io_req_task_work_add(struct io_kiocb *req)
wake_up_process(tsk);
return;
}
-fail:
- clear_bit(0, &tctx->task_state);
+
spin_lock_irqsave(&tctx->task_lock, flags);
+ tctx->task_running = false;
node = tctx->task_list.first;
INIT_WQ_LIST(&tctx->task_list);
spin_unlock_irqrestore(&tctx->task_lock, flags);
@@ -2024,27 +2199,25 @@ fail:
}
}
-static void io_req_task_cancel(struct io_kiocb *req)
+static void io_req_task_cancel(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
- /* ctx is guaranteed to stay alive while we hold uring_lock */
- mutex_lock(&ctx->uring_lock);
+ /* not needed for normal modes, but SQPOLL depends on it */
+ io_tw_lock(ctx, locked);
io_req_complete_failed(req, req->result);
- mutex_unlock(&ctx->uring_lock);
}
-static void io_req_task_submit(struct io_kiocb *req)
+static void io_req_task_submit(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
- /* ctx stays valid until unlock, even if we drop all ours ctx->refs */
- mutex_lock(&ctx->uring_lock);
- if (!(req->task->flags & PF_EXITING) && !req->task->in_execve)
+ io_tw_lock(ctx, locked);
+ /* req->task == current here, checking PF_EXITING is safe */
+ if (likely(!(req->task->flags & PF_EXITING)))
__io_queue_sqe(req);
else
io_req_complete_failed(req, -EFAULT);
- mutex_unlock(&ctx->uring_lock);
}
static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
@@ -2080,6 +2253,11 @@ static void io_free_req(struct io_kiocb *req)
__io_free_req(req);
}
+static void io_free_req_work(struct io_kiocb *req, bool *locked)
+{
+ io_free_req(req);
+}
+
struct req_batch {
struct task_struct *task;
int task_refs;
@@ -2096,10 +2274,10 @@ static inline void io_init_req_batch(struct req_batch *rb)
static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
struct req_batch *rb)
{
- if (rb->task)
- io_put_task(rb->task, rb->task_refs);
if (rb->ctx_refs)
percpu_ref_put_many(&ctx->refs, rb->ctx_refs);
+ if (rb->task)
+ io_put_task(rb->task, rb->task_refs);
}
static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req,
@@ -2120,37 +2298,37 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req,
if (state->free_reqs != ARRAY_SIZE(state->reqs))
state->reqs[state->free_reqs++] = req;
else
- list_add(&req->compl.list, &state->comp.free_list);
+ list_add(&req->inflight_entry, &state->free_list);
}
static void io_submit_flush_completions(struct io_ring_ctx *ctx)
+ __must_hold(&ctx->uring_lock)
{
- struct io_comp_state *cs = &ctx->submit_state.comp;
- int i, nr = cs->nr;
+ struct io_submit_state *state = &ctx->submit_state;
+ int i, nr = state->compl_nr;
struct req_batch rb;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
for (i = 0; i < nr; i++) {
- struct io_kiocb *req = cs->reqs[i];
+ struct io_kiocb *req = state->compl_reqs[i];
__io_cqring_fill_event(ctx, req->user_data, req->result,
req->compl.cflags);
}
io_commit_cqring(ctx);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
io_init_req_batch(&rb);
for (i = 0; i < nr; i++) {
- struct io_kiocb *req = cs->reqs[i];
+ struct io_kiocb *req = state->compl_reqs[i];
- /* submission and completion refs */
- if (req_ref_sub_and_test(req, 2))
+ if (req_ref_put_and_test(req))
io_req_free_batch(&rb, req, &ctx->submit_state);
}
io_req_free_batch_finish(ctx, &rb);
- cs->nr = 0;
+ state->compl_nr = 0;
}
/*
@@ -2174,16 +2352,12 @@ static inline void io_put_req(struct io_kiocb *req)
io_free_req(req);
}
-static void io_free_req_deferred(struct io_kiocb *req)
+static inline void io_put_req_deferred(struct io_kiocb *req)
{
- req->io_task_work.func = io_free_req;
- io_req_task_work_add(req);
-}
-
-static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
-{
- if (req_ref_sub_and_test(req, refs))
- io_free_req_deferred(req);
+ if (req_ref_put_and_test(req)) {
+ req->io_task_work.func = io_free_req_work;
+ io_req_task_work_add(req);
+ }
}
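io_put_req_deferred() now folds the old two-step helper into a single put-and-test: drop one reference and, only if it was the last, punt the actual free to task context via io_task_work. The shape of that pattern in portable C11, with a hypothetical schedule_free() standing in for the task-work queueing:

#include <stdatomic.h>

struct obj {
	atomic_int refs;
	/* ... payload ... */
};

void schedule_free(struct obj *o);	/* hypothetical deferred-free hook */

static void put_deferred(struct obj *o)
{
	/* fetch_sub returns the old value: 1 means we held the last ref */
	if (atomic_fetch_sub_explicit(&o->refs, 1, memory_order_acq_rel) == 1)
		schedule_free(o);	/* free from a safe (task) context */
}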
static unsigned io_cqring_events(struct io_ring_ctx *ctx)
@@ -2216,15 +2390,17 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
{
struct io_buffer *kbuf;
+ if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+ return 0;
kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
return io_put_kbuf(req, kbuf);
}
static inline bool io_run_task_work(void)
{
- if (current->task_works) {
+ if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
__set_current_state(TASK_RUNNING);
- task_work_run();
+ tracehook_notify_signal();
return true;
}
@@ -2235,7 +2411,7 @@ static inline bool io_run_task_work(void)
* Find and free completed poll iocbs
*/
static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
- struct list_head *done, bool resubmit)
+ struct list_head *done)
{
struct req_batch rb;
struct io_kiocb *req;
@@ -2245,23 +2421,18 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
io_init_req_batch(&rb);
while (!list_empty(done)) {
- int cflags = 0;
-
req = list_first_entry(done, struct io_kiocb, inflight_entry);
list_del(&req->inflight_entry);
- if (READ_ONCE(req->result) == -EAGAIN && resubmit &&
+ if (READ_ONCE(req->result) == -EAGAIN &&
!(req->flags & REQ_F_DONT_REISSUE)) {
req->iopoll_completed = 0;
- req_ref_get(req);
io_req_task_queue_reissue(req);
continue;
}
- if (req->flags & REQ_F_BUFFER_SELECTED)
- cflags = io_put_rw_kbuf(req);
-
- __io_cqring_fill_event(ctx, req->user_data, req->result, cflags);
+ __io_cqring_fill_event(ctx, req->user_data, req->result,
+ io_put_rw_kbuf(req));
(*nr_events)++;
if (req_ref_put_and_test(req))
@@ -2274,12 +2445,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
}
static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
- long min, bool resubmit)
+ long min)
{
struct io_kiocb *req, *tmp;
LIST_HEAD(done);
bool spin;
- int ret;
/*
* Only spin for completions if we don't have multiple devices hanging
@@ -2287,9 +2457,9 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
*/
spin = !ctx->poll_multi_queue && *nr_events < min;
- ret = 0;
list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) {
struct kiocb *kiocb = &req->rw.kiocb;
+ int ret;
/*
* Move completed and retryable entries to our local lists.
@@ -2304,22 +2474,20 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
break;
ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin);
- if (ret < 0)
- break;
+ if (unlikely(ret < 0))
+ return ret;
+ else if (ret)
+ spin = false;
/* iopoll may have completed current req */
if (READ_ONCE(req->iopoll_completed))
list_move_tail(&req->inflight_entry, &done);
-
- if (ret && spin)
- spin = false;
- ret = 0;
}
if (!list_empty(&done))
- io_iopoll_complete(ctx, nr_events, &done, resubmit);
+ io_iopoll_complete(ctx, nr_events, &done);
- return ret;
+ return 0;
}
/*
@@ -2335,7 +2503,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
while (!list_empty(&ctx->iopoll_list)) {
unsigned int nr_events = 0;
- io_do_iopoll(ctx, &nr_events, 0, false);
+ io_do_iopoll(ctx, &nr_events, 0);
/* let it sleep and repeat later if can't complete a request */
if (nr_events == 0)
@@ -2397,7 +2565,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
list_empty(&ctx->iopoll_list))
break;
}
- ret = io_do_iopoll(ctx, &nr_events, min, true);
+ ret = io_do_iopoll(ctx, &nr_events, min);
} while (!ret && nr_events < min && !need_resched());
out:
mutex_unlock(&ctx->uring_lock);
@@ -2466,42 +2634,57 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
}
#endif
-static void io_fallback_req_func(struct work_struct *work)
+static bool __io_complete_rw_common(struct io_kiocb *req, long res)
{
- struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
- fallback_work.work);
- struct llist_node *node = llist_del_all(&ctx->fallback_llist);
- struct io_kiocb *req, *tmp;
-
- llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node)
- req->io_task_work.func(req);
-}
-
-static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
- unsigned int issue_flags)
-{
- int cflags = 0;
-
if (req->rw.kiocb.ki_flags & IOCB_WRITE)
kiocb_end_write(req);
if (res != req->result) {
if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE;
- return;
+ return true;
}
req_set_fail(req);
+ req->result = res;
+ }
+ return false;
+}
+
+static void io_req_task_complete(struct io_kiocb *req, bool *locked)
+{
+ unsigned int cflags = io_put_rw_kbuf(req);
+ long res = req->result;
+
+ if (*locked) {
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_submit_state *state = &ctx->submit_state;
+
+ io_req_complete_state(req, res, cflags);
+ state->compl_reqs[state->compl_nr++] = req;
+ if (state->compl_nr == ARRAY_SIZE(state->compl_reqs))
+ io_submit_flush_completions(ctx);
+ } else {
+ io_req_complete_post(req, res, cflags);
}
- if (req->flags & REQ_F_BUFFER_SELECTED)
- cflags = io_put_rw_kbuf(req);
- __io_req_complete(req, issue_flags, res, cflags);
+}
+
+static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
+ unsigned int issue_flags)
+{
+ if (__io_complete_rw_common(req, res))
+ return;
+ __io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
- __io_complete_rw(req, res, res2, 0);
+ if (__io_complete_rw_common(req, res))
+ return;
+ req->result = res;
+ req->io_task_work.func = io_req_task_complete;
+ io_req_task_work_add(req);
}
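io_complete_rw() can be invoked from interrupt context, so rather than completing inline it now stashes the result, points io_task_work.func at io_req_task_complete(), and queues the request; the completion then runs in task context, where the now irq-less completion_lock may be taken. Schematically, as a hedged standalone sketch with hypothetical names:

struct request {
	long result;
	void (*complete)(struct request *req);
};

void deferred_run(struct request *req);		/* hypothetical queueing */
void complete_in_task(struct request *req);	/* hypothetical handler */

static void on_io_done(struct request *req, long res)
{
	req->result = res;			/* stash result only; no locks here */
	req->complete = complete_in_task;	/* runs later, in task context */
	deferred_run(req);
}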
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
@@ -2587,40 +2770,6 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
}
}
-static inline void io_state_file_put(struct io_submit_state *state)
-{
- if (state->file_refs) {
- fput_many(state->file, state->file_refs);
- state->file_refs = 0;
- }
-}
-
-/*
- * Get as many references to a file as we have IOs left in this submission,
- * assuming most submissions are for one file, or at least that each file
- * has more than one submission.
- */
-static struct file *__io_file_get(struct io_submit_state *state, int fd)
-{
- if (!state)
- return fget(fd);
-
- if (state->file_refs) {
- if (state->fd == fd) {
- state->file_refs--;
- return state->file;
- }
- io_state_file_put(state);
- }
- state->file = fget_many(fd, state->ios_left);
- if (unlikely(!state->file))
- return NULL;
-
- state->fd = fd;
- state->file_refs = state->ios_left - 1;
- return state->file;
-}
-
static bool io_bdev_nowait(struct block_device *bdev)
{
return !bdev || blk_queue_nowait(bdev_get_queue(bdev));
@@ -2631,7 +2780,7 @@ static bool io_bdev_nowait(struct block_device *bdev)
* any file. For now, just ensure that anything potentially problematic is done
* inline.
*/
-static bool __io_file_supports_async(struct file *file, int rw)
+static bool __io_file_supports_nowait(struct file *file, int rw)
{
umode_t mode = file_inode(file)->i_mode;
@@ -2664,14 +2813,14 @@ static bool __io_file_supports_async(struct file *file, int rw)
return file->f_op->write_iter != NULL;
}
-static bool io_file_supports_async(struct io_kiocb *req, int rw)
+static bool io_file_supports_nowait(struct io_kiocb *req, int rw)
{
- if (rw == READ && (req->flags & REQ_F_ASYNC_READ))
+ if (rw == READ && (req->flags & REQ_F_NOWAIT_READ))
return true;
- else if (rw == WRITE && (req->flags & REQ_F_ASYNC_WRITE))
+ else if (rw == WRITE && (req->flags & REQ_F_NOWAIT_WRITE))
return true;
- return __io_file_supports_async(req->file, rw);
+ return __io_file_supports_nowait(req->file, rw);
}
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -2682,7 +2831,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
unsigned ioprio;
int ret;
- if (!(req->flags & REQ_F_ISREG) && S_ISREG(file_inode(file)->i_mode))
+ if (!io_req_ffs_set(req) && S_ISREG(file_inode(file)->i_mode))
req->flags |= REQ_F_ISREG;
kiocb->ki_pos = READ_ONCE(sqe->off);
@@ -2715,7 +2864,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
!kiocb->ki_filp->f_op->iopoll)
return -EOPNOTSUPP;
- kiocb->ki_flags |= IOCB_HIPRI;
+ kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
kiocb->ki_complete = io_complete_rw_iopoll;
req->iopoll_completed = 0;
} else {
@@ -2782,15 +2931,11 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
if (check_reissue && (req->flags & REQ_F_REISSUE)) {
req->flags &= ~REQ_F_REISSUE;
if (io_resubmit_prep(req)) {
- req_ref_get(req);
io_req_task_queue_reissue(req);
} else {
- int cflags = 0;
-
req_set_fail(req);
- if (req->flags & REQ_F_BUFFER_SELECTED)
- cflags = io_put_rw_kbuf(req);
- __io_req_complete(req, issue_flags, ret, cflags);
+ __io_req_complete(req, issue_flags, ret,
+ io_put_rw_kbuf(req));
}
}
}
@@ -3208,9 +3353,6 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
req->rw.kiocb.ki_flags &= ~IOCB_WAITQ;
list_del_init(&wait->entry);
-
- /* submit ref gets dropped, acquire a new one */
- req_ref_get(req);
io_req_task_queue(req);
return 1;
}
@@ -3295,7 +3437,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
kiocb->ki_flags |= IOCB_NOWAIT;
/* If the file doesn't support async, just async punt */
- if (force_nonblock && !io_file_supports_async(req, READ)) {
+ if (force_nonblock && !io_file_supports_nowait(req, READ)) {
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
return ret ?: -EAGAIN;
}
@@ -3400,7 +3542,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
kiocb->ki_flags |= IOCB_NOWAIT;
/* If the file doesn't support async, just async punt */
- if (force_nonblock && !io_file_supports_async(req, WRITE))
+ if (force_nonblock && !io_file_supports_nowait(req, WRITE))
goto copy_iov;
/* file path doesn't support NOWAIT for non-direct_IO */
@@ -3475,7 +3617,7 @@ static int io_renameat_prep(struct io_kiocb *req,
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index)
+ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -3526,7 +3668,8 @@ static int io_unlinkat_prep(struct io_kiocb *req,
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
+ if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
+ sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
@@ -3566,14 +3709,157 @@ static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
+static int io_mkdirat_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_mkdir *mkd = &req->mkdir;
+ const char __user *fname;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index ||
+ sqe->splice_fd_in)
+ return -EINVAL;
+ if (unlikely(req->flags & REQ_F_FIXED_FILE))
+ return -EBADF;
+
+ mkd->dfd = READ_ONCE(sqe->fd);
+ mkd->mode = READ_ONCE(sqe->len);
+
+ fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ mkd->filename = getname(fname);
+ if (IS_ERR(mkd->filename))
+ return PTR_ERR(mkd->filename);
+
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return 0;
+}
+
+static int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_mkdir *mkd = &req->mkdir;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode);
+
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_complete(req, ret);
+ return 0;
+}
+
+static int io_symlinkat_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_symlink *sl = &req->symlink;
+ const char __user *oldpath, *newpath;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->len || sqe->rw_flags || sqe->buf_index ||
+ sqe->splice_fd_in)
+ return -EINVAL;
+ if (unlikely(req->flags & REQ_F_FIXED_FILE))
+ return -EBADF;
+
+ sl->new_dfd = READ_ONCE(sqe->fd);
+ oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+
+ sl->oldpath = getname(oldpath);
+ if (IS_ERR(sl->oldpath))
+ return PTR_ERR(sl->oldpath);
+
+ sl->newpath = getname(newpath);
+ if (IS_ERR(sl->newpath)) {
+ putname(sl->oldpath);
+ return PTR_ERR(sl->newpath);
+ }
+
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return 0;
+}
+
+static int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_symlink *sl = &req->symlink;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath);
+
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_complete(req, ret);
+ return 0;
+}
+
+static int io_linkat_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_hardlink *lnk = &req->hardlink;
+ const char __user *oldf, *newf;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+ return -EINVAL;
+ if (unlikely(req->flags & REQ_F_FIXED_FILE))
+ return -EBADF;
+
+ lnk->old_dfd = READ_ONCE(sqe->fd);
+ lnk->new_dfd = READ_ONCE(sqe->len);
+ oldf = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ newf = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ lnk->flags = READ_ONCE(sqe->hardlink_flags);
+
+ lnk->oldpath = getname(oldf);
+ if (IS_ERR(lnk->oldpath))
+ return PTR_ERR(lnk->oldpath);
+
+ lnk->newpath = getname(newf);
+ if (IS_ERR(lnk->newpath)) {
+ putname(lnk->oldpath);
+ return PTR_ERR(lnk->newpath);
+ }
+
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return 0;
+}
+
+static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_hardlink *lnk = &req->hardlink;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd,
+ lnk->newpath, lnk->flags);
+
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_complete(req, ret);
+ return 0;
+}
+
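From userspace the three new opcodes are reachable either by filling the SQE fields the prep handlers above read, or through liburing's prep helpers. The sketch below is illustrative rather than part of the patch, and assumes a liburing recent enough (2.1+) to provide io_uring_prep_mkdirat()/io_uring_prep_symlinkat()/io_uring_prep_linkat():

#include <fcntl.h>
#include <stdio.h>
#include <liburing.h>

static int make_tree(struct io_uring *ring)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i, ret;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_mkdirat(sqe, AT_FDCWD, "dir", 0755);

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_symlinkat(sqe, "dir", AT_FDCWD, "dir-link");

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_linkat(sqe, AT_FDCWD, "file", AT_FDCWD, "file-link", 0);

	ret = io_uring_submit(ring);
	if (ret < 0)
		return ret;

	for (i = 0; i < 3; i++) {
		if (io_uring_wait_cqe(ring, &cqe))
			break;
		if (cqe->res < 0)	/* per-op errno comes back negated */
			fprintf(stderr, "op failed: %d\n", cqe->res);
		io_uring_cqe_seen(ring, cqe);
	}
	return 0;
}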
static int io_shutdown_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
- sqe->buf_index)
+ if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags ||
+ sqe->buf_index || sqe->splice_fd_in))
return -EINVAL;
req->shutdown.how = READ_ONCE(sqe->len);
@@ -3622,7 +3908,7 @@ static int __io_splice_prep(struct io_kiocb *req,
if (unlikely(sp->flags & ~valid_flags))
return -EINVAL;
- sp->file_in = io_file_get(NULL, req, READ_ONCE(sqe->splice_fd_in),
+ sp->file_in = io_file_get(req->ctx, req, READ_ONCE(sqe->splice_fd_in),
(sp->flags & SPLICE_F_FD_IN_FIXED));
if (!sp->file_in)
return -EBADF;
@@ -3721,7 +4007,8 @@ static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
+ if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
+ sqe->splice_fd_in))
return -EINVAL;
req->sync.flags = READ_ONCE(sqe->fsync_flags);
@@ -3754,7 +4041,8 @@ static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
static int io_fallocate_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
- if (sqe->ioprio || sqe->buf_index || sqe->rw_flags)
+ if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
+ sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -3785,6 +4073,8 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
const char __user *fname;
int ret;
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
if (unlikely(sqe->ioprio || sqe->buf_index))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
@@ -3802,6 +4092,11 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
req->open.filename = NULL;
return ret;
}
+
+ req->open.file_slot = READ_ONCE(sqe->file_index);
+ if (req->open.file_slot && (req->open.how.flags & O_CLOEXEC))
+ return -EINVAL;
+
req->open.nofile = rlimit(RLIMIT_NOFILE);
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
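Note the off-by-one convention: sqe->file_index carries the registered-file slot plus one, so zero keeps the traditional fd allocation. A hedged userspace fragment using the raw SQE field (liburing later grew io_uring_prep_openat_direct() for the same thing), assuming a table already registered via io_uring_register_files() and a uapi header with the file_index field:

#include <fcntl.h>
#include <liburing.h>

/* open "data.bin" straight into registered slot 'slot'; no process-wide
 * fd is allocated. O_CLOEXEC is rejected for fixed slots (see prep above). */
static void queue_open_fixed(struct io_uring *ring, unsigned slot)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	io_uring_prep_openat(sqe, AT_FDCWD, "data.bin", O_RDONLY, 0);
	sqe->file_index = slot + 1;	/* 0 would mean "not a fixed slot" */
}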
@@ -3809,12 +4104,9 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- u64 flags, mode;
+ u64 mode = READ_ONCE(sqe->len);
+ u64 flags = READ_ONCE(sqe->open_flags);
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- mode = READ_ONCE(sqe->len);
- flags = READ_ONCE(sqe->open_flags);
req->open.how = build_open_how(flags, mode);
return __io_openat_prep(req, sqe);
}
@@ -3825,8 +4117,6 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
size_t len;
int ret;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
len = READ_ONCE(sqe->len);
if (len < OPEN_HOW_SIZE_VER0)
@@ -3844,8 +4134,8 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
{
struct open_flags op;
struct file *file;
- bool nonblock_set;
- bool resolve_nonblock;
+ bool resolve_nonblock, nonblock_set;
+ bool fixed = !!req->open.file_slot;
int ret;
ret = build_open_flags(&req->open.how, &op);
@@ -3864,9 +4154,11 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
op.open_flag |= O_NONBLOCK;
}
- ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
- if (ret < 0)
- goto err;
+ if (!fixed) {
+ ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
+ if (ret < 0)
+ goto err;
+ }
file = do_filp_open(req->open.dfd, req->open.filename, &op);
if (IS_ERR(file)) {
@@ -3875,7 +4167,8 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
* marginal gain for something that is now known to be a slower
* path. So just put it, and we'll get a new one when we retry.
*/
- put_unused_fd(ret);
+ if (!fixed)
+ put_unused_fd(ret);
ret = PTR_ERR(file);
/* only retry if RESOLVE_CACHED wasn't already set by application */
@@ -3888,7 +4181,12 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set)
file->f_flags &= ~O_NONBLOCK;
fsnotify_open(file);
- fd_install(ret, file);
+
+ if (!fixed)
+ fd_install(ret, file);
+ else
+ ret = io_install_fixed_file(req, file, issue_flags,
+ req->open.file_slot - 1);
err:
putname(req->open.filename);
req->flags &= ~REQ_F_NEED_CLEANUP;
@@ -3909,7 +4207,8 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
- if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off)
+ if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
+ sqe->splice_fd_in)
return -EINVAL;
tmp = READ_ONCE(sqe->fd);
@@ -3980,7 +4279,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
- if (sqe->ioprio || sqe->rw_flags)
+ if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
tmp = READ_ONCE(sqe->fd);
@@ -4067,7 +4366,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_EPOLL)
- if (sqe->ioprio || sqe->buf_index)
+ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -4113,7 +4412,7 @@ static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
- if (sqe->ioprio || sqe->buf_index || sqe->off)
+ if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -4148,7 +4447,7 @@ static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (sqe->ioprio || sqe->buf_index || sqe->addr)
+ if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
return -EINVAL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -4186,7 +4485,7 @@ static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index)
+ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
@@ -4222,7 +4521,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
- sqe->rw_flags || sqe->buf_index)
+ sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
@@ -4283,7 +4582,8 @@ static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
+ if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
+ sqe->splice_fd_in))
return -EINVAL;
req->sync.off = READ_ONCE(sqe->off);
@@ -4717,6 +5017,15 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
accept->nofile = rlimit(RLIMIT_NOFILE);
+
+ accept->file_slot = READ_ONCE(sqe->file_index);
+ /* fixed-slot installs bypass the fd table, so CLOEXEC is meaningless;
+ * note req->open must not be inspected here, it aliases req->accept
+ */
+ if (accept->file_slot && (accept->flags & SOCK_CLOEXEC))
+ return -EINVAL;
+ if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return -EINVAL;
+ if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
+ accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
return 0;
}
@@ -4725,20 +5034,35 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
struct io_accept *accept = &req->accept;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
- int ret;
+ bool fixed = !!accept->file_slot;
+ struct file *file;
+ int ret, fd;
if (req->file->f_flags & O_NONBLOCK)
req->flags |= REQ_F_NOWAIT;
- ret = __sys_accept4_file(req->file, file_flags, accept->addr,
- accept->addr_len, accept->flags,
- accept->nofile);
- if (ret == -EAGAIN && force_nonblock)
- return -EAGAIN;
- if (ret < 0) {
+ if (!fixed) {
+ fd = __get_unused_fd_flags(accept->flags, accept->nofile);
+ if (unlikely(fd < 0))
+ return fd;
+ }
+ file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
+ accept->flags);
+ if (IS_ERR(file)) {
+ if (!fixed)
+ put_unused_fd(fd);
+ ret = PTR_ERR(file);
+ if (ret == -EAGAIN && force_nonblock)
+ return -EAGAIN;
if (ret == -ERESTARTSYS)
ret = -EINTR;
req_set_fail(req);
+ } else if (!fixed) {
+ fd_install(fd, file);
+ ret = fd;
+ } else {
+ ret = io_install_fixed_file(req, file, issue_flags,
+ accept->file_slot - 1);
}
__io_req_complete(req, issue_flags, ret, 0);
return 0;
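The accept path uses the same slot+1 convention for sqe->file_index; a matching hedged fragment (liburing's io_uring_prep_accept_direct() wraps this today), under the same registered-table assumption as above:

#include <liburing.h>

/* accept a connection directly into registered slot 'slot' */
static void queue_accept_fixed(struct io_uring *ring, int listen_fd,
			       unsigned slot)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	io_uring_prep_accept(sqe, listen_fd, NULL, NULL, 0);
	sqe->file_index = slot + 1;	/* SOCK_CLOEXEC is rejected here */
}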
@@ -4758,7 +5082,8 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags)
+ if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
+ sqe->splice_fd_in)
return -EINVAL;
conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -4871,6 +5196,7 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
{
struct io_ring_ctx *ctx = req->ctx;
+ /* req->task == current here, checking PF_EXITING is safe */
if (unlikely(req->task->flags & PF_EXITING))
WRITE_ONCE(poll->canceled, true);
@@ -4880,7 +5206,7 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
req->result = vfs_poll(req->file, &pt) & poll->events;
}
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
if (!req->result && !READ_ONCE(poll->canceled)) {
add_wait_queue(poll->head, &poll->wait);
return true;
@@ -4914,12 +5240,12 @@ static void io_poll_remove_double(struct io_kiocb *req)
if (poll && poll->head) {
struct wait_queue_head *head = poll->head;
- spin_lock(&head->lock);
+ spin_lock_irq(&head->lock);
list_del_init(&poll->wait.entry);
if (poll->wait.private)
req_ref_put(req);
poll->head = NULL;
- spin_unlock(&head->lock);
+ spin_unlock_irq(&head->lock);
}
}
@@ -4949,13 +5275,13 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
return !(flags & IORING_CQE_F_MORE);
}
-static void io_poll_task_func(struct io_kiocb *req)
+static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *nxt;
if (io_poll_rewait(req, &req->poll)) {
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
} else {
bool done;
@@ -4967,13 +5293,13 @@ static void io_poll_task_func(struct io_kiocb *req)
req->result = 0;
add_wait_queue(req->poll.head, &req->poll.wait);
}
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
if (done) {
nxt = io_put_req_find_next(req);
if (nxt)
- io_req_task_submit(nxt);
+ io_req_task_submit(nxt, locked);
}
}
}
@@ -4984,6 +5310,7 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
struct io_kiocb *req = wait->private;
struct io_poll_iocb *poll = io_poll_get_single(req);
__poll_t mask = key_to_poll(key);
+ unsigned long flags;
/* for instances that support it check for an event match first: */
if (mask && !(mask & poll->events))
@@ -4996,13 +5323,13 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
if (poll->head) {
bool done;
- spin_lock(&poll->head->lock);
+ spin_lock_irqsave(&poll->head->lock, flags);
done = list_empty(&poll->wait.entry);
if (!done)
list_del_init(&poll->wait.entry);
/* make sure double remove sees this as being gone */
wait->private = NULL;
- spin_unlock(&poll->head->lock);
+ spin_unlock_irqrestore(&poll->head->lock, flags);
if (!done) {
/* use wait func handler, so it matches the rq type */
poll->wait.func(&poll->wait, mode, sync, key);
@@ -5039,8 +5366,13 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
if (unlikely(pt->nr_entries)) {
struct io_poll_iocb *poll_one = poll;
+ /* double add on the same waitqueue head, ignore */
+ if (poll_one->head == head)
+ return;
/* already have a 2nd entry, fail a third attempt */
if (*poll_ptr) {
+ if ((*poll_ptr)->head == head)
+ return;
pt->error = -EINVAL;
return;
}
@@ -5050,9 +5382,6 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
*/
if (!(poll_one->events & EPOLLONESHOT))
poll_one->events |= EPOLLONESHOT;
- /* double add on the same waitqueue head, ignore */
- if (poll_one->head == head)
- return;
poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
if (!poll) {
pt->error = -ENOMEM;
@@ -5082,7 +5411,7 @@ static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}
-static void io_async_task_func(struct io_kiocb *req)
+static void io_async_task_func(struct io_kiocb *req, bool *locked)
{
struct async_poll *apoll = req->apoll;
struct io_ring_ctx *ctx = req->ctx;
@@ -5090,16 +5419,16 @@ static void io_async_task_func(struct io_kiocb *req)
trace_io_uring_task_run(req->ctx, req, req->opcode, req->user_data);
if (io_poll_rewait(req, &apoll->poll)) {
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
return;
}
hash_del(&req->hash_node);
io_poll_remove_double(req);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
if (!READ_ONCE(apoll->poll.canceled))
- io_req_task_submit(req);
+ io_req_task_submit(req, locked);
else
io_req_complete_failed(req, -ECANCELED);
}
@@ -5148,11 +5477,11 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
if (unlikely(!ipt->nr_entries) && !ipt->error)
ipt->error = -EINVAL;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
if (ipt->error || (mask && (poll->events & EPOLLONESHOT)))
io_poll_remove_double(req);
if (likely(poll->head)) {
- spin_lock(&poll->head->lock);
+ spin_lock_irq(&poll->head->lock);
if (unlikely(list_empty(&poll->wait.entry))) {
if (ipt->error)
cancel = true;
@@ -5165,7 +5494,7 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
WRITE_ONCE(poll->canceled, true);
else if (!poll->done) /* actually waiting for an event */
io_poll_req_insert(req);
- spin_unlock(&poll->head->lock);
+ spin_unlock_irq(&poll->head->lock);
}
return mask;
@@ -5207,7 +5536,7 @@ static int io_arm_poll_handler(struct io_kiocb *req)
}
/* if we can't nonblock try, then no point in arming a poll handler */
- if (!io_file_supports_async(req, rw))
+ if (!io_file_supports_nowait(req, rw))
return IO_APOLL_ABORTED;
apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
@@ -5217,16 +5546,14 @@ static int io_arm_poll_handler(struct io_kiocb *req)
req->apoll = apoll;
req->flags |= REQ_F_POLLED;
ipt.pt._qproc = io_async_queue_proc;
+ io_req_set_refcount(req);
ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
io_async_wake);
- if (ret || ipt.error) {
- spin_unlock_irq(&ctx->completion_lock);
- if (ret)
- return IO_APOLL_READY;
- return IO_APOLL_ABORTED;
- }
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
+ if (ret || ipt.error)
+ return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
+
trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data,
mask, apoll->poll.events);
return IO_APOLL_OK;
@@ -5240,19 +5567,19 @@ static bool __io_poll_remove_one(struct io_kiocb *req,
if (!poll->head)
return false;
- spin_lock(&poll->head->lock);
+ spin_lock_irq(&poll->head->lock);
if (do_cancel)
WRITE_ONCE(poll->canceled, true);
if (!list_empty(&poll->wait.entry)) {
list_del_init(&poll->wait.entry);
do_complete = true;
}
- spin_unlock(&poll->head->lock);
+ spin_unlock_irq(&poll->head->lock);
hash_del(&req->hash_node);
return do_complete;
}
-static bool io_poll_remove_waitqs(struct io_kiocb *req)
+static bool io_poll_remove_one(struct io_kiocb *req)
__must_hold(&req->ctx->completion_lock)
{
bool do_complete;
@@ -5260,26 +5587,12 @@ static bool io_poll_remove_waitqs(struct io_kiocb *req)
io_poll_remove_double(req);
do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true);
- if (req->opcode != IORING_OP_POLL_ADD && do_complete) {
- /* non-poll requests have submit ref still */
- req_ref_put(req);
- }
- return do_complete;
-}
-
-static bool io_poll_remove_one(struct io_kiocb *req)
- __must_hold(&req->ctx->completion_lock)
-{
- bool do_complete;
-
- do_complete = io_poll_remove_waitqs(req);
if (do_complete) {
io_cqring_fill_event(req->ctx, req->user_data, -ECANCELED, 0);
io_commit_cqring(req->ctx);
req_set_fail(req);
- io_put_req_deferred(req, 1);
+ io_put_req_deferred(req);
}
-
return do_complete;
}
@@ -5293,7 +5606,7 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
struct io_kiocb *req;
int posted = 0, i;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
struct hlist_head *list;
@@ -5303,7 +5616,7 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
posted += io_poll_remove_one(req);
}
}
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
if (posted)
io_cqring_ev_posted(ctx);
@@ -5366,7 +5679,7 @@ static int io_poll_update_prep(struct io_kiocb *req,
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index)
+ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
flags = READ_ONCE(sqe->len);
if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
@@ -5421,6 +5734,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
if (flags & ~IORING_POLL_ADD_MULTI)
return -EINVAL;
+ io_req_set_refcount(req);
poll->events = io_poll_parse_events(sqe, flags);
return 0;
}
@@ -5441,7 +5755,7 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
ipt.error = 0;
io_poll_complete(req, mask);
}
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
if (mask) {
io_cqring_ev_posted(ctx);
@@ -5458,7 +5772,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
bool completing;
int ret;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
preq = io_poll_find(ctx, req->poll_update.old_user_data, true);
if (!preq) {
ret = -ENOENT;
@@ -5485,7 +5799,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
ret = 0;
err:
if (ret < 0) {
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
req_set_fail(req);
io_req_complete(req, ret);
return 0;
@@ -5498,7 +5812,7 @@ err:
}
if (req->poll_update.update_user_data)
preq->user_data = req->poll_update.new_user_data;
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
/* complete update request, we're done with it */
io_req_complete(req, ret);
@@ -5513,6 +5827,12 @@ err:
return 0;
}
+static void io_req_task_timeout(struct io_kiocb *req, bool *locked)
+{
+ req_set_fail(req);
+ io_req_complete_post(req, -ETIME, 0);
+}
+
static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
{
struct io_timeout_data *data = container_of(timer,
@@ -5521,24 +5841,20 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
- spin_lock_irqsave(&ctx->completion_lock, flags);
+ spin_lock_irqsave(&ctx->timeout_lock, flags);
list_del_init(&req->timeout.list);
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
+ spin_unlock_irqrestore(&ctx->timeout_lock, flags);
- io_cqring_fill_event(ctx, req->user_data, -ETIME, 0);
- io_commit_cqring(ctx);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
-
- io_cqring_ev_posted(ctx);
- req_set_fail(req);
- io_put_req(req);
+ req->io_task_work.func = io_req_task_timeout;
+ io_req_task_work_add(req);
return HRTIMER_NORESTART;
}
static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
__u64 user_data)
- __must_hold(&ctx->completion_lock)
+ __must_hold(&ctx->timeout_lock)
{
struct io_timeout_data *io;
struct io_kiocb *req;
@@ -5561,6 +5877,7 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
__must_hold(&ctx->completion_lock)
+ __must_hold(&ctx->timeout_lock)
{
struct io_kiocb *req = io_timeout_extract(ctx, user_data);
@@ -5569,13 +5886,54 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
req_set_fail(req);
io_cqring_fill_event(ctx, req->user_data, -ECANCELED, 0);
- io_put_req_deferred(req, 1);
+ io_put_req_deferred(req);
+ return 0;
+}
+
+static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
+{
+ switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
+ case IORING_TIMEOUT_BOOTTIME:
+ return CLOCK_BOOTTIME;
+ case IORING_TIMEOUT_REALTIME:
+ return CLOCK_REALTIME;
+ default:
+ /* can't happen, vetted at prep time */
+ WARN_ON_ONCE(1);
+ fallthrough;
+ case 0:
+ return CLOCK_MONOTONIC;
+ }
+}
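From userspace the new clock selection is just a timeout flag. A minimal hedged sketch using liburing's io_uring_prep_timeout(), assuming the IORING_TIMEOUT_BOOTTIME flag from this series' uapi:

#include <liburing.h>

/* a 5s timeout measured against CLOCK_BOOTTIME, so time spent suspended
 * counts; with no clock flag the default stays CLOCK_MONOTONIC */
static void queue_boottime_timeout(struct io_uring *ring)
{
	/* must stay valid until io_uring_submit() reads it */
	static struct __kernel_timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_BOOTTIME);
}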
+
+static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
+ struct timespec64 *ts, enum hrtimer_mode mode)
+ __must_hold(&ctx->timeout_lock)
+{
+ struct io_timeout_data *io;
+ struct io_kiocb *req;
+ bool found = false;
+
+ list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) {
+ found = user_data == req->user_data;
+ if (found)
+ break;
+ }
+ if (!found)
+ return -ENOENT;
+
+ io = req->async_data;
+ if (hrtimer_try_to_cancel(&io->timer) == -1)
+ return -EALREADY;
+ hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
+ io->timer.function = io_link_timeout_fn;
+ hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
return 0;
}
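io_linked_timeout_update() backs the new IORING_LINK_TIMEOUT_UPDATE flag of IORING_OP_TIMEOUT_REMOVE: it looks up an armed linked timeout by user_data, cancels the hrtimer if it has not fired yet (-EALREADY otherwise), and rearms it with the new expiry. A hedged userspace sketch via liburing's io_uring_prep_timeout_update():

#include <liburing.h>

/* push an armed linked timeout (identified by its user_data) out to a
 * new expiry; *ts must stay valid until io_uring_submit() */
static void bump_link_timeout(struct io_uring *ring, __u64 target,
			      struct __kernel_timespec *ts)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	io_uring_prep_timeout_update(sqe, ts, target,
				     IORING_LINK_TIMEOUT_UPDATE);
}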
static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
struct timespec64 *ts, enum hrtimer_mode mode)
- __must_hold(&ctx->completion_lock)
+ __must_hold(&ctx->timeout_lock)
{
struct io_kiocb *req = io_timeout_extract(ctx, user_data);
struct io_timeout_data *data;
@@ -5586,7 +5944,7 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
req->timeout.off = 0; /* noseq */
data = req->async_data;
list_add_tail(&req->timeout.list, &ctx->timeout_list);
- hrtimer_init(&data->timer, CLOCK_MONOTONIC, mode);
+ hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
data->timer.function = io_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
return 0;
@@ -5601,13 +5959,18 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
return -EINVAL;
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->len)
+ if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in)
return -EINVAL;
+ tr->ltimeout = false;
tr->addr = READ_ONCE(sqe->addr);
tr->flags = READ_ONCE(sqe->timeout_flags);
- if (tr->flags & IORING_TIMEOUT_UPDATE) {
- if (tr->flags & ~(IORING_TIMEOUT_UPDATE|IORING_TIMEOUT_ABS))
+ if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
+ if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
+ return -EINVAL;
+ if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
+ tr->ltimeout = true;
+ if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
return -EINVAL;
if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
return -EFAULT;
@@ -5634,20 +5997,26 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
struct io_ring_ctx *ctx = req->ctx;
int ret;
- spin_lock_irq(&ctx->completion_lock);
- if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE))
+ if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) {
+ spin_lock(&ctx->completion_lock);
+ spin_lock_irq(&ctx->timeout_lock);
ret = io_timeout_cancel(ctx, tr->addr);
- else
- ret = io_timeout_update(ctx, tr->addr, &tr->ts,
- io_translate_timeout_mode(tr->flags));
+ spin_unlock_irq(&ctx->timeout_lock);
+ spin_unlock(&ctx->completion_lock);
+ } else {
+ enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
+
+ spin_lock_irq(&ctx->timeout_lock);
+ if (tr->ltimeout)
+ ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
+ else
+ ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
+ spin_unlock_irq(&ctx->timeout_lock);
+ }
- io_cqring_fill_event(ctx, req->user_data, ret, 0);
- io_commit_cqring(ctx);
- spin_unlock_irq(&ctx->completion_lock);
- io_cqring_ev_posted(ctx);
if (ret < 0)
req_set_fail(req);
- io_put_req(req);
+ io_req_complete_post(req, ret, 0);
return 0;
}
@@ -5660,14 +6029,19 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->ioprio || sqe->buf_index || sqe->len != 1)
+ if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
+ sqe->splice_fd_in)
return -EINVAL;
if (off && is_timeout_link)
return -EINVAL;
flags = READ_ONCE(sqe->timeout_flags);
- if (flags & ~IORING_TIMEOUT_ABS)
+ if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK))
+ return -EINVAL;
+ /* more than one clock specified is invalid, obviously */
+ if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
return -EINVAL;
+ INIT_LIST_HEAD(&req->timeout.list);
req->timeout.off = off;
if (unlikely(off && !req->ctx->off_timeout_used))
req->ctx->off_timeout_used = true;
@@ -5677,14 +6051,24 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
data = req->async_data;
data->req = req;
+ data->flags = flags;
if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
return -EFAULT;
data->mode = io_translate_timeout_mode(flags);
- hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
- if (is_timeout_link)
- io_req_track_inflight(req);
+ hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
+
+ if (is_timeout_link) {
+ struct io_submit_link *link = &req->ctx->submit_state.link;
+
+ if (!link->head)
+ return -EINVAL;
+ if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
+ return -EINVAL;
+ req->timeout.head = link->last;
+ link->last->flags |= REQ_F_ARM_LTIMEOUT;
+ }
return 0;
}
@@ -5695,7 +6079,7 @@ static int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
struct list_head *entry;
u32 tail, off = req->timeout.off;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock_irq(&ctx->timeout_lock);
/*
* sqe->off holds how many events that need to occur for this
@@ -5734,7 +6118,7 @@ add:
list_add(&req->timeout.list, entry);
data->timer.function = io_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock_irq(&ctx->timeout_lock);
return 0;
}
@@ -5777,31 +6161,27 @@ static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
return ret;
}
-static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
- struct io_kiocb *req, __u64 sqe_addr,
- int success_ret)
+static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
{
- unsigned long flags;
+ struct io_ring_ctx *ctx = req->ctx;
int ret;
+ WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
+
ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
- spin_lock_irqsave(&ctx->completion_lock, flags);
if (ret != -ENOENT)
- goto done;
+ return ret;
+
+ spin_lock(&ctx->completion_lock);
+ spin_lock_irq(&ctx->timeout_lock);
ret = io_timeout_cancel(ctx, sqe_addr);
+ spin_unlock_irq(&ctx->timeout_lock);
if (ret != -ENOENT)
- goto done;
+ goto out;
ret = io_poll_cancel(ctx, sqe_addr, false);
-done:
- if (!ret)
- ret = success_ret;
- io_cqring_fill_event(ctx, req->user_data, ret, 0);
- io_commit_cqring(ctx);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
- io_cqring_ev_posted(ctx);
-
- if (ret < 0)
- req_set_fail(req);
+out:
+ spin_unlock(&ctx->completion_lock);
+ return ret;
}
static int io_async_cancel_prep(struct io_kiocb *req,
@@ -5811,7 +6191,8 @@ static int io_async_cancel_prep(struct io_kiocb *req,
return -EINVAL;
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags)
+ if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags ||
+ sqe->splice_fd_in)
return -EINVAL;
req->cancel.addr = READ_ONCE(sqe->addr);
@@ -5825,18 +6206,9 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
struct io_tctx_node *node;
int ret;
- /* tasks should wait for their io-wq threads, so safe w/o sync */
- ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
- spin_lock_irq(&ctx->completion_lock);
- if (ret != -ENOENT)
- goto done;
- ret = io_timeout_cancel(ctx, sqe_addr);
- if (ret != -ENOENT)
- goto done;
- ret = io_poll_cancel(ctx, sqe_addr, false);
+ ret = io_try_cancel_userdata(req, sqe_addr);
if (ret != -ENOENT)
goto done;
- spin_unlock_irq(&ctx->completion_lock);
/* slow path, try all io-wq's */
io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
@@ -5849,17 +6221,10 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
break;
}
io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
-
- spin_lock_irq(&ctx->completion_lock);
done:
- io_cqring_fill_event(ctx, req->user_data, ret, 0);
- io_commit_cqring(ctx);
- spin_unlock_irq(&ctx->completion_lock);
- io_cqring_ev_posted(ctx);
-
if (ret < 0)
req_set_fail(req);
- io_put_req(req);
+ io_req_complete_post(req, ret, 0);
return 0;
}
@@ -5868,7 +6233,7 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
{
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
- if (sqe->ioprio || sqe->rw_flags)
+ if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
req->rsrc_update.offset = READ_ONCE(sqe->off);
@@ -5976,6 +6341,12 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_renameat_prep(req, sqe);
case IORING_OP_UNLINKAT:
return io_unlinkat_prep(req, sqe);
+ case IORING_OP_MKDIRAT:
+ return io_mkdirat_prep(req, sqe);
+ case IORING_OP_SYMLINKAT:
+ return io_symlinkat_prep(req, sqe);
+ case IORING_OP_LINKAT:
+ return io_linkat_prep(req, sqe);
}
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6070,11 +6441,11 @@ fail:
return true;
}
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
kfree(de);
- io_queue_async_work(req);
+ io_queue_async_work(req, NULL);
return true;
}
@@ -6082,7 +6453,7 @@ fail:
de->req = req;
de->seq = seq;
list_add_tail(&de->list, &ctx->defer_list);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
return true;
}
@@ -6139,6 +6510,17 @@ static void io_clean_op(struct io_kiocb *req)
case IORING_OP_UNLINKAT:
putname(req->unlink.filename);
break;
+ case IORING_OP_MKDIRAT:
+ putname(req->mkdir.filename);
+ break;
+ case IORING_OP_SYMLINKAT:
+ putname(req->symlink.oldpath);
+ putname(req->symlink.newpath);
+ break;
+ case IORING_OP_LINKAT:
+ putname(req->hardlink.oldpath);
+ putname(req->hardlink.newpath);
+ break;
}
}
if ((req->flags & REQ_F_POLLED) && req->apoll) {
@@ -6267,6 +6649,15 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
case IORING_OP_UNLINKAT:
ret = io_unlinkat(req, issue_flags);
break;
+ case IORING_OP_MKDIRAT:
+ ret = io_mkdirat(req, issue_flags);
+ break;
+ case IORING_OP_SYMLINKAT:
+ ret = io_symlinkat(req, issue_flags);
+ break;
+ case IORING_OP_LINKAT:
+ ret = io_linkat(req, issue_flags);
+ break;
default:
ret = -EINVAL;
break;
@@ -6283,16 +6674,31 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
+static struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
+{
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+
+ req = io_put_req_find_next(req);
+ return req ? &req->work : NULL;
+}
+
static void io_wq_submit_work(struct io_wq_work *work)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
struct io_kiocb *timeout;
int ret = 0;
+ /* one will be dropped by ->io_free_work() after returning to io-wq */
+ if (!(req->flags & REQ_F_REFCOUNT))
+ __io_req_set_refcount(req, 2);
+ else
+ req_ref_get(req);
+
timeout = io_prep_linked_timeout(req);
if (timeout)
io_queue_linked_timeout(timeout);
+ /* either cancelled or io-wq is dying, so don't touch tctx->iowq */
if (work->flags & IO_WQ_WORK_CANCEL)
ret = -ECANCELED;
@@ -6311,29 +6717,14 @@ static void io_wq_submit_work(struct io_wq_work *work)
}
/* avoid locking problems by failing it from a clean context */
- if (ret) {
- /* io-wq is going to take one down */
- req_ref_get(req);
+ if (ret)
io_req_task_queue_fail(req, ret);
- }
}
-#define FFS_ASYNC_READ 0x1UL
-#define FFS_ASYNC_WRITE 0x2UL
-#ifdef CONFIG_64BIT
-#define FFS_ISREG 0x4UL
-#else
-#define FFS_ISREG 0x0UL
-#endif
-#define FFS_MASK ~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG)
-
static inline struct io_fixed_file *io_fixed_file_slot(struct io_file_table *table,
- unsigned i)
+ unsigned i)
{
- struct io_fixed_file *table_l2;
-
- table_l2 = table->files[i >> IORING_FILE_TABLE_SHIFT];
- return &table_l2[i & IORING_FILE_TABLE_MASK];
+ return &table->files[i];
}
static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
@@ -6348,45 +6739,69 @@ static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file
{
unsigned long file_ptr = (unsigned long) file;
- if (__io_file_supports_async(file, READ))
+ if (__io_file_supports_nowait(file, READ))
file_ptr |= FFS_ASYNC_READ;
- if (__io_file_supports_async(file, WRITE))
+ if (__io_file_supports_nowait(file, WRITE))
file_ptr |= FFS_ASYNC_WRITE;
if (S_ISREG(file_inode(file)->i_mode))
file_ptr |= FFS_ISREG;
file_slot->file_ptr = file_ptr;
}
-static struct file *io_file_get(struct io_submit_state *state,
- struct io_kiocb *req, int fd, bool fixed)
+static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx,
+ struct io_kiocb *req, int fd)
{
- struct io_ring_ctx *ctx = req->ctx;
struct file *file;
+ unsigned long file_ptr;
- if (fixed) {
- unsigned long file_ptr;
+ if (unlikely((unsigned int)fd >= ctx->nr_user_files))
+ return NULL;
+ fd = array_index_nospec(fd, ctx->nr_user_files);
+ file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
+ file = (struct file *) (file_ptr & FFS_MASK);
+ file_ptr &= ~FFS_MASK;
+ /* mask in overlapping REQ_F and FFS bits */
+ req->flags |= (file_ptr << REQ_F_NOWAIT_READ_BIT);
+ io_req_set_rsrc_node(req);
+ return file;
+}
- if (unlikely((unsigned int)fd >= ctx->nr_user_files))
- return NULL;
- fd = array_index_nospec(fd, ctx->nr_user_files);
- file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
- file = (struct file *) (file_ptr & FFS_MASK);
- file_ptr &= ~FFS_MASK;
- /* mask in overlapping REQ_F and FFS bits */
- req->flags |= (file_ptr << REQ_F_ASYNC_READ_BIT);
- io_req_set_rsrc_node(req);
- } else {
- trace_io_uring_file_get(ctx, fd);
- file = __io_file_get(state, fd);
+static struct file *io_file_get_normal(struct io_ring_ctx *ctx,
+ struct io_kiocb *req, int fd)
+{
+ struct file *file = fget(fd);
- /* we don't allow fixed io_uring files */
- if (file && unlikely(file->f_op == &io_uring_fops))
- io_req_track_inflight(req);
- }
+ trace_io_uring_file_get(ctx, fd);
+ /* we don't allow fixed io_uring files */
+ if (file && unlikely(file->f_op == &io_uring_fops))
+ io_req_track_inflight(req);
return file;
}
+static inline struct file *io_file_get(struct io_ring_ctx *ctx,
+ struct io_kiocb *req, int fd, bool fixed)
+{
+ if (fixed)
+ return io_file_get_fixed(ctx, req, fd);
+ else
+ return io_file_get_normal(ctx, req, fd);
+}
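io_file_get_fixed() keeps the standard Spectre-v1 hardening shape: bounds-check the untrusted index, then clamp it with array_index_nospec() before the dereference, so a mispredicted branch cannot speculatively read past the table and leak data through the cache. The generic form of that idiom, as an illustrative kernel-style fragment only:

/* idx comes from userspace and is untrusted */
if (unlikely(idx >= nr_entries))
	return NULL;
idx = array_index_nospec(idx, nr_entries);	/* clamp under speculation */
return &table[idx];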
+
+static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
+{
+ struct io_kiocb *prev = req->timeout.prev;
+ int ret;
+
+ if (prev) {
+ ret = io_try_cancel_userdata(req, prev->user_data);
+ io_req_complete_post(req, ret ?: -ETIME, 0);
+ io_put_req(prev);
+ } else {
+ io_req_complete_post(req, -ETIME, 0);
+ }
+}
+
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
{
struct io_timeout_data *data = container_of(timer,
@@ -6395,7 +6810,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
- spin_lock_irqsave(&ctx->completion_lock, flags);
+ spin_lock_irqsave(&ctx->timeout_lock, flags);
prev = req->timeout.head;
req->timeout.head = NULL;
@@ -6408,15 +6823,12 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
if (!req_ref_inc_not_zero(prev))
prev = NULL;
}
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ list_del(&req->timeout.list);
+ req->timeout.prev = prev;
+ spin_unlock_irqrestore(&ctx->timeout_lock, flags);
- if (prev) {
- io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
- io_put_req_deferred(prev, 1);
- io_put_req_deferred(req, 1);
- } else {
- io_req_complete_post(req, -ETIME, 0);
- }
+ req->io_task_work.func = io_req_task_link_timeout;
+ io_req_task_work_add(req);
return HRTIMER_NORESTART;
}
@@ -6424,7 +6836,7 @@ static void io_queue_linked_timeout(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock_irq(&ctx->timeout_lock);
/*
* If the back reference is NULL, then our linked request finished
* before we got a chance to setup the timer
@@ -6435,29 +6847,17 @@ static void io_queue_linked_timeout(struct io_kiocb *req)
data->timer.function = io_link_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
data->mode);
+ list_add_tail(&req->timeout.list, &ctx->ltimeout_list);
}
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock_irq(&ctx->timeout_lock);
/* drop submission reference */
io_put_req(req);
}
-static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
-{
- struct io_kiocb *nxt = req->link;
-
- if (!nxt || (req->flags & REQ_F_LINK_TIMEOUT) ||
- nxt->opcode != IORING_OP_LINK_TIMEOUT)
- return NULL;
-
- nxt->timeout.head = req;
- nxt->flags |= REQ_F_LTIMEOUT_ACTIVE;
- req->flags |= REQ_F_LINK_TIMEOUT;
- return nxt;
-}
-
static void __io_queue_sqe(struct io_kiocb *req)
+ __must_hold(&req->ctx->uring_lock)
{
- struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
+ struct io_kiocb *linked_timeout;
int ret;
issue_sqe:
@@ -6468,50 +6868,60 @@ issue_sqe:
* doesn't support non-blocking read/write attempts
*/
if (likely(!ret)) {
- /* drop submission reference */
if (req->flags & REQ_F_COMPLETE_INLINE) {
struct io_ring_ctx *ctx = req->ctx;
- struct io_comp_state *cs = &ctx->submit_state.comp;
+ struct io_submit_state *state = &ctx->submit_state;
- cs->reqs[cs->nr++] = req;
- if (cs->nr == ARRAY_SIZE(cs->reqs))
+ state->compl_reqs[state->compl_nr++] = req;
+ if (state->compl_nr == ARRAY_SIZE(state->compl_reqs))
io_submit_flush_completions(ctx);
- } else {
- io_put_req(req);
+ return;
}
+
+ linked_timeout = io_prep_linked_timeout(req);
+ if (linked_timeout)
+ io_queue_linked_timeout(linked_timeout);
} else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
+ linked_timeout = io_prep_linked_timeout(req);
+
switch (io_arm_poll_handler(req)) {
case IO_APOLL_READY:
+ if (linked_timeout)
+ io_unprep_linked_timeout(req);
goto issue_sqe;
case IO_APOLL_ABORTED:
/*
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
*/
- io_queue_async_work(req);
+ io_queue_async_work(req, NULL);
break;
}
+
+ if (linked_timeout)
+ io_queue_linked_timeout(linked_timeout);
} else {
io_req_complete_failed(req, ret);
}
- if (linked_timeout)
- io_queue_linked_timeout(linked_timeout);
}
static inline void io_queue_sqe(struct io_kiocb *req)
+ __must_hold(&req->ctx->uring_lock)
{
if (unlikely(req->ctx->drain_active) && io_drain_req(req))
return;
- if (likely(!(req->flags & REQ_F_FORCE_ASYNC))) {
+ if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
__io_queue_sqe(req);
+ } else if (req->flags & REQ_F_FAIL) {
+ io_req_complete_failed(req, req->result);
} else {
int ret = io_req_prep_async(req);
if (unlikely(ret))
io_req_complete_failed(req, ret);
else
- io_queue_async_work(req);
+ io_queue_async_work(req, NULL);
}
}
@@ -6543,19 +6953,19 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct io_uring_sqe *sqe)
+ __must_hold(&ctx->uring_lock)
{
struct io_submit_state *state;
unsigned int sqe_flags;
int personality, ret = 0;
+ /* req is partially pre-initialised, see io_preinit_req() */
req->opcode = READ_ONCE(sqe->opcode);
/* same numerical values with corresponding REQ_F_*, safe to copy */
req->flags = sqe_flags = READ_ONCE(sqe->flags);
req->user_data = READ_ONCE(sqe->user_data);
req->file = NULL;
req->fixed_rsrc_refs = NULL;
- /* one is dropped after submission, the other at completion */
- atomic_set(&req->refs, 2);
req->task = current;
/* enforce forwards compatibility on users */
@@ -6593,9 +7003,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
if (io_op_defs[req->opcode].needs_file) {
- bool fixed = req->flags & REQ_F_FIXED_FILE;
-
- req->file = io_file_get(state, req, READ_ONCE(sqe->fd), fixed);
+ req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd),
+ (sqe_flags & IOSQE_FIXED_FILE));
if (unlikely(!req->file))
ret = -EBADF;
}
@@ -6606,6 +7015,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct io_uring_sqe *sqe)
+ __must_hold(&ctx->uring_lock)
{
struct io_submit_link *link = &ctx->submit_state.link;
int ret;
@@ -6613,20 +7023,34 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
ret = io_init_req(ctx, req, sqe);
if (unlikely(ret)) {
fail_req:
+ /* fail even hard links since we don't submit */
if (link->head) {
- /* fail even hard links since we don't submit */
- req_set_fail(link->head);
- io_req_complete_failed(link->head, -ECANCELED);
- link->head = NULL;
+ /*
+			 * Whether a link req failed or was cancelled can be
+			 * judged by REQ_F_FAIL, but the head is an exception:
+			 * it may have REQ_F_FAIL set because another req in
+			 * the chain failed. Leverage req->result to tell a
+			 * head that failed on its own from one failed by
+			 * another req, so the correct ret code is set for it.
+			 * Init result here to avoid affecting the normal path.
+ */
+ if (!(link->head->flags & REQ_F_FAIL))
+ req_fail_link_node(link->head, -ECANCELED);
+ } else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
+ /*
+			 * the current req is a normal req; return the error
+			 * and thus break the submission loop.
+ */
+ io_req_complete_failed(req, ret);
+ return ret;
}
- io_req_complete_failed(req, ret);
- return ret;
+ req_fail_link_node(req, ret);
+ } else {
+ ret = io_req_prep(req, sqe);
+ if (unlikely(ret))
+ goto fail_req;
}
- ret = io_req_prep(req, sqe);
- if (unlikely(ret))
- goto fail_req;
-
/* don't need @sqe from now on */
trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data,
req->flags, true,
@@ -6642,9 +7066,14 @@ fail_req:
if (link->head) {
struct io_kiocb *head = link->head;
- ret = io_req_prep_async(req);
- if (unlikely(ret))
- goto fail_req;
+ if (!(req->flags & REQ_F_FAIL)) {
+ ret = io_req_prep_async(req);
+ if (unlikely(ret)) {
+ req_fail_link_node(req, ret);
+ if (!(head->flags & REQ_F_FAIL))
+ req_fail_link_node(head, -ECANCELED);
+ }
+ }
trace_io_uring_link(ctx, req, head);
link->last->link = req;
link->last = req;
@@ -6674,11 +7103,10 @@ static void io_submit_state_end(struct io_submit_state *state,
{
if (state->link.head)
io_queue_sqe(state->link.head);
- if (state->comp.nr)
+ if (state->compl_nr)
io_submit_flush_completions(ctx);
if (state->plug_started)
blk_finish_plug(&state->plug);
- io_state_file_put(state);
}
/*
@@ -6738,26 +7166,17 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
}
static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
+ __must_hold(&ctx->uring_lock)
{
- struct io_uring_task *tctx;
int submitted = 0;
/* make sure SQ entry isn't read before tail */
nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx));
if (!percpu_ref_tryget_many(&ctx->refs, nr))
return -EAGAIN;
+ io_get_task_refs(nr);
- tctx = current->io_uring;
- tctx->cached_refs -= nr;
- if (unlikely(tctx->cached_refs < 0)) {
- unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR;
-
- percpu_counter_add(&tctx->inflight, refill);
- refcount_add(refill, &current->usage);
- tctx->cached_refs += refill;
- }
io_submit_state_start(&ctx->submit_state, nr);
-
while (submitted < nr) {
const struct io_uring_sqe *sqe;
struct io_kiocb *req;
@@ -6770,7 +7189,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
}
sqe = io_get_sqe(ctx);
if (unlikely(!sqe)) {
- kmem_cache_free(req_cachep, req);
+ list_add(&req->inflight_entry, &ctx->submit_state.free_list);
break;
}
/* will complete beyond this point, count as submitted */
@@ -6802,16 +7221,18 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
{
/* Tell userspace we may need a wakeup call */
- spin_lock_irq(&ctx->completion_lock);
- ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
- spin_unlock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
+ WRITE_ONCE(ctx->rings->sq_flags,
+ ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP);
+ spin_unlock(&ctx->completion_lock);
}
static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
{
- spin_lock_irq(&ctx->completion_lock);
- ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
- spin_unlock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
+ WRITE_ONCE(ctx->rings->sq_flags,
+ ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP);
+ spin_unlock(&ctx->completion_lock);
}
static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
@@ -6833,7 +7254,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
mutex_lock(&ctx->uring_lock);
if (!list_empty(&ctx->iopoll_list))
- io_do_iopoll(ctx, &nr_events, 0, true);
+ io_do_iopoll(ctx, &nr_events, 0);
/*
* Don't submit if refs are dying, good for io_uring_register(),
@@ -6968,21 +7389,21 @@ static int io_sq_thread(void *data)
struct io_wait_queue {
struct wait_queue_entry wq;
struct io_ring_ctx *ctx;
- unsigned to_wait;
+ unsigned cq_tail;
unsigned nr_timeouts;
};
static inline bool io_should_wake(struct io_wait_queue *iowq)
{
struct io_ring_ctx *ctx = iowq->ctx;
+ int dist = ctx->cached_cq_tail - (int) iowq->cq_tail;
/*
	 * Wake up if we have enough events, or if a timeout occurred since we
	 * started waiting. For timeouts, we always want to return to userspace,
	 * regardless of event count. The signed distance to the target CQ tail
	 * keeps the comparison correct across tail wraparound.
*/
- return io_cqring_events(ctx) >= iowq->to_wait ||
- atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
+ return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
}
static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
@@ -7038,21 +7459,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
const sigset_t __user *sig, size_t sigsz,
struct __kernel_timespec __user *uts)
{
- struct io_wait_queue iowq = {
- .wq = {
- .private = current,
- .func = io_wake_function,
- .entry = LIST_HEAD_INIT(iowq.wq.entry),
- },
- .ctx = ctx,
- .to_wait = min_events,
- };
+ struct io_wait_queue iowq;
struct io_rings *rings = ctx->rings;
signed long timeout = MAX_SCHEDULE_TIMEOUT;
int ret;
do {
- io_cqring_overflow_flush(ctx, false);
+ io_cqring_overflow_flush(ctx);
if (io_cqring_events(ctx) >= min_events)
return 0;
if (!io_run_task_work())
@@ -7080,11 +7493,17 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
timeout = timespec64_to_jiffies(&ts);
}
+ init_waitqueue_func_entry(&iowq.wq, io_wake_function);
+ iowq.wq.private = current;
+ INIT_LIST_HEAD(&iowq.wq.entry);
+ iowq.ctx = ctx;
iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+ iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
+
trace_io_uring_cqring_wait(ctx, min_events);
do {
/* if we can't even flush overflow, don't wait for more */
- if (!io_cqring_overflow_flush(ctx, false)) {
+ if (!io_cqring_overflow_flush(ctx)) {
ret = -EBUSY;
break;
}
@@ -7115,14 +7534,14 @@ static void **io_alloc_page_table(size_t size)
size_t init_size = size;
void **table;
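+	/* account the page-table allocations to the caller's memory cgroup */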
- table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL);
+ table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL_ACCOUNT);
if (!table)
return NULL;
for (i = 0; i < nr_tables; i++) {
unsigned int this_size = min_t(size_t, size, PAGE_SIZE);
- table[i] = kzalloc(this_size, GFP_KERNEL);
+ table[i] = kzalloc(this_size, GFP_KERNEL_ACCOUNT);
if (!table[i]) {
io_free_page_table(table, init_size);
return NULL;
@@ -7132,20 +7551,54 @@ static void **io_alloc_page_table(size_t size)
return table;
}
-static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx)
+static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
{
- spin_lock_bh(&ctx->rsrc_ref_lock);
+ percpu_ref_exit(&ref_node->refs);
+ kfree(ref_node);
}
-static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx)
+static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
{
- spin_unlock_bh(&ctx->rsrc_ref_lock);
+ struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
+ struct io_ring_ctx *ctx = node->rsrc_data->ctx;
+ unsigned long flags;
+ bool first_add = false;
+
+ spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
+ node->done = true;
+
+ while (!list_empty(&ctx->rsrc_ref_list)) {
+ node = list_first_entry(&ctx->rsrc_ref_list,
+ struct io_rsrc_node, node);
+ /* recycle ref nodes in order */
+ if (!node->done)
+ break;
+ list_del(&node->node);
+ first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
+ }
+ spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
+
+ if (first_add)
+ mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
}
-static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
+static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
{
- percpu_ref_exit(&ref_node->refs);
- kfree(ref_node);
+ struct io_rsrc_node *ref_node;
+
+ ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
+ if (!ref_node)
+ return NULL;
+
+ if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero,
+ 0, GFP_KERNEL)) {
+ kfree(ref_node);
+ return NULL;
+ }
+ INIT_LIST_HEAD(&ref_node->node);
+ INIT_LIST_HEAD(&ref_node->rsrc_list);
+ ref_node->done = false;
+ return ref_node;
}
static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
@@ -7158,9 +7611,9 @@ static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
struct io_rsrc_node *rsrc_node = ctx->rsrc_node;
rsrc_node->rsrc_data = data_to_kill;
- io_rsrc_ref_lock(ctx);
+ spin_lock_irq(&ctx->rsrc_ref_lock);
list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
- io_rsrc_ref_unlock(ctx);
+ spin_unlock_irq(&ctx->rsrc_ref_lock);
atomic_inc(&data_to_kill->refs);
percpu_ref_kill(&rsrc_node->refs);
@@ -7199,17 +7652,19 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
/* kill initial ref, already quiesced if zero */
if (atomic_dec_and_test(&data->refs))
break;
+ mutex_unlock(&ctx->uring_lock);
flush_delayed_work(&ctx->rsrc_put_work);
ret = wait_for_completion_interruptible(&data->done);
- if (!ret)
+ if (!ret) {
+ mutex_lock(&ctx->uring_lock);
break;
+ }
atomic_inc(&data->refs);
/* wait for all works potentially completing data->done */
flush_delayed_work(&ctx->rsrc_put_work);
reinit_completion(&data->done);
- mutex_unlock(&ctx->uring_lock);
ret = io_run_task_work_sig();
mutex_lock(&ctx->uring_lock);
} while (ret >= 0);
@@ -7277,17 +7732,14 @@ fail:
static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
{
- size_t size = nr_files * sizeof(struct io_fixed_file);
-
- table->files = (struct io_fixed_file **)io_alloc_page_table(size);
+ table->files = kvcalloc(nr_files, sizeof(table->files[0]),
+ GFP_KERNEL_ACCOUNT);
return !!table->files;
}
-static void io_free_file_tables(struct io_file_table *table, unsigned nr_files)
+static void io_free_file_tables(struct io_file_table *table)
{
- size_t size = nr_files * sizeof(struct io_fixed_file);
-
- io_free_page_table((void **)table->files, size);
+ kvfree(table->files);
table->files = NULL;
}
@@ -7312,7 +7764,7 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
fput(file);
}
#endif
- io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
+ io_free_file_tables(&ctx->file_table);
io_rsrc_data_free(ctx->file_data);
ctx->file_data = NULL;
ctx->nr_user_files = 0;
@@ -7628,11 +8080,11 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL;
io_ring_submit_lock(ctx, lock_ring);
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
io_cqring_fill_event(ctx, prsrc->tag, 0, 0);
ctx->cq_extra++;
io_commit_cqring(ctx);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
io_ring_submit_unlock(ctx, lock_ring);
}
@@ -7664,49 +8116,6 @@ static void io_rsrc_put_work(struct work_struct *work)
}
}
-static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
-{
- struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
- struct io_ring_ctx *ctx = node->rsrc_data->ctx;
- bool first_add = false;
-
- io_rsrc_ref_lock(ctx);
- node->done = true;
-
- while (!list_empty(&ctx->rsrc_ref_list)) {
- node = list_first_entry(&ctx->rsrc_ref_list,
- struct io_rsrc_node, node);
- /* recycle ref nodes in order */
- if (!node->done)
- break;
- list_del(&node->node);
- first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
- }
- io_rsrc_ref_unlock(ctx);
-
- if (first_add)
- mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
-}
-
-static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
-{
- struct io_rsrc_node *ref_node;
-
- ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
- if (!ref_node)
- return NULL;
-
- if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero,
- 0, GFP_KERNEL)) {
- kfree(ref_node);
- return NULL;
- }
- INIT_LIST_HEAD(&ref_node->node);
- INIT_LIST_HEAD(&ref_node->rsrc_list);
- ref_node->done = false;
- return ref_node;
-}
-
static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args, u64 __user *tags)
{
@@ -7721,6 +8130,8 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return -EINVAL;
if (nr_args > IORING_MAX_FIXED_FILES)
return -EMFILE;
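+	/* the fixed file table cannot exceed the task's open file limit */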
+ if (nr_args > rlimit(RLIMIT_NOFILE))
+ return -EMFILE;
ret = io_rsrc_node_switch_start(ctx);
if (ret)
return ret;
@@ -7779,7 +8190,7 @@ out_fput:
if (file)
fput(file);
}
- io_free_file_tables(&ctx->file_table, nr_args);
+ io_free_file_tables(&ctx->file_table);
ctx->nr_user_files = 0;
out_free:
io_rsrc_data_free(ctx->file_data);
@@ -7830,6 +8241,46 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
#endif
}
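+/*
+ * Install @file into the fixed file table at @slot_index, making it
+ * usable as a registered (fixed) file from then on.
+ */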
+static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+ unsigned int issue_flags, u32 slot_index)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ struct io_fixed_file *file_slot;
+ int ret = -EBADF;
+
+ io_ring_submit_lock(ctx, !force_nonblock);
+ if (file->f_op == &io_uring_fops)
+ goto err;
+ ret = -ENXIO;
+ if (!ctx->file_data)
+ goto err;
+ ret = -EINVAL;
+ if (slot_index >= ctx->nr_user_files)
+ goto err;
+
+ slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
+ file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
+ ret = -EBADF;
+ if (file_slot->file_ptr)
+ goto err;
+
+ *io_get_tag_slot(ctx->file_data, slot_index) = 0;
+ io_fixed_file_set(file_slot, file);
+ ret = io_sqe_file_register(ctx, file, slot_index);
+ if (ret) {
+ file_slot->file_ptr = 0;
+ goto err;
+ }
+
+ ret = 0;
+err:
+ io_ring_submit_unlock(ctx, !force_nonblock);
+ if (ret)
+ fput(file);
+ return ret;
+}
+
static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
struct io_rsrc_node *node, void *rsrc)
{
@@ -7925,14 +8376,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
return done ? done : err;
}
-static struct io_wq_work *io_free_work(struct io_wq_work *work)
-{
- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
-
- req = io_put_req_find_next(req);
- return req ? &req->work : NULL;
-}
-
static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
struct task_struct *task)
{
@@ -7956,7 +8399,7 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
data.hash = hash;
data.task = task;
- data.free_work = io_free_work;
+ data.free_work = io_wq_free_work;
data.do_work = io_wq_submit_work;
/* Do QD, or 4 * CPUS, whatever is smallest */
@@ -8623,43 +9066,36 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
__io_remove_buffers(ctx, buf, index, -1U);
}
-static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
+static void io_req_cache_free(struct list_head *list)
{
struct io_kiocb *req, *nxt;
- list_for_each_entry_safe(req, nxt, list, compl.list) {
- if (tsk && req->task != tsk)
- continue;
- list_del(&req->compl.list);
+ list_for_each_entry_safe(req, nxt, list, inflight_entry) {
+ list_del(&req->inflight_entry);
kmem_cache_free(req_cachep, req);
}
}
static void io_req_caches_free(struct io_ring_ctx *ctx)
{
- struct io_submit_state *submit_state = &ctx->submit_state;
- struct io_comp_state *cs = &ctx->submit_state.comp;
+ struct io_submit_state *state = &ctx->submit_state;
mutex_lock(&ctx->uring_lock);
- if (submit_state->free_reqs) {
- kmem_cache_free_bulk(req_cachep, submit_state->free_reqs,
- submit_state->reqs);
- submit_state->free_reqs = 0;
+ if (state->free_reqs) {
+ kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
+ state->free_reqs = 0;
}
- io_flush_cached_locked_reqs(ctx, cs);
- io_req_cache_free(&cs->free_list, NULL);
+ io_flush_cached_locked_reqs(ctx, state);
+ io_req_cache_free(&state->free_list);
mutex_unlock(&ctx->uring_lock);
}
-static bool io_wait_rsrc_data(struct io_rsrc_data *data)
+static void io_wait_rsrc_data(struct io_rsrc_data *data)
{
- if (!data)
- return false;
- if (!atomic_dec_and_test(&data->refs))
+ if (data && !atomic_dec_and_test(&data->refs))
wait_for_completion(&data->done);
- return true;
}
static void io_ring_ctx_free(struct io_ring_ctx *ctx)
@@ -8671,10 +9107,14 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
ctx->mm_account = NULL;
}
+ /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
+ io_wait_rsrc_data(ctx->buf_data);
+ io_wait_rsrc_data(ctx->file_data);
+
mutex_lock(&ctx->uring_lock);
- if (io_wait_rsrc_data(ctx->buf_data))
+ if (ctx->buf_data)
__io_sqe_buffers_unregister(ctx);
- if (io_wait_rsrc_data(ctx->file_data))
+ if (ctx->file_data)
__io_sqe_files_unregister(ctx);
if (ctx->rings)
__io_cqring_overflow_flush(ctx, true);
@@ -8700,6 +9140,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
sock_release(ctx->ring_sock);
}
#endif
+ WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
io_mem_free(ctx->rings);
io_mem_free(ctx->sq_sqes);
@@ -8799,6 +9240,7 @@ static void io_ring_exit_work(struct work_struct *work)
{
struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work);
unsigned long timeout = jiffies + HZ * 60 * 5;
+ unsigned long interval = HZ / 20;
struct io_tctx_exit exit;
struct io_tctx_node *node;
int ret;
@@ -8823,8 +9265,11 @@ static void io_ring_exit_work(struct work_struct *work)
io_sq_thread_unpark(sqd);
}
- WARN_ON_ONCE(time_after(jiffies, timeout));
- } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
+ if (WARN_ON_ONCE(time_after(jiffies, timeout))) {
+ /* there is little hope left, don't run it too often */
+ interval = HZ * 60;
+ }
+ } while (!wait_for_completion_timeout(&ctx->ref_comp, interval));
init_completion(&exit.completion);
init_task_work(&exit.task_work, io_tctx_exit_cb);
@@ -8853,8 +9298,8 @@ static void io_ring_exit_work(struct work_struct *work)
mutex_lock(&ctx->uring_lock);
}
mutex_unlock(&ctx->uring_lock);
- spin_lock_irq(&ctx->completion_lock);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
io_ring_ctx_free(ctx);
}
@@ -8866,16 +9311,18 @@ static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
struct io_kiocb *req, *tmp;
int canceled = 0;
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
+ spin_lock_irq(&ctx->timeout_lock);
list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
if (io_match_task(req, tsk, cancel_all)) {
io_kill_timeout(req, -ECANCELED);
canceled++;
}
}
+ spin_unlock_irq(&ctx->timeout_lock);
if (canceled != 0)
io_commit_cqring(ctx);
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
if (canceled != 0)
io_cqring_ev_posted(ctx);
return canceled != 0;
@@ -8931,13 +9378,12 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
bool ret;
if (!cancel->all && (req->flags & REQ_F_LINK_TIMEOUT)) {
- unsigned long flags;
struct io_ring_ctx *ctx = req->ctx;
/* protect against races with linked timeouts */
- spin_lock_irqsave(&ctx->completion_lock, flags);
+ spin_lock(&ctx->completion_lock);
ret = io_match_task(req, cancel->task, cancel->all);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ spin_unlock(&ctx->completion_lock);
} else {
ret = io_match_task(req, cancel->task, cancel->all);
}
@@ -8950,14 +9396,14 @@ static bool io_cancel_defer_files(struct io_ring_ctx *ctx,
struct io_defer_entry *de;
LIST_HEAD(list);
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
list_for_each_entry_reverse(de, &ctx->defer_list, list) {
if (io_match_task(de->req, task, cancel_all)) {
list_cut_position(&list, &ctx->defer_list, &de->list);
break;
}
}
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
if (list_empty(&list))
return false;
@@ -9122,8 +9568,8 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
* Must be after io_uring_del_task_file() (removes nodes under
* uring_lock) to avoid race with io_uring_try_cancel_iowq().
*/
- tctx->io_wq = NULL;
io_wq_put_and_exit(wq);
+ tctx->io_wq = NULL;
}
}
@@ -9139,9 +9585,11 @@ static void io_uring_drop_tctx_refs(struct task_struct *task)
struct io_uring_task *tctx = task->io_uring;
unsigned int refs = tctx->cached_refs;
- tctx->cached_refs = 0;
- percpu_counter_sub(&tctx->inflight, refs);
- put_task_struct_many(task, refs);
+ if (refs) {
+ tctx->cached_refs = 0;
+ percpu_counter_sub(&tctx->inflight, refs);
+ put_task_struct_many(task, refs);
+ }
}
/*
@@ -9162,9 +9610,9 @@ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
if (tctx->io_wq)
io_wq_exit_start(tctx->io_wq);
- io_uring_drop_tctx_refs(current);
atomic_inc(&tctx->in_idle);
do {
+ io_uring_drop_tctx_refs(current);
/* read completions before cancelations */
inflight = tctx_inflight(tctx, !cancel_all);
if (!inflight)
@@ -9188,6 +9636,7 @@ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
}
prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
+ io_uring_drop_tctx_refs(current);
/*
* If we've seen completions, retry without waiting. This
* avoids a race where a completion comes in before we did
@@ -9206,9 +9655,9 @@ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
}
}
-void __io_uring_cancel(struct files_struct *files)
+void __io_uring_cancel(bool cancel_all)
{
- io_uring_cancel_generic(!files, NULL);
+ io_uring_cancel_generic(cancel_all, NULL);
}
static void *io_uring_validate_mmap_request(struct file *file,
@@ -9368,11 +9817,12 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
*/
ret = 0;
if (ctx->flags & IORING_SETUP_SQPOLL) {
- io_cqring_overflow_flush(ctx, false);
+ io_cqring_overflow_flush(ctx);
- ret = -EOWNERDEAD;
- if (unlikely(ctx->sq_data->thread == NULL))
+ if (unlikely(ctx->sq_data->thread == NULL)) {
+ ret = -EOWNERDEAD;
goto out;
+ }
if (flags & IORING_ENTER_SQ_WAKEUP)
wake_up(&ctx->sq_data->wait);
if (flags & IORING_ENTER_SQ_WAIT) {
@@ -9503,7 +9953,7 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
io_uring_show_cred(m, index, cred);
}
seq_printf(m, "PollList:\n");
- spin_lock_irq(&ctx->completion_lock);
+ spin_lock(&ctx->completion_lock);
for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
struct hlist_head *list = &ctx->cancel_hash[i];
struct io_kiocb *req;
@@ -9512,7 +9962,7 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
req->task->task_works != NULL);
}
- spin_unlock_irq(&ctx->completion_lock);
+ spin_unlock(&ctx->completion_lock);
if (has_lock)
mutex_unlock(&ctx->uring_lock);
}
@@ -9840,10 +10290,11 @@ static int io_register_personality(struct io_ring_ctx *ctx)
ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
- if (!ret)
- return id;
- put_cred(creds);
- return ret;
+ if (ret < 0) {
+ put_cred(creds);
+ return ret;
+ }
+ return id;
}
static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg,
@@ -10044,6 +10495,31 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
return io_wq_cpu_affinity(tctx->io_wq, NULL);
}
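+/*
+ * new_count[0] limits bounded io-wq workers and new_count[1] limits
+ * unbounded workers; the previous limits are copied back to userspace.
+ */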
+static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
+ void __user *arg)
+{
+ struct io_uring_task *tctx = current->io_uring;
+ __u32 new_count[2];
+ int i, ret;
+
+ if (!tctx || !tctx->io_wq)
+ return -EINVAL;
+ if (copy_from_user(new_count, arg, sizeof(new_count)))
+ return -EFAULT;
+ for (i = 0; i < ARRAY_SIZE(new_count); i++)
+ if (new_count[i] > INT_MAX)
+ return -EINVAL;
+
+ ret = io_wq_max_workers(tctx->io_wq, new_count);
+ if (ret)
+ return ret;
+
+ if (copy_to_user(arg, new_count, sizeof(new_count)))
+ return -EFAULT;
+
+ return 0;
+}
+
static bool io_register_op_must_quiesce(int op)
{
switch (op) {
@@ -10061,12 +10537,40 @@ static bool io_register_op_must_quiesce(int op)
case IORING_REGISTER_BUFFERS_UPDATE:
case IORING_REGISTER_IOWQ_AFF:
case IORING_UNREGISTER_IOWQ_AFF:
+ case IORING_REGISTER_IOWQ_MAX_WORKERS:
return false;
default:
return true;
}
}
+static int io_ctx_quiesce(struct io_ring_ctx *ctx)
+{
+ long ret;
+
+ percpu_ref_kill(&ctx->refs);
+
+ /*
+ * Drop uring mutex before waiting for references to exit. If another
+ * thread is currently inside io_uring_enter() it might need to grab the
+ * uring_lock to make progress. If we hold it here across the drain
+ * wait, then we can deadlock. It's safe to drop the mutex here, since
+ * no new references will come in after we've killed the percpu ref.
+ */
+ mutex_unlock(&ctx->uring_lock);
+ do {
+ ret = wait_for_completion_interruptible(&ctx->ref_comp);
+ if (!ret)
+ break;
+ ret = io_run_task_work_sig();
+ } while (ret >= 0);
+ mutex_lock(&ctx->uring_lock);
+
+ if (ret)
+ io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
+ return ret;
+}
+
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
__releases(ctx->uring_lock)
@@ -10091,31 +10595,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
}
if (io_register_op_must_quiesce(opcode)) {
- percpu_ref_kill(&ctx->refs);
-
- /*
- * Drop uring mutex before waiting for references to exit. If
- * another thread is currently inside io_uring_enter() it might
- * need to grab the uring_lock to make progress. If we hold it
- * here across the drain wait, then we can deadlock. It's safe
- * to drop the mutex here, since no new references will come in
- * after we've killed the percpu ref.
- */
- mutex_unlock(&ctx->uring_lock);
- do {
- ret = wait_for_completion_interruptible(&ctx->ref_comp);
- if (!ret)
- break;
- ret = io_run_task_work_sig();
- if (ret < 0)
- break;
- } while (1);
- mutex_lock(&ctx->uring_lock);
-
- if (ret) {
- io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
+ ret = io_ctx_quiesce(ctx);
+ if (ret)
return ret;
- }
}
switch (opcode) {
@@ -10212,6 +10694,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_unregister_iowq_aff(ctx);
break;
+ case IORING_REGISTER_IOWQ_MAX_WORKERS:
+ ret = -EINVAL;
+ if (!arg || nr_args != 2)
+ break;
+ ret = io_register_iowq_max_workers(ctx, arg);
+ break;
default:
ret = -EINVAL;
break;
@@ -10293,11 +10781,16 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(40, __u16, buf_group);
BUILD_BUG_SQE_ELEM(42, __u16, personality);
BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
+ BUILD_BUG_SQE_ELEM(44, __u32, file_index);
BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
sizeof(struct io_uring_rsrc_update));
BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) >
sizeof(struct io_uring_rsrc_update2));
+
+ /* ->buf_index is u16 */
+ BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
+
/* should fit into one byte */
BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1e2204fa9963..504e69578112 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -263,209 +263,6 @@ static long ioctl_file_clone_range(struct file *file,
args.src_length, args.dest_offset);
}
-#ifdef CONFIG_BLOCK
-
-static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
-{
- return (offset >> inode->i_blkbits);
-}
-
-static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
-{
- return (blk << inode->i_blkbits);
-}
-
-/**
- * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
- * @inode: the inode to map
- * @fieinfo: the fiemap info struct that will be passed back to userspace
- * @start: where to start mapping in the inode
- * @len: how much space to map
- * @get_block: the fs's get_block function
- *
- * This does FIEMAP for block based inodes. Basically it will just loop
- * through get_block until we hit the number of extents we want to map, or we
- * go past the end of the file and hit a hole.
- *
- * If it is possible to have data blocks beyond a hole past @inode->i_size, then
- * please do not use this function, it will stop at the first unmapped block
- * beyond i_size.
- *
- * If you use this function directly, you need to do your own locking. Use
- * generic_block_fiemap if you want the locking done for you.
- */
-static int __generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, loff_t start,
- loff_t len, get_block_t *get_block)
-{
- struct buffer_head map_bh;
- sector_t start_blk, last_blk;
- loff_t isize = i_size_read(inode);
- u64 logical = 0, phys = 0, size = 0;
- u32 flags = FIEMAP_EXTENT_MERGED;
- bool past_eof = false, whole_file = false;
- int ret = 0;
-
- ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_SYNC);
- if (ret)
- return ret;
-
- /*
- * Either the i_mutex or other appropriate locking needs to be held
- * since we expect isize to not change at all through the duration of
- * this call.
- */
- if (len >= isize) {
- whole_file = true;
- len = isize;
- }
-
- /*
- * Some filesystems can't deal with being asked to map less than
- * blocksize, so make sure our len is at least block length.
- */
- if (logical_to_blk(inode, len) == 0)
- len = blk_to_logical(inode, 1);
-
- start_blk = logical_to_blk(inode, start);
- last_blk = logical_to_blk(inode, start + len - 1);
-
- do {
- /*
- * we set b_size to the total size we want so it will map as
- * many contiguous blocks as possible at once
- */
- memset(&map_bh, 0, sizeof(struct buffer_head));
- map_bh.b_size = len;
-
- ret = get_block(inode, start_blk, &map_bh, 0);
- if (ret)
- break;
-
- /* HOLE */
- if (!buffer_mapped(&map_bh)) {
- start_blk++;
-
- /*
- * We want to handle the case where there is an
- * allocated block at the front of the file, and then
- * nothing but holes up to the end of the file properly,
- * to make sure that extent at the front gets properly
- * marked with FIEMAP_EXTENT_LAST
- */
- if (!past_eof &&
- blk_to_logical(inode, start_blk) >= isize)
- past_eof = 1;
-
- /*
- * First hole after going past the EOF, this is our
- * last extent
- */
- if (past_eof && size) {
- flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST;
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size,
- flags);
- } else if (size) {
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size, flags);
- size = 0;
- }
-
- /* if we have holes up to/past EOF then we're done */
- if (start_blk > last_blk || past_eof || ret)
- break;
- } else {
- /*
- * We have gone over the length of what we wanted to
- * map, and it wasn't the entire file, so add the extent
- * we got last time and exit.
- *
- * This is for the case where say we want to map all the
- * way up to the second to the last block in a file, but
- * the last block is a hole, making the second to last
- * block FIEMAP_EXTENT_LAST. In this case we want to
- * see if there is a hole after the second to last block
- * so we can mark it properly. If we found data after
- * we exceeded the length we were requesting, then we
- * are good to go, just add the extent to the fieinfo
- * and break
- */
- if (start_blk > last_blk && !whole_file) {
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size,
- flags);
- break;
- }
-
- /*
- * if size != 0 then we know we already have an extent
- * to add, so add it.
- */
- if (size) {
- ret = fiemap_fill_next_extent(fieinfo, logical,
- phys, size,
- flags);
- if (ret)
- break;
- }
-
- logical = blk_to_logical(inode, start_blk);
- phys = blk_to_logical(inode, map_bh.b_blocknr);
- size = map_bh.b_size;
- flags = FIEMAP_EXTENT_MERGED;
-
- start_blk += logical_to_blk(inode, size);
-
- /*
- * If we are past the EOF, then we need to make sure as
- * soon as we find a hole that the last extent we found
- * is marked with FIEMAP_EXTENT_LAST
- */
- if (!past_eof && logical + size >= isize)
- past_eof = true;
- }
- cond_resched();
- if (fatal_signal_pending(current)) {
- ret = -EINTR;
- break;
- }
-
- } while (1);
-
- /* If ret is 1 then we just hit the end of the extent array */
- if (ret == 1)
- ret = 0;
-
- return ret;
-}
-
-/**
- * generic_block_fiemap - FIEMAP for block based inodes
- * @inode: The inode to map
- * @fieinfo: The mapping information
- * @start: The initial block to map
- * @len:	The length of the extent to attempt to map
- * @get_block: The block mapping function for the fs
- *
- * Calls __generic_block_fiemap to map the inode, after taking
- * the inode's mutex lock.
- */
-
-int generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block)
-{
- int ret;
- inode_lock(inode);
- ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
- inode_unlock(inode);
- return ret;
-}
-EXPORT_SYMBOL(generic_block_fiemap);
-
-#endif /* CONFIG_BLOCK */
-
/*
* This provides compatibility with legacy XFS pre-allocation ioctls
* which predate the fallocate syscall.
@@ -817,6 +614,14 @@ static int fileattr_set_prepare(struct inode *inode,
if ((old_ma->fsx_xflags ^ fa->fsx_xflags) &
FS_XFLAG_PROJINHERIT)
return -EINVAL;
+ } else {
+ /*
+ * Caller is allowed to change the project ID. If it is being
+ * changed, make sure that the new value is valid.
+ */
+ if (old_ma->fsx_projid != fa->fsx_projid &&
+ !projid_valid(make_kprojid(&init_user_ns, fa->fsx_projid)))
+ return -EINVAL;
}
/* Check extent size hints. */
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 21edc423b79f..678e2c51b855 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -155,7 +155,6 @@ struct iso9660_options{
unsigned int overriderockperm:1;
unsigned int uid_set:1;
unsigned int gid_set:1;
- unsigned int utf8:1;
unsigned char map;
unsigned char check;
unsigned int blocksize;
@@ -356,7 +355,6 @@ static int parse_options(char *options, struct iso9660_options *popt)
popt->gid = GLOBAL_ROOT_GID;
popt->uid = GLOBAL_ROOT_UID;
popt->iocharset = NULL;
- popt->utf8 = 0;
popt->overriderockperm = 0;
popt->session=-1;
popt->sbsector=-1;
@@ -389,10 +387,13 @@ static int parse_options(char *options, struct iso9660_options *popt)
case Opt_cruft:
popt->cruft = 1;
break;
+#ifdef CONFIG_JOLIET
case Opt_utf8:
- popt->utf8 = 1;
+ kfree(popt->iocharset);
+ popt->iocharset = kstrdup("utf8", GFP_KERNEL);
+ if (!popt->iocharset)
+ return 0;
break;
-#ifdef CONFIG_JOLIET
case Opt_iocharset:
kfree(popt->iocharset);
popt->iocharset = match_strdup(&args[0]);
@@ -495,7 +496,6 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
if (sbi->s_nocompress) seq_puts(m, ",nocompress");
if (sbi->s_overriderockperm) seq_puts(m, ",overriderockperm");
if (sbi->s_showassoc) seq_puts(m, ",showassoc");
- if (sbi->s_utf8) seq_puts(m, ",utf8");
if (sbi->s_check) seq_printf(m, ",check=%c", sbi->s_check);
if (sbi->s_mapping) seq_printf(m, ",map=%c", sbi->s_mapping);
@@ -518,9 +518,10 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",fmode=%o", sbi->s_fmode);
#ifdef CONFIG_JOLIET
- if (sbi->s_nls_iocharset &&
- strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
+ if (sbi->s_nls_iocharset)
seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
+ else
+ seq_puts(m, ",iocharset=utf8");
#endif
return 0;
}
@@ -863,14 +864,13 @@ root_found:
sbi->s_nls_iocharset = NULL;
#ifdef CONFIG_JOLIET
- if (joliet_level && opt.utf8 == 0) {
+ if (joliet_level) {
char *p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT;
- sbi->s_nls_iocharset = load_nls(p);
- if (! sbi->s_nls_iocharset) {
- /* Fail only if explicit charset specified */
- if (opt.iocharset)
+ if (strcmp(p, "utf8") != 0) {
+ sbi->s_nls_iocharset = opt.iocharset ?
+ load_nls(opt.iocharset) : load_nls_default();
+ if (!sbi->s_nls_iocharset)
goto out_freesbi;
- sbi->s_nls_iocharset = load_nls_default();
}
}
#endif
@@ -886,7 +886,6 @@ root_found:
sbi->s_gid = opt.gid;
sbi->s_uid_set = opt.uid_set;
sbi->s_gid_set = opt.gid_set;
- sbi->s_utf8 = opt.utf8;
sbi->s_nocompress = opt.nocompress;
sbi->s_overriderockperm = opt.overriderockperm;
/*
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 055ec6c586f7..dcdc191ed183 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -44,7 +44,6 @@ struct isofs_sb_info {
unsigned char s_session;
unsigned int s_high_sierra:1;
unsigned int s_rock:2;
- unsigned int s_utf8:1;
unsigned int s_cruft:1; /* Broken disks with high byte of length
* containing junk */
unsigned int s_nocompress:1;
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index be8b6a9d0b92..c0f04a1e7f69 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -41,14 +41,12 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
int
get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
{
- unsigned char utf8;
struct nls_table *nls;
unsigned char len = 0;
- utf8 = ISOFS_SB(inode->i_sb)->s_utf8;
nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
- if (utf8) {
+ if (!nls) {
len = utf16s_to_utf8s((const wchar_t *) de->name,
de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
outname, PAGE_SIZE);
diff --git a/fs/ksmbd/Kconfig b/fs/ksmbd/Kconfig
new file mode 100644
index 000000000000..b83cbd756ae5
--- /dev/null
+++ b/fs/ksmbd/Kconfig
@@ -0,0 +1,68 @@
+config SMB_SERVER
+ tristate "SMB3 server support (EXPERIMENTAL)"
+ depends on INET
+ depends on MULTIUSER
+ depends on FILE_LOCKING
+ select NLS
+ select NLS_UTF8
+ select CRYPTO
+ select CRYPTO_MD4
+ select CRYPTO_MD5
+ select CRYPTO_HMAC
+ select CRYPTO_ECB
+ select CRYPTO_LIB_DES
+ select CRYPTO_SHA256
+ select CRYPTO_CMAC
+ select CRYPTO_SHA512
+ select CRYPTO_AEAD2
+ select CRYPTO_CCM
+ select CRYPTO_GCM
+ select ASN1
+ select OID_REGISTRY
+ default n
+ help
+	  Choose Y here if you want to allow SMB3 compliant clients
+	  to access files residing on this system using the SMB3 protocol.
+	  To compile the SMB3 server support as a module,
+	  choose M here: the module will be called ksmbd.
+
+	  You may choose to use a Samba server instead, in which
+	  case you can choose N here.
+
+	  You also need to install the user space programs found in
+	  ksmbd-tools, available from
+	  https://github.com/cifsd-team/ksmbd-tools.
+	  More detail about how to run the ksmbd kernel server is
+	  available via the README file
+	  (https://github.com/cifsd-team/ksmbd-tools/blob/master/README).
+
+	  The ksmbd kernel server includes support for auto-negotiation,
+	  secure negotiate, pre-authentication integrity, oplock/lease,
+	  compound requests, multi-credit, packet signing, RDMA (smbdirect),
+	  SMB3 encryption, copy-offload, and secure per-user session
+	  establishment via NTLM or NTLMv2.
+
+config SMB_SERVER_SMBDIRECT
+ bool "Support for SMB Direct protocol"
+ depends on SMB_SERVER=m && INFINIBAND && INFINIBAND_ADDR_TRANS || SMB_SERVER=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
+ select SG_POOL
+ default n
+
+ help
+ Enables SMB Direct support for SMB 3.0, 3.02 and 3.1.1.
+
+ SMB Direct allows transferring SMB packets over RDMA. If unsure,
+ say N.
+
+config SMB_SERVER_CHECK_CAP_NET_ADMIN
+ bool "Enable check network administration capability"
+ depends on SMB_SERVER
+ default y
+
+ help
+	  Prevent unprivileged processes from starting the ksmbd kernel server.
+
+config SMB_SERVER_KERBEROS5
+ bool "Support for Kerberos 5"
+ depends on SMB_SERVER
+ default n
diff --git a/fs/ksmbd/Makefile b/fs/ksmbd/Makefile
new file mode 100644
index 000000000000..7d6337a7dee4
--- /dev/null
+++ b/fs/ksmbd/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Makefile for Linux SMB3 kernel server
+#
+obj-$(CONFIG_SMB_SERVER) += ksmbd.o
+
+ksmbd-y := unicode.o auth.o vfs.o vfs_cache.o server.o ndr.o \
+ misc.o oplock.o connection.o ksmbd_work.o crypto_ctx.o \
+ mgmt/ksmbd_ida.o mgmt/user_config.o mgmt/share_config.o \
+ mgmt/tree_connect.o mgmt/user_session.o smb_common.o \
+ transport_tcp.o transport_ipc.o smbacl.o smb2pdu.o \
+ smb2ops.o smb2misc.o ksmbd_spnego_negtokeninit.asn1.o \
+ ksmbd_spnego_negtokentarg.asn1.o asn1.o
+
+$(obj)/asn1.o: $(obj)/ksmbd_spnego_negtokeninit.asn1.h $(obj)/ksmbd_spnego_negtokentarg.asn1.h
+
+$(obj)/ksmbd_spnego_negtokeninit.asn1.o: $(obj)/ksmbd_spnego_negtokeninit.asn1.c $(obj)/ksmbd_spnego_negtokeninit.asn1.h
+$(obj)/ksmbd_spnego_negtokentarg.asn1.o: $(obj)/ksmbd_spnego_negtokentarg.asn1.c $(obj)/ksmbd_spnego_negtokentarg.asn1.h
+
+ksmbd-$(CONFIG_SMB_SERVER_SMBDIRECT) += transport_rdma.o
diff --git a/fs/ksmbd/asn1.c b/fs/ksmbd/asn1.c
new file mode 100644
index 000000000000..b014f4638610
--- /dev/null
+++ b/fs/ksmbd/asn1.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * The ASN.1/BER parsing code is derived from ip_nat_snmp_basic.c which was in
+ * turn derived from the gxsnmp package by Gregory McLean & Jochen Friedrich
+ *
+ * Copyright (c) 2000 RP Internet (www.rpi.net.au).
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/oid_registry.h>
+
+#include "glob.h"
+
+#include "asn1.h"
+#include "connection.h"
+#include "auth.h"
+#include "ksmbd_spnego_negtokeninit.asn1.h"
+#include "ksmbd_spnego_negtokentarg.asn1.h"
+
+#define SPNEGO_OID_LEN 7
+#define NTLMSSP_OID_LEN 10
+#define KRB5_OID_LEN 7
+#define KRB5U2U_OID_LEN 8
+#define MSKRB5_OID_LEN 7
+static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 };
+static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 };
+static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 };
+static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 };
+static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 };
+
+static char NTLMSSP_OID_STR[NTLMSSP_OID_LEN] = { 0x2b, 0x06, 0x01, 0x04, 0x01,
+ 0x82, 0x37, 0x02, 0x02, 0x0a };
+
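+/*
+ * Decode one base-128 OID sub-identifier: each octet carries seven
+ * payload bits, with the high bit set on all octets but the last.
+ */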
+static bool
+asn1_subid_decode(const unsigned char **begin, const unsigned char *end,
+ unsigned long *subid)
+{
+ const unsigned char *ptr = *begin;
+ unsigned char ch;
+
+ *subid = 0;
+
+ do {
+ if (ptr >= end)
+ return false;
+
+ ch = *ptr++;
+ *subid <<= 7;
+ *subid |= ch & 0x7F;
+ } while ((ch & 0x80) == 0x80);
+
+ *begin = ptr;
+ return true;
+}
+
+static bool asn1_oid_decode(const unsigned char *value, size_t vlen,
+ unsigned long **oid, size_t *oidlen)
+{
+ const unsigned char *iptr = value, *end = value + vlen;
+ unsigned long *optr;
+ unsigned long subid;
+
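+	/* the first sub-identifier decodes into two output elements */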
+ vlen += 1;
+ if (vlen < 2 || vlen > UINT_MAX / sizeof(unsigned long))
+ goto fail_nullify;
+
+ *oid = kmalloc(vlen * sizeof(unsigned long), GFP_KERNEL);
+ if (!*oid)
+ return false;
+
+ optr = *oid;
+
+ if (!asn1_subid_decode(&iptr, end, &subid))
+ goto fail;
+
+ if (subid < 40) {
+ optr[0] = 0;
+ optr[1] = subid;
+ } else if (subid < 80) {
+ optr[0] = 1;
+ optr[1] = subid - 40;
+ } else {
+ optr[0] = 2;
+ optr[1] = subid - 80;
+ }
+
+ *oidlen = 2;
+ optr += 2;
+
+ while (iptr < end) {
+ if (++(*oidlen) > vlen)
+ goto fail;
+
+ if (!asn1_subid_decode(&iptr, end, optr++))
+ goto fail;
+ }
+ return true;
+
+fail:
+ kfree(*oid);
+fail_nullify:
+ *oid = NULL;
+ return false;
+}
+
+static bool oid_eq(unsigned long *oid1, unsigned int oid1len,
+ unsigned long *oid2, unsigned int oid2len)
+{
+ if (oid1len != oid2len)
+ return false;
+
+	/* compare whole elements, not just the leading bytes */
+	return memcmp(oid1, oid2, oid1len * sizeof(*oid1)) == 0;
+}
+
+int
+ksmbd_decode_negTokenInit(unsigned char *security_blob, int length,
+ struct ksmbd_conn *conn)
+{
+ return asn1_ber_decoder(&ksmbd_spnego_negtokeninit_decoder, conn,
+ security_blob, length);
+}
+
+int
+ksmbd_decode_negTokenTarg(unsigned char *security_blob, int length,
+ struct ksmbd_conn *conn)
+{
+ return asn1_ber_decoder(&ksmbd_spnego_negtokentarg_decoder, conn,
+ security_blob, length);
+}
+
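+/*
+ * Number of length octets needed beyond the initial length byte when
+ * @len requires the BER long form; 0 means the short form fits.
+ */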
+static int compute_asn_hdr_len_bytes(int len)
+{
+ if (len > 0xFFFFFF)
+ return 4;
+ else if (len > 0xFFFF)
+ return 3;
+ else if (len > 0xFF)
+ return 2;
+ else if (len > 0x7F)
+ return 1;
+ else
+ return 0;
+}
+
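+/*
+ * Write a BER [tag, length] header followed by a nested [seq, length]
+ * header at *ofs. A long-form length is emitted as (0x80 | n) followed
+ * by n big-endian length octets, e.g. 0x1234 becomes 0x82 0x12 0x34.
+ */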
+static void encode_asn_tag(char *buf, unsigned int *ofs, char tag, char seq,
+ int length)
+{
+ int i;
+ int index = *ofs;
+ char hdr_len = compute_asn_hdr_len_bytes(length);
+ int len = length + 2 + hdr_len;
+
+ /* insert tag */
+ buf[index++] = tag;
+
+ if (!hdr_len) {
+ buf[index++] = len;
+ } else {
+ buf[index++] = 0x80 | hdr_len;
+ for (i = hdr_len - 1; i >= 0; i--)
+ buf[index++] = (len >> (i * 8)) & 0xFF;
+ }
+
+ /* insert seq */
+ len = len - (index - *ofs);
+ buf[index++] = seq;
+
+ if (!hdr_len) {
+ buf[index++] = len;
+ } else {
+ buf[index++] = 0x80 | hdr_len;
+ for (i = hdr_len - 1; i >= 0; i--)
+ buf[index++] = (len >> (i * 8)) & 0xFF;
+ }
+
+ *ofs += (index - *ofs);
+}
+
+int build_spnego_ntlmssp_neg_blob(unsigned char **pbuffer, u16 *buflen,
+ char *ntlm_blob, int ntlm_blob_len)
+{
+ char *buf;
+ unsigned int ofs = 0;
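+	/* each field: two tag/length pairs, 4 fixed bytes plus long-form octets */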
+ int neg_result_len = 4 + compute_asn_hdr_len_bytes(1) * 2 + 1;
+ int oid_len = 4 + compute_asn_hdr_len_bytes(NTLMSSP_OID_LEN) * 2 +
+ NTLMSSP_OID_LEN;
+ int ntlmssp_len = 4 + compute_asn_hdr_len_bytes(ntlm_blob_len) * 2 +
+ ntlm_blob_len;
+ int total_len = 4 + compute_asn_hdr_len_bytes(neg_result_len +
+ oid_len + ntlmssp_len) * 2 +
+ neg_result_len + oid_len + ntlmssp_len;
+
+ buf = kmalloc(total_len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* insert main gss header */
+ encode_asn_tag(buf, &ofs, 0xa1, 0x30, neg_result_len + oid_len +
+ ntlmssp_len);
+
+ /* insert neg result */
+ encode_asn_tag(buf, &ofs, 0xa0, 0x0a, 1);
+ buf[ofs++] = 1;
+
+ /* insert oid */
+ encode_asn_tag(buf, &ofs, 0xa1, 0x06, NTLMSSP_OID_LEN);
+ memcpy(buf + ofs, NTLMSSP_OID_STR, NTLMSSP_OID_LEN);
+ ofs += NTLMSSP_OID_LEN;
+
+ /* insert response token - ntlmssp blob */
+ encode_asn_tag(buf, &ofs, 0xa2, 0x04, ntlm_blob_len);
+ memcpy(buf + ofs, ntlm_blob, ntlm_blob_len);
+ ofs += ntlm_blob_len;
+
+ *pbuffer = buf;
+ *buflen = total_len;
+ return 0;
+}
+
+int build_spnego_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
+ int neg_result)
+{
+ char *buf;
+ unsigned int ofs = 0;
+ int neg_result_len = 4 + compute_asn_hdr_len_bytes(1) * 2 + 1;
+ int total_len = 4 + compute_asn_hdr_len_bytes(neg_result_len) * 2 +
+ neg_result_len;
+
+ buf = kmalloc(total_len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* insert main gss header */
+ encode_asn_tag(buf, &ofs, 0xa1, 0x30, neg_result_len);
+
+ /* insert neg result */
+ encode_asn_tag(buf, &ofs, 0xa0, 0x0a, 1);
+ if (neg_result)
+ buf[ofs++] = 2;
+ else
+ buf[ofs++] = 0;
+
+ *pbuffer = buf;
+ *buflen = total_len;
+ return 0;
+}
+
+int ksmbd_gssapi_this_mech(void *context, size_t hdrlen, unsigned char tag,
+ const void *value, size_t vlen)
+{
+ unsigned long *oid;
+ size_t oidlen;
+ int err = 0;
+
+ if (!asn1_oid_decode(value, vlen, &oid, &oidlen)) {
+ err = -EBADMSG;
+ goto out;
+ }
+
+ if (!oid_eq(oid, oidlen, SPNEGO_OID, SPNEGO_OID_LEN))
+ err = -EBADMSG;
+ kfree(oid);
+out:
+ if (err) {
+ char buf[50];
+
+ sprint_oid(value, vlen, buf, sizeof(buf));
+ ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
+ }
+ return err;
+}
+
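+/*
+ * Record every authentication mechanism the client offers; the first
+ * recognised one becomes the connection's preferred mechanism.
+ */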
+int ksmbd_neg_token_init_mech_type(void *context, size_t hdrlen,
+ unsigned char tag, const void *value,
+ size_t vlen)
+{
+ struct ksmbd_conn *conn = context;
+ unsigned long *oid;
+ size_t oidlen;
+ int mech_type;
+ char buf[50];
+
+ if (!asn1_oid_decode(value, vlen, &oid, &oidlen))
+ goto fail;
+
+ if (oid_eq(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN))
+ mech_type = KSMBD_AUTH_NTLMSSP;
+ else if (oid_eq(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN))
+ mech_type = KSMBD_AUTH_MSKRB5;
+ else if (oid_eq(oid, oidlen, KRB5_OID, KRB5_OID_LEN))
+ mech_type = KSMBD_AUTH_KRB5;
+ else if (oid_eq(oid, oidlen, KRB5U2U_OID, KRB5U2U_OID_LEN))
+ mech_type = KSMBD_AUTH_KRB5U2U;
+ else
+ goto fail;
+
+ conn->auth_mechs |= mech_type;
+ if (conn->preferred_auth_mech == 0)
+ conn->preferred_auth_mech = mech_type;
+
+ kfree(oid);
+ return 0;
+
+fail:
+ kfree(oid);
+ sprint_oid(value, vlen, buf, sizeof(buf));
+ ksmbd_debug(AUTH, "Unexpected OID: %s\n", buf);
+ return -EBADMSG;
+}
+
+int ksmbd_neg_token_init_mech_token(void *context, size_t hdrlen,
+ unsigned char tag, const void *value,
+ size_t vlen)
+{
+ struct ksmbd_conn *conn = context;
+
+ conn->mechToken = kmalloc(vlen + 1, GFP_KERNEL);
+ if (!conn->mechToken)
+ return -ENOMEM;
+
+ memcpy(conn->mechToken, value, vlen);
+ conn->mechToken[vlen] = '\0';
+ return 0;
+}
+
+int ksmbd_neg_token_targ_resp_token(void *context, size_t hdrlen,
+ unsigned char tag, const void *value,
+ size_t vlen)
+{
+ struct ksmbd_conn *conn = context;
+
+ conn->mechToken = kmalloc(vlen + 1, GFP_KERNEL);
+ if (!conn->mechToken)
+ return -ENOMEM;
+
+ memcpy(conn->mechToken, value, vlen);
+ conn->mechToken[vlen] = '\0';
+ return 0;
+}
diff --git a/fs/ksmbd/asn1.h b/fs/ksmbd/asn1.h
new file mode 100644
index 000000000000..ce105f4ce305
--- /dev/null
+++ b/fs/ksmbd/asn1.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * The ASN.1/BER parsing code is derived from ip_nat_snmp_basic.c which was in
+ * turn derived from the gxsnmp package by Gregory McLean & Jochen Friedrich
+ *
+ * Copyright (c) 2000 RP Internet (www.rpi.net.au).
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __ASN1_H__
+#define __ASN1_H__
+
+int ksmbd_decode_negTokenInit(unsigned char *security_blob, int length,
+ struct ksmbd_conn *conn);
+int ksmbd_decode_negTokenTarg(unsigned char *security_blob, int length,
+ struct ksmbd_conn *conn);
+int build_spnego_ntlmssp_neg_blob(unsigned char **pbuffer, u16 *buflen,
+ char *ntlm_blob, int ntlm_blob_len);
+int build_spnego_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
+ int neg_result);
+#endif /* __ASN1_H__ */
diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c
new file mode 100644
index 000000000000..de36f12070bf
--- /dev/null
+++ b/fs/ksmbd/auth.c
@@ -0,0 +1,1364 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/backing-dev.h>
+#include <linux/writeback.h>
+#include <linux/uio.h>
+#include <linux/xattr.h>
+#include <crypto/hash.h>
+#include <crypto/aead.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+
+#include "auth.h"
+#include "glob.h"
+
+#include <linux/fips.h>
+#include <crypto/des.h>
+
+#include "server.h"
+#include "smb_common.h"
+#include "connection.h"
+#include "mgmt/user_session.h"
+#include "mgmt/user_config.h"
+#include "crypto_ctx.h"
+#include "transport_ipc.h"
+
+/*
+ * Fixed-format data defining the GSS header and the fixed string
+ * "not_defined_in_RFC4178@please_ignore", so the security blob data
+ * in the negotiate phase can be generated statically.
+ */
+static char NEGOTIATE_GSS_HEADER[AUTH_GSS_LENGTH] = {
+#ifdef CONFIG_SMB_SERVER_KERBEROS5
+ 0x60, 0x5e, 0x06, 0x06, 0x2b, 0x06, 0x01, 0x05,
+ 0x05, 0x02, 0xa0, 0x54, 0x30, 0x52, 0xa0, 0x24,
+ 0x30, 0x22, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86,
+ 0xf7, 0x12, 0x01, 0x02, 0x02, 0x06, 0x09, 0x2a,
+ 0x86, 0x48, 0x82, 0xf7, 0x12, 0x01, 0x02, 0x02,
+ 0x06, 0x0a, 0x2b, 0x06, 0x01, 0x04, 0x01, 0x82,
+ 0x37, 0x02, 0x02, 0x0a, 0xa3, 0x2a, 0x30, 0x28,
+ 0xa0, 0x26, 0x1b, 0x24, 0x6e, 0x6f, 0x74, 0x5f,
+ 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x64, 0x5f,
+ 0x69, 0x6e, 0x5f, 0x52, 0x46, 0x43, 0x34, 0x31,
+ 0x37, 0x38, 0x40, 0x70, 0x6c, 0x65, 0x61, 0x73,
+ 0x65, 0x5f, 0x69, 0x67, 0x6e, 0x6f, 0x72, 0x65
+#else
+ 0x60, 0x48, 0x06, 0x06, 0x2b, 0x06, 0x01, 0x05,
+ 0x05, 0x02, 0xa0, 0x3e, 0x30, 0x3c, 0xa0, 0x0e,
+ 0x30, 0x0c, 0x06, 0x0a, 0x2b, 0x06, 0x01, 0x04,
+ 0x01, 0x82, 0x37, 0x02, 0x02, 0x0a, 0xa3, 0x2a,
+ 0x30, 0x28, 0xa0, 0x26, 0x1b, 0x24, 0x6e, 0x6f,
+ 0x74, 0x5f, 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65,
+ 0x64, 0x5f, 0x69, 0x6e, 0x5f, 0x52, 0x46, 0x43,
+ 0x34, 0x31, 0x37, 0x38, 0x40, 0x70, 0x6c, 0x65,
+ 0x61, 0x73, 0x65, 0x5f, 0x69, 0x67, 0x6e, 0x6f,
+ 0x72, 0x65
+#endif
+};
+
+void ksmbd_copy_gss_neg_header(void *buf)
+{
+ memcpy(buf, NEGOTIATE_GSS_HEADER, AUTH_GSS_LENGTH);
+}
+
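+/*
+ * Spread a 7-byte string across an 8-byte DES key, seven bits per byte,
+ * shifted left by one so the low (parity) bit of each byte stays clear.
+ */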
+static void
+str_to_key(unsigned char *str, unsigned char *key)
+{
+ int i;
+
+ key[0] = str[0] >> 1;
+ key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
+ key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
+ key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
+ key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
+ key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
+ key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
+ key[7] = str[6] & 0x7F;
+ for (i = 0; i < 8; i++)
+ key[i] = (key[i] << 1);
+}
+
+static int
+smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
+{
+ unsigned char key2[8];
+ struct des_ctx ctx;
+
+ if (fips_enabled) {
+ ksmbd_debug(AUTH, "FIPS compliance enabled: DES not permitted\n");
+ return -ENOENT;
+ }
+
+ str_to_key(key, key2);
+ des_expand_key(&ctx, key2, DES_KEY_SIZE);
+ des_encrypt(&ctx, out, in);
+ memzero_explicit(&ctx, sizeof(ctx));
+ return 0;
+}
+
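+/*
+ * Classic NTLM response: DES-encrypt the 8-byte challenge @c8 with the
+ * three consecutive 7-byte thirds of the padded 21-byte hash @p21,
+ * yielding the 24-byte response @p24.
+ */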
+static int ksmbd_enc_p24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
+{
+ int rc;
+
+ rc = smbhash(p24, c8, p21);
+ if (rc)
+ return rc;
+ rc = smbhash(p24 + 8, c8, p21 + 7);
+ if (rc)
+ return rc;
+ return smbhash(p24 + 16, c8, p21 + 14);
+}
+
+/* produce a md4 message digest from data of length n bytes */
+static int ksmbd_enc_md4(unsigned char *md4_hash, unsigned char *link_str,
+ int link_len)
+{
+ int rc;
+ struct ksmbd_crypto_ctx *ctx;
+
+ ctx = ksmbd_crypto_ctx_find_md4();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "Crypto md4 allocation error\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_init(CRYPTO_MD4(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not init md4 shash\n");
+ goto out;
+ }
+
+ rc = crypto_shash_update(CRYPTO_MD4(ctx), link_str, link_len);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not update with link_str\n");
+ goto out;
+ }
+
+ rc = crypto_shash_final(CRYPTO_MD4(ctx), md4_hash);
+ if (rc)
+ ksmbd_debug(AUTH, "Could not generate md4 hash\n");
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+static int ksmbd_enc_update_sess_key(unsigned char *md5_hash, char *nonce,
+ char *server_challenge, int len)
+{
+ int rc;
+ struct ksmbd_crypto_ctx *ctx;
+
+ ctx = ksmbd_crypto_ctx_find_md5();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "Crypto md5 allocation error\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_init(CRYPTO_MD5(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not init md5 shash\n");
+ goto out;
+ }
+
+ rc = crypto_shash_update(CRYPTO_MD5(ctx), server_challenge, len);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not update with challenge\n");
+ goto out;
+ }
+
+ rc = crypto_shash_update(CRYPTO_MD5(ctx), nonce, len);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not update with nonce\n");
+ goto out;
+ }
+
+ rc = crypto_shash_final(CRYPTO_MD5(ctx), md5_hash);
+ if (rc)
+ ksmbd_debug(AUTH, "Could not generate md5 hash\n");
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+/**
+ * ksmbd_gen_sess_key() - generate the session key
+ * @sess:	session of connection
+ * @hash:	source hash value used to find the session key
+ * @hmac:	source hmac value used to find the session key
+ *
+ */
+static int ksmbd_gen_sess_key(struct ksmbd_session *sess, char *hash,
+ char *hmac)
+{
+ struct ksmbd_crypto_ctx *ctx;
+ int rc;
+
+ ctx = ksmbd_crypto_ctx_find_hmacmd5();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not crypto alloc hmacmd5\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_setkey(CRYPTO_HMACMD5_TFM(ctx),
+ hash,
+ CIFS_HMAC_MD5_HASH_SIZE);
+ if (rc) {
+ ksmbd_debug(AUTH, "hmacmd5 set key fail error %d\n", rc);
+ goto out;
+ }
+
+ rc = crypto_shash_init(CRYPTO_HMACMD5(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "could not init hmacmd5 error %d\n", rc);
+ goto out;
+ }
+
+ rc = crypto_shash_update(CRYPTO_HMACMD5(ctx),
+ hmac,
+ SMB2_NTLMV2_SESSKEY_SIZE);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not update with response error %d\n", rc);
+ goto out;
+ }
+
+ rc = crypto_shash_final(CRYPTO_HMACMD5(ctx), sess->sess_key);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not generate hmacmd5 hash error %d\n", rc);
+ goto out;
+ }
+
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
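+/*
+ * NTLMv2 hash = HMAC-MD5 keyed with the user's NT hash over the
+ * uppercased user name followed by the domain (or server) name, both
+ * encoded as UTF-16LE.
+ */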
+static int calc_ntlmv2_hash(struct ksmbd_session *sess, char *ntlmv2_hash,
+ char *dname)
+{
+ int ret, len, conv_len;
+ wchar_t *domain = NULL;
+ __le16 *uniname = NULL;
+ struct ksmbd_crypto_ctx *ctx;
+
+ ctx = ksmbd_crypto_ctx_find_hmacmd5();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "can't generate ntlmv2 hash\n");
+ return -ENOMEM;
+ }
+
+ ret = crypto_shash_setkey(CRYPTO_HMACMD5_TFM(ctx),
+ user_passkey(sess->user),
+ CIFS_ENCPWD_SIZE);
+ if (ret) {
+ ksmbd_debug(AUTH, "Could not set NT Hash as a key\n");
+ goto out;
+ }
+
+ ret = crypto_shash_init(CRYPTO_HMACMD5(ctx));
+ if (ret) {
+ ksmbd_debug(AUTH, "could not init hmacmd5\n");
+ goto out;
+ }
+
+ /* convert user_name to unicode */
+ len = strlen(user_name(sess->user));
+ uniname = kzalloc(2 + UNICODE_LEN(len), GFP_KERNEL);
+ if (!uniname) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ conv_len = smb_strtoUTF16(uniname, user_name(sess->user), len,
+ sess->conn->local_nls);
+ if (conv_len < 0 || conv_len > len) {
+ ret = -EINVAL;
+ goto out;
+ }
+ UniStrupr(uniname);
+
+ ret = crypto_shash_update(CRYPTO_HMACMD5(ctx),
+ (char *)uniname,
+ UNICODE_LEN(conv_len));
+ if (ret) {
+ ksmbd_debug(AUTH, "Could not update with user\n");
+ goto out;
+ }
+
+ /* Convert domain name or conn name to unicode and uppercase */
+ len = strlen(dname);
+ domain = kzalloc(2 + UNICODE_LEN(len), GFP_KERNEL);
+ if (!domain) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ conv_len = smb_strtoUTF16((__le16 *)domain, dname, len,
+ sess->conn->local_nls);
+ if (conv_len < 0 || conv_len > len) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = crypto_shash_update(CRYPTO_HMACMD5(ctx),
+ (char *)domain,
+ UNICODE_LEN(conv_len));
+ if (ret) {
+ ksmbd_debug(AUTH, "Could not update with domain\n");
+ goto out;
+ }
+
+ ret = crypto_shash_final(CRYPTO_HMACMD5(ctx), ntlmv2_hash);
+ if (ret)
+ ksmbd_debug(AUTH, "Could not generate md5 hash\n");
+out:
+ kfree(uniname);
+ kfree(domain);
+ ksmbd_release_crypto_ctx(ctx);
+ return ret;
+}
+
+/**
+ * ksmbd_auth_ntlm() - NTLM authentication handler
+ * @sess:	session of connection
+ * @pw_buf:	NTLM challenge response
+ *
+ * Return: 0 on success, error number on error
+ */
+int ksmbd_auth_ntlm(struct ksmbd_session *sess, char *pw_buf)
+{
+ int rc;
+ unsigned char p21[21];
+ char key[CIFS_AUTH_RESP_SIZE];
+
+ memset(p21, '\0', 21);
+ memcpy(p21, user_passkey(sess->user), CIFS_NTHASH_SIZE);
+ rc = ksmbd_enc_p24(p21, sess->ntlmssp.cryptkey, key);
+ if (rc) {
+ pr_err("password processing failed\n");
+ return rc;
+ }
+
+ ksmbd_enc_md4(sess->sess_key, user_passkey(sess->user),
+ CIFS_SMB1_SESSKEY_SIZE);
+ memcpy(sess->sess_key + CIFS_SMB1_SESSKEY_SIZE, key,
+ CIFS_AUTH_RESP_SIZE);
+ sess->sequence_number = 1;
+
+ if (strncmp(pw_buf, key, CIFS_AUTH_RESP_SIZE) != 0) {
+ ksmbd_debug(AUTH, "ntlmv1 authentication failed\n");
+ return -EINVAL;
+ }
+
+ ksmbd_debug(AUTH, "ntlmv1 authentication pass\n");
+ return 0;
+}
+
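+/*
+ * The NTLMv2 proof recomputed below is HMAC-MD5(NTLMv2 hash,
+ * server challenge || client blob); it must match the value the client
+ * sent, and the same hash chain then seeds the session key.
+ */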
+/**
+ * ksmbd_auth_ntlmv2() - NTLMv2 authentication handler
+ * @sess: session of connection
+ * @ntlmv2: NTLMv2 challenge response
+ * @blen: NTLMv2 blob length
+ * @domain_name: domain name
+ *
+ * Return: 0 on success, error number on error
+ */
+int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
+ int blen, char *domain_name)
+{
+ char ntlmv2_hash[CIFS_ENCPWD_SIZE];
+ char ntlmv2_rsp[CIFS_HMAC_MD5_HASH_SIZE];
+ struct ksmbd_crypto_ctx *ctx;
+ char *construct = NULL;
+ int rc, len;
+
+ ctx = ksmbd_crypto_ctx_find_hmacmd5();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not crypto alloc hmacmd5\n");
+ return -ENOMEM;
+ }
+
+ rc = calc_ntlmv2_hash(sess, ntlmv2_hash, domain_name);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not get v2 hash rc %d\n", rc);
+ goto out;
+ }
+
+ rc = crypto_shash_setkey(CRYPTO_HMACMD5_TFM(ctx),
+ ntlmv2_hash,
+ CIFS_HMAC_MD5_HASH_SIZE);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not set NTLMV2 Hash as a key\n");
+ goto out;
+ }
+
+ rc = crypto_shash_init(CRYPTO_HMACMD5(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not init hmacmd5\n");
+ goto out;
+ }
+
+ len = CIFS_CRYPTO_KEY_SIZE + blen;
+ construct = kzalloc(len, GFP_KERNEL);
+ if (!construct) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ memcpy(construct, sess->ntlmssp.cryptkey, CIFS_CRYPTO_KEY_SIZE);
+ memcpy(construct + CIFS_CRYPTO_KEY_SIZE, &ntlmv2->blob_signature, blen);
+
+ rc = crypto_shash_update(CRYPTO_HMACMD5(ctx), construct, len);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not update with response\n");
+ goto out;
+ }
+
+ rc = crypto_shash_final(CRYPTO_HMACMD5(ctx), ntlmv2_rsp);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not generate md5 hash\n");
+ goto out;
+ }
+
+ rc = ksmbd_gen_sess_key(sess, ntlmv2_hash, ntlmv2_rsp);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not generate sess key\n");
+ goto out;
+ }
+
+ if (memcmp(ntlmv2->ntlmv2_hash, ntlmv2_rsp, CIFS_HMAC_MD5_HASH_SIZE) != 0)
+ rc = -EINVAL;
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ kfree(construct);
+ return rc;
+}
+
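+/*
+ * NTLM2 extended session security: the effective challenge is the first
+ * 8 bytes of MD5(server challenge || client nonce), which then feeds the
+ * classic DES-based 24-byte response computation.
+ */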
+/**
+ * __ksmbd_auth_ntlmv2() - NTLM2(extended security) authentication handler
+ * @sess: session of connection
+ * @client_nonce: client nonce from LM response.
+ * @ntlm_resp: ntlm response data from client.
+ *
+ * Return: 0 on success, error number on error
+ */
+static int __ksmbd_auth_ntlmv2(struct ksmbd_session *sess, char *client_nonce,
+ char *ntlm_resp)
+{
+ char sess_key[CIFS_SMB1_SESSKEY_SIZE] = {0};
+ int rc;
+ unsigned char p21[21];
+ char key[CIFS_AUTH_RESP_SIZE];
+
+ rc = ksmbd_enc_update_sess_key(sess_key,
+ client_nonce,
+ (char *)sess->ntlmssp.cryptkey, 8);
+ if (rc) {
+ pr_err("password processing failed\n");
+ goto out;
+ }
+
+ memset(p21, '\0', 21);
+ memcpy(p21, user_passkey(sess->user), CIFS_NTHASH_SIZE);
+ rc = ksmbd_enc_p24(p21, sess_key, key);
+ if (rc) {
+ pr_err("password processing failed\n");
+ goto out;
+ }
+
+ if (memcmp(ntlm_resp, key, CIFS_AUTH_RESP_SIZE) != 0)
+ rc = -EINVAL;
+out:
+ return rc;
+}
+
+/**
+ * ksmbd_decode_ntlmssp_auth_blob() - helper function to decode an
+ * authenticate blob
+ * @authblob:	authenticate blob source pointer
+ * @blob_len:	length of the authenticate blob
+ * @sess:	session of connection
+ *
+ * Return: 0 on success, error number on error
+ */
+int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
+ int blob_len, struct ksmbd_session *sess)
+{
+ char *domain_name;
+ unsigned int lm_off, nt_off;
+ unsigned short nt_len;
+ int ret;
+
+ if (blob_len < sizeof(struct authenticate_message)) {
+ ksmbd_debug(AUTH, "negotiate blob len %d too small\n",
+ blob_len);
+ return -EINVAL;
+ }
+
+ if (memcmp(authblob->Signature, "NTLMSSP", 8)) {
+ ksmbd_debug(AUTH, "blob signature incorrect %s\n",
+ authblob->Signature);
+ return -EINVAL;
+ }
+
+ lm_off = le32_to_cpu(authblob->LmChallengeResponse.BufferOffset);
+ nt_off = le32_to_cpu(authblob->NtChallengeResponse.BufferOffset);
+ nt_len = le16_to_cpu(authblob->NtChallengeResponse.Length);
+
+ /* process NTLM authentication */
+ if (nt_len == CIFS_AUTH_RESP_SIZE) {
+ if (le32_to_cpu(authblob->NegotiateFlags) &
+ NTLMSSP_NEGOTIATE_EXTENDED_SEC)
+ return __ksmbd_auth_ntlmv2(sess, (char *)authblob +
+ lm_off, (char *)authblob + nt_off);
+ else
+ return ksmbd_auth_ntlm(sess, (char *)authblob +
+ nt_off);
+ }
+
+ /* TODO : use domain name that imported from configuration file */
+ domain_name = smb_strndup_from_utf16((const char *)authblob +
+ le32_to_cpu(authblob->DomainName.BufferOffset),
+ le16_to_cpu(authblob->DomainName.Length), true,
+ sess->conn->local_nls);
+ if (IS_ERR(domain_name))
+ return PTR_ERR(domain_name);
+
+ /* process NTLMv2 authentication */
+ ksmbd_debug(AUTH, "decode_ntlmssp_authenticate_blob dname%s\n",
+ domain_name);
+ ret = ksmbd_auth_ntlmv2(sess, (struct ntlmv2_resp *)((char *)authblob + nt_off),
+ nt_len - CIFS_ENCPWD_SIZE,
+ domain_name);
+ kfree(domain_name);
+ return ret;
+}
+
+/**
+ * ksmbd_decode_ntlmssp_neg_blob() - helper function to decode a
+ * negotiate blob
+ * @negblob:	negotiate blob source pointer
+ * @blob_len:	length of the negotiate blob
+ * @sess:	session of connection
+ *
+ * Return: 0 on success, error number on error
+ */
+int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
+ int blob_len, struct ksmbd_session *sess)
+{
+ if (blob_len < sizeof(struct negotiate_message)) {
+ ksmbd_debug(AUTH, "negotiate blob len %d too small\n",
+ blob_len);
+ return -EINVAL;
+ }
+
+ if (memcmp(negblob->Signature, "NTLMSSP", 8)) {
+ ksmbd_debug(AUTH, "blob signature incorrect %s\n",
+ negblob->Signature);
+ return -EINVAL;
+ }
+
+ sess->ntlmssp.client_flags = le32_to_cpu(negblob->NegotiateFlags);
+ return 0;
+}
+
+/**
+ * ksmbd_build_ntlmssp_challenge_blob() - helper function to construct
+ * challenge blob
+ * @chgblob:	challenge blob source pointer to initialize
+ * @sess:	session of connection
+ *
+ * Return: challenge blob length on success, negative error number on
+ * failure
+ */
+unsigned int
+ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
+ struct ksmbd_session *sess)
+{
+ struct target_info *tinfo;
+ wchar_t *name;
+ __u8 *target_name;
+ unsigned int flags, blob_off, blob_len, type, target_info_len = 0;
+ int len, uni_len, conv_len;
+ int cflags = sess->ntlmssp.client_flags;
+
+ memcpy(chgblob->Signature, NTLMSSP_SIGNATURE, 8);
+ chgblob->MessageType = NtLmChallenge;
+
+ flags = NTLMSSP_NEGOTIATE_UNICODE |
+ NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_TARGET_TYPE_SERVER |
+ NTLMSSP_NEGOTIATE_TARGET_INFO;
+
+ if (cflags & NTLMSSP_NEGOTIATE_SIGN) {
+ flags |= NTLMSSP_NEGOTIATE_SIGN;
+ flags |= cflags & (NTLMSSP_NEGOTIATE_128 |
+ NTLMSSP_NEGOTIATE_56);
+ }
+
+ if (cflags & NTLMSSP_NEGOTIATE_ALWAYS_SIGN)
+ flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
+
+ if (cflags & NTLMSSP_REQUEST_TARGET)
+ flags |= NTLMSSP_REQUEST_TARGET;
+
+ if (sess->conn->use_spnego &&
+ (cflags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
+ flags |= NTLMSSP_NEGOTIATE_EXTENDED_SEC;
+
+ chgblob->NegotiateFlags = cpu_to_le32(flags);
+ len = strlen(ksmbd_netbios_name());
+ name = kmalloc(2 + UNICODE_LEN(len), GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ conv_len = smb_strtoUTF16((__le16 *)name, ksmbd_netbios_name(), len,
+ sess->conn->local_nls);
+ if (conv_len < 0 || conv_len > len) {
+ kfree(name);
+ return -EINVAL;
+ }
+
+ uni_len = UNICODE_LEN(conv_len);
+
+ blob_off = sizeof(struct challenge_message);
+ blob_len = blob_off + uni_len;
+
+ chgblob->TargetName.Length = cpu_to_le16(uni_len);
+ chgblob->TargetName.MaximumLength = cpu_to_le16(uni_len);
+ chgblob->TargetName.BufferOffset = cpu_to_le32(blob_off);
+
+ /* Initialize random conn challenge */
+ get_random_bytes(sess->ntlmssp.cryptkey, sizeof(__u64));
+ memcpy(chgblob->Challenge, sess->ntlmssp.cryptkey,
+ CIFS_CRYPTO_KEY_SIZE);
+
+ /* Add Target Information to security buffer */
+ chgblob->TargetInfoArray.BufferOffset = cpu_to_le32(blob_len);
+
+ target_name = (__u8 *)chgblob + blob_off;
+ memcpy(target_name, name, uni_len);
+ tinfo = (struct target_info *)(target_name + uni_len);
+
+ chgblob->TargetInfoArray.Length = 0;
+ /* Add target info list for NetBIOS/DNS settings */
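+	/* each AV pair is a 2-byte type plus 2-byte length header and value */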
+ for (type = NTLMSSP_AV_NB_COMPUTER_NAME;
+ type <= NTLMSSP_AV_DNS_DOMAIN_NAME; type++) {
+ tinfo->Type = cpu_to_le16(type);
+ tinfo->Length = cpu_to_le16(uni_len);
+ memcpy(tinfo->Content, name, uni_len);
+ tinfo = (struct target_info *)((char *)tinfo + 4 + uni_len);
+ target_info_len += 4 + uni_len;
+ }
+
+ /* Add terminator subblock */
+ tinfo->Type = 0;
+ tinfo->Length = 0;
+ target_info_len += 4;
+
+ chgblob->TargetInfoArray.Length = cpu_to_le16(target_info_len);
+ chgblob->TargetInfoArray.MaximumLength = cpu_to_le16(target_info_len);
+ blob_len += target_info_len;
+ kfree(name);
+ ksmbd_debug(AUTH, "NTLMSSP SecurityBufferLength %d\n", blob_len);
+ return blob_len;
+}
+
+#ifdef CONFIG_SMB_SERVER_KERBEROS5
+int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob,
+ int in_len, char *out_blob, int *out_len)
+{
+ struct ksmbd_spnego_authen_response *resp;
+ struct ksmbd_user *user = NULL;
+ int retval;
+
+ resp = ksmbd_ipc_spnego_authen_request(in_blob, in_len);
+ if (!resp) {
+ ksmbd_debug(AUTH, "SPNEGO_AUTHEN_REQUEST failure\n");
+ return -EINVAL;
+ }
+
+ if (!(resp->login_response.status & KSMBD_USER_FLAG_OK)) {
+ ksmbd_debug(AUTH, "krb5 authentication failure\n");
+ retval = -EPERM;
+ goto out;
+ }
+
+ if (*out_len <= resp->spnego_blob_len) {
+ ksmbd_debug(AUTH, "buf len %d, but blob len %d\n",
+ *out_len, resp->spnego_blob_len);
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (resp->session_key_len > sizeof(sess->sess_key)) {
+ ksmbd_debug(AUTH, "session key is too long\n");
+ retval = -EINVAL;
+ goto out;
+ }
+
+ user = ksmbd_alloc_user(&resp->login_response);
+ if (!user) {
+ ksmbd_debug(AUTH, "login failure\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+ sess->user = user;
+
+ memcpy(sess->sess_key, resp->payload, resp->session_key_len);
+ memcpy(out_blob, resp->payload + resp->session_key_len,
+ resp->spnego_blob_len);
+ *out_len = resp->spnego_blob_len;
+ retval = 0;
+out:
+ kvfree(resp);
+ return retval;
+}
+#else
+int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob,
+ int in_len, char *out_blob, int *out_len)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+/**
+ * ksmbd_sign_smb2_pdu() - function to generate packet signing
+ * @conn: connection
+ * @key: signing key
+ * @iov: buffer iov array
+ * @n_vec: number of iovecs
+ * @sig: signature value generated for client request packet
+ *
+ * Return: 0 on success, error number on error
+ */
+int ksmbd_sign_smb2_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
+ int n_vec, char *sig)
+{
+ struct ksmbd_crypto_ctx *ctx;
+ int rc, i;
+
+ ctx = ksmbd_crypto_ctx_find_hmacsha256();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not crypto alloc hmacmd5\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_setkey(CRYPTO_HMACSHA256_TFM(ctx),
+ key,
+ SMB2_NTLMV2_SESSKEY_SIZE);
+ if (rc)
+ goto out;
+
+ rc = crypto_shash_init(CRYPTO_HMACSHA256(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "hmacsha256 init error %d\n", rc);
+ goto out;
+ }
+
+ for (i = 0; i < n_vec; i++) {
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx),
+ iov[i].iov_base,
+ iov[i].iov_len);
+ if (rc) {
+ ksmbd_debug(AUTH, "hmacsha256 update error %d\n", rc);
+ goto out;
+ }
+ }
+
+ rc = crypto_shash_final(CRYPTO_HMACSHA256(ctx), sig);
+ if (rc)
+ ksmbd_debug(AUTH, "hmacsha256 generation error %d\n", rc);
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+/**
+ * ksmbd_sign_smb3_pdu() - function to generate packet signing
+ * @conn: connection
+ * @key: signing key
+ * @iov: buffer iov array
+ * @n_vec: number of iovecs
+ * @sig: signature value generated for client request packet
+ *
+ * Return: 0 on success, error number on error
+ */
+int ksmbd_sign_smb3_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
+ int n_vec, char *sig)
+{
+ struct ksmbd_crypto_ctx *ctx;
+ int rc, i;
+
+ ctx = ksmbd_crypto_ctx_find_cmacaes();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not crypto alloc cmac\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_setkey(CRYPTO_CMACAES_TFM(ctx),
+ key,
+ SMB2_CMACAES_SIZE);
+ if (rc)
+ goto out;
+
+ rc = crypto_shash_init(CRYPTO_CMACAES(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "cmaces init error %d\n", rc);
+ goto out;
+ }
+
+ for (i = 0; i < n_vec; i++) {
+ rc = crypto_shash_update(CRYPTO_CMACAES(ctx),
+ iov[i].iov_base,
+ iov[i].iov_len);
+ if (rc) {
+ ksmbd_debug(AUTH, "cmaces update error %d\n", rc);
+ goto out;
+ }
+ }
+
+ rc = crypto_shash_final(CRYPTO_CMACAES(ctx), sig);
+ if (rc)
+ ksmbd_debug(AUTH, "cmaces generation error %d\n", rc);
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+struct derivation {
+ struct kvec label;
+ struct kvec context;
+ bool binding;
+};
+
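+/*
+ * SP 800-108 counter-mode KDF with HMAC-SHA256, as used for SMB3 key
+ * derivation: K = HMAC(session key, i || label || 0x00 || context || L),
+ * with the 32-bit counter i fixed at 1 and L the output length in bits
+ * (256 for the AES-256 ciphers, 128 otherwise).
+ */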
+static int generate_key(struct ksmbd_session *sess, struct kvec label,
+ struct kvec context, __u8 *key, unsigned int key_size)
+{
+ unsigned char zero = 0x0;
+ __u8 i[4] = {0, 0, 0, 1};
+ __u8 L128[4] = {0, 0, 0, 128};
+ __u8 L256[4] = {0, 0, 1, 0};
+ int rc;
+ unsigned char prfhash[SMB2_HMACSHA256_SIZE];
+ unsigned char *hashptr = prfhash;
+ struct ksmbd_crypto_ctx *ctx;
+
+ memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
+ memset(key, 0x0, key_size);
+
+ ctx = ksmbd_crypto_ctx_find_hmacsha256();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not crypto alloc hmacmd5\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_setkey(CRYPTO_HMACSHA256_TFM(ctx),
+ sess->sess_key,
+ SMB2_NTLMV2_SESSKEY_SIZE);
+ if (rc)
+ goto smb3signkey_ret;
+
+ rc = crypto_shash_init(CRYPTO_HMACSHA256(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "hmacsha256 init error %d\n", rc);
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), i, 4);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with n\n");
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx),
+ label.iov_base,
+ label.iov_len);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with label\n");
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), &zero, 1);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with zero\n");
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx),
+ context.iov_base,
+ context.iov_len);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with context\n");
+ goto smb3signkey_ret;
+ }
+
+ if (sess->conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM ||
+ sess->conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L256, 4);
+ else
+ rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L128, 4);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with L\n");
+ goto smb3signkey_ret;
+ }
+
+ rc = crypto_shash_final(CRYPTO_HMACSHA256(ctx), hashptr);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not generate hmacmd5 hash error %d\n",
+ rc);
+ goto smb3signkey_ret;
+ }
+
+ memcpy(key, hashptr, key_size);
+
+smb3signkey_ret:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
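+/*
+ * When binding an extra channel to an SMB 3.x session, the new channel
+ * derives its own signing key; otherwise the session-wide key is derived
+ * and also copied into the channel.
+ */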
+static int generate_smb3signingkey(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn,
+ const struct derivation *signing)
+{
+ int rc;
+ struct channel *chann;
+ char *key;
+
+ chann = lookup_chann_list(sess, conn);
+ if (!chann)
+ return 0;
+
+ if (sess->conn->dialect >= SMB30_PROT_ID && signing->binding)
+ key = chann->smb3signingkey;
+ else
+ key = sess->smb3signingkey;
+
+ rc = generate_key(sess, signing->label, signing->context, key,
+ SMB3_SIGN_KEY_SIZE);
+ if (rc)
+ return rc;
+
+ if (!(sess->conn->dialect >= SMB30_PROT_ID && signing->binding))
+ memcpy(chann->smb3signingkey, key, SMB3_SIGN_KEY_SIZE);
+
+ ksmbd_debug(AUTH, "dumping generated AES signing keys\n");
+ ksmbd_debug(AUTH, "Session Id %llu\n", sess->id);
+ ksmbd_debug(AUTH, "Session Key %*ph\n",
+ SMB2_NTLMV2_SESSKEY_SIZE, sess->sess_key);
+ ksmbd_debug(AUTH, "Signing Key %*ph\n",
+ SMB3_SIGN_KEY_SIZE, key);
+ return 0;
+}
+
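+/*
+ * The string labels and contexts below include their trailing NUL byte,
+ * hence lengths of strlen() + 1.
+ */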
+int ksmbd_gen_smb30_signingkey(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn)
+{
+ struct derivation d;
+
+ d.label.iov_base = "SMB2AESCMAC";
+ d.label.iov_len = 12;
+ d.context.iov_base = "SmbSign";
+ d.context.iov_len = 8;
+ d.binding = conn->binding;
+
+ return generate_smb3signingkey(sess, conn, &d);
+}
+
+int ksmbd_gen_smb311_signingkey(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn)
+{
+ struct derivation d;
+
+ d.label.iov_base = "SMBSigningKey";
+ d.label.iov_len = 14;
+ if (conn->binding) {
+ struct preauth_session *preauth_sess;
+
+ preauth_sess = ksmbd_preauth_session_lookup(conn, sess->id);
+ if (!preauth_sess)
+ return -ENOENT;
+ d.context.iov_base = preauth_sess->Preauth_HashValue;
+ } else {
+ d.context.iov_base = sess->Preauth_HashValue;
+ }
+ d.context.iov_len = 64;
+ d.binding = conn->binding;
+
+ return generate_smb3signingkey(sess, conn, &d);
+}
+
+struct derivation_twin {
+ struct derivation encryption;
+ struct derivation decryption;
+};
+
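+/*
+ * The server's encryption key is derived with the "ServerOut" context
+ * (traffic it sends) and its decryption key with "ServerIn " (traffic it
+ * receives); the client derives the same pair with the roles swapped.
+ */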
+static int generate_smb3encryptionkey(struct ksmbd_session *sess,
+ const struct derivation_twin *ptwin)
+{
+ int rc;
+
+ rc = generate_key(sess, ptwin->encryption.label,
+ ptwin->encryption.context, sess->smb3encryptionkey,
+ SMB3_ENC_DEC_KEY_SIZE);
+ if (rc)
+ return rc;
+
+ rc = generate_key(sess, ptwin->decryption.label,
+ ptwin->decryption.context,
+ sess->smb3decryptionkey, SMB3_ENC_DEC_KEY_SIZE);
+ if (rc)
+ return rc;
+
+ ksmbd_debug(AUTH, "dumping generated AES encryption keys\n");
+ ksmbd_debug(AUTH, "Cipher type %d\n", sess->conn->cipher_type);
+ ksmbd_debug(AUTH, "Session Id %llu\n", sess->id);
+ ksmbd_debug(AUTH, "Session Key %*ph\n",
+ SMB2_NTLMV2_SESSKEY_SIZE, sess->sess_key);
+ if (sess->conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM ||
+ sess->conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM) {
+ ksmbd_debug(AUTH, "ServerIn Key %*ph\n",
+ SMB3_GCM256_CRYPTKEY_SIZE, sess->smb3encryptionkey);
+ ksmbd_debug(AUTH, "ServerOut Key %*ph\n",
+ SMB3_GCM256_CRYPTKEY_SIZE, sess->smb3decryptionkey);
+ } else {
+ ksmbd_debug(AUTH, "ServerIn Key %*ph\n",
+ SMB3_GCM128_CRYPTKEY_SIZE, sess->smb3encryptionkey);
+ ksmbd_debug(AUTH, "ServerOut Key %*ph\n",
+ SMB3_GCM128_CRYPTKEY_SIZE, sess->smb3decryptionkey);
+ }
+ return 0;
+}
+
+int ksmbd_gen_smb30_encryptionkey(struct ksmbd_session *sess)
+{
+ struct derivation_twin twin;
+ struct derivation *d;
+
+ d = &twin.encryption;
+ d->label.iov_base = "SMB2AESCCM";
+ d->label.iov_len = 11;
+ d->context.iov_base = "ServerOut";
+ d->context.iov_len = 10;
+
+ d = &twin.decryption;
+ d->label.iov_base = "SMB2AESCCM";
+ d->label.iov_len = 11;
+ d->context.iov_base = "ServerIn ";
+ d->context.iov_len = 10;
+
+ return generate_smb3encryptionkey(sess, &twin);
+}
+
+int ksmbd_gen_smb311_encryptionkey(struct ksmbd_session *sess)
+{
+ struct derivation_twin twin;
+ struct derivation *d;
+
+ d = &twin.encryption;
+ d->label.iov_base = "SMBS2CCipherKey";
+ d->label.iov_len = 16;
+ d->context.iov_base = sess->Preauth_HashValue;
+ d->context.iov_len = 64;
+
+ d = &twin.decryption;
+ d->label.iov_base = "SMBC2SCipherKey";
+ d->label.iov_len = 16;
+ d->context.iov_base = sess->Preauth_HashValue;
+ d->context.iov_len = 64;
+
+ return generate_smb3encryptionkey(sess, &twin);
+}
+
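+/*
+ * SMB 3.1.1 preauth integrity hash is chained:
+ * H(n) = SHA-512(H(n - 1) || full PDU). pi_hash holds the previous value
+ * on entry and is overwritten with the new one.
+ */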
+int ksmbd_gen_preauth_integrity_hash(struct ksmbd_conn *conn, char *buf,
+ __u8 *pi_hash)
+{
+ int rc;
+ struct smb2_hdr *rcv_hdr = (struct smb2_hdr *)buf;
+ char *all_bytes_msg = (char *)&rcv_hdr->ProtocolId;
+ int msg_size = be32_to_cpu(rcv_hdr->smb2_buf_length);
+ struct ksmbd_crypto_ctx *ctx = NULL;
+
+ if (conn->preauth_info->Preauth_HashId !=
+ SMB2_PREAUTH_INTEGRITY_SHA512)
+ return -EINVAL;
+
+ ctx = ksmbd_crypto_ctx_find_sha512();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not alloc sha512\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_init(CRYPTO_SHA512(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "could not init shashn");
+ goto out;
+ }
+
+ rc = crypto_shash_update(CRYPTO_SHA512(ctx), pi_hash, 64);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with n\n");
+ goto out;
+ }
+
+ rc = crypto_shash_update(CRYPTO_SHA512(ctx), all_bytes_msg, msg_size);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with n\n");
+ goto out;
+ }
+
+ rc = crypto_shash_final(CRYPTO_SHA512(ctx), pi_hash);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not generate hash err : %d\n", rc);
+ goto out;
+ }
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+int ksmbd_gen_sd_hash(struct ksmbd_conn *conn, char *sd_buf, int len,
+ __u8 *pi_hash)
+{
+ int rc;
+ struct ksmbd_crypto_ctx *ctx = NULL;
+
+ ctx = ksmbd_crypto_ctx_find_sha256();
+ if (!ctx) {
+ ksmbd_debug(AUTH, "could not alloc sha256\n");
+ return -ENOMEM;
+ }
+
+ rc = crypto_shash_init(CRYPTO_SHA256(ctx));
+ if (rc) {
+ ksmbd_debug(AUTH, "could not init shashn");
+ goto out;
+ }
+
+ rc = crypto_shash_update(CRYPTO_SHA256(ctx), sd_buf, len);
+ if (rc) {
+ ksmbd_debug(AUTH, "could not update with n\n");
+ goto out;
+ }
+
+ rc = crypto_shash_final(CRYPTO_SHA256(ctx), pi_hash);
+ if (rc) {
+ ksmbd_debug(AUTH, "Could not generate hash err : %d\n", rc);
+ goto out;
+ }
+out:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
+
+static int ksmbd_get_encryption_key(struct ksmbd_conn *conn, __u64 ses_id,
+ int enc, u8 *key)
+{
+ struct ksmbd_session *sess;
+ u8 *ses_enc_key;
+
+ sess = ksmbd_session_lookup_all(conn, ses_id);
+ if (!sess)
+ return -EINVAL;
+
+ ses_enc_key = enc ? sess->smb3encryptionkey :
+ sess->smb3decryptionkey;
+ memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE);
+
+ return 0;
+}
+
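+/*
+ * Buffers may come from kmalloc or vmalloc; resolve the backing page
+ * accordingly before adding it to the scatterlist.
+ */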
+static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf,
+ unsigned int buflen)
+{
+ void *addr;
+
+ if (is_vmalloc_addr(buf))
+ addr = vmalloc_to_page(buf);
+ else
+ addr = virt_to_page(buf);
+ sg_set_page(sg, addr, buflen, offset_in_page(buf));
+}
+
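+/*
+ * Build the AEAD scatterlist: entry 0 maps the associated data (the
+ * transform header past the 4-byte length, ProtocolId and 16-byte
+ * signature), the middle entries map the PDU payload (vmalloc buffers
+ * split per page), and the last entry holds the signature/tag.
+ */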
+static struct scatterlist *ksmbd_init_sg(struct kvec *iov, unsigned int nvec,
+ u8 *sign)
+{
+ struct scatterlist *sg;
+ unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+ int i, nr_entries[3] = {0}, total_entries = 0, sg_idx = 0;
+
+ if (!nvec)
+ return NULL;
+
+ for (i = 0; i < nvec - 1; i++) {
+ unsigned long kaddr = (unsigned long)iov[i + 1].iov_base;
+
+ if (is_vmalloc_addr(iov[i + 1].iov_base)) {
+ nr_entries[i] = ((kaddr + iov[i + 1].iov_len +
+ PAGE_SIZE - 1) >> PAGE_SHIFT) -
+ (kaddr >> PAGE_SHIFT);
+ } else {
+ nr_entries[i]++;
+ }
+ total_entries += nr_entries[i];
+ }
+
+ /* Add two entries for transform header and signature */
+ total_entries += 2;
+
+ sg = kmalloc_array(total_entries, sizeof(struct scatterlist), GFP_KERNEL);
+ if (!sg)
+ return NULL;
+
+ sg_init_table(sg, total_entries);
+ smb2_sg_set_buf(&sg[sg_idx++], iov[0].iov_base + 24, assoc_data_len);
+ for (i = 0; i < nvec - 1; i++) {
+ void *data = iov[i + 1].iov_base;
+ int len = iov[i + 1].iov_len;
+
+ if (is_vmalloc_addr(data)) {
+ int j, offset = offset_in_page(data);
+
+ for (j = 0; j < nr_entries[i]; j++) {
+ unsigned int bytes = PAGE_SIZE - offset;
+
+ if (!len)
+ break;
+
+ if (bytes > len)
+ bytes = len;
+
+ sg_set_page(&sg[sg_idx++],
+ vmalloc_to_page(data), bytes,
+ offset_in_page(data));
+
+ data += bytes;
+ len -= bytes;
+ offset = 0;
+ }
+ } else {
+ sg_set_page(&sg[sg_idx++], virt_to_page(data), len,
+ offset_in_page(data));
+ }
+ }
+ smb2_sg_set_buf(&sg[sg_idx], sign, SMB2_SIGNATURE_SIZE);
+ return sg;
+}
+
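+/*
+ * Encrypt or decrypt one SMB2 TRANSFORM message in place: the key is
+ * selected by session id and direction, GCM vs. CCM by the negotiated
+ * cipher, and the 16-byte transform signature doubles as the AEAD tag.
+ */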
+int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
+ unsigned int nvec, int enc)
+{
+ struct smb2_transform_hdr *tr_hdr =
+ (struct smb2_transform_hdr *)iov[0].iov_base;
+ unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+ int rc;
+ struct scatterlist *sg;
+ u8 sign[SMB2_SIGNATURE_SIZE] = {};
+ u8 key[SMB3_ENC_DEC_KEY_SIZE];
+ struct aead_request *req;
+ char *iv;
+ unsigned int iv_len;
+ struct crypto_aead *tfm;
+ unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
+ struct ksmbd_crypto_ctx *ctx;
+
+ rc = ksmbd_get_encryption_key(conn,
+ le64_to_cpu(tr_hdr->SessionId),
+ enc,
+ key);
+ if (rc) {
+ pr_err("Could not get %scryption key\n", enc ? "en" : "de");
+ return rc;
+ }
+
+ if (conn->cipher_type == SMB2_ENCRYPTION_AES128_GCM ||
+ conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ ctx = ksmbd_crypto_ctx_find_gcm();
+ else
+ ctx = ksmbd_crypto_ctx_find_ccm();
+ if (!ctx) {
+ pr_err("crypto alloc failed\n");
+ return -ENOMEM;
+ }
+
+ if (conn->cipher_type == SMB2_ENCRYPTION_AES128_GCM ||
+ conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ tfm = CRYPTO_GCM(ctx);
+ else
+ tfm = CRYPTO_CCM(ctx);
+
+ if (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM ||
+ conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ rc = crypto_aead_setkey(tfm, key, SMB3_GCM256_CRYPTKEY_SIZE);
+ else
+ rc = crypto_aead_setkey(tfm, key, SMB3_GCM128_CRYPTKEY_SIZE);
+ if (rc) {
+ pr_err("Failed to set aead key %d\n", rc);
+ goto free_ctx;
+ }
+
+ rc = crypto_aead_setauthsize(tfm, SMB2_SIGNATURE_SIZE);
+ if (rc) {
+ pr_err("Failed to set authsize %d\n", rc);
+ goto free_ctx;
+ }
+
+ req = aead_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ rc = -ENOMEM;
+ goto free_ctx;
+ }
+
+ if (!enc) {
+ memcpy(sign, &tr_hdr->Signature, SMB2_SIGNATURE_SIZE);
+ crypt_len += SMB2_SIGNATURE_SIZE;
+ }
+
+ sg = ksmbd_init_sg(iov, nvec, sign);
+ if (!sg) {
+ pr_err("Failed to init sg\n");
+ rc = -ENOMEM;
+ goto free_req;
+ }
+
+ iv_len = crypto_aead_ivsize(tfm);
+ iv = kzalloc(iv_len, GFP_KERNEL);
+ if (!iv) {
+ rc = -ENOMEM;
+ goto free_sg;
+ }
+
+ if (conn->cipher_type == SMB2_ENCRYPTION_AES128_GCM ||
+ conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM) {
+ memcpy(iv, (char *)tr_hdr->Nonce, SMB3_AES_GCM_NONCE);
+ } else {
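+		/*
+		 * ccm(aes) expects iv[0] = L - 1; 3 selects a 4-byte length
+		 * field, leaving 15 - 4 = 11 bytes for the CCM nonce.
+		 */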
+ iv[0] = 3;
+ memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES_CCM_NONCE);
+ }
+
+ aead_request_set_crypt(req, sg, sg, crypt_len, iv);
+ aead_request_set_ad(req, assoc_data_len);
+ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+
+ if (enc)
+ rc = crypto_aead_encrypt(req);
+ else
+ rc = crypto_aead_decrypt(req);
+ if (rc)
+ goto free_iv;
+
+ if (enc)
+ memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
+
+free_iv:
+ kfree(iv);
+free_sg:
+ kfree(sg);
+free_req:
+ kfree(req);
+free_ctx:
+ ksmbd_release_crypto_ctx(ctx);
+ return rc;
+}
diff --git a/fs/ksmbd/auth.h b/fs/ksmbd/auth.h
new file mode 100644
index 000000000000..9c2d4badd05d
--- /dev/null
+++ b/fs/ksmbd/auth.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __AUTH_H__
+#define __AUTH_H__
+
+#include "ntlmssp.h"
+
+#ifdef CONFIG_SMB_SERVER_KERBEROS5
+#define AUTH_GSS_LENGTH 96
+#define AUTH_GSS_PADDING 0
+#else
+#define AUTH_GSS_LENGTH 74
+#define AUTH_GSS_PADDING 6
+#endif
+
+#define CIFS_HMAC_MD5_HASH_SIZE (16)
+#define CIFS_NTHASH_SIZE (16)
+
+/*
+ * Size of the ntlm client response
+ */
+#define CIFS_AUTH_RESP_SIZE 24
+#define CIFS_SMB1_SIGNATURE_SIZE 8
+#define CIFS_SMB1_SESSKEY_SIZE 16
+
+#define KSMBD_AUTH_NTLMSSP 0x0001
+#define KSMBD_AUTH_KRB5 0x0002
+#define KSMBD_AUTH_MSKRB5 0x0004
+#define KSMBD_AUTH_KRB5U2U 0x0008
+
+struct ksmbd_session;
+struct ksmbd_conn;
+struct kvec;
+
+int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
+ unsigned int nvec, int enc);
+void ksmbd_copy_gss_neg_header(void *buf);
+int ksmbd_auth_ntlm(struct ksmbd_session *sess, char *pw_buf);
+int ksmbd_auth_ntlmv2(struct ksmbd_session *sess, struct ntlmv2_resp *ntlmv2,
+ int blen, char *domain_name);
+int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob,
+ int blob_len, struct ksmbd_session *sess);
+int ksmbd_decode_ntlmssp_neg_blob(struct negotiate_message *negblob,
+ int blob_len, struct ksmbd_session *sess);
+unsigned int
+ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob,
+ struct ksmbd_session *sess);
+int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob,
+ int in_len, char *out_blob, int *out_len);
+int ksmbd_sign_smb2_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
+ int n_vec, char *sig);
+int ksmbd_sign_smb3_pdu(struct ksmbd_conn *conn, char *key, struct kvec *iov,
+ int n_vec, char *sig);
+int ksmbd_gen_smb30_signingkey(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn);
+int ksmbd_gen_smb311_signingkey(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn);
+int ksmbd_gen_smb30_encryptionkey(struct ksmbd_session *sess);
+int ksmbd_gen_smb311_encryptionkey(struct ksmbd_session *sess);
+int ksmbd_gen_preauth_integrity_hash(struct ksmbd_conn *conn, char *buf,
+ __u8 *pi_hash);
+int ksmbd_gen_sd_hash(struct ksmbd_conn *conn, char *sd_buf, int len,
+ __u8 *pi_hash);
+#endif
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
new file mode 100644
index 000000000000..af086d35398a
--- /dev/null
+++ b/fs/ksmbd/connection.c
@@ -0,0 +1,413 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <namjae.jeon@protocolfreedom.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/mutex.h>
+#include <linux/freezer.h>
+#include <linux/module.h>
+
+#include "server.h"
+#include "smb_common.h"
+#include "mgmt/ksmbd_ida.h"
+#include "connection.h"
+#include "transport_tcp.h"
+#include "transport_rdma.h"
+
+static DEFINE_MUTEX(init_lock);
+
+static struct ksmbd_conn_ops default_conn_ops;
+
+LIST_HEAD(conn_list);
+DEFINE_RWLOCK(conn_list_lock);
+
+/**
+ * ksmbd_conn_free() - free resources of the connection instance
+ *
+ * @conn:	connection instance to be cleaned up
+ *
+ * During thread termination, the corresponding conn instance resources
+ * (sock/memory) are released and finally the conn object is freed.
+ */
+void ksmbd_conn_free(struct ksmbd_conn *conn)
+{
+ write_lock(&conn_list_lock);
+ list_del(&conn->conns_list);
+ write_unlock(&conn_list_lock);
+
+ kvfree(conn->request_buf);
+ kfree(conn->preauth_info);
+ kfree(conn);
+}
+
+/**
+ * ksmbd_conn_alloc() - initialize a new connection instance
+ *
+ * Return: ksmbd_conn struct on success, otherwise NULL
+ */
+struct ksmbd_conn *ksmbd_conn_alloc(void)
+{
+ struct ksmbd_conn *conn;
+
+ conn = kzalloc(sizeof(struct ksmbd_conn), GFP_KERNEL);
+ if (!conn)
+ return NULL;
+
+ conn->need_neg = true;
+ conn->status = KSMBD_SESS_NEW;
+ conn->local_nls = load_nls("utf8");
+ if (!conn->local_nls)
+ conn->local_nls = load_nls_default();
+ atomic_set(&conn->req_running, 0);
+ atomic_set(&conn->r_count, 0);
+ init_waitqueue_head(&conn->req_running_q);
+ INIT_LIST_HEAD(&conn->conns_list);
+ INIT_LIST_HEAD(&conn->sessions);
+ INIT_LIST_HEAD(&conn->requests);
+ INIT_LIST_HEAD(&conn->async_requests);
+ spin_lock_init(&conn->request_lock);
+ spin_lock_init(&conn->credits_lock);
+ ida_init(&conn->async_ida);
+
+ spin_lock_init(&conn->llist_lock);
+ INIT_LIST_HEAD(&conn->lock_list);
+
+ write_lock(&conn_list_lock);
+ list_add(&conn->conns_list, &conn_list);
+ write_unlock(&conn_list_lock);
+ return conn;
+}
+
+bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c)
+{
+ struct ksmbd_conn *t;
+ bool ret = false;
+
+ read_lock(&conn_list_lock);
+ list_for_each_entry(t, &conn_list, conns_list) {
+ if (memcmp(t->ClientGUID, c->ClientGUID, SMB2_CLIENT_GUID_SIZE))
+ continue;
+
+ ret = true;
+ break;
+ }
+ read_unlock(&conn_list_lock);
+ return ret;
+}
+
+void ksmbd_conn_enqueue_request(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct list_head *requests_queue = NULL;
+
+ if (conn->ops->get_cmd_val(work) != SMB2_CANCEL_HE) {
+ requests_queue = &conn->requests;
+ work->syncronous = true;
+ }
+
+ if (requests_queue) {
+ atomic_inc(&conn->req_running);
+ spin_lock(&conn->request_lock);
+ list_add_tail(&work->request_entry, requests_queue);
+ spin_unlock(&conn->request_lock);
+ }
+}
+
+int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ int ret = 1;
+
+ if (list_empty(&work->request_entry) &&
+ list_empty(&work->async_request_entry))
+ return 0;
+
+ if (!work->multiRsp)
+ atomic_dec(&conn->req_running);
+ spin_lock(&conn->request_lock);
+ if (!work->multiRsp) {
+ list_del_init(&work->request_entry);
+ if (work->syncronous == false)
+ list_del_init(&work->async_request_entry);
+ ret = 0;
+ }
+ spin_unlock(&conn->request_lock);
+
+ wake_up_all(&conn->req_running_q);
+ return ret;
+}
+
+static void ksmbd_conn_lock(struct ksmbd_conn *conn)
+{
+ mutex_lock(&conn->srv_mutex);
+}
+
+static void ksmbd_conn_unlock(struct ksmbd_conn *conn)
+{
+ mutex_unlock(&conn->srv_mutex);
+}
+
+void ksmbd_conn_wait_idle(struct ksmbd_conn *conn)
+{
+ wait_event(conn->req_running_q, atomic_read(&conn->req_running) < 2);
+}
+
+int ksmbd_conn_write(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb_hdr *rsp_hdr = work->response_buf;
+ size_t len = 0;
+ int sent;
+ struct kvec iov[3];
+ int iov_idx = 0;
+
+ ksmbd_conn_try_dequeue_request(work);
+ if (!rsp_hdr) {
+ pr_err("NULL response header\n");
+ return -EINVAL;
+ }
+
+ if (work->tr_buf) {
+ iov[iov_idx] = (struct kvec) { work->tr_buf,
+ sizeof(struct smb2_transform_hdr) };
+ len += iov[iov_idx++].iov_len;
+ }
+
+ if (work->aux_payload_sz) {
+ iov[iov_idx] = (struct kvec) { rsp_hdr, work->resp_hdr_sz };
+ len += iov[iov_idx++].iov_len;
+ iov[iov_idx] = (struct kvec) { work->aux_payload_buf, work->aux_payload_sz };
+ len += iov[iov_idx++].iov_len;
+ } else {
+ if (work->tr_buf)
+ iov[iov_idx].iov_len = work->resp_hdr_sz;
+ else
+ iov[iov_idx].iov_len = get_rfc1002_len(rsp_hdr) + 4;
+ iov[iov_idx].iov_base = rsp_hdr;
+ len += iov[iov_idx++].iov_len;
+ }
+
+ ksmbd_conn_lock(conn);
+ sent = conn->transport->ops->writev(conn->transport, &iov[0],
+ iov_idx, len,
+ work->need_invalidate_rkey,
+ work->remote_key);
+ ksmbd_conn_unlock(conn);
+
+ if (sent < 0) {
+ pr_err("Failed to send message: %d\n", sent);
+ return sent;
+ }
+
+ return 0;
+}
+
+int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf,
+ unsigned int buflen, u32 remote_key, u64 remote_offset,
+ u32 remote_len)
+{
+ int ret = -EINVAL;
+
+ if (conn->transport->ops->rdma_read)
+ ret = conn->transport->ops->rdma_read(conn->transport,
+ buf, buflen,
+ remote_key, remote_offset,
+ remote_len);
+ return ret;
+}
+
+int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf,
+ unsigned int buflen, u32 remote_key,
+ u64 remote_offset, u32 remote_len)
+{
+ int ret = -EINVAL;
+
+ if (conn->transport->ops->rdma_write)
+ ret = conn->transport->ops->rdma_write(conn->transport,
+ buf, buflen,
+ remote_key, remote_offset,
+ remote_len);
+ return ret;
+}
+
+bool ksmbd_conn_alive(struct ksmbd_conn *conn)
+{
+ if (!ksmbd_server_running())
+ return false;
+
+ if (conn->status == KSMBD_SESS_EXITING)
+ return false;
+
+ if (kthread_should_stop())
+ return false;
+
+ if (atomic_read(&conn->stats.open_files_count) > 0)
+ return true;
+
+ /*
+	 * Stop the current session if the time since the last client request
+	 * exceeds the configured deadtime and no files are open.
+ */
+ if (server_conf.deadtime > 0 &&
+ time_after(jiffies, conn->last_active + server_conf.deadtime)) {
+ ksmbd_debug(CONN, "No response from client in %lu minutes\n",
+ server_conf.deadtime / SMB_ECHO_INTERVAL);
+ return false;
+ }
+ return true;
+}
+
+/**
+ * ksmbd_conn_handler_loop() - session thread to listen on new smb requests
+ * @p: connection instance
+ *
+ * One thread per connection
+ *
+ * Return: 0 on success
+ */
+int ksmbd_conn_handler_loop(void *p)
+{
+ struct ksmbd_conn *conn = (struct ksmbd_conn *)p;
+ struct ksmbd_transport *t = conn->transport;
+ unsigned int pdu_size;
+ char hdr_buf[4] = {0,};
+ int size;
+
+ mutex_init(&conn->srv_mutex);
+ __module_get(THIS_MODULE);
+
+ if (t->ops->prepare && t->ops->prepare(t))
+ goto out;
+
+ conn->last_active = jiffies;
+ while (ksmbd_conn_alive(conn)) {
+ if (try_to_freeze())
+ continue;
+
+ kvfree(conn->request_buf);
+ conn->request_buf = NULL;
+
+ size = t->ops->read(t, hdr_buf, sizeof(hdr_buf));
+ if (size != sizeof(hdr_buf))
+ break;
+
+ pdu_size = get_rfc1002_len(hdr_buf);
+ ksmbd_debug(CONN, "RFC1002 header %u bytes\n", pdu_size);
+
+ /* make sure we have enough to get to SMB header end */
+ if (!ksmbd_pdu_size_has_room(pdu_size)) {
+ ksmbd_debug(CONN, "SMB request too short (%u bytes)\n",
+ pdu_size);
+ continue;
+ }
+
+ /* 4 for rfc1002 length field */
+ size = pdu_size + 4;
+ conn->request_buf = kvmalloc(size, GFP_KERNEL);
+ if (!conn->request_buf)
+ continue;
+
+ memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf));
+ if (!ksmbd_smb_request(conn))
+ break;
+
+ /*
+ * We already read 4 bytes to find out PDU size, now
+ * read in PDU
+ */
+ size = t->ops->read(t, conn->request_buf + 4, pdu_size);
+ if (size < 0) {
+ pr_err("sock_read failed: %d\n", size);
+ break;
+ }
+
+ if (size != pdu_size) {
+ pr_err("PDU error. Read: %d, Expected: %d\n",
+ size, pdu_size);
+ continue;
+ }
+
+ if (!default_conn_ops.process_fn) {
+ pr_err("No connection request callback\n");
+ break;
+ }
+
+ if (default_conn_ops.process_fn(conn)) {
+ pr_err("Cannot handle request\n");
+ break;
+ }
+ }
+
+out:
+	/* Wait until all references to this connection are dropped */
+ while (atomic_read(&conn->r_count) > 0)
+ schedule_timeout(HZ);
+
+ unload_nls(conn->local_nls);
+ if (default_conn_ops.terminate_fn)
+ default_conn_ops.terminate_fn(conn);
+ t->ops->disconnect(t);
+ module_put(THIS_MODULE);
+ return 0;
+}
+
+void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops)
+{
+ default_conn_ops.process_fn = ops->process_fn;
+ default_conn_ops.terminate_fn = ops->terminate_fn;
+}
+
+int ksmbd_conn_transport_init(void)
+{
+ int ret;
+
+ mutex_lock(&init_lock);
+ ret = ksmbd_tcp_init();
+ if (ret) {
+ pr_err("Failed to init TCP subsystem: %d\n", ret);
+ goto out;
+ }
+
+ ret = ksmbd_rdma_init();
+ if (ret) {
+ pr_err("Failed to init RDMA subsystem: %d\n", ret);
+ goto out;
+ }
+out:
+ mutex_unlock(&init_lock);
+ return ret;
+}
+
+static void stop_sessions(void)
+{
+ struct ksmbd_conn *conn;
+
+again:
+ read_lock(&conn_list_lock);
+ list_for_each_entry(conn, &conn_list, conns_list) {
+ struct task_struct *task;
+
+ task = conn->transport->handler;
+ if (task)
+ ksmbd_debug(CONN, "Stop session handler %s/%d\n",
+ task->comm, task_pid_nr(task));
+ conn->status = KSMBD_SESS_EXITING;
+ }
+ read_unlock(&conn_list_lock);
+
+ if (!list_empty(&conn_list)) {
+ schedule_timeout_interruptible(HZ / 10); /* 100ms */
+ goto again;
+ }
+}
+
+void ksmbd_conn_transport_destroy(void)
+{
+ mutex_lock(&init_lock);
+ ksmbd_tcp_destroy();
+ ksmbd_rdma_destroy();
+ stop_sessions();
+ mutex_unlock(&init_lock);
+}
diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
new file mode 100644
index 000000000000..e5403c587a58
--- /dev/null
+++ b/fs/ksmbd/connection.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_CONNECTION_H__
+#define __KSMBD_CONNECTION_H__
+
+#include <linux/list.h>
+#include <linux/ip.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/inet_connection_sock.h>
+#include <net/request_sock.h>
+#include <linux/kthread.h>
+#include <linux/nls.h>
+
+#include "smb_common.h"
+#include "ksmbd_work.h"
+
+#define KSMBD_SOCKET_BACKLOG 16
+
+/*
+ * WARNING
+ *
+ * This is nothing but a HACK. Session status should move to channel
+ * or to session. As of now we have 1 tcp_conn : 1 ksmbd_session, but
+ * we need to change it to 1 tcp_conn : N ksmbd_sessions.
+ */
+enum {
+ KSMBD_SESS_NEW = 0,
+ KSMBD_SESS_GOOD,
+ KSMBD_SESS_EXITING,
+ KSMBD_SESS_NEED_RECONNECT,
+ KSMBD_SESS_NEED_NEGOTIATE
+};
+
+struct ksmbd_stats {
+ atomic_t open_files_count;
+ atomic64_t request_served;
+};
+
+struct ksmbd_transport;
+
+struct ksmbd_conn {
+ struct smb_version_values *vals;
+ struct smb_version_ops *ops;
+ struct smb_version_cmds *cmds;
+ unsigned int max_cmds;
+ struct mutex srv_mutex;
+ int status;
+ unsigned int cli_cap;
+ char *request_buf;
+ struct ksmbd_transport *transport;
+ struct nls_table *local_nls;
+ struct list_head conns_list;
+ /* smb session 1 per user */
+ struct list_head sessions;
+ unsigned long last_active;
+	/* Number of requests currently being processed */
+ atomic_t req_running;
+	/* References currently held on this connection object */
+ atomic_t r_count;
+ unsigned short total_credits;
+ unsigned short max_credits;
+ spinlock_t credits_lock;
+ wait_queue_head_t req_running_q;
+	/* Lock to protect requests list */
+ spinlock_t request_lock;
+ struct list_head requests;
+ struct list_head async_requests;
+ int connection_type;
+ struct ksmbd_stats stats;
+ char ClientGUID[SMB2_CLIENT_GUID_SIZE];
+ union {
+ /* pending trans request table */
+ struct trans_state *recent_trans;
+ /* Used by ntlmssp */
+ char *ntlmssp_cryptkey;
+ };
+
+ spinlock_t llist_lock;
+ struct list_head lock_list;
+
+ struct preauth_integrity_info *preauth_info;
+
+ bool need_neg;
+ unsigned int auth_mechs;
+ unsigned int preferred_auth_mech;
+ bool sign;
+ bool use_spnego:1;
+ __u16 cli_sec_mode;
+ __u16 srv_sec_mode;
+ /* dialect index that server chose */
+ __u16 dialect;
+
+ char *mechToken;
+
+ struct ksmbd_conn_ops *conn_ops;
+
+ /* Preauth Session Table */
+ struct list_head preauth_sess_table;
+
+ struct sockaddr_storage peer_addr;
+
+ /* Identifier for async message */
+ struct ida async_ida;
+
+ __le16 cipher_type;
+ __le16 compress_algorithm;
+ bool posix_ext_supported;
+ bool signing_negotiated;
+ __le16 signing_algorithm;
+ bool binding;
+};
+
+struct ksmbd_conn_ops {
+ int (*process_fn)(struct ksmbd_conn *conn);
+ int (*terminate_fn)(struct ksmbd_conn *conn);
+};
+
+struct ksmbd_transport_ops {
+ int (*prepare)(struct ksmbd_transport *t);
+ void (*disconnect)(struct ksmbd_transport *t);
+ int (*read)(struct ksmbd_transport *t, char *buf, unsigned int size);
+ int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov,
+ int size, bool need_invalidate_rkey,
+ unsigned int remote_key);
+ int (*rdma_read)(struct ksmbd_transport *t, void *buf, unsigned int len,
+ u32 remote_key, u64 remote_offset, u32 remote_len);
+ int (*rdma_write)(struct ksmbd_transport *t, void *buf,
+ unsigned int len, u32 remote_key, u64 remote_offset,
+ u32 remote_len);
+};
+
+struct ksmbd_transport {
+ struct ksmbd_conn *conn;
+ struct ksmbd_transport_ops *ops;
+ struct task_struct *handler;
+};
+
+#define KSMBD_TCP_RECV_TIMEOUT (7 * HZ)
+#define KSMBD_TCP_SEND_TIMEOUT (5 * HZ)
+#define KSMBD_TCP_PEER_SOCKADDR(c) ((struct sockaddr *)&((c)->peer_addr))
+
+extern struct list_head conn_list;
+extern rwlock_t conn_list_lock;
+
+bool ksmbd_conn_alive(struct ksmbd_conn *conn);
+void ksmbd_conn_wait_idle(struct ksmbd_conn *conn);
+struct ksmbd_conn *ksmbd_conn_alloc(void);
+void ksmbd_conn_free(struct ksmbd_conn *conn);
+bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c);
+int ksmbd_conn_write(struct ksmbd_work *work);
+int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf,
+ unsigned int buflen, u32 remote_key, u64 remote_offset,
+ u32 remote_len);
+int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf,
+ unsigned int buflen, u32 remote_key, u64 remote_offset,
+ u32 remote_len);
+void ksmbd_conn_enqueue_request(struct ksmbd_work *work);
+int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work);
+void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops);
+int ksmbd_conn_handler_loop(void *p);
+int ksmbd_conn_transport_init(void);
+void ksmbd_conn_transport_destroy(void);
+
+/*
+ * WARNING
+ *
+ * This is a hack. We will move status to a proper place once we land
+ * multi-session support.
+ */
+static inline bool ksmbd_conn_good(struct ksmbd_work *work)
+{
+ return work->conn->status == KSMBD_SESS_GOOD;
+}
+
+static inline bool ksmbd_conn_need_negotiate(struct ksmbd_work *work)
+{
+ return work->conn->status == KSMBD_SESS_NEED_NEGOTIATE;
+}
+
+static inline bool ksmbd_conn_need_reconnect(struct ksmbd_work *work)
+{
+ return work->conn->status == KSMBD_SESS_NEED_RECONNECT;
+}
+
+static inline bool ksmbd_conn_exiting(struct ksmbd_work *work)
+{
+ return work->conn->status == KSMBD_SESS_EXITING;
+}
+
+static inline void ksmbd_conn_set_good(struct ksmbd_work *work)
+{
+ work->conn->status = KSMBD_SESS_GOOD;
+}
+
+static inline void ksmbd_conn_set_need_negotiate(struct ksmbd_work *work)
+{
+ work->conn->status = KSMBD_SESS_NEED_NEGOTIATE;
+}
+
+static inline void ksmbd_conn_set_need_reconnect(struct ksmbd_work *work)
+{
+ work->conn->status = KSMBD_SESS_NEED_RECONNECT;
+}
+
+static inline void ksmbd_conn_set_exiting(struct ksmbd_work *work)
+{
+ work->conn->status = KSMBD_SESS_EXITING;
+}
+#endif /* __KSMBD_CONNECTION_H__ */
diff --git a/fs/ksmbd/crypto_ctx.c b/fs/ksmbd/crypto_ctx.c
new file mode 100644
index 000000000000..5f4b1008d17e
--- /dev/null
+++ b/fs/ksmbd/crypto_ctx.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include "glob.h"
+#include "crypto_ctx.h"
+
+struct crypto_ctx_list {
+ spinlock_t ctx_lock;
+ int avail_ctx;
+ struct list_head idle_ctx;
+ wait_queue_head_t ctx_wait;
+};
+
+static struct crypto_ctx_list ctx_list;
+
+static inline void free_aead(struct crypto_aead *aead)
+{
+ if (aead)
+ crypto_free_aead(aead);
+}
+
+static void free_shash(struct shash_desc *shash)
+{
+ if (shash) {
+ crypto_free_shash(shash->tfm);
+ kfree(shash);
+ }
+}
+
+static struct crypto_aead *alloc_aead(int id)
+{
+ struct crypto_aead *tfm = NULL;
+
+ switch (id) {
+ case CRYPTO_AEAD_AES_GCM:
+ tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
+ break;
+ case CRYPTO_AEAD_AES_CCM:
+ tfm = crypto_alloc_aead("ccm(aes)", 0, 0);
+ break;
+ default:
+ pr_err("Does not support encrypt ahead(id : %d)\n", id);
+ return NULL;
+ }
+
+ if (IS_ERR(tfm)) {
+ pr_err("Failed to alloc encrypt aead : %ld\n", PTR_ERR(tfm));
+ return NULL;
+ }
+
+ return tfm;
+}
+
+static struct shash_desc *alloc_shash_desc(int id)
+{
+ struct crypto_shash *tfm = NULL;
+ struct shash_desc *shash;
+
+ switch (id) {
+ case CRYPTO_SHASH_HMACMD5:
+ tfm = crypto_alloc_shash("hmac(md5)", 0, 0);
+ break;
+ case CRYPTO_SHASH_HMACSHA256:
+ tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
+ break;
+ case CRYPTO_SHASH_CMACAES:
+ tfm = crypto_alloc_shash("cmac(aes)", 0, 0);
+ break;
+ case CRYPTO_SHASH_SHA256:
+ tfm = crypto_alloc_shash("sha256", 0, 0);
+ break;
+ case CRYPTO_SHASH_SHA512:
+ tfm = crypto_alloc_shash("sha512", 0, 0);
+ break;
+ case CRYPTO_SHASH_MD4:
+ tfm = crypto_alloc_shash("md4", 0, 0);
+ break;
+ case CRYPTO_SHASH_MD5:
+ tfm = crypto_alloc_shash("md5", 0, 0);
+ break;
+ default:
+ return NULL;
+ }
+
+ if (IS_ERR(tfm))
+ return NULL;
+
+ shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(tfm),
+ GFP_KERNEL);
+ if (!shash)
+ crypto_free_shash(tfm);
+ else
+ shash->tfm = tfm;
+ return shash;
+}
+
+static void ctx_free(struct ksmbd_crypto_ctx *ctx)
+{
+ int i;
+
+ for (i = 0; i < CRYPTO_SHASH_MAX; i++)
+ free_shash(ctx->desc[i]);
+ for (i = 0; i < CRYPTO_AEAD_MAX; i++)
+ free_aead(ctx->ccmaes[i]);
+ kfree(ctx);
+}
+
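+/*
+ * Grab an idle crypto context, growing the pool up to roughly one
+ * context per online CPU; beyond that, sleep until one is released.
+ */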
+static struct ksmbd_crypto_ctx *ksmbd_find_crypto_ctx(void)
+{
+ struct ksmbd_crypto_ctx *ctx;
+
+ while (1) {
+ spin_lock(&ctx_list.ctx_lock);
+ if (!list_empty(&ctx_list.idle_ctx)) {
+ ctx = list_entry(ctx_list.idle_ctx.next,
+ struct ksmbd_crypto_ctx,
+ list);
+ list_del(&ctx->list);
+ spin_unlock(&ctx_list.ctx_lock);
+ return ctx;
+ }
+
+ if (ctx_list.avail_ctx > num_online_cpus()) {
+ spin_unlock(&ctx_list.ctx_lock);
+ wait_event(ctx_list.ctx_wait,
+ !list_empty(&ctx_list.idle_ctx));
+ continue;
+ }
+
+ ctx_list.avail_ctx++;
+ spin_unlock(&ctx_list.ctx_lock);
+
+ ctx = kzalloc(sizeof(struct ksmbd_crypto_ctx), GFP_KERNEL);
+ if (!ctx) {
+ spin_lock(&ctx_list.ctx_lock);
+ ctx_list.avail_ctx--;
+ spin_unlock(&ctx_list.ctx_lock);
+ wait_event(ctx_list.ctx_wait,
+ !list_empty(&ctx_list.idle_ctx));
+ continue;
+ }
+ break;
+ }
+ return ctx;
+}
+
+void ksmbd_release_crypto_ctx(struct ksmbd_crypto_ctx *ctx)
+{
+ if (!ctx)
+ return;
+
+ spin_lock(&ctx_list.ctx_lock);
+ if (ctx_list.avail_ctx <= num_online_cpus()) {
+ list_add(&ctx->list, &ctx_list.idle_ctx);
+ spin_unlock(&ctx_list.ctx_lock);
+ wake_up(&ctx_list.ctx_wait);
+ return;
+ }
+
+ ctx_list.avail_ctx--;
+ spin_unlock(&ctx_list.ctx_lock);
+ ctx_free(ctx);
+}
+
+static struct ksmbd_crypto_ctx *____crypto_shash_ctx_find(int id)
+{
+ struct ksmbd_crypto_ctx *ctx;
+
+ if (id >= CRYPTO_SHASH_MAX)
+ return NULL;
+
+ ctx = ksmbd_find_crypto_ctx();
+ if (ctx->desc[id])
+ return ctx;
+
+ ctx->desc[id] = alloc_shash_desc(id);
+ if (ctx->desc[id])
+ return ctx;
+ ksmbd_release_crypto_ctx(ctx);
+ return NULL;
+}
+
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_hmacmd5(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_HMACMD5);
+}
+
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_hmacsha256(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_HMACSHA256);
+}
+
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_cmacaes(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_CMACAES);
+}
+
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_sha256(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_SHA256);
+}
+
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_sha512(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_SHA512);
+}
+
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_md4(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_MD4);
+}
+
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_md5(void)
+{
+ return ____crypto_shash_ctx_find(CRYPTO_SHASH_MD5);
+}
+
+static struct ksmbd_crypto_ctx *____crypto_aead_ctx_find(int id)
+{
+ struct ksmbd_crypto_ctx *ctx;
+
+ if (id >= CRYPTO_AEAD_MAX)
+ return NULL;
+
+ ctx = ksmbd_find_crypto_ctx();
+ if (ctx->ccmaes[id])
+ return ctx;
+
+ ctx->ccmaes[id] = alloc_aead(id);
+ if (ctx->ccmaes[id])
+ return ctx;
+ ksmbd_release_crypto_ctx(ctx);
+ return NULL;
+}
+
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_gcm(void)
+{
+ return ____crypto_aead_ctx_find(CRYPTO_AEAD_AES_GCM);
+}
+
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_ccm(void)
+{
+ return ____crypto_aead_ctx_find(CRYPTO_AEAD_AES_CCM);
+}
+
+void ksmbd_crypto_destroy(void)
+{
+ struct ksmbd_crypto_ctx *ctx;
+
+ while (!list_empty(&ctx_list.idle_ctx)) {
+ ctx = list_entry(ctx_list.idle_ctx.next,
+ struct ksmbd_crypto_ctx,
+ list);
+ list_del(&ctx->list);
+ ctx_free(ctx);
+ }
+}
+
+int ksmbd_crypto_create(void)
+{
+ struct ksmbd_crypto_ctx *ctx;
+
+ spin_lock_init(&ctx_list.ctx_lock);
+ INIT_LIST_HEAD(&ctx_list.idle_ctx);
+ init_waitqueue_head(&ctx_list.ctx_wait);
+ ctx_list.avail_ctx = 1;
+
+ ctx = kzalloc(sizeof(struct ksmbd_crypto_ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ list_add(&ctx->list, &ctx_list.idle_ctx);
+ return 0;
+}
diff --git a/fs/ksmbd/crypto_ctx.h b/fs/ksmbd/crypto_ctx.h
new file mode 100644
index 000000000000..ef11154b43df
--- /dev/null
+++ b/fs/ksmbd/crypto_ctx.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __CRYPTO_CTX_H__
+#define __CRYPTO_CTX_H__
+
+#include <crypto/hash.h>
+#include <crypto/aead.h>
+
+enum {
+ CRYPTO_SHASH_HMACMD5 = 0,
+ CRYPTO_SHASH_HMACSHA256,
+ CRYPTO_SHASH_CMACAES,
+ CRYPTO_SHASH_SHA256,
+ CRYPTO_SHASH_SHA512,
+ CRYPTO_SHASH_MD4,
+ CRYPTO_SHASH_MD5,
+ CRYPTO_SHASH_MAX,
+};
+
+enum {
+ CRYPTO_AEAD_AES_GCM = 16,
+ CRYPTO_AEAD_AES_CCM,
+ CRYPTO_AEAD_MAX,
+};
+
+enum {
+ CRYPTO_BLK_ECBDES = 32,
+ CRYPTO_BLK_MAX,
+};
+
+struct ksmbd_crypto_ctx {
+ struct list_head list;
+
+ struct shash_desc *desc[CRYPTO_SHASH_MAX];
+ struct crypto_aead *ccmaes[CRYPTO_AEAD_MAX];
+};
+
+#define CRYPTO_HMACMD5(c) ((c)->desc[CRYPTO_SHASH_HMACMD5])
+#define CRYPTO_HMACSHA256(c) ((c)->desc[CRYPTO_SHASH_HMACSHA256])
+#define CRYPTO_CMACAES(c) ((c)->desc[CRYPTO_SHASH_CMACAES])
+#define CRYPTO_SHA256(c) ((c)->desc[CRYPTO_SHASH_SHA256])
+#define CRYPTO_SHA512(c) ((c)->desc[CRYPTO_SHASH_SHA512])
+#define CRYPTO_MD4(c) ((c)->desc[CRYPTO_SHASH_MD4])
+#define CRYPTO_MD5(c) ((c)->desc[CRYPTO_SHASH_MD5])
+
+#define CRYPTO_HMACMD5_TFM(c) ((c)->desc[CRYPTO_SHASH_HMACMD5]->tfm)
+#define CRYPTO_HMACSHA256_TFM(c)\
+ ((c)->desc[CRYPTO_SHASH_HMACSHA256]->tfm)
+#define CRYPTO_CMACAES_TFM(c) ((c)->desc[CRYPTO_SHASH_CMACAES]->tfm)
+#define CRYPTO_SHA256_TFM(c) ((c)->desc[CRYPTO_SHASH_SHA256]->tfm)
+#define CRYPTO_SHA512_TFM(c) ((c)->desc[CRYPTO_SHASH_SHA512]->tfm)
+#define CRYPTO_MD4_TFM(c) ((c)->desc[CRYPTO_SHASH_MD4]->tfm)
+#define CRYPTO_MD5_TFM(c) ((c)->desc[CRYPTO_SHASH_MD5]->tfm)
+
+#define CRYPTO_GCM(c) ((c)->ccmaes[CRYPTO_AEAD_AES_GCM])
+#define CRYPTO_CCM(c) ((c)->ccmaes[CRYPTO_AEAD_AES_CCM])
+
+void ksmbd_release_crypto_ctx(struct ksmbd_crypto_ctx *ctx);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_hmacmd5(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_hmacsha256(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_cmacaes(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_sha512(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_sha256(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_md4(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_md5(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_gcm(void);
+struct ksmbd_crypto_ctx *ksmbd_crypto_ctx_find_ccm(void);
+void ksmbd_crypto_destroy(void);
+int ksmbd_crypto_create(void);
+
+#endif /* __CRYPTO_CTX_H__ */
diff --git a/fs/ksmbd/glob.h b/fs/ksmbd/glob.h
new file mode 100644
index 000000000000..49a5a3afa118
--- /dev/null
+++ b/fs/ksmbd/glob.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_GLOB_H
+#define __KSMBD_GLOB_H
+
+#include <linux/ctype.h>
+
+#include "unicode.h"
+#include "vfs_cache.h"
+
+#define KSMBD_VERSION "3.1.9"
+
+extern int ksmbd_debug_types;
+
+#define KSMBD_DEBUG_SMB BIT(0)
+#define KSMBD_DEBUG_AUTH BIT(1)
+#define KSMBD_DEBUG_VFS BIT(2)
+#define KSMBD_DEBUG_OPLOCK BIT(3)
+#define KSMBD_DEBUG_IPC BIT(4)
+#define KSMBD_DEBUG_CONN BIT(5)
+#define KSMBD_DEBUG_RDMA BIT(6)
+#define KSMBD_DEBUG_ALL (KSMBD_DEBUG_SMB | KSMBD_DEBUG_AUTH | \
+ KSMBD_DEBUG_VFS | KSMBD_DEBUG_OPLOCK | \
+ KSMBD_DEBUG_IPC | KSMBD_DEBUG_CONN | \
+ KSMBD_DEBUG_RDMA)
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#ifdef SUBMOD_NAME
+#define pr_fmt(fmt) "ksmbd: " SUBMOD_NAME ": " fmt
+#else
+#define pr_fmt(fmt) "ksmbd: " fmt
+#endif
+
+#define ksmbd_debug(type, fmt, ...) \
+ do { \
+ if (ksmbd_debug_types & KSMBD_DEBUG_##type) \
+ pr_info(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+#define UNICODE_LEN(x) ((x) * 2)
+
+#endif /* __KSMBD_GLOB_H */
diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
new file mode 100644
index 000000000000..2fbe2bc1e093
--- /dev/null
+++ b/fs/ksmbd/ksmbd_netlink.h
@@ -0,0 +1,395 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ *
+ * linux-ksmbd-devel@lists.sourceforge.net
+ */
+
+#ifndef _LINUX_KSMBD_SERVER_H
+#define _LINUX_KSMBD_SERVER_H
+
+#include <linux/types.h>
+
+/*
+ * This is a userspace ABI used to exchange data between ksmbd and the user
+ * IPC daemon over netlink. It is used to track and cache the user account DB
+ * and share configuration info from userspace.
+ *
+ * - KSMBD_EVENT_HEARTBEAT_REQUEST(ksmbd_heartbeat)
+ * This event is to check whether the user IPC daemon is alive. If the
+ * daemon is dead, ksmbd keeps existing connections until they disconnect,
+ * and denies new connections.
+ *
+ * - KSMBD_EVENT_STARTING_UP(ksmbd_startup_request)
+ * This event receives the information that initializes the ksmbd
+ * server from the user IPC daemon and starts the server. The [global]
+ * section parameters from smb.conf are passed as the initialization
+ * information.
+ *
+ * - KSMBD_EVENT_SHUTTING_DOWN(ksmbd_shutdown_request)
+ * This event is to shutdown ksmbd server.
+ *
+ * - KSMBD_EVENT_LOGIN_REQUEST/RESPONSE(ksmbd_login_request/response)
+ * This event is to get user account info from the user IPC daemon.
+ *
+ * - KSMBD_EVENT_SHARE_CONFIG_REQUEST/RESPONSE(ksmbd_share_config_request/response)
+ * This event is to get net share configuration info.
+ *
+ * - KSMBD_EVENT_TREE_CONNECT_REQUEST/RESPONSE(ksmbd_tree_connect_request/response)
+ * This event is to get session and tree connect info.
+ *
+ * - KSMBD_EVENT_TREE_DISCONNECT_REQUEST(ksmbd_tree_disconnect_request)
+ * This event is to send tree disconnect info to user IPC daemon.
+ *
+ * - KSMBD_EVENT_LOGOUT_REQUEST(ksmbd_logout_request)
+ * This event is to send logout request to user IPC daemon.
+ *
+ * - KSMBD_EVENT_RPC_REQUEST/RESPONSE(ksmbd_rpc_command)
+ * This event is to have DCE/RPC requests such as srvsvc, wkssvc, lsarpc
+ * and samr processed in userspace.
+ *
+ * - KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST/RESPONSE(ksmbd_spnego_authen_request/response)
+ * This event is to have Kerberos authentication processed in
+ * userspace.
+ */
+
+#define KSMBD_GENL_NAME "SMBD_GENL"
+#define KSMBD_GENL_VERSION 0x01
+
+#define KSMBD_REQ_MAX_ACCOUNT_NAME_SZ 48
+#define KSMBD_REQ_MAX_HASH_SZ 18
+#define KSMBD_REQ_MAX_SHARE_NAME 64
+
+/*
+ * IPC heartbeat frame to check whether user IPC daemon is alive.
+ */
+struct ksmbd_heartbeat {
+ __u32 handle;
+};
+
+/*
+ * Global config flags.
+ */
+#define KSMBD_GLOBAL_FLAG_INVALID (0)
+#define KSMBD_GLOBAL_FLAG_SMB2_LEASES BIT(0)
+#define KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION BIT(1)
+#define KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL BIT(2)
+
+/*
+ * IPC request for ksmbd server startup
+ */
+struct ksmbd_startup_request {
+ __u32 flags; /* Flags for global config */
+ __s32 signing; /* Signing enabled */
+ __s8 min_prot[16]; /* The minimum SMB protocol version */
+ __s8 max_prot[16]; /* The maximum SMB protocol version */
+ __s8 netbios_name[16];
+ __s8 work_group[64]; /* Workgroup */
+ __s8 server_string[64]; /* Server string */
+ __u16 tcp_port; /* tcp port */
+ __u16 ipc_timeout; /*
+ * specifies the number of seconds
+ * server will wait for the userspace to
+ * reply to heartbeat frames.
+ */
+ __u32 deadtime; /* Number of minutes of inactivity */
+ __u32 file_max; /* Limits the maximum number of open files */
+ __u32 smb2_max_write; /* MAX write size */
+ __u32 smb2_max_read; /* MAX read size */
+ __u32 smb2_max_trans; /* MAX trans size */
+ __u32 share_fake_fscaps; /*
+ * Support some special application that
+ * makes QFSINFO calls to check whether
+ * we set the SPARSE_FILES bit (0x40).
+ */
+ __u32 sub_auth[3]; /* Subauth value for Security ID */
+ __u32 ifc_list_sz; /* interfaces list size */
+ __s8 ____payload[];
+};
+
+#define KSMBD_STARTUP_CONFIG_INTERFACES(s) ((s)->____payload)
+
+/*
+ * IPC request to shutdown ksmbd server.
+ */
+struct ksmbd_shutdown_request {
+ __s32 reserved;
+};
+
+/*
+ * IPC user login request.
+ */
+struct ksmbd_login_request {
+ __u32 handle;
+ __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
+};
+
+/*
+ * IPC user login response.
+ */
+struct ksmbd_login_response {
+ __u32 handle;
+ __u32 gid; /* group id */
+ __u32 uid; /* user id */
+ __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
+ __u16 status;
+ __u16 hash_sz; /* hash size */
+ __s8 hash[KSMBD_REQ_MAX_HASH_SZ]; /* password hash */
+};
+
+/*
+ * IPC request to fetch net share config.
+ */
+struct ksmbd_share_config_request {
+ __u32 handle;
+ __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; /* share name */
+};
+
+/*
+ * IPC response to the net share config request.
+ */
+struct ksmbd_share_config_response {
+ __u32 handle;
+ __u32 flags;
+ __u16 create_mask;
+ __u16 directory_mask;
+ __u16 force_create_mode;
+ __u16 force_directory_mode;
+ __u16 force_uid;
+ __u16 force_gid;
+ __u32 veto_list_sz;
+ __s8 ____payload[];
+};
+
+#define KSMBD_SHARE_CONFIG_VETO_LIST(s) ((s)->____payload)
+
+static inline char *
+ksmbd_share_config_path(struct ksmbd_share_config_response *sc)
+{
+ char *p = sc->____payload;
+
+ if (sc->veto_list_sz)
+ p += sc->veto_list_sz + 1;
+
+ return p;
+}
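+
+/*
+ * Payload layout illustration (editorial sketch, not part of the ABI
+ * definition): for veto entries "*.tmp" and "core" with share path
+ * "/srv/share", a conforming daemon would pack ____payload as
+ *
+ *	* . t m p \0 c o r e \0 \0 / s r v / s h a r e \0
+ *	|<---- veto_list_sz ---->|
+ *
+ * which is why ksmbd_share_config_path() skips veto_list_sz + 1 bytes
+ * to reach the path string.
+ */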
+
+/*
+ * IPC request for tree connection. This request includes session and tree
+ * connect info from the client.
+ */
+struct ksmbd_tree_connect_request {
+ __u32 handle;
+ __u16 account_flags;
+ __u16 flags;
+ __u64 session_id;
+ __u64 connect_id;
+ __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ];
+ __s8 share[KSMBD_REQ_MAX_SHARE_NAME];
+ __s8 peer_addr[64];
+};
+
+/*
+ * IPC Response structure for tree connection.
+ */
+struct ksmbd_tree_connect_response {
+ __u32 handle;
+ __u16 status;
+ __u16 connection_flags;
+};
+
+/*
+ * IPC request structure to disconnect a tree connection.
+ */
+struct ksmbd_tree_disconnect_request {
+ __u64 session_id; /* session id */
+ __u64 connect_id; /* tree connection id */
+};
+
+/*
+ * IPC request structure to log out a user account.
+ */
+struct ksmbd_logout_request {
+ __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */
+};
+
+/*
+ * RPC command structure to send an RPC request such as srvsvc or wkssvc
+ * to the user IPC daemon.
+ */
+struct ksmbd_rpc_command {
+ __u32 handle;
+ __u32 flags;
+ __u32 payload_sz;
+ __u8 payload[];
+};
+
+/*
+ * IPC Request Kerberos authentication
+ */
+struct ksmbd_spnego_authen_request {
+ __u32 handle;
+ __u16 spnego_blob_len; /* the length of spnego_blob */
+ __u8 spnego_blob[0]; /*
+ * the GSS token from SecurityBuffer of
+ * SMB2 SESSION SETUP request
+ */
+};
+
+/*
+ * Response data which includes the GSS token and the session key generated by
+ * user daemon.
+ */
+struct ksmbd_spnego_authen_response {
+ __u32 handle;
+ struct ksmbd_login_response login_response; /*
+ * the login response with
+ * a user identified by the
+ * GSS token from a client
+ */
+ __u16 session_key_len; /* the length of the session key */
+ __u16 spnego_blob_len; /*
+ * the length of the GSS token which will be
+ * stored in SecurityBuffer of SMB2 SESSION
+ * SETUP response
+ */
+ __u8 payload[]; /* session key + AP_REP */
+};
+
+/*
+ * This is also used as the NETLINK attribute type value.
+ *
+ * NOTE:
+ * Response message type value should be equal to
+ * request message type value + 1.
+ */
+enum ksmbd_event {
+ KSMBD_EVENT_UNSPEC = 0,
+ KSMBD_EVENT_HEARTBEAT_REQUEST,
+
+ KSMBD_EVENT_STARTING_UP,
+ KSMBD_EVENT_SHUTTING_DOWN,
+
+ KSMBD_EVENT_LOGIN_REQUEST,
+ KSMBD_EVENT_LOGIN_RESPONSE = 5,
+
+ KSMBD_EVENT_SHARE_CONFIG_REQUEST,
+ KSMBD_EVENT_SHARE_CONFIG_RESPONSE,
+
+ KSMBD_EVENT_TREE_CONNECT_REQUEST,
+ KSMBD_EVENT_TREE_CONNECT_RESPONSE,
+
+ KSMBD_EVENT_TREE_DISCONNECT_REQUEST = 10,
+
+ KSMBD_EVENT_LOGOUT_REQUEST,
+
+ KSMBD_EVENT_RPC_REQUEST,
+ KSMBD_EVENT_RPC_RESPONSE,
+
+ KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST,
+ KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE = 15,
+
+ KSMBD_EVENT_MAX
+};
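+
+/*
+ * Editorial illustration (not part of this patch): the "+1" pairing
+ * rule above lets a response type be derived from its request type,
+ * e.g. KSMBD_EVENT_LOGIN_REQUEST (4) + 1 == KSMBD_EVENT_LOGIN_RESPONSE (5):
+ *
+ *	static inline int ksmbd_event_response(int req_type)
+ *	{
+ *		return req_type + 1;
+ *	}
+ */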
+
+/*
+ * Enumeration for IPC tree connect status.
+ */
+enum KSMBD_TREE_CONN_STATUS {
+ KSMBD_TREE_CONN_STATUS_OK = 0,
+ KSMBD_TREE_CONN_STATUS_NOMEM,
+ KSMBD_TREE_CONN_STATUS_NO_SHARE,
+ KSMBD_TREE_CONN_STATUS_NO_USER,
+ KSMBD_TREE_CONN_STATUS_INVALID_USER,
+ KSMBD_TREE_CONN_STATUS_HOST_DENIED = 5,
+ KSMBD_TREE_CONN_STATUS_CONN_EXIST,
+ KSMBD_TREE_CONN_STATUS_TOO_MANY_CONNS,
+ KSMBD_TREE_CONN_STATUS_TOO_MANY_SESSIONS,
+ KSMBD_TREE_CONN_STATUS_ERROR,
+};
+
+/*
+ * User config flags.
+ */
+#define KSMBD_USER_FLAG_INVALID (0)
+#define KSMBD_USER_FLAG_OK BIT(0)
+#define KSMBD_USER_FLAG_BAD_PASSWORD BIT(1)
+#define KSMBD_USER_FLAG_BAD_UID BIT(2)
+#define KSMBD_USER_FLAG_BAD_USER BIT(3)
+#define KSMBD_USER_FLAG_GUEST_ACCOUNT BIT(4)
+
+/*
+ * Share config flags.
+ */
+#define KSMBD_SHARE_FLAG_INVALID (0)
+#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0)
+#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1)
+#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2)
+#define KSMBD_SHARE_FLAG_READONLY BIT(3)
+#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4)
+#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5)
+#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6)
+#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7)
+#define KSMBD_SHARE_FLAG_PIPE BIT(8)
+#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9)
+#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10)
+#define KSMBD_SHARE_FLAG_STREAMS BIT(11)
+#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12)
+#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13)
+
+/*
+ * Tree connect request flags.
+ */
+#define KSMBD_TREE_CONN_FLAG_REQUEST_SMB1 (0)
+#define KSMBD_TREE_CONN_FLAG_REQUEST_IPV6 BIT(0)
+#define KSMBD_TREE_CONN_FLAG_REQUEST_SMB2 BIT(1)
+
+/*
+ * Tree connect flags.
+ */
+#define KSMBD_TREE_CONN_FLAG_GUEST_ACCOUNT BIT(0)
+#define KSMBD_TREE_CONN_FLAG_READ_ONLY BIT(1)
+#define KSMBD_TREE_CONN_FLAG_WRITABLE BIT(2)
+#define KSMBD_TREE_CONN_FLAG_ADMIN_ACCOUNT BIT(3)
+
+/*
+ * RPC over IPC.
+ */
+#define KSMBD_RPC_METHOD_RETURN BIT(0)
+#define KSMBD_RPC_SRVSVC_METHOD_INVOKE BIT(1)
+#define KSMBD_RPC_SRVSVC_METHOD_RETURN (KSMBD_RPC_SRVSVC_METHOD_INVOKE | KSMBD_RPC_METHOD_RETURN)
+#define KSMBD_RPC_WKSSVC_METHOD_INVOKE BIT(2)
+#define KSMBD_RPC_WKSSVC_METHOD_RETURN (KSMBD_RPC_WKSSVC_METHOD_INVOKE | KSMBD_RPC_METHOD_RETURN)
+#define KSMBD_RPC_IOCTL_METHOD (BIT(3) | KSMBD_RPC_METHOD_RETURN)
+#define KSMBD_RPC_OPEN_METHOD BIT(4)
+#define KSMBD_RPC_WRITE_METHOD BIT(5)
+#define KSMBD_RPC_READ_METHOD (BIT(6) | KSMBD_RPC_METHOD_RETURN)
+#define KSMBD_RPC_CLOSE_METHOD BIT(7)
+#define KSMBD_RPC_RAP_METHOD (BIT(8) | KSMBD_RPC_METHOD_RETURN)
+#define KSMBD_RPC_RESTRICTED_CONTEXT BIT(9)
+#define KSMBD_RPC_SAMR_METHOD_INVOKE BIT(10)
+#define KSMBD_RPC_SAMR_METHOD_RETURN (KSMBD_RPC_SAMR_METHOD_INVOKE | KSMBD_RPC_METHOD_RETURN)
+#define KSMBD_RPC_LSARPC_METHOD_INVOKE BIT(11)
+#define KSMBD_RPC_LSARPC_METHOD_RETURN (KSMBD_RPC_LSARPC_METHOD_INVOKE | KSMBD_RPC_METHOD_RETURN)
+
+/*
+ * RPC status definitions.
+ */
+#define KSMBD_RPC_OK 0
+#define KSMBD_RPC_EBAD_FUNC 0x00000001
+#define KSMBD_RPC_EACCESS_DENIED 0x00000005
+#define KSMBD_RPC_EBAD_FID 0x00000006
+#define KSMBD_RPC_ENOMEM 0x00000008
+#define KSMBD_RPC_EBAD_DATA 0x0000000D
+#define KSMBD_RPC_ENOTIMPLEMENTED 0x00000040
+#define KSMBD_RPC_EINVALID_PARAMETER 0x00000057
+#define KSMBD_RPC_EMORE_DATA 0x000000EA
+#define KSMBD_RPC_EINVALID_LEVEL 0x0000007C
+#define KSMBD_RPC_SOME_NOT_MAPPED 0x00000107
+
+#define KSMBD_CONFIG_OPT_DISABLED 0
+#define KSMBD_CONFIG_OPT_ENABLED 1
+#define KSMBD_CONFIG_OPT_AUTO 2
+#define KSMBD_CONFIG_OPT_MANDATORY 3
+
+#endif /* _LINUX_KSMBD_SERVER_H */
diff --git a/fs/ksmbd/ksmbd_spnego_negtokeninit.asn1 b/fs/ksmbd/ksmbd_spnego_negtokeninit.asn1
new file mode 100644
index 000000000000..0065f191b54b
--- /dev/null
+++ b/fs/ksmbd/ksmbd_spnego_negtokeninit.asn1
@@ -0,0 +1,31 @@
+GSSAPI ::=
+ [APPLICATION 0] IMPLICIT SEQUENCE {
+ thisMech
+ OBJECT IDENTIFIER ({ksmbd_gssapi_this_mech}),
+ negotiationToken
+ NegotiationToken
+ }
+
+MechType ::= OBJECT IDENTIFIER ({ksmbd_neg_token_init_mech_type})
+
+MechTypeList ::= SEQUENCE OF MechType
+
+NegTokenInit ::=
+ SEQUENCE {
+ mechTypes
+ [0] MechTypeList,
+ reqFlags
+ [1] BIT STRING OPTIONAL,
+ mechToken
+ [2] OCTET STRING OPTIONAL ({ksmbd_neg_token_init_mech_token}),
+ mechListMIC
+ [3] OCTET STRING OPTIONAL
+ }
+
+NegotiationToken ::=
+ CHOICE {
+ negTokenInit
+ [0] NegTokenInit,
+ negTokenTarg
+ [1] ANY
+ }
diff --git a/fs/ksmbd/ksmbd_spnego_negtokentarg.asn1 b/fs/ksmbd/ksmbd_spnego_negtokentarg.asn1
new file mode 100644
index 000000000000..1151933e7b9c
--- /dev/null
+++ b/fs/ksmbd/ksmbd_spnego_negtokentarg.asn1
@@ -0,0 +1,19 @@
+GSSAPI ::=
+ CHOICE {
+ negTokenInit
+ [0] ANY,
+ negTokenTarg
+ [1] NegTokenTarg
+ }
+
+NegTokenTarg ::=
+ SEQUENCE {
+ negResult
+ [0] ENUMERATED OPTIONAL,
+ supportedMech
+ [1] OBJECT IDENTIFIER OPTIONAL,
+ responseToken
+ [2] OCTET STRING OPTIONAL ({ksmbd_neg_token_targ_resp_token}),
+ mechListMIC
+ [3] OCTET STRING OPTIONAL
+ }
diff --git a/fs/ksmbd/ksmbd_work.c b/fs/ksmbd/ksmbd_work.c
new file mode 100644
index 000000000000..fd58eb4809f6
--- /dev/null
+++ b/fs/ksmbd/ksmbd_work.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#include "server.h"
+#include "connection.h"
+#include "ksmbd_work.h"
+#include "mgmt/ksmbd_ida.h"
+
+static struct kmem_cache *work_cache;
+static struct workqueue_struct *ksmbd_wq;
+
+struct ksmbd_work *ksmbd_alloc_work_struct(void)
+{
+ struct ksmbd_work *work = kmem_cache_zalloc(work_cache, GFP_KERNEL);
+
+ if (work) {
+ work->compound_fid = KSMBD_NO_FID;
+ work->compound_pfid = KSMBD_NO_FID;
+ INIT_LIST_HEAD(&work->request_entry);
+ INIT_LIST_HEAD(&work->async_request_entry);
+ INIT_LIST_HEAD(&work->fp_entry);
+ INIT_LIST_HEAD(&work->interim_entry);
+ }
+ return work;
+}
+
+void ksmbd_free_work_struct(struct ksmbd_work *work)
+{
+ WARN_ON(work->saved_cred != NULL);
+
+ kvfree(work->response_buf);
+ kvfree(work->aux_payload_buf);
+ kfree(work->tr_buf);
+ kvfree(work->request_buf);
+ if (work->async_id)
+ ksmbd_release_id(&work->conn->async_ida, work->async_id);
+ kmem_cache_free(work_cache, work);
+}
+
+void ksmbd_work_pool_destroy(void)
+{
+ kmem_cache_destroy(work_cache);
+}
+
+int ksmbd_work_pool_init(void)
+{
+ work_cache = kmem_cache_create("ksmbd_work_cache",
+ sizeof(struct ksmbd_work), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!work_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+int ksmbd_workqueue_init(void)
+{
+ ksmbd_wq = alloc_workqueue("ksmbd-io", 0, 0);
+ if (!ksmbd_wq)
+ return -ENOMEM;
+ return 0;
+}
+
+void ksmbd_workqueue_destroy(void)
+{
+ flush_workqueue(ksmbd_wq);
+ destroy_workqueue(ksmbd_wq);
+ ksmbd_wq = NULL;
+}
+
+bool ksmbd_queue_work(struct ksmbd_work *work)
+{
+ return queue_work(ksmbd_wq, &work->work);
+}
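
A minimal usage sketch of the lifecycle above (editorial addition, not part
of the patch; the function name is hypothetical): the work cache and the
workqueue are set up at server start and torn down in reverse order.

	/* hypothetical init path built on the helpers above */
	static int example_server_init(void)
	{
		int ret;

		ret = ksmbd_work_pool_init();
		if (ret)
			return ret;
		ret = ksmbd_workqueue_init();
		if (ret)
			ksmbd_work_pool_destroy();
		return ret;
	}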
diff --git a/fs/ksmbd/ksmbd_work.h b/fs/ksmbd/ksmbd_work.h
new file mode 100644
index 000000000000..f7156bc50049
--- /dev/null
+++ b/fs/ksmbd/ksmbd_work.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_WORK_H__
+#define __KSMBD_WORK_H__
+
+#include <linux/ctype.h>
+#include <linux/workqueue.h>
+
+struct ksmbd_conn;
+struct ksmbd_session;
+struct ksmbd_tree_connect;
+
+enum {
+ KSMBD_WORK_ACTIVE = 0,
+ KSMBD_WORK_CANCELLED,
+ KSMBD_WORK_CLOSED,
+};
+
+/* one of these for every pending CIFS request at the connection */
+struct ksmbd_work {
+ /* Server corresponding to this mid */
+ struct ksmbd_conn *conn;
+ struct ksmbd_session *sess;
+ struct ksmbd_tree_connect *tcon;
+
+ /* Pointer to received SMB header */
+ void *request_buf;
+ /* Response buffer */
+ void *response_buf;
+
+ /* Read data buffer */
+ void *aux_payload_buf;
+
+ /* Next cmd hdr in compound req buf*/
+ int next_smb2_rcv_hdr_off;
+ /* Next cmd hdr in compound rsp buf*/
+ int next_smb2_rsp_hdr_off;
+
+ /*
+ * Current local FID assigned to the compound response if an SMB2
+ * CREATE command is present in the compound request
+ */
+ u64 compound_fid;
+ u64 compound_pfid;
+ u64 compound_sid;
+
+ const struct cred *saved_cred;
+
+ /* Number of granted credits */
+ unsigned int credits_granted;
+
+ /* response smb header size */
+ unsigned int resp_hdr_sz;
+ unsigned int response_sz;
+ /* Read data count */
+ unsigned int aux_payload_sz;
+
+ void *tr_buf;
+
+ unsigned char state;
+ /* Multiple responses for one request e.g. SMB ECHO */
+ bool multiRsp:1;
+ /* No response for cancelled request */
+ bool send_no_response:1;
+ /* Request is encrypted */
+ bool encrypted:1;
+ /* Is this SYNC or ASYNC ksmbd_work */
+ bool syncronous:1;
+ bool need_invalidate_rkey:1;
+
+ unsigned int remote_key;
+ /* cancel works */
+ int async_id;
+ void **cancel_argv;
+ void (*cancel_fn)(void **argv);
+
+ struct work_struct work;
+ /* List head at conn->requests */
+ struct list_head request_entry;
+ /* List head at conn->async_requests */
+ struct list_head async_request_entry;
+ struct list_head fp_entry;
+ struct list_head interim_entry;
+};
+
+/**
+ * ksmbd_resp_buf_next - Get next buffer on compound response.
+ * @work: smb work containing response buffer
+ */
+static inline void *ksmbd_resp_buf_next(struct ksmbd_work *work)
+{
+ return work->response_buf + work->next_smb2_rsp_hdr_off;
+}
+
+/**
+ * ksmbd_req_buf_next - Get next buffer on compound request.
+ * @work: smb work containing request buffer
+ */
+static inline void *ksmbd_req_buf_next(struct ksmbd_work *work)
+{
+ return work->request_buf + work->next_smb2_rcv_hdr_off;
+}
+
+struct ksmbd_work *ksmbd_alloc_work_struct(void);
+void ksmbd_free_work_struct(struct ksmbd_work *work);
+
+void ksmbd_work_pool_destroy(void);
+int ksmbd_work_pool_init(void);
+
+int ksmbd_workqueue_init(void);
+void ksmbd_workqueue_destroy(void);
+bool ksmbd_queue_work(struct ksmbd_work *work);
+
+#endif /* __KSMBD_WORK_H__ */
diff --git a/fs/ksmbd/mgmt/ksmbd_ida.c b/fs/ksmbd/mgmt/ksmbd_ida.c
new file mode 100644
index 000000000000..54194d959a5e
--- /dev/null
+++ b/fs/ksmbd/mgmt/ksmbd_ida.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include "ksmbd_ida.h"
+
+static inline int __acquire_id(struct ida *ida, int from, int to)
+{
+ return ida_simple_get(ida, from, to, GFP_KERNEL);
+}
+
+int ksmbd_acquire_smb2_tid(struct ida *ida)
+{
+ int id;
+
+ id = __acquire_id(ida, 1, 0xFFFFFFFF);
+
+ return id;
+}
+
+int ksmbd_acquire_smb2_uid(struct ida *ida)
+{
+ int id;
+
+ id = __acquire_id(ida, 1, 0);
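+ /*
+ * 0xFFFE is reserved for UIDs (see ksmbd_ida.h); if the allocator
+ * handed it out, keep it allocated and draw the next id instead.
+ */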
+ if (id == 0xFFFE)
+ id = __acquire_id(ida, 1, 0);
+
+ return id;
+}
+
+int ksmbd_acquire_async_msg_id(struct ida *ida)
+{
+ return __acquire_id(ida, 1, 0);
+}
+
+int ksmbd_acquire_id(struct ida *ida)
+{
+ return __acquire_id(ida, 0, 0);
+}
+
+void ksmbd_release_id(struct ida *ida, int id)
+{
+ ida_simple_remove(ida, id);
+}
diff --git a/fs/ksmbd/mgmt/ksmbd_ida.h b/fs/ksmbd/mgmt/ksmbd_ida.h
new file mode 100644
index 000000000000..2bc07b16cfde
--- /dev/null
+++ b/fs/ksmbd/mgmt/ksmbd_ida.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_IDA_MANAGEMENT_H__
+#define __KSMBD_IDA_MANAGEMENT_H__
+
+#include <linux/slab.h>
+#include <linux/idr.h>
+
+/*
+ * 2.2.1.6.7 TID Generation
+ * The value 0xFFFF MUST NOT be used as a valid TID. All other
+ * possible values for TID, including zero (0x0000), are valid.
+ * The value 0xFFFF is used to specify all TIDs or no TID,
+ * depending upon the context in which it is used.
+ */
+int ksmbd_acquire_smb2_tid(struct ida *ida);
+
+/*
+ * 2.2.1.6.8 UID Generation
+ * The value 0xFFFE was declared reserved in the LAN Manager 1.0
+ * documentation, so a value of 0xFFFE SHOULD NOT be used as a
+ * valid UID.<21> All other possible values for a UID, excluding
+ * zero (0x0000), are valid.
+ */
+int ksmbd_acquire_smb2_uid(struct ida *ida);
+int ksmbd_acquire_async_msg_id(struct ida *ida);
+
+int ksmbd_acquire_id(struct ida *ida);
+
+void ksmbd_release_id(struct ida *ida, int id);
+#endif /* __KSMBD_IDA_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/share_config.c b/fs/ksmbd/mgmt/share_config.c
new file mode 100644
index 000000000000..cb72d30f5b71
--- /dev/null
+++ b/fs/ksmbd/mgmt/share_config.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+#include <linux/parser.h>
+#include <linux/namei.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include "share_config.h"
+#include "user_config.h"
+#include "user_session.h"
+#include "../transport_ipc.h"
+
+#define SHARE_HASH_BITS 3
+static DEFINE_HASHTABLE(shares_table, SHARE_HASH_BITS);
+static DECLARE_RWSEM(shares_table_lock);
+
+struct ksmbd_veto_pattern {
+ char *pattern;
+ struct list_head list;
+};
+
+static unsigned int share_name_hash(char *name)
+{
+ return jhash(name, strlen(name), 0);
+}
+
+static void kill_share(struct ksmbd_share_config *share)
+{
+ while (!list_empty(&share->veto_list)) {
+ struct ksmbd_veto_pattern *p;
+
+ p = list_entry(share->veto_list.next,
+ struct ksmbd_veto_pattern,
+ list);
+ list_del(&p->list);
+ kfree(p->pattern);
+ kfree(p);
+ }
+
+ if (share->path)
+ path_put(&share->vfs_path);
+ kfree(share->name);
+ kfree(share->path);
+ kfree(share);
+}
+
+void __ksmbd_share_config_put(struct ksmbd_share_config *share)
+{
+ down_write(&shares_table_lock);
+ hash_del(&share->hlist);
+ up_write(&shares_table_lock);
+
+ kill_share(share);
+}
+
+static struct ksmbd_share_config *
+__get_share_config(struct ksmbd_share_config *share)
+{
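+ /*
+ * The share may be on its way out; take a reference only if the
+ * refcount has not already dropped to zero.
+ */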
+ if (!atomic_inc_not_zero(&share->refcount))
+ return NULL;
+ return share;
+}
+
+static struct ksmbd_share_config *__share_lookup(char *name)
+{
+ struct ksmbd_share_config *share;
+ unsigned int key = share_name_hash(name);
+
+ hash_for_each_possible(shares_table, share, hlist, key) {
+ if (!strcmp(name, share->name))
+ return share;
+ }
+ return NULL;
+}
+
+static int parse_veto_list(struct ksmbd_share_config *share,
+ char *veto_list,
+ int veto_list_sz)
+{
+ int sz = 0;
+
+ if (!veto_list_sz)
+ return 0;
+
+ while (veto_list_sz > 0) {
+ struct ksmbd_veto_pattern *p;
+
+ sz = strlen(veto_list);
+ if (!sz)
+ break;
+
+ p = kzalloc(sizeof(struct ksmbd_veto_pattern), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ p->pattern = kstrdup(veto_list, GFP_KERNEL);
+ if (!p->pattern) {
+ kfree(p);
+ return -ENOMEM;
+ }
+
+ list_add(&p->list, &share->veto_list);
+
+ veto_list += sz + 1;
+ veto_list_sz -= (sz + 1);
+ }
+
+ return 0;
+}
+
+static struct ksmbd_share_config *share_config_request(char *name)
+{
+ struct ksmbd_share_config_response *resp;
+ struct ksmbd_share_config *share = NULL;
+ struct ksmbd_share_config *lookup;
+ int ret;
+
+ resp = ksmbd_ipc_share_config_request(name);
+ if (!resp)
+ return NULL;
+
+ if (resp->flags == KSMBD_SHARE_FLAG_INVALID)
+ goto out;
+
+ share = kzalloc(sizeof(struct ksmbd_share_config), GFP_KERNEL);
+ if (!share)
+ goto out;
+
+ share->flags = resp->flags;
+ atomic_set(&share->refcount, 1);
+ INIT_LIST_HEAD(&share->veto_list);
+ share->name = kstrdup(name, GFP_KERNEL);
+
+ if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) {
+ share->path = kstrdup(ksmbd_share_config_path(resp),
+ GFP_KERNEL);
+ if (share->path)
+ share->path_sz = strlen(share->path);
+ share->create_mask = resp->create_mask;
+ share->directory_mask = resp->directory_mask;
+ share->force_create_mode = resp->force_create_mode;
+ share->force_directory_mode = resp->force_directory_mode;
+ share->force_uid = resp->force_uid;
+ share->force_gid = resp->force_gid;
+ ret = parse_veto_list(share,
+ KSMBD_SHARE_CONFIG_VETO_LIST(resp),
+ resp->veto_list_sz);
+ if (!ret && share->path) {
+ ret = kern_path(share->path, 0, &share->vfs_path);
+ if (ret) {
+ ksmbd_debug(SMB, "failed to access '%s'\n",
+ share->path);
+ /* Avoid put_path() */
+ kfree(share->path);
+ share->path = NULL;
+ }
+ }
+ if (ret || !share->name) {
+ kill_share(share);
+ share = NULL;
+ goto out;
+ }
+ }
+
+ down_write(&shares_table_lock);
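+ /*
+ * Recheck under the write lock: another task may have inserted the
+ * same share while the IPC request was in flight.
+ */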
+ lookup = __share_lookup(name);
+ if (lookup)
+ lookup = __get_share_config(lookup);
+ if (!lookup) {
+ hash_add(shares_table, &share->hlist, share_name_hash(name));
+ } else {
+ kill_share(share);
+ share = lookup;
+ }
+ up_write(&shares_table_lock);
+
+out:
+ kvfree(resp);
+ return share;
+}
+
+static void strtolower(char *share_name)
+{
+ while (*share_name) {
+ *share_name = tolower(*share_name);
+ share_name++;
+ }
+}
+
+struct ksmbd_share_config *ksmbd_share_config_get(char *name)
+{
+ struct ksmbd_share_config *share;
+
+ strtolower(name);
+
+ down_read(&shares_table_lock);
+ share = __share_lookup(name);
+ if (share)
+ share = __get_share_config(share);
+ up_read(&shares_table_lock);
+
+ if (share)
+ return share;
+ return share_config_request(name);
+}
+
+bool ksmbd_share_veto_filename(struct ksmbd_share_config *share,
+ const char *filename)
+{
+ struct ksmbd_veto_pattern *p;
+
+ list_for_each_entry(p, &share->veto_list, list) {
+ if (match_wildcard(p->pattern, filename))
+ return true;
+ }
+ return false;
+}
+
+void ksmbd_share_configs_cleanup(void)
+{
+ struct ksmbd_share_config *share;
+ struct hlist_node *tmp;
+ int i;
+
+ down_write(&shares_table_lock);
+ hash_for_each_safe(shares_table, i, tmp, share, hlist) {
+ hash_del(&share->hlist);
+ kill_share(share);
+ }
+ up_write(&shares_table_lock);
+}
diff --git a/fs/ksmbd/mgmt/share_config.h b/fs/ksmbd/mgmt/share_config.h
new file mode 100644
index 000000000000..953befc94e84
--- /dev/null
+++ b/fs/ksmbd/mgmt/share_config.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __SHARE_CONFIG_MANAGEMENT_H__
+#define __SHARE_CONFIG_MANAGEMENT_H__
+
+#include <linux/workqueue.h>
+#include <linux/hashtable.h>
+#include <linux/path.h>
+
+struct ksmbd_share_config {
+ char *name;
+ char *path;
+
+ unsigned int path_sz;
+ unsigned int flags;
+ struct list_head veto_list;
+
+ struct path vfs_path;
+
+ atomic_t refcount;
+ struct hlist_node hlist;
+ unsigned short create_mask;
+ unsigned short directory_mask;
+ unsigned short force_create_mode;
+ unsigned short force_directory_mode;
+ unsigned short force_uid;
+ unsigned short force_gid;
+};
+
+#define KSMBD_SHARE_INVALID_UID ((__u16)-1)
+#define KSMBD_SHARE_INVALID_GID ((__u16)-1)
+
+static inline int share_config_create_mode(struct ksmbd_share_config *share,
+ umode_t posix_mode)
+{
+ if (!share->force_create_mode) {
+ if (!posix_mode)
+ return share->create_mask;
+ else
+ return posix_mode & share->create_mask;
+ }
+ return share->force_create_mode & share->create_mask;
+}
+
+static inline int share_config_directory_mode(struct ksmbd_share_config *share,
+ umode_t posix_mode)
+{
+ if (!share->force_directory_mode) {
+ if (!posix_mode)
+ return share->directory_mask;
+ else
+ return posix_mode & share->directory_mask;
+ }
+
+ return share->force_directory_mode & share->directory_mask;
+}
+
+static inline int test_share_config_flag(struct ksmbd_share_config *share,
+ int flag)
+{
+ return share->flags & flag;
+}
+
+void __ksmbd_share_config_put(struct ksmbd_share_config *share);
+
+static inline void ksmbd_share_config_put(struct ksmbd_share_config *share)
+{
+ if (!atomic_dec_and_test(&share->refcount))
+ return;
+ __ksmbd_share_config_put(share);
+}
+
+struct ksmbd_share_config *ksmbd_share_config_get(char *name);
+bool ksmbd_share_veto_filename(struct ksmbd_share_config *share,
+ const char *filename);
+void ksmbd_share_configs_cleanup(void);
+
+#endif /* __SHARE_CONFIG_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/ksmbd/mgmt/tree_connect.c
new file mode 100644
index 000000000000..0d28e723a28c
--- /dev/null
+++ b/fs/ksmbd/mgmt/tree_connect.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+
+#include "../transport_ipc.h"
+#include "../connection.h"
+
+#include "tree_connect.h"
+#include "user_config.h"
+#include "share_config.h"
+#include "user_session.h"
+
+struct ksmbd_tree_conn_status
+ksmbd_tree_conn_connect(struct ksmbd_session *sess, char *share_name)
+{
+ struct ksmbd_tree_conn_status status = {-EINVAL, NULL};
+ struct ksmbd_tree_connect_response *resp = NULL;
+ struct ksmbd_share_config *sc;
+ struct ksmbd_tree_connect *tree_conn = NULL;
+ struct sockaddr *peer_addr;
+ int ret;
+
+ sc = ksmbd_share_config_get(share_name);
+ if (!sc)
+ return status;
+
+ tree_conn = kzalloc(sizeof(struct ksmbd_tree_connect), GFP_KERNEL);
+ if (!tree_conn) {
+ status.ret = -ENOMEM;
+ goto out_error;
+ }
+
+ tree_conn->id = ksmbd_acquire_tree_conn_id(sess);
+ if (tree_conn->id < 0) {
+ status.ret = -EINVAL;
+ goto out_error;
+ }
+
+ peer_addr = KSMBD_TCP_PEER_SOCKADDR(sess->conn);
+ resp = ksmbd_ipc_tree_connect_request(sess,
+ sc,
+ tree_conn,
+ peer_addr);
+ if (!resp) {
+ status.ret = -EINVAL;
+ goto out_error;
+ }
+
+ status.ret = resp->status;
+ if (status.ret != KSMBD_TREE_CONN_STATUS_OK)
+ goto out_error;
+
+ tree_conn->flags = resp->connection_flags;
+ tree_conn->user = sess->user;
+ tree_conn->share_conf = sc;
+ status.tree_conn = tree_conn;
+
+ ret = xa_err(xa_store(&sess->tree_conns, tree_conn->id, tree_conn,
+ GFP_KERNEL));
+ if (ret) {
+ status.ret = -ENOMEM;
+ goto out_error;
+ }
+ kvfree(resp);
+ return status;
+
+out_error:
+ if (tree_conn)
+ ksmbd_release_tree_conn_id(sess, tree_conn->id);
+ ksmbd_share_config_put(sc);
+ kfree(tree_conn);
+ kvfree(resp);
+ return status;
+}
+
+int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess,
+ struct ksmbd_tree_connect *tree_conn)
+{
+ int ret;
+
+ ret = ksmbd_ipc_tree_disconnect_request(sess->id, tree_conn->id);
+ ksmbd_release_tree_conn_id(sess, tree_conn->id);
+ xa_erase(&sess->tree_conns, tree_conn->id);
+ ksmbd_share_config_put(tree_conn->share_conf);
+ kfree(tree_conn);
+ return ret;
+}
+
+struct ksmbd_tree_connect *ksmbd_tree_conn_lookup(struct ksmbd_session *sess,
+ unsigned int id)
+{
+ return xa_load(&sess->tree_conns, id);
+}
+
+struct ksmbd_share_config *ksmbd_tree_conn_share(struct ksmbd_session *sess,
+ unsigned int id)
+{
+ struct ksmbd_tree_connect *tc;
+
+ tc = ksmbd_tree_conn_lookup(sess, id);
+ if (tc)
+ return tc->share_conf;
+ return NULL;
+}
+
+int ksmbd_tree_conn_session_logoff(struct ksmbd_session *sess)
+{
+ int ret = 0;
+ struct ksmbd_tree_connect *tc;
+ unsigned long id;
+
+ xa_for_each(&sess->tree_conns, id, tc)
+ ret |= ksmbd_tree_conn_disconnect(sess, tc);
+ xa_destroy(&sess->tree_conns);
+ return ret;
+}
diff --git a/fs/ksmbd/mgmt/tree_connect.h b/fs/ksmbd/mgmt/tree_connect.h
new file mode 100644
index 000000000000..18e2a996e0aa
--- /dev/null
+++ b/fs/ksmbd/mgmt/tree_connect.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __TREE_CONNECT_MANAGEMENT_H__
+#define __TREE_CONNECT_MANAGEMENT_H__
+
+#include <linux/hashtable.h>
+
+#include "../ksmbd_netlink.h"
+
+struct ksmbd_share_config;
+struct ksmbd_user;
+
+struct ksmbd_tree_connect {
+ int id;
+
+ unsigned int flags;
+ struct ksmbd_share_config *share_conf;
+ struct ksmbd_user *user;
+
+ struct list_head list;
+
+ int maximal_access;
+ bool posix_extensions;
+};
+
+struct ksmbd_tree_conn_status {
+ unsigned int ret;
+ struct ksmbd_tree_connect *tree_conn;
+};
+
+static inline int test_tree_conn_flag(struct ksmbd_tree_connect *tree_conn,
+ int flag)
+{
+ return tree_conn->flags & flag;
+}
+
+struct ksmbd_session;
+
+struct ksmbd_tree_conn_status
+ksmbd_tree_conn_connect(struct ksmbd_session *sess, char *share_name);
+
+int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess,
+ struct ksmbd_tree_connect *tree_conn);
+
+struct ksmbd_tree_connect *ksmbd_tree_conn_lookup(struct ksmbd_session *sess,
+ unsigned int id);
+
+struct ksmbd_share_config *ksmbd_tree_conn_share(struct ksmbd_session *sess,
+ unsigned int id);
+
+int ksmbd_tree_conn_session_logoff(struct ksmbd_session *sess);
+
+#endif /* __TREE_CONNECT_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/user_config.c b/fs/ksmbd/mgmt/user_config.c
new file mode 100644
index 000000000000..d21629ae5c89
--- /dev/null
+++ b/fs/ksmbd/mgmt/user_config.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include "user_config.h"
+#include "../transport_ipc.h"
+
+struct ksmbd_user *ksmbd_login_user(const char *account)
+{
+ struct ksmbd_login_response *resp;
+ struct ksmbd_user *user = NULL;
+
+ resp = ksmbd_ipc_login_request(account);
+ if (!resp)
+ return NULL;
+
+ if (!(resp->status & KSMBD_USER_FLAG_OK))
+ goto out;
+
+ user = ksmbd_alloc_user(resp);
+out:
+ kvfree(resp);
+ return user;
+}
+
+struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp)
+{
+ struct ksmbd_user *user = NULL;
+
+ user = kmalloc(sizeof(struct ksmbd_user), GFP_KERNEL);
+ if (!user)
+ return NULL;
+
+ user->name = kstrdup(resp->account, GFP_KERNEL);
+ user->flags = resp->status;
+ user->gid = resp->gid;
+ user->uid = resp->uid;
+ user->passkey_sz = resp->hash_sz;
+ user->passkey = kmalloc(resp->hash_sz, GFP_KERNEL);
+ if (user->passkey)
+ memcpy(user->passkey, resp->hash, resp->hash_sz);
+
+ if (!user->name || !user->passkey) {
+ kfree(user->name);
+ kfree(user->passkey);
+ kfree(user);
+ user = NULL;
+ }
+ return user;
+}
+
+void ksmbd_free_user(struct ksmbd_user *user)
+{
+ ksmbd_ipc_logout_request(user->name);
+ kfree(user->name);
+ kfree(user->passkey);
+ kfree(user);
+}
+
+int ksmbd_anonymous_user(struct ksmbd_user *user)
+{
+ if (user->name[0] == '\0')
+ return 1;
+ return 0;
+}
diff --git a/fs/ksmbd/mgmt/user_config.h b/fs/ksmbd/mgmt/user_config.h
new file mode 100644
index 000000000000..b2bb074a0150
--- /dev/null
+++ b/fs/ksmbd/mgmt/user_config.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __USER_CONFIG_MANAGEMENT_H__
+#define __USER_CONFIG_MANAGEMENT_H__
+
+#include "../glob.h"
+
+struct ksmbd_user {
+ unsigned short flags;
+
+ unsigned int uid;
+ unsigned int gid;
+
+ char *name;
+
+ size_t passkey_sz;
+ char *passkey;
+};
+
+static inline bool user_guest(struct ksmbd_user *user)
+{
+ return user->flags & KSMBD_USER_FLAG_GUEST_ACCOUNT;
+}
+
+static inline void set_user_flag(struct ksmbd_user *user, int flag)
+{
+ user->flags |= flag;
+}
+
+static inline int test_user_flag(struct ksmbd_user *user, int flag)
+{
+ return user->flags & flag;
+}
+
+static inline void set_user_guest(struct ksmbd_user *user)
+{
+}
+
+static inline char *user_passkey(struct ksmbd_user *user)
+{
+ return user->passkey;
+}
+
+static inline char *user_name(struct ksmbd_user *user)
+{
+ return user->name;
+}
+
+static inline unsigned int user_uid(struct ksmbd_user *user)
+{
+ return user->uid;
+}
+
+static inline unsigned int user_gid(struct ksmbd_user *user)
+{
+ return user->gid;
+}
+
+struct ksmbd_user *ksmbd_login_user(const char *account);
+struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp);
+void ksmbd_free_user(struct ksmbd_user *user);
+int ksmbd_anonymous_user(struct ksmbd_user *user);
+#endif /* __USER_CONFIG_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/mgmt/user_session.c b/fs/ksmbd/mgmt/user_session.c
new file mode 100644
index 000000000000..8d8ffd8c6f19
--- /dev/null
+++ b/fs/ksmbd/mgmt/user_session.c
@@ -0,0 +1,369 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+#include <linux/xarray.h>
+
+#include "ksmbd_ida.h"
+#include "user_session.h"
+#include "user_config.h"
+#include "tree_connect.h"
+#include "../transport_ipc.h"
+#include "../connection.h"
+#include "../vfs_cache.h"
+
+static DEFINE_IDA(session_ida);
+
+#define SESSION_HASH_BITS 3
+static DEFINE_HASHTABLE(sessions_table, SESSION_HASH_BITS);
+static DECLARE_RWSEM(sessions_table_lock);
+
+struct ksmbd_session_rpc {
+ int id;
+ unsigned int method;
+ struct list_head list;
+};
+
+static void free_channel_list(struct ksmbd_session *sess)
+{
+ struct channel *chann, *tmp;
+
+ list_for_each_entry_safe(chann, tmp, &sess->ksmbd_chann_list,
+ chann_list) {
+ list_del(&chann->chann_list);
+ kfree(chann);
+ }
+}
+
+static void __session_rpc_close(struct ksmbd_session *sess,
+ struct ksmbd_session_rpc *entry)
+{
+ struct ksmbd_rpc_command *resp;
+
+ resp = ksmbd_rpc_close(sess, entry->id);
+ if (!resp)
+ pr_err("Unable to close RPC pipe %d\n", entry->id);
+
+ kvfree(resp);
+ ksmbd_rpc_id_free(entry->id);
+ kfree(entry);
+}
+
+static void ksmbd_session_rpc_clear_list(struct ksmbd_session *sess)
+{
+ struct ksmbd_session_rpc *entry;
+
+ while (!list_empty(&sess->rpc_handle_list)) {
+ entry = list_entry(sess->rpc_handle_list.next,
+ struct ksmbd_session_rpc,
+ list);
+
+ list_del(&entry->list);
+ __session_rpc_close(sess, entry);
+ }
+}
+
+static int __rpc_method(char *rpc_name)
+{
+ if (!strcmp(rpc_name, "\\srvsvc") || !strcmp(rpc_name, "srvsvc"))
+ return KSMBD_RPC_SRVSVC_METHOD_INVOKE;
+
+ if (!strcmp(rpc_name, "\\wkssvc") || !strcmp(rpc_name, "wkssvc"))
+ return KSMBD_RPC_WKSSVC_METHOD_INVOKE;
+
+ if (!strcmp(rpc_name, "LANMAN") || !strcmp(rpc_name, "lanman"))
+ return KSMBD_RPC_RAP_METHOD;
+
+ if (!strcmp(rpc_name, "\\samr") || !strcmp(rpc_name, "samr"))
+ return KSMBD_RPC_SAMR_METHOD_INVOKE;
+
+ if (!strcmp(rpc_name, "\\lsarpc") || !strcmp(rpc_name, "lsarpc"))
+ return KSMBD_RPC_LSARPC_METHOD_INVOKE;
+
+ pr_err("Unsupported RPC: %s\n", rpc_name);
+ return 0;
+}
+
+int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name)
+{
+ struct ksmbd_session_rpc *entry;
+ struct ksmbd_rpc_command *resp;
+ int method;
+
+ method = __rpc_method(rpc_name);
+ if (!method)
+ return -EINVAL;
+
+ entry = kzalloc(sizeof(struct ksmbd_session_rpc), GFP_KERNEL);
+ if (!entry)
+ return -EINVAL;
+
+ list_add(&entry->list, &sess->rpc_handle_list);
+ entry->method = method;
+ entry->id = ksmbd_ipc_id_alloc();
+ if (entry->id < 0)
+ goto error;
+
+ resp = ksmbd_rpc_open(sess, entry->id);
+ if (!resp)
+ goto error;
+
+ kvfree(resp);
+ return entry->id;
+error:
+ list_del(&entry->list);
+ kfree(entry);
+ return -EINVAL;
+}
+
+void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id)
+{
+ struct ksmbd_session_rpc *entry;
+
+ list_for_each_entry(entry, &sess->rpc_handle_list, list) {
+ if (entry->id == id) {
+ list_del(&entry->list);
+ __session_rpc_close(sess, entry);
+ break;
+ }
+ }
+}
+
+int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id)
+{
+ struct ksmbd_session_rpc *entry;
+
+ list_for_each_entry(entry, &sess->rpc_handle_list, list) {
+ if (entry->id == id)
+ return entry->method;
+ }
+ return 0;
+}
+
+void ksmbd_session_destroy(struct ksmbd_session *sess)
+{
+ if (!sess)
+ return;
+
+ if (!atomic_dec_and_test(&sess->refcnt))
+ return;
+
+ list_del(&sess->sessions_entry);
+
+ down_write(&sessions_table_lock);
+ hash_del(&sess->hlist);
+ up_write(&sessions_table_lock);
+
+ if (sess->user)
+ ksmbd_free_user(sess->user);
+
+ ksmbd_tree_conn_session_logoff(sess);
+ ksmbd_destroy_file_table(&sess->file_table);
+ ksmbd_session_rpc_clear_list(sess);
+ free_channel_list(sess);
+ kfree(sess->Preauth_HashValue);
+ ksmbd_release_id(&session_ida, sess->id);
+ kfree(sess);
+}
+
+static struct ksmbd_session *__session_lookup(unsigned long long id)
+{
+ struct ksmbd_session *sess;
+
+ hash_for_each_possible(sessions_table, sess, hlist, id) {
+ if (id == sess->id)
+ return sess;
+ }
+ return NULL;
+}
+
+void ksmbd_session_register(struct ksmbd_conn *conn,
+ struct ksmbd_session *sess)
+{
+ sess->conn = conn;
+ list_add(&sess->sessions_entry, &conn->sessions);
+}
+
+void ksmbd_sessions_deregister(struct ksmbd_conn *conn)
+{
+ struct ksmbd_session *sess;
+
+ while (!list_empty(&conn->sessions)) {
+ sess = list_entry(conn->sessions.next,
+ struct ksmbd_session,
+ sessions_entry);
+
+ ksmbd_session_destroy(sess);
+ }
+}
+
+static bool ksmbd_session_id_match(struct ksmbd_session *sess,
+ unsigned long long id)
+{
+ return sess->id == id;
+}
+
+struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
+ unsigned long long id)
+{
+ struct ksmbd_session *sess = NULL;
+
+ list_for_each_entry(sess, &conn->sessions, sessions_entry) {
+ if (ksmbd_session_id_match(sess, id))
+ return sess;
+ }
+ return NULL;
+}
+
+int get_session(struct ksmbd_session *sess)
+{
+ return atomic_inc_not_zero(&sess->refcnt);
+}
+
+void put_session(struct ksmbd_session *sess)
+{
+ if (atomic_dec_and_test(&sess->refcnt))
+ pr_err("get/%s seems to be mismatched.", __func__);
+}
+
+struct ksmbd_session *ksmbd_session_lookup_slowpath(unsigned long long id)
+{
+ struct ksmbd_session *sess;
+
+ down_read(&sessions_table_lock);
+ sess = __session_lookup(id);
+ if (sess) {
+ if (!get_session(sess))
+ sess = NULL;
+ }
+ up_read(&sessions_table_lock);
+
+ return sess;
+}
+
+struct ksmbd_session *ksmbd_session_lookup_all(struct ksmbd_conn *conn,
+ unsigned long long id)
+{
+ struct ksmbd_session *sess;
+
+ sess = ksmbd_session_lookup(conn, id);
+ if (!sess && conn->binding)
+ sess = ksmbd_session_lookup_slowpath(id);
+ return sess;
+}
+
+struct preauth_session *ksmbd_preauth_session_alloc(struct ksmbd_conn *conn,
+ u64 sess_id)
+{
+ struct preauth_session *sess;
+
+ sess = kmalloc(sizeof(struct preauth_session), GFP_KERNEL);
+ if (!sess)
+ return NULL;
+
+ sess->id = sess_id;
+ memcpy(sess->Preauth_HashValue, conn->preauth_info->Preauth_HashValue,
+ PREAUTH_HASHVALUE_SIZE);
+ list_add(&sess->preauth_entry, &conn->preauth_sess_table);
+
+ return sess;
+}
+
+static bool ksmbd_preauth_session_id_match(struct preauth_session *sess,
+ unsigned long long id)
+{
+ return sess->id == id;
+}
+
+struct preauth_session *ksmbd_preauth_session_lookup(struct ksmbd_conn *conn,
+ unsigned long long id)
+{
+ struct preauth_session *sess = NULL;
+
+ list_for_each_entry(sess, &conn->preauth_sess_table, preauth_entry) {
+ if (ksmbd_preauth_session_id_match(sess, id))
+ return sess;
+ }
+ return NULL;
+}
+
+static int __init_smb2_session(struct ksmbd_session *sess)
+{
+ int id = ksmbd_acquire_smb2_uid(&session_ida);
+
+ if (id < 0)
+ return -EINVAL;
+ sess->id = id;
+ return 0;
+}
+
+static struct ksmbd_session *__session_create(int protocol)
+{
+ struct ksmbd_session *sess;
+ int ret;
+
+ sess = kzalloc(sizeof(struct ksmbd_session), GFP_KERNEL);
+ if (!sess)
+ return NULL;
+
+ if (ksmbd_init_file_table(&sess->file_table))
+ goto error;
+
+ set_session_flag(sess, protocol);
+ INIT_LIST_HEAD(&sess->sessions_entry);
+ xa_init(&sess->tree_conns);
+ INIT_LIST_HEAD(&sess->ksmbd_chann_list);
+ INIT_LIST_HEAD(&sess->rpc_handle_list);
+ sess->sequence_number = 1;
+ atomic_set(&sess->refcnt, 1);
+
+ switch (protocol) {
+ case CIFDS_SESSION_FLAG_SMB2:
+ ret = __init_smb2_session(sess);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret)
+ goto error;
+
+ ida_init(&sess->tree_conn_ida);
+
+ if (protocol == CIFDS_SESSION_FLAG_SMB2) {
+ down_write(&sessions_table_lock);
+ hash_add(sessions_table, &sess->hlist, sess->id);
+ up_write(&sessions_table_lock);
+ }
+ return sess;
+
+error:
+ ksmbd_session_destroy(sess);
+ return NULL;
+}
+
+struct ksmbd_session *ksmbd_smb2_session_create(void)
+{
+ return __session_create(CIFDS_SESSION_FLAG_SMB2);
+}
+
+int ksmbd_acquire_tree_conn_id(struct ksmbd_session *sess)
+{
+ int id = -EINVAL;
+
+ if (test_session_flag(sess, CIFDS_SESSION_FLAG_SMB2))
+ id = ksmbd_acquire_smb2_tid(&sess->tree_conn_ida);
+
+ return id;
+}
+
+void ksmbd_release_tree_conn_id(struct ksmbd_session *sess, int id)
+{
+ if (id >= 0)
+ ksmbd_release_id(&sess->tree_conn_ida, id);
+}
diff --git a/fs/ksmbd/mgmt/user_session.h b/fs/ksmbd/mgmt/user_session.h
new file mode 100644
index 000000000000..82289c3cbd2b
--- /dev/null
+++ b/fs/ksmbd/mgmt/user_session.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __USER_SESSION_MANAGEMENT_H__
+#define __USER_SESSION_MANAGEMENT_H__
+
+#include <linux/hashtable.h>
+#include <linux/xarray.h>
+
+#include "../smb_common.h"
+#include "../ntlmssp.h"
+
+#define CIFDS_SESSION_FLAG_SMB2 BIT(1)
+
+#define PREAUTH_HASHVALUE_SIZE 64
+
+struct ksmbd_file_table;
+
+struct channel {
+ __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
+ struct ksmbd_conn *conn;
+ struct list_head chann_list;
+};
+
+struct preauth_session {
+ __u8 Preauth_HashValue[PREAUTH_HASHVALUE_SIZE];
+ u64 id;
+ struct list_head preauth_entry;
+};
+
+struct ksmbd_session {
+ u64 id;
+
+ struct ksmbd_user *user;
+ struct ksmbd_conn *conn;
+ unsigned int sequence_number;
+ unsigned int flags;
+
+ bool sign;
+ bool enc;
+ bool is_anonymous;
+
+ int state;
+ __u8 *Preauth_HashValue;
+
+ struct ntlmssp_auth ntlmssp;
+ char sess_key[CIFS_KEY_SIZE];
+
+ struct hlist_node hlist;
+ struct list_head ksmbd_chann_list;
+ struct xarray tree_conns;
+ struct ida tree_conn_ida;
+ struct list_head rpc_handle_list;
+
+ __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE];
+ __u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE];
+ __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
+
+ struct list_head sessions_entry;
+ struct ksmbd_file_table file_table;
+ atomic_t refcnt;
+};
+
+static inline int test_session_flag(struct ksmbd_session *sess, int bit)
+{
+ return sess->flags & bit;
+}
+
+static inline void set_session_flag(struct ksmbd_session *sess, int bit)
+{
+ sess->flags |= bit;
+}
+
+static inline void clear_session_flag(struct ksmbd_session *sess, int bit)
+{
+ sess->flags &= ~bit;
+}
+
+struct ksmbd_session *ksmbd_smb2_session_create(void);
+
+void ksmbd_session_destroy(struct ksmbd_session *sess);
+
+struct ksmbd_session *ksmbd_session_lookup_slowpath(unsigned long long id);
+struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
+ unsigned long long id);
+void ksmbd_session_register(struct ksmbd_conn *conn,
+ struct ksmbd_session *sess);
+void ksmbd_sessions_deregister(struct ksmbd_conn *conn);
+struct ksmbd_session *ksmbd_session_lookup_all(struct ksmbd_conn *conn,
+ unsigned long long id);
+struct preauth_session *ksmbd_preauth_session_alloc(struct ksmbd_conn *conn,
+ u64 sess_id);
+struct preauth_session *ksmbd_preauth_session_lookup(struct ksmbd_conn *conn,
+ unsigned long long id);
+
+int ksmbd_acquire_tree_conn_id(struct ksmbd_session *sess);
+void ksmbd_release_tree_conn_id(struct ksmbd_session *sess, int id);
+
+int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name);
+void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id);
+int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id);
+int get_session(struct ksmbd_session *sess);
+void put_session(struct ksmbd_session *sess);
+#endif /* __USER_SESSION_MANAGEMENT_H__ */
diff --git a/fs/ksmbd/misc.c b/fs/ksmbd/misc.c
new file mode 100644
index 000000000000..0b307ca28a19
--- /dev/null
+++ b/fs/ksmbd/misc.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/xattr.h>
+#include <linux/fs.h>
+
+#include "misc.h"
+#include "smb_common.h"
+#include "connection.h"
+#include "vfs.h"
+
+#include "mgmt/share_config.h"
+
+/**
+ * match_pattern() - compare a string with a pattern which might include
+ * the wildcards '*' and '?'
+ * TODO: implement handling of DOS_DOT, DOS_QM and DOS_STAR
+ *
+ * @str: string to compare with a pattern
+ * @len: string length
+ * @pattern: pattern string which might include the wildcards '*' and '?'
+ *
+ * Return: true if the pattern matched the string, otherwise false
+ */
+int match_pattern(const char *str, size_t len, const char *pattern)
+{
+ const char *s = str;
+ const char *p = pattern;
+ bool star = false;
+
+ while (*s && len) {
+ switch (*p) {
+ case '?':
+ s++;
+ len--;
+ p++;
+ break;
+ case '*':
+ star = true;
+ str = s;
+ if (!*++p)
+ return true;
+ pattern = p;
+ break;
+ default:
+ if (tolower(*s) == tolower(*p)) {
+ s++;
+ len--;
+ p++;
+ } else {
+ if (!star)
+ return false;
+ str++;
+ s = str;
+ p = pattern;
+ }
+ break;
+ }
+ }
+
+ if (*p == '*')
+ ++p;
+ return !*p;
+}
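+
+/*
+ * Illustrative calls (editorial sketch matching the semantics above):
+ *
+ *	match_pattern("report.txt", 10, "*.txt")   -> true
+ *	match_pattern("report.txt", 10, "r?port*") -> true
+ *	match_pattern("report.txt", 10, "*.doc")   -> false
+ */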
+
+/*
+ * is_char_allowed() - check for valid character
+ * @ch: input character to be checked
+ *
+ * Return: 1 if char is allowed, otherwise 0
+ */
+static inline int is_char_allowed(char ch)
+{
+ /* check for control chars, wildcards etc. */
+ if (!(ch & 0x80) &&
+ (ch <= 0x1f ||
+ ch == '?' || ch == '"' || ch == '<' ||
+ ch == '>' || ch == '|' || ch == '*'))
+ return 0;
+
+ return 1;
+}
+
+int ksmbd_validate_filename(char *filename)
+{
+ while (*filename) {
+ char c = *filename;
+
+ filename++;
+ if (!is_char_allowed(c)) {
+ ksmbd_debug(VFS, "File name validation failed: 0x%x\n", c);
+ return -ENOENT;
+ }
+ }
+
+ return 0;
+}
+
+static int ksmbd_validate_stream_name(char *stream_name)
+{
+ while (*stream_name) {
+ char c = *stream_name;
+
+ stream_name++;
+ if (c == '/' || c == ':' || c == '\\') {
+ pr_err("Stream name validation failed: %c\n", c);
+ return -ENOENT;
+ }
+ }
+
+ return 0;
+}
+
+int parse_stream_name(char *filename, char **stream_name, int *s_type)
+{
+ char *stream_type;
+ char *s_name;
+ int rc = 0;
+
+ s_name = filename;
+ filename = strsep(&s_name, ":");
+ ksmbd_debug(SMB, "filename : %s, streams : %s\n", filename, s_name);
+ if (strchr(s_name, ':')) {
+ stream_type = s_name;
+ s_name = strsep(&stream_type, ":");
+
+ rc = ksmbd_validate_stream_name(s_name);
+ if (rc < 0) {
+ rc = -ENOENT;
+ goto out;
+ }
+
+ ksmbd_debug(SMB, "stream name : %s, stream type : %s\n", s_name,
+ stream_type);
+ if (!strncasecmp("$data", stream_type, 5))
+ *s_type = DATA_STREAM;
+ else if (!strncasecmp("$index_allocation", stream_type, 17))
+ *s_type = DIR_STREAM;
+ else
+ rc = -ENOENT;
+ }
+
+ *stream_name = s_name;
+out:
+ return rc;
+}
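+
+/*
+ * Editorial example: for the (hypothetical) input "file.txt:zone:$DATA",
+ * parse_stream_name() terminates the caller's buffer at the first ':'
+ * (leaving "file.txt"), and returns with *stream_name = "zone" and
+ * *s_type = DATA_STREAM.
+ */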
+
+/**
+ * convert_to_nt_pathname() - extract and return a Windows path string
+ * with the share directory prefix removed from the file path
+ * @filename: unix filename
+ * @sharepath: share path string
+ *
+ * Return: Windows path string, or NULL on allocation failure
+ */
+char *convert_to_nt_pathname(char *filename, char *sharepath)
+{
+ char *ab_pathname;
+ int len, name_len;
+
+ name_len = strlen(filename);
+ /* room for the default "\\" prefix plus the trailing NUL */
+ ab_pathname = kmalloc(name_len + 2, GFP_KERNEL);
+ if (!ab_pathname)
+ return NULL;
+
+ ab_pathname[0] = '\\';
+ ab_pathname[1] = '\0';
+
+ len = strlen(sharepath);
+ if (!strncmp(filename, sharepath, len) && name_len != len) {
+ strscpy(ab_pathname, &filename[len], name_len);
+ ksmbd_conv_path_to_windows(ab_pathname);
+ }
+
+ return ab_pathname;
+}
+
+int get_nlink(struct kstat *st)
+{
+ int nlink;
+
+ nlink = st->nlink;
+ if (S_ISDIR(st->mode))
+ nlink--;
+
+ return nlink;
+}
+
+void ksmbd_conv_path_to_unix(char *path)
+{
+ strreplace(path, '\\', '/');
+}
+
+void ksmbd_strip_last_slash(char *path)
+{
+ int len = strlen(path);
+
+ while (len && path[len - 1] == '/') {
+ path[len - 1] = '\0';
+ len--;
+ }
+}
+
+void ksmbd_conv_path_to_windows(char *path)
+{
+ strreplace(path, '/', '\\');
+}
+
+/**
+ * ksmbd_extract_sharename() - get share name from tree connect request
+ * @treename: buffer containing tree name and share name
+ *
+ * Return: share name on success, otherwise error
+ */
+char *ksmbd_extract_sharename(char *treename)
+{
+ char *name = treename;
+ char *dst;
+ char *pos = strrchr(name, '\\');
+
+ if (pos)
+ name = (pos + 1);
+
+ /* caller has to free the memory */
+ dst = kstrdup(name, GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+ return dst;
+}
+
+/**
+ * convert_to_unix_name() - convert a windows name to unix format
+ * @share: share config of the matching share
+ * @name: name to be converted
+ *
+ * Return: converted name on success, otherwise NULL
+ */
+char *convert_to_unix_name(struct ksmbd_share_config *share, char *name)
+{
+ int no_slash = 0, name_len, path_len;
+ char *new_name;
+
+ if (name[0] == '/')
+ name++;
+
+ path_len = share->path_sz;
+ name_len = strlen(name);
+ new_name = kmalloc(path_len + name_len + 2, GFP_KERNEL);
+ if (!new_name)
+ return new_name;
+
+ memcpy(new_name, share->path, path_len);
+ if (new_name[path_len - 1] != '/') {
+ new_name[path_len] = '/';
+ no_slash = 1;
+ }
+
+ memcpy(new_name + path_len + no_slash, name, name_len);
+ path_len += name_len + no_slash;
+ new_name[path_len] = 0x00;
+ return new_name;
+}
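+
+/*
+ * Editorial example: with share->path = "/srv/share" (hypothetical),
+ * convert_to_unix_name(share, "/docs/a.txt") returns the newly
+ * allocated string "/srv/share/docs/a.txt".
+ */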
+
+char *ksmbd_convert_dir_info_name(struct ksmbd_dir_info *d_info,
+ const struct nls_table *local_nls,
+ int *conv_len)
+{
+ char *conv;
+ int sz = min(4 * d_info->name_len, PATH_MAX);
+
+ if (!sz)
+ return NULL;
+
+ conv = kmalloc(sz, GFP_KERNEL);
+ if (!conv)
+ return NULL;
+
+ /* XXX */
+ *conv_len = smbConvertToUTF16((__le16 *)conv, d_info->name,
+ d_info->name_len, local_nls, 0);
+ *conv_len *= 2;
+
+ /* We allocated the buffer twice as big as needed, so the two NUL bytes fit. */
+ conv[*conv_len] = 0x00;
+ conv[*conv_len + 1] = 0x00;
+ return conv;
+}
+
+/*
+ * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
+ * into Unix UTC (based 1970-01-01, in seconds).
+ */
+struct timespec64 ksmbd_NTtimeToUnix(__le64 ntutc)
+{
+ struct timespec64 ts;
+
+ /* Subtract the NTFS time offset, then convert to 1s intervals. */
+ s64 t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET;
+ u64 abs_t;
+
+ /*
+ * Unfortunately we cannot use normal 64-bit division on 32-bit
+ * architectures, and the alternative, do_div(), does not work with
+ * negative numbers, so those have to be special-cased.
+ */
+ if (t < 0) {
+ abs_t = -t;
+ ts.tv_nsec = do_div(abs_t, 10000000) * 100;
+ ts.tv_nsec = -ts.tv_nsec;
+ ts.tv_sec = -abs_t;
+ } else {
+ abs_t = t;
+ ts.tv_nsec = do_div(abs_t, 10000000) * 100;
+ ts.tv_sec = abs_t;
+ }
+
+ return ts;
+}
+
+/* Convert the Unix UTC into NT UTC. */
+inline u64 ksmbd_UnixTimeToNT(struct timespec64 t)
+{
+ /* Convert to 100ns intervals and then add the NTFS time offset. */
+ return (u64)t.tv_sec * 10000000 + t.tv_nsec / 100 + NTFS_TIME_OFFSET;
+}
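+
+/*
+ * Editorial worked example: the Unix epoch (tv_sec = 0, tv_nsec = 0)
+ * maps to NTFS_TIME_OFFSET = 116444736000000000, i.e. the number of
+ * 100ns intervals between 1601-01-01 and 1970-01-01.
+ */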
+
+inline long long ksmbd_systime(void)
+{
+ struct timespec64 ts;
+
+ ktime_get_real_ts64(&ts);
+ return ksmbd_UnixTimeToNT(ts);
+}
diff --git a/fs/ksmbd/misc.h b/fs/ksmbd/misc.h
new file mode 100644
index 000000000000..af8717d4d85b
--- /dev/null
+++ b/fs/ksmbd/misc.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_MISC_H__
+#define __KSMBD_MISC_H__
+
+struct ksmbd_share_config;
+struct nls_table;
+struct kstat;
+struct ksmbd_file;
+
+int match_pattern(const char *str, size_t len, const char *pattern);
+int ksmbd_validate_filename(char *filename);
+int parse_stream_name(char *filename, char **stream_name, int *s_type);
+char *convert_to_nt_pathname(char *filename, char *sharepath);
+int get_nlink(struct kstat *st);
+void ksmbd_conv_path_to_unix(char *path);
+void ksmbd_strip_last_slash(char *path);
+void ksmbd_conv_path_to_windows(char *path);
+char *ksmbd_extract_sharename(char *treename);
+char *convert_to_unix_name(struct ksmbd_share_config *share, char *name);
+
+#define KSMBD_DIR_INFO_ALIGNMENT 8
+struct ksmbd_dir_info;
+char *ksmbd_convert_dir_info_name(struct ksmbd_dir_info *d_info,
+ const struct nls_table *local_nls,
+ int *conv_len);
+
+#define NTFS_TIME_OFFSET ((u64)(369 * 365 + 89) * 24 * 3600 * 10000000)
+struct timespec64 ksmbd_NTtimeToUnix(__le64 ntutc);
+u64 ksmbd_UnixTimeToNT(struct timespec64 t);
+long long ksmbd_systime(void);
+#endif /* __KSMBD_MISC_H__ */
diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c
new file mode 100644
index 000000000000..2243a2c64b37
--- /dev/null
+++ b/fs/ksmbd/ndr.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Samsung Electronics Co., Ltd.
+ * Author(s): Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+#include <linux/fs.h>
+
+#include "glob.h"
+#include "ndr.h"
+
+static inline char *ndr_get_field(struct ndr *n)
+{
+ return n->data + n->offset;
+}
+
+static int try_to_realloc_ndr_blob(struct ndr *n, size_t sz)
+{
+ char *data;
+
+ data = krealloc(n->data, n->offset + sz + 1024, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ n->data = data;
+ n->length += 1024;
+ memset(n->data + n->offset, 0, 1024);
+ return 0;
+}
+
+static void ndr_write_int16(struct ndr *n, __u16 value)
+{
+ if (n->length <= n->offset + sizeof(value))
+ try_to_realloc_ndr_blob(n, sizeof(value));
+
+ *(__le16 *)ndr_get_field(n) = cpu_to_le16(value);
+ n->offset += sizeof(value);
+}
+
+static void ndr_write_int32(struct ndr *n, __u32 value)
+{
+ if (n->length <= n->offset + sizeof(value))
+ try_to_realloc_ndr_blob(n, sizeof(value));
+
+ *(__le32 *)ndr_get_field(n) = cpu_to_le32(value);
+ n->offset += sizeof(value);
+}
+
+static void ndr_write_int64(struct ndr *n, __u64 value)
+{
+ if (n->length <= n->offset + sizeof(value))
+ try_to_realloc_ndr_blob(n, sizeof(value));
+
+ *(__le64 *)ndr_get_field(n) = cpu_to_le64(value);
+ n->offset += sizeof(value);
+}
+
+static int ndr_write_bytes(struct ndr *n, void *value, size_t sz)
+{
+ if (n->length <= n->offset + sz)
+ try_to_realloc_ndr_blob(n, sz);
+
+ memcpy(ndr_get_field(n), value, sz);
+ n->offset += sz;
+ return 0;
+}
+
+static int ndr_write_string(struct ndr *n, char *value)
+{
+ size_t sz;
+
+ sz = strlen(value) + 1;
+ if (n->length <= n->offset + sz)
+ try_to_realloc_ndr_blob(n, sz);
+
+ memcpy(ndr_get_field(n), value, sz);
+ n->offset += sz;
+ n->offset = ALIGN(n->offset, 2);
+ return 0;
+}
+
+static int ndr_read_string(struct ndr *n, void *value, size_t sz)
+{
+ int len = strnlen(ndr_get_field(n), sz);
+
+ memcpy(value, ndr_get_field(n), len);
+ len++;
+ n->offset += len;
+ n->offset = ALIGN(n->offset, 2);
+ return 0;
+}
+
+static int ndr_read_bytes(struct ndr *n, void *value, size_t sz)
+{
+ memcpy(value, ndr_get_field(n), sz);
+ n->offset += sz;
+ return 0;
+}
+
+static __u16 ndr_read_int16(struct ndr *n)
+{
+ __u16 ret;
+
+ ret = le16_to_cpu(*(__le16 *)ndr_get_field(n));
+ n->offset += sizeof(__u16);
+ return ret;
+}
+
+static __u32 ndr_read_int32(struct ndr *n)
+{
+ __u32 ret;
+
+ ret = le32_to_cpu(*(__le32 *)ndr_get_field(n));
+ n->offset += sizeof(__u32);
+ return ret;
+}
+
+static __u64 ndr_read_int64(struct ndr *n)
+{
+ __u64 ret;
+
+ ret = le64_to_cpu(*(__le64 *)ndr_get_field(n));
+ n->offset += sizeof(__u64);
+ return ret;
+}
+
+int ndr_encode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
+{
+ char hex_attr[12] = {0};
+
+ n->offset = 0;
+ n->length = 1024;
+ n->data = kzalloc(n->length, GFP_KERNEL);
+ if (!n->data)
+ return -ENOMEM;
+
+ if (da->version == 3) {
+ snprintf(hex_attr, 10, "0x%x", da->attr);
+ ndr_write_string(n, hex_attr);
+ } else {
+ ndr_write_string(n, "");
+ }
+ ndr_write_int16(n, da->version);
+ ndr_write_int32(n, da->version);
+
+ ndr_write_int32(n, da->flags);
+ ndr_write_int32(n, da->attr);
+ if (da->version == 3) {
+ ndr_write_int32(n, da->ea_size);
+ ndr_write_int64(n, da->size);
+ ndr_write_int64(n, da->alloc_size);
+ } else {
+ ndr_write_int64(n, da->itime);
+ }
+ ndr_write_int64(n, da->create_time);
+ if (da->version == 3)
+ ndr_write_int64(n, da->change_time);
+ return 0;
+}
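+
+/*
+ * Resulting blob layout, as implied by the writer above (v3 shown; v4
+ * writes an empty string, itime instead of the ea_size/size/alloc_size
+ * triple, and no trailing change_time):
+ *
+ * string hex attr ("0x%x")
+ * int16 version
+ * int32 version (repeated)
+ * int32 flags
+ * int32 attr
+ * int32 ea_size
+ * int64 size
+ * int64 alloc_size
+ * int64 create_time
+ * int64 change_time
+ */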
+
+int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
+{
+ char *hex_attr;
+ int version2;
+
+ hex_attr = kzalloc(n->length, GFP_KERNEL);
+ if (!hex_attr)
+ return -ENOMEM;
+
+ n->offset = 0;
+ ndr_read_string(n, hex_attr, n->length);
+ kfree(hex_attr);
+ da->version = ndr_read_int16(n);
+
+ if (da->version != 3 && da->version != 4) {
+ pr_err("v%d version is not supported\n", da->version);
+ return -EINVAL;
+ }
+
+ version2 = ndr_read_int32(n);
+ if (da->version != version2) {
+ pr_err("ndr version mismatched(version: %d, version2: %d)\n",
+ da->version, version2);
+ return -EINVAL;
+ }
+
+ ndr_read_int32(n);
+ da->attr = ndr_read_int32(n);
+ if (da->version == 4) {
+ da->itime = ndr_read_int64(n);
+ da->create_time = ndr_read_int64(n);
+ } else {
+ ndr_read_int32(n);
+ ndr_read_int64(n);
+ ndr_read_int64(n);
+ da->create_time = ndr_read_int64(n);
+ ndr_read_int64(n);
+ }
+
+ return 0;
+}
+
+static int ndr_encode_posix_acl_entry(struct ndr *n, struct xattr_smb_acl *acl)
+{
+ int i;
+
+ ndr_write_int32(n, acl->count);
+ n->offset = ALIGN(n->offset, 8);
+ ndr_write_int32(n, acl->count);
+ ndr_write_int32(n, 0);
+
+ for (i = 0; i < acl->count; i++) {
+ n->offset = ALIGN(n->offset, 8);
+ ndr_write_int16(n, acl->entries[i].type);
+ ndr_write_int16(n, acl->entries[i].type);
+
+ if (acl->entries[i].type == SMB_ACL_USER) {
+ n->offset = ALIGN(n->offset, 8);
+ ndr_write_int64(n, acl->entries[i].uid);
+ } else if (acl->entries[i].type == SMB_ACL_GROUP) {
+ n->offset = ALIGN(n->offset, 8);
+ ndr_write_int64(n, acl->entries[i].gid);
+ }
+
+ /* push permission */
+ ndr_write_int32(n, acl->entries[i].perm);
+ }
+
+ return 0;
+}
+
+int ndr_encode_posix_acl(struct ndr *n,
+ struct user_namespace *user_ns,
+ struct inode *inode,
+ struct xattr_smb_acl *acl,
+ struct xattr_smb_acl *def_acl)
+{
+ int ref_id = 0x00020000;
+
+ n->offset = 0;
+ n->length = 1024;
+ n->data = kzalloc(n->length, GFP_KERNEL);
+ if (!n->data)
+ return -ENOMEM;
+
+ if (acl) {
+ /* ACL ACCESS */
+ ndr_write_int32(n, ref_id);
+ ref_id += 4;
+ } else {
+ ndr_write_int32(n, 0);
+ }
+
+ if (def_acl) {
+ /* DEFAULT ACL ACCESS */
+ ndr_write_int32(n, ref_id);
+ ref_id += 4;
+ } else {
+ ndr_write_int32(n, 0);
+ }
+
+ ndr_write_int64(n, from_kuid(user_ns, inode->i_uid));
+ ndr_write_int64(n, from_kgid(user_ns, inode->i_gid));
+ ndr_write_int32(n, inode->i_mode);
+
+ if (acl) {
+ ndr_encode_posix_acl_entry(n, acl);
+ if (def_acl)
+ ndr_encode_posix_acl_entry(n, def_acl);
+ }
+ return 0;
+}
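+
+/*
+ * A note on the encoding above: the ref_id values (0x00020000, stepped
+ * by 4 per pointer) follow the usual NDR convention of serializing each
+ * non-NULL pointer as a unique non-zero referent id and a NULL pointer
+ * as 0; the pointed-to data (the ACL entries) is then pushed after the
+ * top-level uid/gid/mode fields.
+ */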
+
+int ndr_encode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
+{
+ int ref_id = 0x00020004;
+
+ n->offset = 0;
+ n->length = 2048;
+ n->data = kzalloc(n->length, GFP_KERNEL);
+ if (!n->data)
+ return -ENOMEM;
+
+ ndr_write_int16(n, acl->version);
+ ndr_write_int32(n, acl->version);
+ ndr_write_int16(n, 2);
+ ndr_write_int32(n, ref_id);
+
+ /* push hash type and 64-byte hash */
+ ndr_write_int16(n, acl->hash_type);
+ ndr_write_bytes(n, acl->hash, XATTR_SD_HASH_SIZE);
+ ndr_write_bytes(n, acl->desc, acl->desc_len);
+ ndr_write_int64(n, acl->current_time);
+ ndr_write_bytes(n, acl->posix_acl_hash, XATTR_SD_HASH_SIZE);
+
+ /* push ndr for security descriptor */
+ ndr_write_bytes(n, acl->sd_buf, acl->sd_size);
+
+ return 0;
+}
+
+int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
+{
+ int version2;
+
+ n->offset = 0;
+ acl->version = ndr_read_int16(n);
+ if (acl->version != 4) {
+ pr_err("v%d version is not supported\n", acl->version);
+ return -EINVAL;
+ }
+
+ version2 = ndr_read_int32(n);
+ if (acl->version != version2) {
+ pr_err("ndr version mismatched(version: %d, version2: %d)\n",
+ acl->version, version2);
+ return -EINVAL;
+ }
+
+ /* Read Level */
+ ndr_read_int16(n);
+ /* Read Ref Id */
+ ndr_read_int32(n);
+ acl->hash_type = ndr_read_int16(n);
+ ndr_read_bytes(n, acl->hash, XATTR_SD_HASH_SIZE);
+
+ ndr_read_bytes(n, acl->desc, 10);
+ if (strncmp(acl->desc, "posix_acl", 9)) {
+ pr_err("Invalid acl description : %s\n", acl->desc);
+ return -EINVAL;
+ }
+
+ /* Read Time */
+ ndr_read_int64(n);
+ /* Read Posix ACL hash */
+ ndr_read_bytes(n, acl->posix_acl_hash, XATTR_SD_HASH_SIZE);
+ acl->sd_size = n->length - n->offset;
+ acl->sd_buf = kzalloc(acl->sd_size, GFP_KERNEL);
+ if (!acl->sd_buf)
+ return -ENOMEM;
+
+ ndr_read_bytes(n, acl->sd_buf, acl->sd_size);
+
+ return 0;
+}
diff --git a/fs/ksmbd/ndr.h b/fs/ksmbd/ndr.h
new file mode 100644
index 000000000000..60ca265d1bb0
--- /dev/null
+++ b/fs/ksmbd/ndr.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 Samsung Electronics Co., Ltd.
+ * Author(s): Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+struct ndr {
+ char *data;
+ int offset;
+ int length;
+};
+
+#define NDR_NTSD_OFFSETOF 0xA0
+
+int ndr_encode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da);
+int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da);
+int ndr_encode_posix_acl(struct ndr *n, struct user_namespace *user_ns,
+ struct inode *inode, struct xattr_smb_acl *acl,
+ struct xattr_smb_acl *def_acl);
+int ndr_encode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl);
+int ndr_encode_v3_ntacl(struct ndr *n, struct xattr_ntacl *acl);
+int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl);
diff --git a/fs/ksmbd/nterr.h b/fs/ksmbd/nterr.h
new file mode 100644
index 000000000000..2f358f88a018
--- /dev/null
+++ b/fs/ksmbd/nterr.h
@@ -0,0 +1,543 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Unix SMB/Netbios implementation.
+ * Version 1.9.
+ * NT error code constants
+ * Copyright (C) Andrew Tridgell 1992-2000
+ * Copyright (C) John H Terpstra 1996-2000
+ * Copyright (C) Luke Kenneth Casson Leighton 1996-2000
+ * Copyright (C) Paul Ashton 1998-2000
+ */
+
+#ifndef _NTERR_H
+#define _NTERR_H
+
+/* Win32 Status codes. */
+#define NT_STATUS_MORE_ENTRIES 0x0105
+#define NT_ERROR_INVALID_PARAMETER 0x0057
+#define NT_ERROR_INSUFFICIENT_BUFFER 0x007a
+#define NT_STATUS_1804 0x070c
+#define NT_STATUS_NOTIFY_ENUM_DIR 0x010c
+#define NT_STATUS_INVALID_LOCK_RANGE (0xC0000000 | 0x01a1)
+/*
+ * Win32 error codes extracted by looping in smbclient and then printing
+ * a netmon sniff to a file.
+ */
+
+#define NT_STATUS_OK 0x0000
+#define NT_STATUS_SOME_UNMAPPED 0x0107
+#define NT_STATUS_BUFFER_OVERFLOW 0x80000005
+#define NT_STATUS_NO_MORE_ENTRIES 0x8000001a
+#define NT_STATUS_MEDIA_CHANGED 0x8000001c
+#define NT_STATUS_END_OF_MEDIA 0x8000001e
+#define NT_STATUS_MEDIA_CHECK 0x80000020
+#define NT_STATUS_NO_DATA_DETECTED 0x8000001c
+#define NT_STATUS_STOPPED_ON_SYMLINK 0x8000002d
+#define NT_STATUS_DEVICE_REQUIRES_CLEANING 0x80000288
+#define NT_STATUS_DEVICE_DOOR_OPEN 0x80000288
+#define NT_STATUS_UNSUCCESSFUL (0xC0000000 | 0x0001)
+#define NT_STATUS_NOT_IMPLEMENTED (0xC0000000 | 0x0002)
+#define NT_STATUS_INVALID_INFO_CLASS (0xC0000000 | 0x0003)
+#define NT_STATUS_INFO_LENGTH_MISMATCH (0xC0000000 | 0x0004)
+#define NT_STATUS_ACCESS_VIOLATION (0xC0000000 | 0x0005)
+#define NT_STATUS_IN_PAGE_ERROR (0xC0000000 | 0x0006)
+#define NT_STATUS_PAGEFILE_QUOTA (0xC0000000 | 0x0007)
+#define NT_STATUS_INVALID_HANDLE (0xC0000000 | 0x0008)
+#define NT_STATUS_BAD_INITIAL_STACK (0xC0000000 | 0x0009)
+#define NT_STATUS_BAD_INITIAL_PC (0xC0000000 | 0x000a)
+#define NT_STATUS_INVALID_CID (0xC0000000 | 0x000b)
+#define NT_STATUS_TIMER_NOT_CANCELED (0xC0000000 | 0x000c)
+#define NT_STATUS_INVALID_PARAMETER (0xC0000000 | 0x000d)
+#define NT_STATUS_NO_SUCH_DEVICE (0xC0000000 | 0x000e)
+#define NT_STATUS_NO_SUCH_FILE (0xC0000000 | 0x000f)
+#define NT_STATUS_INVALID_DEVICE_REQUEST (0xC0000000 | 0x0010)
+#define NT_STATUS_END_OF_FILE (0xC0000000 | 0x0011)
+#define NT_STATUS_WRONG_VOLUME (0xC0000000 | 0x0012)
+#define NT_STATUS_NO_MEDIA_IN_DEVICE (0xC0000000 | 0x0013)
+#define NT_STATUS_UNRECOGNIZED_MEDIA (0xC0000000 | 0x0014)
+#define NT_STATUS_NONEXISTENT_SECTOR (0xC0000000 | 0x0015)
+#define NT_STATUS_MORE_PROCESSING_REQUIRED (0xC0000000 | 0x0016)
+#define NT_STATUS_NO_MEMORY (0xC0000000 | 0x0017)
+#define NT_STATUS_CONFLICTING_ADDRESSES (0xC0000000 | 0x0018)
+#define NT_STATUS_NOT_MAPPED_VIEW (0xC0000000 | 0x0019)
+#define NT_STATUS_UNABLE_TO_FREE_VM (0x80000000 | 0x001a)
+#define NT_STATUS_UNABLE_TO_DELETE_SECTION (0xC0000000 | 0x001b)
+#define NT_STATUS_INVALID_SYSTEM_SERVICE (0xC0000000 | 0x001c)
+#define NT_STATUS_ILLEGAL_INSTRUCTION (0xC0000000 | 0x001d)
+#define NT_STATUS_INVALID_LOCK_SEQUENCE (0xC0000000 | 0x001e)
+#define NT_STATUS_INVALID_VIEW_SIZE (0xC0000000 | 0x001f)
+#define NT_STATUS_INVALID_FILE_FOR_SECTION (0xC0000000 | 0x0020)
+#define NT_STATUS_ALREADY_COMMITTED (0xC0000000 | 0x0021)
+#define NT_STATUS_ACCESS_DENIED (0xC0000000 | 0x0022)
+#define NT_STATUS_BUFFER_TOO_SMALL (0xC0000000 | 0x0023)
+#define NT_STATUS_OBJECT_TYPE_MISMATCH (0xC0000000 | 0x0024)
+#define NT_STATUS_NONCONTINUABLE_EXCEPTION (0xC0000000 | 0x0025)
+#define NT_STATUS_INVALID_DISPOSITION (0xC0000000 | 0x0026)
+#define NT_STATUS_UNWIND (0xC0000000 | 0x0027)
+#define NT_STATUS_BAD_STACK (0xC0000000 | 0x0028)
+#define NT_STATUS_INVALID_UNWIND_TARGET (0xC0000000 | 0x0029)
+#define NT_STATUS_NOT_LOCKED (0xC0000000 | 0x002a)
+#define NT_STATUS_PARITY_ERROR (0xC0000000 | 0x002b)
+#define NT_STATUS_UNABLE_TO_DECOMMIT_VM (0xC0000000 | 0x002c)
+#define NT_STATUS_NOT_COMMITTED (0xC0000000 | 0x002d)
+#define NT_STATUS_INVALID_PORT_ATTRIBUTES (0xC0000000 | 0x002e)
+#define NT_STATUS_PORT_MESSAGE_TOO_LONG (0xC0000000 | 0x002f)
+#define NT_STATUS_INVALID_PARAMETER_MIX (0xC0000000 | 0x0030)
+#define NT_STATUS_INVALID_QUOTA_LOWER (0xC0000000 | 0x0031)
+#define NT_STATUS_DISK_CORRUPT_ERROR (0xC0000000 | 0x0032)
+#define NT_STATUS_OBJECT_NAME_INVALID (0xC0000000 | 0x0033)
+#define NT_STATUS_OBJECT_NAME_NOT_FOUND (0xC0000000 | 0x0034)
+#define NT_STATUS_OBJECT_NAME_COLLISION (0xC0000000 | 0x0035)
+#define NT_STATUS_HANDLE_NOT_WAITABLE (0xC0000000 | 0x0036)
+#define NT_STATUS_PORT_DISCONNECTED (0xC0000000 | 0x0037)
+#define NT_STATUS_DEVICE_ALREADY_ATTACHED (0xC0000000 | 0x0038)
+#define NT_STATUS_OBJECT_PATH_INVALID (0xC0000000 | 0x0039)
+#define NT_STATUS_OBJECT_PATH_NOT_FOUND (0xC0000000 | 0x003a)
+#define NT_STATUS_OBJECT_PATH_SYNTAX_BAD (0xC0000000 | 0x003b)
+#define NT_STATUS_DATA_OVERRUN (0xC0000000 | 0x003c)
+#define NT_STATUS_DATA_LATE_ERROR (0xC0000000 | 0x003d)
+#define NT_STATUS_DATA_ERROR (0xC0000000 | 0x003e)
+#define NT_STATUS_CRC_ERROR (0xC0000000 | 0x003f)
+#define NT_STATUS_SECTION_TOO_BIG (0xC0000000 | 0x0040)
+#define NT_STATUS_PORT_CONNECTION_REFUSED (0xC0000000 | 0x0041)
+#define NT_STATUS_INVALID_PORT_HANDLE (0xC0000000 | 0x0042)
+#define NT_STATUS_SHARING_VIOLATION (0xC0000000 | 0x0043)
+#define NT_STATUS_QUOTA_EXCEEDED (0xC0000000 | 0x0044)
+#define NT_STATUS_INVALID_PAGE_PROTECTION (0xC0000000 | 0x0045)
+#define NT_STATUS_MUTANT_NOT_OWNED (0xC0000000 | 0x0046)
+#define NT_STATUS_SEMAPHORE_LIMIT_EXCEEDED (0xC0000000 | 0x0047)
+#define NT_STATUS_PORT_ALREADY_SET (0xC0000000 | 0x0048)
+#define NT_STATUS_SECTION_NOT_IMAGE (0xC0000000 | 0x0049)
+#define NT_STATUS_SUSPEND_COUNT_EXCEEDED (0xC0000000 | 0x004a)
+#define NT_STATUS_THREAD_IS_TERMINATING (0xC0000000 | 0x004b)
+#define NT_STATUS_BAD_WORKING_SET_LIMIT (0xC0000000 | 0x004c)
+#define NT_STATUS_INCOMPATIBLE_FILE_MAP (0xC0000000 | 0x004d)
+#define NT_STATUS_SECTION_PROTECTION (0xC0000000 | 0x004e)
+#define NT_STATUS_EAS_NOT_SUPPORTED (0xC0000000 | 0x004f)
+#define NT_STATUS_EA_TOO_LARGE (0xC0000000 | 0x0050)
+#define NT_STATUS_NONEXISTENT_EA_ENTRY (0xC0000000 | 0x0051)
+#define NT_STATUS_NO_EAS_ON_FILE (0xC0000000 | 0x0052)
+#define NT_STATUS_EA_CORRUPT_ERROR (0xC0000000 | 0x0053)
+#define NT_STATUS_FILE_LOCK_CONFLICT (0xC0000000 | 0x0054)
+#define NT_STATUS_LOCK_NOT_GRANTED (0xC0000000 | 0x0055)
+#define NT_STATUS_DELETE_PENDING (0xC0000000 | 0x0056)
+#define NT_STATUS_CTL_FILE_NOT_SUPPORTED (0xC0000000 | 0x0057)
+#define NT_STATUS_UNKNOWN_REVISION (0xC0000000 | 0x0058)
+#define NT_STATUS_REVISION_MISMATCH (0xC0000000 | 0x0059)
+#define NT_STATUS_INVALID_OWNER (0xC0000000 | 0x005a)
+#define NT_STATUS_INVALID_PRIMARY_GROUP (0xC0000000 | 0x005b)
+#define NT_STATUS_NO_IMPERSONATION_TOKEN (0xC0000000 | 0x005c)
+#define NT_STATUS_CANT_DISABLE_MANDATORY (0xC0000000 | 0x005d)
+#define NT_STATUS_NO_LOGON_SERVERS (0xC0000000 | 0x005e)
+#define NT_STATUS_NO_SUCH_LOGON_SESSION (0xC0000000 | 0x005f)
+#define NT_STATUS_NO_SUCH_PRIVILEGE (0xC0000000 | 0x0060)
+#define NT_STATUS_PRIVILEGE_NOT_HELD (0xC0000000 | 0x0061)
+#define NT_STATUS_INVALID_ACCOUNT_NAME (0xC0000000 | 0x0062)
+#define NT_STATUS_USER_EXISTS (0xC0000000 | 0x0063)
+#define NT_STATUS_NO_SUCH_USER (0xC0000000 | 0x0064)
+#define NT_STATUS_GROUP_EXISTS (0xC0000000 | 0x0065)
+#define NT_STATUS_NO_SUCH_GROUP (0xC0000000 | 0x0066)
+#define NT_STATUS_MEMBER_IN_GROUP (0xC0000000 | 0x0067)
+#define NT_STATUS_MEMBER_NOT_IN_GROUP (0xC0000000 | 0x0068)
+#define NT_STATUS_LAST_ADMIN (0xC0000000 | 0x0069)
+#define NT_STATUS_WRONG_PASSWORD (0xC0000000 | 0x006a)
+#define NT_STATUS_ILL_FORMED_PASSWORD (0xC0000000 | 0x006b)
+#define NT_STATUS_PASSWORD_RESTRICTION (0xC0000000 | 0x006c)
+#define NT_STATUS_LOGON_FAILURE (0xC0000000 | 0x006d)
+#define NT_STATUS_ACCOUNT_RESTRICTION (0xC0000000 | 0x006e)
+#define NT_STATUS_INVALID_LOGON_HOURS (0xC0000000 | 0x006f)
+#define NT_STATUS_INVALID_WORKSTATION (0xC0000000 | 0x0070)
+#define NT_STATUS_PASSWORD_EXPIRED (0xC0000000 | 0x0071)
+#define NT_STATUS_ACCOUNT_DISABLED (0xC0000000 | 0x0072)
+#define NT_STATUS_NONE_MAPPED (0xC0000000 | 0x0073)
+#define NT_STATUS_TOO_MANY_LUIDS_REQUESTED (0xC0000000 | 0x0074)
+#define NT_STATUS_LUIDS_EXHAUSTED (0xC0000000 | 0x0075)
+#define NT_STATUS_INVALID_SUB_AUTHORITY (0xC0000000 | 0x0076)
+#define NT_STATUS_INVALID_ACL (0xC0000000 | 0x0077)
+#define NT_STATUS_INVALID_SID (0xC0000000 | 0x0078)
+#define NT_STATUS_INVALID_SECURITY_DESCR (0xC0000000 | 0x0079)
+#define NT_STATUS_PROCEDURE_NOT_FOUND (0xC0000000 | 0x007a)
+#define NT_STATUS_INVALID_IMAGE_FORMAT (0xC0000000 | 0x007b)
+#define NT_STATUS_NO_TOKEN (0xC0000000 | 0x007c)
+#define NT_STATUS_BAD_INHERITANCE_ACL (0xC0000000 | 0x007d)
+#define NT_STATUS_RANGE_NOT_LOCKED (0xC0000000 | 0x007e)
+#define NT_STATUS_DISK_FULL (0xC0000000 | 0x007f)
+#define NT_STATUS_SERVER_DISABLED (0xC0000000 | 0x0080)
+#define NT_STATUS_SERVER_NOT_DISABLED (0xC0000000 | 0x0081)
+#define NT_STATUS_TOO_MANY_GUIDS_REQUESTED (0xC0000000 | 0x0082)
+#define NT_STATUS_GUIDS_EXHAUSTED (0xC0000000 | 0x0083)
+#define NT_STATUS_INVALID_ID_AUTHORITY (0xC0000000 | 0x0084)
+#define NT_STATUS_AGENTS_EXHAUSTED (0xC0000000 | 0x0085)
+#define NT_STATUS_INVALID_VOLUME_LABEL (0xC0000000 | 0x0086)
+#define NT_STATUS_SECTION_NOT_EXTENDED (0xC0000000 | 0x0087)
+#define NT_STATUS_NOT_MAPPED_DATA (0xC0000000 | 0x0088)
+#define NT_STATUS_RESOURCE_DATA_NOT_FOUND (0xC0000000 | 0x0089)
+#define NT_STATUS_RESOURCE_TYPE_NOT_FOUND (0xC0000000 | 0x008a)
+#define NT_STATUS_RESOURCE_NAME_NOT_FOUND (0xC0000000 | 0x008b)
+#define NT_STATUS_ARRAY_BOUNDS_EXCEEDED (0xC0000000 | 0x008c)
+#define NT_STATUS_FLOAT_DENORMAL_OPERAND (0xC0000000 | 0x008d)
+#define NT_STATUS_FLOAT_DIVIDE_BY_ZERO (0xC0000000 | 0x008e)
+#define NT_STATUS_FLOAT_INEXACT_RESULT (0xC0000000 | 0x008f)
+#define NT_STATUS_FLOAT_INVALID_OPERATION (0xC0000000 | 0x0090)
+#define NT_STATUS_FLOAT_OVERFLOW (0xC0000000 | 0x0091)
+#define NT_STATUS_FLOAT_STACK_CHECK (0xC0000000 | 0x0092)
+#define NT_STATUS_FLOAT_UNDERFLOW (0xC0000000 | 0x0093)
+#define NT_STATUS_INTEGER_DIVIDE_BY_ZERO (0xC0000000 | 0x0094)
+#define NT_STATUS_INTEGER_OVERFLOW (0xC0000000 | 0x0095)
+#define NT_STATUS_PRIVILEGED_INSTRUCTION (0xC0000000 | 0x0096)
+#define NT_STATUS_TOO_MANY_PAGING_FILES (0xC0000000 | 0x0097)
+#define NT_STATUS_FILE_INVALID (0xC0000000 | 0x0098)
+#define NT_STATUS_ALLOTTED_SPACE_EXCEEDED (0xC0000000 | 0x0099)
+#define NT_STATUS_INSUFFICIENT_RESOURCES (0xC0000000 | 0x009a)
+#define NT_STATUS_DFS_EXIT_PATH_FOUND (0xC0000000 | 0x009b)
+#define NT_STATUS_DEVICE_DATA_ERROR (0xC0000000 | 0x009c)
+#define NT_STATUS_DEVICE_NOT_CONNECTED (0xC0000000 | 0x009d)
+#define NT_STATUS_DEVICE_POWER_FAILURE (0xC0000000 | 0x009e)
+#define NT_STATUS_FREE_VM_NOT_AT_BASE (0xC0000000 | 0x009f)
+#define NT_STATUS_MEMORY_NOT_ALLOCATED (0xC0000000 | 0x00a0)
+#define NT_STATUS_WORKING_SET_QUOTA (0xC0000000 | 0x00a1)
+#define NT_STATUS_MEDIA_WRITE_PROTECTED (0xC0000000 | 0x00a2)
+#define NT_STATUS_DEVICE_NOT_READY (0xC0000000 | 0x00a3)
+#define NT_STATUS_INVALID_GROUP_ATTRIBUTES (0xC0000000 | 0x00a4)
+#define NT_STATUS_BAD_IMPERSONATION_LEVEL (0xC0000000 | 0x00a5)
+#define NT_STATUS_CANT_OPEN_ANONYMOUS (0xC0000000 | 0x00a6)
+#define NT_STATUS_BAD_VALIDATION_CLASS (0xC0000000 | 0x00a7)
+#define NT_STATUS_BAD_TOKEN_TYPE (0xC0000000 | 0x00a8)
+#define NT_STATUS_BAD_MASTER_BOOT_RECORD (0xC0000000 | 0x00a9)
+#define NT_STATUS_INSTRUCTION_MISALIGNMENT (0xC0000000 | 0x00aa)
+#define NT_STATUS_INSTANCE_NOT_AVAILABLE (0xC0000000 | 0x00ab)
+#define NT_STATUS_PIPE_NOT_AVAILABLE (0xC0000000 | 0x00ac)
+#define NT_STATUS_INVALID_PIPE_STATE (0xC0000000 | 0x00ad)
+#define NT_STATUS_PIPE_BUSY (0xC0000000 | 0x00ae)
+#define NT_STATUS_ILLEGAL_FUNCTION (0xC0000000 | 0x00af)
+#define NT_STATUS_PIPE_DISCONNECTED (0xC0000000 | 0x00b0)
+#define NT_STATUS_PIPE_CLOSING (0xC0000000 | 0x00b1)
+#define NT_STATUS_PIPE_CONNECTED (0xC0000000 | 0x00b2)
+#define NT_STATUS_PIPE_LISTENING (0xC0000000 | 0x00b3)
+#define NT_STATUS_INVALID_READ_MODE (0xC0000000 | 0x00b4)
+#define NT_STATUS_IO_TIMEOUT (0xC0000000 | 0x00b5)
+#define NT_STATUS_FILE_FORCED_CLOSED (0xC0000000 | 0x00b6)
+#define NT_STATUS_PROFILING_NOT_STARTED (0xC0000000 | 0x00b7)
+#define NT_STATUS_PROFILING_NOT_STOPPED (0xC0000000 | 0x00b8)
+#define NT_STATUS_COULD_NOT_INTERPRET (0xC0000000 | 0x00b9)
+#define NT_STATUS_FILE_IS_A_DIRECTORY (0xC0000000 | 0x00ba)
+#define NT_STATUS_NOT_SUPPORTED (0xC0000000 | 0x00bb)
+#define NT_STATUS_REMOTE_NOT_LISTENING (0xC0000000 | 0x00bc)
+#define NT_STATUS_DUPLICATE_NAME (0xC0000000 | 0x00bd)
+#define NT_STATUS_BAD_NETWORK_PATH (0xC0000000 | 0x00be)
+#define NT_STATUS_NETWORK_BUSY (0xC0000000 | 0x00bf)
+#define NT_STATUS_DEVICE_DOES_NOT_EXIST (0xC0000000 | 0x00c0)
+#define NT_STATUS_TOO_MANY_COMMANDS (0xC0000000 | 0x00c1)
+#define NT_STATUS_ADAPTER_HARDWARE_ERROR (0xC0000000 | 0x00c2)
+#define NT_STATUS_INVALID_NETWORK_RESPONSE (0xC0000000 | 0x00c3)
+#define NT_STATUS_UNEXPECTED_NETWORK_ERROR (0xC0000000 | 0x00c4)
+#define NT_STATUS_BAD_REMOTE_ADAPTER (0xC0000000 | 0x00c5)
+#define NT_STATUS_PRINT_QUEUE_FULL (0xC0000000 | 0x00c6)
+#define NT_STATUS_NO_SPOOL_SPACE (0xC0000000 | 0x00c7)
+#define NT_STATUS_PRINT_CANCELLED (0xC0000000 | 0x00c8)
+#define NT_STATUS_NETWORK_NAME_DELETED (0xC0000000 | 0x00c9)
+#define NT_STATUS_NETWORK_ACCESS_DENIED (0xC0000000 | 0x00ca)
+#define NT_STATUS_BAD_DEVICE_TYPE (0xC0000000 | 0x00cb)
+#define NT_STATUS_BAD_NETWORK_NAME (0xC0000000 | 0x00cc)
+#define NT_STATUS_TOO_MANY_NAMES (0xC0000000 | 0x00cd)
+#define NT_STATUS_TOO_MANY_SESSIONS (0xC0000000 | 0x00ce)
+#define NT_STATUS_SHARING_PAUSED (0xC0000000 | 0x00cf)
+#define NT_STATUS_REQUEST_NOT_ACCEPTED (0xC0000000 | 0x00d0)
+#define NT_STATUS_REDIRECTOR_PAUSED (0xC0000000 | 0x00d1)
+#define NT_STATUS_NET_WRITE_FAULT (0xC0000000 | 0x00d2)
+#define NT_STATUS_PROFILING_AT_LIMIT (0xC0000000 | 0x00d3)
+#define NT_STATUS_NOT_SAME_DEVICE (0xC0000000 | 0x00d4)
+#define NT_STATUS_FILE_RENAMED (0xC0000000 | 0x00d5)
+#define NT_STATUS_VIRTUAL_CIRCUIT_CLOSED (0xC0000000 | 0x00d6)
+#define NT_STATUS_NO_SECURITY_ON_OBJECT (0xC0000000 | 0x00d7)
+#define NT_STATUS_CANT_WAIT (0xC0000000 | 0x00d8)
+#define NT_STATUS_PIPE_EMPTY (0xC0000000 | 0x00d9)
+#define NT_STATUS_CANT_ACCESS_DOMAIN_INFO (0xC0000000 | 0x00da)
+#define NT_STATUS_CANT_TERMINATE_SELF (0xC0000000 | 0x00db)
+#define NT_STATUS_INVALID_SERVER_STATE (0xC0000000 | 0x00dc)
+#define NT_STATUS_INVALID_DOMAIN_STATE (0xC0000000 | 0x00dd)
+#define NT_STATUS_INVALID_DOMAIN_ROLE (0xC0000000 | 0x00de)
+#define NT_STATUS_NO_SUCH_DOMAIN (0xC0000000 | 0x00df)
+#define NT_STATUS_DOMAIN_EXISTS (0xC0000000 | 0x00e0)
+#define NT_STATUS_DOMAIN_LIMIT_EXCEEDED (0xC0000000 | 0x00e1)
+#define NT_STATUS_OPLOCK_NOT_GRANTED (0xC0000000 | 0x00e2)
+#define NT_STATUS_INVALID_OPLOCK_PROTOCOL (0xC0000000 | 0x00e3)
+#define NT_STATUS_INTERNAL_DB_CORRUPTION (0xC0000000 | 0x00e4)
+#define NT_STATUS_INTERNAL_ERROR (0xC0000000 | 0x00e5)
+#define NT_STATUS_GENERIC_NOT_MAPPED (0xC0000000 | 0x00e6)
+#define NT_STATUS_BAD_DESCRIPTOR_FORMAT (0xC0000000 | 0x00e7)
+#define NT_STATUS_INVALID_USER_BUFFER (0xC0000000 | 0x00e8)
+#define NT_STATUS_UNEXPECTED_IO_ERROR (0xC0000000 | 0x00e9)
+#define NT_STATUS_UNEXPECTED_MM_CREATE_ERR (0xC0000000 | 0x00ea)
+#define NT_STATUS_UNEXPECTED_MM_MAP_ERROR (0xC0000000 | 0x00eb)
+#define NT_STATUS_UNEXPECTED_MM_EXTEND_ERR (0xC0000000 | 0x00ec)
+#define NT_STATUS_NOT_LOGON_PROCESS (0xC0000000 | 0x00ed)
+#define NT_STATUS_LOGON_SESSION_EXISTS (0xC0000000 | 0x00ee)
+#define NT_STATUS_INVALID_PARAMETER_1 (0xC0000000 | 0x00ef)
+#define NT_STATUS_INVALID_PARAMETER_2 (0xC0000000 | 0x00f0)
+#define NT_STATUS_INVALID_PARAMETER_3 (0xC0000000 | 0x00f1)
+#define NT_STATUS_INVALID_PARAMETER_4 (0xC0000000 | 0x00f2)
+#define NT_STATUS_INVALID_PARAMETER_5 (0xC0000000 | 0x00f3)
+#define NT_STATUS_INVALID_PARAMETER_6 (0xC0000000 | 0x00f4)
+#define NT_STATUS_INVALID_PARAMETER_7 (0xC0000000 | 0x00f5)
+#define NT_STATUS_INVALID_PARAMETER_8 (0xC0000000 | 0x00f6)
+#define NT_STATUS_INVALID_PARAMETER_9 (0xC0000000 | 0x00f7)
+#define NT_STATUS_INVALID_PARAMETER_10 (0xC0000000 | 0x00f8)
+#define NT_STATUS_INVALID_PARAMETER_11 (0xC0000000 | 0x00f9)
+#define NT_STATUS_INVALID_PARAMETER_12 (0xC0000000 | 0x00fa)
+#define NT_STATUS_REDIRECTOR_NOT_STARTED (0xC0000000 | 0x00fb)
+#define NT_STATUS_REDIRECTOR_STARTED (0xC0000000 | 0x00fc)
+#define NT_STATUS_STACK_OVERFLOW (0xC0000000 | 0x00fd)
+#define NT_STATUS_NO_SUCH_PACKAGE (0xC0000000 | 0x00fe)
+#define NT_STATUS_BAD_FUNCTION_TABLE (0xC0000000 | 0x00ff)
+#define NT_STATUS_DIRECTORY_NOT_EMPTY (0xC0000000 | 0x0101)
+#define NT_STATUS_FILE_CORRUPT_ERROR (0xC0000000 | 0x0102)
+#define NT_STATUS_NOT_A_DIRECTORY (0xC0000000 | 0x0103)
+#define NT_STATUS_BAD_LOGON_SESSION_STATE (0xC0000000 | 0x0104)
+#define NT_STATUS_LOGON_SESSION_COLLISION (0xC0000000 | 0x0105)
+#define NT_STATUS_NAME_TOO_LONG (0xC0000000 | 0x0106)
+#define NT_STATUS_FILES_OPEN (0xC0000000 | 0x0107)
+#define NT_STATUS_CONNECTION_IN_USE (0xC0000000 | 0x0108)
+#define NT_STATUS_MESSAGE_NOT_FOUND (0xC0000000 | 0x0109)
+#define NT_STATUS_PROCESS_IS_TERMINATING (0xC0000000 | 0x010a)
+#define NT_STATUS_INVALID_LOGON_TYPE (0xC0000000 | 0x010b)
+#define NT_STATUS_NO_GUID_TRANSLATION (0xC0000000 | 0x010c)
+#define NT_STATUS_CANNOT_IMPERSONATE (0xC0000000 | 0x010d)
+#define NT_STATUS_IMAGE_ALREADY_LOADED (0xC0000000 | 0x010e)
+#define NT_STATUS_ABIOS_NOT_PRESENT (0xC0000000 | 0x010f)
+#define NT_STATUS_ABIOS_LID_NOT_EXIST (0xC0000000 | 0x0110)
+#define NT_STATUS_ABIOS_LID_ALREADY_OWNED (0xC0000000 | 0x0111)
+#define NT_STATUS_ABIOS_NOT_LID_OWNER (0xC0000000 | 0x0112)
+#define NT_STATUS_ABIOS_INVALID_COMMAND (0xC0000000 | 0x0113)
+#define NT_STATUS_ABIOS_INVALID_LID (0xC0000000 | 0x0114)
+#define NT_STATUS_ABIOS_SELECTOR_NOT_AVAILABLE (0xC0000000 | 0x0115)
+#define NT_STATUS_ABIOS_INVALID_SELECTOR (0xC0000000 | 0x0116)
+#define NT_STATUS_NO_LDT (0xC0000000 | 0x0117)
+#define NT_STATUS_INVALID_LDT_SIZE (0xC0000000 | 0x0118)
+#define NT_STATUS_INVALID_LDT_OFFSET (0xC0000000 | 0x0119)
+#define NT_STATUS_INVALID_LDT_DESCRIPTOR (0xC0000000 | 0x011a)
+#define NT_STATUS_INVALID_IMAGE_NE_FORMAT (0xC0000000 | 0x011b)
+#define NT_STATUS_RXACT_INVALID_STATE (0xC0000000 | 0x011c)
+#define NT_STATUS_RXACT_COMMIT_FAILURE (0xC0000000 | 0x011d)
+#define NT_STATUS_MAPPED_FILE_SIZE_ZERO (0xC0000000 | 0x011e)
+#define NT_STATUS_TOO_MANY_OPENED_FILES (0xC0000000 | 0x011f)
+#define NT_STATUS_CANCELLED (0xC0000000 | 0x0120)
+#define NT_STATUS_CANNOT_DELETE (0xC0000000 | 0x0121)
+#define NT_STATUS_INVALID_COMPUTER_NAME (0xC0000000 | 0x0122)
+#define NT_STATUS_FILE_DELETED (0xC0000000 | 0x0123)
+#define NT_STATUS_SPECIAL_ACCOUNT (0xC0000000 | 0x0124)
+#define NT_STATUS_SPECIAL_GROUP (0xC0000000 | 0x0125)
+#define NT_STATUS_SPECIAL_USER (0xC0000000 | 0x0126)
+#define NT_STATUS_MEMBERS_PRIMARY_GROUP (0xC0000000 | 0x0127)
+#define NT_STATUS_FILE_CLOSED (0xC0000000 | 0x0128)
+#define NT_STATUS_TOO_MANY_THREADS (0xC0000000 | 0x0129)
+#define NT_STATUS_THREAD_NOT_IN_PROCESS (0xC0000000 | 0x012a)
+#define NT_STATUS_TOKEN_ALREADY_IN_USE (0xC0000000 | 0x012b)
+#define NT_STATUS_PAGEFILE_QUOTA_EXCEEDED (0xC0000000 | 0x012c)
+#define NT_STATUS_COMMITMENT_LIMIT (0xC0000000 | 0x012d)
+#define NT_STATUS_INVALID_IMAGE_LE_FORMAT (0xC0000000 | 0x012e)
+#define NT_STATUS_INVALID_IMAGE_NOT_MZ (0xC0000000 | 0x012f)
+#define NT_STATUS_INVALID_IMAGE_PROTECT (0xC0000000 | 0x0130)
+#define NT_STATUS_INVALID_IMAGE_WIN_16 (0xC0000000 | 0x0131)
+#define NT_STATUS_LOGON_SERVER_CONFLICT (0xC0000000 | 0x0132)
+#define NT_STATUS_TIME_DIFFERENCE_AT_DC (0xC0000000 | 0x0133)
+#define NT_STATUS_SYNCHRONIZATION_REQUIRED (0xC0000000 | 0x0134)
+#define NT_STATUS_DLL_NOT_FOUND (0xC0000000 | 0x0135)
+#define NT_STATUS_OPEN_FAILED (0xC0000000 | 0x0136)
+#define NT_STATUS_IO_PRIVILEGE_FAILED (0xC0000000 | 0x0137)
+#define NT_STATUS_ORDINAL_NOT_FOUND (0xC0000000 | 0x0138)
+#define NT_STATUS_ENTRYPOINT_NOT_FOUND (0xC0000000 | 0x0139)
+#define NT_STATUS_CONTROL_C_EXIT (0xC0000000 | 0x013a)
+#define NT_STATUS_LOCAL_DISCONNECT (0xC0000000 | 0x013b)
+#define NT_STATUS_REMOTE_DISCONNECT (0xC0000000 | 0x013c)
+#define NT_STATUS_REMOTE_RESOURCES (0xC0000000 | 0x013d)
+#define NT_STATUS_LINK_FAILED (0xC0000000 | 0x013e)
+#define NT_STATUS_LINK_TIMEOUT (0xC0000000 | 0x013f)
+#define NT_STATUS_INVALID_CONNECTION (0xC0000000 | 0x0140)
+#define NT_STATUS_INVALID_ADDRESS (0xC0000000 | 0x0141)
+#define NT_STATUS_DLL_INIT_FAILED (0xC0000000 | 0x0142)
+#define NT_STATUS_MISSING_SYSTEMFILE (0xC0000000 | 0x0143)
+#define NT_STATUS_UNHANDLED_EXCEPTION (0xC0000000 | 0x0144)
+#define NT_STATUS_APP_INIT_FAILURE (0xC0000000 | 0x0145)
+#define NT_STATUS_PAGEFILE_CREATE_FAILED (0xC0000000 | 0x0146)
+#define NT_STATUS_NO_PAGEFILE (0xC0000000 | 0x0147)
+#define NT_STATUS_INVALID_LEVEL (0xC0000000 | 0x0148)
+#define NT_STATUS_WRONG_PASSWORD_CORE (0xC0000000 | 0x0149)
+#define NT_STATUS_ILLEGAL_FLOAT_CONTEXT (0xC0000000 | 0x014a)
+#define NT_STATUS_PIPE_BROKEN (0xC0000000 | 0x014b)
+#define NT_STATUS_REGISTRY_CORRUPT (0xC0000000 | 0x014c)
+#define NT_STATUS_REGISTRY_IO_FAILED (0xC0000000 | 0x014d)
+#define NT_STATUS_NO_EVENT_PAIR (0xC0000000 | 0x014e)
+#define NT_STATUS_UNRECOGNIZED_VOLUME (0xC0000000 | 0x014f)
+#define NT_STATUS_SERIAL_NO_DEVICE_INITED (0xC0000000 | 0x0150)
+#define NT_STATUS_NO_SUCH_ALIAS (0xC0000000 | 0x0151)
+#define NT_STATUS_MEMBER_NOT_IN_ALIAS (0xC0000000 | 0x0152)
+#define NT_STATUS_MEMBER_IN_ALIAS (0xC0000000 | 0x0153)
+#define NT_STATUS_ALIAS_EXISTS (0xC0000000 | 0x0154)
+#define NT_STATUS_LOGON_NOT_GRANTED (0xC0000000 | 0x0155)
+#define NT_STATUS_TOO_MANY_SECRETS (0xC0000000 | 0x0156)
+#define NT_STATUS_SECRET_TOO_LONG (0xC0000000 | 0x0157)
+#define NT_STATUS_INTERNAL_DB_ERROR (0xC0000000 | 0x0158)
+#define NT_STATUS_FULLSCREEN_MODE (0xC0000000 | 0x0159)
+#define NT_STATUS_TOO_MANY_CONTEXT_IDS (0xC0000000 | 0x015a)
+#define NT_STATUS_LOGON_TYPE_NOT_GRANTED (0xC0000000 | 0x015b)
+#define NT_STATUS_NOT_REGISTRY_FILE (0xC0000000 | 0x015c)
+#define NT_STATUS_NT_CROSS_ENCRYPTION_REQUIRED (0xC0000000 | 0x015d)
+#define NT_STATUS_DOMAIN_CTRLR_CONFIG_ERROR (0xC0000000 | 0x015e)
+#define NT_STATUS_FT_MISSING_MEMBER (0xC0000000 | 0x015f)
+#define NT_STATUS_ILL_FORMED_SERVICE_ENTRY (0xC0000000 | 0x0160)
+#define NT_STATUS_ILLEGAL_CHARACTER (0xC0000000 | 0x0161)
+#define NT_STATUS_UNMAPPABLE_CHARACTER (0xC0000000 | 0x0162)
+#define NT_STATUS_UNDEFINED_CHARACTER (0xC0000000 | 0x0163)
+#define NT_STATUS_FLOPPY_VOLUME (0xC0000000 | 0x0164)
+#define NT_STATUS_FLOPPY_ID_MARK_NOT_FOUND (0xC0000000 | 0x0165)
+#define NT_STATUS_FLOPPY_WRONG_CYLINDER (0xC0000000 | 0x0166)
+#define NT_STATUS_FLOPPY_UNKNOWN_ERROR (0xC0000000 | 0x0167)
+#define NT_STATUS_FLOPPY_BAD_REGISTERS (0xC0000000 | 0x0168)
+#define NT_STATUS_DISK_RECALIBRATE_FAILED (0xC0000000 | 0x0169)
+#define NT_STATUS_DISK_OPERATION_FAILED (0xC0000000 | 0x016a)
+#define NT_STATUS_DISK_RESET_FAILED (0xC0000000 | 0x016b)
+#define NT_STATUS_SHARED_IRQ_BUSY (0xC0000000 | 0x016c)
+#define NT_STATUS_FT_ORPHANING (0xC0000000 | 0x016d)
+#define NT_STATUS_PARTITION_FAILURE (0xC0000000 | 0x0172)
+#define NT_STATUS_INVALID_BLOCK_LENGTH (0xC0000000 | 0x0173)
+#define NT_STATUS_DEVICE_NOT_PARTITIONED (0xC0000000 | 0x0174)
+#define NT_STATUS_UNABLE_TO_LOCK_MEDIA (0xC0000000 | 0x0175)
+#define NT_STATUS_UNABLE_TO_UNLOAD_MEDIA (0xC0000000 | 0x0176)
+#define NT_STATUS_EOM_OVERFLOW (0xC0000000 | 0x0177)
+#define NT_STATUS_NO_MEDIA (0xC0000000 | 0x0178)
+#define NT_STATUS_NO_SUCH_MEMBER (0xC0000000 | 0x017a)
+#define NT_STATUS_INVALID_MEMBER (0xC0000000 | 0x017b)
+#define NT_STATUS_KEY_DELETED (0xC0000000 | 0x017c)
+#define NT_STATUS_NO_LOG_SPACE (0xC0000000 | 0x017d)
+#define NT_STATUS_TOO_MANY_SIDS (0xC0000000 | 0x017e)
+#define NT_STATUS_LM_CROSS_ENCRYPTION_REQUIRED (0xC0000000 | 0x017f)
+#define NT_STATUS_KEY_HAS_CHILDREN (0xC0000000 | 0x0180)
+#define NT_STATUS_CHILD_MUST_BE_VOLATILE (0xC0000000 | 0x0181)
+#define NT_STATUS_DEVICE_CONFIGURATION_ERROR (0xC0000000 | 0x0182)
+#define NT_STATUS_DRIVER_INTERNAL_ERROR (0xC0000000 | 0x0183)
+#define NT_STATUS_INVALID_DEVICE_STATE (0xC0000000 | 0x0184)
+#define NT_STATUS_IO_DEVICE_ERROR (0xC0000000 | 0x0185)
+#define NT_STATUS_DEVICE_PROTOCOL_ERROR (0xC0000000 | 0x0186)
+#define NT_STATUS_BACKUP_CONTROLLER (0xC0000000 | 0x0187)
+#define NT_STATUS_LOG_FILE_FULL (0xC0000000 | 0x0188)
+#define NT_STATUS_TOO_LATE (0xC0000000 | 0x0189)
+#define NT_STATUS_NO_TRUST_LSA_SECRET (0xC0000000 | 0x018a)
+#define NT_STATUS_NO_TRUST_SAM_ACCOUNT (0xC0000000 | 0x018b)
+#define NT_STATUS_TRUSTED_DOMAIN_FAILURE (0xC0000000 | 0x018c)
+#define NT_STATUS_TRUSTED_RELATIONSHIP_FAILURE (0xC0000000 | 0x018d)
+#define NT_STATUS_EVENTLOG_FILE_CORRUPT (0xC0000000 | 0x018e)
+#define NT_STATUS_EVENTLOG_CANT_START (0xC0000000 | 0x018f)
+#define NT_STATUS_TRUST_FAILURE (0xC0000000 | 0x0190)
+#define NT_STATUS_MUTANT_LIMIT_EXCEEDED (0xC0000000 | 0x0191)
+#define NT_STATUS_NETLOGON_NOT_STARTED (0xC0000000 | 0x0192)
+#define NT_STATUS_ACCOUNT_EXPIRED (0xC0000000 | 0x0193)
+#define NT_STATUS_POSSIBLE_DEADLOCK (0xC0000000 | 0x0194)
+#define NT_STATUS_NETWORK_CREDENTIAL_CONFLICT (0xC0000000 | 0x0195)
+#define NT_STATUS_REMOTE_SESSION_LIMIT (0xC0000000 | 0x0196)
+#define NT_STATUS_EVENTLOG_FILE_CHANGED (0xC0000000 | 0x0197)
+#define NT_STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT (0xC0000000 | 0x0198)
+#define NT_STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT (0xC0000000 | 0x0199)
+#define NT_STATUS_NOLOGON_SERVER_TRUST_ACCOUNT (0xC0000000 | 0x019a)
+#define NT_STATUS_DOMAIN_TRUST_INCONSISTENT (0xC0000000 | 0x019b)
+#define NT_STATUS_FS_DRIVER_REQUIRED (0xC0000000 | 0x019c)
+#define NT_STATUS_NO_USER_SESSION_KEY (0xC0000000 | 0x0202)
+#define NT_STATUS_USER_SESSION_DELETED (0xC0000000 | 0x0203)
+#define NT_STATUS_RESOURCE_LANG_NOT_FOUND (0xC0000000 | 0x0204)
+#define NT_STATUS_INSUFF_SERVER_RESOURCES (0xC0000000 | 0x0205)
+#define NT_STATUS_INVALID_BUFFER_SIZE (0xC0000000 | 0x0206)
+#define NT_STATUS_INVALID_ADDRESS_COMPONENT (0xC0000000 | 0x0207)
+#define NT_STATUS_INVALID_ADDRESS_WILDCARD (0xC0000000 | 0x0208)
+#define NT_STATUS_TOO_MANY_ADDRESSES (0xC0000000 | 0x0209)
+#define NT_STATUS_ADDRESS_ALREADY_EXISTS (0xC0000000 | 0x020a)
+#define NT_STATUS_ADDRESS_CLOSED (0xC0000000 | 0x020b)
+#define NT_STATUS_CONNECTION_DISCONNECTED (0xC0000000 | 0x020c)
+#define NT_STATUS_CONNECTION_RESET (0xC0000000 | 0x020d)
+#define NT_STATUS_TOO_MANY_NODES (0xC0000000 | 0x020e)
+#define NT_STATUS_TRANSACTION_ABORTED (0xC0000000 | 0x020f)
+#define NT_STATUS_TRANSACTION_TIMED_OUT (0xC0000000 | 0x0210)
+#define NT_STATUS_TRANSACTION_NO_RELEASE (0xC0000000 | 0x0211)
+#define NT_STATUS_TRANSACTION_NO_MATCH (0xC0000000 | 0x0212)
+#define NT_STATUS_TRANSACTION_RESPONDED (0xC0000000 | 0x0213)
+#define NT_STATUS_TRANSACTION_INVALID_ID (0xC0000000 | 0x0214)
+#define NT_STATUS_TRANSACTION_INVALID_TYPE (0xC0000000 | 0x0215)
+#define NT_STATUS_NOT_SERVER_SESSION (0xC0000000 | 0x0216)
+#define NT_STATUS_NOT_CLIENT_SESSION (0xC0000000 | 0x0217)
+#define NT_STATUS_CANNOT_LOAD_REGISTRY_FILE (0xC0000000 | 0x0218)
+#define NT_STATUS_DEBUG_ATTACH_FAILED (0xC0000000 | 0x0219)
+#define NT_STATUS_SYSTEM_PROCESS_TERMINATED (0xC0000000 | 0x021a)
+#define NT_STATUS_DATA_NOT_ACCEPTED (0xC0000000 | 0x021b)
+#define NT_STATUS_NO_BROWSER_SERVERS_FOUND (0xC0000000 | 0x021c)
+#define NT_STATUS_VDM_HARD_ERROR (0xC0000000 | 0x021d)
+#define NT_STATUS_DRIVER_CANCEL_TIMEOUT (0xC0000000 | 0x021e)
+#define NT_STATUS_REPLY_MESSAGE_MISMATCH (0xC0000000 | 0x021f)
+#define NT_STATUS_MAPPED_ALIGNMENT (0xC0000000 | 0x0220)
+#define NT_STATUS_IMAGE_CHECKSUM_MISMATCH (0xC0000000 | 0x0221)
+#define NT_STATUS_LOST_WRITEBEHIND_DATA (0xC0000000 | 0x0222)
+#define NT_STATUS_CLIENT_SERVER_PARAMETERS_INVALID (0xC0000000 | 0x0223)
+#define NT_STATUS_PASSWORD_MUST_CHANGE (0xC0000000 | 0x0224)
+#define NT_STATUS_NOT_FOUND (0xC0000000 | 0x0225)
+#define NT_STATUS_NOT_TINY_STREAM (0xC0000000 | 0x0226)
+#define NT_STATUS_RECOVERY_FAILURE (0xC0000000 | 0x0227)
+#define NT_STATUS_STACK_OVERFLOW_READ (0xC0000000 | 0x0228)
+#define NT_STATUS_FAIL_CHECK (0xC0000000 | 0x0229)
+#define NT_STATUS_DUPLICATE_OBJECTID (0xC0000000 | 0x022a)
+#define NT_STATUS_OBJECTID_EXISTS (0xC0000000 | 0x022b)
+#define NT_STATUS_CONVERT_TO_LARGE (0xC0000000 | 0x022c)
+#define NT_STATUS_RETRY (0xC0000000 | 0x022d)
+#define NT_STATUS_FOUND_OUT_OF_SCOPE (0xC0000000 | 0x022e)
+#define NT_STATUS_ALLOCATE_BUCKET (0xC0000000 | 0x022f)
+#define NT_STATUS_PROPSET_NOT_FOUND (0xC0000000 | 0x0230)
+#define NT_STATUS_MARSHALL_OVERFLOW (0xC0000000 | 0x0231)
+#define NT_STATUS_INVALID_VARIANT (0xC0000000 | 0x0232)
+#define NT_STATUS_DOMAIN_CONTROLLER_NOT_FOUND (0xC0000000 | 0x0233)
+#define NT_STATUS_ACCOUNT_LOCKED_OUT (0xC0000000 | 0x0234)
+#define NT_STATUS_HANDLE_NOT_CLOSABLE (0xC0000000 | 0x0235)
+#define NT_STATUS_CONNECTION_REFUSED (0xC0000000 | 0x0236)
+#define NT_STATUS_GRACEFUL_DISCONNECT (0xC0000000 | 0x0237)
+#define NT_STATUS_ADDRESS_ALREADY_ASSOCIATED (0xC0000000 | 0x0238)
+#define NT_STATUS_ADDRESS_NOT_ASSOCIATED (0xC0000000 | 0x0239)
+#define NT_STATUS_CONNECTION_INVALID (0xC0000000 | 0x023a)
+#define NT_STATUS_CONNECTION_ACTIVE (0xC0000000 | 0x023b)
+#define NT_STATUS_NETWORK_UNREACHABLE (0xC0000000 | 0x023c)
+#define NT_STATUS_HOST_UNREACHABLE (0xC0000000 | 0x023d)
+#define NT_STATUS_PROTOCOL_UNREACHABLE (0xC0000000 | 0x023e)
+#define NT_STATUS_PORT_UNREACHABLE (0xC0000000 | 0x023f)
+#define NT_STATUS_REQUEST_ABORTED (0xC0000000 | 0x0240)
+#define NT_STATUS_CONNECTION_ABORTED (0xC0000000 | 0x0241)
+#define NT_STATUS_BAD_COMPRESSION_BUFFER (0xC0000000 | 0x0242)
+#define NT_STATUS_USER_MAPPED_FILE (0xC0000000 | 0x0243)
+#define NT_STATUS_AUDIT_FAILED (0xC0000000 | 0x0244)
+#define NT_STATUS_TIMER_RESOLUTION_NOT_SET (0xC0000000 | 0x0245)
+#define NT_STATUS_CONNECTION_COUNT_LIMIT (0xC0000000 | 0x0246)
+#define NT_STATUS_LOGIN_TIME_RESTRICTION (0xC0000000 | 0x0247)
+#define NT_STATUS_LOGIN_WKSTA_RESTRICTION (0xC0000000 | 0x0248)
+#define NT_STATUS_IMAGE_MP_UP_MISMATCH (0xC0000000 | 0x0249)
+#define NT_STATUS_INSUFFICIENT_LOGON_INFO (0xC0000000 | 0x0250)
+#define NT_STATUS_BAD_DLL_ENTRYPOINT (0xC0000000 | 0x0251)
+#define NT_STATUS_BAD_SERVICE_ENTRYPOINT (0xC0000000 | 0x0252)
+#define NT_STATUS_LPC_REPLY_LOST (0xC0000000 | 0x0253)
+#define NT_STATUS_IP_ADDRESS_CONFLICT1 (0xC0000000 | 0x0254)
+#define NT_STATUS_IP_ADDRESS_CONFLICT2 (0xC0000000 | 0x0255)
+#define NT_STATUS_REGISTRY_QUOTA_LIMIT (0xC0000000 | 0x0256)
+#define NT_STATUS_PATH_NOT_COVERED (0xC0000000 | 0x0257)
+#define NT_STATUS_NO_CALLBACK_ACTIVE (0xC0000000 | 0x0258)
+#define NT_STATUS_LICENSE_QUOTA_EXCEEDED (0xC0000000 | 0x0259)
+#define NT_STATUS_PWD_TOO_SHORT (0xC0000000 | 0x025a)
+#define NT_STATUS_PWD_TOO_RECENT (0xC0000000 | 0x025b)
+#define NT_STATUS_PWD_HISTORY_CONFLICT (0xC0000000 | 0x025c)
+#define NT_STATUS_PLUGPLAY_NO_DEVICE (0xC0000000 | 0x025e)
+#define NT_STATUS_UNSUPPORTED_COMPRESSION (0xC0000000 | 0x025f)
+#define NT_STATUS_INVALID_HW_PROFILE (0xC0000000 | 0x0260)
+#define NT_STATUS_INVALID_PLUGPLAY_DEVICE_PATH (0xC0000000 | 0x0261)
+#define NT_STATUS_DRIVER_ORDINAL_NOT_FOUND (0xC0000000 | 0x0262)
+#define NT_STATUS_DRIVER_ENTRYPOINT_NOT_FOUND (0xC0000000 | 0x0263)
+#define NT_STATUS_RESOURCE_NOT_OWNED (0xC0000000 | 0x0264)
+#define NT_STATUS_TOO_MANY_LINKS (0xC0000000 | 0x0265)
+#define NT_STATUS_QUOTA_LIST_INCONSISTENT (0xC0000000 | 0x0266)
+#define NT_STATUS_FILE_IS_OFFLINE (0xC0000000 | 0x0267)
+#define NT_STATUS_NETWORK_SESSION_EXPIRED (0xC0000000 | 0x035c)
+#define NT_STATUS_NO_SUCH_JOB (0xC0000000 | 0xEDE) /* scheduler */
+#define NT_STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP (0xC0000000 | 0x5D0000)
+#define NT_STATUS_PENDING 0x00000103
+#endif /* _NTERR_H */
diff --git a/fs/ksmbd/ntlmssp.h b/fs/ksmbd/ntlmssp.h
new file mode 100644
index 000000000000..adaf4c0cbe8f
--- /dev/null
+++ b/fs/ksmbd/ntlmssp.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * Copyright (c) International Business Machines Corp., 2002,2007
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ */
+
+#ifndef __KSMBD_NTLMSSP_H
+#define __KSMBD_NTLMSSP_H
+
+#define NTLMSSP_SIGNATURE "NTLMSSP"
+
+/* Security blob target info data */
+#define TGT_Name "KSMBD"
+
+/*
+ * Size of the crypto key returned on the negotiate SMB in bytes
+ */
+#define CIFS_CRYPTO_KEY_SIZE (8)
+#define CIFS_KEY_SIZE (40)
+
+/*
+ * Size of encrypted user password in bytes
+ */
+#define CIFS_ENCPWD_SIZE (16)
+#define CIFS_CPHTXT_SIZE (16)
+
+/* Message Types */
+#define NtLmNegotiate cpu_to_le32(1)
+#define NtLmChallenge cpu_to_le32(2)
+#define NtLmAuthenticate cpu_to_le32(3)
+#define UnknownMessage cpu_to_le32(8)
+
+/* Negotiate Flags */
+#define NTLMSSP_NEGOTIATE_UNICODE 0x01 /* Text strings are unicode */
+#define NTLMSSP_NEGOTIATE_OEM 0x02 /* Text strings are in OEM */
+#define NTLMSSP_REQUEST_TARGET 0x04 /* Srv returns its auth realm */
+/* define reserved9 0x08 */
+#define NTLMSSP_NEGOTIATE_SIGN 0x0010 /* Request signing capability */
+#define NTLMSSP_NEGOTIATE_SEAL 0x0020 /* Request confidentiality */
+#define NTLMSSP_NEGOTIATE_DGRAM 0x0040
+#define NTLMSSP_NEGOTIATE_LM_KEY 0x0080 /* Use LM session key */
+/* defined reserved 8 0x0100 */
+#define NTLMSSP_NEGOTIATE_NTLM 0x0200 /* NTLM authentication */
+#define NTLMSSP_NEGOTIATE_NT_ONLY 0x0400 /* Lanman not allowed */
+#define NTLMSSP_ANONYMOUS 0x0800
+#define NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED 0x1000 /* reserved6 */
+#define NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED 0x2000
+#define NTLMSSP_NEGOTIATE_LOCAL_CALL 0x4000 /* client/server same machine */
+#define NTLMSSP_NEGOTIATE_ALWAYS_SIGN 0x8000 /* Sign. All security levels */
+#define NTLMSSP_TARGET_TYPE_DOMAIN 0x10000
+#define NTLMSSP_TARGET_TYPE_SERVER 0x20000
+#define NTLMSSP_TARGET_TYPE_SHARE 0x40000
+#define NTLMSSP_NEGOTIATE_EXTENDED_SEC 0x80000 /* NB:not related to NTLMv2 pwd*/
+/* #define NTLMSSP_REQUEST_INIT_RESP 0x100000 */
+#define NTLMSSP_NEGOTIATE_IDENTIFY 0x100000
+#define NTLMSSP_REQUEST_ACCEPT_RESP 0x200000 /* reserved5 */
+#define NTLMSSP_REQUEST_NON_NT_KEY 0x400000
+#define NTLMSSP_NEGOTIATE_TARGET_INFO 0x800000
+/* #define reserved4 0x1000000 */
+#define NTLMSSP_NEGOTIATE_VERSION 0x2000000 /* we do not set */
+/* #define reserved3 0x4000000 */
+/* #define reserved2 0x8000000 */
+/* #define reserved1 0x10000000 */
+#define NTLMSSP_NEGOTIATE_128 0x20000000
+#define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000
+#define NTLMSSP_NEGOTIATE_56 0x80000000
+
+/* Define AV Pair Field IDs */
+enum av_field_type {
+ NTLMSSP_AV_EOL = 0,
+ NTLMSSP_AV_NB_COMPUTER_NAME,
+ NTLMSSP_AV_NB_DOMAIN_NAME,
+ NTLMSSP_AV_DNS_COMPUTER_NAME,
+ NTLMSSP_AV_DNS_DOMAIN_NAME,
+ NTLMSSP_AV_DNS_TREE_NAME,
+ NTLMSSP_AV_FLAGS,
+ NTLMSSP_AV_TIMESTAMP,
+ NTLMSSP_AV_RESTRICTION,
+ NTLMSSP_AV_TARGET_NAME,
+ NTLMSSP_AV_CHANNEL_BINDINGS
+};
+
+/*
+ * CamelCase member names are not common in the Linux kernel, but in this
+ * particular case they are useful to more closely match the standards
+ * document for NTLMSSP from the OpenGroup and to make the code more
+ * closely match the standard in appearance.
+ */
+
+struct security_buffer {
+ __le16 Length;
+ __le16 MaximumLength;
+ __le32 BufferOffset; /* offset to buffer */
+} __packed;
+
+struct target_info {
+ __le16 Type;
+ __le16 Length;
+ __u8 Content[];
+} __packed;
+
+struct negotiate_message {
+ __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
+ __le32 MessageType; /* NtLmNegotiate = 1 */
+ __le32 NegotiateFlags;
+ struct security_buffer DomainName; /* RFC 1001 style and ASCII */
+ struct security_buffer WorkstationName; /* RFC 1001 and ASCII */
+ /*
+ * struct security_buffer for version info not present since we
+ * do not set the version is present flag
+ */
+ char DomainString[];
+ /* followed by WorkstationString */
+} __packed;
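+
+/*
+ * A security_buffer never holds data inline: Length/MaximumLength give
+ * the payload size and BufferOffset its position from the start of the
+ * message. For example, the DomainName payload of a negotiate_message
+ * lives at (char *)msg + le32_to_cpu(msg->DomainName.BufferOffset),
+ * normally inside the trailing DomainString/WorkstationString area.
+ */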
+
+struct challenge_message {
+ __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
+ __le32 MessageType; /* NtLmChallenge = 2 */
+ struct security_buffer TargetName;
+ __le32 NegotiateFlags;
+ __u8 Challenge[CIFS_CRYPTO_KEY_SIZE];
+ __u8 Reserved[8];
+ struct security_buffer TargetInfoArray;
+ /*
+ * struct security_buffer for version info not present since we
+ * do not set the version is present flag
+ */
+} __packed;
+
+struct authenticate_message {
+ __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
+ __le32 MessageType; /* NtLmsAuthenticate = 3 */
+ struct security_buffer LmChallengeResponse;
+ struct security_buffer NtChallengeResponse;
+ struct security_buffer DomainName;
+ struct security_buffer UserName;
+ struct security_buffer WorkstationName;
+ struct security_buffer SessionKey;
+ __le32 NegotiateFlags;
+ /*
+ * struct security_buffer for version info not present since we
+ * do not set the version is present flag
+ */
+ char UserString[];
+} __packed;
+
+struct ntlmv2_resp {
+ char ntlmv2_hash[CIFS_ENCPWD_SIZE];
+ __le32 blob_signature;
+ __u32 reserved;
+ __le64 time;
+ __u64 client_chal; /* random */
+ __u32 reserved2;
+ /* an array of name entries may follow, terminated by a minimal 4-byte struct */
+} __packed;
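+
+/*
+ * Despite its name, ntlmv2_hash above carries the 16-byte NTProofStr
+ * (an HMAC-MD5), blob_signature is the fixed NTLMv2 blob header
+ * 0x00000101, and the AV pairs from enum av_field_type follow
+ * client_chal/reserved2, terminated by NTLMSSP_AV_EOL.
+ */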
+
+/* per smb session structure/fields */
+struct ntlmssp_auth {
+ /* whether session key is per smb session */
+ bool sesskey_per_smbsess;
+ /* sent by client in type 1 ntlmssp exchange */
+ __u32 client_flags;
+ /* sent by server in type 2 ntlmssp exchange */
+ __u32 conn_flags;
+ /* sent to server */
+ unsigned char ciphertext[CIFS_CPHTXT_SIZE];
+ /* used by ntlmssp */
+ char cryptkey[CIFS_CRYPTO_KEY_SIZE];
+};
+#endif /* __KSMBD_NTLMSSP_H */
diff --git a/fs/ksmbd/oplock.c b/fs/ksmbd/oplock.c
new file mode 100644
index 000000000000..6ace6c2f22dc
--- /dev/null
+++ b/fs/ksmbd/oplock.c
@@ -0,0 +1,1709 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/moduleparam.h>
+
+#include "glob.h"
+#include "oplock.h"
+
+#include "smb_common.h"
+#include "smbstatus.h"
+#include "connection.h"
+#include "mgmt/user_session.h"
+#include "mgmt/share_config.h"
+#include "mgmt/tree_connect.h"
+
+static LIST_HEAD(lease_table_list);
+static DEFINE_RWLOCK(lease_list_lock);
+
+/**
+ * alloc_opinfo() - allocate a new opinfo object for oplock info
+ * @work: smb work
+ * @id: fid of open file
+ * @Tid: tree id of connection
+ *
+ * Return: allocated opinfo object on success, otherwise NULL
+ */
+static struct oplock_info *alloc_opinfo(struct ksmbd_work *work,
+ u64 id, __u16 Tid)
+{
+ struct ksmbd_session *sess = work->sess;
+ struct oplock_info *opinfo;
+
+ opinfo = kzalloc(sizeof(struct oplock_info), GFP_KERNEL);
+ if (!opinfo)
+ return NULL;
+
+ opinfo->sess = sess;
+ opinfo->conn = sess->conn;
+ opinfo->level = SMB2_OPLOCK_LEVEL_NONE;
+ opinfo->op_state = OPLOCK_STATE_NONE;
+ opinfo->pending_break = 0;
+ opinfo->fid = id;
+ opinfo->Tid = Tid;
+ INIT_LIST_HEAD(&opinfo->op_entry);
+ INIT_LIST_HEAD(&opinfo->interim_list);
+ init_waitqueue_head(&opinfo->oplock_q);
+ init_waitqueue_head(&opinfo->oplock_brk);
+ atomic_set(&opinfo->refcount, 1);
+ atomic_set(&opinfo->breaking_cnt, 0);
+
+ return opinfo;
+}
+
+static void lease_add_list(struct oplock_info *opinfo)
+{
+ struct lease_table *lb = opinfo->o_lease->l_lb;
+
+ spin_lock(&lb->lb_lock);
+ list_add_rcu(&opinfo->lease_entry, &lb->lease_list);
+ spin_unlock(&lb->lb_lock);
+}
+
+static void lease_del_list(struct oplock_info *opinfo)
+{
+ struct lease_table *lb = opinfo->o_lease->l_lb;
+
+ if (!lb)
+ return;
+
+ spin_lock(&lb->lb_lock);
+ if (list_empty(&opinfo->lease_entry)) {
+ spin_unlock(&lb->lb_lock);
+ return;
+ }
+
+ list_del_init(&opinfo->lease_entry);
+ opinfo->o_lease->l_lb = NULL;
+ spin_unlock(&lb->lb_lock);
+}
+
+static void lb_add(struct lease_table *lb)
+{
+ write_lock(&lease_list_lock);
+ list_add(&lb->l_entry, &lease_table_list);
+ write_unlock(&lease_list_lock);
+}
+
+static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx)
+{
+ struct lease *lease;
+
+ lease = kmalloc(sizeof(struct lease), GFP_KERNEL);
+ if (!lease)
+ return -ENOMEM;
+
+ memcpy(lease->lease_key, lctx->lease_key, SMB2_LEASE_KEY_SIZE);
+ lease->state = lctx->req_state;
+ lease->new_state = 0;
+ lease->flags = lctx->flags;
+ lease->duration = lctx->duration;
+ memcpy(lease->parent_lease_key, lctx->parent_lease_key, SMB2_LEASE_KEY_SIZE);
+ lease->version = lctx->version;
+ lease->epoch = 0;
+ INIT_LIST_HEAD(&opinfo->lease_entry);
+ opinfo->o_lease = lease;
+
+ return 0;
+}
+
+static void free_lease(struct oplock_info *opinfo)
+{
+ struct lease *lease;
+
+ lease = opinfo->o_lease;
+ kfree(lease);
+}
+
+static void free_opinfo(struct oplock_info *opinfo)
+{
+ if (opinfo->is_lease)
+ free_lease(opinfo);
+ kfree(opinfo);
+}
+
+static inline void opinfo_free_rcu(struct rcu_head *rcu_head)
+{
+ struct oplock_info *opinfo;
+
+ opinfo = container_of(rcu_head, struct oplock_info, rcu_head);
+ free_opinfo(opinfo);
+}
+
+struct oplock_info *opinfo_get(struct ksmbd_file *fp)
+{
+ struct oplock_info *opinfo;
+
+ rcu_read_lock();
+ opinfo = rcu_dereference(fp->f_opinfo);
+ if (opinfo && !atomic_inc_not_zero(&opinfo->refcount))
+ opinfo = NULL;
+ rcu_read_unlock();
+
+ return opinfo;
+}
+
+static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci)
+{
+ struct oplock_info *opinfo;
+
+ if (list_empty(&ci->m_op_list))
+ return NULL;
+
+ rcu_read_lock();
+ opinfo = list_first_or_null_rcu(&ci->m_op_list, struct oplock_info,
+ op_entry);
+ if (opinfo && !atomic_inc_not_zero(&opinfo->refcount))
+ opinfo = NULL;
+ rcu_read_unlock();
+
+ return opinfo;
+}
+
+void opinfo_put(struct oplock_info *opinfo)
+{
+ if (!atomic_dec_and_test(&opinfo->refcount))
+ return;
+
+ call_rcu(&opinfo->rcu_head, opinfo_free_rcu);
+}
+
+static void opinfo_add(struct oplock_info *opinfo)
+{
+ struct ksmbd_inode *ci = opinfo->o_fp->f_ci;
+
+ write_lock(&ci->m_lock);
+ list_add_rcu(&opinfo->op_entry, &ci->m_op_list);
+ write_unlock(&ci->m_lock);
+}
+
+static void opinfo_del(struct oplock_info *opinfo)
+{
+ struct ksmbd_inode *ci = opinfo->o_fp->f_ci;
+
+ if (opinfo->is_lease) {
+ write_lock(&lease_list_lock);
+ lease_del_list(opinfo);
+ write_unlock(&lease_list_lock);
+ }
+ write_lock(&ci->m_lock);
+ list_del_rcu(&opinfo->op_entry);
+ write_unlock(&ci->m_lock);
+}
+
+static unsigned long opinfo_count(struct ksmbd_file *fp)
+{
+ if (ksmbd_stream_fd(fp))
+ return atomic_read(&fp->f_ci->sop_count);
+ else
+ return atomic_read(&fp->f_ci->op_count);
+}
+
+static void opinfo_count_inc(struct ksmbd_file *fp)
+{
+ if (ksmbd_stream_fd(fp))
+ atomic_inc(&fp->f_ci->sop_count);
+ else
+ atomic_inc(&fp->f_ci->op_count);
+}
+
+static void opinfo_count_dec(struct ksmbd_file *fp)
+{
+ if (ksmbd_stream_fd(fp))
+ atomic_dec(&fp->f_ci->sop_count);
+ else
+ atomic_dec(&fp->f_ci->op_count);
+}
+
+/**
+ * opinfo_write_to_read() - convert a write oplock to read oplock
+ * @opinfo: current oplock info
+ *
+ * Return: 0 on success, otherwise -EINVAL
+ */
+int opinfo_write_to_read(struct oplock_info *opinfo)
+{
+ struct lease *lease = opinfo->o_lease;
+
+ if (!(opinfo->level == SMB2_OPLOCK_LEVEL_BATCH ||
+ opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE)) {
+ pr_err("bad oplock(0x%x)\n", opinfo->level);
+ if (opinfo->is_lease)
+ pr_err("lease state(0x%x)\n", lease->state);
+ return -EINVAL;
+ }
+ opinfo->level = SMB2_OPLOCK_LEVEL_II;
+
+ if (opinfo->is_lease)
+ lease->state = lease->new_state;
+ return 0;
+}
+
+/**
+ * opinfo_read_handle_to_read() - convert a read/handle oplock to read oplock
+ * @opinfo: current oplock info
+ *
+ * Return: 0 on success, otherwise -EINVAL
+ */
+int opinfo_read_handle_to_read(struct oplock_info *opinfo)
+{
+ struct lease *lease = opinfo->o_lease;
+
+ lease->state = lease->new_state;
+ opinfo->level = SMB2_OPLOCK_LEVEL_II;
+ return 0;
+}
+
+/**
+ * opinfo_write_to_none() - convert a write oplock to none
+ * @opinfo: current oplock info
+ *
+ * Return: 0 on success, otherwise -EINVAL
+ */
+int opinfo_write_to_none(struct oplock_info *opinfo)
+{
+ struct lease *lease = opinfo->o_lease;
+
+ if (!(opinfo->level == SMB2_OPLOCK_LEVEL_BATCH ||
+ opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE)) {
+ pr_err("bad oplock(0x%x)\n", opinfo->level);
+ if (opinfo->is_lease)
+ pr_err("lease state(0x%x)\n", lease->state);
+ return -EINVAL;
+ }
+ opinfo->level = SMB2_OPLOCK_LEVEL_NONE;
+ if (opinfo->is_lease)
+ lease->state = lease->new_state;
+ return 0;
+}
+
+/**
+ * opinfo_read_to_none() - convert a read oplock to none
+ * @opinfo: current oplock info
+ *
+ * Return: 0 on success, otherwise -EINVAL
+ */
+int opinfo_read_to_none(struct oplock_info *opinfo)
+{
+ struct lease *lease = opinfo->o_lease;
+
+ if (opinfo->level != SMB2_OPLOCK_LEVEL_II) {
+ pr_err("bad oplock(0x%x)\n", opinfo->level);
+ if (opinfo->is_lease)
+ pr_err("lease state(0x%x)\n", lease->state);
+ return -EINVAL;
+ }
+ opinfo->level = SMB2_OPLOCK_LEVEL_NONE;
+ if (opinfo->is_lease)
+ lease->state = lease->new_state;
+ return 0;
+}
+
+/**
+ * lease_read_to_write() - upgrade lease state from read to write
+ * @opinfo: current lease info
+ *
+ * Return: 0 on success, otherwise -EINVAL
+ */
+int lease_read_to_write(struct oplock_info *opinfo)
+{
+ struct lease *lease = opinfo->o_lease;
+
+ if (!(lease->state & SMB2_LEASE_READ_CACHING_LE)) {
+ ksmbd_debug(OPLOCK, "bad lease state(0x%x)\n", lease->state);
+ return -EINVAL;
+ }
+
+ lease->new_state = SMB2_LEASE_NONE_LE;
+ lease->state |= SMB2_LEASE_WRITE_CACHING_LE;
+ if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE)
+ opinfo->level = SMB2_OPLOCK_LEVEL_BATCH;
+ else
+ opinfo->level = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+ return 0;
+}
+
+/**
+ * lease_none_upgrade() - upgrade lease state from none
+ * @opinfo: current lease info
+ * @new_state: new lease state
+ *
+ * Return: 0 on success, otherwise -EINVAL
+ */
+static int lease_none_upgrade(struct oplock_info *opinfo, __le32 new_state)
+{
+ struct lease *lease = opinfo->o_lease;
+
+ if (lease->state != SMB2_LEASE_NONE_LE) {
+ ksmbd_debug(OPLOCK, "bad lease state(0x%x)\n", lease->state);
+ return -EINVAL;
+ }
+
+ lease->new_state = SMB2_LEASE_NONE_LE;
+ lease->state = new_state;
+ if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE) {
+ if (lease->state & SMB2_LEASE_WRITE_CACHING_LE)
+ opinfo->level = SMB2_OPLOCK_LEVEL_BATCH;
+ else
+ opinfo->level = SMB2_OPLOCK_LEVEL_II;
+ } else if (lease->state & SMB2_LEASE_WRITE_CACHING_LE) {
+ opinfo->level = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+ } else if (lease->state & SMB2_LEASE_READ_CACHING_LE) {
+ opinfo->level = SMB2_OPLOCK_LEVEL_II;
+ }
+
+ return 0;
+}
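+
+/*
+ * Summary of the lease-state to oplock-level mapping implemented above:
+ *
+ * HANDLE | WRITE (| READ) -> SMB2_OPLOCK_LEVEL_BATCH
+ * HANDLE without WRITE -> SMB2_OPLOCK_LEVEL_II
+ * WRITE without HANDLE -> SMB2_OPLOCK_LEVEL_EXCLUSIVE
+ * READ only -> SMB2_OPLOCK_LEVEL_II
+ */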
+
+/**
+ * close_id_del_oplock() - release oplock object at file close time
+ * @fp: ksmbd file pointer
+ */
+void close_id_del_oplock(struct ksmbd_file *fp)
+{
+ struct oplock_info *opinfo;
+
+ if (S_ISDIR(file_inode(fp->filp)->i_mode))
+ return;
+
+ opinfo = opinfo_get(fp);
+ if (!opinfo)
+ return;
+
+ opinfo_del(opinfo);
+
+ rcu_assign_pointer(fp->f_opinfo, NULL);
+ if (opinfo->op_state == OPLOCK_ACK_WAIT) {
+ opinfo->op_state = OPLOCK_CLOSING;
+ wake_up_interruptible_all(&opinfo->oplock_q);
+ if (opinfo->is_lease) {
+ atomic_set(&opinfo->breaking_cnt, 0);
+ wake_up_interruptible_all(&opinfo->oplock_brk);
+ }
+ }
+
+ opinfo_count_dec(fp);
+ atomic_dec(&opinfo->refcount);
+ opinfo_put(opinfo);
+}
+
+/**
+ * grant_write_oplock() - grant exclusive/batch oplock or write lease
+ * @opinfo_new: new oplock info object
+ * @req_oplock: request oplock
+ * @lctx: lease context information
+ */
+static void grant_write_oplock(struct oplock_info *opinfo_new, int req_oplock,
+ struct lease_ctx_info *lctx)
+{
+ struct lease *lease = opinfo_new->o_lease;
+
+ if (req_oplock == SMB2_OPLOCK_LEVEL_BATCH)
+ opinfo_new->level = SMB2_OPLOCK_LEVEL_BATCH;
+ else
+ opinfo_new->level = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+
+ if (lctx) {
+ lease->state = lctx->req_state;
+ memcpy(lease->lease_key, lctx->lease_key, SMB2_LEASE_KEY_SIZE);
+ }
+}
+
+/**
+ * grant_read_oplock() - grant level2 oplock or read lease
+ * @opinfo_new: new oplock info object
+ * @lctx: lease context information
+ */
+static void grant_read_oplock(struct oplock_info *opinfo_new,
+ struct lease_ctx_info *lctx)
+{
+ struct lease *lease = opinfo_new->o_lease;
+
+ opinfo_new->level = SMB2_OPLOCK_LEVEL_II;
+
+ if (lctx) {
+ lease->state = SMB2_LEASE_READ_CACHING_LE;
+ if (lctx->req_state & SMB2_LEASE_HANDLE_CACHING_LE)
+ lease->state |= SMB2_LEASE_HANDLE_CACHING_LE;
+ memcpy(lease->lease_key, lctx->lease_key, SMB2_LEASE_KEY_SIZE);
+ }
+}
+
+/**
+ * grant_none_oplock() - grant none oplock or none lease
+ * @opinfo_new: new oplock info object
+ * @lctx: lease context information
+ */
+static void grant_none_oplock(struct oplock_info *opinfo_new,
+ struct lease_ctx_info *lctx)
+{
+ struct lease *lease = opinfo_new->o_lease;
+
+ opinfo_new->level = SMB2_OPLOCK_LEVEL_NONE;
+
+ if (lctx) {
+ lease->state = 0;
+ memcpy(lease->lease_key, lctx->lease_key, SMB2_LEASE_KEY_SIZE);
+ }
+}
+
+static inline int compare_guid_key(struct oplock_info *opinfo,
+ const char *guid1, const char *key1)
+{
+ const char *guid2, *key2;
+
+ guid2 = opinfo->conn->ClientGUID;
+ key2 = opinfo->o_lease->lease_key;
+ if (!memcmp(guid1, guid2, SMB2_CLIENT_GUID_SIZE) &&
+ !memcmp(key1, key2, SMB2_LEASE_KEY_SIZE))
+ return 1;
+
+ return 0;
+}
+
+/**
+ * same_client_has_lease() - check whether current lease request is
+ * from the lease owner of the file
+ * @ci: ksmbd inode of the file
+ * @client_guid: Client GUID
+ * @lctx: lease context information
+ *
+ * Return: oplock(lease) object on success, otherwise NULL
+ */
+static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci,
+ char *client_guid,
+ struct lease_ctx_info *lctx)
+{
+ int ret;
+ struct lease *lease;
+ struct oplock_info *opinfo;
+ struct oplock_info *m_opinfo = NULL;
+
+ if (!lctx)
+ return NULL;
+
+ /*
+ * Compare the lease key and client_guid to determine whether the
+ * request comes from the same lease owner on the same client.
+ */
+ read_lock(&ci->m_lock);
+ list_for_each_entry(opinfo, &ci->m_op_list, op_entry) {
+ if (!opinfo->is_lease)
+ continue;
+ read_unlock(&ci->m_lock);
+ lease = opinfo->o_lease;
+
+ ret = compare_guid_key(opinfo, client_guid, lctx->lease_key);
+ if (ret) {
+ m_opinfo = opinfo;
+ /* skip upgrading a lease that is in the middle of breaking */
+ if (atomic_read(&opinfo->breaking_cnt)) {
+ read_lock(&ci->m_lock);
+ continue;
+ }
+
+ /* upgrading lease */
+ if ((atomic_read(&ci->op_count) +
+ atomic_read(&ci->sop_count)) == 1) {
+ if (lease->state ==
+ (lctx->req_state & lease->state)) {
+ lease->state |= lctx->req_state;
+ if (lctx->req_state &
+ SMB2_LEASE_WRITE_CACHING_LE)
+ lease_read_to_write(opinfo);
+ }
+ } else if ((atomic_read(&ci->op_count) +
+ atomic_read(&ci->sop_count)) > 1) {
+ if (lctx->req_state ==
+ (SMB2_LEASE_READ_CACHING_LE |
+ SMB2_LEASE_HANDLE_CACHING_LE))
+ lease->state = lctx->req_state;
+ }
+
+ if (lctx->req_state && lease->state ==
+ SMB2_LEASE_NONE_LE)
+ lease_none_upgrade(opinfo, lctx->req_state);
+ }
+ read_lock(&ci->m_lock);
+ }
+ read_unlock(&ci->m_lock);
+
+ return m_opinfo;
+}
+
+static void wait_for_break_ack(struct oplock_info *opinfo)
+{
+ int rc = 0;
+
+ rc = wait_event_interruptible_timeout(opinfo->oplock_q,
+ opinfo->op_state == OPLOCK_STATE_NONE ||
+ opinfo->op_state == OPLOCK_CLOSING,
+ OPLOCK_WAIT_TIME);
+
+ /* is this a timeout ? */
+ if (!rc) {
+ if (opinfo->is_lease)
+ opinfo->o_lease->state = SMB2_LEASE_NONE_LE;
+ opinfo->level = SMB2_OPLOCK_LEVEL_NONE;
+ opinfo->op_state = OPLOCK_STATE_NONE;
+ }
+}
+
+static void wake_up_oplock_break(struct oplock_info *opinfo)
+{
+ clear_bit_unlock(0, &opinfo->pending_break);
+ /* memory barrier is needed for wake_up_bit() */
+ smp_mb__after_atomic();
+ wake_up_bit(&opinfo->pending_break, 0);
+}
+
+static int oplock_break_pending(struct oplock_info *opinfo, int req_op_level)
+{
+ while (test_and_set_bit(0, &opinfo->pending_break)) {
+ wait_on_bit(&opinfo->pending_break, 0, TASK_UNINTERRUPTIBLE);
+
+ /* Do not immediately break to none. */
+ opinfo->open_trunc = 0;
+
+ if (opinfo->op_state == OPLOCK_CLOSING)
+ return -ENOENT;
+ else if (!opinfo->is_lease && opinfo->level <= req_op_level)
+ return 1;
+ }
+
+ if (!opinfo->is_lease && opinfo->level <= req_op_level) {
+ wake_up_oplock_break(opinfo);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int allocate_oplock_break_buf(struct ksmbd_work *work)
+{
+ work->response_buf = kzalloc(MAX_CIFS_SMALL_BUFFER_SIZE, GFP_KERNEL);
+ if (!work->response_buf)
+ return -ENOMEM;
+ work->response_sz = MAX_CIFS_SMALL_BUFFER_SIZE;
+ return 0;
+}
+
+/**
+ * __smb2_oplock_break_noti() - send smb2 oplock break cmd from conn
+ * to client
+ * @wk: smb work object
+ *
+ * This function is called in two cases: 1) on file open, to break an
+ * exclusive/batch oplock down to a level II oplock, and 2) on file
+ * write/truncate, to break a level II oplock down to no oplock.
+ * work->request_buf contains an oplock_break_info object.
+ */
+static void __smb2_oplock_break_noti(struct work_struct *wk)
+{
+ struct smb2_oplock_break *rsp = NULL;
+ struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work);
+ struct ksmbd_conn *conn = work->conn;
+ struct oplock_break_info *br_info = work->request_buf;
+ struct smb2_hdr *rsp_hdr;
+ struct ksmbd_file *fp;
+
+ fp = ksmbd_lookup_durable_fd(br_info->fid);
+ if (!fp) {
+ atomic_dec(&conn->r_count);
+ ksmbd_free_work_struct(work);
+ return;
+ }
+
+ if (allocate_oplock_break_buf(work)) {
+ pr_err("smb2_allocate_rsp_buf failed! ");
+ atomic_dec(&conn->r_count);
+ ksmbd_fd_put(work, fp);
+ ksmbd_free_work_struct(work);
+ return;
+ }
+
+ rsp_hdr = work->response_buf;
+ memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
+ rsp_hdr->smb2_buf_length =
+ cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+ rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
+ rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
+ rsp_hdr->CreditRequest = cpu_to_le16(0);
+ rsp_hdr->Command = SMB2_OPLOCK_BREAK;
+ rsp_hdr->Flags = (SMB2_FLAGS_SERVER_TO_REDIR);
+ rsp_hdr->NextCommand = 0;
+ rsp_hdr->MessageId = cpu_to_le64(-1);
+ rsp_hdr->Id.SyncId.ProcessId = 0;
+ rsp_hdr->Id.SyncId.TreeId = 0;
+ rsp_hdr->SessionId = 0;
+ memset(rsp_hdr->Signature, 0, 16);
+
+ rsp = work->response_buf;
+
+ rsp->StructureSize = cpu_to_le16(24);
+ if (!br_info->open_trunc &&
+ (br_info->level == SMB2_OPLOCK_LEVEL_BATCH ||
+ br_info->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE))
+ rsp->OplockLevel = SMB2_OPLOCK_LEVEL_II;
+ else
+ rsp->OplockLevel = SMB2_OPLOCK_LEVEL_NONE;
+ rsp->Reserved = 0;
+ rsp->Reserved2 = 0;
+ rsp->PersistentFid = cpu_to_le64(fp->persistent_id);
+ rsp->VolatileFid = cpu_to_le64(fp->volatile_id);
+
+ inc_rfc1001_len(rsp, 24);
+
+ ksmbd_debug(OPLOCK,
+ "sending oplock break v_id %llu p_id = %llu lock level = %d\n",
+ rsp->VolatileFid, rsp->PersistentFid, rsp->OplockLevel);
+
+ ksmbd_fd_put(work, fp);
+ ksmbd_conn_write(work);
+ ksmbd_free_work_struct(work);
+ atomic_dec(&conn->r_count);
+}
+
+/**
+ * smb2_oplock_break_noti() - send smb2 exclusive/batch to level2 oplock
+ * break command from server to client
+ * @opinfo: oplock info object
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int smb2_oplock_break_noti(struct oplock_info *opinfo)
+{
+ struct ksmbd_conn *conn = opinfo->conn;
+ struct oplock_break_info *br_info;
+ int ret = 0;
+ struct ksmbd_work *work = ksmbd_alloc_work_struct();
+
+ if (!work)
+ return -ENOMEM;
+
+ br_info = kmalloc(sizeof(struct oplock_break_info), GFP_KERNEL);
+ if (!br_info) {
+ ksmbd_free_work_struct(work);
+ return -ENOMEM;
+ }
+
+ br_info->level = opinfo->level;
+ br_info->fid = opinfo->fid;
+ br_info->open_trunc = opinfo->open_trunc;
+
+ work->request_buf = (char *)br_info;
+ work->conn = conn;
+ work->sess = opinfo->sess;
+
+ atomic_inc(&conn->r_count);
+ if (opinfo->op_state == OPLOCK_ACK_WAIT) {
+ INIT_WORK(&work->work, __smb2_oplock_break_noti);
+ ksmbd_queue_work(work);
+
+ wait_for_break_ack(opinfo);
+ } else {
+ __smb2_oplock_break_noti(&work->work);
+ if (opinfo->level == SMB2_OPLOCK_LEVEL_II)
+ opinfo->level = SMB2_OPLOCK_LEVEL_NONE;
+ }
+ return ret;
+}
+
+/**
+ * __smb2_lease_break_noti() - send lease break command from server
+ * to client
+ * @wk: smb work object
+ */
+static void __smb2_lease_break_noti(struct work_struct *wk)
+{
+ struct smb2_lease_break *rsp = NULL;
+ struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work);
+ struct lease_break_info *br_info = work->request_buf;
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_hdr *rsp_hdr;
+
+ if (allocate_oplock_break_buf(work)) {
+ ksmbd_debug(OPLOCK, "smb2_allocate_rsp_buf failed! ");
+ ksmbd_free_work_struct(work);
+ atomic_dec(&conn->r_count);
+ return;
+ }
+
+ rsp_hdr = work->response_buf;
+ memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
+ rsp_hdr->smb2_buf_length =
+ cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+ rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
+ rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
+ rsp_hdr->CreditRequest = cpu_to_le16(0);
+ rsp_hdr->Command = SMB2_OPLOCK_BREAK;
+ rsp_hdr->Flags = (SMB2_FLAGS_SERVER_TO_REDIR);
+ rsp_hdr->NextCommand = 0;
+ rsp_hdr->MessageId = cpu_to_le64(-1);
+ rsp_hdr->Id.SyncId.ProcessId = 0;
+ rsp_hdr->Id.SyncId.TreeId = 0;
+ rsp_hdr->SessionId = 0;
+ memset(rsp_hdr->Signature, 0, 16);
+
+ rsp = work->response_buf;
+ rsp->StructureSize = cpu_to_le16(44);
+ rsp->Epoch = br_info->epoch;
+ rsp->Flags = 0;
+
+ if (br_info->curr_state & (SMB2_LEASE_WRITE_CACHING_LE |
+ SMB2_LEASE_HANDLE_CACHING_LE))
+ rsp->Flags = SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED;
+
+ memcpy(rsp->LeaseKey, br_info->lease_key, SMB2_LEASE_KEY_SIZE);
+ rsp->CurrentLeaseState = br_info->curr_state;
+ rsp->NewLeaseState = br_info->new_state;
+ rsp->BreakReason = 0;
+ rsp->AccessMaskHint = 0;
+ rsp->ShareMaskHint = 0;
+
+ inc_rfc1001_len(rsp, 44);
+
+ ksmbd_conn_write(work);
+ ksmbd_free_work_struct(work);
+ atomic_dec(&conn->r_count);
+}
+
+/**
+ * smb2_lease_break_noti() - break lease when a new client request
+ * write lease
+ * @opinfo: contains lease state information
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int smb2_lease_break_noti(struct oplock_info *opinfo)
+{
+ struct ksmbd_conn *conn = opinfo->conn;
+ struct list_head *tmp, *t;
+ struct ksmbd_work *work;
+ struct lease_break_info *br_info;
+ struct lease *lease = opinfo->o_lease;
+
+ work = ksmbd_alloc_work_struct();
+ if (!work)
+ return -ENOMEM;
+
+ br_info = kmalloc(sizeof(struct lease_break_info), GFP_KERNEL);
+ if (!br_info) {
+ ksmbd_free_work_struct(work);
+ return -ENOMEM;
+ }
+
+ br_info->curr_state = lease->state;
+ br_info->new_state = lease->new_state;
+ if (lease->version == 2)
+ br_info->epoch = cpu_to_le16(++lease->epoch);
+ else
+ br_info->epoch = 0;
+ memcpy(br_info->lease_key, lease->lease_key, SMB2_LEASE_KEY_SIZE);
+
+ work->request_buf = (char *)br_info;
+ work->conn = conn;
+ work->sess = opinfo->sess;
+
+ atomic_inc(&conn->r_count);
+ if (opinfo->op_state == OPLOCK_ACK_WAIT) {
+ list_for_each_safe(tmp, t, &opinfo->interim_list) {
+ struct ksmbd_work *in_work;
+
+ in_work = list_entry(tmp, struct ksmbd_work,
+ interim_entry);
+ setup_async_work(in_work, NULL, NULL);
+ smb2_send_interim_resp(in_work, STATUS_PENDING);
+ list_del(&in_work->interim_entry);
+ }
+ INIT_WORK(&work->work, __smb2_lease_break_noti);
+ ksmbd_queue_work(work);
+ wait_for_break_ack(opinfo);
+ } else {
+ __smb2_lease_break_noti(&work->work);
+ if (opinfo->o_lease->new_state == SMB2_LEASE_NONE_LE) {
+ opinfo->level = SMB2_OPLOCK_LEVEL_NONE;
+ opinfo->o_lease->state = SMB2_LEASE_NONE_LE;
+ }
+ }
+ return 0;
+}
+
+static void wait_lease_breaking(struct oplock_info *opinfo)
+{
+ if (!opinfo->is_lease)
+ return;
+
+ wake_up_interruptible_all(&opinfo->oplock_brk);
+ if (atomic_read(&opinfo->breaking_cnt)) {
+ int ret = 0;
+
+ ret = wait_event_interruptible_timeout(opinfo->oplock_brk,
+ atomic_read(&opinfo->breaking_cnt) == 0,
+ HZ);
+ if (!ret)
+ atomic_set(&opinfo->breaking_cnt, 0);
+ }
+}
+
+static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level)
+{
+ int err = 0;
+
+ /* Need to break exclusive/batch oplock, write lease or overwrite_if */
+ ksmbd_debug(OPLOCK,
+ "request to send oplock(level : 0x%x) break notification\n",
+ brk_opinfo->level);
+
+ if (brk_opinfo->is_lease) {
+ struct lease *lease = brk_opinfo->o_lease;
+
+ atomic_inc(&brk_opinfo->breaking_cnt);
+
+ err = oplock_break_pending(brk_opinfo, req_op_level);
+ if (err)
+ return err < 0 ? err : 0;
+
+ if (brk_opinfo->open_trunc) {
+ /*
+ * An open that overwrites the file triggers a lease
+ * break to none.
+ */
+ lease->new_state = SMB2_LEASE_NONE_LE;
+ } else {
+ if (lease->state & SMB2_LEASE_WRITE_CACHING_LE) {
+ if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE)
+ lease->new_state =
+ SMB2_LEASE_READ_CACHING_LE |
+ SMB2_LEASE_HANDLE_CACHING_LE;
+ else
+ lease->new_state =
+ SMB2_LEASE_READ_CACHING_LE;
+ } else {
+ if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE)
+ lease->new_state =
+ SMB2_LEASE_READ_CACHING_LE;
+ else
+ lease->new_state = SMB2_LEASE_NONE_LE;
+ }
+ }
+
+ if (lease->state & (SMB2_LEASE_WRITE_CACHING_LE |
+ SMB2_LEASE_HANDLE_CACHING_LE))
+ brk_opinfo->op_state = OPLOCK_ACK_WAIT;
+ else
+ atomic_dec(&brk_opinfo->breaking_cnt);
+ } else {
+ err = oplock_break_pending(brk_opinfo, req_op_level);
+ if (err)
+ return err < 0 ? err : 0;
+
+ if (brk_opinfo->level == SMB2_OPLOCK_LEVEL_BATCH ||
+ brk_opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE)
+ brk_opinfo->op_state = OPLOCK_ACK_WAIT;
+ }
+
+ if (brk_opinfo->is_lease)
+ err = smb2_lease_break_noti(brk_opinfo);
+ else
+ err = smb2_oplock_break_noti(brk_opinfo);
+
+ ksmbd_debug(OPLOCK, "oplock granted = %d\n", brk_opinfo->level);
+ if (brk_opinfo->op_state == OPLOCK_CLOSING)
+ err = -ENOENT;
+ wake_up_oplock_break(brk_opinfo);
+
+ wait_lease_breaking(brk_opinfo);
+
+ return err;
+}
+
+void destroy_lease_table(struct ksmbd_conn *conn)
+{
+ struct lease_table *lb, *lbtmp;
+ struct oplock_info *opinfo;
+
+ write_lock(&lease_list_lock);
+ if (list_empty(&lease_table_list)) {
+ write_unlock(&lease_list_lock);
+ return;
+ }
+
+ list_for_each_entry_safe(lb, lbtmp, &lease_table_list, l_entry) {
+ if (conn && memcmp(lb->client_guid, conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE))
+ continue;
+again:
+ rcu_read_lock();
+ list_for_each_entry_rcu(opinfo, &lb->lease_list,
+ lease_entry) {
+ rcu_read_unlock();
+ lease_del_list(opinfo);
+ goto again;
+ }
+ rcu_read_unlock();
+ list_del(&lb->l_entry);
+ kfree(lb);
+ }
+ write_unlock(&lease_list_lock);
+}
+
+int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci,
+ struct lease_ctx_info *lctx)
+{
+ struct oplock_info *opinfo;
+ int err = 0;
+ struct lease_table *lb;
+
+ if (!lctx)
+ return err;
+
+ read_lock(&lease_list_lock);
+ if (list_empty(&lease_table_list)) {
+ read_unlock(&lease_list_lock);
+ return 0;
+ }
+
+ list_for_each_entry(lb, &lease_table_list, l_entry) {
+ if (!memcmp(lb->client_guid, sess->conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE))
+ goto found;
+ }
+ read_unlock(&lease_list_lock);
+
+ return 0;
+
+found:
+ rcu_read_lock();
+ list_for_each_entry_rcu(opinfo, &lb->lease_list, lease_entry) {
+ if (!atomic_inc_not_zero(&opinfo->refcount))
+ continue;
+ rcu_read_unlock();
+ if (opinfo->o_fp->f_ci == ci)
+ goto op_next;
+ err = compare_guid_key(opinfo, sess->conn->ClientGUID,
+ lctx->lease_key);
+ if (err) {
+ err = -EINVAL;
+ ksmbd_debug(OPLOCK,
+ "found same lease key is already used in other files\n");
+ opinfo_put(opinfo);
+ goto out;
+ }
+op_next:
+ opinfo_put(opinfo);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+
+out:
+ read_unlock(&lease_list_lock);
+ return err;
+}
+
+static void copy_lease(struct oplock_info *op1, struct oplock_info *op2)
+{
+ struct lease *lease1 = op1->o_lease;
+ struct lease *lease2 = op2->o_lease;
+
+ op2->level = op1->level;
+ lease2->state = lease1->state;
+ memcpy(lease2->lease_key, lease1->lease_key,
+ SMB2_LEASE_KEY_SIZE);
+ lease2->duration = lease1->duration;
+ lease2->flags = lease1->flags;
+}
+
+static int add_lease_global_list(struct oplock_info *opinfo)
+{
+ struct lease_table *lb;
+
+ read_lock(&lease_list_lock);
+ list_for_each_entry(lb, &lease_table_list, l_entry) {
+ if (!memcmp(lb->client_guid, opinfo->conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE)) {
+ opinfo->o_lease->l_lb = lb;
+ lease_add_list(opinfo);
+ read_unlock(&lease_list_lock);
+ return 0;
+ }
+ }
+ read_unlock(&lease_list_lock);
+
+ lb = kmalloc(sizeof(struct lease_table), GFP_KERNEL);
+ if (!lb)
+ return -ENOMEM;
+
+ memcpy(lb->client_guid, opinfo->conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE);
+ INIT_LIST_HEAD(&lb->lease_list);
+ spin_lock_init(&lb->lb_lock);
+ opinfo->o_lease->l_lb = lb;
+ lease_add_list(opinfo);
+ lb_add(lb);
+ return 0;
+}
+
+static void set_oplock_level(struct oplock_info *opinfo, int level,
+ struct lease_ctx_info *lctx)
+{
+ switch (level) {
+ case SMB2_OPLOCK_LEVEL_BATCH:
+ case SMB2_OPLOCK_LEVEL_EXCLUSIVE:
+ grant_write_oplock(opinfo, level, lctx);
+ break;
+ case SMB2_OPLOCK_LEVEL_II:
+ grant_read_oplock(opinfo, lctx);
+ break;
+ default:
+ grant_none_oplock(opinfo, lctx);
+ break;
+ }
+}
+
+/**
+ * smb_grant_oplock() - handle oplock/lease request on file open
+ * @work: smb work
+ * @req_op_level: oplock level
+ * @pid: id of open file
+ * @fp: ksmbd file pointer
+ * @tid: Tree id of connection
+ * @lctx: lease context information on file open
+ * @share_ret: share mode check result
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
+ struct ksmbd_file *fp, __u16 tid,
+ struct lease_ctx_info *lctx, int share_ret)
+{
+ struct ksmbd_session *sess = work->sess;
+ int err = 0;
+ struct oplock_info *opinfo = NULL, *prev_opinfo = NULL;
+ struct ksmbd_inode *ci = fp->f_ci;
+ bool prev_op_has_lease;
+ __le32 prev_op_state = 0;
+
+ /* directory leases are not supported */
+ if (S_ISDIR(file_inode(fp->filp)->i_mode))
+ return 0;
+
+ opinfo = alloc_opinfo(work, pid, tid);
+ if (!opinfo)
+ return -ENOMEM;
+
+ if (lctx) {
+ err = alloc_lease(opinfo, lctx);
+ if (err)
+ goto err_out;
+ opinfo->is_lease = 1;
+ }
+
+ /* ci does not have any oplock */
+ if (!opinfo_count(fp))
+ goto set_lev;
+
+ /* grant no oplock if the second open truncates the file */
+ if (fp->attrib_only && fp->cdoption != FILE_OVERWRITE_IF_LE &&
+ fp->cdoption != FILE_OVERWRITE_LE &&
+ fp->cdoption != FILE_SUPERSEDE_LE) {
+ req_op_level = SMB2_OPLOCK_LEVEL_NONE;
+ goto set_lev;
+ }
+
+ if (lctx) {
+ struct oplock_info *m_opinfo;
+
+ /* is lease already granted ? */
+ m_opinfo = same_client_has_lease(ci, sess->conn->ClientGUID,
+ lctx);
+ if (m_opinfo) {
+ copy_lease(m_opinfo, opinfo);
+ if (atomic_read(&m_opinfo->breaking_cnt))
+ opinfo->o_lease->flags =
+ SMB2_LEASE_FLAG_BREAK_IN_PROGRESS_LE;
+ goto out;
+ }
+ }
+ prev_opinfo = opinfo_get_list(ci);
+ if (!prev_opinfo ||
+ (prev_opinfo->level == SMB2_OPLOCK_LEVEL_NONE && lctx))
+ goto set_lev;
+ prev_op_has_lease = prev_opinfo->is_lease;
+ if (prev_op_has_lease)
+ prev_op_state = prev_opinfo->o_lease->state;
+
+ if (share_ret < 0 &&
+ prev_opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
+ err = share_ret;
+ opinfo_put(prev_opinfo);
+ goto err_out;
+ }
+
+ if (prev_opinfo->level != SMB2_OPLOCK_LEVEL_BATCH &&
+ prev_opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
+ opinfo_put(prev_opinfo);
+ goto op_break_not_needed;
+ }
+
+ list_add(&work->interim_entry, &prev_opinfo->interim_list);
+ err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II);
+ opinfo_put(prev_opinfo);
+ /* -ENOENT means the previous oplock was freed by a racing close */
+ if (err == -ENOENT)
+ goto set_lev;
+ else if (err < 0)
+ goto err_out;
+
+op_break_not_needed:
+ if (share_ret < 0) {
+ err = share_ret;
+ goto err_out;
+ }
+
+ if (req_op_level != SMB2_OPLOCK_LEVEL_NONE)
+ req_op_level = SMB2_OPLOCK_LEVEL_II;
+
+ /* grant a fixed oplock when leases and oplocks are stacked on a file */
+ if (prev_op_has_lease && !lctx)
+ if (prev_op_state & SMB2_LEASE_HANDLE_CACHING_LE)
+ req_op_level = SMB2_OPLOCK_LEVEL_NONE;
+
+ if (!prev_op_has_lease && lctx) {
+ req_op_level = SMB2_OPLOCK_LEVEL_II;
+ lctx->req_state = SMB2_LEASE_READ_CACHING_LE;
+ }
+
+set_lev:
+ set_oplock_level(opinfo, req_op_level, lctx);
+
+out:
+ rcu_assign_pointer(fp->f_opinfo, opinfo);
+ opinfo->o_fp = fp;
+
+ opinfo_count_inc(fp);
+ opinfo_add(opinfo);
+ if (opinfo->is_lease) {
+ err = add_lease_global_list(opinfo);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+err_out:
+ free_opinfo(opinfo);
+ return err;
+}
+
+/**
+ * smb_break_all_write_oplock() - break batch/exclusive oplock to level2
+ * @work: smb work
+ * @fp: ksmbd file pointer
+ * @is_trunc: truncate on open
+ */
+static void smb_break_all_write_oplock(struct ksmbd_work *work,
+ struct ksmbd_file *fp, int is_trunc)
+{
+ struct oplock_info *brk_opinfo;
+
+ brk_opinfo = opinfo_get_list(fp->f_ci);
+ if (!brk_opinfo)
+ return;
+ if (brk_opinfo->level != SMB2_OPLOCK_LEVEL_BATCH &&
+ brk_opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
+ opinfo_put(brk_opinfo);
+ return;
+ }
+
+ brk_opinfo->open_trunc = is_trunc;
+ list_add(&work->interim_entry, &brk_opinfo->interim_list);
+ oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II);
+ opinfo_put(brk_opinfo);
+}
+
+/**
+ * smb_break_all_levII_oplock() - send level2 oplock or read lease break command
+ * from server to client
+ * @work: smb work
+ * @fp: ksmbd file pointer
+ * @is_trunc: truncate on open
+ */
+void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
+ int is_trunc)
+{
+ struct oplock_info *op, *brk_op;
+ struct ksmbd_inode *ci;
+ struct ksmbd_conn *conn = work->sess->conn;
+
+ if (!test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_OPLOCKS))
+ return;
+
+ ci = fp->f_ci;
+ op = opinfo_get(fp);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(brk_op, &ci->m_op_list, op_entry) {
+ if (!atomic_inc_not_zero(&brk_op->refcount))
+ continue;
+ rcu_read_unlock();
+ if (brk_op->is_lease && (brk_op->o_lease->state &
+ (~(SMB2_LEASE_READ_CACHING_LE |
+ SMB2_LEASE_HANDLE_CACHING_LE)))) {
+ ksmbd_debug(OPLOCK, "unexpected lease state(0x%x)\n",
+ brk_op->o_lease->state);
+ goto next;
+ } else if (brk_op->level !=
+ SMB2_OPLOCK_LEVEL_II) {
+ ksmbd_debug(OPLOCK, "unexpected oplock(0x%x)\n",
+ brk_op->level);
+ goto next;
+ }
+
+ /* Skip an oplock that is already breaking to none */
+ if (brk_op->is_lease &&
+ brk_op->o_lease->new_state == SMB2_LEASE_NONE_LE &&
+ atomic_read(&brk_op->breaking_cnt))
+ goto next;
+
+ if (op && op->is_lease && brk_op->is_lease &&
+ !memcmp(conn->ClientGUID, brk_op->conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE) &&
+ !memcmp(op->o_lease->lease_key, brk_op->o_lease->lease_key,
+ SMB2_LEASE_KEY_SIZE))
+ goto next;
+ brk_op->open_trunc = is_trunc;
+ oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE);
+next:
+ opinfo_put(brk_op);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+
+ if (op)
+ opinfo_put(op);
+}
+
+/**
+ * smb_break_all_oplock() - break both batch/exclusive and level2 oplock
+ * @work: smb work
+ * @fp: ksmbd file pointer
+ */
+void smb_break_all_oplock(struct ksmbd_work *work, struct ksmbd_file *fp)
+{
+ if (!test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_OPLOCKS))
+ return;
+
+ smb_break_all_write_oplock(work, fp, 1);
+ smb_break_all_levII_oplock(work, fp, 1);
+}
+
+/**
+ * smb2_map_lease_to_oplock() - map lease state to corresponding oplock type
+ * @lease_state: lease type
+ *
+ * Return: 0 if no mapping, otherwise corresponding oplock type
+ */
+__u8 smb2_map_lease_to_oplock(__le32 lease_state)
+{
+ if (lease_state == (SMB2_LEASE_HANDLE_CACHING_LE |
+ SMB2_LEASE_READ_CACHING_LE |
+ SMB2_LEASE_WRITE_CACHING_LE)) {
+ return SMB2_OPLOCK_LEVEL_BATCH;
+ } else if (lease_state != SMB2_LEASE_WRITE_CACHING_LE &&
+ lease_state & SMB2_LEASE_WRITE_CACHING_LE) {
+ if (!(lease_state & SMB2_LEASE_HANDLE_CACHING_LE))
+ return SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+ } else if (lease_state & SMB2_LEASE_READ_CACHING_LE) {
+ return SMB2_OPLOCK_LEVEL_II;
+ }
+ return 0;
+}
+
+/**
+ * create_lease_buf() - create lease context for open cmd response
+ * @rbuf: buffer to create lease context response
+ * @lease: lease state information to encode into the context
+ */
+void create_lease_buf(u8 *rbuf, struct lease *lease)
+{
+ char *LeaseKey = (char *)&lease->lease_key;
+
+ if (lease->version == 2) {
+ struct create_lease_v2 *buf = (struct create_lease_v2 *)rbuf;
+ char *ParentLeaseKey = (char *)&lease->parent_lease_key;
+
+ memset(buf, 0, sizeof(struct create_lease_v2));
+ buf->lcontext.LeaseKeyLow = *((__le64 *)LeaseKey);
+ buf->lcontext.LeaseKeyHigh = *((__le64 *)(LeaseKey + 8));
+ buf->lcontext.LeaseFlags = lease->flags;
+ buf->lcontext.LeaseState = lease->state;
+ buf->lcontext.ParentLeaseKeyLow = *((__le64 *)ParentLeaseKey);
+ buf->lcontext.ParentLeaseKeyHigh = *((__le64 *)(ParentLeaseKey + 8));
+ buf->ccontext.DataOffset = cpu_to_le16(offsetof
+ (struct create_lease_v2, lcontext));
+ buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context_v2));
+ buf->ccontext.NameOffset = cpu_to_le16(offsetof
+ (struct create_lease_v2, Name));
+ buf->ccontext.NameLength = cpu_to_le16(4);
+ buf->Name[0] = 'R';
+ buf->Name[1] = 'q';
+ buf->Name[2] = 'L';
+ buf->Name[3] = 's';
+ } else {
+ struct create_lease *buf = (struct create_lease *)rbuf;
+
+ memset(buf, 0, sizeof(struct create_lease));
+ buf->lcontext.LeaseKeyLow = *((__le64 *)LeaseKey);
+ buf->lcontext.LeaseKeyHigh = *((__le64 *)(LeaseKey + 8));
+ buf->lcontext.LeaseFlags = lease->flags;
+ buf->lcontext.LeaseState = lease->state;
+ buf->ccontext.DataOffset = cpu_to_le16(offsetof
+ (struct create_lease, lcontext));
+ buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context));
+ buf->ccontext.NameOffset = cpu_to_le16(offsetof
+ (struct create_lease, Name));
+ buf->ccontext.NameLength = cpu_to_le16(4);
+ buf->Name[0] = 'R';
+ buf->Name[1] = 'q';
+ buf->Name[2] = 'L';
+ buf->Name[3] = 's';
+ }
+}
+
+/**
+ * parse_lease_state() - parse lease context contained in file open request
+ * @open_req: buffer containing smb2 file open(create) request
+ *
+ * Return: allocated lease context object on success, otherwise NULL
+ */
+struct lease_ctx_info *parse_lease_state(void *open_req)
+{
+ char *data_offset;
+ struct create_context *cc;
+ unsigned int next = 0;
+ char *name;
+ bool found = false;
+ struct smb2_create_req *req = (struct smb2_create_req *)open_req;
+ struct lease_ctx_info *lreq = kzalloc(sizeof(struct lease_ctx_info),
+ GFP_KERNEL);
+ if (!lreq)
+ return NULL;
+
+ data_offset = (char *)req + 4 + le32_to_cpu(req->CreateContextsOffset);
+ cc = (struct create_context *)data_offset;
+ do {
+ cc = (struct create_context *)((char *)cc + next);
+ name = le16_to_cpu(cc->NameOffset) + (char *)cc;
+ if (le16_to_cpu(cc->NameLength) != 4 ||
+ strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) {
+ next = le32_to_cpu(cc->Next);
+ continue;
+ }
+ found = true;
+ break;
+ } while (next != 0);
+
+ if (found) {
+ if (sizeof(struct lease_context_v2) == le32_to_cpu(cc->DataLength)) {
+ struct create_lease_v2 *lc = (struct create_lease_v2 *)cc;
+
+ *((__le64 *)lreq->lease_key) = lc->lcontext.LeaseKeyLow;
+ *((__le64 *)(lreq->lease_key + 8)) = lc->lcontext.LeaseKeyHigh;
+ lreq->req_state = lc->lcontext.LeaseState;
+ lreq->flags = lc->lcontext.LeaseFlags;
+ lreq->duration = lc->lcontext.LeaseDuration;
+ *((__le64 *)lreq->parent_lease_key) = lc->lcontext.ParentLeaseKeyLow;
+ *((__le64 *)(lreq->parent_lease_key + 8)) = lc->lcontext.ParentLeaseKeyHigh;
+ lreq->version = 2;
+ } else {
+ struct create_lease *lc = (struct create_lease *)cc;
+
+ *((__le64 *)lreq->lease_key) = lc->lcontext.LeaseKeyLow;
+ *((__le64 *)(lreq->lease_key + 8)) = lc->lcontext.LeaseKeyHigh;
+ lreq->req_state = lc->lcontext.LeaseState;
+ lreq->flags = lc->lcontext.LeaseFlags;
+ lreq->duration = lc->lcontext.LeaseDuration;
+ lreq->version = 1;
+ }
+ return lreq;
+ }
+
+ kfree(lreq);
+ return NULL;
+}
+
+/**
+ * smb2_find_context_vals() - find a particular context info in open request
+ * @open_req: buffer containing smb2 file open(create) request
+ * @tag: context name to search for
+ *
+ * Return: pointer to requested context, NULL if @tag context not found
+ * or error pointer if name length is invalid.
+ */
+struct create_context *smb2_find_context_vals(void *open_req, const char *tag)
+{
+ char *data_offset;
+ struct create_context *cc;
+ unsigned int next = 0;
+ char *name;
+ struct smb2_create_req *req = (struct smb2_create_req *)open_req;
+
+ data_offset = (char *)req + 4 + le32_to_cpu(req->CreateContextsOffset);
+ cc = (struct create_context *)data_offset;
+ do {
+ int val;
+
+ cc = (struct create_context *)((char *)cc + next);
+ name = le16_to_cpu(cc->NameOffset) + (char *)cc;
+ val = le16_to_cpu(cc->NameLength);
+ if (val < 4)
+ return ERR_PTR(-EINVAL);
+
+ if (memcmp(name, tag, val) == 0)
+ return cc;
+ next = le32_to_cpu(cc->Next);
+ } while (next != 0);
+
+ return NULL;
+}
+
+/**
+ * create_durable_rsp_buf() - create durable handle context
+ * @cc: buffer to create durable context response
+ */
+void create_durable_rsp_buf(char *cc)
+{
+ struct create_durable_rsp *buf;
+
+ buf = (struct create_durable_rsp *)cc;
+ memset(buf, 0, sizeof(struct create_durable_rsp));
+ buf->ccontext.DataOffset = cpu_to_le16(offsetof
+ (struct create_durable_rsp, Data));
+ buf->ccontext.DataLength = cpu_to_le32(8);
+ buf->ccontext.NameOffset = cpu_to_le16(offsetof
+ (struct create_durable_rsp, Name));
+ buf->ccontext.NameLength = cpu_to_le16(4);
+ /* SMB2_CREATE_DURABLE_HANDLE_RESPONSE is "DHnQ" */
+ buf->Name[0] = 'D';
+ buf->Name[1] = 'H';
+ buf->Name[2] = 'n';
+ buf->Name[3] = 'Q';
+}
+
+/**
+ * create_durable_v2_rsp_buf() - create durable handle v2 context
+ * @cc: buffer to create durable context response
+ * @fp: ksmbd file pointer
+ */
+void create_durable_v2_rsp_buf(char *cc, struct ksmbd_file *fp)
+{
+ struct create_durable_v2_rsp *buf;
+
+ buf = (struct create_durable_v2_rsp *)cc;
+ memset(buf, 0, sizeof(struct create_durable_rsp));
+ buf->ccontext.DataOffset = cpu_to_le16(offsetof
+ (struct create_durable_rsp, Data));
+ buf->ccontext.DataLength = cpu_to_le32(8);
+ buf->ccontext.NameOffset = cpu_to_le16(offsetof
+ (struct create_durable_rsp, Name));
+ buf->ccontext.NameLength = cpu_to_le16(4);
+ /* SMB2_CREATE_DURABLE_HANDLE_RESPONSE_V2 is "DH2Q" */
+ buf->Name[0] = 'D';
+ buf->Name[1] = 'H';
+ buf->Name[2] = '2';
+ buf->Name[3] = 'Q';
+
+ buf->Timeout = cpu_to_le32(fp->durable_timeout);
+}
+
+/**
+ * create_mxac_rsp_buf() - create query maximal access context
+ * @cc: buffer to create maximal access context response
+ * @maximal_access: maximal access
+ */
+void create_mxac_rsp_buf(char *cc, int maximal_access)
+{
+ struct create_mxac_rsp *buf;
+
+ buf = (struct create_mxac_rsp *)cc;
+ memset(buf, 0, sizeof(struct create_mxac_rsp));
+ buf->ccontext.DataOffset = cpu_to_le16(offsetof
+ (struct create_mxac_rsp, QueryStatus));
+ buf->ccontext.DataLength = cpu_to_le32(8);
+ buf->ccontext.NameOffset = cpu_to_le16(offsetof
+ (struct create_mxac_rsp, Name));
+ buf->ccontext.NameLength = cpu_to_le16(4);
+ /* SMB2_CREATE_QUERY_MAXIMAL_ACCESS_RESPONSE is "MxAc" */
+ buf->Name[0] = 'M';
+ buf->Name[1] = 'x';
+ buf->Name[2] = 'A';
+ buf->Name[3] = 'c';
+
+ buf->QueryStatus = STATUS_SUCCESS;
+ buf->MaximalAccess = cpu_to_le32(maximal_access);
+}
+
+void create_disk_id_rsp_buf(char *cc, __u64 file_id, __u64 vol_id)
+{
+ struct create_disk_id_rsp *buf;
+
+ buf = (struct create_disk_id_rsp *)cc;
+ memset(buf, 0, sizeof(struct create_disk_id_rsp));
+ buf->ccontext.DataOffset = cpu_to_le16(offsetof
+ (struct create_disk_id_rsp, DiskFileId));
+ buf->ccontext.DataLength = cpu_to_le32(32);
+ buf->ccontext.NameOffset = cpu_to_le16(offsetof
+ (struct create_mxac_rsp, Name));
+ buf->ccontext.NameLength = cpu_to_le16(4);
+ /* SMB2_CREATE_QUERY_ON_DISK_ID_RESPONSE is "QFid" */
+ buf->Name[0] = 'Q';
+ buf->Name[1] = 'F';
+ buf->Name[2] = 'i';
+ buf->Name[3] = 'd';
+
+ buf->DiskFileId = cpu_to_le64(file_id);
+ buf->VolumeId = cpu_to_le64(vol_id);
+}
+
+/**
+ * create_posix_rsp_buf() - create posix extension context
+ * @cc: buffer to create posix context response
+ * @fp: ksmbd file pointer
+ */
+void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp)
+{
+ struct create_posix_rsp *buf;
+ struct inode *inode = file_inode(fp->filp);
+ struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
+
+ buf = (struct create_posix_rsp *)cc;
+ memset(buf, 0, sizeof(struct create_posix_rsp));
+ buf->ccontext.DataOffset = cpu_to_le16(offsetof
+ (struct create_posix_rsp, nlink));
+ buf->ccontext.DataLength = cpu_to_le32(52);
+ buf->ccontext.NameOffset = cpu_to_le16(offsetof
+ (struct create_posix_rsp, Name));
+ buf->ccontext.NameLength = cpu_to_le16(POSIX_CTXT_DATA_LEN);
+ /* SMB2_CREATE_TAG_POSIX is "0x93AD25509CB411E7B42383DE968BCD7C" */
+ buf->Name[0] = 0x93;
+ buf->Name[1] = 0xAD;
+ buf->Name[2] = 0x25;
+ buf->Name[3] = 0x50;
+ buf->Name[4] = 0x9C;
+ buf->Name[5] = 0xB4;
+ buf->Name[6] = 0x11;
+ buf->Name[7] = 0xE7;
+ buf->Name[8] = 0xB4;
+ buf->Name[9] = 0x23;
+ buf->Name[10] = 0x83;
+ buf->Name[11] = 0xDE;
+ buf->Name[12] = 0x96;
+ buf->Name[13] = 0x8B;
+ buf->Name[14] = 0xCD;
+ buf->Name[15] = 0x7C;
+
+ buf->nlink = cpu_to_le32(inode->i_nlink);
+ buf->reparse_tag = cpu_to_le32(fp->volatile_id);
+ buf->mode = cpu_to_le32(inode->i_mode);
+ id_to_sid(from_kuid(user_ns, inode->i_uid),
+ SIDNFS_USER, (struct smb_sid *)&buf->SidBuffer[0]);
+ id_to_sid(from_kgid(user_ns, inode->i_gid),
+ SIDNFS_GROUP, (struct smb_sid *)&buf->SidBuffer[20]);
+}
+
+/**
+ * lookup_lease_in_table() - find the lease object (opinfo) matching a
+ * given lease key, called from the lease break/file close path
+ * @conn: connection instance
+ * @lease_key: lease key to be searched for
+ *
+ * Return: matching opinfo if found, otherwise NULL
+ */
+struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn,
+ char *lease_key)
+{
+ struct oplock_info *opinfo = NULL, *ret_op = NULL;
+ struct lease_table *lt;
+ int ret;
+
+ read_lock(&lease_list_lock);
+ list_for_each_entry(lt, &lease_table_list, l_entry) {
+ if (!memcmp(lt->client_guid, conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE))
+ goto found;
+ }
+
+ read_unlock(&lease_list_lock);
+ return NULL;
+
+found:
+ rcu_read_lock();
+ list_for_each_entry_rcu(opinfo, &lt->lease_list, lease_entry) {
+ if (!atomic_inc_not_zero(&opinfo->refcount))
+ continue;
+ rcu_read_unlock();
+ if (!opinfo->op_state || opinfo->op_state == OPLOCK_CLOSING)
+ goto op_next;
+ if (!(opinfo->o_lease->state &
+ (SMB2_LEASE_HANDLE_CACHING_LE |
+ SMB2_LEASE_WRITE_CACHING_LE)))
+ goto op_next;
+ ret = compare_guid_key(opinfo, conn->ClientGUID,
+ lease_key);
+ if (ret) {
+ ksmbd_debug(OPLOCK, "found opinfo\n");
+ ret_op = opinfo;
+ goto out;
+ }
+op_next:
+ opinfo_put(opinfo);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+
+out:
+ read_unlock(&lease_list_lock);
+ return ret_op;
+}
+
+int smb2_check_durable_oplock(struct ksmbd_file *fp,
+ struct lease_ctx_info *lctx, char *name)
+{
+ struct oplock_info *opinfo = opinfo_get(fp);
+ int ret = 0;
+
+ if (opinfo && opinfo->is_lease) {
+ if (!lctx) {
+ pr_err("open does not include lease\n");
+ ret = -EBADF;
+ goto out;
+ }
+ if (memcmp(opinfo->o_lease->lease_key, lctx->lease_key,
+ SMB2_LEASE_KEY_SIZE)) {
+ pr_err("invalid lease key\n");
+ ret = -EBADF;
+ goto out;
+ }
+ if (name && strcmp(fp->filename, name)) {
+ pr_err("invalid name reconnect %s\n", name);
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+out:
+ if (opinfo)
+ opinfo_put(opinfo);
+ return ret;
+}
diff --git a/fs/ksmbd/oplock.h b/fs/ksmbd/oplock.h
new file mode 100644
index 000000000000..119b8047cfbd
--- /dev/null
+++ b/fs/ksmbd/oplock.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_OPLOCK_H
+#define __KSMBD_OPLOCK_H
+
+#include "smb_common.h"
+
+#define OPLOCK_WAIT_TIME (35 * HZ)
+
+/* SMB2 Oplock levels */
+#define SMB2_OPLOCK_LEVEL_NONE 0x00
+#define SMB2_OPLOCK_LEVEL_II 0x01
+#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08
+#define SMB2_OPLOCK_LEVEL_BATCH 0x09
+#define SMB2_OPLOCK_LEVEL_LEASE 0xFF
+
+/* Oplock states */
+#define OPLOCK_STATE_NONE 0x00
+#define OPLOCK_ACK_WAIT 0x01
+#define OPLOCK_CLOSING 0x02
+
+#define OPLOCK_WRITE_TO_READ 0x01
+#define OPLOCK_READ_HANDLE_TO_READ 0x02
+#define OPLOCK_WRITE_TO_NONE 0x04
+#define OPLOCK_READ_TO_NONE 0x08
+
+#define SMB2_LEASE_KEY_SIZE 16
+
+struct lease_ctx_info {
+ __u8 lease_key[SMB2_LEASE_KEY_SIZE];
+ __le32 req_state;
+ __le32 flags;
+ __le64 duration;
+ __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE];
+ int version;
+};
+
+struct lease_table {
+ char client_guid[SMB2_CLIENT_GUID_SIZE];
+ struct list_head lease_list;
+ struct list_head l_entry;
+ spinlock_t lb_lock;
+};
+
+struct lease {
+ __u8 lease_key[SMB2_LEASE_KEY_SIZE];
+ __le32 state;
+ __le32 new_state;
+ __le32 flags;
+ __le64 duration;
+ __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE];
+ int version;
+ unsigned short epoch;
+ struct lease_table *l_lb;
+};
+
+struct oplock_info {
+ struct ksmbd_conn *conn;
+ struct ksmbd_session *sess;
+ struct ksmbd_work *work;
+ struct ksmbd_file *o_fp;
+ int level;
+ int op_state;
+ unsigned long pending_break;
+ u64 fid;
+ atomic_t breaking_cnt;
+ atomic_t refcount;
+ __u16 Tid;
+ bool is_lease;
+ bool open_trunc; /* truncate on open */
+ struct lease *o_lease;
+ struct list_head interim_list;
+ struct list_head op_entry;
+ struct list_head lease_entry;
+ wait_queue_head_t oplock_q; /* Other server threads */
+ wait_queue_head_t oplock_brk; /* oplock breaking wait */
+ struct rcu_head rcu_head;
+};
+
+struct lease_break_info {
+ __le32 curr_state;
+ __le32 new_state;
+ __le16 epoch;
+ char lease_key[SMB2_LEASE_KEY_SIZE];
+};
+
+struct oplock_break_info {
+ int level;
+ int open_trunc;
+ int fid;
+};
+
+int smb_grant_oplock(struct ksmbd_work *work, int req_op_level,
+ u64 pid, struct ksmbd_file *fp, __u16 tid,
+ struct lease_ctx_info *lctx, int share_ret);
+void smb_break_all_levII_oplock(struct ksmbd_work *work,
+ struct ksmbd_file *fp, int is_trunc);
+int opinfo_write_to_read(struct oplock_info *opinfo);
+int opinfo_read_handle_to_read(struct oplock_info *opinfo);
+int opinfo_write_to_none(struct oplock_info *opinfo);
+int opinfo_read_to_none(struct oplock_info *opinfo);
+void close_id_del_oplock(struct ksmbd_file *fp);
+void smb_break_all_oplock(struct ksmbd_work *work, struct ksmbd_file *fp);
+struct oplock_info *opinfo_get(struct ksmbd_file *fp);
+void opinfo_put(struct oplock_info *opinfo);
+
+/* Lease related functions */
+void create_lease_buf(u8 *rbuf, struct lease *lease);
+struct lease_ctx_info *parse_lease_state(void *open_req);
+__u8 smb2_map_lease_to_oplock(__le32 lease_state);
+int lease_read_to_write(struct oplock_info *opinfo);
+
+/* Durable related functions */
+void create_durable_rsp_buf(char *cc);
+void create_durable_v2_rsp_buf(char *cc, struct ksmbd_file *fp);
+void create_mxac_rsp_buf(char *cc, int maximal_access);
+void create_disk_id_rsp_buf(char *cc, __u64 file_id, __u64 vol_id);
+void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp);
+struct create_context *smb2_find_context_vals(void *open_req, const char *tag);
+struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn,
+ char *lease_key);
+int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci,
+ struct lease_ctx_info *lctx);
+void destroy_lease_table(struct ksmbd_conn *conn);
+int smb2_check_durable_oplock(struct ksmbd_file *fp,
+ struct lease_ctx_info *lctx, char *name);
+#endif /* __KSMBD_OPLOCK_H */
diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c
new file mode 100644
index 000000000000..e6a9f6aa47eb
--- /dev/null
+++ b/fs/ksmbd/server.c
@@ -0,0 +1,633 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include "glob.h"
+#include "oplock.h"
+#include "misc.h"
+#include <linux/sched/signal.h>
+#include <linux/workqueue.h>
+#include <linux/sysfs.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include "server.h"
+#include "smb_common.h"
+#include "smbstatus.h"
+#include "connection.h"
+#include "transport_ipc.h"
+#include "mgmt/user_session.h"
+#include "crypto_ctx.h"
+#include "auth.h"
+
+int ksmbd_debug_types;
+
+struct ksmbd_server_config server_conf;
+
+enum SERVER_CTRL_TYPE {
+ SERVER_CTRL_TYPE_INIT,
+ SERVER_CTRL_TYPE_RESET,
+};
+
+struct server_ctrl_struct {
+ int type;
+ struct work_struct ctrl_work;
+};
+
+static DEFINE_MUTEX(ctrl_lock);
+
+static int ___server_conf_set(int idx, char *val)
+{
+ if (idx >= ARRAY_SIZE(server_conf.conf))
+ return -EINVAL;
+
+ if (!val || val[0] == 0x00)
+ return -EINVAL;
+
+ kfree(server_conf.conf[idx]);
+ server_conf.conf[idx] = kstrdup(val, GFP_KERNEL);
+ if (!server_conf.conf[idx])
+ return -ENOMEM;
+ return 0;
+}
+
+int ksmbd_set_netbios_name(char *v)
+{
+ return ___server_conf_set(SERVER_CONF_NETBIOS_NAME, v);
+}
+
+int ksmbd_set_server_string(char *v)
+{
+ return ___server_conf_set(SERVER_CONF_SERVER_STRING, v);
+}
+
+int ksmbd_set_work_group(char *v)
+{
+ return ___server_conf_set(SERVER_CONF_WORK_GROUP, v);
+}
+
+char *ksmbd_netbios_name(void)
+{
+ return server_conf.conf[SERVER_CONF_NETBIOS_NAME];
+}
+
+char *ksmbd_server_string(void)
+{
+ return server_conf.conf[SERVER_CONF_SERVER_STRING];
+}
+
+char *ksmbd_work_group(void)
+{
+ return server_conf.conf[SERVER_CONF_WORK_GROUP];
+}
+
+/**
+ * check_conn_state() - check state of server thread connection
+ * @work: smb work containing server thread information
+ *
+ * Return: 0 on valid connection, otherwise 1 to reconnect
+ */
+static inline int check_conn_state(struct ksmbd_work *work)
+{
+ struct smb_hdr *rsp_hdr;
+
+ if (ksmbd_conn_exiting(work) || ksmbd_conn_need_reconnect(work)) {
+ rsp_hdr = work->response_buf;
+ rsp_hdr->Status.CifsError = STATUS_CONNECTION_DISCONNECTED;
+ return 1;
+ }
+ return 0;
+}
+
+#define SERVER_HANDLER_CONTINUE 0
+#define SERVER_HANDLER_ABORT 1
+
+static int __process_request(struct ksmbd_work *work, struct ksmbd_conn *conn,
+ u16 *cmd)
+{
+ struct smb_version_cmds *cmds;
+ u16 command;
+ int ret;
+
+ if (check_conn_state(work))
+ return SERVER_HANDLER_CONTINUE;
+
+ if (ksmbd_verify_smb_message(work))
+ return SERVER_HANDLER_ABORT;
+
+ command = conn->ops->get_cmd_val(work);
+ *cmd = command;
+
+andx_again:
+ if (command >= conn->max_cmds) {
+ conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER);
+ return SERVER_HANDLER_CONTINUE;
+ }
+
+ cmds = &conn->cmds[command];
+ if (!cmds->proc) {
+ ksmbd_debug(SMB, "*** not implemented yet cmd = %x\n", command);
+ conn->ops->set_rsp_status(work, STATUS_NOT_IMPLEMENTED);
+ return SERVER_HANDLER_CONTINUE;
+ }
+
+ if (work->sess && conn->ops->is_sign_req(work, command)) {
+ ret = conn->ops->check_sign_req(work);
+ if (!ret) {
+ conn->ops->set_rsp_status(work, STATUS_ACCESS_DENIED);
+ return SERVER_HANDLER_CONTINUE;
+ }
+ }
+
+ ret = cmds->proc(work);
+
+ if (ret < 0)
+ ksmbd_debug(CONN, "Failed to process %u [%d]\n", command, ret);
+ /* AndX commands - chained request can return positive values */
+ else if (ret > 0) {
+ command = ret;
+ *cmd = command;
+ goto andx_again;
+ }
+
+ if (work->send_no_response)
+ return SERVER_HANDLER_ABORT;
+ return SERVER_HANDLER_CONTINUE;
+}
+
+static void __handle_ksmbd_work(struct ksmbd_work *work,
+ struct ksmbd_conn *conn)
+{
+ u16 command = 0;
+ int rc;
+
+ if (conn->ops->allocate_rsp_buf(work))
+ return;
+
+ if (conn->ops->is_transform_hdr &&
+ conn->ops->is_transform_hdr(work->request_buf)) {
+ rc = conn->ops->decrypt_req(work);
+ if (rc < 0) {
+ conn->ops->set_rsp_status(work, STATUS_DATA_ERROR);
+ goto send;
+ }
+
+ work->encrypted = true;
+ }
+
+ rc = conn->ops->init_rsp_hdr(work);
+ if (rc) {
+ /* either uid or tid is not correct */
+ conn->ops->set_rsp_status(work, STATUS_INVALID_HANDLE);
+ goto send;
+ }
+
+ if (conn->ops->check_user_session) {
+ rc = conn->ops->check_user_session(work);
+ if (rc < 0) {
+ command = conn->ops->get_cmd_val(work);
+ conn->ops->set_rsp_status(work,
+ STATUS_USER_SESSION_DELETED);
+ goto send;
+ } else if (rc > 0) {
+ rc = conn->ops->get_ksmbd_tcon(work);
+ if (rc < 0) {
+ conn->ops->set_rsp_status(work,
+ STATUS_NETWORK_NAME_DELETED);
+ goto send;
+ }
+ }
+ }
+
+ do {
+ rc = __process_request(work, conn, &command);
+ if (rc == SERVER_HANDLER_ABORT)
+ break;
+
+ /*
+ * Call smb2_set_rsp_credits() to set the number of credits
+ * granted in the header of the smb2 response.
+ */
+ if (conn->ops->set_rsp_credits) {
+ spin_lock(&conn->credits_lock);
+ rc = conn->ops->set_rsp_credits(work);
+ spin_unlock(&conn->credits_lock);
+ if (rc < 0) {
+ conn->ops->set_rsp_status(work,
+ STATUS_INVALID_PARAMETER);
+ goto send;
+ }
+ }
+
+ if (work->sess &&
+ (work->sess->sign || smb3_11_final_sess_setup_resp(work) ||
+ conn->ops->is_sign_req(work, command)))
+ conn->ops->set_sign_rsp(work);
+ } while (is_chained_smb2_message(work));
+
+ if (work->send_no_response)
+ return;
+
+send:
+ smb3_preauth_hash_rsp(work);
+ if (work->sess && work->sess->enc && work->encrypted &&
+ conn->ops->encrypt_resp) {
+ rc = conn->ops->encrypt_resp(work);
+ if (rc < 0) {
+ conn->ops->set_rsp_status(work, STATUS_DATA_ERROR);
+ goto send;
+ }
+ }
+
+ ksmbd_conn_write(work);
+}
+
+/**
+ * handle_ksmbd_work() - process pending smb work requests
+ * @wk: smb work containing request command buffer
+ *
+ * Called by kworker threads to process queued smb work requests.
+ */
+static void handle_ksmbd_work(struct work_struct *wk)
+{
+ struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work);
+ struct ksmbd_conn *conn = work->conn;
+
+ atomic64_inc(&conn->stats.request_served);
+
+ __handle_ksmbd_work(work, conn);
+
+ ksmbd_conn_try_dequeue_request(work);
+ ksmbd_free_work_struct(work);
+ atomic_dec(&conn->r_count);
+}
+
+/**
+ * queue_ksmbd_work() - queue an smb request to the worker thread queue
+ * for processing the smb command and sending a response
+ * @conn: connection instance
+ *
+ * Reads remaining data from the socket, then creates and submits work.
+ */
+static int queue_ksmbd_work(struct ksmbd_conn *conn)
+{
+ struct ksmbd_work *work;
+
+ work = ksmbd_alloc_work_struct();
+ if (!work) {
+ pr_err("allocation for work failed\n");
+ return -ENOMEM;
+ }
+
+ work->conn = conn;
+ work->request_buf = conn->request_buf;
+ conn->request_buf = NULL;
+
+ if (ksmbd_init_smb_server(work)) {
+ ksmbd_free_work_struct(work);
+ return -EINVAL;
+ }
+
+ ksmbd_conn_enqueue_request(work);
+ atomic_inc(&conn->r_count);
+ /* update activity on connection */
+ conn->last_active = jiffies;
+ INIT_WORK(&work->work, handle_ksmbd_work);
+ ksmbd_queue_work(work);
+ return 0;
+}
+
+static int ksmbd_server_process_request(struct ksmbd_conn *conn)
+{
+ return queue_ksmbd_work(conn);
+}
+
+static int ksmbd_server_terminate_conn(struct ksmbd_conn *conn)
+{
+ ksmbd_sessions_deregister(conn);
+ destroy_lease_table(conn);
+ return 0;
+}
+
+static void ksmbd_server_tcp_callbacks_init(void)
+{
+ struct ksmbd_conn_ops ops;
+
+ ops.process_fn = ksmbd_server_process_request;
+ ops.terminate_fn = ksmbd_server_terminate_conn;
+
+ ksmbd_conn_init_server_callbacks(&ops);
+}
+
+static void server_conf_free(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(server_conf.conf); i++) {
+ kfree(server_conf.conf[i]);
+ server_conf.conf[i] = NULL;
+ }
+}
+
+static int server_conf_init(void)
+{
+ WRITE_ONCE(server_conf.state, SERVER_STATE_STARTING_UP);
+ server_conf.enforced_signing = 0;
+ server_conf.min_protocol = ksmbd_min_protocol();
+ server_conf.max_protocol = ksmbd_max_protocol();
+ server_conf.auth_mechs = KSMBD_AUTH_NTLMSSP;
+#ifdef CONFIG_SMB_SERVER_KERBEROS5
+ server_conf.auth_mechs |= KSMBD_AUTH_KRB5 |
+ KSMBD_AUTH_MSKRB5;
+#endif
+ return 0;
+}
+
+static void server_ctrl_handle_init(struct server_ctrl_struct *ctrl)
+{
+ int ret;
+
+ ret = ksmbd_conn_transport_init();
+ if (ret) {
+ server_queue_ctrl_reset_work();
+ return;
+ }
+
+ WRITE_ONCE(server_conf.state, SERVER_STATE_RUNNING);
+}
+
+static void server_ctrl_handle_reset(struct server_ctrl_struct *ctrl)
+{
+ ksmbd_ipc_soft_reset();
+ ksmbd_conn_transport_destroy();
+ server_conf_free();
+ server_conf_init();
+ WRITE_ONCE(server_conf.state, SERVER_STATE_STARTING_UP);
+}
+
+static void server_ctrl_handle_work(struct work_struct *work)
+{
+ struct server_ctrl_struct *ctrl;
+
+ ctrl = container_of(work, struct server_ctrl_struct, ctrl_work);
+
+ mutex_lock(&ctrl_lock);
+ switch (ctrl->type) {
+ case SERVER_CTRL_TYPE_INIT:
+ server_ctrl_handle_init(ctrl);
+ break;
+ case SERVER_CTRL_TYPE_RESET:
+ server_ctrl_handle_reset(ctrl);
+ break;
+ default:
+ pr_err("Unknown server work type: %d\n", ctrl->type);
+ }
+ mutex_unlock(&ctrl_lock);
+ kfree(ctrl);
+ module_put(THIS_MODULE);
+}
+
+static int __queue_ctrl_work(int type)
+{
+ struct server_ctrl_struct *ctrl;
+
+ ctrl = kmalloc(sizeof(struct server_ctrl_struct), GFP_KERNEL);
+ if (!ctrl)
+ return -ENOMEM;
+
+ __module_get(THIS_MODULE);
+ ctrl->type = type;
+ INIT_WORK(&ctrl->ctrl_work, server_ctrl_handle_work);
+ queue_work(system_long_wq, &ctrl->ctrl_work);
+ return 0;
+}
+
+int server_queue_ctrl_init_work(void)
+{
+ return __queue_ctrl_work(SERVER_CTRL_TYPE_INIT);
+}
+
+int server_queue_ctrl_reset_work(void)
+{
+ return __queue_ctrl_work(SERVER_CTRL_TYPE_RESET);
+}
+
+static ssize_t stats_show(struct class *class, struct class_attribute *attr,
+ char *buf)
+{
+ /*
+ * Increment this each time the stats output format changes,
+ * so user space knows what to do.
+ */
+ static int stats_version = 2;
+ static const char * const state[] = {
+ "startup",
+ "running",
+ "reset",
+ "shutdown"
+ };
+
+ ssize_t sz = scnprintf(buf, PAGE_SIZE, "%d %s %d %lu\n", stats_version,
+ state[server_conf.state], server_conf.tcp_port,
+ server_conf.ipc_last_active / HZ);
+ return sz;
+}
+
+static ssize_t kill_server_store(struct class *class,
+ struct class_attribute *attr, const char *buf,
+ size_t len)
+{
+ if (!sysfs_streq(buf, "hard"))
+ return len;
+
+ pr_info("kill command received\n");
+ mutex_lock(&ctrl_lock);
+ WRITE_ONCE(server_conf.state, SERVER_STATE_RESETTING);
+ __module_get(THIS_MODULE);
+ server_ctrl_handle_reset(NULL);
+ module_put(THIS_MODULE);
+ mutex_unlock(&ctrl_lock);
+ return len;
+}
+
+static const char * const debug_type_strings[] = {"smb", "auth", "vfs",
+ "oplock", "ipc", "conn",
+ "rdma"};
+
+static ssize_t debug_show(struct class *class, struct class_attribute *attr,
+ char *buf)
+{
+ ssize_t sz = 0;
+ int i, pos = 0;
+
+ for (i = 0; i < ARRAY_SIZE(debug_type_strings); i++) {
+ if ((ksmbd_debug_types >> i) & 1) {
+ pos = scnprintf(buf + sz,
+ PAGE_SIZE - sz,
+ "[%s] ",
+ debug_type_strings[i]);
+ } else {
+ pos = scnprintf(buf + sz,
+ PAGE_SIZE - sz,
+ "%s ",
+ debug_type_strings[i]);
+ }
+ sz += pos;
+ }
+ sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n");
+ return sz;
+}
+
+static ssize_t debug_store(struct class *class, struct class_attribute *attr,
+ const char *buf, size_t len)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(debug_type_strings); i++) {
+ if (sysfs_streq(buf, "all")) {
+ if (ksmbd_debug_types == KSMBD_DEBUG_ALL)
+ ksmbd_debug_types = 0;
+ else
+ ksmbd_debug_types = KSMBD_DEBUG_ALL;
+ break;
+ }
+
+ if (sysfs_streq(buf, debug_type_strings[i])) {
+ if (ksmbd_debug_types & (1 << i))
+ ksmbd_debug_types &= ~(1 << i);
+ else
+ ksmbd_debug_types |= (1 << i);
+ break;
+ }
+ }
+
+ return len;
+}
+
+static CLASS_ATTR_RO(stats);
+static CLASS_ATTR_WO(kill_server);
+static CLASS_ATTR_RW(debug);
+
+static struct attribute *ksmbd_control_class_attrs[] = {
+ &class_attr_stats.attr,
+ &class_attr_kill_server.attr,
+ &class_attr_debug.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(ksmbd_control_class);
+
+static struct class ksmbd_control_class = {
+ .name = "ksmbd-control",
+ .owner = THIS_MODULE,
+ .class_groups = ksmbd_control_class_groups,
+};
+
+static int ksmbd_server_shutdown(void)
+{
+ WRITE_ONCE(server_conf.state, SERVER_STATE_SHUTTING_DOWN);
+
+ class_unregister(&ksmbd_control_class);
+ ksmbd_workqueue_destroy();
+ ksmbd_ipc_release();
+ ksmbd_conn_transport_destroy();
+ ksmbd_crypto_destroy();
+ ksmbd_free_global_file_table();
+ destroy_lease_table(NULL);
+ ksmbd_work_pool_destroy();
+ ksmbd_exit_file_cache();
+ server_conf_free();
+ return 0;
+}
+
+static int __init ksmbd_server_init(void)
+{
+ int ret;
+
+ ret = class_register(&ksmbd_control_class);
+ if (ret) {
+ pr_err("Unable to register ksmbd-control class\n");
+ return ret;
+ }
+
+ ksmbd_server_tcp_callbacks_init();
+
+ ret = server_conf_init();
+ if (ret)
+ goto err_unregister;
+
+ ret = ksmbd_work_pool_init();
+ if (ret)
+ goto err_unregister;
+
+ ret = ksmbd_init_file_cache();
+ if (ret)
+ goto err_destroy_work_pools;
+
+ ret = ksmbd_ipc_init();
+ if (ret)
+ goto err_exit_file_cache;
+
+ ret = ksmbd_init_global_file_table();
+ if (ret)
+ goto err_ipc_release;
+
+ ret = ksmbd_inode_hash_init();
+ if (ret)
+ goto err_destroy_file_table;
+
+ ret = ksmbd_crypto_create();
+ if (ret)
+ goto err_release_inode_hash;
+
+ ret = ksmbd_workqueue_init();
+ if (ret)
+ goto err_crypto_destroy;
+ return 0;
+
+err_crypto_destroy:
+ ksmbd_crypto_destroy();
+err_release_inode_hash:
+ ksmbd_release_inode_hash();
+err_destroy_file_table:
+ ksmbd_free_global_file_table();
+err_ipc_release:
+ ksmbd_ipc_release();
+err_exit_file_cache:
+ ksmbd_exit_file_cache();
+err_destroy_work_pools:
+ ksmbd_work_pool_destroy();
+err_unregister:
+ class_unregister(&ksmbd_control_class);
+
+ return ret;
+}
+
+/**
+ * ksmbd_server_exit() - shutdown forker thread and free memory at module exit
+ */
+static void __exit ksmbd_server_exit(void)
+{
+ ksmbd_server_shutdown();
+ ksmbd_release_inode_hash();
+}
+
+MODULE_AUTHOR("Namjae Jeon <linkinjeon@kernel.org>");
+MODULE_VERSION(KSMBD_VERSION);
+MODULE_DESCRIPTION("Linux kernel CIFS/SMB SERVER");
+MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: ecb");
+MODULE_SOFTDEP("pre: hmac");
+MODULE_SOFTDEP("pre: md4");
+MODULE_SOFTDEP("pre: md5");
+MODULE_SOFTDEP("pre: nls");
+MODULE_SOFTDEP("pre: aes");
+MODULE_SOFTDEP("pre: cmac");
+MODULE_SOFTDEP("pre: sha256");
+MODULE_SOFTDEP("pre: sha512");
+MODULE_SOFTDEP("pre: aead2");
+MODULE_SOFTDEP("pre: ccm");
+MODULE_SOFTDEP("pre: gcm");
+module_init(ksmbd_server_init)
+module_exit(ksmbd_server_exit)
diff --git a/fs/ksmbd/server.h b/fs/ksmbd/server.h
new file mode 100644
index 000000000000..ac9d932f8c8a
--- /dev/null
+++ b/fs/ksmbd/server.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __SERVER_H__
+#define __SERVER_H__
+
+#include "smbacl.h"
+
+/*
+ * Server state type
+ */
+enum {
+ SERVER_STATE_STARTING_UP,
+ SERVER_STATE_RUNNING,
+ SERVER_STATE_RESETTING,
+ SERVER_STATE_SHUTTING_DOWN,
+};
+
+/*
+ * Server global config string index
+ */
+enum {
+ SERVER_CONF_NETBIOS_NAME,
+ SERVER_CONF_SERVER_STRING,
+ SERVER_CONF_WORK_GROUP,
+};
+
+struct ksmbd_server_config {
+ unsigned int flags;
+ unsigned int state;
+ short signing;
+ short enforced_signing;
+ short min_protocol;
+ short max_protocol;
+ unsigned short tcp_port;
+ unsigned short ipc_timeout;
+ unsigned long ipc_last_active;
+ unsigned long deadtime;
+ unsigned int share_fake_fscaps;
+ struct smb_sid domain_sid;
+ unsigned int auth_mechs;
+
+ char *conf[SERVER_CONF_WORK_GROUP + 1];
+};
+
+extern struct ksmbd_server_config server_conf;
+
+int ksmbd_set_netbios_name(char *v);
+int ksmbd_set_server_string(char *v);
+int ksmbd_set_work_group(char *v);
+
+char *ksmbd_netbios_name(void);
+char *ksmbd_server_string(void);
+char *ksmbd_work_group(void);
+
+static inline int ksmbd_server_running(void)
+{
+ return READ_ONCE(server_conf.state) == SERVER_STATE_RUNNING;
+}
+
+static inline int ksmbd_server_configurable(void)
+{
+ return READ_ONCE(server_conf.state) < SERVER_STATE_RESETTING;
+}
+
+int server_queue_ctrl_init_work(void);
+int server_queue_ctrl_reset_work(void);
+#endif /* __SERVER_H__ */
diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c
new file mode 100644
index 000000000000..9aa46bb3e10d
--- /dev/null
+++ b/fs/ksmbd/smb2misc.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include "glob.h"
+#include "nterr.h"
+#include "smb2pdu.h"
+#include "smb_common.h"
+#include "smbstatus.h"
+#include "mgmt/user_session.h"
+#include "connection.h"
+
+static int check_smb2_hdr(struct smb2_hdr *hdr)
+{
+ /*
+ * Make sure that this really is an SMB request, not a response.
+ */
+ if (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
+ return 1;
+ return 0;
+}
+
+/*
+ * The following table defines the expected "StructureSize" of SMB2 requests
+ * in order by SMB2 command. This is similar to "wct" in SMB/CIFS requests.
+ *
+ * Note that commands are defined in smb2pdu.h in le16 but the array below is
+ * indexed by command in host byte order
+ */
+static const __le16 smb2_req_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
+ /* SMB2_NEGOTIATE */ cpu_to_le16(36),
+ /* SMB2_SESSION_SETUP */ cpu_to_le16(25),
+ /* SMB2_LOGOFF */ cpu_to_le16(4),
+ /* SMB2_TREE_CONNECT */ cpu_to_le16(9),
+ /* SMB2_TREE_DISCONNECT */ cpu_to_le16(4),
+ /* SMB2_CREATE */ cpu_to_le16(57),
+ /* SMB2_CLOSE */ cpu_to_le16(24),
+ /* SMB2_FLUSH */ cpu_to_le16(24),
+ /* SMB2_READ */ cpu_to_le16(49),
+ /* SMB2_WRITE */ cpu_to_le16(49),
+ /* SMB2_LOCK */ cpu_to_le16(48),
+ /* SMB2_IOCTL */ cpu_to_le16(57),
+ /* SMB2_CANCEL */ cpu_to_le16(4),
+ /* SMB2_ECHO */ cpu_to_le16(4),
+ /* SMB2_QUERY_DIRECTORY */ cpu_to_le16(33),
+ /* SMB2_CHANGE_NOTIFY */ cpu_to_le16(32),
+ /* SMB2_QUERY_INFO */ cpu_to_le16(41),
+ /* SMB2_SET_INFO */ cpu_to_le16(33),
+ /* use 44 for lease break */
+ /* SMB2_OPLOCK_BREAK */ cpu_to_le16(36)
+};
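+
+/*
+ * For example, a well-formed SMB2_READ request must carry a
+ * StructureSize2 of 49; ksmbd_smb2_check_message() below checks each
+ * request's fixed-area size against this table before dispatch.
+ */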
+
+/*
+ * The size of the variable area depends on the offset and length fields
+ * located in different fields for various SMB2 requests. SMB2 requests
+ * with no variable length info show an offset of zero in the offset field.
+ */
+static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = {
+ /* SMB2_NEGOTIATE */ true,
+ /* SMB2_SESSION_SETUP */ true,
+ /* SMB2_LOGOFF */ false,
+ /* SMB2_TREE_CONNECT */ true,
+ /* SMB2_TREE_DISCONNECT */ false,
+ /* SMB2_CREATE */ true,
+ /* SMB2_CLOSE */ false,
+ /* SMB2_FLUSH */ false,
+ /* SMB2_READ */ true,
+ /* SMB2_WRITE */ true,
+ /* SMB2_LOCK */ true,
+ /* SMB2_IOCTL */ true,
+ /* SMB2_CANCEL */ false, /* BB CHECK this not listed in documentation */
+ /* SMB2_ECHO */ false,
+ /* SMB2_QUERY_DIRECTORY */ true,
+ /* SMB2_CHANGE_NOTIFY */ false,
+ /* SMB2_QUERY_INFO */ true,
+ /* SMB2_SET_INFO */ true,
+ /* SMB2_OPLOCK_BREAK */ false
+};
+
+/*
+ * Returns the pointer to the beginning of the data area. The length of
+ * the data area and the offset to it (from the beginning of the SMB) are
+ * also returned.
+ */
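+/*
+ * Example: an SMB2_WRITE request with DataOffset 0x70 (112) and Length
+ * 0x1000 (4096) yields *off = 112, *len = 4096 and a returned data-area
+ * pointer of (char *)hdr + 112.
+ */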
+static char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
+{
+ *off = 0;
+ *len = 0;
+
+	/* error requests do not have a data area */
+ if (hdr->Status && hdr->Status != STATUS_MORE_PROCESSING_REQUIRED &&
+ (((struct smb2_err_rsp *)hdr)->StructureSize) == SMB2_ERROR_STRUCTURE_SIZE2_LE)
+ return NULL;
+
+ /*
+ * Following commands have data areas so we have to get the location
+ * of the data buffer offset and data buffer length for the particular
+ * command.
+ */
+ switch (hdr->Command) {
+ case SMB2_SESSION_SETUP:
+ *off = le16_to_cpu(((struct smb2_sess_setup_req *)hdr)->SecurityBufferOffset);
+ *len = le16_to_cpu(((struct smb2_sess_setup_req *)hdr)->SecurityBufferLength);
+ break;
+ case SMB2_TREE_CONNECT:
+ *off = le16_to_cpu(((struct smb2_tree_connect_req *)hdr)->PathOffset);
+ *len = le16_to_cpu(((struct smb2_tree_connect_req *)hdr)->PathLength);
+ break;
+ case SMB2_CREATE:
+ {
+ if (((struct smb2_create_req *)hdr)->CreateContextsLength) {
+ *off = le32_to_cpu(((struct smb2_create_req *)
+ hdr)->CreateContextsOffset);
+ *len = le32_to_cpu(((struct smb2_create_req *)
+ hdr)->CreateContextsLength);
+ break;
+ }
+
+ *off = le16_to_cpu(((struct smb2_create_req *)hdr)->NameOffset);
+ *len = le16_to_cpu(((struct smb2_create_req *)hdr)->NameLength);
+ break;
+ }
+ case SMB2_QUERY_INFO:
+ *off = le16_to_cpu(((struct smb2_query_info_req *)hdr)->InputBufferOffset);
+ *len = le32_to_cpu(((struct smb2_query_info_req *)hdr)->InputBufferLength);
+ break;
+ case SMB2_SET_INFO:
+ *off = le16_to_cpu(((struct smb2_set_info_req *)hdr)->BufferOffset);
+ *len = le32_to_cpu(((struct smb2_set_info_req *)hdr)->BufferLength);
+ break;
+ case SMB2_READ:
+ *off = le16_to_cpu(((struct smb2_read_req *)hdr)->ReadChannelInfoOffset);
+ *len = le16_to_cpu(((struct smb2_read_req *)hdr)->ReadChannelInfoLength);
+ break;
+ case SMB2_WRITE:
+ if (((struct smb2_write_req *)hdr)->DataOffset) {
+ *off = le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset);
+ *len = le32_to_cpu(((struct smb2_write_req *)hdr)->Length);
+ break;
+ }
+
+ *off = le16_to_cpu(((struct smb2_write_req *)hdr)->WriteChannelInfoOffset);
+ *len = le16_to_cpu(((struct smb2_write_req *)hdr)->WriteChannelInfoLength);
+ break;
+ case SMB2_QUERY_DIRECTORY:
+ *off = le16_to_cpu(((struct smb2_query_directory_req *)hdr)->FileNameOffset);
+ *len = le16_to_cpu(((struct smb2_query_directory_req *)hdr)->FileNameLength);
+ break;
+ case SMB2_LOCK:
+ {
+ int lock_count;
+
+		/*
+		 * The smb2_lock request size of 48 already includes a
+		 * single smb2_lock_element structure.
+		 */
+ lock_count = le16_to_cpu(((struct smb2_lock_req *)hdr)->LockCount) - 1;
+ if (lock_count > 0) {
+ *off = __SMB2_HEADER_STRUCTURE_SIZE + 48;
+ *len = sizeof(struct smb2_lock_element) * lock_count;
+ }
+ break;
+ }
+ case SMB2_IOCTL:
+ *off = le32_to_cpu(((struct smb2_ioctl_req *)hdr)->InputOffset);
+ *len = le32_to_cpu(((struct smb2_ioctl_req *)hdr)->InputCount);
+
+ break;
+ default:
+ ksmbd_debug(SMB, "no length check for command\n");
+ break;
+ }
+
+	/*
+	 * An invalid length or offset probably means the data area is
+	 * invalid, but we have little choice but to ignore it in that case.
+	 */
+ if (*off > 4096) {
+ ksmbd_debug(SMB, "offset %d too large, data area ignored\n",
+ *off);
+ *len = 0;
+ *off = 0;
+ } else if (*off < 0) {
+ ksmbd_debug(SMB,
+			    "negative offset %d to data invalid, ignoring data area\n",
+ *off);
+ *off = 0;
+ *len = 0;
+ } else if (*len < 0) {
+ ksmbd_debug(SMB,
+ "negative data length %d invalid, data area ignored\n",
+ *len);
+ *len = 0;
+ } else if (*len > 128 * 1024) {
+ ksmbd_debug(SMB, "data area larger than 128K: %d\n", *len);
+ *len = 0;
+ }
+
+	/* return pointer to beginning of data area, i.e. offset from SMB start */
+ if ((*off != 0) && (*len != 0))
+ return (char *)hdr + *off;
+ else
+ return NULL;
+}
+
+/*
+ * Calculate the size of the SMB message based on the fixed header
+ * portion, the number of word parameters and the data portion of the message.
+ */
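+/*
+ * Example: for an SMB2_WRITE request (StructureSize 64, StructureSize2
+ * 49) carrying 4096 bytes of data at offset 112, len starts at
+ * 64 + 49 = 113; since the odd StructureSize2 means the last fixed byte
+ * doubles as the first data byte, the final size is 112 + 4096 = 4208.
+ */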
+static unsigned int smb2_calc_size(void *buf)
+{
+ struct smb2_pdu *pdu = (struct smb2_pdu *)buf;
+ struct smb2_hdr *hdr = &pdu->hdr;
+ int offset; /* the offset from the beginning of SMB to data area */
+ int data_length; /* the length of the variable length data area */
+ /* Structure Size has already been checked to make sure it is 64 */
+ int len = le16_to_cpu(hdr->StructureSize);
+
+	/*
+	 * StructureSize2, i.e. the length of the fixed parameter area, has
+	 * already been checked to make sure it is the correct length.
+	 */
+ len += le16_to_cpu(pdu->StructureSize2);
+
+	if (!has_smb2_data_area[le16_to_cpu(hdr->Command)])
+ goto calc_size_exit;
+
+ smb2_get_data_area_len(&offset, &data_length, hdr);
+ ksmbd_debug(SMB, "SMB2 data length %d offset %d\n", data_length,
+ offset);
+
+ if (data_length > 0) {
+		/*
+		 * Check to make sure that the data area begins after the
+		 * fixed area. Note that the last byte of the fixed area is
+		 * part of the data area for some commands, typically those
+		 * with an odd StructureSize, so we must add one to the
+		 * calculation.
+		 */
+ if (offset + 1 < len)
+ ksmbd_debug(SMB,
+ "data area offset %d overlaps SMB2 header %d\n",
+ offset + 1, len);
+ else
+ len = offset + data_length;
+ }
+calc_size_exit:
+ ksmbd_debug(SMB, "SMB2 len %d\n", len);
+ return len;
+}
+
+static inline int smb2_query_info_req_len(struct smb2_query_info_req *h)
+{
+ return le32_to_cpu(h->InputBufferLength) +
+ le32_to_cpu(h->OutputBufferLength);
+}
+
+static inline int smb2_set_info_req_len(struct smb2_set_info_req *h)
+{
+ return le32_to_cpu(h->BufferLength);
+}
+
+static inline int smb2_read_req_len(struct smb2_read_req *h)
+{
+ return le32_to_cpu(h->Length);
+}
+
+static inline int smb2_write_req_len(struct smb2_write_req *h)
+{
+ return le32_to_cpu(h->Length);
+}
+
+static inline int smb2_query_dir_req_len(struct smb2_query_directory_req *h)
+{
+ return le32_to_cpu(h->OutputBufferLength);
+}
+
+static inline int smb2_ioctl_req_len(struct smb2_ioctl_req *h)
+{
+ return le32_to_cpu(h->InputCount) +
+ le32_to_cpu(h->OutputCount);
+}
+
+static inline int smb2_ioctl_resp_len(struct smb2_ioctl_req *h)
+{
+ return le32_to_cpu(h->MaxInputResponse) +
+ le32_to_cpu(h->MaxOutputResponse);
+}
+
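+/*
+ * Each credit covers up to SMB2_MAX_BUFFER_SIZE bytes of payload, so
+ * with the usual 64KB buffer size a 131072-byte SMB2_READ needs a
+ * CreditCharge of at least DIV_ROUND_UP(131072, 65536) = 2; a request
+ * advertising a smaller charge is rejected below.
+ */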
+static int smb2_validate_credit_charge(struct smb2_hdr *hdr)
+{
+ int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len;
+ int credit_charge = le16_to_cpu(hdr->CreditCharge);
+ void *__hdr = hdr;
+
+ switch (hdr->Command) {
+ case SMB2_QUERY_INFO:
+ req_len = smb2_query_info_req_len(__hdr);
+ break;
+ case SMB2_SET_INFO:
+ req_len = smb2_set_info_req_len(__hdr);
+ break;
+ case SMB2_READ:
+ req_len = smb2_read_req_len(__hdr);
+ break;
+ case SMB2_WRITE:
+ req_len = smb2_write_req_len(__hdr);
+ break;
+ case SMB2_QUERY_DIRECTORY:
+ req_len = smb2_query_dir_req_len(__hdr);
+ break;
+ case SMB2_IOCTL:
+ req_len = smb2_ioctl_req_len(__hdr);
+ expect_resp_len = smb2_ioctl_resp_len(__hdr);
+ break;
+ default:
+ return 0;
+ }
+
+ credit_charge = max(1, credit_charge);
+ max_len = max(req_len, expect_resp_len);
+ calc_credit_num = DIV_ROUND_UP(max_len, SMB2_MAX_BUFFER_SIZE);
+
+ if (credit_charge < calc_credit_num) {
+ pr_err("Insufficient credit charge, given: %d, needed: %d\n",
+ credit_charge, calc_credit_num);
+ return 1;
+ }
+
+ return 0;
+}
+
+int ksmbd_smb2_check_message(struct ksmbd_work *work)
+{
+ struct smb2_pdu *pdu = work->request_buf;
+ struct smb2_hdr *hdr = &pdu->hdr;
+ int command;
+ __u32 clc_len; /* calculated length */
+ __u32 len = get_rfc1002_len(pdu);
+
+ if (work->next_smb2_rcv_hdr_off) {
+ pdu = ksmbd_req_buf_next(work);
+ hdr = &pdu->hdr;
+ }
+
+ if (le32_to_cpu(hdr->NextCommand) > 0) {
+ len = le32_to_cpu(hdr->NextCommand);
+ } else if (work->next_smb2_rcv_hdr_off) {
+ len -= work->next_smb2_rcv_hdr_off;
+ len = round_up(len, 8);
+ }
+
+ if (check_smb2_hdr(hdr))
+ return 1;
+
+ if (hdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) {
+ ksmbd_debug(SMB, "Illegal structure size %u\n",
+ le16_to_cpu(hdr->StructureSize));
+ return 1;
+ }
+
+ command = le16_to_cpu(hdr->Command);
+ if (command >= NUMBER_OF_SMB2_COMMANDS) {
+ ksmbd_debug(SMB, "Illegal SMB2 command %d\n", command);
+ return 1;
+ }
+
+ if (smb2_req_struct_sizes[command] != pdu->StructureSize2) {
+ if (command != SMB2_OPLOCK_BREAK_HE &&
+ (hdr->Status == 0 || pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2_LE)) {
+ /* error packets have 9 byte structure size */
+ ksmbd_debug(SMB,
+ "Illegal request size %u for command %d\n",
+ le16_to_cpu(pdu->StructureSize2), command);
+ return 1;
+ } else if (command == SMB2_OPLOCK_BREAK_HE &&
+ hdr->Status == 0 &&
+ le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 &&
+ le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) {
+ /* special case for SMB2.1 lease break message */
+ ksmbd_debug(SMB,
+ "Illegal request size %d for oplock break\n",
+ le16_to_cpu(pdu->StructureSize2));
+ return 1;
+ }
+ }
+
+ if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) &&
+ smb2_validate_credit_charge(hdr)) {
+ work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER);
+ return 1;
+ }
+
+ clc_len = smb2_calc_size(hdr);
+ if (len != clc_len) {
+		/* the request may omit the single implied bcc[0] byte */
+ if (clc_len == len + 1)
+ return 0;
+
+		/*
+		 * Some Windows clients will also pad the final PDU in a
+		 * compound request to 8 bytes.
+		 */
+ if (ALIGN(clc_len, 8) == len)
+ return 0;
+
+ /*
+		 * Windows clients also pad up to 8 bytes when compounding.
+		 * If the padding is longer than eight bytes, log the client
+		 * behavior, since it may indicate a problem, but allow it
+		 * and continue since the frame is parseable.
+ */
+ if (clc_len < len) {
+ ksmbd_debug(SMB,
+ "cli req padded more than expected. Length %d not %d for cmd:%d mid:%llu\n",
+ len, clc_len, command,
+ le64_to_cpu(hdr->MessageId));
+ return 0;
+ }
+
+ if (command == SMB2_LOCK_HE && len == 88)
+ return 0;
+
+ ksmbd_debug(SMB,
+ "cli req too short, len %d not %d. cmd:%d mid:%llu\n",
+ len, clc_len, command,
+ le64_to_cpu(hdr->MessageId));
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int smb2_negotiate_request(struct ksmbd_work *work)
+{
+ return ksmbd_smb_negotiate_common(work, SMB2_NEGOTIATE_HE);
+}
diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c
new file mode 100644
index 000000000000..197473871aa4
--- /dev/null
+++ b/fs/ksmbd/smb2ops.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/slab.h>
+#include "glob.h"
+#include "smb2pdu.h"
+
+#include "auth.h"
+#include "connection.h"
+#include "smb_common.h"
+#include "server.h"
+
+static struct smb_version_values smb21_server_values = {
+ .version_string = SMB21_VERSION_STRING,
+ .protocol_id = SMB21_PROT_ID,
+ .capabilities = SMB2_GLOBAL_CAP_LARGE_MTU,
+ .max_read_size = SMB21_DEFAULT_IOSIZE,
+ .max_write_size = SMB21_DEFAULT_IOSIZE,
+ .max_trans_size = SMB21_DEFAULT_IOSIZE,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .create_lease_size = sizeof(struct create_lease),
+ .create_durable_size = sizeof(struct create_durable_rsp),
+ .create_mxac_size = sizeof(struct create_mxac_rsp),
+ .create_disk_id_size = sizeof(struct create_disk_id_rsp),
+ .create_posix_size = sizeof(struct create_posix_rsp),
+};
+
+static struct smb_version_values smb30_server_values = {
+ .version_string = SMB30_VERSION_STRING,
+ .protocol_id = SMB30_PROT_ID,
+ .capabilities = SMB2_GLOBAL_CAP_LARGE_MTU,
+ .max_read_size = SMB3_DEFAULT_IOSIZE,
+ .max_write_size = SMB3_DEFAULT_IOSIZE,
+ .max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .create_lease_size = sizeof(struct create_lease_v2),
+ .create_durable_size = sizeof(struct create_durable_rsp),
+ .create_durable_v2_size = sizeof(struct create_durable_v2_rsp),
+ .create_mxac_size = sizeof(struct create_mxac_rsp),
+ .create_disk_id_size = sizeof(struct create_disk_id_rsp),
+ .create_posix_size = sizeof(struct create_posix_rsp),
+};
+
+static struct smb_version_values smb302_server_values = {
+ .version_string = SMB302_VERSION_STRING,
+ .protocol_id = SMB302_PROT_ID,
+ .capabilities = SMB2_GLOBAL_CAP_LARGE_MTU,
+ .max_read_size = SMB3_DEFAULT_IOSIZE,
+ .max_write_size = SMB3_DEFAULT_IOSIZE,
+ .max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .create_lease_size = sizeof(struct create_lease_v2),
+ .create_durable_size = sizeof(struct create_durable_rsp),
+ .create_durable_v2_size = sizeof(struct create_durable_v2_rsp),
+ .create_mxac_size = sizeof(struct create_mxac_rsp),
+ .create_disk_id_size = sizeof(struct create_disk_id_rsp),
+ .create_posix_size = sizeof(struct create_posix_rsp),
+};
+
+static struct smb_version_values smb311_server_values = {
+ .version_string = SMB311_VERSION_STRING,
+ .protocol_id = SMB311_PROT_ID,
+ .capabilities = SMB2_GLOBAL_CAP_LARGE_MTU,
+ .max_read_size = SMB3_DEFAULT_IOSIZE,
+ .max_write_size = SMB3_DEFAULT_IOSIZE,
+ .max_trans_size = SMB3_DEFAULT_TRANS_SIZE,
+ .large_lock_type = 0,
+ .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE,
+ .shared_lock_type = SMB2_LOCKFLAG_SHARED,
+ .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+ .header_size = sizeof(struct smb2_hdr),
+ .max_header_size = MAX_SMB2_HDR_SIZE,
+ .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .lock_cmd = SMB2_LOCK,
+ .cap_unix = 0,
+ .cap_nt_find = SMB2_NT_FIND,
+ .cap_large_files = SMB2_LARGE_FILES,
+ .create_lease_size = sizeof(struct create_lease_v2),
+ .create_durable_size = sizeof(struct create_durable_rsp),
+ .create_durable_v2_size = sizeof(struct create_durable_v2_rsp),
+ .create_mxac_size = sizeof(struct create_mxac_rsp),
+ .create_disk_id_size = sizeof(struct create_disk_id_rsp),
+ .create_posix_size = sizeof(struct create_posix_rsp),
+};
+
+static struct smb_version_ops smb2_0_server_ops = {
+ .get_cmd_val = get_smb2_cmd_val,
+ .init_rsp_hdr = init_smb2_rsp_hdr,
+ .set_rsp_status = set_smb2_rsp_status,
+ .allocate_rsp_buf = smb2_allocate_rsp_buf,
+ .set_rsp_credits = smb2_set_rsp_credits,
+ .check_user_session = smb2_check_user_session,
+ .get_ksmbd_tcon = smb2_get_ksmbd_tcon,
+ .is_sign_req = smb2_is_sign_req,
+ .check_sign_req = smb2_check_sign_req,
+ .set_sign_rsp = smb2_set_sign_rsp
+};
+
+static struct smb_version_ops smb3_0_server_ops = {
+ .get_cmd_val = get_smb2_cmd_val,
+ .init_rsp_hdr = init_smb2_rsp_hdr,
+ .set_rsp_status = set_smb2_rsp_status,
+ .allocate_rsp_buf = smb2_allocate_rsp_buf,
+ .set_rsp_credits = smb2_set_rsp_credits,
+ .check_user_session = smb2_check_user_session,
+ .get_ksmbd_tcon = smb2_get_ksmbd_tcon,
+ .is_sign_req = smb2_is_sign_req,
+ .check_sign_req = smb3_check_sign_req,
+ .set_sign_rsp = smb3_set_sign_rsp,
+ .generate_signingkey = ksmbd_gen_smb30_signingkey,
+ .generate_encryptionkey = ksmbd_gen_smb30_encryptionkey,
+ .is_transform_hdr = smb3_is_transform_hdr,
+ .decrypt_req = smb3_decrypt_req,
+ .encrypt_resp = smb3_encrypt_resp
+};
+
+static struct smb_version_ops smb3_11_server_ops = {
+ .get_cmd_val = get_smb2_cmd_val,
+ .init_rsp_hdr = init_smb2_rsp_hdr,
+ .set_rsp_status = set_smb2_rsp_status,
+ .allocate_rsp_buf = smb2_allocate_rsp_buf,
+ .set_rsp_credits = smb2_set_rsp_credits,
+ .check_user_session = smb2_check_user_session,
+ .get_ksmbd_tcon = smb2_get_ksmbd_tcon,
+ .is_sign_req = smb2_is_sign_req,
+ .check_sign_req = smb3_check_sign_req,
+ .set_sign_rsp = smb3_set_sign_rsp,
+ .generate_signingkey = ksmbd_gen_smb311_signingkey,
+ .generate_encryptionkey = ksmbd_gen_smb311_encryptionkey,
+ .is_transform_hdr = smb3_is_transform_hdr,
+ .decrypt_req = smb3_decrypt_req,
+ .encrypt_resp = smb3_encrypt_resp
+};
+
+static struct smb_version_cmds smb2_0_server_cmds[NUMBER_OF_SMB2_COMMANDS] = {
+ [SMB2_NEGOTIATE_HE] = { .proc = smb2_negotiate_request, },
+ [SMB2_SESSION_SETUP_HE] = { .proc = smb2_sess_setup, },
+ [SMB2_TREE_CONNECT_HE] = { .proc = smb2_tree_connect,},
+ [SMB2_TREE_DISCONNECT_HE] = { .proc = smb2_tree_disconnect,},
+ [SMB2_LOGOFF_HE] = { .proc = smb2_session_logoff,},
+ [SMB2_CREATE_HE] = { .proc = smb2_open},
+ [SMB2_QUERY_INFO_HE] = { .proc = smb2_query_info},
+ [SMB2_QUERY_DIRECTORY_HE] = { .proc = smb2_query_dir},
+ [SMB2_CLOSE_HE] = { .proc = smb2_close},
+ [SMB2_ECHO_HE] = { .proc = smb2_echo},
+ [SMB2_SET_INFO_HE] = { .proc = smb2_set_info},
+ [SMB2_READ_HE] = { .proc = smb2_read},
+ [SMB2_WRITE_HE] = { .proc = smb2_write},
+ [SMB2_FLUSH_HE] = { .proc = smb2_flush},
+ [SMB2_CANCEL_HE] = { .proc = smb2_cancel},
+ [SMB2_LOCK_HE] = { .proc = smb2_lock},
+ [SMB2_IOCTL_HE] = { .proc = smb2_ioctl},
+ [SMB2_OPLOCK_BREAK_HE] = { .proc = smb2_oplock_break},
+ [SMB2_CHANGE_NOTIFY_HE] = { .proc = smb2_notify},
+};
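+
+/*
+ * The per-connection dispatcher looks up handlers in this table by the
+ * host-order command value (see get_smb2_cmd_val()); every dialect
+ * initialized below shares the same table.
+ */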
+
+int init_smb2_0_server(struct ksmbd_conn *conn)
+{
+ return -EOPNOTSUPP;
+}
+
+/**
+ * init_smb2_1_server() - initialize a smb server connection with smb2.1
+ * command dispatcher
+ * @conn: connection instance
+ */
+void init_smb2_1_server(struct ksmbd_conn *conn)
+{
+ conn->vals = &smb21_server_values;
+ conn->ops = &smb2_0_server_ops;
+ conn->cmds = smb2_0_server_cmds;
+ conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
+ conn->max_credits = SMB2_MAX_CREDITS;
+ conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+}
+
+/**
+ * init_smb3_0_server() - initialize a smb server connection with smb3.0
+ * command dispatcher
+ * @conn: connection instance
+ */
+void init_smb3_0_server(struct ksmbd_conn *conn)
+{
+ conn->vals = &smb30_server_values;
+ conn->ops = &smb3_0_server_ops;
+ conn->cmds = smb2_0_server_cmds;
+ conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
+ conn->max_credits = SMB2_MAX_CREDITS;
+ conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION &&
+ conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
+}
+
+/**
+ * init_smb3_02_server() - initialize a smb server connection with smb3.02
+ * command dispatcher
+ * @conn: connection instance
+ */
+void init_smb3_02_server(struct ksmbd_conn *conn)
+{
+ conn->vals = &smb302_server_values;
+ conn->ops = &smb3_0_server_ops;
+ conn->cmds = smb2_0_server_cmds;
+ conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
+ conn->max_credits = SMB2_MAX_CREDITS;
+ conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION &&
+ conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
+}
+
+/**
+ * init_smb3_11_server() - initialize a smb server connection with smb3.11
+ * command dispatcher
+ * @conn: connection instance
+ */
+int init_smb3_11_server(struct ksmbd_conn *conn)
+{
+ conn->vals = &smb311_server_values;
+ conn->ops = &smb3_11_server_ops;
+ conn->cmds = smb2_0_server_cmds;
+ conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
+ conn->max_credits = SMB2_MAX_CREDITS;
+ conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
+
+ if (conn->cipher_type)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
+
+ INIT_LIST_HEAD(&conn->preauth_sess_table);
+ return 0;
+}
+
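+/*
+ * The helpers below let startup configuration override the default
+ * per-dialect I/O limits; each updates all dialect tables in place and
+ * is expected to run before any connection negotiates.
+ */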
+void init_smb2_max_read_size(unsigned int sz)
+{
+ smb21_server_values.max_read_size = sz;
+ smb30_server_values.max_read_size = sz;
+ smb302_server_values.max_read_size = sz;
+ smb311_server_values.max_read_size = sz;
+}
+
+void init_smb2_max_write_size(unsigned int sz)
+{
+ smb21_server_values.max_write_size = sz;
+ smb30_server_values.max_write_size = sz;
+ smb302_server_values.max_write_size = sz;
+ smb311_server_values.max_write_size = sz;
+}
+
+void init_smb2_max_trans_size(unsigned int sz)
+{
+ smb21_server_values.max_trans_size = sz;
+ smb30_server_values.max_trans_size = sz;
+ smb302_server_values.max_trans_size = sz;
+ smb311_server_values.max_trans_size = sz;
+}
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
new file mode 100644
index 000000000000..d329ea49fa14
--- /dev/null
+++ b/fs/ksmbd/smb2pdu.c
@@ -0,0 +1,8373 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#include <linux/syscalls.h>
+#include <linux/namei.h>
+#include <linux/statfs.h>
+#include <linux/ethtool.h>
+#include <linux/falloc.h>
+
+#include "glob.h"
+#include "smb2pdu.h"
+#include "smbfsctl.h"
+#include "oplock.h"
+#include "smbacl.h"
+
+#include "auth.h"
+#include "asn1.h"
+#include "connection.h"
+#include "transport_ipc.h"
+#include "transport_rdma.h"
+#include "vfs.h"
+#include "vfs_cache.h"
+#include "misc.h"
+
+#include "server.h"
+#include "smb_common.h"
+#include "smbstatus.h"
+#include "ksmbd_work.h"
+#include "mgmt/user_config.h"
+#include "mgmt/share_config.h"
+#include "mgmt/tree_connect.h"
+#include "mgmt/user_session.h"
+#include "mgmt/ksmbd_ida.h"
+#include "ndr.h"
+
+static void __wbuf(struct ksmbd_work *work, void **req, void **rsp)
+{
+ if (work->next_smb2_rcv_hdr_off) {
+ *req = ksmbd_req_buf_next(work);
+ *rsp = ksmbd_resp_buf_next(work);
+ } else {
+ *req = work->request_buf;
+ *rsp = work->response_buf;
+ }
+}
+
+#define WORK_BUFFERS(w, rq, rs) __wbuf((w), (void **)&(rq), (void **)&(rs))
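+
+/*
+ * Illustrative use in a command handler:
+ *
+ *	struct smb2_read_req *req;
+ *	struct smb2_read_rsp *rsp;
+ *
+ *	WORK_BUFFERS(work, req, rsp);
+ *
+ * This picks the PDU inside a compound request once earlier commands
+ * have been consumed, and the head of the buffer otherwise.
+ */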
+
+/**
+ * check_session_id() - check for valid session id in smb header
+ * @conn: connection instance
+ * @id: session id from smb header
+ *
+ * Return: true if the session id is valid, otherwise false
+ */
+static inline bool check_session_id(struct ksmbd_conn *conn, u64 id)
+{
+ struct ksmbd_session *sess;
+
+ if (id == 0 || id == -1)
+ return false;
+
+ sess = ksmbd_session_lookup_all(conn, id);
+ if (sess)
+ return true;
+ pr_err("Invalid user session id: %llu\n", id);
+ return false;
+}
+
+struct channel *lookup_chann_list(struct ksmbd_session *sess, struct ksmbd_conn *conn)
+{
+ struct channel *chann;
+
+ list_for_each_entry(chann, &sess->ksmbd_chann_list, chann_list) {
+ if (chann->conn == conn)
+ return chann;
+ }
+
+ return NULL;
+}
+
+/**
+ * smb2_get_ksmbd_tcon() - get tree connection information using a tree id.
+ * @work: smb work
+ *
+ * Return: 1 if a matching tree connection is found, 0 for commands that
+ * do not require one, otherwise a negative error
+ */
+int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
+{
+ struct smb2_hdr *req_hdr = work->request_buf;
+ int tree_id;
+
+ work->tcon = NULL;
+ if (work->conn->ops->get_cmd_val(work) == SMB2_TREE_CONNECT_HE ||
+ work->conn->ops->get_cmd_val(work) == SMB2_CANCEL_HE ||
+ work->conn->ops->get_cmd_val(work) == SMB2_LOGOFF_HE) {
+ ksmbd_debug(SMB, "skip to check tree connect request\n");
+ return 0;
+ }
+
+ if (xa_empty(&work->sess->tree_conns)) {
+ ksmbd_debug(SMB, "NO tree connected\n");
+ return -ENOENT;
+ }
+
+ tree_id = le32_to_cpu(req_hdr->Id.SyncId.TreeId);
+ work->tcon = ksmbd_tree_conn_lookup(work->sess, tree_id);
+ if (!work->tcon) {
+ pr_err("Invalid tid %d\n", tree_id);
+ return -EINVAL;
+ }
+
+ return 1;
+}
+
+/**
+ * smb2_set_err_rsp() - set error response code on smb response
+ * @work: smb work containing response buffer
+ */
+void smb2_set_err_rsp(struct ksmbd_work *work)
+{
+ struct smb2_err_rsp *err_rsp;
+
+ if (work->next_smb2_rcv_hdr_off)
+ err_rsp = ksmbd_resp_buf_next(work);
+ else
+ err_rsp = work->response_buf;
+
+ if (err_rsp->hdr.Status != STATUS_STOPPED_ON_SYMLINK) {
+ err_rsp->StructureSize = SMB2_ERROR_STRUCTURE_SIZE2_LE;
+ err_rsp->ErrorContextCount = 0;
+ err_rsp->Reserved = 0;
+ err_rsp->ByteCount = 0;
+ err_rsp->ErrorData[0] = 0;
+ inc_rfc1001_len(work->response_buf, SMB2_ERROR_STRUCTURE_SIZE2);
+ }
+}
+
+/**
+ * is_smb2_neg_cmd() - is it smb2 negotiation command
+ * @work: smb work containing smb header
+ *
+ * Return: true if smb2 negotiation command, otherwise false
+ */
+bool is_smb2_neg_cmd(struct ksmbd_work *work)
+{
+ struct smb2_hdr *hdr = work->request_buf;
+
+ /* is it SMB2 header ? */
+ if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
+ return false;
+
+ /* make sure it is request not response message */
+ if (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
+ return false;
+
+ if (hdr->Command != SMB2_NEGOTIATE)
+ return false;
+
+ return true;
+}
+
+/**
+ * is_smb2_rsp() - is it smb2 response
+ * @work: smb work containing smb response buffer
+ *
+ * Return: true if smb2 response, otherwise false
+ */
+bool is_smb2_rsp(struct ksmbd_work *work)
+{
+ struct smb2_hdr *hdr = work->response_buf;
+
+ /* is it SMB2 header ? */
+ if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
+ return false;
+
+ /* make sure it is response not request message */
+ if (!(hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR))
+ return false;
+
+ return true;
+}
+
+/**
+ * get_smb2_cmd_val() - get smb command code from smb header
+ * @work: smb work containing smb request buffer
+ *
+ * Return: smb2 request command value
+ */
+u16 get_smb2_cmd_val(struct ksmbd_work *work)
+{
+ struct smb2_hdr *rcv_hdr;
+
+ if (work->next_smb2_rcv_hdr_off)
+ rcv_hdr = ksmbd_req_buf_next(work);
+ else
+ rcv_hdr = work->request_buf;
+ return le16_to_cpu(rcv_hdr->Command);
+}
+
+/**
+ * set_smb2_rsp_status() - set error response code on smb2 header
+ * @work: smb work containing response buffer
+ * @err: error response code
+ */
+void set_smb2_rsp_status(struct ksmbd_work *work, __le32 err)
+{
+ struct smb2_hdr *rsp_hdr;
+
+ if (work->next_smb2_rcv_hdr_off)
+ rsp_hdr = ksmbd_resp_buf_next(work);
+ else
+ rsp_hdr = work->response_buf;
+ rsp_hdr->Status = err;
+ smb2_set_err_rsp(work);
+}
+
+/**
+ * init_smb2_neg_rsp() - initialize smb2 response for negotiate command
+ * @work: smb work containing smb request buffer
+ *
+ * smb2 negotiate response is sent in reply to an smb1 negotiate command
+ * for dialect auto-negotiation.
+ */
+int init_smb2_neg_rsp(struct ksmbd_work *work)
+{
+ struct smb2_hdr *rsp_hdr;
+ struct smb2_negotiate_rsp *rsp;
+ struct ksmbd_conn *conn = work->conn;
+
+	if (!conn->need_neg)
+ return -EINVAL;
+ if (!(conn->dialect >= SMB20_PROT_ID &&
+ conn->dialect <= SMB311_PROT_ID))
+ return -EINVAL;
+
+ rsp_hdr = work->response_buf;
+
+ memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
+
+ rsp_hdr->smb2_buf_length =
+ cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+
+ rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
+ rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
+ rsp_hdr->CreditRequest = cpu_to_le16(2);
+ rsp_hdr->Command = SMB2_NEGOTIATE;
+ rsp_hdr->Flags = (SMB2_FLAGS_SERVER_TO_REDIR);
+ rsp_hdr->NextCommand = 0;
+ rsp_hdr->MessageId = 0;
+ rsp_hdr->Id.SyncId.ProcessId = 0;
+ rsp_hdr->Id.SyncId.TreeId = 0;
+ rsp_hdr->SessionId = 0;
+ memset(rsp_hdr->Signature, 0, 16);
+
+ rsp = work->response_buf;
+
+ WARN_ON(ksmbd_conn_good(work));
+
+ rsp->StructureSize = cpu_to_le16(65);
+ ksmbd_debug(SMB, "conn->dialect 0x%x\n", conn->dialect);
+ rsp->DialectRevision = cpu_to_le16(conn->dialect);
+	/* Not setting conn guid rsp->ServerGUID, as it is
+	 * not used by the client for identifying the connection
+ */
+ rsp->Capabilities = cpu_to_le32(conn->vals->capabilities);
+	/* Default maximum message size up to SMB2.0: 64K */
+ rsp->MaxTransactSize = cpu_to_le32(conn->vals->max_trans_size);
+ rsp->MaxReadSize = cpu_to_le32(conn->vals->max_read_size);
+ rsp->MaxWriteSize = cpu_to_le32(conn->vals->max_write_size);
+
+ rsp->SystemTime = cpu_to_le64(ksmbd_systime());
+ rsp->ServerStartTime = 0;
+
+ rsp->SecurityBufferOffset = cpu_to_le16(128);
+ rsp->SecurityBufferLength = cpu_to_le16(AUTH_GSS_LENGTH);
+ ksmbd_copy_gss_neg_header(((char *)(&rsp->hdr) +
+ sizeof(rsp->hdr.smb2_buf_length)) +
+ le16_to_cpu(rsp->SecurityBufferOffset));
+ inc_rfc1001_len(rsp, sizeof(struct smb2_negotiate_rsp) -
+ sizeof(struct smb2_hdr) - sizeof(rsp->Buffer) +
+ AUTH_GSS_LENGTH);
+ rsp->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED_LE;
+ if (server_conf.signing == KSMBD_CONFIG_OPT_MANDATORY)
+ rsp->SecurityMode |= SMB2_NEGOTIATE_SIGNING_REQUIRED_LE;
+ conn->use_spnego = true;
+
+ ksmbd_conn_set_need_negotiate(work);
+ return 0;
+}
+
+static int smb2_consume_credit_charge(struct ksmbd_work *work,
+ unsigned short credit_charge)
+{
+ struct ksmbd_conn *conn = work->conn;
+ unsigned int rsp_credits = 1;
+
+ if (!conn->total_credits)
+ return 0;
+
+ if (credit_charge > 0)
+ rsp_credits = credit_charge;
+
+ conn->total_credits -= rsp_credits;
+ return rsp_credits;
+}
+
+/**
+ * smb2_set_rsp_credits() - set number of credits in response buffer
+ * @work: smb work containing smb response buffer
+ */
+int smb2_set_rsp_credits(struct ksmbd_work *work)
+{
+ struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
+ struct smb2_hdr *hdr = ksmbd_resp_buf_next(work);
+ struct ksmbd_conn *conn = work->conn;
+ unsigned short credits_requested = le16_to_cpu(req_hdr->CreditRequest);
+ unsigned short credit_charge = 1, credits_granted = 0;
+ unsigned short aux_max, aux_credits, min_credits;
+ int rsp_credit_charge;
+
+ if (hdr->Command == SMB2_CANCEL)
+ goto out;
+
+ /* get default minimum credits by shifting maximum credits by 4 */
+ min_credits = conn->max_credits >> 4;
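+	/* e.g. with SMB2_MAX_CREDITS of 8192 this caps a connection at 512 */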
+
+ if (conn->total_credits >= conn->max_credits) {
+ pr_err("Total credits overflow: %d\n", conn->total_credits);
+ conn->total_credits = min_credits;
+ }
+
+ rsp_credit_charge =
+ smb2_consume_credit_charge(work, le16_to_cpu(req_hdr->CreditCharge));
+ if (rsp_credit_charge < 0)
+ return -EINVAL;
+
+ hdr->CreditCharge = cpu_to_le16(rsp_credit_charge);
+
+ if (credits_requested > 0) {
+ aux_credits = credits_requested - 1;
+ aux_max = 32;
+ if (hdr->Command == SMB2_NEGOTIATE)
+ aux_max = 0;
+ aux_credits = (aux_credits < aux_max) ? aux_credits : aux_max;
+ credits_granted = aux_credits + credit_charge;
+
+		/* if the credits granted to the client would exceed the
+		 * default minimum, cap the grant within that limit.
+		 */
+ if ((conn->total_credits + credits_granted) > min_credits)
+ credits_granted = min_credits - conn->total_credits;
+ /*
+		 * TODO: Need to adjust the CreditRequest value according to
+		 * the current cpu load
+ */
+ } else if (conn->total_credits == 0) {
+ credits_granted = 1;
+ }
+
+ conn->total_credits += credits_granted;
+ work->credits_granted += credits_granted;
+
+ if (!req_hdr->NextCommand) {
+ /* Update CreditRequest in last request */
+ hdr->CreditRequest = cpu_to_le16(work->credits_granted);
+ }
+out:
+ ksmbd_debug(SMB,
+ "credits: requested[%d] granted[%d] total_granted[%d]\n",
+ credits_requested, credits_granted,
+ conn->total_credits);
+ return 0;
+}
+
+/**
+ * init_chained_smb2_rsp() - initialize smb2 chained response
+ * @work: smb work containing smb response buffer
+ */
+static void init_chained_smb2_rsp(struct ksmbd_work *work)
+{
+ struct smb2_hdr *req = ksmbd_req_buf_next(work);
+ struct smb2_hdr *rsp = ksmbd_resp_buf_next(work);
+ struct smb2_hdr *rsp_hdr;
+ struct smb2_hdr *rcv_hdr;
+ int next_hdr_offset = 0;
+ int len, new_len;
+
+ /* Len of this response = updated RFC len - offset of previous cmd
+ * in the compound rsp
+ */
+
+	/* Store the current local FID, which may be needed by a subsequent
+	 * command in the compound request
+ */
+ if (req->Command == SMB2_CREATE && rsp->Status == STATUS_SUCCESS) {
+ work->compound_fid =
+ le64_to_cpu(((struct smb2_create_rsp *)rsp)->
+ VolatileFileId);
+ work->compound_pfid =
+ le64_to_cpu(((struct smb2_create_rsp *)rsp)->
+ PersistentFileId);
+ work->compound_sid = le64_to_cpu(rsp->SessionId);
+ }
+
+ len = get_rfc1002_len(work->response_buf) - work->next_smb2_rsp_hdr_off;
+ next_hdr_offset = le32_to_cpu(req->NextCommand);
+
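+	/*
+	 * Pad the previous response out to an 8-byte boundary and grow the
+	 * RFC1001 length by that padding plus the size of the next response
+	 * header, so that NextCommand points at an aligned follow-on PDU.
+	 */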
+ new_len = ALIGN(len, 8);
+ inc_rfc1001_len(work->response_buf, ((sizeof(struct smb2_hdr) - 4)
+ + new_len - len));
+ rsp->NextCommand = cpu_to_le32(new_len);
+
+ work->next_smb2_rcv_hdr_off += next_hdr_offset;
+ work->next_smb2_rsp_hdr_off += new_len;
+ ksmbd_debug(SMB,
+ "Compound req new_len = %d rcv off = %d rsp off = %d\n",
+ new_len, work->next_smb2_rcv_hdr_off,
+ work->next_smb2_rsp_hdr_off);
+
+ rsp_hdr = ksmbd_resp_buf_next(work);
+ rcv_hdr = ksmbd_req_buf_next(work);
+
+ if (!(rcv_hdr->Flags & SMB2_FLAGS_RELATED_OPERATIONS)) {
+ ksmbd_debug(SMB, "related flag should be set\n");
+ work->compound_fid = KSMBD_NO_FID;
+ work->compound_pfid = KSMBD_NO_FID;
+ }
+ memset((char *)rsp_hdr + 4, 0, sizeof(struct smb2_hdr) + 2);
+ rsp_hdr->ProtocolId = rcv_hdr->ProtocolId;
+ rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
+ rsp_hdr->Command = rcv_hdr->Command;
+
+ /*
+ * Message is response. We don't grant oplock yet.
+ */
+ rsp_hdr->Flags = (SMB2_FLAGS_SERVER_TO_REDIR |
+ SMB2_FLAGS_RELATED_OPERATIONS);
+ rsp_hdr->NextCommand = 0;
+ rsp_hdr->MessageId = rcv_hdr->MessageId;
+ rsp_hdr->Id.SyncId.ProcessId = rcv_hdr->Id.SyncId.ProcessId;
+ rsp_hdr->Id.SyncId.TreeId = rcv_hdr->Id.SyncId.TreeId;
+ rsp_hdr->SessionId = rcv_hdr->SessionId;
+ memcpy(rsp_hdr->Signature, rcv_hdr->Signature, 16);
+}
+
+/**
+ * is_chained_smb2_message() - check for chained command
+ * @work: smb work containing smb request buffer
+ *
+ * Return: true if chained request, otherwise false
+ */
+bool is_chained_smb2_message(struct ksmbd_work *work)
+{
+ struct smb2_hdr *hdr = work->request_buf;
+ unsigned int len;
+
+ if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
+ return false;
+
+ hdr = ksmbd_req_buf_next(work);
+ if (le32_to_cpu(hdr->NextCommand) > 0) {
+ ksmbd_debug(SMB, "got SMB2 chained command\n");
+ init_chained_smb2_rsp(work);
+ return true;
+ } else if (work->next_smb2_rcv_hdr_off) {
+ /*
+		 * This is the last request in the chained command; align
+		 * the response to an 8-byte boundary.
+ */
+ len = ALIGN(get_rfc1002_len(work->response_buf), 8);
+ len = len - get_rfc1002_len(work->response_buf);
+ if (len) {
+ ksmbd_debug(SMB, "padding len %u\n", len);
+ inc_rfc1001_len(work->response_buf, len);
+ if (work->aux_payload_sz)
+ work->aux_payload_sz += len;
+ }
+ }
+ return false;
+}
+
+/**
+ * init_smb2_rsp_hdr() - initialize smb2 response
+ * @work: smb work containing smb request buffer
+ *
+ * Return: 0
+ */
+int init_smb2_rsp_hdr(struct ksmbd_work *work)
+{
+ struct smb2_hdr *rsp_hdr = work->response_buf;
+ struct smb2_hdr *rcv_hdr = work->request_buf;
+ struct ksmbd_conn *conn = work->conn;
+
+ memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
+ rsp_hdr->smb2_buf_length =
+ cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+ rsp_hdr->ProtocolId = rcv_hdr->ProtocolId;
+ rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
+ rsp_hdr->Command = rcv_hdr->Command;
+
+ /*
+ * Message is response. We don't grant oplock yet.
+ */
+ rsp_hdr->Flags = (SMB2_FLAGS_SERVER_TO_REDIR);
+ rsp_hdr->NextCommand = 0;
+ rsp_hdr->MessageId = rcv_hdr->MessageId;
+ rsp_hdr->Id.SyncId.ProcessId = rcv_hdr->Id.SyncId.ProcessId;
+ rsp_hdr->Id.SyncId.TreeId = rcv_hdr->Id.SyncId.TreeId;
+ rsp_hdr->SessionId = rcv_hdr->SessionId;
+ memcpy(rsp_hdr->Signature, rcv_hdr->Signature, 16);
+
+ work->syncronous = true;
+ if (work->async_id) {
+ ksmbd_release_id(&conn->async_ida, work->async_id);
+ work->async_id = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * smb2_allocate_rsp_buf() - allocate smb2 response buffer
+ * @work: smb work containing smb request buffer
+ *
+ * Return: 0 on success, otherwise -ENOMEM
+ */
+int smb2_allocate_rsp_buf(struct ksmbd_work *work)
+{
+ struct smb2_hdr *hdr = work->request_buf;
+ size_t small_sz = MAX_CIFS_SMALL_BUFFER_SIZE;
+ size_t large_sz = work->conn->vals->max_trans_size + MAX_SMB2_HDR_SIZE;
+ size_t sz = small_sz;
+ int cmd = le16_to_cpu(hdr->Command);
+
+ if (cmd == SMB2_IOCTL_HE || cmd == SMB2_QUERY_DIRECTORY_HE)
+ sz = large_sz;
+
+ if (cmd == SMB2_QUERY_INFO_HE) {
+ struct smb2_query_info_req *req;
+
+ req = work->request_buf;
+ if (req->InfoType == SMB2_O_INFO_FILE &&
+ (req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
+ req->FileInfoClass == FILE_ALL_INFORMATION))
+ sz = large_sz;
+ }
+
+ /* allocate large response buf for chained commands */
+ if (le32_to_cpu(hdr->NextCommand) > 0)
+ sz = large_sz;
+
+ work->response_buf = kvmalloc(sz, GFP_KERNEL | __GFP_ZERO);
+ if (!work->response_buf)
+ return -ENOMEM;
+
+ work->response_sz = sz;
+ return 0;
+}
+
+/**
+ * smb2_check_user_session() - check for valid session for a user
+ * @work: smb work containing smb request buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb2_check_user_session(struct ksmbd_work *work)
+{
+ struct smb2_hdr *req_hdr = work->request_buf;
+ struct ksmbd_conn *conn = work->conn;
+ unsigned int cmd = conn->ops->get_cmd_val(work);
+ unsigned long long sess_id;
+
+ work->sess = NULL;
+ /*
+	 * The SMB2_ECHO, SMB2_NEGOTIATE and SMB2_SESSION_SETUP commands do
+	 * not require a session id, so there is no need to validate the
+	 * user session for them.
+ */
+ if (cmd == SMB2_ECHO_HE || cmd == SMB2_NEGOTIATE_HE ||
+ cmd == SMB2_SESSION_SETUP_HE)
+ return 0;
+
+ if (!ksmbd_conn_good(work))
+ return -EINVAL;
+
+ sess_id = le64_to_cpu(req_hdr->SessionId);
+ /* Check for validity of user session */
+ work->sess = ksmbd_session_lookup_all(conn, sess_id);
+ if (work->sess)
+ return 1;
+ ksmbd_debug(SMB, "Invalid user session, Uid %llu\n", sess_id);
+ return -EINVAL;
+}
+
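+/*
+ * Tear down a stale session with the given id, but only when the new
+ * user presents the same name and password as the session's owner;
+ * otherwise only the lookup reference is dropped.
+ */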
+static void destroy_previous_session(struct ksmbd_user *user, u64 id)
+{
+ struct ksmbd_session *prev_sess = ksmbd_session_lookup_slowpath(id);
+ struct ksmbd_user *prev_user;
+
+ if (!prev_sess)
+ return;
+
+ prev_user = prev_sess->user;
+
+ if (!prev_user ||
+ strcmp(user->name, prev_user->name) ||
+ user->passkey_sz != prev_user->passkey_sz ||
+ memcmp(user->passkey, prev_user->passkey, user->passkey_sz)) {
+ put_session(prev_sess);
+ return;
+ }
+
+ put_session(prev_sess);
+ ksmbd_session_destroy(prev_sess);
+}
+
+/**
+ * smb2_get_name() - get filename string from the on-the-wire smb format
+ * @share: ksmbd_share_config pointer
+ * @src: source buffer
+ * @maxlen: maximum length of the source string
+ * @local_nls: nls_table pointer
+ *
+ * Return: matching converted filename on success, otherwise error ptr
+ */
+static char *
+smb2_get_name(struct ksmbd_share_config *share, const char *src,
+ const int maxlen, struct nls_table *local_nls)
+{
+ char *name, *unixname;
+
+ name = smb_strndup_from_utf16(src, maxlen, 1, local_nls);
+ if (IS_ERR(name)) {
+ pr_err("failed to get name %ld\n", PTR_ERR(name));
+ return name;
+ }
+
+ /* change it to absolute unix name */
+ ksmbd_conv_path_to_unix(name);
+ ksmbd_strip_last_slash(name);
+
+ unixname = convert_to_unix_name(share, name);
+ kfree(name);
+ if (!unixname) {
+ pr_err("can not convert absolute name\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ksmbd_debug(SMB, "absolute name = %s\n", unixname);
+ return unixname;
+}
+
+int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
+{
+ struct smb2_hdr *rsp_hdr;
+ struct ksmbd_conn *conn = work->conn;
+ int id;
+
+ rsp_hdr = work->response_buf;
+ rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND;
+
+ id = ksmbd_acquire_async_msg_id(&conn->async_ida);
+ if (id < 0) {
+ pr_err("Failed to alloc async message id\n");
+ return id;
+ }
+ work->syncronous = false;
+ work->async_id = id;
+ rsp_hdr->Id.AsyncId = cpu_to_le64(id);
+
+ ksmbd_debug(SMB,
+ "Send interim Response to inform async request id : %d\n",
+ work->async_id);
+
+ work->cancel_fn = fn;
+ work->cancel_argv = arg;
+
+ if (list_empty(&work->async_request_entry)) {
+ spin_lock(&conn->request_lock);
+ list_add_tail(&work->async_request_entry, &conn->async_requests);
+ spin_unlock(&conn->request_lock);
+ }
+
+ return 0;
+}
+
+void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status)
+{
+ struct smb2_hdr *rsp_hdr;
+
+ rsp_hdr = work->response_buf;
+ smb2_set_err_rsp(work);
+ rsp_hdr->Status = status;
+
+ work->multiRsp = 1;
+ ksmbd_conn_write(work);
+ rsp_hdr->Status = 0;
+ work->multiRsp = 0;
+}
+
+static __le32 smb2_get_reparse_tag_special_file(umode_t mode)
+{
+ if (S_ISDIR(mode) || S_ISREG(mode))
+ return 0;
+
+ if (S_ISLNK(mode))
+ return IO_REPARSE_TAG_LX_SYMLINK_LE;
+ else if (S_ISFIFO(mode))
+ return IO_REPARSE_TAG_LX_FIFO_LE;
+ else if (S_ISSOCK(mode))
+ return IO_REPARSE_TAG_AF_UNIX_LE;
+ else if (S_ISCHR(mode))
+ return IO_REPARSE_TAG_LX_CHR_LE;
+ else if (S_ISBLK(mode))
+ return IO_REPARSE_TAG_LX_BLK_LE;
+
+ return 0;
+}
+
+/**
+ * smb2_get_dos_mode() - get file mode in dos format from unix mode
+ * @stat: kstat containing file mode
+ * @attribute: attribute flags
+ *
+ * Return: converted dos mode
+ */
+static int smb2_get_dos_mode(struct kstat *stat, int attribute)
+{
+ int attr = 0;
+
+ if (S_ISDIR(stat->mode)) {
+ attr = ATTR_DIRECTORY |
+ (attribute & (ATTR_HIDDEN | ATTR_SYSTEM));
+ } else {
+ attr = (attribute & 0x00005137) | ATTR_ARCHIVE;
+ attr &= ~(ATTR_DIRECTORY);
+ if (S_ISREG(stat->mode) && (server_conf.share_fake_fscaps &
+ FILE_SUPPORTS_SPARSE_FILES))
+ attr |= ATTR_SPARSE;
+
+ if (smb2_get_reparse_tag_special_file(stat->mode))
+ attr |= ATTR_REPARSE;
+ }
+
+ return attr;
+}
+
+static void build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt,
+ __le16 hash_id)
+{
+ pneg_ctxt->ContextType = SMB2_PREAUTH_INTEGRITY_CAPABILITIES;
+ pneg_ctxt->DataLength = cpu_to_le16(38);
+ pneg_ctxt->HashAlgorithmCount = cpu_to_le16(1);
+ pneg_ctxt->Reserved = cpu_to_le32(0);
+ pneg_ctxt->SaltLength = cpu_to_le16(SMB311_SALT_SIZE);
+ get_random_bytes(pneg_ctxt->Salt, SMB311_SALT_SIZE);
+ pneg_ctxt->HashAlgorithms = hash_id;
+}
+
+static void build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt,
+ __le16 cipher_type)
+{
+ pneg_ctxt->ContextType = SMB2_ENCRYPTION_CAPABILITIES;
+ pneg_ctxt->DataLength = cpu_to_le16(4);
+ pneg_ctxt->Reserved = cpu_to_le32(0);
+ pneg_ctxt->CipherCount = cpu_to_le16(1);
+ pneg_ctxt->Ciphers[0] = cipher_type;
+}
+
+static void build_compression_ctxt(struct smb2_compression_ctx *pneg_ctxt,
+ __le16 comp_algo)
+{
+ pneg_ctxt->ContextType = SMB2_COMPRESSION_CAPABILITIES;
+ pneg_ctxt->DataLength =
+ cpu_to_le16(sizeof(struct smb2_compression_ctx)
+ - sizeof(struct smb2_neg_context));
+ pneg_ctxt->Reserved = cpu_to_le32(0);
+ pneg_ctxt->CompressionAlgorithmCount = cpu_to_le16(1);
+ pneg_ctxt->Reserved1 = cpu_to_le32(0);
+ pneg_ctxt->CompressionAlgorithms[0] = comp_algo;
+}
+
+static void build_sign_cap_ctxt(struct smb2_signing_capabilities *pneg_ctxt,
+ __le16 sign_algo)
+{
+ pneg_ctxt->ContextType = SMB2_SIGNING_CAPABILITIES;
+ pneg_ctxt->DataLength =
+ cpu_to_le16((sizeof(struct smb2_signing_capabilities) + 2)
+ - sizeof(struct smb2_neg_context));
+ pneg_ctxt->Reserved = cpu_to_le32(0);
+ pneg_ctxt->SigningAlgorithmCount = cpu_to_le16(1);
+ pneg_ctxt->SigningAlgorithms[0] = sign_algo;
+}
+
+static void build_posix_ctxt(struct smb2_posix_neg_context *pneg_ctxt)
+{
+ pneg_ctxt->ContextType = SMB2_POSIX_EXTENSIONS_AVAILABLE;
+ pneg_ctxt->DataLength = cpu_to_le16(POSIX_CTXT_DATA_LEN);
+ /* SMB2_CREATE_TAG_POSIX is "0x93AD25509CB411E7B42383DE968BCD7C" */
+ pneg_ctxt->Name[0] = 0x93;
+ pneg_ctxt->Name[1] = 0xAD;
+ pneg_ctxt->Name[2] = 0x25;
+ pneg_ctxt->Name[3] = 0x50;
+ pneg_ctxt->Name[4] = 0x9C;
+ pneg_ctxt->Name[5] = 0xB4;
+ pneg_ctxt->Name[6] = 0x11;
+ pneg_ctxt->Name[7] = 0xE7;
+ pneg_ctxt->Name[8] = 0xB4;
+ pneg_ctxt->Name[9] = 0x23;
+ pneg_ctxt->Name[10] = 0x83;
+ pneg_ctxt->Name[11] = 0xDE;
+ pneg_ctxt->Name[12] = 0x96;
+ pneg_ctxt->Name[13] = 0x8B;
+ pneg_ctxt->Name[14] = 0xCD;
+ pneg_ctxt->Name[15] = 0x7C;
+}
+
+static void assemble_neg_contexts(struct ksmbd_conn *conn,
+ struct smb2_negotiate_rsp *rsp)
+{
+ /* +4 is to account for the RFC1001 len field */
+ char *pneg_ctxt = (char *)rsp +
+ le32_to_cpu(rsp->NegotiateContextOffset) + 4;
+ int neg_ctxt_cnt = 1;
+ int ctxt_size;
+
+ ksmbd_debug(SMB,
+ "assemble SMB2_PREAUTH_INTEGRITY_CAPABILITIES context\n");
+ build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt,
+ conn->preauth_info->Preauth_HashId);
+ rsp->NegotiateContextCount = cpu_to_le16(neg_ctxt_cnt);
+ inc_rfc1001_len(rsp, AUTH_GSS_PADDING);
+ ctxt_size = sizeof(struct smb2_preauth_neg_context);
+ /* Round to 8 byte boundary */
+ pneg_ctxt += round_up(sizeof(struct smb2_preauth_neg_context), 8);
+
+ if (conn->cipher_type) {
+ ctxt_size = round_up(ctxt_size, 8);
+ ksmbd_debug(SMB,
+ "assemble SMB2_ENCRYPTION_CAPABILITIES context\n");
+ build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt,
+ conn->cipher_type);
+ rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
+ ctxt_size += sizeof(struct smb2_encryption_neg_context) + 2;
+ /* Round to 8 byte boundary */
+ pneg_ctxt +=
+ round_up(sizeof(struct smb2_encryption_neg_context) + 2,
+ 8);
+ }
+
+ if (conn->compress_algorithm) {
+ ctxt_size = round_up(ctxt_size, 8);
+ ksmbd_debug(SMB,
+ "assemble SMB2_COMPRESSION_CAPABILITIES context\n");
+ /* Temporarily set to SMB3_COMPRESS_NONE */
+ build_compression_ctxt((struct smb2_compression_ctx *)pneg_ctxt,
+ conn->compress_algorithm);
+ rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
+ ctxt_size += sizeof(struct smb2_compression_ctx) + 2;
+ /* Round to 8 byte boundary */
+ pneg_ctxt += round_up(sizeof(struct smb2_compression_ctx) + 2,
+ 8);
+ }
+
+ if (conn->posix_ext_supported) {
+ ctxt_size = round_up(ctxt_size, 8);
+ ksmbd_debug(SMB,
+ "assemble SMB2_POSIX_EXTENSIONS_AVAILABLE context\n");
+ build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt);
+ rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
+ ctxt_size += sizeof(struct smb2_posix_neg_context);
+ /* Round to 8 byte boundary */
+ pneg_ctxt += round_up(sizeof(struct smb2_posix_neg_context), 8);
+ }
+
+ if (conn->signing_negotiated) {
+ ctxt_size = round_up(ctxt_size, 8);
+ ksmbd_debug(SMB,
+ "assemble SMB2_SIGNING_CAPABILITIES context\n");
+ build_sign_cap_ctxt((struct smb2_signing_capabilities *)pneg_ctxt,
+ conn->signing_algorithm);
+ rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
+ ctxt_size += sizeof(struct smb2_signing_capabilities) + 2;
+ }
+
+ inc_rfc1001_len(rsp, ctxt_size);
+}
+
+static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn,
+ struct smb2_preauth_neg_context *pneg_ctxt)
+{
+ __le32 err = STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP;
+
+ if (pneg_ctxt->HashAlgorithms == SMB2_PREAUTH_INTEGRITY_SHA512) {
+ conn->preauth_info->Preauth_HashId =
+ SMB2_PREAUTH_INTEGRITY_SHA512;
+ err = STATUS_SUCCESS;
+ }
+
+ return err;
+}
+
+static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
+ struct smb2_encryption_neg_context *pneg_ctxt,
+ int len_of_ctxts)
+{
+ int cph_cnt = le16_to_cpu(pneg_ctxt->CipherCount);
+ int i, cphs_size = cph_cnt * sizeof(__le16);
+
+ conn->cipher_type = 0;
+
+ if (sizeof(struct smb2_encryption_neg_context) + cphs_size >
+ len_of_ctxts) {
+ pr_err("Invalid cipher count(%d)\n", cph_cnt);
+ return;
+ }
+
+ if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION))
+ return;
+
+ for (i = 0; i < cph_cnt; i++) {
+ if (pneg_ctxt->Ciphers[i] == SMB2_ENCRYPTION_AES128_GCM ||
+ pneg_ctxt->Ciphers[i] == SMB2_ENCRYPTION_AES128_CCM ||
+ pneg_ctxt->Ciphers[i] == SMB2_ENCRYPTION_AES256_CCM ||
+ pneg_ctxt->Ciphers[i] == SMB2_ENCRYPTION_AES256_GCM) {
+ ksmbd_debug(SMB, "Cipher ID = 0x%x\n",
+ pneg_ctxt->Ciphers[i]);
+ conn->cipher_type = pneg_ctxt->Ciphers[i];
+ break;
+ }
+ }
+}
+
+static void decode_compress_ctxt(struct ksmbd_conn *conn,
+ struct smb2_compression_ctx *pneg_ctxt)
+{
+ conn->compress_algorithm = SMB3_COMPRESS_NONE;
+}
+
+static void decode_sign_cap_ctxt(struct ksmbd_conn *conn,
+ struct smb2_signing_capabilities *pneg_ctxt,
+ int len_of_ctxts)
+{
+ int sign_algo_cnt = le16_to_cpu(pneg_ctxt->SigningAlgorithmCount);
+ int i, sign_alos_size = sign_algo_cnt * sizeof(__le16);
+
+ conn->signing_negotiated = false;
+
+ if (sizeof(struct smb2_signing_capabilities) + sign_alos_size >
+ len_of_ctxts) {
+ pr_err("Invalid signing algorithm count(%d)\n", sign_algo_cnt);
+ return;
+ }
+
+ for (i = 0; i < sign_algo_cnt; i++) {
+ if (pneg_ctxt->SigningAlgorithms[i] == SIGNING_ALG_HMAC_SHA256 ||
+ pneg_ctxt->SigningAlgorithms[i] == SIGNING_ALG_AES_CMAC) {
+ ksmbd_debug(SMB, "Signing Algorithm ID = 0x%x\n",
+ pneg_ctxt->SigningAlgorithms[i]);
+ conn->signing_negotiated = true;
+ conn->signing_algorithm =
+ pneg_ctxt->SigningAlgorithms[i];
+ break;
+ }
+ }
+}
+
+static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
+ struct smb2_negotiate_req *req)
+{
+ /* +4 is to account for the RFC1001 len field */
+ struct smb2_neg_context *pctx = (struct smb2_neg_context *)((char *)req + 4);
+ int i = 0, len_of_ctxts;
+ int offset = le32_to_cpu(req->NegotiateContextOffset);
+ int neg_ctxt_cnt = le16_to_cpu(req->NegotiateContextCount);
+ int len_of_smb = be32_to_cpu(req->hdr.smb2_buf_length);
+ __le32 status = STATUS_INVALID_PARAMETER;
+
+ ksmbd_debug(SMB, "decoding %d negotiate contexts\n", neg_ctxt_cnt);
+ if (len_of_smb <= offset) {
+		ksmbd_debug(SMB, "Invalid request: negotiate context offset\n");
+ return status;
+ }
+
+ len_of_ctxts = len_of_smb - offset;
+
+ while (i++ < neg_ctxt_cnt) {
+ int clen;
+
+ /* check that offset is not beyond end of SMB */
+ if (len_of_ctxts == 0)
+ break;
+
+ if (len_of_ctxts < sizeof(struct smb2_neg_context))
+ break;
+
+ pctx = (struct smb2_neg_context *)((char *)pctx + offset);
+ clen = le16_to_cpu(pctx->DataLength);
+ if (clen + sizeof(struct smb2_neg_context) > len_of_ctxts)
+ break;
+
+ if (pctx->ContextType == SMB2_PREAUTH_INTEGRITY_CAPABILITIES) {
+ ksmbd_debug(SMB,
+ "deassemble SMB2_PREAUTH_INTEGRITY_CAPABILITIES context\n");
+ if (conn->preauth_info->Preauth_HashId)
+ break;
+
+ status = decode_preauth_ctxt(conn,
+ (struct smb2_preauth_neg_context *)pctx);
+ if (status != STATUS_SUCCESS)
+ break;
+ } else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES) {
+ ksmbd_debug(SMB,
+ "deassemble SMB2_ENCRYPTION_CAPABILITIES context\n");
+ if (conn->cipher_type)
+ break;
+
+ decode_encrypt_ctxt(conn,
+ (struct smb2_encryption_neg_context *)pctx,
+ len_of_ctxts);
+ } else if (pctx->ContextType == SMB2_COMPRESSION_CAPABILITIES) {
+ ksmbd_debug(SMB,
+ "deassemble SMB2_COMPRESSION_CAPABILITIES context\n");
+ if (conn->compress_algorithm)
+ break;
+
+ decode_compress_ctxt(conn,
+ (struct smb2_compression_ctx *)pctx);
+ } else if (pctx->ContextType == SMB2_NETNAME_NEGOTIATE_CONTEXT_ID) {
+ ksmbd_debug(SMB,
+ "deassemble SMB2_NETNAME_NEGOTIATE_CONTEXT_ID context\n");
+ } else if (pctx->ContextType == SMB2_POSIX_EXTENSIONS_AVAILABLE) {
+ ksmbd_debug(SMB,
+ "deassemble SMB2_POSIX_EXTENSIONS_AVAILABLE context\n");
+ conn->posix_ext_supported = true;
+ } else if (pctx->ContextType == SMB2_SIGNING_CAPABILITIES) {
+ ksmbd_debug(SMB,
+ "deassemble SMB2_SIGNING_CAPABILITIES context\n");
+ decode_sign_cap_ctxt(conn,
+ (struct smb2_signing_capabilities *)pctx,
+ len_of_ctxts);
+ }
+
+ /* offsets must be 8 byte aligned */
+ clen = (clen + 7) & ~0x7;
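+		/* e.g. a DataLength of 38 rounds up to 40 */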
+ offset = clen + sizeof(struct smb2_neg_context);
+ len_of_ctxts -= clen + sizeof(struct smb2_neg_context);
+ }
+ return status;
+}
+
+/**
+ * smb2_handle_negotiate() - handler for smb2 negotiate command
+ * @work: smb work containing smb request buffer
+ *
+ * Return: 0
+ */
+int smb2_handle_negotiate(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_negotiate_req *req = work->request_buf;
+ struct smb2_negotiate_rsp *rsp = work->response_buf;
+ int rc = 0;
+ __le32 status;
+
+ ksmbd_debug(SMB, "Received negotiate request\n");
+ conn->need_neg = false;
+ if (ksmbd_conn_good(work)) {
+ pr_err("conn->tcp_status is already in CifsGood State\n");
+ work->send_no_response = 1;
+ return rc;
+ }
+
+ if (req->DialectCount == 0) {
+ pr_err("malformed packet\n");
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ rc = -EINVAL;
+ goto err_out;
+ }
+
+ conn->cli_cap = le32_to_cpu(req->Capabilities);
+ switch (conn->dialect) {
+ case SMB311_PROT_ID:
+ conn->preauth_info =
+ kzalloc(sizeof(struct preauth_integrity_info),
+ GFP_KERNEL);
+ if (!conn->preauth_info) {
+ rc = -ENOMEM;
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ goto err_out;
+ }
+
+ status = deassemble_neg_contexts(conn, req);
+ if (status != STATUS_SUCCESS) {
+ pr_err("deassemble_neg_contexts error(0x%x)\n",
+ status);
+ rsp->hdr.Status = status;
+ rc = -EINVAL;
+ goto err_out;
+ }
+
+ rc = init_smb3_11_server(conn);
+ if (rc < 0) {
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ goto err_out;
+ }
+
+ ksmbd_gen_preauth_integrity_hash(conn,
+ work->request_buf,
+ conn->preauth_info->Preauth_HashValue);
+ rsp->NegotiateContextOffset =
+ cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
+ assemble_neg_contexts(conn, rsp);
+ break;
+ case SMB302_PROT_ID:
+ init_smb3_02_server(conn);
+ break;
+ case SMB30_PROT_ID:
+ init_smb3_0_server(conn);
+ break;
+ case SMB21_PROT_ID:
+ init_smb2_1_server(conn);
+ break;
+ case SMB20_PROT_ID:
+ rc = init_smb2_0_server(conn);
+ if (rc) {
+ rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+ goto err_out;
+ }
+ break;
+ case SMB2X_PROT_ID:
+ case BAD_PROT_ID:
+ default:
+ ksmbd_debug(SMB, "Server dialect :0x%x not supported\n",
+ conn->dialect);
+ rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+ rc = -EINVAL;
+ goto err_out;
+ }
+ rsp->Capabilities = cpu_to_le32(conn->vals->capabilities);
+
+ /* For stats */
+ conn->connection_type = conn->dialect;
+
+ rsp->MaxTransactSize = cpu_to_le32(conn->vals->max_trans_size);
+ rsp->MaxReadSize = cpu_to_le32(conn->vals->max_read_size);
+ rsp->MaxWriteSize = cpu_to_le32(conn->vals->max_write_size);
+
+ if (conn->dialect > SMB20_PROT_ID) {
+ memcpy(conn->ClientGUID, req->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE);
+ conn->cli_sec_mode = le16_to_cpu(req->SecurityMode);
+ }
+
+ rsp->StructureSize = cpu_to_le16(65);
+ rsp->DialectRevision = cpu_to_le16(conn->dialect);
+	/* Not setting conn guid rsp->ServerGUID, as it is
+	 * not used by the client for identifying the server
+ */
+ memset(rsp->ServerGUID, 0, SMB2_CLIENT_GUID_SIZE);
+
+ rsp->SystemTime = cpu_to_le64(ksmbd_systime());
+ rsp->ServerStartTime = 0;
+ ksmbd_debug(SMB, "negotiate context offset %d, count %d\n",
+ le32_to_cpu(rsp->NegotiateContextOffset),
+ le16_to_cpu(rsp->NegotiateContextCount));
+
+ rsp->SecurityBufferOffset = cpu_to_le16(128);
+ rsp->SecurityBufferLength = cpu_to_le16(AUTH_GSS_LENGTH);
+ ksmbd_copy_gss_neg_header(((char *)(&rsp->hdr) +
+ sizeof(rsp->hdr.smb2_buf_length)) +
+ le16_to_cpu(rsp->SecurityBufferOffset));
+ inc_rfc1001_len(rsp, sizeof(struct smb2_negotiate_rsp) -
+ sizeof(struct smb2_hdr) - sizeof(rsp->Buffer) +
+ AUTH_GSS_LENGTH);
+ rsp->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED_LE;
+ conn->use_spnego = true;
+
+ if ((server_conf.signing == KSMBD_CONFIG_OPT_AUTO ||
+ server_conf.signing == KSMBD_CONFIG_OPT_DISABLED) &&
+ req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED_LE)
+ conn->sign = true;
+ else if (server_conf.signing == KSMBD_CONFIG_OPT_MANDATORY) {
+ server_conf.enforced_signing = true;
+ rsp->SecurityMode |= SMB2_NEGOTIATE_SIGNING_REQUIRED_LE;
+ conn->sign = true;
+ }
+
+ conn->srv_sec_mode = le16_to_cpu(rsp->SecurityMode);
+ ksmbd_conn_set_need_negotiate(work);
+
+err_out:
+ if (rc < 0)
+ smb2_set_err_rsp(work);
+
+ return rc;
+}
+
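+/*
+ * Dialect identifiers dispatched on above (MS-SMB2 2.2.3):
+ *
+ *   SMB20_PROT_ID  0x0202    SMB21_PROT_ID  0x0210
+ *   SMB30_PROT_ID  0x0300    SMB302_PROT_ID 0x0302
+ *   SMB311_PROT_ID 0x0311    SMB2X_PROT_ID  0x02ff ("2.???" wildcard)
+ *
+ * Only the 3.1.1 dialect carries negotiate contexts and the preauth
+ * integrity hash set up in the SMB311_PROT_ID branch.
+ */
+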
+static int alloc_preauth_hash(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn)
+{
+ if (sess->Preauth_HashValue)
+ return 0;
+
+ sess->Preauth_HashValue = kmemdup(conn->preauth_info->Preauth_HashValue,
+ PREAUTH_HASHVALUE_SIZE, GFP_KERNEL);
+ if (!sess->Preauth_HashValue)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int generate_preauth_hash(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_session *sess = work->sess;
+ u8 *preauth_hash;
+
+ if (conn->dialect != SMB311_PROT_ID)
+ return 0;
+
+ if (conn->binding) {
+ struct preauth_session *preauth_sess;
+
+ preauth_sess = ksmbd_preauth_session_lookup(conn, sess->id);
+ if (!preauth_sess) {
+ preauth_sess = ksmbd_preauth_session_alloc(conn, sess->id);
+ if (!preauth_sess)
+ return -ENOMEM;
+ }
+
+ preauth_hash = preauth_sess->Preauth_HashValue;
+ } else {
+ if (!sess->Preauth_HashValue)
+ if (alloc_preauth_hash(sess, conn))
+ return -ENOMEM;
+ preauth_hash = sess->Preauth_HashValue;
+ }
+
+ ksmbd_gen_preauth_integrity_hash(conn, work->request_buf, preauth_hash);
+ return 0;
+}
+
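+/*
+ * SMB 3.1.1 preauth integrity (MS-SMB2 3.3.5.4): both ends chain a
+ * SHA-512 hash over every raw NEGOTIATE and SESSION_SETUP message
+ * exchanged until the session becomes valid:
+ *
+ *   H(0)   = 64 zero bytes
+ *   H(n+1) = SHA-512(H(n) || message n)
+ *
+ * ksmbd_gen_preauth_integrity_hash() performs one such chaining step on
+ * the buffer selected above; a binding session setup updates the
+ * per-channel copy held in the preauth_session instead.
+ */
+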
+static int decode_negotiation_token(struct ksmbd_work *work,
+ struct negotiate_message *negblob)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_sess_setup_req *req;
+ int sz;
+
+ if (!conn->use_spnego)
+ return -EINVAL;
+
+ req = work->request_buf;
+ sz = le16_to_cpu(req->SecurityBufferLength);
+
+ if (ksmbd_decode_negTokenInit((char *)negblob, sz, conn)) {
+ if (ksmbd_decode_negTokenTarg((char *)negblob, sz, conn)) {
+ conn->auth_mechs |= KSMBD_AUTH_NTLMSSP;
+ conn->preferred_auth_mech = KSMBD_AUTH_NTLMSSP;
+ conn->use_spnego = false;
+ }
+ }
+ return 0;
+}
+
+static int ntlm_negotiate(struct ksmbd_work *work,
+ struct negotiate_message *negblob)
+{
+ struct smb2_sess_setup_req *req = work->request_buf;
+ struct smb2_sess_setup_rsp *rsp = work->response_buf;
+ struct challenge_message *chgblob;
+ unsigned char *spnego_blob = NULL;
+ u16 spnego_blob_len;
+ char *neg_blob;
+ int sz, rc;
+
+ ksmbd_debug(SMB, "negotiate phase\n");
+ sz = le16_to_cpu(req->SecurityBufferLength);
+ rc = ksmbd_decode_ntlmssp_neg_blob(negblob, sz, work->sess);
+ if (rc)
+ return rc;
+
+ sz = le16_to_cpu(rsp->SecurityBufferOffset);
+ chgblob =
+ (struct challenge_message *)((char *)&rsp->hdr.ProtocolId + sz);
+ memset(chgblob, 0, sizeof(struct challenge_message));
+
+ if (!work->conn->use_spnego) {
+ sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
+ if (sz < 0)
+ return -ENOMEM;
+
+ rsp->SecurityBufferLength = cpu_to_le16(sz);
+ return 0;
+ }
+
+ sz = sizeof(struct challenge_message);
+ sz += (strlen(ksmbd_netbios_name()) * 2 + 1 + 4) * 6;
+
+ neg_blob = kzalloc(sz, GFP_KERNEL);
+ if (!neg_blob)
+ return -ENOMEM;
+
+ chgblob = (struct challenge_message *)neg_blob;
+ sz = ksmbd_build_ntlmssp_challenge_blob(chgblob, work->sess);
+ if (sz < 0) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = build_spnego_ntlmssp_neg_blob(&spnego_blob, &spnego_blob_len,
+ neg_blob, sz);
+ if (rc) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ sz = le16_to_cpu(rsp->SecurityBufferOffset);
+ memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len);
+ rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len);
+
+out:
+ kfree(spnego_blob);
+ kfree(neg_blob);
+ return rc;
+}
+
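+/*
+ * NTLMSSP message flow served by ntlm_negotiate() above and
+ * ntlm_authenticate() below (MS-NLMP 3.2.5):
+ *
+ *   client  -- NEGOTIATE    --> server
+ *   client <-- CHALLENGE    --  server   (blob built above)
+ *   client  -- AUTHENTICATE --> server   (verified below)
+ *
+ * When conn->use_spnego is set, each blob is additionally wrapped in a
+ * SPNEGO token via build_spnego_ntlmssp_*_blob().
+ */
+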
+static struct authenticate_message *user_authblob(struct ksmbd_conn *conn,
+ struct smb2_sess_setup_req *req)
+{
+ int sz;
+
+ if (conn->use_spnego && conn->mechToken)
+ return (struct authenticate_message *)conn->mechToken;
+
+ sz = le16_to_cpu(req->SecurityBufferOffset);
+ return (struct authenticate_message *)((char *)&req->hdr.ProtocolId
+ + sz);
+}
+
+static struct ksmbd_user *session_user(struct ksmbd_conn *conn,
+ struct smb2_sess_setup_req *req)
+{
+ struct authenticate_message *authblob;
+ struct ksmbd_user *user;
+ char *name;
+ int sz;
+
+ authblob = user_authblob(conn, req);
+ sz = le32_to_cpu(authblob->UserName.BufferOffset);
+ name = smb_strndup_from_utf16((const char *)authblob + sz,
+ le16_to_cpu(authblob->UserName.Length),
+ true,
+ conn->local_nls);
+ if (IS_ERR(name)) {
+ pr_err("cannot allocate memory\n");
+ return NULL;
+ }
+
+ ksmbd_debug(SMB, "session setup request for user %s\n", name);
+ user = ksmbd_login_user(name);
+ kfree(name);
+ return user;
+}
+
+static int ntlm_authenticate(struct ksmbd_work *work)
+{
+ struct smb2_sess_setup_req *req = work->request_buf;
+ struct smb2_sess_setup_rsp *rsp = work->response_buf;
+ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_session *sess = work->sess;
+ struct channel *chann = NULL;
+ struct ksmbd_user *user;
+ u64 prev_id;
+ int sz, rc;
+
+ ksmbd_debug(SMB, "authenticate phase\n");
+ if (conn->use_spnego) {
+ unsigned char *spnego_blob;
+ u16 spnego_blob_len;
+
+ rc = build_spnego_ntlmssp_auth_blob(&spnego_blob,
+ &spnego_blob_len,
+ 0);
+ if (rc)
+ return -ENOMEM;
+
+ sz = le16_to_cpu(rsp->SecurityBufferOffset);
+ memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len);
+ rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len);
+ kfree(spnego_blob);
+ inc_rfc1001_len(rsp, spnego_blob_len - 1);
+ }
+
+ user = session_user(conn, req);
+ if (!user) {
+ ksmbd_debug(SMB, "Unknown user name or an error\n");
+ return -EPERM;
+ }
+
+ /* Check for previous session */
+ prev_id = le64_to_cpu(req->PreviousSessionId);
+ if (prev_id && prev_id != sess->id)
+ destroy_previous_session(user, prev_id);
+
+ if (sess->state == SMB2_SESSION_VALID) {
+ /*
+ * Reuse the session if an anonymous user tries to
+ * connect on reauthentication.
+ */
+ if (ksmbd_anonymous_user(user)) {
+ ksmbd_free_user(user);
+ return 0;
+ }
+ ksmbd_free_user(sess->user);
+ }
+
+ sess->user = user;
+ if (user_guest(sess->user)) {
+ if (conn->sign) {
+ ksmbd_debug(SMB, "Guest login not allowed when signing enabled\n");
+ return -EPERM;
+ }
+
+ rsp->SessionFlags = SMB2_SESSION_FLAG_IS_GUEST_LE;
+ } else {
+ struct authenticate_message *authblob;
+
+ authblob = user_authblob(conn, req);
+ sz = le16_to_cpu(req->SecurityBufferLength);
+ rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, sess);
+ if (rc) {
+ set_user_flag(sess->user, KSMBD_USER_FLAG_BAD_PASSWORD);
+ ksmbd_debug(SMB, "authentication failed\n");
+ return -EPERM;
+ }
+
+ /*
+ * If the session state is SMB2_SESSION_VALID, we can
+ * assume this is a reauthentication; the user/password
+ * has already been verified, so return here.
+ */
+ if (sess->state == SMB2_SESSION_VALID) {
+ if (conn->binding)
+ goto binding_session;
+ return 0;
+ }
+
+ if ((conn->sign || server_conf.enforced_signing) ||
+ (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
+ sess->sign = true;
+
+ if (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION &&
+ conn->ops->generate_encryptionkey &&
+ !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
+ rc = conn->ops->generate_encryptionkey(sess);
+ if (rc) {
+ ksmbd_debug(SMB,
+ "SMB3 encryption key generation failed\n");
+ return -EINVAL;
+ }
+ sess->enc = true;
+ rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+ /*
+ * signing is disabled if encryption is enabled
+ * on this session
+ */
+ sess->sign = false;
+ }
+ }
+
+binding_session:
+ if (conn->dialect >= SMB30_PROT_ID) {
+ chann = lookup_chann_list(sess, conn);
+ if (!chann) {
+ chann = kmalloc(sizeof(struct channel), GFP_KERNEL);
+ if (!chann)
+ return -ENOMEM;
+
+ chann->conn = conn;
+ INIT_LIST_HEAD(&chann->chann_list);
+ list_add(&chann->chann_list, &sess->ksmbd_chann_list);
+ }
+ }
+
+ if (conn->ops->generate_signingkey) {
+ rc = conn->ops->generate_signingkey(sess, conn);
+ if (rc) {
+ ksmbd_debug(SMB, "SMB3 signing key generation failed\n");
+ return -EINVAL;
+ }
+ }
+
+ if (conn->dialect > SMB20_PROT_ID) {
+ if (!ksmbd_conn_lookup_dialect(conn)) {
+ pr_err("fail to verify the dialect\n");
+ return -ENOENT;
+ }
+ }
+ return 0;
+}
+
+#ifdef CONFIG_SMB_SERVER_KERBEROS5
+static int krb5_authenticate(struct ksmbd_work *work)
+{
+ struct smb2_sess_setup_req *req = work->request_buf;
+ struct smb2_sess_setup_rsp *rsp = work->response_buf;
+ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_session *sess = work->sess;
+ char *in_blob, *out_blob;
+ struct channel *chann = NULL;
+ u64 prev_sess_id;
+ int in_len, out_len;
+ int retval;
+
+ in_blob = (char *)&req->hdr.ProtocolId +
+ le16_to_cpu(req->SecurityBufferOffset);
+ in_len = le16_to_cpu(req->SecurityBufferLength);
+ out_blob = (char *)&rsp->hdr.ProtocolId +
+ le16_to_cpu(rsp->SecurityBufferOffset);
+ out_len = work->response_sz -
+ offsetof(struct smb2_hdr, smb2_buf_length) -
+ le16_to_cpu(rsp->SecurityBufferOffset);
+
+ /* Check previous session */
+ prev_sess_id = le64_to_cpu(req->PreviousSessionId);
+ if (prev_sess_id && prev_sess_id != sess->id)
+ destroy_previous_session(sess->user, prev_sess_id);
+
+ if (sess->state == SMB2_SESSION_VALID)
+ ksmbd_free_user(sess->user);
+
+ retval = ksmbd_krb5_authenticate(sess, in_blob, in_len,
+ out_blob, &out_len);
+ if (retval) {
+ ksmbd_debug(SMB, "krb5 authentication failed\n");
+ return -EINVAL;
+ }
+ rsp->SecurityBufferLength = cpu_to_le16(out_len);
+ inc_rfc1001_len(rsp, out_len - 1);
+
+ if ((conn->sign || server_conf.enforced_signing) ||
+ (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
+ sess->sign = true;
+
+ if ((conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) &&
+ conn->ops->generate_encryptionkey) {
+ retval = conn->ops->generate_encryptionkey(sess);
+ if (retval) {
+ ksmbd_debug(SMB,
+ "SMB3 encryption key generation failed\n");
+ return -EINVAL;
+ }
+ sess->enc = true;
+ rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+ sess->sign = false;
+ }
+
+ if (conn->dialect >= SMB30_PROT_ID) {
+ chann = lookup_chann_list(sess, conn);
+ if (!chann) {
+ chann = kmalloc(sizeof(struct channel), GFP_KERNEL);
+ if (!chann)
+ return -ENOMEM;
+
+ chann->conn = conn;
+ INIT_LIST_HEAD(&chann->chann_list);
+ list_add(&chann->chann_list, &sess->ksmbd_chann_list);
+ }
+ }
+
+ if (conn->ops->generate_signingkey) {
+ retval = conn->ops->generate_signingkey(sess, conn);
+ if (retval) {
+ ksmbd_debug(SMB, "SMB3 signing key generation failed\n");
+ return -EINVAL;
+ }
+ }
+
+ if (conn->dialect > SMB20_PROT_ID) {
+ if (!ksmbd_conn_lookup_dialect(conn)) {
+ pr_err("fail to verify the dialect\n");
+ return -ENOENT;
+ }
+ }
+ return 0;
+}
+#else
+static int krb5_authenticate(struct ksmbd_work *work)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+int smb2_sess_setup(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_sess_setup_req *req = work->request_buf;
+ struct smb2_sess_setup_rsp *rsp = work->response_buf;
+ struct ksmbd_session *sess;
+ struct negotiate_message *negblob;
+ int rc = 0;
+
+ ksmbd_debug(SMB, "Received request for session setup\n");
+
+ rsp->StructureSize = cpu_to_le16(9);
+ rsp->SessionFlags = 0;
+ rsp->SecurityBufferOffset = cpu_to_le16(72);
+ rsp->SecurityBufferLength = 0;
+ inc_rfc1001_len(rsp, 9);
+
+ if (!req->hdr.SessionId) {
+ sess = ksmbd_smb2_session_create();
+ if (!sess) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ rsp->hdr.SessionId = cpu_to_le64(sess->id);
+ ksmbd_session_register(conn, sess);
+ } else if (conn->dialect >= SMB30_PROT_ID &&
+ (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) &&
+ req->Flags & SMB2_SESSION_REQ_FLAG_BINDING) {
+ u64 sess_id = le64_to_cpu(req->hdr.SessionId);
+
+ sess = ksmbd_session_lookup_slowpath(sess_id);
+ if (!sess) {
+ rc = -ENOENT;
+ goto out_err;
+ }
+
+ if (conn->dialect != sess->conn->dialect) {
+ rc = -EINVAL;
+ goto out_err;
+ }
+
+ if (!(req->hdr.Flags & SMB2_FLAGS_SIGNED)) {
+ rc = -EINVAL;
+ goto out_err;
+ }
+
+ if (strncmp(conn->ClientGUID, sess->conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE)) {
+ rc = -ENOENT;
+ goto out_err;
+ }
+
+ if (sess->state == SMB2_SESSION_IN_PROGRESS) {
+ rc = -EACCES;
+ goto out_err;
+ }
+
+ if (sess->state == SMB2_SESSION_EXPIRED) {
+ rc = -EFAULT;
+ goto out_err;
+ }
+
+ if (ksmbd_session_lookup(conn, sess_id)) {
+ rc = -EACCES;
+ goto out_err;
+ }
+
+ conn->binding = true;
+ } else if ((conn->dialect < SMB30_PROT_ID ||
+ server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) &&
+ (req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
+ sess = NULL;
+ rc = -EACCES;
+ goto out_err;
+ } else {
+ sess = ksmbd_session_lookup(conn,
+ le64_to_cpu(req->hdr.SessionId));
+ if (!sess) {
+ rc = -ENOENT;
+ goto out_err;
+ }
+ }
+ work->sess = sess;
+
+ if (sess->state == SMB2_SESSION_EXPIRED)
+ sess->state = SMB2_SESSION_IN_PROGRESS;
+
+ negblob = (struct negotiate_message *)((char *)&req->hdr.ProtocolId +
+ le16_to_cpu(req->SecurityBufferOffset));
+
+ if (decode_negotiation_token(work, negblob) == 0) {
+ if (conn->mechToken)
+ negblob = (struct negotiate_message *)conn->mechToken;
+ }
+
+ if (server_conf.auth_mechs & conn->auth_mechs) {
+ rc = generate_preauth_hash(work);
+ if (rc)
+ goto out_err;
+
+ if (conn->preferred_auth_mech &
+ (KSMBD_AUTH_KRB5 | KSMBD_AUTH_MSKRB5)) {
+ rc = krb5_authenticate(work);
+ if (rc) {
+ rc = -EINVAL;
+ goto out_err;
+ }
+
+ ksmbd_conn_set_good(work);
+ sess->state = SMB2_SESSION_VALID;
+ kfree(sess->Preauth_HashValue);
+ sess->Preauth_HashValue = NULL;
+ } else if (conn->preferred_auth_mech == KSMBD_AUTH_NTLMSSP) {
+ if (negblob->MessageType == NtLmNegotiate) {
+ rc = ntlm_negotiate(work, negblob);
+ if (rc)
+ goto out_err;
+ rsp->hdr.Status =
+ STATUS_MORE_PROCESSING_REQUIRED;
+ /*
+ * Note: the blob length is decremented by 1 to
+ * adjust for the single Buffer byte already
+ * counted in StructureSize
+ */
+ inc_rfc1001_len(rsp, le16_to_cpu(rsp->SecurityBufferLength) - 1);
+
+ } else if (negblob->MessageType == NtLmAuthenticate) {
+ rc = ntlm_authenticate(work);
+ if (rc)
+ goto out_err;
+
+ ksmbd_conn_set_good(work);
+ sess->state = SMB2_SESSION_VALID;
+ if (conn->binding) {
+ struct preauth_session *preauth_sess;
+
+ preauth_sess =
+ ksmbd_preauth_session_lookup(conn, sess->id);
+ if (preauth_sess) {
+ list_del(&preauth_sess->preauth_entry);
+ kfree(preauth_sess);
+ }
+ }
+ kfree(sess->Preauth_HashValue);
+ sess->Preauth_HashValue = NULL;
+ }
+ } else {
+ /* TODO: need one more negotiation */
+ pr_err("preferred authentication mechanism is not supported\n");
+ rc = -EINVAL;
+ }
+ } else {
+ pr_err("Not support authentication\n");
+ rc = -EINVAL;
+ }
+
+out_err:
+ if (rc == -EINVAL)
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ else if (rc == -ENOENT)
+ rsp->hdr.Status = STATUS_USER_SESSION_DELETED;
+ else if (rc == -EACCES)
+ rsp->hdr.Status = STATUS_REQUEST_NOT_ACCEPTED;
+ else if (rc == -EFAULT)
+ rsp->hdr.Status = STATUS_NETWORK_SESSION_EXPIRED;
+ else if (rc == -ENOMEM)
+ rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
+ else if (rc)
+ rsp->hdr.Status = STATUS_LOGON_FAILURE;
+
+ if (conn->use_spnego && conn->mechToken) {
+ kfree(conn->mechToken);
+ conn->mechToken = NULL;
+ }
+
+ if (rc < 0 && sess) {
+ ksmbd_session_destroy(sess);
+ work->sess = NULL;
+ }
+
+ return rc;
+}
+
+/**
+ * smb2_tree_connect() - handler for smb2 tree connect command
+ * @work: smb work containing smb request buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb2_tree_connect(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_tree_connect_req *req = work->request_buf;
+ struct smb2_tree_connect_rsp *rsp = work->response_buf;
+ struct ksmbd_session *sess = work->sess;
+ char *treename = NULL, *name = NULL;
+ struct ksmbd_tree_conn_status status;
+ struct ksmbd_share_config *share;
+ int rc = -EINVAL;
+
+ treename = smb_strndup_from_utf16(req->Buffer,
+ le16_to_cpu(req->PathLength), true,
+ conn->local_nls);
+ if (IS_ERR(treename)) {
+ pr_err("treename is NULL\n");
+ status.ret = KSMBD_TREE_CONN_STATUS_ERROR;
+ goto out_err1;
+ }
+
+ name = ksmbd_extract_sharename(treename);
+ if (IS_ERR(name)) {
+ status.ret = KSMBD_TREE_CONN_STATUS_ERROR;
+ goto out_err1;
+ }
+
+ ksmbd_debug(SMB, "tree connect request for tree %s treename %s\n",
+ name, treename);
+
+ status = ksmbd_tree_conn_connect(sess, name);
+ if (status.ret == KSMBD_TREE_CONN_STATUS_OK)
+ rsp->hdr.Id.SyncId.TreeId = cpu_to_le32(status.tree_conn->id);
+ else
+ goto out_err1;
+
+ share = status.tree_conn->share_conf;
+ if (test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) {
+ ksmbd_debug(SMB, "IPC share path request\n");
+ rsp->ShareType = SMB2_SHARE_TYPE_PIPE;
+ rsp->MaximalAccess = FILE_READ_DATA_LE | FILE_READ_EA_LE |
+ FILE_EXECUTE_LE | FILE_READ_ATTRIBUTES_LE |
+ FILE_DELETE_LE | FILE_READ_CONTROL_LE |
+ FILE_WRITE_DAC_LE | FILE_WRITE_OWNER_LE |
+ FILE_SYNCHRONIZE_LE;
+ } else {
+ rsp->ShareType = SMB2_SHARE_TYPE_DISK;
+ rsp->MaximalAccess = FILE_READ_DATA_LE | FILE_READ_EA_LE |
+ FILE_EXECUTE_LE | FILE_READ_ATTRIBUTES_LE;
+ if (test_tree_conn_flag(status.tree_conn,
+ KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+ rsp->MaximalAccess |= FILE_WRITE_DATA_LE |
+ FILE_APPEND_DATA_LE | FILE_WRITE_EA_LE |
+ FILE_DELETE_LE | FILE_WRITE_ATTRIBUTES_LE |
+ FILE_DELETE_CHILD_LE | FILE_READ_CONTROL_LE |
+ FILE_WRITE_DAC_LE | FILE_WRITE_OWNER_LE |
+ FILE_SYNCHRONIZE_LE;
+ }
+ }
+
+ status.tree_conn->maximal_access = le32_to_cpu(rsp->MaximalAccess);
+ if (conn->posix_ext_supported)
+ status.tree_conn->posix_extensions = true;
+
+out_err1:
+ rsp->StructureSize = cpu_to_le16(16);
+ rsp->Capabilities = 0;
+ rsp->Reserved = 0;
+ /* default manual caching */
+ rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING;
+ inc_rfc1001_len(rsp, 16);
+
+ if (!IS_ERR(treename))
+ kfree(treename);
+ if (!IS_ERR(name))
+ kfree(name);
+
+ switch (status.ret) {
+ case KSMBD_TREE_CONN_STATUS_OK:
+ rsp->hdr.Status = STATUS_SUCCESS;
+ rc = 0;
+ break;
+ case KSMBD_TREE_CONN_STATUS_NO_SHARE:
+ rsp->hdr.Status = STATUS_BAD_NETWORK_PATH;
+ break;
+ case -ENOMEM:
+ case KSMBD_TREE_CONN_STATUS_NOMEM:
+ rsp->hdr.Status = STATUS_NO_MEMORY;
+ break;
+ case KSMBD_TREE_CONN_STATUS_ERROR:
+ case KSMBD_TREE_CONN_STATUS_TOO_MANY_CONNS:
+ case KSMBD_TREE_CONN_STATUS_TOO_MANY_SESSIONS:
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ break;
+ case -EINVAL:
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ break;
+ default:
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ }
+
+ return rc;
+}
+
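+/*
+ * Example of the tree connect path handling above (the address is
+ * illustrative): a client UNC path of "\\192.168.1.50\homes" decodes
+ * from UTF-16 into treename, ksmbd_extract_sharename() reduces it to
+ * the share component "homes", and ksmbd_tree_conn_connect() resolves
+ * that name against the configured shares for the session.
+ */
+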
+/**
+ * smb2_create_open_flags() - convert smb open flags to unix open flags
+ * @file_present: is file already present
+ * @access: file access flags
+ * @disposition: file disposition flags
+ * @may_flags: set with MAY_ flags
+ *
+ * Return: file open flags
+ */
+static int smb2_create_open_flags(bool file_present, __le32 access,
+ __le32 disposition,
+ int *may_flags)
+{
+ int oflags = O_NONBLOCK | O_LARGEFILE;
+
+ if (access & FILE_READ_DESIRED_ACCESS_LE &&
+ access & FILE_WRITE_DESIRE_ACCESS_LE) {
+ oflags |= O_RDWR;
+ *may_flags = MAY_OPEN | MAY_READ | MAY_WRITE;
+ } else if (access & FILE_WRITE_DESIRE_ACCESS_LE) {
+ oflags |= O_WRONLY;
+ *may_flags = MAY_OPEN | MAY_WRITE;
+ } else {
+ oflags |= O_RDONLY;
+ *may_flags = MAY_OPEN | MAY_READ;
+ }
+
+ if (access == FILE_READ_ATTRIBUTES_LE)
+ oflags |= O_PATH;
+
+ if (file_present) {
+ switch (disposition & FILE_CREATE_MASK_LE) {
+ case FILE_OPEN_LE:
+ case FILE_CREATE_LE:
+ break;
+ case FILE_SUPERSEDE_LE:
+ case FILE_OVERWRITE_LE:
+ case FILE_OVERWRITE_IF_LE:
+ oflags |= O_TRUNC;
+ break;
+ default:
+ break;
+ }
+ } else {
+ switch (disposition & FILE_CREATE_MASK_LE) {
+ case FILE_SUPERSEDE_LE:
+ case FILE_CREATE_LE:
+ case FILE_OPEN_IF_LE:
+ case FILE_OVERWRITE_IF_LE:
+ oflags |= O_CREAT;
+ break;
+ case FILE_OPEN_LE:
+ case FILE_OVERWRITE_LE:
+ oflags &= ~O_CREAT;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return oflags;
+}
+
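+/*
+ * Example of the mapping above, as a sketch (assuming the plain
+ * FILE_READ_DATA_LE/FILE_WRITE_DATA_LE bits are covered by the
+ * desired-access masks tested in the function):
+ *
+ *   int may_flags = 0;
+ *   int oflags = smb2_create_open_flags(true,
+ *                       FILE_READ_DATA_LE | FILE_WRITE_DATA_LE,
+ *                       FILE_OVERWRITE_IF_LE, &may_flags);
+ *   // oflags    == O_RDWR | O_TRUNC | O_NONBLOCK | O_LARGEFILE
+ *   // may_flags == MAY_OPEN | MAY_READ | MAY_WRITE
+ */
+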
+/**
+ * smb2_tree_disconnect() - handler for smb2 tree disconnect command
+ * @work: smb work containing request buffer
+ *
+ * Return: 0
+ */
+int smb2_tree_disconnect(struct ksmbd_work *work)
+{
+ struct smb2_tree_disconnect_rsp *rsp = work->response_buf;
+ struct ksmbd_session *sess = work->sess;
+ struct ksmbd_tree_connect *tcon = work->tcon;
+
+ rsp->StructureSize = cpu_to_le16(4);
+ inc_rfc1001_len(rsp, 4);
+
+ ksmbd_debug(SMB, "request\n");
+
+ if (!tcon) {
+ struct smb2_tree_disconnect_req *req = work->request_buf;
+
+ ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId);
+ rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
+ smb2_set_err_rsp(work);
+ return 0;
+ }
+
+ ksmbd_close_tree_conn_fds(work);
+ ksmbd_tree_conn_disconnect(sess, tcon);
+ return 0;
+}
+
+/**
+ * smb2_session_logoff() - handler for session log off request
+ * @work: smb work containing request buffer
+ *
+ * Return: 0
+ */
+int smb2_session_logoff(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_logoff_rsp *rsp = work->response_buf;
+ struct ksmbd_session *sess = work->sess;
+
+ rsp->StructureSize = cpu_to_le16(4);
+ inc_rfc1001_len(rsp, 4);
+
+ ksmbd_debug(SMB, "request\n");
+
+ /* Got a valid session, set connection state */
+ WARN_ON(sess->conn != conn);
+
+ /* setting CifsExiting here may race with start_tcp_sess */
+ ksmbd_conn_set_need_reconnect(work);
+ ksmbd_close_session_fds(work);
+ ksmbd_conn_wait_idle(conn);
+
+ if (ksmbd_tree_conn_session_logoff(sess)) {
+ struct smb2_logoff_req *req = work->request_buf;
+
+ ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId);
+ rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
+ smb2_set_err_rsp(work);
+ return 0;
+ }
+
+ ksmbd_destroy_file_table(&sess->file_table);
+ sess->state = SMB2_SESSION_EXPIRED;
+
+ ksmbd_free_user(sess->user);
+ sess->user = NULL;
+
+ /* let start_tcp_sess free connection info now */
+ ksmbd_conn_set_need_negotiate(work);
+ return 0;
+}
+
+/**
+ * create_smb2_pipe() - create IPC pipe
+ * @work: smb work containing request buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+static noinline int create_smb2_pipe(struct ksmbd_work *work)
+{
+ struct smb2_create_rsp *rsp = work->response_buf;
+ struct smb2_create_req *req = work->request_buf;
+ int id;
+ int err;
+ char *name;
+
+ name = smb_strndup_from_utf16(req->Buffer, le16_to_cpu(req->NameLength),
+ 1, work->conn->local_nls);
+ if (IS_ERR(name)) {
+ rsp->hdr.Status = STATUS_NO_MEMORY;
+ err = PTR_ERR(name);
+ goto out;
+ }
+
+ id = ksmbd_session_rpc_open(work->sess, name);
+ if (id < 0) {
+ pr_err("Unable to open RPC pipe: %d\n", id);
+ err = id;
+ goto out;
+ }
+
+ rsp->hdr.Status = STATUS_SUCCESS;
+ rsp->StructureSize = cpu_to_le16(89);
+ rsp->OplockLevel = SMB2_OPLOCK_LEVEL_NONE;
+ rsp->Reserved = 0;
+ rsp->CreateAction = cpu_to_le32(FILE_OPENED);
+
+ rsp->CreationTime = cpu_to_le64(0);
+ rsp->LastAccessTime = cpu_to_le64(0);
+ rsp->ChangeTime = cpu_to_le64(0);
+ rsp->AllocationSize = cpu_to_le64(0);
+ rsp->EndofFile = cpu_to_le64(0);
+ rsp->FileAttributes = ATTR_NORMAL_LE;
+ rsp->Reserved2 = 0;
+ rsp->VolatileFileId = cpu_to_le64(id);
+ rsp->PersistentFileId = 0;
+ rsp->CreateContextsOffset = 0;
+ rsp->CreateContextsLength = 0;
+
+ inc_rfc1001_len(rsp, 88); /* StructureSize - 1*/
+ kfree(name);
+ return 0;
+
+out:
+ switch (err) {
+ case -EINVAL:
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ break;
+ case -ENOSPC:
+ case -ENOMEM:
+ rsp->hdr.Status = STATUS_NO_MEMORY;
+ break;
+ }
+
+ if (!IS_ERR(name))
+ kfree(name);
+
+ smb2_set_err_rsp(work);
+ return err;
+}
+
+/**
+ * smb2_set_ea() - handler for setting extended attributes using set
+ * info command
+ * @eabuf: set info command buffer
+ * @path: dentry path of the file on which to set the EAs
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int smb2_set_ea(struct smb2_ea_info *eabuf, struct path *path)
+{
+ struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ char *attr_name = NULL, *value;
+ int rc = 0;
+ int next = 0;
+
+ attr_name = kmalloc(XATTR_NAME_MAX + 1, GFP_KERNEL);
+ if (!attr_name)
+ return -ENOMEM;
+
+ do {
+ if (!eabuf->EaNameLength)
+ goto next;
+
+ ksmbd_debug(SMB,
+ "name : <%s>, name_len : %u, value_len : %u, next : %u\n",
+ eabuf->name, eabuf->EaNameLength,
+ le16_to_cpu(eabuf->EaValueLength),
+ le32_to_cpu(eabuf->NextEntryOffset));
+
+ if (eabuf->EaNameLength >
+ (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) {
+ rc = -EINVAL;
+ break;
+ }
+
+ memcpy(attr_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+ memcpy(&attr_name[XATTR_USER_PREFIX_LEN], eabuf->name,
+ eabuf->EaNameLength);
+ attr_name[XATTR_USER_PREFIX_LEN + eabuf->EaNameLength] = '\0';
+ value = (char *)&eabuf->name + eabuf->EaNameLength + 1;
+
+ if (!eabuf->EaValueLength) {
+ rc = ksmbd_vfs_casexattr_len(user_ns,
+ path->dentry,
+ attr_name,
+ XATTR_USER_PREFIX_LEN +
+ eabuf->EaNameLength);
+
+ /* delete the EA only when it exists */
+ if (rc > 0) {
+ rc = ksmbd_vfs_remove_xattr(user_ns,
+ path->dentry,
+ attr_name);
+
+ if (rc < 0) {
+ ksmbd_debug(SMB,
+ "remove xattr failed(%d)\n",
+ rc);
+ break;
+ }
+ }
+
+ /* if the EA doesn't exist, just do nothing. */
+ rc = 0;
+ } else {
+ rc = ksmbd_vfs_setxattr(user_ns,
+ path->dentry, attr_name, value,
+ le16_to_cpu(eabuf->EaValueLength), 0);
+ if (rc < 0) {
+ ksmbd_debug(SMB,
+ "ksmbd_vfs_setxattr is failed(%d)\n",
+ rc);
+ break;
+ }
+ }
+
+next:
+ next = le32_to_cpu(eabuf->NextEntryOffset);
+ eabuf = (struct smb2_ea_info *)((char *)eabuf + next);
+ } while (next != 0);
+
+ kfree(attr_name);
+ return rc;
+}
+
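+/*
+ * Wire layout walked by smb2_set_ea() above (MS-FSCC
+ * FILE_FULL_EA_INFORMATION, declared here as struct smb2_ea_info):
+ *
+ *   NextEntryOffset  __le32   0 terminates the chain
+ *   Flags            __u8
+ *   EaNameLength     __u8     name length, excluding the NUL
+ *   EaValueLength    __le16   0 requests deletion of the EA
+ *   name[]                    NUL-terminated name, value bytes follow
+ *
+ * Each EA is stored as a user-namespace xattr with XATTR_USER_PREFIX
+ * prepended to the client-supplied name.
+ */
+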
+static noinline int smb2_set_stream_name_xattr(struct path *path,
+ struct ksmbd_file *fp,
+ char *stream_name, int s_type)
+{
+ struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ size_t xattr_stream_size;
+ char *xattr_stream_name;
+ int rc;
+
+ rc = ksmbd_vfs_xattr_stream_name(stream_name,
+ &xattr_stream_name,
+ &xattr_stream_size,
+ s_type);
+ if (rc)
+ return rc;
+
+ fp->stream.name = xattr_stream_name;
+ fp->stream.size = xattr_stream_size;
+
+ /* Check if there is stream prefix in xattr space */
+ rc = ksmbd_vfs_casexattr_len(user_ns,
+ path->dentry,
+ xattr_stream_name,
+ xattr_stream_size);
+ if (rc >= 0)
+ return 0;
+
+ if (fp->cdoption == FILE_OPEN_LE) {
+ ksmbd_debug(SMB, "XATTR stream name lookup failed: %d\n", rc);
+ return -EBADF;
+ }
+
+ rc = ksmbd_vfs_setxattr(user_ns, path->dentry,
+ xattr_stream_name, NULL, 0, 0);
+ if (rc < 0)
+ pr_err("Failed to store XATTR stream name :%d\n", rc);
+ return 0;
+}
+
+static int smb2_remove_smb_xattrs(struct path *path)
+{
+ struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ char *name, *xattr_list = NULL;
+ ssize_t xattr_list_len;
+ int err = 0;
+
+ xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
+ if (xattr_list_len < 0) {
+ goto out;
+ } else if (!xattr_list_len) {
+ ksmbd_debug(SMB, "empty xattr in the file\n");
+ goto out;
+ }
+
+ for (name = xattr_list; name - xattr_list < xattr_list_len;
+ name += strlen(name) + 1) {
+ ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
+
+ if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
+ strncmp(&name[XATTR_USER_PREFIX_LEN], DOS_ATTRIBUTE_PREFIX,
+ DOS_ATTRIBUTE_PREFIX_LEN) &&
+ strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN))
+ continue;
+
+ err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, name);
+ if (err)
+ ksmbd_debug(SMB, "remove xattr failed : %s\n", name);
+ }
+out:
+ kvfree(xattr_list);
+ return err;
+}
+
+static int smb2_create_truncate(struct path *path)
+{
+ int rc = vfs_truncate(path, 0);
+
+ if (rc) {
+ pr_err("vfs_truncate failed, rc %d\n", rc);
+ return rc;
+ }
+
+ rc = smb2_remove_smb_xattrs(path);
+ if (rc == -EOPNOTSUPP)
+ rc = 0;
+ if (rc)
+ ksmbd_debug(SMB,
+ "ksmbd_truncate_stream_name_xattr failed, rc %d\n",
+ rc);
+ return rc;
+}
+
+static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, struct path *path,
+ struct ksmbd_file *fp)
+{
+ struct xattr_dos_attrib da = {0};
+ int rc;
+
+ if (!test_share_config_flag(tcon->share_conf,
+ KSMBD_SHARE_FLAG_STORE_DOS_ATTRS))
+ return;
+
+ da.version = 4;
+ da.attr = le32_to_cpu(fp->f_ci->m_fattr);
+ da.itime = da.create_time = fp->create_time;
+ da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
+ XATTR_DOSINFO_ITIME;
+
+ rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_user_ns(path->mnt),
+ path->dentry, &da);
+ if (rc)
+ ksmbd_debug(SMB, "failed to store file attribute into xattr\n");
+}
+
+static void smb2_update_xattrs(struct ksmbd_tree_connect *tcon,
+ struct path *path, struct ksmbd_file *fp)
+{
+ struct xattr_dos_attrib da;
+ int rc;
+
+ fp->f_ci->m_fattr &= ~(ATTR_HIDDEN_LE | ATTR_SYSTEM_LE);
+
+ /* get FileAttributes from XATTR_NAME_DOS_ATTRIBUTE */
+ if (!test_share_config_flag(tcon->share_conf,
+ KSMBD_SHARE_FLAG_STORE_DOS_ATTRS))
+ return;
+
+ rc = ksmbd_vfs_get_dos_attrib_xattr(mnt_user_ns(path->mnt),
+ path->dentry, &da);
+ if (rc > 0) {
+ fp->f_ci->m_fattr = cpu_to_le32(da.attr);
+ fp->create_time = da.create_time;
+ fp->itime = da.itime;
+ }
+}
+
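+/*
+ * smb2_new_xattrs() and smb2_update_xattrs() persist DOS metadata in a
+ * version-4 xattr_dos_attrib blob (attributes, create time and the
+ * ksmbd-private itime), so the attributes survive on plain POSIX
+ * filesystems; both are no-ops unless the share sets
+ * KSMBD_SHARE_FLAG_STORE_DOS_ATTRS.
+ */
+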
+static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name,
+ int open_flags, umode_t posix_mode, bool is_dir)
+{
+ struct ksmbd_tree_connect *tcon = work->tcon;
+ struct ksmbd_share_config *share = tcon->share_conf;
+ umode_t mode;
+ int rc;
+
+ if (!(open_flags & O_CREAT))
+ return -EBADF;
+
+ ksmbd_debug(SMB, "file does not exist, so creating\n");
+ if (is_dir) {
+ ksmbd_debug(SMB, "creating directory\n");
+
+ mode = share_config_directory_mode(share, posix_mode);
+ rc = ksmbd_vfs_mkdir(work, name, mode);
+ if (rc)
+ return rc;
+ } else {
+ ksmbd_debug(SMB, "creating regular file\n");
+
+ mode = share_config_create_mode(share, posix_mode);
+ rc = ksmbd_vfs_create(work, name, mode);
+ if (rc)
+ return rc;
+ }
+
+ rc = ksmbd_vfs_kern_path(name, 0, path, 0);
+ if (rc) {
+ pr_err("cannot get linux path (%s), err = %d\n",
+ name, rc);
+ return rc;
+ }
+ return 0;
+}
+
+static int smb2_create_sd_buffer(struct ksmbd_work *work,
+ struct smb2_create_req *req,
+ struct path *path)
+{
+ struct create_context *context;
+ struct create_sd_buf_req *sd_buf;
+
+ if (!req->CreateContextsOffset)
+ return -ENOENT;
+
+ /* Parse SD BUFFER create contexts */
+ context = smb2_find_context_vals(req, SMB2_CREATE_SD_BUFFER);
+ if (!context)
+ return -ENOENT;
+ else if (IS_ERR(context))
+ return PTR_ERR(context);
+
+ ksmbd_debug(SMB,
+ "Set ACLs using SMB2_CREATE_SD_BUFFER context\n");
+ sd_buf = (struct create_sd_buf_req *)context;
+ return set_info_sec(work->conn, work->tcon, path, &sd_buf->ntsd,
+ le32_to_cpu(sd_buf->ccontext.DataLength), true);
+}
+
+static void ksmbd_acls_fattr(struct smb_fattr *fattr, struct inode *inode)
+{
+ fattr->cf_uid = inode->i_uid;
+ fattr->cf_gid = inode->i_gid;
+ fattr->cf_mode = inode->i_mode;
+ fattr->cf_acls = NULL;
+ fattr->cf_dacls = NULL;
+
+ if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) {
+ fattr->cf_acls = get_acl(inode, ACL_TYPE_ACCESS);
+ if (S_ISDIR(inode->i_mode))
+ fattr->cf_dacls = get_acl(inode, ACL_TYPE_DEFAULT);
+ }
+}
+
+/**
+ * smb2_open() - handler for smb file open request
+ * @work: smb work containing request buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb2_open(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_session *sess = work->sess;
+ struct ksmbd_tree_connect *tcon = work->tcon;
+ struct smb2_create_req *req;
+ struct smb2_create_rsp *rsp, *rsp_org;
+ struct path path;
+ struct ksmbd_share_config *share = tcon->share_conf;
+ struct ksmbd_file *fp = NULL;
+ struct file *filp = NULL;
+ struct user_namespace *user_ns = NULL;
+ struct kstat stat;
+ struct create_context *context;
+ struct lease_ctx_info *lc = NULL;
+ struct create_ea_buf_req *ea_buf = NULL;
+ struct oplock_info *opinfo;
+ __le32 *next_ptr = NULL;
+ int req_op_level = 0, open_flags = 0, may_flags = 0, file_info = 0;
+ int rc = 0, len = 0;
+ int contxt_cnt = 0, query_disk_id = 0;
+ int maximal_access_ctxt = 0, posix_ctxt = 0;
+ int s_type = 0;
+ int next_off = 0;
+ char *name = NULL;
+ char *stream_name = NULL;
+ bool file_present = false, created = false, already_permitted = false;
+ int share_ret, need_truncate = 0;
+ u64 time;
+ umode_t posix_mode = 0;
+ __le32 daccess, maximal_access = 0;
+
+ rsp_org = work->response_buf;
+ WORK_BUFFERS(work, req, rsp);
+
+ if (req->hdr.NextCommand && !work->next_smb2_rcv_hdr_off &&
+ (req->hdr.Flags & SMB2_FLAGS_RELATED_OPERATIONS)) {
+ ksmbd_debug(SMB, "invalid flag in chained command\n");
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ smb2_set_err_rsp(work);
+ return -EINVAL;
+ }
+
+ if (test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) {
+ ksmbd_debug(SMB, "IPC pipe create request\n");
+ return create_smb2_pipe(work);
+ }
+
+ if (req->NameLength) {
+ if ((req->CreateOptions & FILE_DIRECTORY_FILE_LE) &&
+ *(char *)req->Buffer == '\\') {
+ pr_err("not allow directory name included leading slash\n");
+ rc = -EINVAL;
+ goto err_out1;
+ }
+
+ name = smb2_get_name(share,
+ req->Buffer,
+ le16_to_cpu(req->NameLength),
+ work->conn->local_nls);
+ if (IS_ERR(name)) {
+ rc = PTR_ERR(name);
+ if (rc != -ENOMEM)
+ rc = -ENOENT;
+ name = NULL;
+ goto err_out1;
+ }
+
+ ksmbd_debug(SMB, "converted name = %s\n", name);
+ if (strchr(name, ':')) {
+ if (!test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_STREAMS)) {
+ rc = -EBADF;
+ goto err_out1;
+ }
+ rc = parse_stream_name(name, &stream_name, &s_type);
+ if (rc < 0)
+ goto err_out1;
+ }
+
+ rc = ksmbd_validate_filename(name);
+ if (rc < 0)
+ goto err_out1;
+
+ if (ksmbd_share_veto_filename(share, name)) {
+ rc = -ENOENT;
+ ksmbd_debug(SMB, "Reject open(), vetoed file: %s\n",
+ name);
+ goto err_out1;
+ }
+ } else {
+ len = strlen(share->path);
+ ksmbd_debug(SMB, "share path len %d\n", len);
+ name = kmalloc(len + 1, GFP_KERNEL);
+ if (!name) {
+ rsp->hdr.Status = STATUS_NO_MEMORY;
+ rc = -ENOMEM;
+ goto err_out1;
+ }
+
+ memcpy(name, share->path, len);
+ *(name + len) = '\0';
+ }
+
+ req_op_level = req->RequestedOplockLevel;
+ if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE)
+ lc = parse_lease_state(req);
+
+ if (le32_to_cpu(req->ImpersonationLevel) > le32_to_cpu(IL_DELEGATE_LE)) {
+ pr_err("Invalid impersonationlevel : 0x%x\n",
+ le32_to_cpu(req->ImpersonationLevel));
+ rc = -EIO;
+ rsp->hdr.Status = STATUS_BAD_IMPERSONATION_LEVEL;
+ goto err_out1;
+ }
+
+ if (req->CreateOptions && !(req->CreateOptions & CREATE_OPTIONS_MASK)) {
+ pr_err("Invalid create options : 0x%x\n",
+ le32_to_cpu(req->CreateOptions));
+ rc = -EINVAL;
+ goto err_out1;
+ } else {
+ if (req->CreateOptions & FILE_SEQUENTIAL_ONLY_LE &&
+ req->CreateOptions & FILE_RANDOM_ACCESS_LE)
+ req->CreateOptions &= ~(FILE_SEQUENTIAL_ONLY_LE);
+
+ if (req->CreateOptions &
+ (FILE_OPEN_BY_FILE_ID_LE | CREATE_TREE_CONNECTION |
+ FILE_RESERVE_OPFILTER_LE)) {
+ rc = -EOPNOTSUPP;
+ goto err_out1;
+ }
+
+ if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) {
+ if (req->CreateOptions & FILE_NON_DIRECTORY_FILE_LE) {
+ rc = -EINVAL;
+ goto err_out1;
+ } else if (req->CreateOptions & FILE_NO_COMPRESSION_LE) {
+ req->CreateOptions &= ~(FILE_NO_COMPRESSION_LE);
+ }
+ }
+ }
+
+ if (le32_to_cpu(req->CreateDisposition) >
+ le32_to_cpu(FILE_OVERWRITE_IF_LE)) {
+ pr_err("Invalid create disposition : 0x%x\n",
+ le32_to_cpu(req->CreateDisposition));
+ rc = -EINVAL;
+ goto err_out1;
+ }
+
+ if (!(req->DesiredAccess & DESIRED_ACCESS_MASK)) {
+ pr_err("Invalid desired access : 0x%x\n",
+ le32_to_cpu(req->DesiredAccess));
+ rc = -EACCES;
+ goto err_out1;
+ }
+
+ if (req->FileAttributes && !(req->FileAttributes & ATTR_MASK_LE)) {
+ pr_err("Invalid file attribute : 0x%x\n",
+ le32_to_cpu(req->FileAttributes));
+ rc = -EINVAL;
+ goto err_out1;
+ }
+
+ if (req->CreateContextsOffset) {
+ /* Parse non-durable handle create contexts */
+ context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out1;
+ } else if (context) {
+ ea_buf = (struct create_ea_buf_req *)context;
+ if (req->CreateOptions & FILE_NO_EA_KNOWLEDGE_LE) {
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ rc = -EACCES;
+ goto err_out1;
+ }
+ }
+
+ context = smb2_find_context_vals(req,
+ SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out1;
+ } else if (context) {
+ ksmbd_debug(SMB,
+ "get query maximal access context\n");
+ maximal_access_ctxt = 1;
+ }
+
+ context = smb2_find_context_vals(req,
+ SMB2_CREATE_TIMEWARP_REQUEST);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out1;
+ } else if (context) {
+ ksmbd_debug(SMB, "get timewarp context\n");
+ rc = -EBADF;
+ goto err_out1;
+ }
+
+ if (tcon->posix_extensions) {
+ context = smb2_find_context_vals(req,
+ SMB2_CREATE_TAG_POSIX);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out1;
+ } else if (context) {
+ struct create_posix *posix =
+ (struct create_posix *)context;
+ ksmbd_debug(SMB, "get posix context\n");
+
+ posix_mode = le32_to_cpu(posix->Mode);
+ posix_ctxt = 1;
+ }
+ }
+ }
+
+ if (ksmbd_override_fsids(work)) {
+ rc = -ENOMEM;
+ goto err_out1;
+ }
+
+ if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE) {
+ /*
+ * On a delete request, look up the entry itself
+ * instead of following it, so the link (not its
+ * target) is operated on
+ */
+ rc = ksmbd_vfs_kern_path(name, 0, &path, 1);
+ if (!rc) {
+ /*
+ * If the file already exists and one of the
+ * dispositions below was requested, return an
+ * access denied error.
+ */
+ if (req->CreateDisposition == FILE_OVERWRITE_IF_LE ||
+ req->CreateDisposition == FILE_OPEN_IF_LE) {
+ rc = -EACCES;
+ path_put(&path);
+ goto err_out;
+ }
+
+ if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+ ksmbd_debug(SMB,
+ "User does not have write permission\n");
+ rc = -EACCES;
+ path_put(&path);
+ goto err_out;
+ }
+ }
+ } else {
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS)) {
+ /*
+ * Use LOOKUP_FOLLOW to resolve symlinks while
+ * building up the path
+ */
+ rc = ksmbd_vfs_kern_path(name, LOOKUP_FOLLOW, &path, 1);
+ if (rc) { /* possibly a dangling symlink; retry without following */
+ rc = ksmbd_vfs_kern_path(name, 0, &path, 1);
+ }
+ } else {
+ rc = ksmbd_vfs_kern_path(name, 0, &path, 1);
+ if (!rc && d_is_symlink(path.dentry)) {
+ rc = -EACCES;
+ path_put(&path);
+ goto err_out;
+ }
+ }
+ }
+
+ if (rc) {
+ if (rc == -EACCES) {
+ ksmbd_debug(SMB,
+ "User does not have right permission\n");
+ goto err_out;
+ }
+ ksmbd_debug(SMB, "can not get linux path for %s, rc = %d\n",
+ name, rc);
+ rc = 0;
+ } else {
+ file_present = true;
+ user_ns = mnt_user_ns(path.mnt);
+ generic_fillattr(user_ns, d_inode(path.dentry), &stat);
+ }
+ if (stream_name) {
+ if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) {
+ if (s_type == DATA_STREAM) {
+ rc = -EIO;
+ rsp->hdr.Status = STATUS_NOT_A_DIRECTORY;
+ }
+ } else {
+ if (S_ISDIR(stat.mode) && s_type == DATA_STREAM) {
+ rc = -EIO;
+ rsp->hdr.Status = STATUS_FILE_IS_A_DIRECTORY;
+ }
+ }
+
+ if (req->CreateOptions & FILE_DIRECTORY_FILE_LE &&
+ req->FileAttributes & ATTR_NORMAL_LE) {
+ rsp->hdr.Status = STATUS_NOT_A_DIRECTORY;
+ rc = -EIO;
+ }
+
+ if (rc < 0)
+ goto err_out;
+ }
+
+ if (file_present && req->CreateOptions & FILE_NON_DIRECTORY_FILE_LE &&
+ S_ISDIR(stat.mode) && !(req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) {
+ ksmbd_debug(SMB, "open() argument is a directory: %s, %x\n",
+ name, req->CreateOptions);
+ rsp->hdr.Status = STATUS_FILE_IS_A_DIRECTORY;
+ rc = -EIO;
+ goto err_out;
+ }
+
+ if (file_present && (req->CreateOptions & FILE_DIRECTORY_FILE_LE) &&
+ !(req->CreateDisposition == FILE_CREATE_LE) &&
+ !S_ISDIR(stat.mode)) {
+ rsp->hdr.Status = STATUS_NOT_A_DIRECTORY;
+ rc = -EIO;
+ goto err_out;
+ }
+
+ if (!stream_name && file_present &&
+ req->CreateDisposition == FILE_CREATE_LE) {
+ rc = -EEXIST;
+ goto err_out;
+ }
+
+ daccess = smb_map_generic_desired_access(req->DesiredAccess);
+
+ if (file_present && !(req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) {
+ rc = smb_check_perm_dacl(conn, &path, &daccess,
+ sess->user->uid);
+ if (rc)
+ goto err_out;
+ }
+
+ if (daccess & FILE_MAXIMAL_ACCESS_LE) {
+ if (!file_present) {
+ daccess = cpu_to_le32(GENERIC_ALL_FLAGS);
+ } else {
+ rc = ksmbd_vfs_query_maximal_access(user_ns,
+ path.dentry,
+ &daccess);
+ if (rc)
+ goto err_out;
+ already_permitted = true;
+ }
+ maximal_access = daccess;
+ }
+
+ open_flags = smb2_create_open_flags(file_present, daccess,
+ req->CreateDisposition,
+ &may_flags);
+
+ if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+ if (open_flags & O_CREAT) {
+ ksmbd_debug(SMB,
+ "User does not have write permission\n");
+ rc = -EACCES;
+ goto err_out;
+ }
+ }
+
+ /* create file if not present */
+ if (!file_present) {
+ rc = smb2_creat(work, &path, name, open_flags, posix_mode,
+ req->CreateOptions & FILE_DIRECTORY_FILE_LE);
+ if (rc) {
+ if (rc == -ENOENT) {
+ rc = -EIO;
+ rsp->hdr.Status = STATUS_OBJECT_PATH_NOT_FOUND;
+ }
+ goto err_out;
+ }
+
+ created = true;
+ user_ns = mnt_user_ns(path.mnt);
+ if (ea_buf) {
+ rc = smb2_set_ea(&ea_buf->ea, &path);
+ if (rc == -EOPNOTSUPP)
+ rc = 0;
+ else if (rc)
+ goto err_out;
+ }
+ } else if (!already_permitted) {
+ /* FILE_READ_ATTRIBUTE is allowed without inode_permission,
+ * because execute (search) permission on the parent directory
+ * is already granted.
+ */
+ if (daccess & ~(FILE_READ_ATTRIBUTES_LE | FILE_READ_CONTROL_LE)) {
+ rc = inode_permission(user_ns,
+ d_inode(path.dentry),
+ may_flags);
+ if (rc)
+ goto err_out;
+
+ if ((daccess & FILE_DELETE_LE) ||
+ (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)) {
+ rc = ksmbd_vfs_may_delete(user_ns,
+ path.dentry);
+ if (rc)
+ goto err_out;
+ }
+ }
+ }
+
+ rc = ksmbd_query_inode_status(d_inode(path.dentry->d_parent));
+ if (rc == KSMBD_INODE_STATUS_PENDING_DELETE) {
+ rc = -EBUSY;
+ goto err_out;
+ }
+
+ rc = 0;
+ filp = dentry_open(&path, open_flags, current_cred());
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ pr_err("dentry open for dir failed, rc %d\n", rc);
+ goto err_out;
+ }
+
+ if (file_present) {
+ if (!(open_flags & O_TRUNC))
+ file_info = FILE_OPENED;
+ else
+ file_info = FILE_OVERWRITTEN;
+
+ if ((req->CreateDisposition & FILE_CREATE_MASK_LE) ==
+ FILE_SUPERSEDE_LE)
+ file_info = FILE_SUPERSEDED;
+ } else if (open_flags & O_CREAT) {
+ file_info = FILE_CREATED;
+ }
+
+ ksmbd_vfs_set_fadvise(filp, req->CreateOptions);
+
+ /* Obtain Volatile-ID */
+ fp = ksmbd_open_fd(work, filp);
+ if (IS_ERR(fp)) {
+ fput(filp);
+ rc = PTR_ERR(fp);
+ fp = NULL;
+ goto err_out;
+ }
+
+ /* Get Persistent-ID */
+ ksmbd_open_durable_fd(fp);
+ if (!has_file_id(fp->persistent_id)) {
+ rc = -ENOMEM;
+ goto err_out;
+ }
+
+ fp->filename = name;
+ fp->cdoption = req->CreateDisposition;
+ fp->daccess = daccess;
+ fp->saccess = req->ShareAccess;
+ fp->coption = req->CreateOptions;
+
+ /* Set default windows and posix acls if creating new file */
+ if (created) {
+ int posix_acl_rc;
+ struct inode *inode = d_inode(path.dentry);
+
+ posix_acl_rc = ksmbd_vfs_inherit_posix_acl(user_ns,
+ inode,
+ d_inode(path.dentry->d_parent));
+ if (posix_acl_rc)
+ ksmbd_debug(SMB, "inherit posix acl failed : %d\n", posix_acl_rc);
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_ACL_XATTR)) {
+ rc = smb_inherit_dacl(conn, &path, sess->user->uid,
+ sess->user->gid);
+ }
+
+ if (rc) {
+ rc = smb2_create_sd_buffer(work, req, &path);
+ if (rc) {
+ if (posix_acl_rc)
+ ksmbd_vfs_set_init_posix_acl(user_ns,
+ inode);
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_ACL_XATTR)) {
+ struct smb_fattr fattr;
+ struct smb_ntsd *pntsd;
+ int pntsd_size, ace_num = 0;
+
+ ksmbd_acls_fattr(&fattr, inode);
+ if (fattr.cf_acls)
+ ace_num = fattr.cf_acls->a_count;
+ if (fattr.cf_dacls)
+ ace_num += fattr.cf_dacls->a_count;
+
+ pntsd = kmalloc(sizeof(struct smb_ntsd) +
+ sizeof(struct smb_sid) * 3 +
+ sizeof(struct smb_acl) +
+ sizeof(struct smb_ace) * ace_num * 2,
+ GFP_KERNEL);
+ if (!pntsd) {
+ rc = -ENOMEM;
+ goto err_out;
+ }
+
+ rc = build_sec_desc(user_ns,
+ pntsd, NULL,
+ OWNER_SECINFO |
+ GROUP_SECINFO |
+ DACL_SECINFO,
+ &pntsd_size, &fattr);
+ posix_acl_release(fattr.cf_acls);
+ posix_acl_release(fattr.cf_dacls);
+
+ rc = ksmbd_vfs_set_sd_xattr(conn,
+ user_ns,
+ path.dentry,
+ pntsd,
+ pntsd_size);
+ kfree(pntsd);
+ if (rc)
+ pr_err("failed to store ntacl in xattr : %d\n",
+ rc);
+ }
+ }
+ }
+ rc = 0;
+ }
+
+ if (stream_name) {
+ rc = smb2_set_stream_name_xattr(&path,
+ fp,
+ stream_name,
+ s_type);
+ if (rc)
+ goto err_out;
+ file_info = FILE_CREATED;
+ }
+
+ fp->attrib_only = !(req->DesiredAccess & ~(FILE_READ_ATTRIBUTES_LE |
+ FILE_WRITE_ATTRIBUTES_LE | FILE_SYNCHRONIZE_LE));
+ if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC &&
+ !fp->attrib_only && !stream_name) {
+ smb_break_all_oplock(work, fp);
+ need_truncate = 1;
+ }
+
+ /* fp should be searchable through ksmbd_inode.m_fp_list
+ * after daccess, saccess, attrib_only, and stream are
+ * initialized.
+ */
+ write_lock(&fp->f_ci->m_lock);
+ list_add(&fp->node, &fp->f_ci->m_fp_list);
+ write_unlock(&fp->f_ci->m_lock);
+
+ rc = ksmbd_vfs_getattr(&path, &stat);
+ if (rc) {
+ generic_fillattr(user_ns, d_inode(path.dentry), &stat);
+ rc = 0;
+ }
+
+ /* Check delete pending among previous fp before oplock break */
+ if (ksmbd_inode_pending_delete(fp)) {
+ rc = -EBUSY;
+ goto err_out;
+ }
+
+ share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp);
+ if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) ||
+ (req_op_level == SMB2_OPLOCK_LEVEL_LEASE &&
+ !(conn->vals->capabilities & SMB2_GLOBAL_CAP_LEASING))) {
+ if (share_ret < 0 && !S_ISDIR(file_inode(fp->filp)->i_mode)) {
+ rc = share_ret;
+ goto err_out;
+ }
+ } else {
+ if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) {
+ req_op_level = smb2_map_lease_to_oplock(lc->req_state);
+ ksmbd_debug(SMB,
+ "lease req for(%s) req oplock state 0x%x, lease state 0x%x\n",
+ name, req_op_level, lc->req_state);
+ rc = find_same_lease_key(sess, fp->f_ci, lc);
+ if (rc)
+ goto err_out;
+ } else if (open_flags == O_RDONLY &&
+ (req_op_level == SMB2_OPLOCK_LEVEL_BATCH ||
+ req_op_level == SMB2_OPLOCK_LEVEL_EXCLUSIVE))
+ req_op_level = SMB2_OPLOCK_LEVEL_II;
+
+ rc = smb_grant_oplock(work, req_op_level,
+ fp->persistent_id, fp,
+ le32_to_cpu(req->hdr.Id.SyncId.TreeId),
+ lc, share_ret);
+ if (rc < 0)
+ goto err_out;
+ }
+
+ if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE)
+ ksmbd_fd_set_delete_on_close(fp, file_info);
+
+ if (need_truncate) {
+ rc = smb2_create_truncate(&path);
+ if (rc)
+ goto err_out;
+ }
+
+ if (req->CreateContextsOffset) {
+ struct create_alloc_size_req *az_req;
+
+ az_req = (struct create_alloc_size_req *)smb2_find_context_vals(req,
+ SMB2_CREATE_ALLOCATION_SIZE);
+ if (IS_ERR(az_req)) {
+ rc = PTR_ERR(az_req);
+ goto err_out;
+ } else if (az_req) {
+ loff_t alloc_size = le64_to_cpu(az_req->AllocationSize);
+ int err;
+
+ ksmbd_debug(SMB,
+ "request smb2 create allocate size : %llu\n",
+ alloc_size);
+ smb_break_all_levII_oplock(work, fp, 1);
+ err = vfs_fallocate(fp->filp, FALLOC_FL_KEEP_SIZE, 0,
+ alloc_size);
+ if (err < 0)
+ ksmbd_debug(SMB,
+ "vfs_fallocate is failed : %d\n",
+ err);
+ }
+
+ context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out;
+ } else if (context) {
+ ksmbd_debug(SMB, "get query on disk id context\n");
+ query_disk_id = 1;
+ }
+ }
+
+ if (stat.result_mask & STATX_BTIME)
+ fp->create_time = ksmbd_UnixTimeToNT(stat.btime);
+ else
+ fp->create_time = ksmbd_UnixTimeToNT(stat.ctime);
+ if (req->FileAttributes || fp->f_ci->m_fattr == 0)
+ fp->f_ci->m_fattr =
+ cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes)));
+
+ if (!created)
+ smb2_update_xattrs(tcon, &path, fp);
+ else
+ smb2_new_xattrs(tcon, &path, fp);
+
+ memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
+
+ generic_fillattr(user_ns, file_inode(fp->filp),
+ &stat);
+
+ rsp->StructureSize = cpu_to_le16(89);
+ rcu_read_lock();
+ opinfo = rcu_dereference(fp->f_opinfo);
+ rsp->OplockLevel = opinfo != NULL ? opinfo->level : 0;
+ rcu_read_unlock();
+ rsp->Reserved = 0;
+ rsp->CreateAction = cpu_to_le32(file_info);
+ rsp->CreationTime = cpu_to_le64(fp->create_time);
+ time = ksmbd_UnixTimeToNT(stat.atime);
+ rsp->LastAccessTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(stat.mtime);
+ rsp->LastWriteTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(stat.ctime);
+ rsp->ChangeTime = cpu_to_le64(time);
+ rsp->AllocationSize = S_ISDIR(stat.mode) ? 0 :
+ cpu_to_le64(stat.blocks << 9);
+ rsp->EndofFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ rsp->FileAttributes = fp->f_ci->m_fattr;
+
+ rsp->Reserved2 = 0;
+
+ rsp->PersistentFileId = cpu_to_le64(fp->persistent_id);
+ rsp->VolatileFileId = cpu_to_le64(fp->volatile_id);
+
+ rsp->CreateContextsOffset = 0;
+ rsp->CreateContextsLength = 0;
+ inc_rfc1001_len(rsp_org, 88); /* StructureSize - 1*/
+
+ /* If a lease was requested, send the lease context response */
+ if (opinfo && opinfo->is_lease) {
+ struct create_context *lease_ccontext;
+
+ ksmbd_debug(SMB, "lease granted on(%s) lease state 0x%x\n",
+ name, opinfo->o_lease->state);
+ rsp->OplockLevel = SMB2_OPLOCK_LEVEL_LEASE;
+
+ lease_ccontext = (struct create_context *)rsp->Buffer;
+ contxt_cnt++;
+ create_lease_buf(rsp->Buffer, opinfo->o_lease);
+ le32_add_cpu(&rsp->CreateContextsLength,
+ conn->vals->create_lease_size);
+ inc_rfc1001_len(rsp_org, conn->vals->create_lease_size);
+ next_ptr = &lease_ccontext->Next;
+ next_off = conn->vals->create_lease_size;
+ }
+
+ if (maximal_access_ctxt) {
+ struct create_context *mxac_ccontext;
+
+ if (maximal_access == 0)
+ ksmbd_vfs_query_maximal_access(user_ns,
+ path.dentry,
+ &maximal_access);
+ mxac_ccontext = (struct create_context *)(rsp->Buffer +
+ le32_to_cpu(rsp->CreateContextsLength));
+ contxt_cnt++;
+ create_mxac_rsp_buf(rsp->Buffer +
+ le32_to_cpu(rsp->CreateContextsLength),
+ le32_to_cpu(maximal_access));
+ le32_add_cpu(&rsp->CreateContextsLength,
+ conn->vals->create_mxac_size);
+ inc_rfc1001_len(rsp_org, conn->vals->create_mxac_size);
+ if (next_ptr)
+ *next_ptr = cpu_to_le32(next_off);
+ next_ptr = &mxac_ccontext->Next;
+ next_off = conn->vals->create_mxac_size;
+ }
+
+ if (query_disk_id) {
+ struct create_context *disk_id_ccontext;
+
+ disk_id_ccontext = (struct create_context *)(rsp->Buffer +
+ le32_to_cpu(rsp->CreateContextsLength));
+ contxt_cnt++;
+ create_disk_id_rsp_buf(rsp->Buffer +
+ le32_to_cpu(rsp->CreateContextsLength),
+ stat.ino, tcon->id);
+ le32_add_cpu(&rsp->CreateContextsLength,
+ conn->vals->create_disk_id_size);
+ inc_rfc1001_len(rsp_org, conn->vals->create_disk_id_size);
+ if (next_ptr)
+ *next_ptr = cpu_to_le32(next_off);
+ next_ptr = &disk_id_ccontext->Next;
+ next_off = conn->vals->create_disk_id_size;
+ }
+
+ if (posix_ctxt) {
+ contxt_cnt++;
+ create_posix_rsp_buf(rsp->Buffer +
+ le32_to_cpu(rsp->CreateContextsLength),
+ fp);
+ le32_add_cpu(&rsp->CreateContextsLength,
+ conn->vals->create_posix_size);
+ inc_rfc1001_len(rsp_org, conn->vals->create_posix_size);
+ if (next_ptr)
+ *next_ptr = cpu_to_le32(next_off);
+ }
+
+ if (contxt_cnt > 0) {
+ rsp->CreateContextsOffset =
+ cpu_to_le32(offsetof(struct smb2_create_rsp, Buffer)
+ - 4);
+ }
+
+err_out:
+ if (file_present || created)
+ path_put(&path);
+ ksmbd_revert_fsids(work);
+err_out1:
+ if (rc) {
+ if (rc == -EINVAL)
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ else if (rc == -EOPNOTSUPP)
+ rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+ else if (rc == -EACCES || rc == -ESTALE)
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ else if (rc == -ENOENT)
+ rsp->hdr.Status = STATUS_OBJECT_NAME_INVALID;
+ else if (rc == -EPERM)
+ rsp->hdr.Status = STATUS_SHARING_VIOLATION;
+ else if (rc == -EBUSY)
+ rsp->hdr.Status = STATUS_DELETE_PENDING;
+ else if (rc == -EBADF)
+ rsp->hdr.Status = STATUS_OBJECT_NAME_NOT_FOUND;
+ else if (rc == -ENOEXEC)
+ rsp->hdr.Status = STATUS_DUPLICATE_OBJECTID;
+ else if (rc == -ENXIO)
+ rsp->hdr.Status = STATUS_NO_SUCH_DEVICE;
+ else if (rc == -EEXIST)
+ rsp->hdr.Status = STATUS_OBJECT_NAME_COLLISION;
+ else if (rc == -EMFILE)
+ rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
+ if (!rsp->hdr.Status)
+ rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
+
+ if (!fp || !fp->filename)
+ kfree(name);
+ if (fp)
+ ksmbd_fd_put(work, fp);
+ smb2_set_err_rsp(work);
+ ksmbd_debug(SMB, "Error response: %x\n", rsp->hdr.Status);
+ }
+
+ kfree(lc);
+
+ return 0;
+}
+
+static int readdir_info_level_struct_sz(int info_level)
+{
+ switch (info_level) {
+ case FILE_FULL_DIRECTORY_INFORMATION:
+ return sizeof(struct file_full_directory_info);
+ case FILE_BOTH_DIRECTORY_INFORMATION:
+ return sizeof(struct file_both_directory_info);
+ case FILE_DIRECTORY_INFORMATION:
+ return sizeof(struct file_directory_info);
+ case FILE_NAMES_INFORMATION:
+ return sizeof(struct file_names_info);
+ case FILEID_FULL_DIRECTORY_INFORMATION:
+ return sizeof(struct file_id_full_dir_info);
+ case FILEID_BOTH_DIRECTORY_INFORMATION:
+ return sizeof(struct file_id_both_directory_info);
+ case SMB_FIND_FILE_POSIX_INFO:
+ return sizeof(struct smb2_posix_info);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
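+/*
+ * The sizes above cover only the fixed part of each entry; the UTF-16
+ * name replaces the one-byte FileName placeholder at the end of each
+ * struct, so smb2_populate_readdir_entry() below sizes an entry as
+ *
+ *   next_entry_offset = ALIGN(struct_sz - 1 + conv_len,
+ *                             KSMBD_DIR_INFO_ALIGNMENT);
+ *
+ * and stops with -ENOSPC once an entry no longer fits in the remaining
+ * output buffer.
+ */
+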
+static int dentry_name(struct ksmbd_dir_info *d_info, int info_level)
+{
+ switch (info_level) {
+ case FILE_FULL_DIRECTORY_INFORMATION:
+ {
+ struct file_full_directory_info *ffdinfo;
+
+ ffdinfo = (struct file_full_directory_info *)d_info->rptr;
+ d_info->rptr += le32_to_cpu(ffdinfo->NextEntryOffset);
+ d_info->name = ffdinfo->FileName;
+ d_info->name_len = le32_to_cpu(ffdinfo->FileNameLength);
+ return 0;
+ }
+ case FILE_BOTH_DIRECTORY_INFORMATION:
+ {
+ struct file_both_directory_info *fbdinfo;
+
+ fbdinfo = (struct file_both_directory_info *)d_info->rptr;
+ d_info->rptr += le32_to_cpu(fbdinfo->NextEntryOffset);
+ d_info->name = fbdinfo->FileName;
+ d_info->name_len = le32_to_cpu(fbdinfo->FileNameLength);
+ return 0;
+ }
+ case FILE_DIRECTORY_INFORMATION:
+ {
+ struct file_directory_info *fdinfo;
+
+ fdinfo = (struct file_directory_info *)d_info->rptr;
+ d_info->rptr += le32_to_cpu(fdinfo->NextEntryOffset);
+ d_info->name = fdinfo->FileName;
+ d_info->name_len = le32_to_cpu(fdinfo->FileNameLength);
+ return 0;
+ }
+ case FILE_NAMES_INFORMATION:
+ {
+ struct file_names_info *fninfo;
+
+ fninfo = (struct file_names_info *)d_info->rptr;
+ d_info->rptr += le32_to_cpu(fninfo->NextEntryOffset);
+ d_info->name = fninfo->FileName;
+ d_info->name_len = le32_to_cpu(fninfo->FileNameLength);
+ return 0;
+ }
+ case FILEID_FULL_DIRECTORY_INFORMATION:
+ {
+ struct file_id_full_dir_info *dinfo;
+
+ dinfo = (struct file_id_full_dir_info *)d_info->rptr;
+ d_info->rptr += le32_to_cpu(dinfo->NextEntryOffset);
+ d_info->name = dinfo->FileName;
+ d_info->name_len = le32_to_cpu(dinfo->FileNameLength);
+ return 0;
+ }
+ case FILEID_BOTH_DIRECTORY_INFORMATION:
+ {
+ struct file_id_both_directory_info *fibdinfo;
+
+ fibdinfo = (struct file_id_both_directory_info *)d_info->rptr;
+ d_info->rptr += le32_to_cpu(fibdinfo->NextEntryOffset);
+ d_info->name = fibdinfo->FileName;
+ d_info->name_len = le32_to_cpu(fibdinfo->FileNameLength);
+ return 0;
+ }
+ case SMB_FIND_FILE_POSIX_INFO:
+ {
+ struct smb2_posix_info *posix_info;
+
+ posix_info = (struct smb2_posix_info *)d_info->rptr;
+ d_info->rptr += le32_to_cpu(posix_info->NextEntryOffset);
+ d_info->name = posix_info->name;
+ d_info->name_len = le32_to_cpu(posix_info->name_len);
+ return 0;
+ }
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * smb2_populate_readdir_entry() - encode directory entry in smb2 response
+ * buffer
+ * @conn: connection instance
+ * @info_level: smb information level
+ * @d_info: structure included variables for query dir
+ * @user_ns: user namespace
+ * @ksmbd_kstat: ksmbd wrapper of dirent stat information
+ *
+ * If the directory has many entries, a single find-first cannot return
+ * them all; find-next may be called multiple times to read the
+ * remaining entries.
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
+ struct ksmbd_dir_info *d_info,
+ struct user_namespace *user_ns,
+ struct ksmbd_kstat *ksmbd_kstat)
+{
+ int next_entry_offset = 0;
+ char *conv_name;
+ int conv_len;
+ void *kstat;
+ int struct_sz, rc = 0;
+
+ conv_name = ksmbd_convert_dir_info_name(d_info,
+ conn->local_nls,
+ &conv_len);
+ if (!conv_name)
+ return -ENOMEM;
+
+ /* Somehow the name has only terminating NULL bytes */
+ if (conv_len < 0) {
+ rc = -EINVAL;
+ goto free_conv_name;
+ }
+
+ struct_sz = readdir_info_level_struct_sz(info_level);
+ next_entry_offset = ALIGN(struct_sz - 1 + conv_len,
+ KSMBD_DIR_INFO_ALIGNMENT);
+
+ if (next_entry_offset > d_info->out_buf_len) {
+ d_info->out_buf_len = 0;
+ rc = -ENOSPC;
+ goto free_conv_name;
+ }
+
+ kstat = d_info->wptr;
+ if (info_level != FILE_NAMES_INFORMATION)
+ kstat = ksmbd_vfs_init_kstat(&d_info->wptr, ksmbd_kstat);
+
+ switch (info_level) {
+ case FILE_FULL_DIRECTORY_INFORMATION:
+ {
+ struct file_full_directory_info *ffdinfo;
+
+ ffdinfo = (struct file_full_directory_info *)kstat;
+ ffdinfo->FileNameLength = cpu_to_le32(conv_len);
+ ffdinfo->EaSize =
+ smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
+ if (ffdinfo->EaSize)
+ ffdinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+ if (d_info->hide_dot_file && d_info->name[0] == '.')
+ ffdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+ memcpy(ffdinfo->FileName, conv_name, conv_len);
+ ffdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILE_BOTH_DIRECTORY_INFORMATION:
+ {
+ struct file_both_directory_info *fbdinfo;
+
+ fbdinfo = (struct file_both_directory_info *)kstat;
+ fbdinfo->FileNameLength = cpu_to_le32(conv_len);
+ fbdinfo->EaSize =
+ smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
+ if (fbdinfo->EaSize)
+ fbdinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+ fbdinfo->ShortNameLength = 0;
+ fbdinfo->Reserved = 0;
+ if (d_info->hide_dot_file && d_info->name[0] == '.')
+ fbdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+ memcpy(fbdinfo->FileName, conv_name, conv_len);
+ fbdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILE_DIRECTORY_INFORMATION:
+ {
+ struct file_directory_info *fdinfo;
+
+ fdinfo = (struct file_directory_info *)kstat;
+ fdinfo->FileNameLength = cpu_to_le32(conv_len);
+ if (d_info->hide_dot_file && d_info->name[0] == '.')
+ fdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+ memcpy(fdinfo->FileName, conv_name, conv_len);
+ fdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILE_NAMES_INFORMATION:
+ {
+ struct file_names_info *fninfo;
+
+ fninfo = (struct file_names_info *)kstat;
+ fninfo->FileNameLength = cpu_to_le32(conv_len);
+ memcpy(fninfo->FileName, conv_name, conv_len);
+ fninfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILEID_FULL_DIRECTORY_INFORMATION:
+ {
+ struct file_id_full_dir_info *dinfo;
+
+ dinfo = (struct file_id_full_dir_info *)kstat;
+ dinfo->FileNameLength = cpu_to_le32(conv_len);
+ dinfo->EaSize =
+ smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
+ if (dinfo->EaSize)
+ dinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+ dinfo->Reserved = 0;
+ dinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
+ if (d_info->hide_dot_file && d_info->name[0] == '.')
+ dinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+ memcpy(dinfo->FileName, conv_name, conv_len);
+ dinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILEID_BOTH_DIRECTORY_INFORMATION:
+ {
+ struct file_id_both_directory_info *fibdinfo;
+
+ fibdinfo = (struct file_id_both_directory_info *)kstat;
+ fibdinfo->FileNameLength = cpu_to_le32(conv_len);
+ fibdinfo->EaSize =
+ smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
+ if (fibdinfo->EaSize)
+ fibdinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+ fibdinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
+ fibdinfo->ShortNameLength = 0;
+ fibdinfo->Reserved = 0;
+ fibdinfo->Reserved2 = cpu_to_le16(0);
+ if (d_info->hide_dot_file && d_info->name[0] == '.')
+ fibdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+ memcpy(fibdinfo->FileName, conv_name, conv_len);
+ fibdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case SMB_FIND_FILE_POSIX_INFO:
+ {
+ struct smb2_posix_info *posix_info;
+ u64 time;
+
+ posix_info = (struct smb2_posix_info *)kstat;
+ posix_info->Ignored = 0;
+ posix_info->CreationTime = cpu_to_le64(ksmbd_kstat->create_time);
+ time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->ctime);
+ posix_info->ChangeTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->atime);
+ posix_info->LastAccessTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->mtime);
+ posix_info->LastWriteTime = cpu_to_le64(time);
+ posix_info->EndOfFile = cpu_to_le64(ksmbd_kstat->kstat->size);
+ posix_info->AllocationSize = cpu_to_le64(ksmbd_kstat->kstat->blocks << 9);
+ posix_info->DeviceId = cpu_to_le32(ksmbd_kstat->kstat->rdev);
+ posix_info->HardLinks = cpu_to_le32(ksmbd_kstat->kstat->nlink);
+ posix_info->Mode = cpu_to_le32(ksmbd_kstat->kstat->mode);
+ posix_info->Inode = cpu_to_le64(ksmbd_kstat->kstat->ino);
+ posix_info->DosAttributes =
+ S_ISDIR(ksmbd_kstat->kstat->mode) ? ATTR_DIRECTORY_LE : ATTR_ARCHIVE_LE;
+ if (d_info->hide_dot_file && d_info->name[0] == '.')
+ posix_info->DosAttributes |= ATTR_HIDDEN_LE;
+ id_to_sid(from_kuid(user_ns, ksmbd_kstat->kstat->uid),
+ SIDNFS_USER, (struct smb_sid *)&posix_info->SidBuffer[0]);
+ id_to_sid(from_kgid(user_ns, ksmbd_kstat->kstat->gid),
+ SIDNFS_GROUP, (struct smb_sid *)&posix_info->SidBuffer[20]);
+ memcpy(posix_info->name, conv_name, conv_len);
+ posix_info->name_len = cpu_to_le32(conv_len);
+ posix_info->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ } /* switch (info_level) */
+
+ d_info->last_entry_offset = d_info->data_count;
+ d_info->data_count += next_entry_offset;
+ d_info->out_buf_len -= next_entry_offset;
+ d_info->wptr += next_entry_offset;
+
+ ksmbd_debug(SMB,
+ "info_level : %d, buf_len :%d, next_offset : %d, data_count : %d\n",
+ info_level, d_info->out_buf_len,
+ next_entry_offset, d_info->data_count);
+
+free_conv_name:
+ kfree(conv_name);
+ return rc;
+}
+
+struct smb2_query_dir_private {
+ struct ksmbd_work *work;
+ char *search_pattern;
+ struct ksmbd_file *dir_fp;
+
+ struct ksmbd_dir_info *d_info;
+ int info_level;
+};
+
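+/* lookup_one_len() requires the parent directory inode to be locked */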
+static void lock_dir(struct ksmbd_file *dir_fp)
+{
+ struct dentry *dir = dir_fp->filp->f_path.dentry;
+
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
+}
+
+static void unlock_dir(struct ksmbd_file *dir_fp)
+{
+ struct dentry *dir = dir_fp->filp->f_path.dentry;
+
+ inode_unlock(d_inode(dir));
+}
+
+static int process_query_dir_entries(struct smb2_query_dir_private *priv)
+{
+ struct user_namespace *user_ns = file_mnt_user_ns(priv->dir_fp->filp);
+ struct kstat kstat;
+ struct ksmbd_kstat ksmbd_kstat;
+ int rc;
+ int i;
+
+ for (i = 0; i < priv->d_info->num_entry; i++) {
+ struct dentry *dent;
+
+ if (dentry_name(priv->d_info, priv->info_level))
+ return -EINVAL;
+
+ lock_dir(priv->dir_fp);
+ dent = lookup_one_len(priv->d_info->name,
+ priv->dir_fp->filp->f_path.dentry,
+ priv->d_info->name_len);
+ unlock_dir(priv->dir_fp);
+
+ if (IS_ERR(dent)) {
+ ksmbd_debug(SMB, "Cannot lookup `%s' [%ld]\n",
+ priv->d_info->name,
+ PTR_ERR(dent));
+ continue;
+ }
+ if (unlikely(d_is_negative(dent))) {
+ dput(dent);
+ ksmbd_debug(SMB, "Negative dentry `%s'\n",
+ priv->d_info->name);
+ continue;
+ }
+
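+ /* FILE_NAMES_INFORMATION returns names only, so no stat is needed */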
+ ksmbd_kstat.kstat = &kstat;
+ if (priv->info_level != FILE_NAMES_INFORMATION)
+ ksmbd_vfs_fill_dentry_attrs(priv->work,
+ user_ns,
+ dent,
+ &ksmbd_kstat);
+
+ rc = smb2_populate_readdir_entry(priv->work->conn,
+ priv->info_level,
+ priv->d_info,
+ user_ns,
+ &ksmbd_kstat);
+ dput(dent);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+static int reserve_populate_dentry(struct ksmbd_dir_info *d_info,
+ int info_level)
+{
+ int struct_sz;
+ int conv_len;
+ int next_entry_offset;
+
+ struct_sz = readdir_info_level_struct_sz(info_level);
+ if (struct_sz == -EOPNOTSUPP)
+ return -EOPNOTSUPP;
+
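+ /* reserve the worst case: two bytes per character plus a UTF-16 null terminator */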
+ conv_len = (d_info->name_len + 1) * 2;
+ next_entry_offset = ALIGN(struct_sz - 1 + conv_len,
+ KSMBD_DIR_INFO_ALIGNMENT);
+
+ if (next_entry_offset > d_info->out_buf_len) {
+ d_info->out_buf_len = 0;
+ return -ENOSPC;
+ }
+
+ switch (info_level) {
+ case FILE_FULL_DIRECTORY_INFORMATION:
+ {
+ struct file_full_directory_info *ffdinfo;
+
+ ffdinfo = (struct file_full_directory_info *)d_info->wptr;
+ memcpy(ffdinfo->FileName, d_info->name, d_info->name_len);
+ ffdinfo->FileName[d_info->name_len] = 0x00;
+ ffdinfo->FileNameLength = cpu_to_le32(d_info->name_len);
+ ffdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILE_BOTH_DIRECTORY_INFORMATION:
+ {
+ struct file_both_directory_info *fbdinfo;
+
+ fbdinfo = (struct file_both_directory_info *)d_info->wptr;
+ memcpy(fbdinfo->FileName, d_info->name, d_info->name_len);
+ fbdinfo->FileName[d_info->name_len] = 0x00;
+ fbdinfo->FileNameLength = cpu_to_le32(d_info->name_len);
+ fbdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILE_DIRECTORY_INFORMATION:
+ {
+ struct file_directory_info *fdinfo;
+
+ fdinfo = (struct file_directory_info *)d_info->wptr;
+ memcpy(fdinfo->FileName, d_info->name, d_info->name_len);
+ fdinfo->FileName[d_info->name_len] = 0x00;
+ fdinfo->FileNameLength = cpu_to_le32(d_info->name_len);
+ fdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILE_NAMES_INFORMATION:
+ {
+ struct file_names_info *fninfo;
+
+ fninfo = (struct file_names_info *)d_info->wptr;
+ memcpy(fninfo->FileName, d_info->name, d_info->name_len);
+ fninfo->FileName[d_info->name_len] = 0x00;
+ fninfo->FileNameLength = cpu_to_le32(d_info->name_len);
+ fninfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILEID_FULL_DIRECTORY_INFORMATION:
+ {
+ struct file_id_full_dir_info *dinfo;
+
+ dinfo = (struct file_id_full_dir_info *)d_info->wptr;
+ memcpy(dinfo->FileName, d_info->name, d_info->name_len);
+ dinfo->FileName[d_info->name_len] = 0x00;
+ dinfo->FileNameLength = cpu_to_le32(d_info->name_len);
+ dinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case FILEID_BOTH_DIRECTORY_INFORMATION:
+ {
+ struct file_id_both_directory_info *fibdinfo;
+
+ fibdinfo = (struct file_id_both_directory_info *)d_info->wptr;
+ memcpy(fibdinfo->FileName, d_info->name, d_info->name_len);
+ fibdinfo->FileName[d_info->name_len] = 0x00;
+ fibdinfo->FileNameLength = cpu_to_le32(d_info->name_len);
+ fibdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
+ break;
+ }
+ case SMB_FIND_FILE_POSIX_INFO:
+ {
+ struct smb2_posix_info *posix_info;
+
+ posix_info = (struct smb2_posix_info *)d_info->wptr;
+ memcpy(posix_info->name, d_info->name, d_info->name_len);
+ posix_info->name[d_info->name_len] = 0x00;
+ posix_info->name_len = cpu_to_le32(d_info->name_len);
+ posix_info->NextEntryOffset =
+ cpu_to_le32(next_entry_offset);
+ break;
+ }
+ } /* switch (info_level) */
+
+ d_info->num_entry++;
+ d_info->out_buf_len -= next_entry_offset;
+ d_info->wptr += next_entry_offset;
+ return 0;
+}
+
+static int __query_dir(struct dir_context *ctx, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type)
+{
+ struct ksmbd_readdir_data *buf;
+ struct smb2_query_dir_private *priv;
+ struct ksmbd_dir_info *d_info;
+ int rc;
+
+ buf = container_of(ctx, struct ksmbd_readdir_data, ctx);
+ priv = buf->private;
+ d_info = priv->d_info;
+
+ /* dot and dotdot entries are already reserved */
+ if (!strcmp(".", name) || !strcmp("..", name))
+ return 0;
+ if (ksmbd_share_veto_filename(priv->work->tcon->share_conf, name))
+ return 0;
+ if (!match_pattern(name, namlen, priv->search_pattern))
+ return 0;
+
+ d_info->name = name;
+ d_info->name_len = namlen;
+ rc = reserve_populate_dentry(d_info, priv->info_level);
+ if (rc)
+ return rc;
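+ /*
+ * For a single-entry scan, zeroing out_buf_len makes any further
+ * reservation fail with -ENOSPC, so the scan stops after this entry.
+ */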
+ if (d_info->flags & SMB2_RETURN_SINGLE_ENTRY) {
+ d_info->out_buf_len = 0;
+ return 0;
+ }
+ return 0;
+}
+
+static void restart_ctx(struct dir_context *ctx)
+{
+ ctx->pos = 0;
+}
+
+static int verify_info_level(int info_level)
+{
+ switch (info_level) {
+ case FILE_FULL_DIRECTORY_INFORMATION:
+ case FILE_BOTH_DIRECTORY_INFORMATION:
+ case FILE_DIRECTORY_INFORMATION:
+ case FILE_NAMES_INFORMATION:
+ case FILEID_FULL_DIRECTORY_INFORMATION:
+ case FILEID_BOTH_DIRECTORY_INFORMATION:
+ case SMB_FIND_FILE_POSIX_INFO:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int smb2_query_dir(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_query_directory_req *req;
+ struct smb2_query_directory_rsp *rsp, *rsp_org;
+ struct ksmbd_share_config *share = work->tcon->share_conf;
+ struct ksmbd_file *dir_fp = NULL;
+ struct ksmbd_dir_info d_info;
+ int rc = 0;
+ char *srch_ptr = NULL;
+ unsigned char srch_flag;
+ int buffer_sz;
+ struct smb2_query_dir_private query_dir_private = {NULL, };
+
+ rsp_org = work->response_buf;
+ WORK_BUFFERS(work, req, rsp);
+
+ if (ksmbd_override_fsids(work)) {
+ rsp->hdr.Status = STATUS_NO_MEMORY;
+ smb2_set_err_rsp(work);
+ return -ENOMEM;
+ }
+
+ rc = verify_info_level(req->FileInformationClass);
+ if (rc) {
+ rc = -EFAULT;
+ goto err_out2;
+ }
+
+ dir_fp = ksmbd_lookup_fd_slow(work,
+ le64_to_cpu(req->VolatileFileId),
+ le64_to_cpu(req->PersistentFileId));
+ if (!dir_fp) {
+ rc = -EBADF;
+ goto err_out2;
+ }
+
+ if (!(dir_fp->daccess & FILE_LIST_DIRECTORY_LE) ||
+ inode_permission(file_mnt_user_ns(dir_fp->filp),
+ file_inode(dir_fp->filp),
+ MAY_READ | MAY_EXEC)) {
+ pr_err("no right to enumerate directory (%pd)\n",
+ dir_fp->filp->f_path.dentry);
+ rc = -EACCES;
+ goto err_out2;
+ }
+
+ if (!S_ISDIR(file_inode(dir_fp->filp)->i_mode)) {
+ pr_err("can't do query dir for a file\n");
+ rc = -EINVAL;
+ goto err_out2;
+ }
+
+ srch_flag = req->Flags;
+ srch_ptr = smb_strndup_from_utf16(req->Buffer,
+ le16_to_cpu(req->FileNameLength), 1,
+ conn->local_nls);
+ if (IS_ERR(srch_ptr)) {
+ ksmbd_debug(SMB, "Search Pattern not found\n");
+ rc = -EINVAL;
+ goto err_out2;
+ } else {
+ ksmbd_debug(SMB, "Search pattern is %s\n", srch_ptr);
+ }
+
+ ksmbd_debug(SMB, "Directory name is %s\n", dir_fp->filename);
+
+ if (srch_flag & SMB2_REOPEN || srch_flag & SMB2_RESTART_SCANS) {
+ ksmbd_debug(SMB, "Restart directory scan\n");
+ generic_file_llseek(dir_fp->filp, 0, SEEK_SET);
+ restart_ctx(&dir_fp->readdir_data.ctx);
+ }
+
+ memset(&d_info, 0, sizeof(struct ksmbd_dir_info));
+ d_info.wptr = (char *)rsp->Buffer;
+ d_info.rptr = (char *)rsp->Buffer;
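+ /*
+ * Cap the entry buffer at both the space left in the response buffer
+ * and the client-requested OutputBufferLength.
+ */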
+ d_info.out_buf_len = (work->response_sz - (get_rfc1002_len(rsp_org) + 4));
+ d_info.out_buf_len = min_t(int, d_info.out_buf_len, le32_to_cpu(req->OutputBufferLength)) -
+ sizeof(struct smb2_query_directory_rsp);
+ d_info.flags = srch_flag;
+
+ /*
+ * reserve the dot and dotdot entries at the head of the buffer
+ * in the first response
+ */
+ rc = ksmbd_populate_dot_dotdot_entries(work, req->FileInformationClass,
+ dir_fp, &d_info, srch_ptr,
+ smb2_populate_readdir_entry);
+ if (rc == -ENOSPC)
+ rc = 0;
+ else if (rc)
+ goto err_out;
+
+ if (test_share_config_flag(share, KSMBD_SHARE_FLAG_HIDE_DOT_FILES))
+ d_info.hide_dot_file = true;
+
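+ /*
+ * Two passes: iterate_dir() reserves one entry (name only) per dirent
+ * via __query_dir(), then process_query_dir_entries() looks up each
+ * reserved name and fills in the remaining attributes.
+ */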
+ buffer_sz = d_info.out_buf_len;
+ d_info.rptr = d_info.wptr;
+ query_dir_private.work = work;
+ query_dir_private.search_pattern = srch_ptr;
+ query_dir_private.dir_fp = dir_fp;
+ query_dir_private.d_info = &d_info;
+ query_dir_private.info_level = req->FileInformationClass;
+ dir_fp->readdir_data.private = &query_dir_private;
+ set_ctx_actor(&dir_fp->readdir_data.ctx, __query_dir);
+
+ rc = iterate_dir(dir_fp->filp, &dir_fp->readdir_data.ctx);
+ if (rc == 0)
+ restart_ctx(&dir_fp->readdir_data.ctx);
+ if (rc == -ENOSPC)
+ rc = 0;
+ if (rc)
+ goto err_out;
+
+ d_info.wptr = d_info.rptr;
+ d_info.out_buf_len = buffer_sz;
+ rc = process_query_dir_entries(&query_dir_private);
+ if (rc)
+ goto err_out;
+
+ if (!d_info.data_count && d_info.out_buf_len >= 0) {
+ if (srch_flag & SMB2_RETURN_SINGLE_ENTRY && !is_asterisk(srch_ptr)) {
+ rsp->hdr.Status = STATUS_NO_SUCH_FILE;
+ } else {
+ dir_fp->dot_dotdot[0] = dir_fp->dot_dotdot[1] = 0;
+ rsp->hdr.Status = STATUS_NO_MORE_FILES;
+ }
+ rsp->StructureSize = cpu_to_le16(9);
+ rsp->OutputBufferOffset = cpu_to_le16(0);
+ rsp->OutputBufferLength = cpu_to_le32(0);
+ rsp->Buffer[0] = 0;
+ inc_rfc1001_len(rsp_org, 9);
+ } else {
+ ((struct file_directory_info *)
+ ((char *)rsp->Buffer + d_info.last_entry_offset))
+ ->NextEntryOffset = 0;
+
+ rsp->StructureSize = cpu_to_le16(9);
+ rsp->OutputBufferOffset = cpu_to_le16(72);
+ rsp->OutputBufferLength = cpu_to_le32(d_info.data_count);
+ inc_rfc1001_len(rsp_org, 8 + d_info.data_count);
+ }
+
+ kfree(srch_ptr);
+ ksmbd_fd_put(work, dir_fp);
+ ksmbd_revert_fsids(work);
+ return 0;
+
+err_out:
+ pr_err("error while processing smb2 query dir rc = %d\n", rc);
+ kfree(srch_ptr);
+
+err_out2:
+ if (rc == -EINVAL)
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ else if (rc == -EACCES)
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ else if (rc == -ENOENT)
+ rsp->hdr.Status = STATUS_NO_SUCH_FILE;
+ else if (rc == -EBADF)
+ rsp->hdr.Status = STATUS_FILE_CLOSED;
+ else if (rc == -ENOMEM)
+ rsp->hdr.Status = STATUS_NO_MEMORY;
+ else if (rc == -EFAULT)
+ rsp->hdr.Status = STATUS_INVALID_INFO_CLASS;
+ if (!rsp->hdr.Status)
+ rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
+
+ smb2_set_err_rsp(work);
+ ksmbd_fd_put(work, dir_fp);
+ ksmbd_revert_fsids(work);
+ return 0;
+}
+
+/**
+ * buffer_check_err() - helper function to check buffer errors
+ * @reqOutputBufferLength: max buffer length expected in command response
+ * @rsp:		query info response buffer containing the output buffer length
+ * @infoclass_size: query info class response buffer size
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int buffer_check_err(int reqOutputBufferLength,
+ struct smb2_query_info_rsp *rsp, int infoclass_size)
+{
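+ /*
+ * A response larger than the client's output buffer is either a hard
+ * error (the buffer cannot hold even the fixed info class struct) or
+ * a truncation reported as STATUS_BUFFER_OVERFLOW.
+ */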
+ if (reqOutputBufferLength < le32_to_cpu(rsp->OutputBufferLength)) {
+ if (reqOutputBufferLength < infoclass_size) {
+ pr_err("Invalid Buffer Size Requested\n");
+ rsp->hdr.Status = STATUS_INFO_LENGTH_MISMATCH;
+ rsp->hdr.smb2_buf_length = cpu_to_be32(sizeof(struct smb2_hdr) - 4);
+ return -EINVAL;
+ }
+
+ ksmbd_debug(SMB, "Buffer Overflow\n");
+ rsp->hdr.Status = STATUS_BUFFER_OVERFLOW;
+ rsp->hdr.smb2_buf_length = cpu_to_be32(sizeof(struct smb2_hdr) - 4 +
+ reqOutputBufferLength);
+ rsp->OutputBufferLength = cpu_to_le32(reqOutputBufferLength);
+ }
+ return 0;
+}
+
+static void get_standard_info_pipe(struct smb2_query_info_rsp *rsp)
+{
+ struct smb2_file_standard_info *sinfo;
+
+ sinfo = (struct smb2_file_standard_info *)rsp->Buffer;
+
+ sinfo->AllocationSize = cpu_to_le64(4096);
+ sinfo->EndOfFile = cpu_to_le64(0);
+ sinfo->NumberOfLinks = cpu_to_le32(1);
+ sinfo->DeletePending = 1;
+ sinfo->Directory = 0;
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_standard_info));
+ inc_rfc1001_len(rsp, sizeof(struct smb2_file_standard_info));
+}
+
+static void get_internal_info_pipe(struct smb2_query_info_rsp *rsp, u64 num)
+{
+ struct smb2_file_internal_info *file_info;
+
+ file_info = (struct smb2_file_internal_info *)rsp->Buffer;
+
+ /* any unique number; the high bit keeps it clear of real inode numbers */
+ file_info->IndexNumber = cpu_to_le64(num | (1ULL << 63));
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_internal_info));
+ inc_rfc1001_len(rsp, sizeof(struct smb2_file_internal_info));
+}
+
+static int smb2_get_info_file_pipe(struct ksmbd_session *sess,
+ struct smb2_query_info_req *req,
+ struct smb2_query_info_rsp *rsp)
+{
+ u64 id;
+ int rc;
+
+ /*
+ * Windows can sometimes send a query file info request on
+ * a pipe without opening it; check for that error condition here.
+ */
+ id = le64_to_cpu(req->VolatileFileId);
+ if (!ksmbd_session_rpc_method(sess, id))
+ return -ENOENT;
+
+ ksmbd_debug(SMB, "FileInfoClass %u, FileId 0x%llx\n",
+ req->FileInfoClass, le64_to_cpu(req->VolatileFileId));
+
+ switch (req->FileInfoClass) {
+ case FILE_STANDARD_INFORMATION:
+ get_standard_info_pipe(rsp);
+ rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
+ rsp, FILE_STANDARD_INFORMATION_SIZE);
+ break;
+ case FILE_INTERNAL_INFORMATION:
+ get_internal_info_pipe(rsp, id);
+ rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
+ rsp, FILE_INTERNAL_INFORMATION_SIZE);
+ break;
+ default:
+ ksmbd_debug(SMB, "smb2_info_file_pipe for %u not supported\n",
+ req->FileInfoClass);
+ rc = -EOPNOTSUPP;
+ }
+ return rc;
+}
+
+/**
+ * smb2_get_ea() - handler for smb2 get extended attribute command
+ * @work: smb work containing query info command buffer
+ * @fp: ksmbd_file pointer
+ * @req: get extended attribute request
+ * @rsp: response buffer pointer
+ * @rsp_org: base response buffer pointer in case of chained response
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
+ struct smb2_query_info_req *req,
+ struct smb2_query_info_rsp *rsp, void *rsp_org)
+{
+ struct smb2_ea_info *eainfo, *prev_eainfo;
+ char *name, *ptr, *xattr_list = NULL, *buf;
+ int rc, name_len, value_len, xattr_list_len, idx;
+ ssize_t buf_free_len, alignment_bytes, next_offset, rsp_data_cnt = 0;
+ struct smb2_ea_info_req *ea_req = NULL;
+ struct path *path;
+ struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
+
+ if (!(fp->daccess & FILE_READ_EA_LE)) {
+ pr_err("Not permitted to read ext attr : 0x%x\n",
+ fp->daccess);
+ return -EACCES;
+ }
+
+ path = &fp->filp->f_path;
+ /* single EA entry is requested with given user.* name */
+ if (req->InputBufferLength) {
+ ea_req = (struct smb2_ea_info_req *)req->Buffer;
+ } else {
+ /* need to send all EAs if no specific EA is requested */
+ if (le32_to_cpu(req->Flags) & SL_RETURN_SINGLE_ENTRY)
+ ksmbd_debug(SMB,
+ "All EAs are requested but need to send single EA entry in rsp flags 0x%x\n",
+ le32_to_cpu(req->Flags));
+ }
+
+ buf_free_len = work->response_sz -
+ (get_rfc1002_len(rsp_org) + 4) -
+ sizeof(struct smb2_query_info_rsp);
+
+ if (le32_to_cpu(req->OutputBufferLength) < buf_free_len)
+ buf_free_len = le32_to_cpu(req->OutputBufferLength);
+
+ rc = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
+ if (rc < 0) {
+ rsp->hdr.Status = STATUS_INVALID_HANDLE;
+ goto out;
+ } else if (!rc) { /* there is no EA in the file */
+ ksmbd_debug(SMB, "no ea data in the file\n");
+ goto done;
+ }
+ xattr_list_len = rc;
+
+ ptr = (char *)rsp->Buffer;
+ eainfo = (struct smb2_ea_info *)ptr;
+ prev_eainfo = eainfo;
+ idx = 0;
+
+ while (idx < xattr_list_len) {
+ name = xattr_list + idx;
+ name_len = strlen(name);
+
+ ksmbd_debug(SMB, "%s, len %d\n", name, name_len);
+ idx += name_len + 1;
+
+ /*
+ * CIFS does not support EAs outside the user.* namespace; still,
+ * keep the framework generic so that other attrs can be listed
+ * in the future.
+ */
+ if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ continue;
+
+ if (!strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX,
+ STREAM_PREFIX_LEN))
+ continue;
+
+ if (req->InputBufferLength &&
+ strncmp(&name[XATTR_USER_PREFIX_LEN], ea_req->name,
+ ea_req->EaNameLength))
+ continue;
+
+ if (!strncmp(&name[XATTR_USER_PREFIX_LEN],
+ DOS_ATTRIBUTE_PREFIX, DOS_ATTRIBUTE_PREFIX_LEN))
+ continue;
+
+ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ name_len -= XATTR_USER_PREFIX_LEN;
+
+ ptr = (char *)(&eainfo->name + name_len + 1);
+ buf_free_len -= (offsetof(struct smb2_ea_info, name) +
+ name_len + 1);
+ /* bailout if xattr can't fit in buf_free_len */
+ value_len = ksmbd_vfs_getxattr(user_ns, path->dentry,
+ name, &buf);
+ if (value_len <= 0) {
+ rc = -ENOENT;
+ rsp->hdr.Status = STATUS_INVALID_HANDLE;
+ goto out;
+ }
+
+ buf_free_len -= value_len;
+ if (buf_free_len < 0) {
+ kfree(buf);
+ break;
+ }
+
+ memcpy(ptr, buf, value_len);
+ kfree(buf);
+
+ ptr += value_len;
+ eainfo->Flags = 0;
+ eainfo->EaNameLength = name_len;
+
+ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ memcpy(eainfo->name, &name[XATTR_USER_PREFIX_LEN],
+ name_len);
+ else
+ memcpy(eainfo->name, name, name_len);
+
+ eainfo->name[name_len] = '\0';
+ eainfo->EaValueLength = cpu_to_le16(value_len);
+ next_offset = offsetof(struct smb2_ea_info, name) +
+ name_len + 1 + value_len;
+
+ /* align the next xattr entry on a 4-byte boundary */
+ alignment_bytes = ((next_offset + 3) & ~3) - next_offset;
+ if (alignment_bytes) {
+ memset(ptr, '\0', alignment_bytes);
+ ptr += alignment_bytes;
+ next_offset += alignment_bytes;
+ buf_free_len -= alignment_bytes;
+ }
+ eainfo->NextEntryOffset = cpu_to_le32(next_offset);
+ prev_eainfo = eainfo;
+ eainfo = (struct smb2_ea_info *)ptr;
+ rsp_data_cnt += next_offset;
+
+ if (req->InputBufferLength) {
+ ksmbd_debug(SMB, "single entry requested\n");
+ break;
+ }
+ }
+
+ /* no more ea entries */
+ prev_eainfo->NextEntryOffset = 0;
+done:
+ rc = 0;
+ if (rsp_data_cnt == 0)
+ rsp->hdr.Status = STATUS_NO_EAS_ON_FILE;
+ rsp->OutputBufferLength = cpu_to_le32(rsp_data_cnt);
+ inc_rfc1001_len(rsp_org, rsp_data_cnt);
+out:
+ kvfree(xattr_list);
+ return rc;
+}
+
+static void get_file_access_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
+{
+ struct smb2_file_access_info *file_info;
+
+ file_info = (struct smb2_file_access_info *)rsp->Buffer;
+ file_info->AccessFlags = fp->daccess;
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_access_info));
+ inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_access_info));
+}
+
+static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
+{
+ struct smb2_file_all_info *basic_info;
+ struct kstat stat;
+ u64 time;
+
+ if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
+ pr_err("no right to read the attributes : 0x%x\n",
+ fp->daccess);
+ return -EACCES;
+ }
+
+ basic_info = (struct smb2_file_all_info *)rsp->Buffer;
+ generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
+ &stat);
+ basic_info->CreationTime = cpu_to_le64(fp->create_time);
+ time = ksmbd_UnixTimeToNT(stat.atime);
+ basic_info->LastAccessTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(stat.mtime);
+ basic_info->LastWriteTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(stat.ctime);
+ basic_info->ChangeTime = cpu_to_le64(time);
+ basic_info->Attributes = fp->f_ci->m_fattr;
+ basic_info->Pad1 = 0;
+ rsp->OutputBufferLength =
+ cpu_to_le32(offsetof(struct smb2_file_all_info, AllocationSize));
+ inc_rfc1001_len(rsp_org, offsetof(struct smb2_file_all_info,
+ AllocationSize));
+ return 0;
+}
+
+static unsigned long long get_allocation_size(struct inode *inode,
+ struct kstat *stat)
+{
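+ /*
+ * Report the larger of the on-disk footprint (i_blocks is in units
+ * of 512 bytes) and the logical file size; directories report zero.
+ */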
+ unsigned long long alloc_size = 0;
+
+ if (!S_ISDIR(stat->mode)) {
+ if ((inode->i_blocks << 9) <= stat->size)
+ alloc_size = stat->size;
+ else
+ alloc_size = inode->i_blocks << 9;
+ }
+
+ return alloc_size;
+}
+
+static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
+{
+ struct smb2_file_standard_info *sinfo;
+ unsigned int delete_pending;
+ struct inode *inode;
+ struct kstat stat;
+
+ inode = file_inode(fp->filp);
+ generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat);
+
+ sinfo = (struct smb2_file_standard_info *)rsp->Buffer;
+ delete_pending = ksmbd_inode_pending_delete(fp);
+
+ sinfo->AllocationSize = cpu_to_le64(get_allocation_size(inode, &stat));
+ sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ sinfo->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending);
+ sinfo->DeletePending = delete_pending;
+ sinfo->Directory = S_ISDIR(stat.mode) ? 1 : 0;
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_standard_info));
+ inc_rfc1001_len(rsp_org,
+ sizeof(struct smb2_file_standard_info));
+}
+
+static void get_file_alignment_info(struct smb2_query_info_rsp *rsp,
+ void *rsp_org)
+{
+ struct smb2_file_alignment_info *file_info;
+
+ file_info = (struct smb2_file_alignment_info *)rsp->Buffer;
+ file_info->AlignmentRequirement = 0;
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_alignment_info));
+ inc_rfc1001_len(rsp_org,
+ sizeof(struct smb2_file_alignment_info));
+}
+
+static int get_file_all_info(struct ksmbd_work *work,
+ struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp,
+ void *rsp_org)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_file_all_info *file_info;
+ unsigned int delete_pending;
+ struct inode *inode;
+ struct kstat stat;
+ int conv_len;
+ char *filename;
+ u64 time;
+
+ if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
+ ksmbd_debug(SMB, "no right to read the attributes : 0x%x\n",
+ fp->daccess);
+ return -EACCES;
+ }
+
+ filename = convert_to_nt_pathname(fp->filename,
+ work->tcon->share_conf->path);
+ if (!filename)
+ return -ENOMEM;
+
+ inode = file_inode(fp->filp);
+ generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat);
+
+ ksmbd_debug(SMB, "filename = %s\n", filename);
+ delete_pending = ksmbd_inode_pending_delete(fp);
+ file_info = (struct smb2_file_all_info *)rsp->Buffer;
+
+ file_info->CreationTime = cpu_to_le64(fp->create_time);
+ time = ksmbd_UnixTimeToNT(stat.atime);
+ file_info->LastAccessTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(stat.mtime);
+ file_info->LastWriteTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(stat.ctime);
+ file_info->ChangeTime = cpu_to_le64(time);
+ file_info->Attributes = fp->f_ci->m_fattr;
+ file_info->Pad1 = 0;
+ file_info->AllocationSize =
+ cpu_to_le64(get_allocation_size(inode, &stat));
+ file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ file_info->NumberOfLinks =
+ cpu_to_le32(get_nlink(&stat) - delete_pending);
+ file_info->DeletePending = delete_pending;
+ file_info->Directory = S_ISDIR(stat.mode) ? 1 : 0;
+ file_info->Pad2 = 0;
+ file_info->IndexNumber = cpu_to_le64(stat.ino);
+ file_info->EASize = 0;
+ file_info->AccessFlags = fp->daccess;
+ file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
+ file_info->Mode = fp->coption;
+ file_info->AlignmentRequirement = 0;
+ conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename,
+ PATH_MAX, conn->local_nls, 0);
+ conv_len *= 2;
+ file_info->FileNameLength = cpu_to_le32(conv_len);
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_all_info) + conv_len - 1);
+ kfree(filename);
+ inc_rfc1001_len(rsp_org, le32_to_cpu(rsp->OutputBufferLength));
+ return 0;
+}
+
+static void get_file_alternate_info(struct ksmbd_work *work,
+ struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp,
+ void *rsp_org)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_file_alt_name_info *file_info;
+ struct dentry *dentry = fp->filp->f_path.dentry;
+ int conv_len;
+
+ spin_lock(&dentry->d_lock);
+ file_info = (struct smb2_file_alt_name_info *)rsp->Buffer;
+ conv_len = ksmbd_extract_shortname(conn,
+ dentry->d_name.name,
+ file_info->FileName);
+ spin_unlock(&dentry->d_lock);
+ file_info->FileNameLength = cpu_to_le32(conv_len);
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_alt_name_info) + conv_len);
+ inc_rfc1001_len(rsp_org, le32_to_cpu(rsp->OutputBufferLength));
+}
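+ /*
+ * ksmbd_vfs_init_kstat() is expected to fill the common attribute
+ * header at wptr and advance it; FILE_NAMES_INFORMATION carries no
+ * such header.
+ */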
+
+static void get_file_stream_info(struct ksmbd_work *work,
+ struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp,
+ void *rsp_org)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_file_stream_info *file_info;
+ char *stream_name, *xattr_list = NULL, *stream_buf;
+ struct kstat stat;
+ struct path *path = &fp->filp->f_path;
+ ssize_t xattr_list_len;
+ int nbytes = 0, streamlen, stream_name_len, next, idx = 0;
+
+ generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
+ &stat);
+ file_info = (struct smb2_file_stream_info *)rsp->Buffer;
+
+ xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
+ if (xattr_list_len < 0) {
+ goto out;
+ } else if (!xattr_list_len) {
+ ksmbd_debug(SMB, "empty xattr in the file\n");
+ goto out;
+ }
+
+ while (idx < xattr_list_len) {
+ stream_name = xattr_list + idx;
+ streamlen = strlen(stream_name);
+ idx += streamlen + 1;
+
+ ksmbd_debug(SMB, "%s, len %d\n", stream_name, streamlen);
+
+ if (strncmp(&stream_name[XATTR_USER_PREFIX_LEN],
+ STREAM_PREFIX, STREAM_PREFIX_LEN))
+ continue;
+
+ stream_name_len = streamlen - (XATTR_USER_PREFIX_LEN +
+ STREAM_PREFIX_LEN);
+ streamlen = stream_name_len;
+
+ /* plus one byte for the leading ':' */
+ streamlen += 1;
+ stream_buf = kmalloc(streamlen + 1, GFP_KERNEL);
+ if (!stream_buf)
+ break;
+
+ streamlen = snprintf(stream_buf, streamlen + 1,
+ ":%s", &stream_name[XATTR_NAME_STREAM_LEN]);
+
+ file_info = (struct smb2_file_stream_info *)&rsp->Buffer[nbytes];
+ streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName,
+ stream_buf, streamlen,
+ conn->local_nls, 0);
+ streamlen *= 2;
+ kfree(stream_buf);
+ file_info->StreamNameLength = cpu_to_le32(streamlen);
+ file_info->StreamSize = cpu_to_le64(stream_name_len);
+ file_info->StreamAllocationSize = cpu_to_le64(stream_name_len);
+
+ next = sizeof(struct smb2_file_stream_info) + streamlen;
+ nbytes += next;
+ file_info->NextEntryOffset = cpu_to_le32(next);
+ }
+
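+ /* append an entry for the default unnamed data stream ("::$DATA") */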
+ if (nbytes) {
+ file_info = (struct smb2_file_stream_info *)
+ &rsp->Buffer[nbytes];
+ streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName,
+ "::$DATA", 7, conn->local_nls, 0);
+ streamlen *= 2;
+ file_info->StreamNameLength = cpu_to_le32(streamlen);
+ file_info->StreamSize = S_ISDIR(stat.mode) ? 0 :
+ cpu_to_le64(stat.size);
+ file_info->StreamAllocationSize = S_ISDIR(stat.mode) ? 0 :
+ cpu_to_le64(stat.size);
+ nbytes += sizeof(struct smb2_file_stream_info) + streamlen;
+ }
+
+ /* the last entry's NextEntryOffset must be 0 */
+ file_info->NextEntryOffset = 0;
+out:
+ kvfree(xattr_list);
+
+ rsp->OutputBufferLength = cpu_to_le32(nbytes);
+ inc_rfc1001_len(rsp_org, nbytes);
+}
+
+static void get_file_internal_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
+{
+ struct smb2_file_internal_info *file_info;
+ struct kstat stat;
+
+ generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
+ &stat);
+ file_info = (struct smb2_file_internal_info *)rsp->Buffer;
+ file_info->IndexNumber = cpu_to_le64(stat.ino);
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_internal_info));
+ inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_internal_info));
+}
+
+static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
+{
+ struct smb2_file_ntwrk_info *file_info;
+ struct inode *inode;
+ struct kstat stat;
+ u64 time;
+
+ if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
+ pr_err("no right to read the attributes : 0x%x\n",
+ fp->daccess);
+ return -EACCES;
+ }
+
+ file_info = (struct smb2_file_ntwrk_info *)rsp->Buffer;
+
+ inode = file_inode(fp->filp);
+ generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat);
+
+ file_info->CreationTime = cpu_to_le64(fp->create_time);
+ time = ksmbd_UnixTimeToNT(stat.atime);
+ file_info->LastAccessTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(stat.mtime);
+ file_info->LastWriteTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(stat.ctime);
+ file_info->ChangeTime = cpu_to_le64(time);
+ file_info->Attributes = fp->f_ci->m_fattr;
+ file_info->AllocationSize =
+ cpu_to_le64(get_allocation_size(inode, &stat));
+ file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
+ file_info->Reserved = cpu_to_le32(0);
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_ntwrk_info));
+ inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_ntwrk_info));
+ return 0;
+}
+
+static void get_file_ea_info(struct smb2_query_info_rsp *rsp, void *rsp_org)
+{
+ struct smb2_file_ea_info *file_info;
+
+ file_info = (struct smb2_file_ea_info *)rsp->Buffer;
+ file_info->EASize = 0;
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_ea_info));
+ inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_ea_info));
+}
+
+static void get_file_position_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
+{
+ struct smb2_file_pos_info *file_info;
+
+ file_info = (struct smb2_file_pos_info *)rsp->Buffer;
+ file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos);
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_pos_info));
+ inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_pos_info));
+}
+
+static void get_file_mode_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
+{
+ struct smb2_file_mode_info *file_info;
+
+ file_info = (struct smb2_file_mode_info *)rsp->Buffer;
+ file_info->Mode = fp->coption & FILE_MODE_INFO_MASK;
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_mode_info));
+ inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_mode_info));
+}
+
+static void get_file_compression_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
+{
+ struct smb2_file_comp_info *file_info;
+ struct kstat stat;
+
+ generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp),
+ &stat);
+
+ file_info = (struct smb2_file_comp_info *)rsp->Buffer;
+ file_info->CompressedFileSize = cpu_to_le64(stat.blocks << 9);
+ file_info->CompressionFormat = COMPRESSION_FORMAT_NONE;
+ file_info->CompressionUnitShift = 0;
+ file_info->ChunkShift = 0;
+ file_info->ClusterShift = 0;
+ memset(&file_info->Reserved[0], 0, 3);
+
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_comp_info));
+ inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_comp_info));
+}
+
+static int get_file_attribute_tag_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
+{
+ struct smb2_file_attr_tag_info *file_info;
+
+ if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
+ pr_err("no right to read the attributes : 0x%x\n",
+ fp->daccess);
+ return -EACCES;
+ }
+
+ file_info = (struct smb2_file_attr_tag_info *)rsp->Buffer;
+ file_info->FileAttributes = fp->f_ci->m_fattr;
+ file_info->ReparseTag = 0;
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb2_file_attr_tag_info));
+ inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_attr_tag_info));
+ return 0;
+}
+
+static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
+{
+ struct smb311_posix_qinfo *file_info;
+ struct inode *inode = file_inode(fp->filp);
+ u64 time;
+
+ file_info = (struct smb311_posix_qinfo *)rsp->Buffer;
+ file_info->CreationTime = cpu_to_le64(fp->create_time);
+ time = ksmbd_UnixTimeToNT(inode->i_atime);
+ file_info->LastAccessTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(inode->i_mtime);
+ file_info->LastWriteTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(inode->i_ctime);
+ file_info->ChangeTime = cpu_to_le64(time);
+ file_info->DosAttributes = fp->f_ci->m_fattr;
+ file_info->Inode = cpu_to_le64(inode->i_ino);
+ file_info->EndOfFile = cpu_to_le64(inode->i_size);
+ file_info->AllocationSize = cpu_to_le64(inode->i_blocks << 9);
+ file_info->HardLinks = cpu_to_le32(inode->i_nlink);
+ file_info->Mode = cpu_to_le32(inode->i_mode);
+ file_info->DeviceId = cpu_to_le32(inode->i_rdev);
+ rsp->OutputBufferLength =
+ cpu_to_le32(sizeof(struct smb311_posix_qinfo));
+ inc_rfc1001_len(rsp_org, sizeof(struct smb311_posix_qinfo));
+ return 0;
+}
+
+static int smb2_get_info_file(struct ksmbd_work *work,
+ struct smb2_query_info_req *req,
+ struct smb2_query_info_rsp *rsp, void *rsp_org)
+{
+ struct ksmbd_file *fp;
+ int fileinfoclass = 0;
+ int rc = 0;
+ int file_infoclass_size;
+ unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_PIPE)) {
+ /* smb2 info file called for pipe */
+ return smb2_get_info_file_pipe(work->sess, req, rsp);
+ }
+
+ if (work->next_smb2_rcv_hdr_off) {
+ if (!has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ ksmbd_debug(SMB, "Compound request set FID = %llu\n",
+ work->compound_fid);
+ id = work->compound_fid;
+ pid = work->compound_pfid;
+ }
+ }
+
+ if (!has_file_id(id)) {
+ id = le64_to_cpu(req->VolatileFileId);
+ pid = le64_to_cpu(req->PersistentFileId);
+ }
+
+ fp = ksmbd_lookup_fd_slow(work, id, pid);
+ if (!fp)
+ return -ENOENT;
+
+ fileinfoclass = req->FileInfoClass;
+
+ switch (fileinfoclass) {
+ case FILE_ACCESS_INFORMATION:
+ get_file_access_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_ACCESS_INFORMATION_SIZE;
+ break;
+
+ case FILE_BASIC_INFORMATION:
+ rc = get_file_basic_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_BASIC_INFORMATION_SIZE;
+ break;
+
+ case FILE_STANDARD_INFORMATION:
+ get_file_standard_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_STANDARD_INFORMATION_SIZE;
+ break;
+
+ case FILE_ALIGNMENT_INFORMATION:
+ get_file_alignment_info(rsp, rsp_org);
+ file_infoclass_size = FILE_ALIGNMENT_INFORMATION_SIZE;
+ break;
+
+ case FILE_ALL_INFORMATION:
+ rc = get_file_all_info(work, rsp, fp, rsp_org);
+ file_infoclass_size = FILE_ALL_INFORMATION_SIZE;
+ break;
+
+ case FILE_ALTERNATE_NAME_INFORMATION:
+ get_file_alternate_info(work, rsp, fp, rsp_org);
+ file_infoclass_size = FILE_ALTERNATE_NAME_INFORMATION_SIZE;
+ break;
+
+ case FILE_STREAM_INFORMATION:
+ get_file_stream_info(work, rsp, fp, rsp_org);
+ file_infoclass_size = FILE_STREAM_INFORMATION_SIZE;
+ break;
+
+ case FILE_INTERNAL_INFORMATION:
+ get_file_internal_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_INTERNAL_INFORMATION_SIZE;
+ break;
+
+ case FILE_NETWORK_OPEN_INFORMATION:
+ rc = get_file_network_open_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_NETWORK_OPEN_INFORMATION_SIZE;
+ break;
+
+ case FILE_EA_INFORMATION:
+ get_file_ea_info(rsp, rsp_org);
+ file_infoclass_size = FILE_EA_INFORMATION_SIZE;
+ break;
+
+ case FILE_FULL_EA_INFORMATION:
+ rc = smb2_get_ea(work, fp, req, rsp, rsp_org);
+ file_infoclass_size = FILE_FULL_EA_INFORMATION_SIZE;
+ break;
+
+ case FILE_POSITION_INFORMATION:
+ get_file_position_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_POSITION_INFORMATION_SIZE;
+ break;
+
+ case FILE_MODE_INFORMATION:
+ get_file_mode_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_MODE_INFORMATION_SIZE;
+ break;
+
+ case FILE_COMPRESSION_INFORMATION:
+ get_file_compression_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_COMPRESSION_INFORMATION_SIZE;
+ break;
+
+ case FILE_ATTRIBUTE_TAG_INFORMATION:
+ rc = get_file_attribute_tag_info(rsp, fp, rsp_org);
+ file_infoclass_size = FILE_ATTRIBUTE_TAG_INFORMATION_SIZE;
+ break;
+ case SMB_FIND_FILE_POSIX_INFO:
+ if (!work->tcon->posix_extensions) {
+ pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n");
+ rc = -EOPNOTSUPP;
+ } else {
+ rc = find_file_posix_info(rsp, fp, rsp_org);
+ file_infoclass_size = sizeof(struct smb311_posix_qinfo);
+ }
+ break;
+ default:
+ ksmbd_debug(SMB, "fileinfoclass %d not supported yet\n",
+ fileinfoclass);
+ rc = -EOPNOTSUPP;
+ }
+ if (!rc)
+ rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
+ rsp,
+ file_infoclass_size);
+ ksmbd_fd_put(work, fp);
+ return rc;
+}
+
+static int smb2_get_info_filesystem(struct ksmbd_work *work,
+ struct smb2_query_info_req *req,
+ struct smb2_query_info_rsp *rsp, void *rsp_org)
+{
+ struct ksmbd_session *sess = work->sess;
+ struct ksmbd_conn *conn = sess->conn;
+ struct ksmbd_share_config *share = work->tcon->share_conf;
+ int fsinfoclass = 0;
+ struct kstatfs stfs;
+ struct path path;
+ int rc = 0, len;
+ int fs_infoclass_size = 0;
+ int lookup_flags = 0;
+
+ if (test_share_config_flag(share, KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS))
+ lookup_flags = LOOKUP_FOLLOW;
+
+ rc = ksmbd_vfs_kern_path(share->path, lookup_flags, &path, 0);
+ if (rc) {
+ pr_err("cannot create vfs path\n");
+ return -EIO;
+ }
+
+ rc = vfs_statfs(&path, &stfs);
+ if (rc) {
+ pr_err("cannot do stat of path %s\n", share->path);
+ path_put(&path);
+ return -EIO;
+ }
+
+ fsinfoclass = req->FileInfoClass;
+
+ switch (fsinfoclass) {
+ case FS_DEVICE_INFORMATION:
+ {
+ struct filesystem_device_info *info;
+
+ info = (struct filesystem_device_info *)rsp->Buffer;
+
+ info->DeviceType = cpu_to_le32(stfs.f_type);
+ info->DeviceCharacteristics = cpu_to_le32(0x00000020);
+ rsp->OutputBufferLength = cpu_to_le32(8);
+ inc_rfc1001_len(rsp_org, 8);
+ fs_infoclass_size = FS_DEVICE_INFORMATION_SIZE;
+ break;
+ }
+ case FS_ATTRIBUTE_INFORMATION:
+ {
+ struct filesystem_attribute_info *info;
+ size_t sz;
+
+ info = (struct filesystem_attribute_info *)rsp->Buffer;
+ info->Attributes = cpu_to_le32(FILE_SUPPORTS_OBJECT_IDS |
+ FILE_PERSISTENT_ACLS |
+ FILE_UNICODE_ON_DISK |
+ FILE_CASE_PRESERVED_NAMES |
+ FILE_CASE_SENSITIVE_SEARCH |
+ FILE_SUPPORTS_BLOCK_REFCOUNTING);
+
+ info->Attributes |= cpu_to_le32(server_conf.share_fake_fscaps);
+
+ info->MaxPathNameComponentLength = cpu_to_le32(stfs.f_namelen);
+ len = smbConvertToUTF16((__le16 *)info->FileSystemName,
+ "NTFS", PATH_MAX, conn->local_nls, 0);
+ len = len * 2;
+ info->FileSystemNameLen = cpu_to_le32(len);
+ sz = sizeof(struct filesystem_attribute_info) - 2 + len;
+ rsp->OutputBufferLength = cpu_to_le32(sz);
+ inc_rfc1001_len(rsp_org, sz);
+ fs_infoclass_size = FS_ATTRIBUTE_INFORMATION_SIZE;
+ break;
+ }
+ case FS_VOLUME_INFORMATION:
+ {
+ struct filesystem_vol_info *info;
+ size_t sz;
+
+ info = (struct filesystem_vol_info *)(rsp->Buffer);
+ info->VolumeCreationTime = 0;
+ /* use a dummy serial number */
+ info->SerialNumber = cpu_to_le32(0xbc3ac512);
+ len = smbConvertToUTF16((__le16 *)info->VolumeLabel,
+ share->name, PATH_MAX,
+ conn->local_nls, 0);
+ len = len * 2;
+ info->VolumeLabelSize = cpu_to_le32(len);
+ info->Reserved = 0;
+ sz = sizeof(struct filesystem_vol_info) - 2 + len;
+ rsp->OutputBufferLength = cpu_to_le32(sz);
+ inc_rfc1001_len(rsp_org, sz);
+ fs_infoclass_size = FS_VOLUME_INFORMATION_SIZE;
+ break;
+ }
+ case FS_SIZE_INFORMATION:
+ {
+ struct filesystem_info *info;
+
+ info = (struct filesystem_info *)(rsp->Buffer);
+ info->TotalAllocationUnits = cpu_to_le64(stfs.f_blocks);
+ info->FreeAllocationUnits = cpu_to_le64(stfs.f_bfree);
+ info->SectorsPerAllocationUnit = cpu_to_le32(1);
+ info->BytesPerSector = cpu_to_le32(stfs.f_bsize);
+ rsp->OutputBufferLength = cpu_to_le32(24);
+ inc_rfc1001_len(rsp_org, 24);
+ fs_infoclass_size = FS_SIZE_INFORMATION_SIZE;
+ break;
+ }
+ case FS_FULL_SIZE_INFORMATION:
+ {
+ struct smb2_fs_full_size_info *info;
+
+ info = (struct smb2_fs_full_size_info *)(rsp->Buffer);
+ info->TotalAllocationUnits = cpu_to_le64(stfs.f_blocks);
+ info->CallerAvailableAllocationUnits =
+ cpu_to_le64(stfs.f_bavail);
+ info->ActualAvailableAllocationUnits =
+ cpu_to_le64(stfs.f_bfree);
+ info->SectorsPerAllocationUnit = cpu_to_le32(1);
+ info->BytesPerSector = cpu_to_le32(stfs.f_bsize);
+ rsp->OutputBufferLength = cpu_to_le32(32);
+ inc_rfc1001_len(rsp_org, 32);
+ fs_infoclass_size = FS_FULL_SIZE_INFORMATION_SIZE;
+ break;
+ }
+ case FS_OBJECT_ID_INFORMATION:
+ {
+ struct object_id_info *info;
+
+ info = (struct object_id_info *)(rsp->Buffer);
+
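+ /* derive the object id from the user's passkey; guests get all zeroes */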
+ if (!user_guest(sess->user))
+ memcpy(info->objid, user_passkey(sess->user), 16);
+ else
+ memset(info->objid, 0, 16);
+
+ info->extended_info.magic = cpu_to_le32(EXTENDED_INFO_MAGIC);
+ info->extended_info.version = cpu_to_le32(1);
+ info->extended_info.release = cpu_to_le32(1);
+ info->extended_info.rel_date = 0;
+ memcpy(info->extended_info.version_string, "1.1.0", strlen("1.1.0"));
+ rsp->OutputBufferLength = cpu_to_le32(64);
+ inc_rfc1001_len(rsp_org, 64);
+ fs_infoclass_size = FS_OBJECT_ID_INFORMATION_SIZE;
+ break;
+ }
+ case FS_SECTOR_SIZE_INFORMATION:
+ {
+ struct smb3_fs_ss_info *info;
+
+ info = (struct smb3_fs_ss_info *)(rsp->Buffer);
+
+ info->LogicalBytesPerSector = cpu_to_le32(stfs.f_bsize);
+ info->PhysicalBytesPerSectorForAtomicity =
+ cpu_to_le32(stfs.f_bsize);
+ info->PhysicalBytesPerSectorForPerf = cpu_to_le32(stfs.f_bsize);
+ info->FSEffPhysicalBytesPerSectorForAtomicity =
+ cpu_to_le32(stfs.f_bsize);
+ info->Flags = cpu_to_le32(SSINFO_FLAGS_ALIGNED_DEVICE |
+ SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE);
+ info->ByteOffsetForSectorAlignment = 0;
+ info->ByteOffsetForPartitionAlignment = 0;
+ rsp->OutputBufferLength = cpu_to_le32(28);
+ inc_rfc1001_len(rsp_org, 28);
+ fs_infoclass_size = FS_SECTOR_SIZE_INFORMATION_SIZE;
+ break;
+ }
+ case FS_CONTROL_INFORMATION:
+ {
+ /*
+ * TODO: the current implementation is based on test results
+ * against a win7 (NTFS) server. It needs to be modified to
+ * fetch valid quota values from the Linux kernel.
+ */
+ struct smb2_fs_control_info *info;
+
+ info = (struct smb2_fs_control_info *)(rsp->Buffer);
+ info->FreeSpaceStartFiltering = 0;
+ info->FreeSpaceThreshold = 0;
+ info->FreeSpaceStopFiltering = 0;
+ info->DefaultQuotaThreshold = cpu_to_le64(SMB2_NO_FID);
+ info->DefaultQuotaLimit = cpu_to_le64(SMB2_NO_FID);
+ info->Padding = 0;
+ rsp->OutputBufferLength = cpu_to_le32(48);
+ inc_rfc1001_len(rsp_org, 48);
+ fs_infoclass_size = FS_CONTROL_INFORMATION_SIZE;
+ break;
+ }
+ case FS_POSIX_INFORMATION:
+ {
+ struct filesystem_posix_info *info;
+
+ if (!work->tcon->posix_extensions) {
+ pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n");
+ rc = -EOPNOTSUPP;
+ } else {
+ info = (struct filesystem_posix_info *)(rsp->Buffer);
+ info->OptimalTransferSize = cpu_to_le32(stfs.f_bsize);
+ info->BlockSize = cpu_to_le32(stfs.f_bsize);
+ info->TotalBlocks = cpu_to_le64(stfs.f_blocks);
+ info->BlocksAvail = cpu_to_le64(stfs.f_bfree);
+ info->UserBlocksAvail = cpu_to_le64(stfs.f_bavail);
+ info->TotalFileNodes = cpu_to_le64(stfs.f_files);
+ info->FreeFileNodes = cpu_to_le64(stfs.f_ffree);
+ rsp->OutputBufferLength = cpu_to_le32(56);
+ inc_rfc1001_len(rsp_org, 56);
+ fs_infoclass_size = FS_POSIX_INFORMATION_SIZE;
+ }
+ break;
+ }
+ default:
+ path_put(&path);
+ return -EOPNOTSUPP;
+ }
+ rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
+ rsp,
+ fs_infoclass_size);
+ path_put(&path);
+ return rc;
+}
+
+static int smb2_get_info_sec(struct ksmbd_work *work,
+ struct smb2_query_info_req *req,
+ struct smb2_query_info_rsp *rsp, void *rsp_org)
+{
+ struct ksmbd_file *fp;
+ struct user_namespace *user_ns;
+ struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL;
+ struct smb_fattr fattr = {{0}};
+ struct inode *inode;
+ __u32 secdesclen;
+ unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
+ int addition_info = le32_to_cpu(req->AdditionalInformation);
+ int rc;
+
+ if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO |
+ PROTECTED_DACL_SECINFO |
+ UNPROTECTED_DACL_SECINFO)) {
+ pr_err("Unsupported addition info: 0x%x)\n",
+ addition_info);
+
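+ /* return an empty self-relative security descriptor */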
+ pntsd->revision = cpu_to_le16(1);
+ pntsd->type = cpu_to_le16(SELF_RELATIVE | DACL_PROTECTED);
+ pntsd->osidoffset = 0;
+ pntsd->gsidoffset = 0;
+ pntsd->sacloffset = 0;
+ pntsd->dacloffset = 0;
+
+ secdesclen = sizeof(struct smb_ntsd);
+ rsp->OutputBufferLength = cpu_to_le32(secdesclen);
+ inc_rfc1001_len(rsp_org, secdesclen);
+
+ return 0;
+ }
+
+ if (work->next_smb2_rcv_hdr_off) {
+ if (!has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ ksmbd_debug(SMB, "Compound request set FID = %llu\n",
+ work->compound_fid);
+ id = work->compound_fid;
+ pid = work->compound_pfid;
+ }
+ }
+
+ if (!has_file_id(id)) {
+ id = le64_to_cpu(req->VolatileFileId);
+ pid = le64_to_cpu(req->PersistentFileId);
+ }
+
+ fp = ksmbd_lookup_fd_slow(work, id, pid);
+ if (!fp)
+ return -ENOENT;
+
+ user_ns = file_mnt_user_ns(fp->filp);
+ inode = file_inode(fp->filp);
+ ksmbd_acls_fattr(&fattr, inode);
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_ACL_XATTR))
+ ksmbd_vfs_get_sd_xattr(work->conn, user_ns,
+ fp->filp->f_path.dentry, &ppntsd);
+
+ rc = build_sec_desc(user_ns, pntsd, ppntsd, addition_info,
+ &secdesclen, &fattr);
+ posix_acl_release(fattr.cf_acls);
+ posix_acl_release(fattr.cf_dacls);
+ kfree(ppntsd);
+ ksmbd_fd_put(work, fp);
+ if (rc)
+ return rc;
+
+ rsp->OutputBufferLength = cpu_to_le32(secdesclen);
+ inc_rfc1001_len(rsp_org, secdesclen);
+ return 0;
+}
+
+/**
+ * smb2_query_info() - handler for smb2 query info command
+ * @work: smb work containing query info request buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb2_query_info(struct ksmbd_work *work)
+{
+ struct smb2_query_info_req *req;
+ struct smb2_query_info_rsp *rsp, *rsp_org;
+ int rc = 0;
+
+ rsp_org = work->response_buf;
+ WORK_BUFFERS(work, req, rsp);
+
+ ksmbd_debug(SMB, "GOT query info request\n");
+
+ switch (req->InfoType) {
+ case SMB2_O_INFO_FILE:
+ ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n");
+ rc = smb2_get_info_file(work, req, rsp, (void *)rsp_org);
+ break;
+ case SMB2_O_INFO_FILESYSTEM:
+ ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILESYSTEM\n");
+ rc = smb2_get_info_filesystem(work, req, rsp, (void *)rsp_org);
+ break;
+ case SMB2_O_INFO_SECURITY:
+ ksmbd_debug(SMB, "GOT SMB2_O_INFO_SECURITY\n");
+ rc = smb2_get_info_sec(work, req, rsp, (void *)rsp_org);
+ break;
+ default:
+ ksmbd_debug(SMB, "InfoType %d not supported yet\n",
+ req->InfoType);
+ rc = -EOPNOTSUPP;
+ }
+
+ if (rc < 0) {
+ if (rc == -EACCES)
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ else if (rc == -ENOENT)
+ rsp->hdr.Status = STATUS_FILE_CLOSED;
+ else if (rc == -EIO)
+ rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
+ else if (rc == -EOPNOTSUPP || rsp->hdr.Status == 0)
+ rsp->hdr.Status = STATUS_INVALID_INFO_CLASS;
+ smb2_set_err_rsp(work);
+
+ ksmbd_debug(SMB, "error while processing smb2 query rc = %d\n",
+ rc);
+ return rc;
+ }
+ rsp->StructureSize = cpu_to_le16(9);
+ rsp->OutputBufferOffset = cpu_to_le16(72);
+ inc_rfc1001_len(rsp_org, 8);
+ return 0;
+}
+
+/**
+ * smb2_close_pipe() - handler for closing IPC pipe
+ * @work: smb work containing close request buffer
+ *
+ * Return: 0
+ */
+static noinline int smb2_close_pipe(struct ksmbd_work *work)
+{
+ u64 id;
+ struct smb2_close_req *req = work->request_buf;
+ struct smb2_close_rsp *rsp = work->response_buf;
+
+ id = le64_to_cpu(req->VolatileFileId);
+ ksmbd_session_rpc_close(work->sess, id);
+
+ rsp->StructureSize = cpu_to_le16(60);
+ rsp->Flags = 0;
+ rsp->Reserved = 0;
+ rsp->CreationTime = 0;
+ rsp->LastAccessTime = 0;
+ rsp->LastWriteTime = 0;
+ rsp->ChangeTime = 0;
+ rsp->AllocationSize = 0;
+ rsp->EndOfFile = 0;
+ rsp->Attributes = 0;
+ inc_rfc1001_len(rsp, 60);
+ return 0;
+}
+
+/**
+ * smb2_close() - handler for smb2 close file command
+ * @work: smb work containing close request buffer
+ *
+ * Return: 0
+ */
+int smb2_close(struct ksmbd_work *work)
+{
+ u64 volatile_id = KSMBD_NO_FID;
+ u64 sess_id;
+ struct smb2_close_req *req;
+ struct smb2_close_rsp *rsp;
+ struct smb2_close_rsp *rsp_org;
+ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_file *fp;
+ struct inode *inode;
+ u64 time;
+ int err = 0;
+
+ rsp_org = work->response_buf;
+ WORK_BUFFERS(work, req, rsp);
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_PIPE)) {
+ ksmbd_debug(SMB, "IPC pipe close request\n");
+ return smb2_close_pipe(work);
+ }
+
+ sess_id = le64_to_cpu(req->hdr.SessionId);
+ if (req->hdr.Flags & SMB2_FLAGS_RELATED_OPERATIONS)
+ sess_id = work->compound_sid;
+
+ work->compound_sid = 0;
+ if (check_session_id(conn, sess_id)) {
+ work->compound_sid = sess_id;
+ } else {
+ rsp->hdr.Status = STATUS_USER_SESSION_DELETED;
+ if (req->hdr.Flags & SMB2_FLAGS_RELATED_OPERATIONS)
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ err = -EBADF;
+ goto out;
+ }
+
+ if (work->next_smb2_rcv_hdr_off &&
+ !has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ if (!has_file_id(work->compound_fid)) {
+ /* file already closed, return FILE_CLOSED */
+ ksmbd_debug(SMB, "file already closed\n");
+ rsp->hdr.Status = STATUS_FILE_CLOSED;
+ err = -EBADF;
+ goto out;
+ } else {
+ ksmbd_debug(SMB,
+ "Compound request set FID = %llu:%llu\n",
+ work->compound_fid,
+ work->compound_pfid);
+ volatile_id = work->compound_fid;
+
+ /* file closed, stored id is not valid anymore */
+ work->compound_fid = KSMBD_NO_FID;
+ work->compound_pfid = KSMBD_NO_FID;
+ }
+ } else {
+ volatile_id = le64_to_cpu(req->VolatileFileId);
+ }
+ ksmbd_debug(SMB, "volatile_id = %llu\n", volatile_id);
+
+ rsp->StructureSize = cpu_to_le16(60);
+ rsp->Reserved = 0;
+
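+ /* POSTQUERY_ATTRIB asks for the file's final attributes in the close response */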
+ if (req->Flags == SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB) {
+ fp = ksmbd_lookup_fd_fast(work, volatile_id);
+ if (!fp) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ inode = file_inode(fp->filp);
+ rsp->Flags = SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB;
+ rsp->AllocationSize = S_ISDIR(inode->i_mode) ? 0 :
+ cpu_to_le64(inode->i_blocks << 9);
+ rsp->EndOfFile = cpu_to_le64(inode->i_size);
+ rsp->Attributes = fp->f_ci->m_fattr;
+ rsp->CreationTime = cpu_to_le64(fp->create_time);
+ time = ksmbd_UnixTimeToNT(inode->i_atime);
+ rsp->LastAccessTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(inode->i_mtime);
+ rsp->LastWriteTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(inode->i_ctime);
+ rsp->ChangeTime = cpu_to_le64(time);
+ ksmbd_fd_put(work, fp);
+ } else {
+ rsp->Flags = 0;
+ rsp->AllocationSize = 0;
+ rsp->EndOfFile = 0;
+ rsp->Attributes = 0;
+ rsp->CreationTime = 0;
+ rsp->LastAccessTime = 0;
+ rsp->LastWriteTime = 0;
+ rsp->ChangeTime = 0;
+ }
+
+ err = ksmbd_close_fd(work, volatile_id);
+out:
+ if (err) {
+ if (rsp->hdr.Status == 0)
+ rsp->hdr.Status = STATUS_FILE_CLOSED;
+ smb2_set_err_rsp(work);
+ } else {
+ inc_rfc1001_len(rsp_org, 60);
+ }
+
+ return 0;
+}
+
+/**
+ * smb2_echo() - handler for smb2 echo(ping) command
+ * @work: smb work containing echo request buffer
+ *
+ * Return: 0
+ */
+int smb2_echo(struct ksmbd_work *work)
+{
+ struct smb2_echo_rsp *rsp = work->response_buf;
+
+ rsp->StructureSize = cpu_to_le16(4);
+ rsp->Reserved = 0;
+ inc_rfc1001_len(rsp, 4);
+ return 0;
+}
+
+static int smb2_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
+ struct smb2_file_rename_info *file_info,
+ struct nls_table *local_nls)
+{
+ struct ksmbd_share_config *share = fp->tcon->share_conf;
+ char *new_name = NULL, *abs_oldname = NULL, *old_name = NULL;
+ char *pathname = NULL;
+ struct path path;
+ bool file_present = true;
+ int rc;
+
+ ksmbd_debug(SMB, "setting FILE_RENAME_INFO\n");
+ pathname = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!pathname)
+ return -ENOMEM;
+
+ abs_oldname = d_path(&fp->filp->f_path, pathname, PATH_MAX);
+ if (IS_ERR(abs_oldname)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ old_name = strrchr(abs_oldname, '/');
+ if (old_name && old_name[1] != '\0') {
+ old_name++;
+ } else {
+ ksmbd_debug(SMB, "can't get last component in path %s\n",
+ abs_oldname);
+ rc = -ENOENT;
+ goto out;
+ }
+
+ new_name = smb2_get_name(share,
+ file_info->FileName,
+ le32_to_cpu(file_info->FileNameLength),
+ local_nls);
+ if (IS_ERR(new_name)) {
+ rc = PTR_ERR(new_name);
+ goto out;
+ }
+
+ if (strchr(new_name, ':')) {
+ int s_type;
+ char *xattr_stream_name, *stream_name = NULL;
+ size_t xattr_stream_size;
+ int len;
+
+ rc = parse_stream_name(new_name, &stream_name, &s_type);
+ if (rc < 0)
+ goto out;
+
+ len = strlen(new_name);
+ if (new_name[len - 1] != '/') {
+ pr_err("not allow base filename in rename\n");
+ rc = -ESHARE;
+ goto out;
+ }
+
+ rc = ksmbd_vfs_xattr_stream_name(stream_name,
+ &xattr_stream_name,
+ &xattr_stream_size,
+ s_type);
+ if (rc)
+ goto out;
+
+ rc = ksmbd_vfs_setxattr(file_mnt_user_ns(fp->filp),
+ fp->filp->f_path.dentry,
+ xattr_stream_name,
+ NULL, 0, 0);
+ if (rc < 0) {
+ pr_err("failed to store stream name in xattr: %d\n",
+ rc);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ goto out;
+ }
+
+ ksmbd_debug(SMB, "new name %s\n", new_name);
+ rc = ksmbd_vfs_kern_path(new_name, 0, &path, 1);
+ if (rc) {
+ file_present = false;
+ } else {
+ /* remember the target's name while the path reference is held */
+ same_name = !strncmp(old_name, path.dentry->d_name.name,
+ strlen(old_name));
+ path_put(&path);
+ }
+
+ if (ksmbd_share_veto_filename(share, new_name)) {
+ rc = -ENOENT;
+ ksmbd_debug(SMB, "Can't rename vetoed file: %s\n", new_name);
+ goto out;
+ }
+
+ if (file_info->ReplaceIfExists) {
+ if (file_present) {
+ rc = ksmbd_vfs_remove_file(work, new_name);
+ if (rc) {
+ if (rc != -ENOTEMPTY)
+ rc = -EINVAL;
+ ksmbd_debug(SMB, "cannot delete %s, rc %d\n",
+ new_name, rc);
+ goto out;
+ }
+ }
+ } else {
+ if (file_present && !same_name) {
+ rc = -EEXIST;
+ ksmbd_debug(SMB,
+ "cannot rename already existing file\n");
+ goto out;
+ }
+ }
+
+ rc = ksmbd_vfs_fp_rename(work, fp, new_name);
+out:
+ kfree(pathname);
+ if (!IS_ERR(new_name))
+ kfree(new_name);
+ return rc;
+}
+
+static int smb2_create_link(struct ksmbd_work *work,
+ struct ksmbd_share_config *share,
+ struct smb2_file_link_info *file_info,
+ struct file *filp,
+ struct nls_table *local_nls)
+{
+ char *link_name = NULL, *target_name = NULL, *pathname = NULL;
+ struct path path;
+ bool file_present = true;
+ int rc;
+
+ ksmbd_debug(SMB, "setting FILE_LINK_INFORMATION\n");
+ pathname = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!pathname)
+ return -ENOMEM;
+
+ link_name = smb2_get_name(share,
+ file_info->FileName,
+ le32_to_cpu(file_info->FileNameLength),
+ local_nls);
+ if (IS_ERR(link_name) || S_ISDIR(file_inode(filp)->i_mode)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ ksmbd_debug(SMB, "link name is %s\n", link_name);
+ target_name = d_path(&filp->f_path, pathname, PATH_MAX);
+ if (IS_ERR(target_name)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ ksmbd_debug(SMB, "target name is %s\n", target_name);
+ rc = ksmbd_vfs_kern_path(link_name, 0, &path, 0);
+ if (rc)
+ file_present = false;
+ else
+ path_put(&path);
+
+ if (file_info->ReplaceIfExists) {
+ if (file_present) {
+ rc = ksmbd_vfs_remove_file(work, link_name);
+ if (rc) {
+ rc = -EINVAL;
+ ksmbd_debug(SMB, "cannot delete %s\n",
+ link_name);
+ goto out;
+ }
+ }
+ } else {
+ if (file_present) {
+ rc = -EEXIST;
+ ksmbd_debug(SMB, "link already exists\n");
+ goto out;
+ }
+ }
+
+ rc = ksmbd_vfs_link(work, target_name, link_name);
+ if (rc)
+ rc = -EINVAL;
+out:
+ if (!IS_ERR(link_name))
+ kfree(link_name);
+ kfree(pathname);
+ return rc;
+}
+
+static int set_file_basic_info(struct ksmbd_file *fp, char *buf,
+ struct ksmbd_share_config *share)
+{
+ struct smb2_file_all_info *file_info;
+ struct iattr attrs;
+ struct iattr temp_attrs;
+ struct file *filp;
+ struct inode *inode;
+ struct user_namespace *user_ns;
+ int rc;
+
+ if (!(fp->daccess & FILE_WRITE_ATTRIBUTES_LE))
+ return -EACCES;
+
+ file_info = (struct smb2_file_all_info *)buf;
+ attrs.ia_valid = 0;
+ filp = fp->filp;
+ inode = file_inode(filp);
+ user_ns = file_mnt_user_ns(filp);
+
+ if (file_info->CreationTime)
+ fp->create_time = le64_to_cpu(file_info->CreationTime);
+
+ if (file_info->LastAccessTime) {
+ attrs.ia_atime = ksmbd_NTtimeToUnix(file_info->LastAccessTime);
+ attrs.ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
+ }
+
+ if (file_info->ChangeTime) {
+ temp_attrs.ia_ctime = ksmbd_NTtimeToUnix(file_info->ChangeTime);
+ attrs.ia_ctime = temp_attrs.ia_ctime;
+ attrs.ia_valid |= ATTR_CTIME;
+ } else {
+ temp_attrs.ia_ctime = inode->i_ctime;
+ }
+
+ if (file_info->LastWriteTime) {
+ attrs.ia_mtime = ksmbd_NTtimeToUnix(file_info->LastWriteTime);
+ attrs.ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
+ }
+
+ if (file_info->Attributes) {
+ if (!S_ISDIR(inode->i_mode) &&
+ file_info->Attributes & ATTR_DIRECTORY_LE) {
+ pr_err("can't change a file to a directory\n");
+ return -EINVAL;
+ }
+
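+		/*
+		 * Ignore an attempt to set ATTR_NORMAL on a directory, and
+		 * never let the client clear the directory attribute itself.
+		 */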
+ if (!(S_ISDIR(inode->i_mode) && file_info->Attributes == ATTR_NORMAL_LE))
+ fp->f_ci->m_fattr = file_info->Attributes |
+ (fp->f_ci->m_fattr & ATTR_DIRECTORY_LE);
+ }
+
+ if (test_share_config_flag(share, KSMBD_SHARE_FLAG_STORE_DOS_ATTRS) &&
+ (file_info->CreationTime || file_info->Attributes)) {
+ struct xattr_dos_attrib da = {0};
+
+ da.version = 4;
+ da.itime = fp->itime;
+ da.create_time = fp->create_time;
+ da.attr = le32_to_cpu(fp->f_ci->m_fattr);
+ da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
+ XATTR_DOSINFO_ITIME;
+
+ rc = ksmbd_vfs_set_dos_attrib_xattr(user_ns,
+ filp->f_path.dentry, &da);
+ if (rc)
+ ksmbd_debug(SMB,
+ "failed to restore file attribute in EA\n");
+ rc = 0;
+ }
+
+ /*
+ * HACK : set ctime here to avoid ctime changed
+ * when file_info->ChangeTime is zero.
+ */
+ attrs.ia_ctime = temp_attrs.ia_ctime;
+ attrs.ia_valid |= ATTR_CTIME;
+
+ if (attrs.ia_valid) {
+ struct dentry *dentry = filp->f_path.dentry;
+ struct inode *inode = d_inode(dentry);
+
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ return -EACCES;
+
+ rc = setattr_prepare(user_ns, dentry, &attrs);
+ if (rc)
+ return -EINVAL;
+
+ inode_lock(inode);
+ setattr_copy(user_ns, inode, &attrs);
+ attrs.ia_valid &= ~ATTR_CTIME;
+ rc = notify_change(user_ns, dentry, &attrs, NULL);
+ inode_unlock(inode);
+ }
+ return 0;
+}
+
+static int set_file_allocation_info(struct ksmbd_work *work,
+ struct ksmbd_file *fp, char *buf)
+{
+ /*
+ * TODO: this currently behaves correctly only when "store dos
+ * attributes" is disabled; implement logic that works with any
+ * smb.conf option
+ */
+
+ struct smb2_file_alloc_info *file_alloc_info;
+ loff_t alloc_blks;
+ struct inode *inode;
+ int rc;
+
+ if (!(fp->daccess & FILE_WRITE_DATA_LE))
+ return -EACCES;
+
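+	/* round the requested allocation size up to 512-byte blocks */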
+ file_alloc_info = (struct smb2_file_alloc_info *)buf;
+ alloc_blks = (le64_to_cpu(file_alloc_info->AllocationSize) + 511) >> 9;
+ inode = file_inode(fp->filp);
+
+ if (alloc_blks > inode->i_blocks) {
+ smb_break_all_levII_oplock(work, fp, 1);
+ rc = vfs_fallocate(fp->filp, FALLOC_FL_KEEP_SIZE, 0,
+ alloc_blks * 512);
+ if (rc && rc != -EOPNOTSUPP) {
+ pr_err("vfs_fallocate is failed : %d\n", rc);
+ return rc;
+ }
+ } else if (alloc_blks < inode->i_blocks) {
+ loff_t size;
+
+ /*
+ * A smaller allocation size means trailing blocks must be
+ * deallocated. Truncate is used to cut them off, but truncate
+ * also moves the inode size, so save the old size beforehand
+ * and restore it afterwards.
+ */
+ size = i_size_read(inode);
+ rc = ksmbd_vfs_truncate(work, NULL, fp, alloc_blks * 512);
+ if (rc) {
+ pr_err("truncate failed! filename : %s, err %d\n",
+ fp->filename, rc);
+ return rc;
+ }
+ if (size < alloc_blks * 512)
+ i_size_write(inode, size);
+ }
+ return 0;
+}
+
+static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp,
+ char *buf)
+{
+ struct smb2_file_eof_info *file_eof_info;
+ loff_t newsize;
+ struct inode *inode;
+ int rc;
+
+ if (!(fp->daccess & FILE_WRITE_DATA_LE))
+ return -EACCES;
+
+ file_eof_info = (struct smb2_file_eof_info *)buf;
+ newsize = le64_to_cpu(file_eof_info->EndOfFile);
+ inode = file_inode(fp->filp);
+
+ /*
+ * Skip the truncate on FAT32 shares: filesystems like FAT32
+ * zero-fill the truncated range, so handling
+ * FILE_END_OF_FILE_INFORMATION there can take long enough that
+ * the Windows client aborts with a network error.
+ */
+ if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC) {
+ ksmbd_debug(SMB, "filename : %s truncated to newsize %lld\n",
+ fp->filename, newsize);
+ rc = ksmbd_vfs_truncate(work, NULL, fp, newsize);
+ if (rc) {
+ ksmbd_debug(SMB, "truncate failed! filename : %s err %d\n",
+ fp->filename, rc);
+ if (rc != -EAGAIN)
+ rc = -EBADF;
+ return rc;
+ }
+ }
+ return 0;
+}
+
+static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
+ char *buf)
+{
+ struct ksmbd_file *parent_fp;
+ struct dentry *parent;
+ struct dentry *dentry = fp->filp->f_path.dentry;
+ int ret;
+
+ if (!(fp->daccess & FILE_DELETE_LE)) {
+ pr_err("no right to delete : 0x%x\n", fp->daccess);
+ return -EACCES;
+ }
+
+ if (ksmbd_stream_fd(fp))
+ goto next;
+
+ parent = dget_parent(dentry);
+ ret = ksmbd_vfs_lock_parent(parent, dentry);
+ if (ret) {
+ dput(parent);
+ return ret;
+ }
+
+ parent_fp = ksmbd_lookup_fd_inode(d_inode(parent));
+ inode_unlock(d_inode(parent));
+ dput(parent);
+
+ if (parent_fp) {
+ if (parent_fp->daccess & FILE_DELETE_LE) {
+ pr_err("parent dir is opened with delete access\n");
+ return -ESHARE;
+ }
+ }
+next:
+ return smb2_rename(work, fp,
+ (struct smb2_file_rename_info *)buf,
+ work->sess->conn->local_nls);
+}
+
+static int set_file_disposition_info(struct ksmbd_file *fp, char *buf)
+{
+ struct smb2_file_disposition_info *file_info;
+ struct inode *inode;
+
+ if (!(fp->daccess & FILE_DELETE_LE)) {
+ pr_err("no right to delete : 0x%x\n", fp->daccess);
+ return -EACCES;
+ }
+
+ inode = file_inode(fp->filp);
+ file_info = (struct smb2_file_disposition_info *)buf;
+ if (file_info->DeletePending) {
+ if (S_ISDIR(inode->i_mode) &&
+ ksmbd_vfs_empty_dir(fp) == -ENOTEMPTY)
+ return -EBUSY;
+ ksmbd_set_inode_pending_delete(fp);
+ } else {
+ ksmbd_clear_inode_pending_delete(fp);
+ }
+ return 0;
+}
+
+static int set_file_position_info(struct ksmbd_file *fp, char *buf)
+{
+ struct smb2_file_pos_info *file_info;
+ loff_t current_byte_offset;
+ unsigned long sector_size;
+ struct inode *inode;
+
+ inode = file_inode(fp->filp);
+ file_info = (struct smb2_file_pos_info *)buf;
+ current_byte_offset = le64_to_cpu(file_info->CurrentByteOffset);
+ sector_size = inode->i_sb->s_blocksize;
+
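+	/*
+	 * With FILE_NO_INTERMEDIATE_BUFFERING the file position must be
+	 * aligned to the underlying block size.
+	 */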
+ if (current_byte_offset < 0 ||
+ (fp->coption == FILE_NO_INTERMEDIATE_BUFFERING_LE &&
+ current_byte_offset & (sector_size - 1))) {
+ pr_err("CurrentByteOffset is not valid : %llu\n",
+ current_byte_offset);
+ return -EINVAL;
+ }
+
+ fp->filp->f_pos = current_byte_offset;
+ return 0;
+}
+
+static int set_file_mode_info(struct ksmbd_file *fp, char *buf)
+{
+ struct smb2_file_mode_info *file_info;
+ __le32 mode;
+
+ file_info = (struct smb2_file_mode_info *)buf;
+ mode = file_info->Mode;
+
+ if ((mode & ~FILE_MODE_INFO_MASK) ||
+ (mode & FILE_SYNCHRONOUS_IO_ALERT_LE &&
+ mode & FILE_SYNCHRONOUS_IO_NONALERT_LE)) {
+ pr_err("Mode is not valid : 0x%x\n", le32_to_cpu(mode));
+ return -EINVAL;
+ }
+
+ /*
+ * TODO : need to implement consideration for
+ * FILE_SYNCHRONOUS_IO_ALERT and FILE_SYNCHRONOUS_IO_NONALERT
+ */
+ ksmbd_vfs_set_fadvise(fp->filp, mode);
+ fp->coption = mode;
+ return 0;
+}
+
+/**
+ * smb2_set_info_file() - handler for smb2 set info command
+ * @work: smb work containing set info command buffer
+ * @fp: ksmbd_file pointer
+ * @info_class: smb2 set info class
+ * @share: ksmbd_share_config pointer
+ *
+ * Return: 0 on success, otherwise error
+ * TODO: need to implement an error handling for STATUS_INFO_LENGTH_MISMATCH
+ */
+static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp,
+ int info_class, char *buf,
+ struct ksmbd_share_config *share)
+{
+ switch (info_class) {
+ case FILE_BASIC_INFORMATION:
+ return set_file_basic_info(fp, buf, share);
+
+ case FILE_ALLOCATION_INFORMATION:
+ return set_file_allocation_info(work, fp, buf);
+
+ case FILE_END_OF_FILE_INFORMATION:
+ return set_end_of_file_info(work, fp, buf);
+
+ case FILE_RENAME_INFORMATION:
+ if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+ ksmbd_debug(SMB,
+ "User does not have write permission\n");
+ return -EACCES;
+ }
+ return set_rename_info(work, fp, buf);
+
+ case FILE_LINK_INFORMATION:
+ return smb2_create_link(work, work->tcon->share_conf,
+ (struct smb2_file_link_info *)buf, fp->filp,
+ work->sess->conn->local_nls);
+
+ case FILE_DISPOSITION_INFORMATION:
+ if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+ ksmbd_debug(SMB,
+ "User does not have write permission\n");
+ return -EACCES;
+ }
+ return set_file_disposition_info(fp, buf);
+
+ case FILE_FULL_EA_INFORMATION:
+ {
+ if (!(fp->daccess & FILE_WRITE_EA_LE)) {
+ pr_err("Not permitted to write ext attr: 0x%x\n",
+ fp->daccess);
+ return -EACCES;
+ }
+
+ return smb2_set_ea((struct smb2_ea_info *)buf,
+ &fp->filp->f_path);
+ }
+
+ case FILE_POSITION_INFORMATION:
+ return set_file_position_info(fp, buf);
+
+ case FILE_MODE_INFORMATION:
+ return set_file_mode_info(fp, buf);
+ }
+
+ pr_err("Unimplemented Fileinfoclass :%d\n", info_class);
+ return -EOPNOTSUPP;
+}
+
+static int smb2_set_info_sec(struct ksmbd_file *fp, int addition_info,
+ char *buffer, int buf_len)
+{
+ struct smb_ntsd *pntsd = (struct smb_ntsd *)buffer;
+
+ fp->saccess |= FILE_SHARE_DELETE_LE;
+
+ return set_info_sec(fp->conn, fp->tcon, &fp->filp->f_path, pntsd,
+ buf_len, false);
+}
+
+/**
+ * smb2_set_info() - handler for smb2 set info command
+ * @work: smb work containing set info request buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb2_set_info(struct ksmbd_work *work)
+{
+ struct smb2_set_info_req *req;
+ struct smb2_set_info_rsp *rsp, *rsp_org;
+ struct ksmbd_file *fp;
+ int rc = 0;
+ u64 id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
+
+ ksmbd_debug(SMB, "Received set info request\n");
+
+ rsp_org = work->response_buf;
+ if (work->next_smb2_rcv_hdr_off) {
+ req = ksmbd_req_buf_next(work);
+ rsp = ksmbd_resp_buf_next(work);
+ if (!has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ ksmbd_debug(SMB, "Compound request set FID = %llu\n",
+ work->compound_fid);
+ id = work->compound_fid;
+ pid = work->compound_pfid;
+ }
+ } else {
+ req = work->request_buf;
+ rsp = work->response_buf;
+ }
+
+ if (!has_file_id(id)) {
+ id = le64_to_cpu(req->VolatileFileId);
+ pid = le64_to_cpu(req->PersistentFileId);
+ }
+
+ fp = ksmbd_lookup_fd_slow(work, id, pid);
+ if (!fp) {
+ ksmbd_debug(SMB, "Invalid id for close: %u\n", id);
+ rc = -ENOENT;
+ goto err_out;
+ }
+
+ switch (req->InfoType) {
+ case SMB2_O_INFO_FILE:
+ ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n");
+ rc = smb2_set_info_file(work, fp, req->FileInfoClass,
+ req->Buffer, work->tcon->share_conf);
+ break;
+ case SMB2_O_INFO_SECURITY:
+ ksmbd_debug(SMB, "GOT SMB2_O_INFO_SECURITY\n");
+ if (ksmbd_override_fsids(work)) {
+ rc = -ENOMEM;
+ goto err_out;
+ }
+ rc = smb2_set_info_sec(fp,
+ le32_to_cpu(req->AdditionalInformation),
+ req->Buffer,
+ le32_to_cpu(req->BufferLength));
+ ksmbd_revert_fsids(work);
+ break;
+ default:
+ rc = -EOPNOTSUPP;
+ }
+
+ if (rc < 0)
+ goto err_out;
+
+ rsp->StructureSize = cpu_to_le16(2);
+ inc_rfc1001_len(rsp_org, 2);
+ ksmbd_fd_put(work, fp);
+ return 0;
+
+err_out:
+ if (rc == -EACCES || rc == -EPERM)
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ else if (rc == -EINVAL)
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ else if (rc == -ESHARE)
+ rsp->hdr.Status = STATUS_SHARING_VIOLATION;
+ else if (rc == -ENOENT)
+ rsp->hdr.Status = STATUS_OBJECT_NAME_INVALID;
+ else if (rc == -EBUSY || rc == -ENOTEMPTY)
+ rsp->hdr.Status = STATUS_DIRECTORY_NOT_EMPTY;
+ else if (rc == -EAGAIN)
+ rsp->hdr.Status = STATUS_FILE_LOCK_CONFLICT;
+ else if (rc == -EBADF || rc == -ESTALE)
+ rsp->hdr.Status = STATUS_INVALID_HANDLE;
+ else if (rc == -EEXIST)
+ rsp->hdr.Status = STATUS_OBJECT_NAME_COLLISION;
+ else if (rsp->hdr.Status == 0 || rc == -EOPNOTSUPP)
+ rsp->hdr.Status = STATUS_INVALID_INFO_CLASS;
+ smb2_set_err_rsp(work);
+ ksmbd_fd_put(work, fp);
+ ksmbd_debug(SMB, "error while processing smb2 query rc = %d\n", rc);
+ return rc;
+}
+
+/**
+ * smb2_read_pipe() - handler for smb2 read from IPC pipe
+ * @work: smb work containing read IPC pipe command buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+static noinline int smb2_read_pipe(struct ksmbd_work *work)
+{
+ int nbytes = 0, err;
+ u64 id;
+ struct ksmbd_rpc_command *rpc_resp;
+ struct smb2_read_req *req = work->request_buf;
+ struct smb2_read_rsp *rsp = work->response_buf;
+
+ id = le64_to_cpu(req->VolatileFileId);
+
+ inc_rfc1001_len(rsp, 16);
+ rpc_resp = ksmbd_rpc_read(work->sess, id);
+ if (rpc_resp) {
+ if (rpc_resp->flags != KSMBD_RPC_OK) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ work->aux_payload_buf =
+ kvmalloc(rpc_resp->payload_sz, GFP_KERNEL | __GFP_ZERO);
+ if (!work->aux_payload_buf) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ memcpy(work->aux_payload_buf, rpc_resp->payload,
+ rpc_resp->payload_sz);
+
+ nbytes = rpc_resp->payload_sz;
+ work->resp_hdr_sz = get_rfc1002_len(rsp) + 4;
+ work->aux_payload_sz = nbytes;
+ kvfree(rpc_resp);
+ }
+
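+	/* data starts 80 bytes in: 64-byte SMB2 header + 16-byte fixed part */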
+ rsp->StructureSize = cpu_to_le16(17);
+ rsp->DataOffset = 80;
+ rsp->Reserved = 0;
+ rsp->DataLength = cpu_to_le32(nbytes);
+ rsp->DataRemaining = 0;
+ rsp->Reserved2 = 0;
+ inc_rfc1001_len(rsp, nbytes);
+ return 0;
+
+out:
+ rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
+ smb2_set_err_rsp(work);
+ kvfree(rpc_resp);
+ return err;
+}
+
+static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work,
+ struct smb2_read_req *req, void *data_buf,
+ size_t length)
+{
+ struct smb2_buffer_desc_v1 *desc =
+ (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
+ int err;
+
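+	/*
+	 * RDMA read request: the server pushes the data into the
+	 * client-advertised buffer described by desc (token/offset/length)
+	 * instead of returning it inline.
+	 */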
+ if (work->conn->dialect == SMB30_PROT_ID &&
+ req->Channel != SMB2_CHANNEL_RDMA_V1)
+ return -EINVAL;
+
+ if (req->ReadChannelInfoOffset == 0 ||
+ le16_to_cpu(req->ReadChannelInfoLength) < sizeof(*desc))
+ return -EINVAL;
+
+ work->need_invalidate_rkey =
+ (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
+ work->remote_key = le32_to_cpu(desc->token);
+
+ err = ksmbd_conn_rdma_write(work->conn, data_buf, length,
+ le32_to_cpu(desc->token),
+ le64_to_cpu(desc->offset),
+ le32_to_cpu(desc->length));
+ if (err)
+ return err;
+
+ return length;
+}
+
+/**
+ * smb2_read() - handler for smb2 read from file
+ * @work: smb work containing read command buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb2_read(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_read_req *req;
+ struct smb2_read_rsp *rsp, *rsp_org;
+ struct ksmbd_file *fp;
+ loff_t offset;
+ size_t length, mincount;
+ ssize_t nbytes = 0, remain_bytes = 0;
+ int err = 0;
+
+ rsp_org = work->response_buf;
+ WORK_BUFFERS(work, req, rsp);
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_PIPE)) {
+ ksmbd_debug(SMB, "IPC pipe read request\n");
+ return smb2_read_pipe(work);
+ }
+
+ fp = ksmbd_lookup_fd_slow(work, le64_to_cpu(req->VolatileFileId),
+ le64_to_cpu(req->PersistentFileId));
+ if (!fp) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (!(fp->daccess & (FILE_READ_DATA_LE | FILE_READ_ATTRIBUTES_LE))) {
+ pr_err("Not permitted to read : 0x%x\n", fp->daccess);
+ err = -EACCES;
+ goto out;
+ }
+
+ offset = le64_to_cpu(req->Offset);
+ length = le32_to_cpu(req->Length);
+ mincount = le32_to_cpu(req->MinimumCount);
+
+ if (length > conn->vals->max_read_size) {
+ ksmbd_debug(SMB, "limiting read size to max size(%u)\n",
+ conn->vals->max_read_size);
+ err = -EINVAL;
+ goto out;
+ }
+
+ ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n",
+ fp->filp->f_path.dentry, offset, length);
+
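+	/*
+	 * Read into a separate aux payload buffer so the data can be sent
+	 * after the response header without an extra copy.
+	 */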
+ work->aux_payload_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
+ if (!work->aux_payload_buf) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ nbytes = ksmbd_vfs_read(work, fp, length, &offset);
+ if (nbytes < 0) {
+ err = nbytes;
+ goto out;
+ }
+
+ if ((nbytes == 0 && length != 0) || nbytes < mincount) {
+ kvfree(work->aux_payload_buf);
+ work->aux_payload_buf = NULL;
+ rsp->hdr.Status = STATUS_END_OF_FILE;
+ smb2_set_err_rsp(work);
+ ksmbd_fd_put(work, fp);
+ return 0;
+ }
+
+ ksmbd_debug(SMB, "nbytes %zu, offset %lld mincount %zu\n",
+ nbytes, offset, mincount);
+
+ if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE ||
+ req->Channel == SMB2_CHANNEL_RDMA_V1) {
+ /* write data to the client using rdma channel */
+ remain_bytes = smb2_read_rdma_channel(work, req,
+ work->aux_payload_buf,
+ nbytes);
+ kvfree(work->aux_payload_buf);
+ work->aux_payload_buf = NULL;
+
+ nbytes = 0;
+ if (remain_bytes < 0) {
+ err = (int)remain_bytes;
+ goto out;
+ }
+ }
+
+ rsp->StructureSize = cpu_to_le16(17);
+ rsp->DataOffset = 80;
+ rsp->Reserved = 0;
+ rsp->DataLength = cpu_to_le32(nbytes);
+ rsp->DataRemaining = cpu_to_le32(remain_bytes);
+ rsp->Reserved2 = 0;
+ inc_rfc1001_len(rsp_org, 16);
+ work->resp_hdr_sz = get_rfc1002_len(rsp_org) + 4;
+ work->aux_payload_sz = nbytes;
+ inc_rfc1001_len(rsp_org, nbytes);
+ ksmbd_fd_put(work, fp);
+ return 0;
+
+out:
+ if (err) {
+ if (err == -EISDIR)
+ rsp->hdr.Status = STATUS_INVALID_DEVICE_REQUEST;
+ else if (err == -EAGAIN)
+ rsp->hdr.Status = STATUS_FILE_LOCK_CONFLICT;
+ else if (err == -ENOENT)
+ rsp->hdr.Status = STATUS_FILE_CLOSED;
+ else if (err == -EACCES)
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ else if (err == -ESHARE)
+ rsp->hdr.Status = STATUS_SHARING_VIOLATION;
+ else if (err == -EINVAL)
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ else
+ rsp->hdr.Status = STATUS_INVALID_HANDLE;
+
+ smb2_set_err_rsp(work);
+ }
+ ksmbd_fd_put(work, fp);
+ return err;
+}
+
+/**
+ * smb2_write_pipe() - handler for smb2 write on IPC pipe
+ * @work: smb work containing write IPC pipe command buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+static noinline int smb2_write_pipe(struct ksmbd_work *work)
+{
+ struct smb2_write_req *req = work->request_buf;
+ struct smb2_write_rsp *rsp = work->response_buf;
+ struct ksmbd_rpc_command *rpc_resp;
+ u64 id = 0;
+ int err = 0;
+ char *data_buf;
+ size_t length;
+
+ length = le32_to_cpu(req->Length);
+ id = le64_to_cpu(req->VolatileFileId);
+
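+	/*
+	 * Common layout: the payload immediately follows the fixed part of
+	 * the request (the -4 accounts for the RFC1001 length field).
+	 * Any other DataOffset must be validated against the frame length.
+	 */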
+ if (le16_to_cpu(req->DataOffset) ==
+ (offsetof(struct smb2_write_req, Buffer) - 4)) {
+ data_buf = (char *)&req->Buffer[0];
+ } else {
+ if ((le16_to_cpu(req->DataOffset) > get_rfc1002_len(req)) ||
+ (le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req))) {
+ pr_err("invalid write data offset %u, smb_len %u\n",
+ le16_to_cpu(req->DataOffset),
+ get_rfc1002_len(req));
+ err = -EINVAL;
+ goto out;
+ }
+
+ data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
+ le16_to_cpu(req->DataOffset));
+ }
+
+ rpc_resp = ksmbd_rpc_write(work->sess, id, data_buf, length);
+ if (rpc_resp) {
+ if (rpc_resp->flags == KSMBD_RPC_ENOTIMPLEMENTED) {
+ rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+ kvfree(rpc_resp);
+ smb2_set_err_rsp(work);
+ return -EOPNOTSUPP;
+ }
+ if (rpc_resp->flags != KSMBD_RPC_OK) {
+ rsp->hdr.Status = STATUS_INVALID_HANDLE;
+ smb2_set_err_rsp(work);
+ kvfree(rpc_resp);
+ return 0;
+ }
+ kvfree(rpc_resp);
+ }
+
+ rsp->StructureSize = cpu_to_le16(17);
+ rsp->DataOffset = 0;
+ rsp->Reserved = 0;
+ rsp->DataLength = cpu_to_le32(length);
+ rsp->DataRemaining = 0;
+ rsp->Reserved2 = 0;
+ inc_rfc1001_len(rsp, 16);
+ return 0;
+out:
+ if (err) {
+ rsp->hdr.Status = STATUS_INVALID_HANDLE;
+ smb2_set_err_rsp(work);
+ }
+
+ return err;
+}
+
+static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work,
+ struct smb2_write_req *req,
+ struct ksmbd_file *fp,
+ loff_t offset, size_t length, bool sync)
+{
+ struct smb2_buffer_desc_v1 *desc;
+ char *data_buf;
+ int ret;
+ ssize_t nbytes;
+
+ desc = (struct smb2_buffer_desc_v1 *)&req->Buffer[0];
+
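+	/*
+	 * RDMA write request: the payload is not carried in the SMB2
+	 * packet, so Length/DataOffset must be zero and the data is pulled
+	 * from the client's buffer with an RDMA read.
+	 */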
+ if (work->conn->dialect == SMB30_PROT_ID &&
+ req->Channel != SMB2_CHANNEL_RDMA_V1)
+ return -EINVAL;
+
+ if (req->Length != 0 || req->DataOffset != 0)
+ return -EINVAL;
+
+ if (req->WriteChannelInfoOffset == 0 ||
+ le16_to_cpu(req->WriteChannelInfoLength) < sizeof(*desc))
+ return -EINVAL;
+
+ work->need_invalidate_rkey =
+ (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE);
+ work->remote_key = le32_to_cpu(desc->token);
+
+ data_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO);
+ if (!data_buf)
+ return -ENOMEM;
+
+ ret = ksmbd_conn_rdma_read(work->conn, data_buf, length,
+ le32_to_cpu(desc->token),
+ le64_to_cpu(desc->offset),
+ le32_to_cpu(desc->length));
+ if (ret < 0) {
+ kvfree(data_buf);
+ return ret;
+ }
+
+ ret = ksmbd_vfs_write(work, fp, data_buf, length, &offset, sync, &nbytes);
+ kvfree(data_buf);
+ if (ret < 0)
+ return ret;
+
+ return nbytes;
+}
+
+/**
+ * smb2_write() - handler for smb2 write from file
+ * @work: smb work containing write command buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb2_write(struct ksmbd_work *work)
+{
+ struct smb2_write_req *req;
+ struct smb2_write_rsp *rsp, *rsp_org;
+ struct ksmbd_file *fp = NULL;
+ loff_t offset;
+ size_t length;
+ ssize_t nbytes;
+ char *data_buf;
+ bool writethrough = false;
+ int err = 0;
+
+ rsp_org = work->response_buf;
+ WORK_BUFFERS(work, req, rsp);
+
+ if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_PIPE)) {
+ ksmbd_debug(SMB, "IPC pipe write request\n");
+ return smb2_write_pipe(work);
+ }
+
+ if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+ ksmbd_debug(SMB, "User does not have write permission\n");
+ err = -EACCES;
+ goto out;
+ }
+
+ fp = ksmbd_lookup_fd_slow(work, le64_to_cpu(req->VolatileFileId),
+ le64_to_cpu(req->PersistentFileId));
+ if (!fp) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (!(fp->daccess & (FILE_WRITE_DATA_LE | FILE_READ_ATTRIBUTES_LE))) {
+ pr_err("Not permitted to write : 0x%x\n", fp->daccess);
+ err = -EACCES;
+ goto out;
+ }
+
+ offset = le64_to_cpu(req->Offset);
+ length = le32_to_cpu(req->Length);
+
+ if (length > work->conn->vals->max_write_size) {
+ ksmbd_debug(SMB, "limiting write size to max size(%u)\n",
+ work->conn->vals->max_write_size);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH)
+ writethrough = true;
+
+ if (req->Channel != SMB2_CHANNEL_RDMA_V1 &&
+ req->Channel != SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
+ if (le16_to_cpu(req->DataOffset) ==
+ (offsetof(struct smb2_write_req, Buffer) - 4)) {
+ data_buf = (char *)&req->Buffer[0];
+ } else {
+ if ((le16_to_cpu(req->DataOffset) > get_rfc1002_len(req)) ||
+ (le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req))) {
+ pr_err("invalid write data offset %u, smb_len %u\n",
+ le16_to_cpu(req->DataOffset),
+ get_rfc1002_len(req));
+ err = -EINVAL;
+ goto out;
+ }
+
+ data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
+ le16_to_cpu(req->DataOffset));
+ }
+
+ ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags));
+
+ ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n",
+ fp->filp->f_path.dentry, offset, length);
+ err = ksmbd_vfs_write(work, fp, data_buf, length, &offset,
+ writethrough, &nbytes);
+ if (err < 0)
+ goto out;
+ } else {
+ /*
+ * Read the payload from the client over the RDMA channel,
+ * then write it to the file.
+ */
+ nbytes = smb2_write_rdma_channel(work, req, fp, offset,
+ le32_to_cpu(req->RemainingBytes),
+ writethrough);
+ if (nbytes < 0) {
+ err = (int)nbytes;
+ goto out;
+ }
+ }
+
+ rsp->StructureSize = cpu_to_le16(17);
+ rsp->DataOffset = 0;
+ rsp->Reserved = 0;
+ rsp->DataLength = cpu_to_le32(nbytes);
+ rsp->DataRemaining = 0;
+ rsp->Reserved2 = 0;
+ inc_rfc1001_len(rsp_org, 16);
+ ksmbd_fd_put(work, fp);
+ return 0;
+
+out:
+ if (err == -EAGAIN)
+ rsp->hdr.Status = STATUS_FILE_LOCK_CONFLICT;
+ else if (err == -ENOSPC || err == -EFBIG)
+ rsp->hdr.Status = STATUS_DISK_FULL;
+ else if (err == -ENOENT)
+ rsp->hdr.Status = STATUS_FILE_CLOSED;
+ else if (err == -EACCES)
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ else if (err == -ESHARE)
+ rsp->hdr.Status = STATUS_SHARING_VIOLATION;
+ else if (err == -EINVAL)
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ else
+ rsp->hdr.Status = STATUS_INVALID_HANDLE;
+
+ smb2_set_err_rsp(work);
+ ksmbd_fd_put(work, fp);
+ return err;
+}
+
+/**
+ * smb2_flush() - handler for smb2 flush file - fsync
+ * @work: smb work containing flush command buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb2_flush(struct ksmbd_work *work)
+{
+ struct smb2_flush_req *req;
+ struct smb2_flush_rsp *rsp, *rsp_org;
+ int err;
+
+ rsp_org = work->response_buf;
+ WORK_BUFFERS(work, req, rsp);
+
+ ksmbd_debug(SMB, "SMB2_FLUSH called for fid %llu\n",
+ le64_to_cpu(req->VolatileFileId));
+
+ err = ksmbd_vfs_fsync(work,
+ le64_to_cpu(req->VolatileFileId),
+ le64_to_cpu(req->PersistentFileId));
+ if (err)
+ goto out;
+
+ rsp->StructureSize = cpu_to_le16(4);
+ rsp->Reserved = 0;
+ inc_rfc1001_len(rsp_org, 4);
+ return 0;
+
+out:
+ if (err) {
+ rsp->hdr.Status = STATUS_INVALID_HANDLE;
+ smb2_set_err_rsp(work);
+ }
+
+ return err;
+}
+
+/**
+ * smb2_cancel() - handler for smb2 cancel command
+ * @work: smb work containing cancel command buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb2_cancel(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_hdr *hdr = work->request_buf;
+ struct smb2_hdr *chdr;
+ struct ksmbd_work *cancel_work = NULL;
+ int canceled = 0;
+ struct list_head *command_list;
+
+ ksmbd_debug(SMB, "smb2 cancel called on mid %llu, async flags 0x%x\n",
+ hdr->MessageId, hdr->Flags);
+
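+	/*
+	 * An asynchronous request is identified by its AsyncId, a
+	 * synchronous one by its MessageId; search the matching
+	 * per-connection list.
+	 */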
+ if (hdr->Flags & SMB2_FLAGS_ASYNC_COMMAND) {
+ command_list = &conn->async_requests;
+
+ spin_lock(&conn->request_lock);
+ list_for_each_entry(cancel_work, command_list,
+ async_request_entry) {
+ chdr = cancel_work->request_buf;
+
+ if (cancel_work->async_id !=
+ le64_to_cpu(hdr->Id.AsyncId))
+ continue;
+
+ ksmbd_debug(SMB,
+ "smb2 with AsyncId %llu cancelled command = 0x%x\n",
+ le64_to_cpu(hdr->Id.AsyncId),
+ le16_to_cpu(chdr->Command));
+ canceled = 1;
+ break;
+ }
+ spin_unlock(&conn->request_lock);
+ } else {
+ command_list = &conn->requests;
+
+ spin_lock(&conn->request_lock);
+ list_for_each_entry(cancel_work, command_list, request_entry) {
+ chdr = cancel_work->request_buf;
+
+ if (chdr->MessageId != hdr->MessageId ||
+ cancel_work == work)
+ continue;
+
+ ksmbd_debug(SMB,
+ "smb2 with mid %llu cancelled command = 0x%x\n",
+ le64_to_cpu(hdr->MessageId),
+ le16_to_cpu(chdr->Command));
+ canceled = 1;
+ break;
+ }
+ spin_unlock(&conn->request_lock);
+ }
+
+ if (canceled) {
+ cancel_work->state = KSMBD_WORK_CANCELLED;
+ if (cancel_work->cancel_fn)
+ cancel_work->cancel_fn(cancel_work->cancel_argv);
+ }
+
+ /* For SMB2_CANCEL command itself send no response */
+ work->send_no_response = 1;
+ return 0;
+}
+
+struct file_lock *smb_flock_init(struct file *f)
+{
+ struct file_lock *fl;
+
+ fl = locks_alloc_lock();
+ if (!fl)
+ goto out;
+
+ locks_init_lock(fl);
+
+ fl->fl_owner = f;
+ fl->fl_pid = current->tgid;
+ fl->fl_file = f;
+ fl->fl_flags = FL_POSIX;
+ fl->fl_ops = NULL;
+ fl->fl_lmops = NULL;
+
+out:
+ return fl;
+}
+
+static int smb2_set_flock_flags(struct file_lock *flock, int flags)
+{
+ int cmd = -EINVAL;
+
+ /* Check for a wrong flag combination in the lock request */
+ switch (flags) {
+ case SMB2_LOCKFLAG_SHARED:
+ ksmbd_debug(SMB, "received shared request\n");
+ cmd = F_SETLKW;
+ flock->fl_type = F_RDLCK;
+ flock->fl_flags |= FL_SLEEP;
+ break;
+ case SMB2_LOCKFLAG_EXCLUSIVE:
+ ksmbd_debug(SMB, "received exclusive request\n");
+ cmd = F_SETLKW;
+ flock->fl_type = F_WRLCK;
+ flock->fl_flags |= FL_SLEEP;
+ break;
+ case SMB2_LOCKFLAG_SHARED | SMB2_LOCKFLAG_FAIL_IMMEDIATELY:
+ ksmbd_debug(SMB,
+ "received shared & fail immediately request\n");
+ cmd = F_SETLK;
+ flock->fl_type = F_RDLCK;
+ break;
+ case SMB2_LOCKFLAG_EXCLUSIVE | SMB2_LOCKFLAG_FAIL_IMMEDIATELY:
+ ksmbd_debug(SMB,
+ "received exclusive & fail immediately request\n");
+ cmd = F_SETLK;
+ flock->fl_type = F_WRLCK;
+ break;
+ case SMB2_LOCKFLAG_UNLOCK:
+ ksmbd_debug(SMB, "received unlock request\n");
+ flock->fl_type = F_UNLCK;
+ cmd = 0;
+ break;
+ }
+
+ return cmd;
+}
+
+static struct ksmbd_lock *smb2_lock_init(struct file_lock *flock,
+ unsigned int cmd, int flags,
+ struct list_head *lock_list)
+{
+ struct ksmbd_lock *lock;
+
+ lock = kzalloc(sizeof(struct ksmbd_lock), GFP_KERNEL);
+ if (!lock)
+ return NULL;
+
+ lock->cmd = cmd;
+ lock->fl = flock;
+ lock->start = flock->fl_start;
+ lock->end = flock->fl_end;
+ lock->flags = flags;
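+	/* SMB2 treats a zero-length range as a point lock */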
+ if (lock->start == lock->end)
+ lock->zero_len = 1;
+ INIT_LIST_HEAD(&lock->clist);
+ INIT_LIST_HEAD(&lock->flist);
+ INIT_LIST_HEAD(&lock->llist);
+ list_add_tail(&lock->llist, lock_list);
+
+ return lock;
+}
+
+static void smb2_remove_blocked_lock(void **argv)
+{
+ struct file_lock *flock = (struct file_lock *)argv[0];
+
+ ksmbd_vfs_posix_lock_unblock(flock);
+ wake_up(&flock->fl_wait);
+}
+
+static inline bool lock_defer_pending(struct file_lock *fl)
+{
+ /* check pending lock waiters */
+ return waitqueue_active(&fl->fl_wait);
+}
+
+/**
+ * smb2_lock() - handler for smb2 file lock command
+ * @work: smb work containing lock command buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb2_lock(struct ksmbd_work *work)
+{
+ struct smb2_lock_req *req = work->request_buf;
+ struct smb2_lock_rsp *rsp = work->response_buf;
+ struct smb2_lock_element *lock_ele;
+ struct ksmbd_file *fp = NULL;
+ struct file_lock *flock = NULL;
+ struct file *filp = NULL;
+ int lock_count;
+ int flags = 0;
+ int cmd = 0;
+ int err = -EIO, i, rc = 0;
+ u64 lock_start, lock_length;
+ struct ksmbd_lock *smb_lock = NULL, *cmp_lock, *tmp, *tmp2;
+ struct ksmbd_conn *conn;
+ int nolock = 0;
+ LIST_HEAD(lock_list);
+ LIST_HEAD(rollback_list);
+ int prior_lock = 0;
+
+ ksmbd_debug(SMB, "Received lock request\n");
+ fp = ksmbd_lookup_fd_slow(work,
+ le64_to_cpu(req->VolatileFileId),
+ le64_to_cpu(req->PersistentFileId));
+ if (!fp) {
+ ksmbd_debug(SMB, "Invalid file id for lock : %llu\n",
+ le64_to_cpu(req->VolatileFileId));
+ err = -ENOENT;
+ goto out2;
+ }
+
+ filp = fp->filp;
+ lock_count = le16_to_cpu(req->LockCount);
+ lock_ele = req->locks;
+
+ ksmbd_debug(SMB, "lock count is %d\n", lock_count);
+ if (!lock_count) {
+ err = -EINVAL;
+ goto out2;
+ }
+
+ for (i = 0; i < lock_count; i++) {
+ flags = le32_to_cpu(lock_ele[i].Flags);
+
+ flock = smb_flock_init(filp);
+ if (!flock)
+ goto out;
+
+ cmd = smb2_set_flock_flags(flock, flags);
+
+ lock_start = le64_to_cpu(lock_ele[i].Offset);
+ lock_length = le64_to_cpu(lock_ele[i].Length);
+ if (lock_start > U64_MAX - lock_length) {
+ pr_err("Invalid lock range requested\n");
+ rsp->hdr.Status = STATUS_INVALID_LOCK_RANGE;
+ goto out;
+ }
+
+ if (lock_start > OFFSET_MAX)
+ flock->fl_start = OFFSET_MAX;
+ else
+ flock->fl_start = lock_start;
+
+ if (lock_length > OFFSET_MAX - flock->fl_start)
+ lock_length = OFFSET_MAX - flock->fl_start;
+
+ flock->fl_end = flock->fl_start + lock_length;
+
+ if (flock->fl_end < flock->fl_start) {
+ ksmbd_debug(SMB,
+ "the end offset(%llx) is smaller than the start offset(%llx)\n",
+ flock->fl_end, flock->fl_start);
+ rsp->hdr.Status = STATUS_INVALID_LOCK_RANGE;
+ goto out;
+ }
+
+ /* Check conflict locks in one request */
+ list_for_each_entry(cmp_lock, &lock_list, llist) {
+ if (cmp_lock->fl->fl_start <= flock->fl_start &&
+ cmp_lock->fl->fl_end >= flock->fl_end) {
+ if (cmp_lock->fl->fl_type != F_UNLCK &&
+ flock->fl_type != F_UNLCK) {
+ pr_err("conflict two locks in one request\n");
+ err = -EINVAL;
+ goto out;
+ }
+ }
+ }
+
+ smb_lock = smb2_lock_init(flock, cmd, flags, &lock_list);
+ if (!smb_lock) {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ list_for_each_entry_safe(smb_lock, tmp, &lock_list, llist) {
+ if (smb_lock->cmd < 0) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (!(smb_lock->flags & SMB2_LOCKFLAG_MASK)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if ((prior_lock & (SMB2_LOCKFLAG_EXCLUSIVE | SMB2_LOCKFLAG_SHARED) &&
+ smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) ||
+ (prior_lock == SMB2_LOCKFLAG_UNLOCK &&
+ !(smb_lock->flags & SMB2_LOCKFLAG_UNLOCK))) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ prior_lock = smb_lock->flags;
+
+ if (!(smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) &&
+ !(smb_lock->flags & SMB2_LOCKFLAG_FAIL_IMMEDIATELY))
+ goto no_check_cl;
+
+ nolock = 1;
+ /* check locks in connection list */
+ read_lock(&conn_list_lock);
+ list_for_each_entry(conn, &conn_list, conns_list) {
+ spin_lock(&conn->llist_lock);
+ list_for_each_entry_safe(cmp_lock, tmp2, &conn->lock_list, clist) {
+ if (file_inode(cmp_lock->fl->fl_file) !=
+ file_inode(smb_lock->fl->fl_file))
+ continue;
+
+ if (smb_lock->fl->fl_type == F_UNLCK) {
+ if (cmp_lock->fl->fl_file == smb_lock->fl->fl_file &&
+ cmp_lock->start == smb_lock->start &&
+ cmp_lock->end == smb_lock->end &&
+ !lock_defer_pending(cmp_lock->fl)) {
+ nolock = 0;
+ list_del(&cmp_lock->flist);
+ list_del(&cmp_lock->clist);
+ spin_unlock(&conn->llist_lock);
+ read_unlock(&conn_list_lock);
+
+ locks_free_lock(cmp_lock->fl);
+ kfree(cmp_lock);
+ goto out_check_cl;
+ }
+ continue;
+ }
+
+ if (cmp_lock->fl->fl_file == smb_lock->fl->fl_file) {
+ if (smb_lock->flags & SMB2_LOCKFLAG_SHARED)
+ continue;
+ } else {
+ if (cmp_lock->flags & SMB2_LOCKFLAG_SHARED)
+ continue;
+ }
+
+ /* check zero byte lock range */
+ if (cmp_lock->zero_len && !smb_lock->zero_len &&
+ cmp_lock->start > smb_lock->start &&
+ cmp_lock->start < smb_lock->end) {
+ spin_unlock(&conn->llist_lock);
+ read_unlock(&conn_list_lock);
+ pr_err("previous lock conflict with zero byte lock range\n");
+ goto out;
+ }
+
+ if (smb_lock->zero_len && !cmp_lock->zero_len &&
+ smb_lock->start > cmp_lock->start &&
+ smb_lock->start < cmp_lock->end) {
+ spin_unlock(&conn->llist_lock);
+ read_unlock(&conn_list_lock);
+ pr_err("current lock conflict with zero byte lock range\n");
+ goto out;
+ }
+
+ if (((cmp_lock->start <= smb_lock->start &&
+ cmp_lock->end > smb_lock->start) ||
+ (cmp_lock->start < smb_lock->end &&
+ cmp_lock->end >= smb_lock->end)) &&
+ !cmp_lock->zero_len && !smb_lock->zero_len) {
+ spin_unlock(&conn->llist_lock);
+ read_unlock(&conn_list_lock);
+ pr_err("Not allow lock operation on exclusive lock range\n");
+ goto out;
+ }
+ }
+ spin_unlock(&conn->llist_lock);
+ }
+ read_unlock(&conn_list_lock);
+out_check_cl:
+ if (smb_lock->fl->fl_type == F_UNLCK && nolock) {
+ pr_err("Try to unlock nolocked range\n");
+ rsp->hdr.Status = STATUS_RANGE_NOT_LOCKED;
+ goto out;
+ }
+
+no_check_cl:
+ if (smb_lock->zero_len) {
+ err = 0;
+ goto skip;
+ }
+
+ flock = smb_lock->fl;
+ list_del(&smb_lock->llist);
+retry:
+ rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL);
+skip:
+ if (flags & SMB2_LOCKFLAG_UNLOCK) {
+ if (!rc) {
+ ksmbd_debug(SMB, "File unlocked\n");
+ } else if (rc == -ENOENT) {
+ rsp->hdr.Status = STATUS_NOT_LOCKED;
+ goto out;
+ }
+ locks_free_lock(flock);
+ kfree(smb_lock);
+ } else {
+ if (rc == FILE_LOCK_DEFERRED) {
+ void **argv;
+
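+				/*
+				 * The POSIX lock is contended: queue the lock,
+				 * answer the client with STATUS_PENDING, sleep
+				 * until the conflicting lock is released (or
+				 * the request is cancelled), then retry.
+				 */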
+ ksmbd_debug(SMB,
+ "would have to wait for getting lock\n");
+ spin_lock(&work->conn->llist_lock);
+ list_add_tail(&smb_lock->clist,
+ &work->conn->lock_list);
+ spin_unlock(&work->conn->llist_lock);
+ list_add(&smb_lock->llist, &rollback_list);
+
+ argv = kmalloc(sizeof(void *), GFP_KERNEL);
+ if (!argv) {
+ err = -ENOMEM;
+ goto out;
+ }
+ argv[0] = flock;
+
+ rc = setup_async_work(work,
+ smb2_remove_blocked_lock,
+ argv);
+ if (rc) {
+ err = -ENOMEM;
+ goto out;
+ }
+ spin_lock(&fp->f_lock);
+ list_add(&work->fp_entry, &fp->blocked_works);
+ spin_unlock(&fp->f_lock);
+
+ smb2_send_interim_resp(work, STATUS_PENDING);
+
+ ksmbd_vfs_posix_lock_wait(flock);
+
+ if (work->state != KSMBD_WORK_ACTIVE) {
+ list_del(&smb_lock->llist);
+ spin_lock(&work->conn->llist_lock);
+ list_del(&smb_lock->clist);
+ spin_unlock(&work->conn->llist_lock);
+ locks_free_lock(flock);
+
+ if (work->state == KSMBD_WORK_CANCELLED) {
+ spin_lock(&fp->f_lock);
+ list_del(&work->fp_entry);
+ spin_unlock(&fp->f_lock);
+ rsp->hdr.Status =
+ STATUS_CANCELLED;
+ kfree(smb_lock);
+ smb2_send_interim_resp(work,
+ STATUS_CANCELLED);
+ work->send_no_response = 1;
+ goto out;
+ }
+ init_smb2_rsp_hdr(work);
+ smb2_set_err_rsp(work);
+ rsp->hdr.Status =
+ STATUS_RANGE_NOT_LOCKED;
+ kfree(smb_lock);
+ goto out2;
+ }
+
+ list_del(&smb_lock->llist);
+ spin_lock(&work->conn->llist_lock);
+ list_del(&smb_lock->clist);
+ spin_unlock(&work->conn->llist_lock);
+
+ spin_lock(&fp->f_lock);
+ list_del(&work->fp_entry);
+ spin_unlock(&fp->f_lock);
+ goto retry;
+ } else if (!rc) {
+ spin_lock(&work->conn->llist_lock);
+ list_add_tail(&smb_lock->clist,
+ &work->conn->lock_list);
+ list_add_tail(&smb_lock->flist,
+ &fp->lock_list);
+ spin_unlock(&work->conn->llist_lock);
+ list_add(&smb_lock->llist, &rollback_list);
+ ksmbd_debug(SMB, "successful in taking lock\n");
+ } else {
+ goto out;
+ }
+ }
+ }
+
+ if (atomic_read(&fp->f_ci->op_count) > 1)
+ smb_break_all_oplock(work, fp);
+
+ rsp->StructureSize = cpu_to_le16(4);
+ ksmbd_debug(SMB, "successful in taking lock\n");
+ rsp->hdr.Status = STATUS_SUCCESS;
+ rsp->Reserved = 0;
+ inc_rfc1001_len(rsp, 4);
+ ksmbd_fd_put(work, fp);
+ return 0;
+
+out:
+ list_for_each_entry_safe(smb_lock, tmp, &lock_list, llist) {
+ locks_free_lock(smb_lock->fl);
+ list_del(&smb_lock->llist);
+ kfree(smb_lock);
+ }
+
+ list_for_each_entry_safe(smb_lock, tmp, &rollback_list, llist) {
+ struct file_lock *rlock;
+
+ /* undo any locks that were granted before the failure */
+ rlock = smb_flock_init(filp);
+ if (rlock) {
+ rlock->fl_type = F_UNLCK;
+ rlock->fl_start = smb_lock->start;
+ rlock->fl_end = smb_lock->end;
+
+ rc = vfs_lock_file(filp, 0, rlock, NULL);
+ if (rc)
+ pr_err("rollback unlock fail : %d\n", rc);
+ locks_free_lock(rlock);
+ }
+
+ list_del(&smb_lock->llist);
+ spin_lock(&work->conn->llist_lock);
+ if (!list_empty(&smb_lock->flist))
+ list_del(&smb_lock->flist);
+ list_del(&smb_lock->clist);
+ spin_unlock(&work->conn->llist_lock);
+
+ locks_free_lock(smb_lock->fl);
+ kfree(smb_lock);
+ }
+out2:
+ ksmbd_debug(SMB, "failed in taking lock(flags : %x), err : %d\n", flags, err);
+
+ if (!rsp->hdr.Status) {
+ if (err == -EINVAL)
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ else if (err == -ENOMEM)
+ rsp->hdr.Status = STATUS_INSUFFICIENT_RESOURCES;
+ else if (err == -ENOENT)
+ rsp->hdr.Status = STATUS_FILE_CLOSED;
+ else
+ rsp->hdr.Status = STATUS_LOCK_NOT_GRANTED;
+ }
+
+ smb2_set_err_rsp(work);
+ ksmbd_fd_put(work, fp);
+ return err;
+}
+
+static int fsctl_copychunk(struct ksmbd_work *work, struct smb2_ioctl_req *req,
+ struct smb2_ioctl_rsp *rsp)
+{
+ struct copychunk_ioctl_req *ci_req;
+ struct copychunk_ioctl_rsp *ci_rsp;
+ struct ksmbd_file *src_fp = NULL, *dst_fp = NULL;
+ struct srv_copychunk *chunks;
+ unsigned int i, chunk_count, chunk_count_written = 0;
+ unsigned int chunk_size_written = 0;
+ loff_t total_size_written = 0;
+ int ret, cnt_code;
+
+ cnt_code = le32_to_cpu(req->CntCode);
+ ci_req = (struct copychunk_ioctl_req *)&req->Buffer[0];
+ ci_rsp = (struct copychunk_ioctl_rsp *)&rsp->Buffer[0];
+
+ rsp->VolatileFileId = req->VolatileFileId;
+ rsp->PersistentFileId = req->PersistentFileId;
+ ci_rsp->ChunksWritten =
+ cpu_to_le32(ksmbd_server_side_copy_max_chunk_count());
+ ci_rsp->ChunkBytesWritten =
+ cpu_to_le32(ksmbd_server_side_copy_max_chunk_size());
+ ci_rsp->TotalBytesWritten =
+ cpu_to_le32(ksmbd_server_side_copy_max_total_size());
+
+ chunks = (struct srv_copychunk *)&ci_req->Chunks[0];
+ chunk_count = le32_to_cpu(ci_req->ChunkCount);
+ total_size_written = 0;
+
+ /* verify the SRV_COPYCHUNK_COPY packet */
+ if (chunk_count > ksmbd_server_side_copy_max_chunk_count() ||
+ le32_to_cpu(req->InputCount) <
+ offsetof(struct copychunk_ioctl_req, Chunks) +
+ chunk_count * sizeof(struct srv_copychunk)) {
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ return -EINVAL;
+ }
+
+ for (i = 0; i < chunk_count; i++) {
+ if (le32_to_cpu(chunks[i].Length) == 0 ||
+ le32_to_cpu(chunks[i].Length) > ksmbd_server_side_copy_max_chunk_size())
+ break;
+ total_size_written += le32_to_cpu(chunks[i].Length);
+ }
+
+ if (i < chunk_count ||
+ total_size_written > ksmbd_server_side_copy_max_total_size()) {
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ return -EINVAL;
+ }
+
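+	/*
+	 * ResumeKey[0]/[1] carry the volatile/persistent ids handed out by
+	 * FSCTL_REQUEST_RESUME_KEY and identify the copy source file.
+	 */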
+ src_fp = ksmbd_lookup_foreign_fd(work,
+ le64_to_cpu(ci_req->ResumeKey[0]));
+ dst_fp = ksmbd_lookup_fd_slow(work,
+ le64_to_cpu(req->VolatileFileId),
+ le64_to_cpu(req->PersistentFileId));
+ ret = -EINVAL;
+ if (!src_fp ||
+ src_fp->persistent_id != le64_to_cpu(ci_req->ResumeKey[1])) {
+ rsp->hdr.Status = STATUS_OBJECT_NAME_NOT_FOUND;
+ goto out;
+ }
+
+ if (!dst_fp) {
+ rsp->hdr.Status = STATUS_FILE_CLOSED;
+ goto out;
+ }
+
+ /*
+ * FILE_READ_DATA should only be included in
+ * the FSCTL_COPYCHUNK case
+ */
+ if (cnt_code == FSCTL_COPYCHUNK &&
+ !(dst_fp->daccess & (FILE_READ_DATA_LE | FILE_GENERIC_READ_LE))) {
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ goto out;
+ }
+
+ ret = ksmbd_vfs_copy_file_ranges(work, src_fp, dst_fp,
+ chunks, chunk_count,
+ &chunk_count_written,
+ &chunk_size_written,
+ &total_size_written);
+ if (ret < 0) {
+ if (ret == -EACCES)
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ else if (ret == -EAGAIN)
+ rsp->hdr.Status = STATUS_FILE_LOCK_CONFLICT;
+ else if (ret == -EBADF)
+ rsp->hdr.Status = STATUS_INVALID_HANDLE;
+ else if (ret == -EFBIG || ret == -ENOSPC)
+ rsp->hdr.Status = STATUS_DISK_FULL;
+ else if (ret == -EINVAL)
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ else if (ret == -EISDIR)
+ rsp->hdr.Status = STATUS_FILE_IS_A_DIRECTORY;
+ else if (ret == -E2BIG)
+ rsp->hdr.Status = STATUS_INVALID_VIEW_SIZE;
+ else
+ rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR;
+ }
+
+ ci_rsp->ChunksWritten = cpu_to_le32(chunk_count_written);
+ ci_rsp->ChunkBytesWritten = cpu_to_le32(chunk_size_written);
+ ci_rsp->TotalBytesWritten = cpu_to_le32(total_size_written);
+out:
+ ksmbd_fd_put(work, src_fp);
+ ksmbd_fd_put(work, dst_fp);
+ return ret;
+}
+
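+/* return the first primary (non-secondary) IPv4 address of @idev */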
+static __be32 idev_ipv4_address(struct in_device *idev)
+{
+ __be32 addr = 0;
+
+ struct in_ifaddr *ifa;
+
+ rcu_read_lock();
+ in_dev_for_each_ifa_rcu(ifa, idev) {
+ if (ifa->ifa_flags & IFA_F_SECONDARY)
+ continue;
+
+ addr = ifa->ifa_address;
+ break;
+ }
+ rcu_read_unlock();
+ return addr;
+}
+
+static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
+ struct smb2_ioctl_req *req,
+ struct smb2_ioctl_rsp *rsp)
+{
+ struct network_interface_info_ioctl_rsp *nii_rsp = NULL;
+ int nbytes = 0;
+ struct net_device *netdev;
+ struct sockaddr_storage_rsp *sockaddr_storage;
+ unsigned int flags;
+ unsigned long long speed;
+ struct sockaddr_in6 *csin6 = (struct sockaddr_in6 *)&conn->peer_addr;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, netdev) {
+ if (netdev->type == ARPHRD_LOOPBACK)
+ continue;
+
+ flags = dev_get_flags(netdev);
+ if (!(flags & IFF_RUNNING))
+ continue;
+
+ nii_rsp = (struct network_interface_info_ioctl_rsp *)
+ &rsp->Buffer[nbytes];
+ nii_rsp->IfIndex = cpu_to_le32(netdev->ifindex);
+
+ nii_rsp->Capability = 0;
+ if (ksmbd_rdma_capable_netdev(netdev))
+ nii_rsp->Capability |= cpu_to_le32(RDMA_CAPABLE);
+
+ nii_rsp->Next = cpu_to_le32(152);
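+		/* 152 == sizeof(struct network_interface_info_ioctl_rsp) */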
+ nii_rsp->Reserved = 0;
+
+ if (netdev->ethtool_ops->get_link_ksettings) {
+ struct ethtool_link_ksettings cmd;
+
+ netdev->ethtool_ops->get_link_ksettings(netdev, &cmd);
+ speed = cmd.base.speed;
+ } else {
+ pr_err("%s %s\n", netdev->name,
+ "speed is unknown, defaulting to 1Gb/sec");
+ speed = SPEED_1000;
+ }
+
+ speed *= 1000000;
+ nii_rsp->LinkSpeed = cpu_to_le64(speed);
+
+ sockaddr_storage = (struct sockaddr_storage_rsp *)
+ nii_rsp->SockAddr_Storage;
+ memset(sockaddr_storage, 0, 128);
+
+ if (conn->peer_addr.ss_family == PF_INET ||
+ ipv6_addr_v4mapped(&csin6->sin6_addr)) {
+ struct in_device *idev;
+
+ sockaddr_storage->Family = cpu_to_le16(INTERNETWORK);
+ sockaddr_storage->addr4.Port = 0;
+
+ idev = __in_dev_get_rtnl(netdev);
+ if (!idev)
+ continue;
+ sockaddr_storage->addr4.IPv4address =
+ idev_ipv4_address(idev);
+ } else {
+ struct inet6_dev *idev6;
+ struct inet6_ifaddr *ifa;
+ __u8 *ipv6_addr = sockaddr_storage->addr6.IPv6address;
+
+ sockaddr_storage->Family = cpu_to_le16(INTERNETWORKV6);
+ sockaddr_storage->addr6.Port = 0;
+ sockaddr_storage->addr6.FlowInfo = 0;
+
+ idev6 = __in6_dev_get(netdev);
+ if (!idev6)
+ continue;
+
+ list_for_each_entry(ifa, &idev6->addr_list, if_list) {
+ if (ifa->flags & (IFA_F_TENTATIVE |
+ IFA_F_DEPRECATED))
+ continue;
+ memcpy(ipv6_addr, ifa->addr.s6_addr, 16);
+ break;
+ }
+ sockaddr_storage->addr6.ScopeId = 0;
+ }
+
+ nbytes += sizeof(struct network_interface_info_ioctl_rsp);
+ }
+ rtnl_unlock();
+
+ /* the last entry terminates the chain with Next == 0 */
+ if (nii_rsp)
+ nii_rsp->Next = 0;
+
+ if (!nbytes) {
+ rsp->hdr.Status = STATUS_BUFFER_TOO_SMALL;
+ return -EINVAL;
+ }
+
+ rsp->PersistentFileId = cpu_to_le64(SMB2_NO_FID);
+ rsp->VolatileFileId = cpu_to_le64(SMB2_NO_FID);
+ return nbytes;
+}
+
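+/*
+ * Re-validate the dialect, client GUID, security mode and capabilities
+ * against the values seen at SMB2 NEGOTIATE time; any mismatch indicates
+ * a possible downgrade attack and fails the request.
+ */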
+static int fsctl_validate_negotiate_info(struct ksmbd_conn *conn,
+ struct validate_negotiate_info_req *neg_req,
+ struct validate_negotiate_info_rsp *neg_rsp)
+{
+ int ret = 0;
+ int dialect;
+
+ dialect = ksmbd_lookup_dialect_by_id(neg_req->Dialects,
+ neg_req->DialectCount);
+ if (dialect == BAD_PROT_ID || dialect != conn->dialect) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ if (strncmp(neg_req->Guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ if (le16_to_cpu(neg_req->SecurityMode) != conn->cli_sec_mode) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ if (le32_to_cpu(neg_req->Capabilities) != conn->cli_cap) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ neg_rsp->Capabilities = cpu_to_le32(conn->vals->capabilities);
+ memset(neg_rsp->Guid, 0, SMB2_CLIENT_GUID_SIZE);
+ neg_rsp->SecurityMode = cpu_to_le16(conn->srv_sec_mode);
+ neg_rsp->Dialect = cpu_to_le16(conn->dialect);
+err_out:
+ return ret;
+}
+
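+/*
+ * Report the allocated (non-hole) ranges of a sparse file that overlap
+ * the queried region.
+ */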
+static int fsctl_query_allocated_ranges(struct ksmbd_work *work, u64 id,
+ struct file_allocated_range_buffer *qar_req,
+ struct file_allocated_range_buffer *qar_rsp,
+ int in_count, int *out_count)
+{
+ struct ksmbd_file *fp;
+ loff_t start, length;
+ int ret = 0;
+
+ *out_count = 0;
+ if (in_count == 0)
+ return -EINVAL;
+
+ fp = ksmbd_lookup_fd_fast(work, id);
+ if (!fp)
+ return -ENOENT;
+
+ start = le64_to_cpu(qar_req->file_offset);
+ length = le64_to_cpu(qar_req->length);
+
+ ret = ksmbd_vfs_fqar_lseek(fp, start, length,
+ qar_rsp, in_count, out_count);
+ if (ret && ret != -E2BIG)
+ *out_count = 0;
+
+ ksmbd_fd_put(work, fp);
+ return ret;
+}
+
+static int fsctl_pipe_transceive(struct ksmbd_work *work, u64 id,
+ int out_buf_len, struct smb2_ioctl_req *req,
+ struct smb2_ioctl_rsp *rsp)
+{
+ struct ksmbd_rpc_command *rpc_resp;
+ char *data_buf = (char *)&req->Buffer[0];
+ int nbytes = 0;
+
+ rpc_resp = ksmbd_rpc_ioctl(work->sess, id, data_buf,
+ le32_to_cpu(req->InputCount));
+ if (rpc_resp) {
+ if (rpc_resp->flags == KSMBD_RPC_SOME_NOT_MAPPED) {
+ /*
+ * set STATUS_SOME_NOT_MAPPED response
+ * for unknown domain sid.
+ */
+ rsp->hdr.Status = STATUS_SOME_NOT_MAPPED;
+ } else if (rpc_resp->flags == KSMBD_RPC_ENOTIMPLEMENTED) {
+ rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+ goto out;
+ } else if (rpc_resp->flags != KSMBD_RPC_OK) {
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ goto out;
+ }
+
+ nbytes = rpc_resp->payload_sz;
+ if (rpc_resp->payload_sz > out_buf_len) {
+ rsp->hdr.Status = STATUS_BUFFER_OVERFLOW;
+ nbytes = out_buf_len;
+ }
+
+ if (!rpc_resp->payload_sz) {
+ rsp->hdr.Status =
+ STATUS_UNEXPECTED_IO_ERROR;
+ goto out;
+ }
+
+ memcpy((char *)rsp->Buffer, rpc_resp->payload, nbytes);
+ }
+out:
+ kvfree(rpc_resp);
+ return nbytes;
+}
+
+static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id,
+ struct file_sparse *sparse)
+{
+ struct ksmbd_file *fp;
+ struct user_namespace *user_ns;
+ int ret = 0;
+ __le32 old_fattr;
+
+ fp = ksmbd_lookup_fd_fast(work, id);
+ if (!fp)
+ return -ENOENT;
+ user_ns = file_mnt_user_ns(fp->filp);
+
+ old_fattr = fp->f_ci->m_fattr;
+ if (sparse->SetSparse)
+ fp->f_ci->m_fattr |= ATTR_SPARSE_FILE_LE;
+ else
+ fp->f_ci->m_fattr &= ~ATTR_SPARSE_FILE_LE;
+
+ if (fp->f_ci->m_fattr != old_fattr &&
+ test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_STORE_DOS_ATTRS)) {
+ struct xattr_dos_attrib da;
+
+ ret = ksmbd_vfs_get_dos_attrib_xattr(user_ns,
+ fp->filp->f_path.dentry, &da);
+ if (ret <= 0)
+ goto out;
+
+ da.attr = le32_to_cpu(fp->f_ci->m_fattr);
+ ret = ksmbd_vfs_set_dos_attrib_xattr(user_ns,
+ fp->filp->f_path.dentry, &da);
+ if (ret)
+ fp->f_ci->m_fattr = old_fattr;
+ }
+
+out:
+ ksmbd_fd_put(work, fp);
+ return ret;
+}
+
+static int fsctl_request_resume_key(struct ksmbd_work *work,
+ struct smb2_ioctl_req *req,
+ struct resume_key_ioctl_rsp *key_rsp)
+{
+ struct ksmbd_file *fp;
+
+ fp = ksmbd_lookup_fd_slow(work,
+ le64_to_cpu(req->VolatileFileId),
+ le64_to_cpu(req->PersistentFileId));
+ if (!fp)
+ return -ENOENT;
+
+ memset(key_rsp, 0, sizeof(*key_rsp));
+ key_rsp->ResumeKey[0] = req->VolatileFileId;
+ key_rsp->ResumeKey[1] = req->PersistentFileId;
+ ksmbd_fd_put(work, fp);
+
+ return 0;
+}
+
+/**
+ * smb2_ioctl() - handler for smb2 ioctl command
+ * @work: smb work containing ioctl command buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+int smb2_ioctl(struct ksmbd_work *work)
+{
+ struct smb2_ioctl_req *req;
+ struct smb2_ioctl_rsp *rsp, *rsp_org;
+ int cnt_code, nbytes = 0;
+ int out_buf_len;
+ u64 id = KSMBD_NO_FID;
+ struct ksmbd_conn *conn = work->conn;
+ int ret = 0;
+
+ rsp_org = work->response_buf;
+ if (work->next_smb2_rcv_hdr_off) {
+ req = ksmbd_req_buf_next(work);
+ rsp = ksmbd_resp_buf_next(work);
+ if (!has_file_id(le64_to_cpu(req->VolatileFileId))) {
+ ksmbd_debug(SMB, "Compound request set FID = %llu\n",
+ work->compound_fid);
+ id = work->compound_fid;
+ }
+ } else {
+ req = work->request_buf;
+ rsp = work->response_buf;
+ }
+
+ if (!has_file_id(id))
+ id = le64_to_cpu(req->VolatileFileId);
+
+ if (req->Flags != cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL)) {
+ rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+ goto out;
+ }
+
+ cnt_code = le32_to_cpu(req->CntCode);
+ out_buf_len = le32_to_cpu(req->MaxOutputResponse);
+ out_buf_len = min(KSMBD_IPC_MAX_PAYLOAD, out_buf_len);
+
+ switch (cnt_code) {
+ case FSCTL_DFS_GET_REFERRALS:
+ case FSCTL_DFS_GET_REFERRALS_EX:
+ /* Not support DFS yet */
+ rsp->hdr.Status = STATUS_FS_DRIVER_REQUIRED;
+ goto out;
+ case FSCTL_CREATE_OR_GET_OBJECT_ID:
+ {
+ struct file_object_buf_type1_ioctl_rsp *obj_buf;
+
+ nbytes = sizeof(struct file_object_buf_type1_ioctl_rsp);
+ obj_buf = (struct file_object_buf_type1_ioctl_rsp *)
+ &rsp->Buffer[0];
+
+ /*
+ * TODO: This is dummy implementation to pass smbtorture
+ * Need to check correct response later
+ */
+ memset(obj_buf->ObjectId, 0x0, 16);
+ memset(obj_buf->BirthVolumeId, 0x0, 16);
+ memset(obj_buf->BirthObjectId, 0x0, 16);
+ memset(obj_buf->DomainId, 0x0, 16);
+
+ break;
+ }
+ case FSCTL_PIPE_TRANSCEIVE:
+ nbytes = fsctl_pipe_transceive(work, id, out_buf_len, req, rsp);
+ break;
+ case FSCTL_VALIDATE_NEGOTIATE_INFO:
+ if (conn->dialect < SMB30_PROT_ID) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ ret = fsctl_validate_negotiate_info(conn,
+ (struct validate_negotiate_info_req *)&req->Buffer[0],
+ (struct validate_negotiate_info_rsp *)&rsp->Buffer[0]);
+ if (ret < 0)
+ goto out;
+
+ nbytes = sizeof(struct validate_negotiate_info_rsp);
+ rsp->PersistentFileId = cpu_to_le64(SMB2_NO_FID);
+ rsp->VolatileFileId = cpu_to_le64(SMB2_NO_FID);
+ break;
+ case FSCTL_QUERY_NETWORK_INTERFACE_INFO:
+ nbytes = fsctl_query_iface_info_ioctl(conn, req, rsp);
+ if (nbytes < 0)
+ goto out;
+ break;
+ case FSCTL_REQUEST_RESUME_KEY:
+ if (out_buf_len < sizeof(struct resume_key_ioctl_rsp)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = fsctl_request_resume_key(work, req,
+ (struct resume_key_ioctl_rsp *)&rsp->Buffer[0]);
+ if (ret < 0)
+ goto out;
+ rsp->PersistentFileId = req->PersistentFileId;
+ rsp->VolatileFileId = req->VolatileFileId;
+ nbytes = sizeof(struct resume_key_ioctl_rsp);
+ break;
+ case FSCTL_COPYCHUNK:
+ case FSCTL_COPYCHUNK_WRITE:
+ if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+ ksmbd_debug(SMB,
+ "User does not have write permission\n");
+ ret = -EACCES;
+ goto out;
+ }
+
+ if (out_buf_len < sizeof(struct copychunk_ioctl_rsp)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ nbytes = sizeof(struct copychunk_ioctl_rsp);
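+		/*
+		 * fsctl_copychunk() sets rsp->hdr.Status itself on failure;
+		 * the reply still carries the copychunk counters either way.
+		 */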
+ fsctl_copychunk(work, req, rsp);
+ break;
+ case FSCTL_SET_SPARSE:
+ ret = fsctl_set_sparse(work, id,
+ (struct file_sparse *)&req->Buffer[0]);
+ if (ret < 0)
+ goto out;
+ break;
+ case FSCTL_SET_ZERO_DATA:
+ {
+ struct file_zero_data_information *zero_data;
+ struct ksmbd_file *fp;
+ loff_t off, len;
+
+ if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
+ ksmbd_debug(SMB,
+ "User does not have write permission\n");
+ ret = -EACCES;
+ goto out;
+ }
+
+ zero_data =
+ (struct file_zero_data_information *)&req->Buffer[0];
+
+ fp = ksmbd_lookup_fd_fast(work, id);
+ if (!fp) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ off = le64_to_cpu(zero_data->FileOffset);
+ len = le64_to_cpu(zero_data->BeyondFinalZero) - off;
+
+ ret = ksmbd_vfs_zero_data(work, fp, off, len);
+ ksmbd_fd_put(work, fp);
+ if (ret < 0)
+ goto out;
+ break;
+ }
+ case FSCTL_QUERY_ALLOCATED_RANGES:
+ ret = fsctl_query_allocated_ranges(work, id,
+ (struct file_allocated_range_buffer *)&req->Buffer[0],
+ (struct file_allocated_range_buffer *)&rsp->Buffer[0],
+ out_buf_len /
+ sizeof(struct file_allocated_range_buffer), &nbytes);
+ if (ret == -E2BIG) {
+ rsp->hdr.Status = STATUS_BUFFER_OVERFLOW;
+ } else if (ret < 0) {
+ nbytes = 0;
+ goto out;
+ }
+
+ nbytes *= sizeof(struct file_allocated_range_buffer);
+ break;
+ case FSCTL_GET_REPARSE_POINT:
+ {
+ struct reparse_data_buffer *reparse_ptr;
+ struct ksmbd_file *fp;
+
+ reparse_ptr = (struct reparse_data_buffer *)&rsp->Buffer[0];
+ fp = ksmbd_lookup_fd_fast(work, id);
+ if (!fp) {
+ pr_err("not found fp!!\n");
+ ret = -ENOENT;
+ goto out;
+ }
+
+ reparse_ptr->ReparseTag =
+ smb2_get_reparse_tag_special_file(file_inode(fp->filp)->i_mode);
+ reparse_ptr->ReparseDataLength = 0;
+ ksmbd_fd_put(work, fp);
+ nbytes = sizeof(struct reparse_data_buffer);
+ break;
+ }
+ case FSCTL_DUPLICATE_EXTENTS_TO_FILE:
+ {
+ struct ksmbd_file *fp_in, *fp_out = NULL;
+ struct duplicate_extents_to_file *dup_ext;
+ loff_t src_off, dst_off, length, cloned;
+
+ dup_ext = (struct duplicate_extents_to_file *)&req->Buffer[0];
+
+ fp_in = ksmbd_lookup_fd_slow(work, dup_ext->VolatileFileHandle,
+ dup_ext->PersistentFileHandle);
+ if (!fp_in) {
+ pr_err("not found file handle in duplicate extent to file\n");
+ ret = -ENOENT;
+ goto out;
+ }
+
+ fp_out = ksmbd_lookup_fd_fast(work, id);
+ if (!fp_out) {
+ pr_err("not found fp\n");
+ ret = -ENOENT;
+ goto dup_ext_out;
+ }
+
+ src_off = le64_to_cpu(dup_ext->SourceFileOffset);
+ dst_off = le64_to_cpu(dup_ext->TargetFileOffset);
+ length = le64_to_cpu(dup_ext->ByteCount);
+ cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp,
+ dst_off, length, 0);
+ if (cloned == -EXDEV || cloned == -EOPNOTSUPP) {
+ ret = -EOPNOTSUPP;
+ goto dup_ext_out;
+ } else if (cloned != length) {
+ cloned = vfs_copy_file_range(fp_in->filp, src_off,
+ fp_out->filp, dst_off, length, 0);
+ if (cloned != length) {
+ if (cloned < 0)
+ ret = cloned;
+ else
+ ret = -EINVAL;
+ }
+ }
+
+dup_ext_out:
+ ksmbd_fd_put(work, fp_in);
+ ksmbd_fd_put(work, fp_out);
+ if (ret < 0)
+ goto out;
+ break;
+ }
+ default:
+ ksmbd_debug(SMB, "not implemented yet ioctl command 0x%x\n",
+ cnt_code);
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
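+ /*
+ * The fixed part of the ioctl response is 48 bytes (StructureSize 49
+ * counts the first byte of the variable Buffer), so input and output
+ * data start at offset 64 + 48 = 112 from the SMB2 header.
+ */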
+ rsp->CntCode = cpu_to_le32(cnt_code);
+ rsp->InputCount = cpu_to_le32(0);
+ rsp->InputOffset = cpu_to_le32(112);
+ rsp->OutputOffset = cpu_to_le32(112);
+ rsp->OutputCount = cpu_to_le32(nbytes);
+ rsp->StructureSize = cpu_to_le16(49);
+ rsp->Reserved = cpu_to_le16(0);
+ rsp->Flags = cpu_to_le32(0);
+ rsp->Reserved2 = cpu_to_le32(0);
+ inc_rfc1001_len(rsp_org, 48 + nbytes);
+
+ return 0;
+
+out:
+ if (ret == -EACCES)
+ rsp->hdr.Status = STATUS_ACCESS_DENIED;
+ else if (ret == -ENOENT)
+ rsp->hdr.Status = STATUS_OBJECT_NAME_NOT_FOUND;
+ else if (ret == -EOPNOTSUPP)
+ rsp->hdr.Status = STATUS_NOT_SUPPORTED;
+ else if (ret < 0 || rsp->hdr.Status == 0)
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ smb2_set_err_rsp(work);
+ return 0;
+}
+
+/**
+ * smb20_oplock_break_ack() - handler for smb2.0 oplock break command
+ * @work: smb work containing oplock break command buffer
+ */
+static void smb20_oplock_break_ack(struct ksmbd_work *work)
+{
+ struct smb2_oplock_break *req = work->request_buf;
+ struct smb2_oplock_break *rsp = work->response_buf;
+ struct ksmbd_file *fp;
+ struct oplock_info *opinfo = NULL;
+ __le32 err = 0;
+ int ret = 0;
+ u64 volatile_id, persistent_id;
+ char req_oplevel = 0, rsp_oplevel = 0;
+ unsigned int oplock_change_type;
+
+ volatile_id = le64_to_cpu(req->VolatileFid);
+ persistent_id = le64_to_cpu(req->PersistentFid);
+ req_oplevel = req->OplockLevel;
+ ksmbd_debug(OPLOCK, "v_id %llu, p_id %llu request oplock level %d\n",
+ volatile_id, persistent_id, req_oplevel);
+
+ fp = ksmbd_lookup_fd_slow(work, volatile_id, persistent_id);
+ if (!fp) {
+ rsp->hdr.Status = STATUS_FILE_CLOSED;
+ smb2_set_err_rsp(work);
+ return;
+ }
+
+ opinfo = opinfo_get(fp);
+ if (!opinfo) {
+ pr_err("unexpected null oplock_info\n");
+ rsp->hdr.Status = STATUS_INVALID_OPLOCK_PROTOCOL;
+ smb2_set_err_rsp(work);
+ ksmbd_fd_put(work, fp);
+ return;
+ }
+
+ if (opinfo->level == SMB2_OPLOCK_LEVEL_NONE) {
+ rsp->hdr.Status = STATUS_INVALID_OPLOCK_PROTOCOL;
+ goto err_out;
+ }
+
+ if (opinfo->op_state == OPLOCK_STATE_NONE) {
+ ksmbd_debug(SMB, "unexpected oplock state 0x%x\n", opinfo->op_state);
+ rsp->hdr.Status = STATUS_UNSUCCESSFUL;
+ goto err_out;
+ }
+
+ if ((opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE ||
+ opinfo->level == SMB2_OPLOCK_LEVEL_BATCH) &&
+ (req_oplevel != SMB2_OPLOCK_LEVEL_II &&
+ req_oplevel != SMB2_OPLOCK_LEVEL_NONE)) {
+ err = STATUS_INVALID_OPLOCK_PROTOCOL;
+ oplock_change_type = OPLOCK_WRITE_TO_NONE;
+ } else if (opinfo->level == SMB2_OPLOCK_LEVEL_II &&
+ req_oplevel != SMB2_OPLOCK_LEVEL_NONE) {
+ err = STATUS_INVALID_OPLOCK_PROTOCOL;
+ oplock_change_type = OPLOCK_READ_TO_NONE;
+ } else if (req_oplevel == SMB2_OPLOCK_LEVEL_II ||
+ req_oplevel == SMB2_OPLOCK_LEVEL_NONE) {
+ err = STATUS_INVALID_DEVICE_STATE;
+ if ((opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE ||
+ opinfo->level == SMB2_OPLOCK_LEVEL_BATCH) &&
+ req_oplevel == SMB2_OPLOCK_LEVEL_II) {
+ oplock_change_type = OPLOCK_WRITE_TO_READ;
+ } else if ((opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE ||
+ opinfo->level == SMB2_OPLOCK_LEVEL_BATCH) &&
+ req_oplevel == SMB2_OPLOCK_LEVEL_NONE) {
+ oplock_change_type = OPLOCK_WRITE_TO_NONE;
+ } else if (opinfo->level == SMB2_OPLOCK_LEVEL_II &&
+ req_oplevel == SMB2_OPLOCK_LEVEL_NONE) {
+ oplock_change_type = OPLOCK_READ_TO_NONE;
+ } else {
+ oplock_change_type = 0;
+ }
+ } else {
+ oplock_change_type = 0;
+ }
+
+ switch (oplock_change_type) {
+ case OPLOCK_WRITE_TO_READ:
+ ret = opinfo_write_to_read(opinfo);
+ rsp_oplevel = SMB2_OPLOCK_LEVEL_II;
+ break;
+ case OPLOCK_WRITE_TO_NONE:
+ ret = opinfo_write_to_none(opinfo);
+ rsp_oplevel = SMB2_OPLOCK_LEVEL_NONE;
+ break;
+ case OPLOCK_READ_TO_NONE:
+ ret = opinfo_read_to_none(opinfo);
+ rsp_oplevel = SMB2_OPLOCK_LEVEL_NONE;
+ break;
+ default:
+ pr_err("unknown oplock change 0x%x -> 0x%x\n",
+ opinfo->level, rsp_oplevel);
+ }
+
+ if (ret < 0) {
+ rsp->hdr.Status = err;
+ goto err_out;
+ }
+
+ opinfo->op_state = OPLOCK_STATE_NONE;
+ wake_up_interruptible_all(&opinfo->oplock_q);
+ /* drop the references only after the state update and wakeup */
+ opinfo_put(opinfo);
+ ksmbd_fd_put(work, fp);
+
+ rsp->StructureSize = cpu_to_le16(24);
+ rsp->OplockLevel = rsp_oplevel;
+ rsp->Reserved = 0;
+ rsp->Reserved2 = 0;
+ rsp->VolatileFid = cpu_to_le64(volatile_id);
+ rsp->PersistentFid = cpu_to_le64(persistent_id);
+ inc_rfc1001_len(rsp, 24);
+ return;
+
+err_out:
+ opinfo->op_state = OPLOCK_STATE_NONE;
+ wake_up_interruptible_all(&opinfo->oplock_q);
+
+ opinfo_put(opinfo);
+ ksmbd_fd_put(work, fp);
+ smb2_set_err_rsp(work);
+}
+
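+/*
+ * The client must acknowledge exactly the state the break requested,
+ * with one relaxation: when the server asked for read|handle caching,
+ * any acknowledged state without write caching is accepted as a
+ * further downgrade. Returns 0 if the state is acceptable, 1 if not.
+ */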
+static int check_lease_state(struct lease *lease, __le32 req_state)
+{
+ if ((lease->new_state ==
+ (SMB2_LEASE_READ_CACHING_LE | SMB2_LEASE_HANDLE_CACHING_LE)) &&
+ !(req_state & SMB2_LEASE_WRITE_CACHING_LE)) {
+ lease->new_state = req_state;
+ return 0;
+ }
+
+ if (lease->new_state == req_state)
+ return 0;
+
+ return 1;
+}
+
+/**
+ * smb21_lease_break_ack() - handler for smb2.1 lease break command
+ * @work: smb work containing lease break command buffer
+ */
+static void smb21_lease_break_ack(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_lease_ack *req = work->request_buf;
+ struct smb2_lease_ack *rsp = work->response_buf;
+ struct oplock_info *opinfo;
+ __le32 err = 0;
+ int ret = 0;
+ unsigned int lease_change_type;
+ __le32 lease_state;
+ struct lease *lease;
+
+ ksmbd_debug(OPLOCK, "smb21 lease break, lease state(0x%x)\n",
+ le32_to_cpu(req->LeaseState));
+ opinfo = lookup_lease_in_table(conn, req->LeaseKey);
+ if (!opinfo) {
+ ksmbd_debug(OPLOCK, "file not opened\n");
+ rsp->hdr.Status = STATUS_UNSUCCESSFUL;
+ smb2_set_err_rsp(work);
+ return;
+ }
+ lease = opinfo->o_lease;
+
+ if (opinfo->op_state == OPLOCK_STATE_NONE) {
+ pr_err("unexpected lease break state 0x%x\n",
+ opinfo->op_state);
+ rsp->hdr.Status = STATUS_UNSUCCESSFUL;
+ goto err_out;
+ }
+
+ if (check_lease_state(lease, req->LeaseState)) {
+ rsp->hdr.Status = STATUS_REQUEST_NOT_ACCEPTED;
+ ksmbd_debug(OPLOCK,
+ "req lease state: 0x%x, expected state: 0x%x\n",
+ le32_to_cpu(req->LeaseState),
+ le32_to_cpu(lease->new_state));
+ goto err_out;
+ }
+
+ if (!atomic_read(&opinfo->breaking_cnt)) {
+ rsp->hdr.Status = STATUS_UNSUCCESSFUL;
+ goto err_out;
+ }
+
+ /* check for bad lease state */
+ if (req->LeaseState &
+ (~(SMB2_LEASE_READ_CACHING_LE | SMB2_LEASE_HANDLE_CACHING_LE))) {
+ err = STATUS_INVALID_OPLOCK_PROTOCOL;
+ if (lease->state & SMB2_LEASE_WRITE_CACHING_LE)
+ lease_change_type = OPLOCK_WRITE_TO_NONE;
+ else
+ lease_change_type = OPLOCK_READ_TO_NONE;
+ ksmbd_debug(OPLOCK, "handle bad lease state 0x%x -> 0x%x\n",
+ le32_to_cpu(lease->state),
+ le32_to_cpu(req->LeaseState));
+ } else if (lease->state == SMB2_LEASE_READ_CACHING_LE &&
+ req->LeaseState != SMB2_LEASE_NONE_LE) {
+ err = STATUS_INVALID_OPLOCK_PROTOCOL;
+ lease_change_type = OPLOCK_READ_TO_NONE;
+ ksmbd_debug(OPLOCK, "handle bad lease state 0x%x -> 0x%x\n",
+ le32_to_cpu(lease->state),
+ le32_to_cpu(req->LeaseState));
+ } else {
+ /* valid lease state changes */
+ err = STATUS_INVALID_DEVICE_STATE;
+ if (req->LeaseState == SMB2_LEASE_NONE_LE) {
+ if (lease->state & SMB2_LEASE_WRITE_CACHING_LE)
+ lease_change_type = OPLOCK_WRITE_TO_NONE;
+ else
+ lease_change_type = OPLOCK_READ_TO_NONE;
+ } else if (req->LeaseState & SMB2_LEASE_READ_CACHING_LE) {
+ if (lease->state & SMB2_LEASE_WRITE_CACHING_LE)
+ lease_change_type = OPLOCK_WRITE_TO_READ;
+ else
+ lease_change_type = OPLOCK_READ_HANDLE_TO_READ;
+ } else {
+ lease_change_type = 0;
+ }
+ }
+
+ switch (lease_change_type) {
+ case OPLOCK_WRITE_TO_READ:
+ ret = opinfo_write_to_read(opinfo);
+ break;
+ case OPLOCK_READ_HANDLE_TO_READ:
+ ret = opinfo_read_handle_to_read(opinfo);
+ break;
+ case OPLOCK_WRITE_TO_NONE:
+ ret = opinfo_write_to_none(opinfo);
+ break;
+ case OPLOCK_READ_TO_NONE:
+ ret = opinfo_read_to_none(opinfo);
+ break;
+ default:
+ ksmbd_debug(OPLOCK, "unknown lease change 0x%x -> 0x%x\n",
+ le32_to_cpu(lease->state),
+ le32_to_cpu(req->LeaseState));
+ }
+
+ lease_state = lease->state;
+ opinfo->op_state = OPLOCK_STATE_NONE;
+ wake_up_interruptible_all(&opinfo->oplock_q);
+ atomic_dec(&opinfo->breaking_cnt);
+ wake_up_interruptible_all(&opinfo->oplock_brk);
+ opinfo_put(opinfo);
+
+ if (ret < 0) {
+ /* opinfo reference was already dropped above; avoid err_out */
+ rsp->hdr.Status = err;
+ smb2_set_err_rsp(work);
+ return;
+ }
+
+ rsp->StructureSize = cpu_to_le16(36);
+ rsp->Reserved = 0;
+ rsp->Flags = 0;
+ memcpy(rsp->LeaseKey, req->LeaseKey, 16);
+ rsp->LeaseState = lease_state;
+ rsp->LeaseDuration = 0;
+ inc_rfc1001_len(rsp, 36);
+ return;
+
+err_out:
+ opinfo->op_state = OPLOCK_STATE_NONE;
+ wake_up_interruptible_all(&opinfo->oplock_q);
+ atomic_dec(&opinfo->breaking_cnt);
+ wake_up_interruptible_all(&opinfo->oplock_brk);
+
+ opinfo_put(opinfo);
+ smb2_set_err_rsp(work);
+}
+
+/**
+ * smb2_oplock_break() - dispatcher for smb2.0 and 2.1 oplock/lease break
+ * @work: smb work containing oplock/lease break command buffer
+ *
+ * Return: 0
+ */
+int smb2_oplock_break(struct ksmbd_work *work)
+{
+ struct smb2_oplock_break *req = work->request_buf;
+ struct smb2_oplock_break *rsp = work->response_buf;
+
+ switch (le16_to_cpu(req->StructureSize)) {
+ case OP_BREAK_STRUCT_SIZE_20:
+ smb20_oplock_break_ack(work);
+ break;
+ case OP_BREAK_STRUCT_SIZE_21:
+ smb21_lease_break_ack(work);
+ break;
+ default:
+ ksmbd_debug(OPLOCK, "invalid break cmd %d\n",
+ le16_to_cpu(req->StructureSize));
+ rsp->hdr.Status = STATUS_INVALID_PARAMETER;
+ smb2_set_err_rsp(work);
+ }
+
+ return 0;
+}
+
+/**
+ * smb2_notify() - handler for smb2 notify request
+ * @work: smb work containing notify command buffer
+ *
+ * Return: 0
+ */
+int smb2_notify(struct ksmbd_work *work)
+{
+ struct smb2_notify_req *req;
+ struct smb2_notify_rsp *rsp;
+
+ WORK_BUFFERS(work, req, rsp);
+
+ if (work->next_smb2_rcv_hdr_off && req->hdr.NextCommand) {
+ rsp->hdr.Status = STATUS_INTERNAL_ERROR;
+ smb2_set_err_rsp(work);
+ return 0;
+ }
+
+ smb2_set_err_rsp(work);
+ rsp->hdr.Status = STATUS_NOT_IMPLEMENTED;
+ return 0;
+}
+
+/**
+ * smb2_is_sign_req() - handler for checking packet signing status
+ * @work: smb work containing request buffer
+ * @command: SMB2 command id
+ *
+ * Return: true if packet is signed, false otherwise
+ */
+bool smb2_is_sign_req(struct ksmbd_work *work, unsigned int command)
+{
+ struct smb2_hdr *rcv_hdr2 = work->request_buf;
+
+ if ((rcv_hdr2->Flags & SMB2_FLAGS_SIGNED) &&
+ command != SMB2_NEGOTIATE_HE &&
+ command != SMB2_SESSION_SETUP_HE &&
+ command != SMB2_OPLOCK_BREAK_HE)
+ return true;
+
+ return false;
+}
+
+/**
+ * smb2_check_sign_req() - handler for req packet sign processing
+ * @work: smb work containing request buffer
+ *
+ * Return: 1 on success, 0 otherwise
+ */
+int smb2_check_sign_req(struct ksmbd_work *work)
+{
+ struct smb2_hdr *hdr, *hdr_org;
+ char signature_req[SMB2_SIGNATURE_SIZE];
+ char signature[SMB2_HMACSHA256_SIZE];
+ struct kvec iov[1];
+ size_t len;
+
+ hdr_org = hdr = work->request_buf;
+ if (work->next_smb2_rcv_hdr_off)
+ hdr = ksmbd_req_buf_next(work);
+
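+ /*
+ * Determine how much of the buffer this signature covers: the
+ * whole PDU for a single request, NextCommand bytes for a
+ * non-final compound request, and the remaining bytes for the
+ * final request of a compound chain.
+ */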
+ if (!hdr->NextCommand && !work->next_smb2_rcv_hdr_off)
+ len = be32_to_cpu(hdr_org->smb2_buf_length);
+ else if (hdr->NextCommand)
+ len = le32_to_cpu(hdr->NextCommand);
+ else
+ len = be32_to_cpu(hdr_org->smb2_buf_length) -
+ work->next_smb2_rcv_hdr_off;
+
+ memcpy(signature_req, hdr->Signature, SMB2_SIGNATURE_SIZE);
+ memset(hdr->Signature, 0, SMB2_SIGNATURE_SIZE);
+
+ iov[0].iov_base = (char *)&hdr->ProtocolId;
+ iov[0].iov_len = len;
+
+ if (ksmbd_sign_smb2_pdu(work->conn, work->sess->sess_key, iov, 1,
+ signature))
+ return 0;
+
+ if (memcmp(signature, signature_req, SMB2_SIGNATURE_SIZE)) {
+ pr_err("bad smb2 signature\n");
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * smb2_set_sign_rsp() - handler for rsp packet sign processing
+ * @work: smb work containing response buffer
+ *
+ */
+void smb2_set_sign_rsp(struct ksmbd_work *work)
+{
+ struct smb2_hdr *hdr, *hdr_org;
+ struct smb2_hdr *req_hdr;
+ char signature[SMB2_HMACSHA256_SIZE];
+ struct kvec iov[2];
+ size_t len;
+ int n_vec = 1;
+
+ hdr_org = hdr = work->response_buf;
+ if (work->next_smb2_rsp_hdr_off)
+ hdr = ksmbd_resp_buf_next(work);
+
+ req_hdr = ksmbd_req_buf_next(work);
+
+ if (!work->next_smb2_rsp_hdr_off) {
+ len = get_rfc1002_len(hdr_org);
+ if (req_hdr->NextCommand)
+ len = ALIGN(len, 8);
+ } else {
+ len = get_rfc1002_len(hdr_org) - work->next_smb2_rsp_hdr_off;
+ len = ALIGN(len, 8);
+ }
+
+ if (req_hdr->NextCommand)
+ hdr->NextCommand = cpu_to_le32(len);
+
+ hdr->Flags |= SMB2_FLAGS_SIGNED;
+ memset(hdr->Signature, 0, SMB2_SIGNATURE_SIZE);
+
+ iov[0].iov_base = (char *)&hdr->ProtocolId;
+ iov[0].iov_len = len;
+
+ if (work->aux_payload_sz) {
+ iov[0].iov_len -= work->aux_payload_sz;
+
+ iov[1].iov_base = work->aux_payload_buf;
+ iov[1].iov_len = work->aux_payload_sz;
+ n_vec++;
+ }
+
+ if (!ksmbd_sign_smb2_pdu(work->conn, work->sess->sess_key, iov, n_vec,
+ signature))
+ memcpy(hdr->Signature, signature, SMB2_SIGNATURE_SIZE);
+}
+
+/**
+ * smb3_check_sign_req() - handler for req packet sign processing
+ * @work: smb work containing request buffer
+ *
+ * Return: 1 on success, 0 otherwise
+ */
+int smb3_check_sign_req(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ char *signing_key;
+ struct smb2_hdr *hdr, *hdr_org;
+ struct channel *chann;
+ char signature_req[SMB2_SIGNATURE_SIZE];
+ char signature[SMB2_CMACAES_SIZE];
+ struct kvec iov[1];
+ size_t len;
+
+ hdr_org = hdr = work->request_buf;
+ if (work->next_smb2_rcv_hdr_off)
+ hdr = ksmbd_req_buf_next(work);
+
+ if (!hdr->NextCommand && !work->next_smb2_rcv_hdr_off)
+ len = be32_to_cpu(hdr_org->smb2_buf_length);
+ else if (hdr->NextCommand)
+ len = le32_to_cpu(hdr->NextCommand);
+ else
+ len = be32_to_cpu(hdr_org->smb2_buf_length) -
+ work->next_smb2_rcv_hdr_off;
+
+ if (le16_to_cpu(hdr->Command) == SMB2_SESSION_SETUP_HE) {
+ signing_key = work->sess->smb3signingkey;
+ } else {
+ chann = lookup_chann_list(work->sess, conn);
+ if (!chann)
+ return 0;
+ signing_key = chann->smb3signingkey;
+ }
+
+ if (!signing_key) {
+ pr_err("SMB3 signing key is not generated\n");
+ return 0;
+ }
+
+ memcpy(signature_req, hdr->Signature, SMB2_SIGNATURE_SIZE);
+ memset(hdr->Signature, 0, SMB2_SIGNATURE_SIZE);
+ iov[0].iov_base = (char *)&hdr->ProtocolId;
+ iov[0].iov_len = len;
+
+ if (ksmbd_sign_smb3_pdu(conn, signing_key, iov, 1, signature))
+ return 0;
+
+ if (memcmp(signature, signature_req, SMB2_SIGNATURE_SIZE)) {
+ pr_err("bad smb2 signature\n");
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * smb3_set_sign_rsp() - handler for rsp packet sign processing
+ * @work: smb work containing response buffer
+ *
+ */
+void smb3_set_sign_rsp(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_hdr *req_hdr;
+ struct smb2_hdr *hdr, *hdr_org;
+ struct channel *chann;
+ char signature[SMB2_CMACAES_SIZE];
+ struct kvec iov[2];
+ int n_vec = 1;
+ size_t len;
+ char *signing_key;
+
+ hdr_org = hdr = work->response_buf;
+ if (work->next_smb2_rsp_hdr_off)
+ hdr = ksmbd_resp_buf_next(work);
+
+ req_hdr = ksmbd_req_buf_next(work);
+
+ if (!work->next_smb2_rsp_hdr_off) {
+ len = get_rfc1002_len(hdr_org);
+ if (req_hdr->NextCommand)
+ len = ALIGN(len, 8);
+ } else {
+ len = get_rfc1002_len(hdr_org) - work->next_smb2_rsp_hdr_off;
+ len = ALIGN(len, 8);
+ }
+
+ if (!conn->binding &&
+ le16_to_cpu(hdr->Command) == SMB2_SESSION_SETUP_HE) {
+ signing_key = work->sess->smb3signingkey;
+ } else {
+ chann = lookup_chann_list(work->sess, work->conn);
+ if (!chann)
+ return;
+ signing_key = chann->smb3signingkey;
+ }
+
+ if (!signing_key)
+ return;
+
+ if (req_hdr->NextCommand)
+ hdr->NextCommand = cpu_to_le32(len);
+
+ hdr->Flags |= SMB2_FLAGS_SIGNED;
+ memset(hdr->Signature, 0, SMB2_SIGNATURE_SIZE);
+ iov[0].iov_base = (char *)&hdr->ProtocolId;
+ iov[0].iov_len = len;
+ if (work->aux_payload_sz) {
+ iov[0].iov_len -= work->aux_payload_sz;
+ iov[1].iov_base = work->aux_payload_buf;
+ iov[1].iov_len = work->aux_payload_sz;
+ n_vec++;
+ }
+
+ if (!ksmbd_sign_smb3_pdu(conn, signing_key, iov, n_vec, signature))
+ memcpy(hdr->Signature, signature, SMB2_SIGNATURE_SIZE);
+}
+
+/**
+ * smb3_preauth_hash_rsp() - handler for computing preauth hash on response
+ * @work: smb work containing response buffer
+ *
+ */
+void smb3_preauth_hash_rsp(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_session *sess = work->sess;
+ struct smb2_hdr *req, *rsp;
+
+ if (conn->dialect != SMB311_PROT_ID)
+ return;
+
+ WORK_BUFFERS(work, req, rsp);
+
+ if (le16_to_cpu(req->Command) == SMB2_NEGOTIATE_HE)
+ ksmbd_gen_preauth_integrity_hash(conn, (char *)rsp,
+ conn->preauth_info->Preauth_HashValue);
+
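+ /*
+ * For SMB 3.1.1, the preauth integrity hash also chains over every
+ * session setup response; a binding session setup updates the
+ * per-connection preauth session state instead of the session's.
+ */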
+ if (le16_to_cpu(rsp->Command) == SMB2_SESSION_SETUP_HE && sess) {
+ __u8 *hash_value;
+
+ if (conn->binding) {
+ struct preauth_session *preauth_sess;
+
+ preauth_sess = ksmbd_preauth_session_lookup(conn, sess->id);
+ if (!preauth_sess)
+ return;
+ hash_value = preauth_sess->Preauth_HashValue;
+ } else {
+ hash_value = sess->Preauth_HashValue;
+ if (!hash_value)
+ return;
+ }
+ ksmbd_gen_preauth_integrity_hash(conn, (char *)rsp,
+ hash_value);
+ }
+}
+
+static void fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, char *old_buf,
+ __le16 cipher_type)
+{
+ struct smb2_hdr *hdr = (struct smb2_hdr *)old_buf;
+ unsigned int orig_len = get_rfc1002_len(old_buf);
+
+ memset(tr_hdr, 0, sizeof(struct smb2_transform_hdr));
+ tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM;
+ tr_hdr->OriginalMessageSize = cpu_to_le32(orig_len);
+ tr_hdr->Flags = cpu_to_le16(0x01);
+ if (cipher_type == SMB2_ENCRYPTION_AES128_GCM ||
+ cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ get_random_bytes(&tr_hdr->Nonce, SMB3_AES_GCM_NONCE);
+ else
+ get_random_bytes(&tr_hdr->Nonce, SMB3_AES_CCM_NONCE);
+ memcpy(&tr_hdr->SessionId, &hdr->SessionId, 8);
+ inc_rfc1001_len(tr_hdr, sizeof(struct smb2_transform_hdr) - 4);
+ inc_rfc1001_len(tr_hdr, orig_len);
+}
+
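+/*
+ * Encrypt the response in place: iov[0] holds the transform header,
+ * iov[1] the SMB2 PDU past the 4-byte length field, and iov[2] an
+ * optional aux payload. The last argument of ksmbd_crypt_message()
+ * appears to select encryption (1) here versus decryption (0) in
+ * smb3_decrypt_req() below.
+ */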
+int smb3_encrypt_resp(struct ksmbd_work *work)
+{
+ char *buf = work->response_buf;
+ struct smb2_transform_hdr *tr_hdr;
+ struct kvec iov[3];
+ int rc = -ENOMEM;
+ int buf_size = 0, rq_nvec = 2 + (work->aux_payload_sz ? 1 : 0);
+
+ if (ARRAY_SIZE(iov) < rq_nvec)
+ return -ENOMEM;
+
+ tr_hdr = kzalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL);
+ if (!tr_hdr)
+ return rc;
+
+ /* fill transform header */
+ fill_transform_hdr(tr_hdr, buf, work->conn->cipher_type);
+
+ iov[0].iov_base = tr_hdr;
+ iov[0].iov_len = sizeof(struct smb2_transform_hdr);
+ buf_size += iov[0].iov_len - 4;
+
+ iov[1].iov_base = buf + 4;
+ iov[1].iov_len = get_rfc1002_len(buf);
+ if (work->aux_payload_sz) {
+ iov[1].iov_len = work->resp_hdr_sz - 4;
+
+ iov[2].iov_base = work->aux_payload_buf;
+ iov[2].iov_len = work->aux_payload_sz;
+ buf_size += iov[2].iov_len;
+ }
+ buf_size += iov[1].iov_len;
+ work->resp_hdr_sz = iov[1].iov_len;
+
+ rc = ksmbd_crypt_message(work->conn, iov, rq_nvec, 1);
+ if (rc) {
+ kfree(tr_hdr);
+ return rc;
+ }
+
+ memmove(buf, iov[1].iov_base, iov[1].iov_len);
+ tr_hdr->smb2_buf_length = cpu_to_be32(buf_size);
+ work->tr_buf = tr_hdr;
+
+ return rc;
+}
+
+bool smb3_is_transform_hdr(void *buf)
+{
+ struct smb2_transform_hdr *trhdr = buf;
+
+ return trhdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM;
+}
+
+int smb3_decrypt_req(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct ksmbd_session *sess;
+ char *buf = work->request_buf;
+ struct smb2_hdr *hdr;
+ unsigned int pdu_length = get_rfc1002_len(buf);
+ struct kvec iov[2];
+ unsigned int buf_data_size = pdu_length + 4 -
+ sizeof(struct smb2_transform_hdr);
+ struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
+ unsigned int orig_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
+ int rc = 0;
+
+ sess = ksmbd_session_lookup_all(conn, le64_to_cpu(tr_hdr->SessionId));
+ if (!sess) {
+ pr_err("invalid session id(%llx) in transform header\n",
+ le64_to_cpu(tr_hdr->SessionId));
+ return -ECONNABORTED;
+ }
+
+ if (pdu_length + 4 <
+ sizeof(struct smb2_transform_hdr) + sizeof(struct smb2_hdr)) {
+ pr_err("Transform message is too small (%u)\n",
+ pdu_length);
+ return -ECONNABORTED;
+ }
+
+ if (pdu_length + 4 < orig_len + sizeof(struct smb2_transform_hdr)) {
+ pr_err("Transform message is broken\n");
+ return -ECONNABORTED;
+ }
+
+ iov[0].iov_base = buf;
+ iov[0].iov_len = sizeof(struct smb2_transform_hdr);
+ iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr);
+ iov[1].iov_len = buf_data_size;
+ rc = ksmbd_crypt_message(conn, iov, 2, 0);
+ if (rc)
+ return rc;
+
+ memmove(buf + 4, iov[1].iov_base, buf_data_size);
+ hdr = (struct smb2_hdr *)buf;
+ hdr->smb2_buf_length = cpu_to_be32(buf_data_size);
+
+ return rc;
+}
+
+bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct smb2_hdr *rsp = work->response_buf;
+
+ if (conn->dialect < SMB30_PROT_ID)
+ return false;
+
+ if (work->next_smb2_rcv_hdr_off)
+ rsp = ksmbd_resp_buf_next(work);
+
+ if (le16_to_cpu(rsp->Command) == SMB2_SESSION_SETUP_HE &&
+ rsp->Status == STATUS_SUCCESS)
+ return true;
+ return false;
+}
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
new file mode 100644
index 000000000000..bcec845b03f3
--- /dev/null
+++ b/fs/ksmbd/smb2pdu.h
@@ -0,0 +1,1698 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef _SMB2PDU_H
+#define _SMB2PDU_H
+
+#include "ntlmssp.h"
+#include "smbacl.h"
+
+/*
+ * Note that, due to trying to use names similar to the protocol specifications,
+ * there are many mixed case field names in the structures below. Although
+ * this does not match typical Linux kernel style, it is necessary to be
+ * able to match against the protocol specification.
+ *
+ * SMB2 commands
+ * Some commands have minimal (wct=0,bcc=0), or uninteresting, responses
+ * (i.e. no useful data other than the SMB error code itself) and are marked as such.
+ * Knowing this helps avoid response buffer allocations and copy in some cases.
+ */
+
+/* List of commands in host endian */
+#define SMB2_NEGOTIATE_HE 0x0000
+#define SMB2_SESSION_SETUP_HE 0x0001
+#define SMB2_LOGOFF_HE 0x0002 /* trivial request/resp */
+#define SMB2_TREE_CONNECT_HE 0x0003
+#define SMB2_TREE_DISCONNECT_HE 0x0004 /* trivial req/resp */
+#define SMB2_CREATE_HE 0x0005
+#define SMB2_CLOSE_HE 0x0006
+#define SMB2_FLUSH_HE 0x0007 /* trivial resp */
+#define SMB2_READ_HE 0x0008
+#define SMB2_WRITE_HE 0x0009
+#define SMB2_LOCK_HE 0x000A
+#define SMB2_IOCTL_HE 0x000B
+#define SMB2_CANCEL_HE 0x000C
+#define SMB2_ECHO_HE 0x000D
+#define SMB2_QUERY_DIRECTORY_HE 0x000E
+#define SMB2_CHANGE_NOTIFY_HE 0x000F
+#define SMB2_QUERY_INFO_HE 0x0010
+#define SMB2_SET_INFO_HE 0x0011
+#define SMB2_OPLOCK_BREAK_HE 0x0012
+
+/* The same list in little endian */
+#define SMB2_NEGOTIATE cpu_to_le16(SMB2_NEGOTIATE_HE)
+#define SMB2_SESSION_SETUP cpu_to_le16(SMB2_SESSION_SETUP_HE)
+#define SMB2_LOGOFF cpu_to_le16(SMB2_LOGOFF_HE)
+#define SMB2_TREE_CONNECT cpu_to_le16(SMB2_TREE_CONNECT_HE)
+#define SMB2_TREE_DISCONNECT cpu_to_le16(SMB2_TREE_DISCONNECT_HE)
+#define SMB2_CREATE cpu_to_le16(SMB2_CREATE_HE)
+#define SMB2_CLOSE cpu_to_le16(SMB2_CLOSE_HE)
+#define SMB2_FLUSH cpu_to_le16(SMB2_FLUSH_HE)
+#define SMB2_READ cpu_to_le16(SMB2_READ_HE)
+#define SMB2_WRITE cpu_to_le16(SMB2_WRITE_HE)
+#define SMB2_LOCK cpu_to_le16(SMB2_LOCK_HE)
+#define SMB2_IOCTL cpu_to_le16(SMB2_IOCTL_HE)
+#define SMB2_CANCEL cpu_to_le16(SMB2_CANCEL_HE)
+#define SMB2_ECHO cpu_to_le16(SMB2_ECHO_HE)
+#define SMB2_QUERY_DIRECTORY cpu_to_le16(SMB2_QUERY_DIRECTORY_HE)
+#define SMB2_CHANGE_NOTIFY cpu_to_le16(SMB2_CHANGE_NOTIFY_HE)
+#define SMB2_QUERY_INFO cpu_to_le16(SMB2_QUERY_INFO_HE)
+#define SMB2_SET_INFO cpu_to_le16(SMB2_SET_INFO_HE)
+#define SMB2_OPLOCK_BREAK cpu_to_le16(SMB2_OPLOCK_BREAK_HE)
+
+/*Create Action Flags*/
+#define FILE_SUPERSEDED 0x00000000
+#define FILE_OPENED 0x00000001
+#define FILE_CREATED 0x00000002
+#define FILE_OVERWRITTEN 0x00000003
+
+/*
+ * Size of the session key (crypto key encrypted with the password)
+ */
+#define SMB2_NTLMV2_SESSKEY_SIZE 16
+#define SMB2_SIGNATURE_SIZE 16
+#define SMB2_HMACSHA256_SIZE 32
+#define SMB2_CMACAES_SIZE 16
+#define SMB3_GCM128_CRYPTKEY_SIZE 16
+#define SMB3_GCM256_CRYPTKEY_SIZE 32
+
+/*
+ * Size of the smb3 encryption/decryption keys
+ */
+#define SMB3_ENC_DEC_KEY_SIZE 32
+
+/*
+ * Size of the smb3 signing key
+ */
+#define SMB3_SIGN_KEY_SIZE 16
+
+#define CIFS_CLIENT_CHALLENGE_SIZE 8
+#define SMB_SERVER_CHALLENGE_SIZE 8
+
+/* SMB2 Max Credits */
+#define SMB2_MAX_CREDITS 8192
+
+#define SMB2_CLIENT_GUID_SIZE 16
+#define SMB2_CREATE_GUID_SIZE 16
+
+/* Maximum buffer size value we can send with 1 credit */
+#define SMB2_MAX_BUFFER_SIZE 65536
+
+#define NUMBER_OF_SMB2_COMMANDS 0x0013
+
+/* BB FIXME - analyze following length BB */
+#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
+
+#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) /* 'B''M''S' */
+#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
+
+#define SMB21_DEFAULT_IOSIZE (1024 * 1024)
+#define SMB3_DEFAULT_IOSIZE (4 * 1024 * 1024)
+#define SMB3_DEFAULT_TRANS_SIZE (1024 * 1024)
+
+/*
+ * SMB2 Header Definition
+ *
+ * "MBZ" : Must be Zero
+ * "BB" : BugBug, Something to check/review/analyze later
+ * "PDU" : "Protocol Data Unit" (ie a network "frame")
+ *
+ */
+
+#define __SMB2_HEADER_STRUCTURE_SIZE 64
+#define SMB2_HEADER_STRUCTURE_SIZE \
+ cpu_to_le16(__SMB2_HEADER_STRUCTURE_SIZE)
+
+struct smb2_hdr {
+ __be32 smb2_buf_length; /* big endian on wire */
+ /*
+ * length is only two or three bytes - with
+ * one or two byte type preceding it that MBZ
+ */
+ __le32 ProtocolId; /* 0xFE 'S' 'M' 'B' */
+ __le16 StructureSize; /* 64 */
+ __le16 CreditCharge; /* MBZ */
+ __le32 Status; /* Error from server */
+ __le16 Command;
+ __le16 CreditRequest; /* CreditResponse */
+ __le32 Flags;
+ __le32 NextCommand;
+ __le64 MessageId;
+ union {
+ struct {
+ __le32 ProcessId;
+ __le32 TreeId;
+ } __packed SyncId;
+ __le64 AsyncId;
+ } __packed Id;
+ __le64 SessionId;
+ __u8 Signature[16];
+} __packed;
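+
+/*
+ * Note: the length helpers used in smb2pdu.c (get_rfc1002_len(),
+ * inc_rfc1001_len()) operate on this 4-byte field; a sketch of the
+ * usual decoding, assuming only the low three bytes carry the length:
+ *
+ *	len = be32_to_cpu(hdr->smb2_buf_length) & 0x00ffffff;
+ */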
+
+struct smb2_pdu {
+ struct smb2_hdr hdr;
+ __le16 StructureSize2; /* size of wct area (varies, request specific) */
+} __packed;
+
+#define SMB3_AES_CCM_NONCE 11
+#define SMB3_AES_GCM_NONCE 12
+
+struct smb2_transform_hdr {
+ __be32 smb2_buf_length; /* big endian on wire */
+ /*
+ * length is only two or three bytes - with
+ * one or two byte type preceding it that MBZ
+ */
+ __le32 ProtocolId; /* 0xFD 'S' 'M' 'B' */
+ __u8 Signature[16];
+ __u8 Nonce[16];
+ __le32 OriginalMessageSize;
+ __u16 Reserved1;
+ __le16 Flags; /* EncryptionAlgorithm */
+ __le64 SessionId;
+} __packed;
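+
+/*
+ * The transform header prefixes an encrypted PDU on the wire. Only the
+ * first SMB3_AES_CCM_NONCE (11) or SMB3_AES_GCM_NONCE (12) bytes of
+ * Nonce are used, depending on the negotiated cipher; see
+ * fill_transform_hdr() in smb2pdu.c.
+ */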
+
+/*
+ * SMB2 flag definitions
+ */
+#define SMB2_FLAGS_SERVER_TO_REDIR cpu_to_le32(0x00000001)
+#define SMB2_FLAGS_ASYNC_COMMAND cpu_to_le32(0x00000002)
+#define SMB2_FLAGS_RELATED_OPERATIONS cpu_to_le32(0x00000004)
+#define SMB2_FLAGS_SIGNED cpu_to_le32(0x00000008)
+#define SMB2_FLAGS_DFS_OPERATIONS cpu_to_le32(0x10000000)
+#define SMB2_FLAGS_REPLAY_OPERATIONS cpu_to_le32(0x20000000)
+
+/*
+ * Definitions for SMB2 Protocol Data Units (network frames)
+ *
+ * See MS-SMB2.PDF specification for protocol details.
+ * The Naming convention is the lower case version of the SMB2
+ * command code name for the struct. Note that structures must be packed.
+ *
+ */
+
+#define SMB2_ERROR_STRUCTURE_SIZE2 9
+#define SMB2_ERROR_STRUCTURE_SIZE2_LE cpu_to_le16(SMB2_ERROR_STRUCTURE_SIZE2)
+
+struct smb2_err_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize;
+ __u8 ErrorContextCount;
+ __u8 Reserved;
+ __le32 ByteCount; /* even if zero, at least one byte follows */
+ __u8 ErrorData[1]; /* variable length */
+} __packed;
+
+struct smb2_negotiate_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 36 */
+ __le16 DialectCount;
+ __le16 SecurityMode;
+ __le16 Reserved; /* MBZ */
+ __le32 Capabilities;
+ __u8 ClientGUID[SMB2_CLIENT_GUID_SIZE];
+ /* In SMB3.02 and earlier next three were MBZ le64 ClientStartTime */
+ __le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */
+ __le16 NegotiateContextCount; /* SMB3.1.1 only. MBZ earlier */
+ __le16 Reserved2;
+ __le16 Dialects[1]; /* One dialect (vers=) at a time for now */
+} __packed;
+
+/* SecurityMode flags */
+#define SMB2_NEGOTIATE_SIGNING_ENABLED_LE cpu_to_le16(0x0001)
+#define SMB2_NEGOTIATE_SIGNING_REQUIRED 0x0002
+#define SMB2_NEGOTIATE_SIGNING_REQUIRED_LE cpu_to_le16(0x0002)
+/* Capabilities flags */
+#define SMB2_GLOBAL_CAP_DFS 0x00000001
+#define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */
+#define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */
+#define SMB2_GLOBAL_CAP_MULTI_CHANNEL 0x00000008 /* New to SMB3 */
+#define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */
+#define SMB2_GLOBAL_CAP_DIRECTORY_LEASING 0x00000020 /* New to SMB3 */
+#define SMB2_GLOBAL_CAP_ENCRYPTION 0x00000040 /* New to SMB3 */
+/* Internal types */
+#define SMB2_NT_FIND 0x00100000
+#define SMB2_LARGE_FILES 0x00200000
+
+#define SMB311_SALT_SIZE 32
+/* Hash Algorithm Types */
+#define SMB2_PREAUTH_INTEGRITY_SHA512 cpu_to_le16(0x0001)
+
+#define PREAUTH_HASHVALUE_SIZE 64
+
+struct preauth_integrity_info {
+ /* PreAuth integrity Hash ID */
+ __le16 Preauth_HashId;
+ /* PreAuth integrity Hash Value */
+ __u8 Preauth_HashValue[PREAUTH_HASHVALUE_SIZE];
+};
+
+/* offset is sizeof smb2_negotiate_rsp - 4 but rounded up to 8 bytes. */
+#ifdef CONFIG_SMB_SERVER_KERBEROS5
+/* sizeof(struct smb2_negotiate_rsp) - 4 =
+ * header(64) + response(64) + GSS_LENGTH(96) + GSS_PADDING(0)
+ */
+#define OFFSET_OF_NEG_CONTEXT 0xe0
+#else
+/* sizeof(struct smb2_negotiate_rsp) - 4 =
+ * header(64) + response(64) + GSS_LENGTH(74) + GSS_PADDING(6)
+ */
+#define OFFSET_OF_NEG_CONTEXT 0xd0
+#endif
+
+#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1)
+#define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2)
+#define SMB2_COMPRESSION_CAPABILITIES cpu_to_le16(3)
+#define SMB2_NETNAME_NEGOTIATE_CONTEXT_ID cpu_to_le16(5)
+#define SMB2_SIGNING_CAPABILITIES cpu_to_le16(8)
+#define SMB2_POSIX_EXTENSIONS_AVAILABLE cpu_to_le16(0x100)
+
+struct smb2_neg_context {
+ __le16 ContextType;
+ __le16 DataLength;
+ __le32 Reserved;
+ /* Followed by array of data */
+} __packed;
+
+struct smb2_preauth_neg_context {
+ __le16 ContextType; /* 1 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le16 HashAlgorithmCount; /* 1 */
+ __le16 SaltLength;
+ __le16 HashAlgorithms; /* HashAlgorithms[0] since only one defined */
+ __u8 Salt[SMB311_SALT_SIZE];
+} __packed;
+
+/* Encryption Algorithms Ciphers */
+#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_GCM cpu_to_le16(0x0002)
+#define SMB2_ENCRYPTION_AES256_CCM cpu_to_le16(0x0003)
+#define SMB2_ENCRYPTION_AES256_GCM cpu_to_le16(0x0004)
+
+struct smb2_encryption_neg_context {
+ __le16 ContextType; /* 2 */
+ __le16 DataLength;
+ __le32 Reserved;
+ /* CipherCount usually 2, but can be 3 when AES256-GCM enabled */
+ __le16 CipherCount; /* AES-128-GCM and AES-128-CCM by default */
+ __le16 Ciphers[];
+} __packed;
+
+#define SMB3_COMPRESS_NONE cpu_to_le16(0x0000)
+#define SMB3_COMPRESS_LZNT1 cpu_to_le16(0x0001)
+#define SMB3_COMPRESS_LZ77 cpu_to_le16(0x0002)
+#define SMB3_COMPRESS_LZ77_HUFF cpu_to_le16(0x0003)
+
+struct smb2_compression_ctx {
+ __le16 ContextType; /* 3 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le16 CompressionAlgorithmCount;
+ __u16 Padding;
+ __le32 Reserved1;
+ __le16 CompressionAlgorithms[];
+} __packed;
+
+#define POSIX_CTXT_DATA_LEN 16
+struct smb2_posix_neg_context {
+ __le16 ContextType; /* 0x100 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __u8 Name[16]; /* POSIX ctxt GUID 93AD25509CB411E7B42383DE968BCD7C */
+} __packed;
+
+struct smb2_netname_neg_context {
+ __le16 ContextType; /* 5 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le16 NetName[]; /* hostname of target converted to UCS-2 */
+} __packed;
+
+/* Signing algorithms */
+#define SIGNING_ALG_HMAC_SHA256 cpu_to_le16(0)
+#define SIGNING_ALG_AES_CMAC cpu_to_le16(1)
+#define SIGNING_ALG_AES_GMAC cpu_to_le16(2)
+
+struct smb2_signing_capabilities {
+ __le16 ContextType; /* 8 */
+ __le16 DataLength;
+ __le32 Reserved;
+ __le16 SigningAlgorithmCount;
+ __le16 SigningAlgorithms[];
+} __packed;
+
+struct smb2_negotiate_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 65 */
+ __le16 SecurityMode;
+ __le16 DialectRevision;
+ __le16 NegotiateContextCount; /* Prior to SMB3.1.1 was Reserved & MBZ */
+ __u8 ServerGUID[16];
+ __le32 Capabilities;
+ __le32 MaxTransactSize;
+ __le32 MaxReadSize;
+ __le32 MaxWriteSize;
+ __le64 SystemTime; /* MBZ */
+ __le64 ServerStartTime;
+ __le16 SecurityBufferOffset;
+ __le16 SecurityBufferLength;
+ __le32 NegotiateContextOffset; /* Pre:SMB3.1.1 was reserved/ignored */
+ __u8 Buffer[1]; /* variable length GSS security buffer */
+} __packed;
+
+/* Flags */
+#define SMB2_SESSION_REQ_FLAG_BINDING 0x01
+#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA 0x04
+
+#define SMB2_SESSION_EXPIRED (0)
+#define SMB2_SESSION_IN_PROGRESS BIT(0)
+#define SMB2_SESSION_VALID BIT(1)
+
+struct smb2_sess_setup_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 25 */
+ __u8 Flags;
+ __u8 SecurityMode;
+ __le32 Capabilities;
+ __le32 Channel;
+ __le16 SecurityBufferOffset;
+ __le16 SecurityBufferLength;
+ __le64 PreviousSessionId;
+ __u8 Buffer[1]; /* variable length GSS security buffer */
+} __packed;
+
+/* Flags/Reserved for SMB3.1.1 */
+#define SMB2_SHAREFLAG_CLUSTER_RECONNECT 0x0001
+
+/* Currently defined SessionFlags */
+#define SMB2_SESSION_FLAG_IS_GUEST_LE cpu_to_le16(0x0001)
+#define SMB2_SESSION_FLAG_IS_NULL_LE cpu_to_le16(0x0002)
+#define SMB2_SESSION_FLAG_ENCRYPT_DATA_LE cpu_to_le16(0x0004)
+struct smb2_sess_setup_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 9 */
+ __le16 SessionFlags;
+ __le16 SecurityBufferOffset;
+ __le16 SecurityBufferLength;
+ __u8 Buffer[1]; /* variable length GSS security buffer */
+} __packed;
+
+struct smb2_logoff_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __le16 Reserved;
+} __packed;
+
+struct smb2_logoff_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __le16 Reserved;
+} __packed;
+
+struct smb2_tree_connect_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 9 */
+ __le16 Reserved; /* Flags in SMB3.1.1 */
+ __le16 PathOffset;
+ __le16 PathLength;
+ __u8 Buffer[1]; /* variable length */
+} __packed;
+
+struct smb2_tree_connect_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 16 */
+ __u8 ShareType; /* see below */
+ __u8 Reserved;
+ __le32 ShareFlags; /* see below */
+ __le32 Capabilities; /* see below */
+ __le32 MaximalAccess;
+} __packed;
+
+/* Possible ShareType values */
+#define SMB2_SHARE_TYPE_DISK 0x01
+#define SMB2_SHARE_TYPE_PIPE 0x02
+#define SMB2_SHARE_TYPE_PRINT 0x03
+
+/*
+ * Possible ShareFlags - exactly one and only one of the first 4 caching flags
+ * must be set (any of the remaining, SHI1005, flags may be set individually
+ * or in combination).
+ */
+#define SMB2_SHAREFLAG_MANUAL_CACHING 0x00000000
+#define SMB2_SHAREFLAG_AUTO_CACHING 0x00000010
+#define SMB2_SHAREFLAG_VDO_CACHING 0x00000020
+#define SMB2_SHAREFLAG_NO_CACHING 0x00000030
+#define SHI1005_FLAGS_DFS 0x00000001
+#define SHI1005_FLAGS_DFS_ROOT 0x00000002
+#define SHI1005_FLAGS_RESTRICT_EXCLUSIVE_OPENS 0x00000100
+#define SHI1005_FLAGS_FORCE_SHARED_DELETE 0x00000200
+#define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING 0x00000400
+#define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM 0x00000800
+#define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK 0x00001000
+#define SHI1005_FLAGS_ENABLE_HASH 0x00002000
+
+/* Possible share capabilities */
+#define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008)
+
+struct smb2_tree_disconnect_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __le16 Reserved;
+} __packed;
+
+struct smb2_tree_disconnect_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __le16 Reserved;
+} __packed;
+
+#define ATTR_READONLY_LE cpu_to_le32(ATTR_READONLY)
+#define ATTR_HIDDEN_LE cpu_to_le32(ATTR_HIDDEN)
+#define ATTR_SYSTEM_LE cpu_to_le32(ATTR_SYSTEM)
+#define ATTR_DIRECTORY_LE cpu_to_le32(ATTR_DIRECTORY)
+#define ATTR_ARCHIVE_LE cpu_to_le32(ATTR_ARCHIVE)
+#define ATTR_NORMAL_LE cpu_to_le32(ATTR_NORMAL)
+#define ATTR_TEMPORARY_LE cpu_to_le32(ATTR_TEMPORARY)
+#define ATTR_SPARSE_FILE_LE cpu_to_le32(ATTR_SPARSE)
+#define ATTR_REPARSE_POINT_LE cpu_to_le32(ATTR_REPARSE)
+#define ATTR_COMPRESSED_LE cpu_to_le32(ATTR_COMPRESSED)
+#define ATTR_OFFLINE_LE cpu_to_le32(ATTR_OFFLINE)
+#define ATTR_NOT_CONTENT_INDEXED_LE cpu_to_le32(ATTR_NOT_CONTENT_INDEXED)
+#define ATTR_ENCRYPTED_LE cpu_to_le32(ATTR_ENCRYPTED)
+#define ATTR_INTEGRITY_STREAML_LE cpu_to_le32(0x00008000)
+#define ATTR_NO_SCRUB_DATA_LE cpu_to_le32(0x00020000)
+#define ATTR_MASK_LE cpu_to_le32(0x00007FB7)
+
+/* Oplock levels */
+#define SMB2_OPLOCK_LEVEL_NONE 0x00
+#define SMB2_OPLOCK_LEVEL_II 0x01
+#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08
+#define SMB2_OPLOCK_LEVEL_BATCH 0x09
+#define SMB2_OPLOCK_LEVEL_LEASE 0xFF
+/* Non-spec internal type */
+#define SMB2_OPLOCK_LEVEL_NOCHANGE 0x99
+
+/* Desired Access Flags */
+#define FILE_READ_DATA_LE cpu_to_le32(0x00000001)
+#define FILE_LIST_DIRECTORY_LE cpu_to_le32(0x00000001)
+#define FILE_WRITE_DATA_LE cpu_to_le32(0x00000002)
+#define FILE_ADD_FILE_LE cpu_to_le32(0x00000002)
+#define FILE_APPEND_DATA_LE cpu_to_le32(0x00000004)
+#define FILE_ADD_SUBDIRECTORY_LE cpu_to_le32(0x00000004)
+#define FILE_READ_EA_LE cpu_to_le32(0x00000008)
+#define FILE_WRITE_EA_LE cpu_to_le32(0x00000010)
+#define FILE_EXECUTE_LE cpu_to_le32(0x00000020)
+#define FILE_TRAVERSE_LE cpu_to_le32(0x00000020)
+#define FILE_DELETE_CHILD_LE cpu_to_le32(0x00000040)
+#define FILE_READ_ATTRIBUTES_LE cpu_to_le32(0x00000080)
+#define FILE_WRITE_ATTRIBUTES_LE cpu_to_le32(0x00000100)
+#define FILE_DELETE_LE cpu_to_le32(0x00010000)
+#define FILE_READ_CONTROL_LE cpu_to_le32(0x00020000)
+#define FILE_WRITE_DAC_LE cpu_to_le32(0x00040000)
+#define FILE_WRITE_OWNER_LE cpu_to_le32(0x00080000)
+#define FILE_SYNCHRONIZE_LE cpu_to_le32(0x00100000)
+#define FILE_ACCESS_SYSTEM_SECURITY_LE cpu_to_le32(0x01000000)
+#define FILE_MAXIMAL_ACCESS_LE cpu_to_le32(0x02000000)
+#define FILE_GENERIC_ALL_LE cpu_to_le32(0x10000000)
+#define FILE_GENERIC_EXECUTE_LE cpu_to_le32(0x20000000)
+#define FILE_GENERIC_WRITE_LE cpu_to_le32(0x40000000)
+#define FILE_GENERIC_READ_LE cpu_to_le32(0x80000000)
+#define DESIRED_ACCESS_MASK cpu_to_le32(0xF21F01FF)
+
+/* ShareAccess Flags */
+#define FILE_SHARE_READ_LE cpu_to_le32(0x00000001)
+#define FILE_SHARE_WRITE_LE cpu_to_le32(0x00000002)
+#define FILE_SHARE_DELETE_LE cpu_to_le32(0x00000004)
+#define FILE_SHARE_ALL_LE cpu_to_le32(0x00000007)
+
+/* CreateDisposition Flags */
+#define FILE_SUPERSEDE_LE cpu_to_le32(0x00000000)
+#define FILE_OPEN_LE cpu_to_le32(0x00000001)
+#define FILE_CREATE_LE cpu_to_le32(0x00000002)
+#define FILE_OPEN_IF_LE cpu_to_le32(0x00000003)
+#define FILE_OVERWRITE_LE cpu_to_le32(0x00000004)
+#define FILE_OVERWRITE_IF_LE cpu_to_le32(0x00000005)
+#define FILE_CREATE_MASK_LE cpu_to_le32(0x00000007)
+
+#define FILE_READ_DESIRED_ACCESS_LE (FILE_READ_DATA_LE | \
+ FILE_READ_EA_LE | \
+ FILE_GENERIC_READ_LE)
+#define FILE_WRITE_DESIRE_ACCESS_LE (FILE_WRITE_DATA_LE | \
+ FILE_APPEND_DATA_LE | \
+ FILE_WRITE_EA_LE | \
+ FILE_WRITE_ATTRIBUTES_LE | \
+ FILE_GENERIC_WRITE_LE)
+
+/* Impersonation Levels */
+#define IL_ANONYMOUS_LE cpu_to_le32(0x00000000)
+#define IL_IDENTIFICATION_LE cpu_to_le32(0x00000001)
+#define IL_IMPERSONATION_LE cpu_to_le32(0x00000002)
+#define IL_DELEGATE_LE cpu_to_le32(0x00000003)
+
+/* Create Context Values */
+#define SMB2_CREATE_EA_BUFFER "ExtA" /* extended attributes */
+#define SMB2_CREATE_SD_BUFFER "SecD" /* security descriptor */
+#define SMB2_CREATE_DURABLE_HANDLE_REQUEST "DHnQ"
+#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT "DHnC"
+#define SMB2_CREATE_ALLOCATION_SIZE "AlSi"
+#define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc"
+#define SMB2_CREATE_TIMEWARP_REQUEST "TWrp"
+#define SMB2_CREATE_QUERY_ON_DISK_ID "QFid"
+#define SMB2_CREATE_REQUEST_LEASE "RqLs"
+#define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2 "DH2Q"
+#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 "DH2C"
+#define SMB2_CREATE_APP_INSTANCE_ID "\x45\xBC\xA6\x6A\xEF\xA7\xF7\x4A\x90\x08\xFA\x46\x2E\x14\x4D\x74"
+#define SMB2_CREATE_APP_INSTANCE_VERSION "\xB9\x82\xD0\xB7\x3B\x56\x07\x4F\xA0\x7B\x52\x4A\x81\x16\xA0\x10"
+#define SVHDX_OPEN_DEVICE_CONTEXT 0x83CE6F1AD851E0986E34401CC9BCFCE9
+#define SMB2_CREATE_TAG_POSIX "\x93\xAD\x25\x50\x9C\xB4\x11\xE7\xB4\x23\x83\xDE\x96\x8B\xCD\x7C"
+
+struct smb2_create_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 57 */
+ __u8 SecurityFlags;
+ __u8 RequestedOplockLevel;
+ __le32 ImpersonationLevel;
+ __le64 SmbCreateFlags;
+ __le64 Reserved;
+ __le32 DesiredAccess;
+ __le32 FileAttributes;
+ __le32 ShareAccess;
+ __le32 CreateDisposition;
+ __le32 CreateOptions;
+ __le16 NameOffset;
+ __le16 NameLength;
+ __le32 CreateContextsOffset;
+ __le32 CreateContextsLength;
+ __u8 Buffer[0];
+} __packed;
+
+struct smb2_create_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 89 */
+ __u8 OplockLevel;
+ __u8 Reserved;
+ __le32 CreateAction;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 AllocationSize;
+ __le64 EndofFile;
+ __le32 FileAttributes;
+ __le32 Reserved2;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __le32 CreateContextsOffset;
+ __le32 CreateContextsLength;
+ __u8 Buffer[1];
+} __packed;
+
+struct create_context {
+ __le32 Next;
+ __le16 NameOffset;
+ __le16 NameLength;
+ __le16 Reserved;
+ __le16 DataOffset;
+ __le32 DataLength;
+ __u8 Buffer[0];
+} __packed;
+
+struct create_durable_req_v2 {
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le32 Timeout;
+ __le32 Flags;
+ __u8 Reserved[8];
+ __u8 CreateGuid[16];
+} __packed;
+
+struct create_durable_reconn_req {
+ struct create_context ccontext;
+ __u8 Name[8];
+ union {
+ __u8 Reserved[16];
+ struct {
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ } Fid;
+ } Data;
+} __packed;
+
+struct create_durable_reconn_v2_req {
+ struct create_context ccontext;
+ __u8 Name[8];
+ struct {
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ } Fid;
+ __u8 CreateGuid[16];
+ __le32 Flags;
+} __packed;
+
+struct create_app_inst_id {
+ struct create_context ccontext;
+ __u8 Name[8];
+ __u8 Reserved[8];
+ __u8 AppInstanceId[16];
+} __packed;
+
+struct create_app_inst_id_vers {
+ struct create_context ccontext;
+ __u8 Name[8];
+ __u8 Reserved[2];
+ __u8 Padding[4];
+ __le64 AppInstanceVersionHigh;
+ __le64 AppInstanceVersionLow;
+} __packed;
+
+struct create_mxac_req {
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le64 Timestamp;
+} __packed;
+
+struct create_alloc_size_req {
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le64 AllocationSize;
+} __packed;
+
+struct create_posix {
+ struct create_context ccontext;
+ __u8 Name[16];
+ __le32 Mode;
+ __u32 Reserved;
+} __packed;
+
+struct create_durable_rsp {
+ struct create_context ccontext;
+ __u8 Name[8];
+ union {
+ __u8 Reserved[8];
+ __u64 data;
+ } Data;
+} __packed;
+
+struct create_durable_v2_rsp {
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le32 Timeout;
+ __le32 Flags;
+} __packed;
+
+struct create_mxac_rsp {
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le32 QueryStatus;
+ __le32 MaximalAccess;
+} __packed;
+
+struct create_disk_id_rsp {
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le64 DiskFileId;
+ __le64 VolumeId;
+ __u8 Reserved[16];
+} __packed;
+
+/* equivalent of the contents of SMB3.1.1 POSIX open context response */
+struct create_posix_rsp {
+ struct create_context ccontext;
+ __u8 Name[16];
+ __le32 nlink;
+ __le32 reparse_tag;
+ __le32 mode;
+ u8 SidBuffer[40];
+} __packed;
+
+#define SMB2_LEASE_NONE_LE cpu_to_le32(0x00)
+#define SMB2_LEASE_READ_CACHING_LE cpu_to_le32(0x01)
+#define SMB2_LEASE_HANDLE_CACHING_LE cpu_to_le32(0x02)
+#define SMB2_LEASE_WRITE_CACHING_LE cpu_to_le32(0x04)
+
+#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS_LE cpu_to_le32(0x02)
+
+struct lease_context {
+ __le64 LeaseKeyLow;
+ __le64 LeaseKeyHigh;
+ __le32 LeaseState;
+ __le32 LeaseFlags;
+ __le64 LeaseDuration;
+} __packed;
+
+struct lease_context_v2 {
+ __le64 LeaseKeyLow;
+ __le64 LeaseKeyHigh;
+ __le32 LeaseState;
+ __le32 LeaseFlags;
+ __le64 LeaseDuration;
+ __le64 ParentLeaseKeyLow;
+ __le64 ParentLeaseKeyHigh;
+ __le16 Epoch;
+ __le16 Reserved;
+} __packed;
+
+struct create_lease {
+ struct create_context ccontext;
+ __u8 Name[8];
+ struct lease_context lcontext;
+} __packed;
+
+struct create_lease_v2 {
+ struct create_context ccontext;
+ __u8 Name[8];
+ struct lease_context_v2 lcontext;
+ __u8 Pad[4];
+} __packed;
+
+/* Currently defined values for close flags */
+#define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001)
+struct smb2_close_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 24 */
+ __le16 Flags;
+ __le32 Reserved;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+} __packed;
+
+struct smb2_close_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* 60 */
+ __le16 Flags;
+ __le32 Reserved;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 AllocationSize; /* Beginning of FILE_STANDARD_INFO equivalent */
+ __le64 EndOfFile;
+ __le32 Attributes;
+} __packed;
+
+struct smb2_flush_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 24 */
+ __le16 Reserved1;
+ __le32 Reserved2;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+} __packed;
+
+struct smb2_flush_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize;
+ __le16 Reserved;
+} __packed;
+
+struct smb2_buffer_desc_v1 {
+ __le64 offset;
+ __le32 token;
+ __le32 length;
+} __packed;
+
+#define SMB2_CHANNEL_NONE cpu_to_le32(0x00000000)
+#define SMB2_CHANNEL_RDMA_V1 cpu_to_le32(0x00000001)
+#define SMB2_CHANNEL_RDMA_V1_INVALIDATE cpu_to_le32(0x00000002)
+
+struct smb2_read_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 49 */
+ __u8 Padding; /* offset from start of SMB2 header to place read */
+ __u8 Reserved;
+ __le32 Length;
+ __le64 Offset;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __le32 MinimumCount;
+ __le32 Channel; /* Reserved MBZ */
+ __le32 RemainingBytes;
+ __le16 ReadChannelInfoOffset; /* Reserved MBZ */
+ __le16 ReadChannelInfoLength; /* Reserved MBZ */
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_read_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 17 */
+ __u8 DataOffset;
+ __u8 Reserved;
+ __le32 DataLength;
+ __le32 DataRemaining;
+ __u32 Reserved2;
+ __u8 Buffer[1];
+} __packed;
+
+/* For write request Flags field below the following flag is defined: */
+#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001
+
+struct smb2_write_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 49 */
+ __le16 DataOffset; /* offset from start of SMB2 header to write data */
+ __le32 Length;
+ __le64 Offset;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __le32 Channel; /* Reserved MBZ */
+ __le32 RemainingBytes;
+ __le16 WriteChannelInfoOffset; /* Reserved MBZ */
+ __le16 WriteChannelInfoLength; /* Reserved MBZ */
+ __le32 Flags;
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_write_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 17 */
+ __u8 DataOffset;
+ __u8 Reserved;
+ __le32 DataLength;
+ __le32 DataRemaining;
+ __u32 Reserved2;
+ __u8 Buffer[1];
+} __packed;
+
+#define SMB2_0_IOCTL_IS_FSCTL 0x00000001
+
+struct duplicate_extents_to_file {
+ __u64 PersistentFileHandle; /* source file handle, opaque endianness */
+ __u64 VolatileFileHandle;
+ __le64 SourceFileOffset;
+ __le64 TargetFileOffset;
+ __le64 ByteCount; /* Bytes to be copied */
+} __packed;
+
+struct smb2_ioctl_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 57 */
+ __le16 Reserved;
+ __le32 CntCode;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __le32 InputOffset;
+ __le32 InputCount;
+ __le32 MaxInputResponse;
+ __le32 OutputOffset;
+ __le32 OutputCount;
+ __le32 MaxOutputResponse;
+ __le32 Flags;
+ __le32 Reserved2;
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_ioctl_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 49 */
+ __le16 Reserved;
+ __le32 CntCode;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __le32 InputOffset;
+ __le32 InputCount;
+ __le32 OutputOffset;
+ __le32 OutputCount;
+ __le32 Flags;
+ __le32 Reserved2;
+ __u8 Buffer[1];
+} __packed;
+
+struct validate_negotiate_info_req {
+ __le32 Capabilities;
+ __u8 Guid[SMB2_CLIENT_GUID_SIZE];
+ __le16 SecurityMode;
+ __le16 DialectCount;
+ __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */
+} __packed;
+
+struct validate_negotiate_info_rsp {
+ __le32 Capabilities;
+ __u8 Guid[SMB2_CLIENT_GUID_SIZE];
+ __le16 SecurityMode;
+ __le16 Dialect; /* Dialect in use for the connection */
+} __packed;
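+
+/*
+ * FSCTL_VALIDATE_NEGOTIATE_INFO echoes the negotiated capabilities,
+ * client GUID, security mode and dialect back over a signed IOCTL so
+ * the client can detect a downgraded negotiate exchange; the handler
+ * in smb2pdu.c rejects it for dialects older than SMB 3.0.
+ */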
+
+struct smb_sockaddr_in {
+ __be16 Port;
+ __be32 IPv4address;
+ __u8 Reserved[8];
+} __packed;
+
+struct smb_sockaddr_in6 {
+ __be16 Port;
+ __be32 FlowInfo;
+ __u8 IPv6address[16];
+ __be32 ScopeId;
+} __packed;
+
+#define INTERNETWORK 0x0002
+#define INTERNETWORKV6 0x0017
+
+struct sockaddr_storage_rsp {
+ __le16 Family;
+ union {
+ struct smb_sockaddr_in addr4;
+ struct smb_sockaddr_in6 addr6;
+ };
+} __packed;
+
+#define RSS_CAPABLE 0x00000001
+#define RDMA_CAPABLE 0x00000002
+
+struct network_interface_info_ioctl_rsp {
+ __le32 Next; /* next interface. zero if this is last one */
+ __le32 IfIndex;
+ __le32 Capability; /* RSS or RDMA Capable */
+ __le32 Reserved;
+ __le64 LinkSpeed;
+ char SockAddr_Storage[128];
+} __packed;
+
+struct file_object_buf_type1_ioctl_rsp {
+ __u8 ObjectId[16];
+ __u8 BirthVolumeId[16];
+ __u8 BirthObjectId[16];
+ __u8 DomainId[16];
+} __packed;
+
+struct resume_key_ioctl_rsp {
+ __le64 ResumeKey[3];
+ __le32 ContextLength;
+ __u8 Context[4]; /* ignored, Windows sets to 4 bytes of zero */
+} __packed;
+
+struct copychunk_ioctl_req {
+ __le64 ResumeKey[3];
+ __le32 ChunkCount;
+ __le32 Reserved;
+ __u8 Chunks[1]; /* array of srv_copychunk */
+} __packed;
+
+struct srv_copychunk {
+ __le64 SourceOffset;
+ __le64 TargetOffset;
+ __le32 Length;
+ __le32 Reserved;
+} __packed;
+
+struct copychunk_ioctl_rsp {
+ __le32 ChunksWritten;
+ __le32 ChunkBytesWritten;
+ __le32 TotalBytesWritten;
+} __packed;
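+
+/*
+ * Server-side copy: the client obtains ResumeKey for the source file
+ * via FSCTL_REQUEST_RESUME_KEY, then issues FSCTL_COPYCHUNK(_WRITE) on
+ * the target with that key and an array of srv_copychunk ranges; the
+ * response reports the chunks and bytes actually written.
+ */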
+
+struct file_sparse {
+ __u8 SetSparse;
+} __packed;
+
+struct file_zero_data_information {
+ __le64 FileOffset;
+ __le64 BeyondFinalZero;
+} __packed;
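+
+/*
+ * BeyondFinalZero is the offset of the first byte past the range to be
+ * zeroed, so FSCTL_SET_ZERO_DATA computes the length as
+ * BeyondFinalZero - FileOffset.
+ */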
+
+struct file_allocated_range_buffer {
+ __le64 file_offset;
+ __le64 length;
+} __packed;
+
+struct reparse_data_buffer {
+ __le32 ReparseTag;
+ __le16 ReparseDataLength;
+ __u16 Reserved;
+ __u8 DataBuffer[]; /* Variable Length */
+} __packed;
+
+/* Completion Filter flags for Notify */
+#define FILE_NOTIFY_CHANGE_FILE_NAME 0x00000001
+#define FILE_NOTIFY_CHANGE_DIR_NAME 0x00000002
+#define FILE_NOTIFY_CHANGE_NAME 0x00000003
+#define FILE_NOTIFY_CHANGE_ATTRIBUTES 0x00000004
+#define FILE_NOTIFY_CHANGE_SIZE 0x00000008
+#define FILE_NOTIFY_CHANGE_LAST_WRITE 0x00000010
+#define FILE_NOTIFY_CHANGE_LAST_ACCESS 0x00000020
+#define FILE_NOTIFY_CHANGE_CREATION 0x00000040
+#define FILE_NOTIFY_CHANGE_EA 0x00000080
+#define FILE_NOTIFY_CHANGE_SECURITY 0x00000100
+#define FILE_NOTIFY_CHANGE_STREAM_NAME 0x00000200
+#define FILE_NOTIFY_CHANGE_STREAM_SIZE 0x00000400
+#define FILE_NOTIFY_CHANGE_STREAM_WRITE 0x00000800
+
+/* Flags */
+#define SMB2_WATCH_TREE 0x0001
+
+struct smb2_notify_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 32 */
+ __le16 Flags;
+ __le32 OutputBufferLength;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __u32 CompletionFilter;
+ __u32 Reserved;
+} __packed;
+
+struct smb2_notify_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 9 */
+ __le16 OutputBufferOffset;
+ __le32 OutputBufferLength;
+ __u8 Buffer[1];
+} __packed;
+
+/* SMB2 Notify Action Flags */
+#define FILE_ACTION_ADDED 0x00000001
+#define FILE_ACTION_REMOVED 0x00000002
+#define FILE_ACTION_MODIFIED 0x00000003
+#define FILE_ACTION_RENAMED_OLD_NAME 0x00000004
+#define FILE_ACTION_RENAMED_NEW_NAME 0x00000005
+#define FILE_ACTION_ADDED_STREAM 0x00000006
+#define FILE_ACTION_REMOVED_STREAM 0x00000007
+#define FILE_ACTION_MODIFIED_STREAM 0x00000008
+#define FILE_ACTION_REMOVED_BY_DELETE 0x00000009
+
+#define SMB2_LOCKFLAG_SHARED 0x0001
+#define SMB2_LOCKFLAG_EXCLUSIVE 0x0002
+#define SMB2_LOCKFLAG_UNLOCK 0x0004
+#define SMB2_LOCKFLAG_FAIL_IMMEDIATELY 0x0010
+#define SMB2_LOCKFLAG_MASK 0x0007
+
+struct smb2_lock_element {
+ __le64 Offset;
+ __le64 Length;
+ __le32 Flags;
+ __le32 Reserved;
+} __packed;
+
+struct smb2_lock_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 48 */
+ __le16 LockCount;
+ __le32 Reserved;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ /* Followed by at least one */
+ struct smb2_lock_element locks[1];
+} __packed;
+
+struct smb2_lock_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __le16 Reserved;
+} __packed;
+
+struct smb2_echo_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __u16 Reserved;
+} __packed;
+
+struct smb2_echo_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 4 */
+ __u16 Reserved;
+} __packed;
+
+/* search (query_directory) Flags field */
+#define SMB2_RESTART_SCANS 0x01
+#define SMB2_RETURN_SINGLE_ENTRY 0x02
+#define SMB2_INDEX_SPECIFIED 0x04
+#define SMB2_REOPEN 0x10
+
+struct smb2_query_directory_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 33 */
+ __u8 FileInformationClass;
+ __u8 Flags;
+ __le32 FileIndex;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __le16 FileNameOffset;
+ __le16 FileNameLength;
+ __le32 OutputBufferLength;
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_query_directory_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 9 */
+ __le16 OutputBufferOffset;
+ __le32 OutputBufferLength;
+ __u8 Buffer[1];
+} __packed;
+
+/* Possible InfoType values */
+#define SMB2_O_INFO_FILE 0x01
+#define SMB2_O_INFO_FILESYSTEM 0x02
+#define SMB2_O_INFO_SECURITY 0x03
+#define SMB2_O_INFO_QUOTA 0x04
+
+/* Security info type additionalinfo flags. See MS-SMB2 (2.2.37) or MS-DTYP */
+#define OWNER_SECINFO 0x00000001
+#define GROUP_SECINFO 0x00000002
+#define DACL_SECINFO 0x00000004
+#define SACL_SECINFO 0x00000008
+#define LABEL_SECINFO 0x00000010
+#define ATTRIBUTE_SECINFO 0x00000020
+#define SCOPE_SECINFO 0x00000040
+#define BACKUP_SECINFO 0x00010000
+#define UNPROTECTED_SACL_SECINFO 0x10000000
+#define UNPROTECTED_DACL_SECINFO 0x20000000
+#define PROTECTED_SACL_SECINFO 0x40000000
+#define PROTECTED_DACL_SECINFO 0x80000000
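+
+/*
+ * Example: AdditionalInformation = OWNER_SECINFO | GROUP_SECINFO |
+ * DACL_SECINFO requests the owner, group and DACL portions of the
+ * security descriptor in a single SMB2_O_INFO_SECURITY query.
+ */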
+
+struct smb2_query_info_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 41 */
+ __u8 InfoType;
+ __u8 FileInfoClass;
+ __le32 OutputBufferLength;
+ __le16 InputBufferOffset;
+ __u16 Reserved;
+ __le32 InputBufferLength;
+ __le32 AdditionalInformation;
+ __le32 Flags;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_query_info_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 9 */
+ __le16 OutputBufferOffset;
+ __le32 OutputBufferLength;
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_set_info_req {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 33 */
+ __u8 InfoType;
+ __u8 FileInfoClass;
+ __le32 BufferLength;
+ __le16 BufferOffset;
+ __u16 Reserved;
+ __le32 AdditionalInformation;
+ __le64 PersistentFileId;
+ __le64 VolatileFileId;
+ __u8 Buffer[1];
+} __packed;
+
+struct smb2_set_info_rsp {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 2 */
+} __packed;
+
+/* FILE Info response size */
+#define FILE_DIRECTORY_INFORMATION_SIZE 1
+#define FILE_FULL_DIRECTORY_INFORMATION_SIZE 2
+#define FILE_BOTH_DIRECTORY_INFORMATION_SIZE 3
+#define FILE_BASIC_INFORMATION_SIZE 40
+#define FILE_STANDARD_INFORMATION_SIZE 24
+#define FILE_INTERNAL_INFORMATION_SIZE 8
+#define FILE_EA_INFORMATION_SIZE 4
+#define FILE_ACCESS_INFORMATION_SIZE 4
+#define FILE_NAME_INFORMATION_SIZE 9
+#define FILE_RENAME_INFORMATION_SIZE 10
+#define FILE_LINK_INFORMATION_SIZE 11
+#define FILE_NAMES_INFORMATION_SIZE 12
+#define FILE_DISPOSITION_INFORMATION_SIZE 13
+#define FILE_POSITION_INFORMATION_SIZE 14
+#define FILE_FULL_EA_INFORMATION_SIZE 15
+#define FILE_MODE_INFORMATION_SIZE 4
+#define FILE_ALIGNMENT_INFORMATION_SIZE 4
+#define FILE_ALL_INFORMATION_SIZE 104
+#define FILE_ALLOCATION_INFORMATION_SIZE 19
+#define FILE_END_OF_FILE_INFORMATION_SIZE 20
+#define FILE_ALTERNATE_NAME_INFORMATION_SIZE 8
+#define FILE_STREAM_INFORMATION_SIZE 32
+#define FILE_PIPE_INFORMATION_SIZE 23
+#define FILE_PIPE_LOCAL_INFORMATION_SIZE 24
+#define FILE_PIPE_REMOTE_INFORMATION_SIZE 25
+#define FILE_MAILSLOT_QUERY_INFORMATION_SIZE 26
+#define FILE_MAILSLOT_SET_INFORMATION_SIZE 27
+#define FILE_COMPRESSION_INFORMATION_SIZE 16
+#define FILE_OBJECT_ID_INFORMATION_SIZE 29
+/* Number 30 not defined in documents */
+#define FILE_MOVE_CLUSTER_INFORMATION_SIZE 31
+#define FILE_QUOTA_INFORMATION_SIZE 32
+#define FILE_REPARSE_POINT_INFORMATION_SIZE 33
+#define FILE_NETWORK_OPEN_INFORMATION_SIZE 56
+#define FILE_ATTRIBUTE_TAG_INFORMATION_SIZE 8
+
+/* FS Info response size */
+#define FS_DEVICE_INFORMATION_SIZE 8
+#define FS_ATTRIBUTE_INFORMATION_SIZE 16
+#define FS_VOLUME_INFORMATION_SIZE 24
+#define FS_SIZE_INFORMATION_SIZE 24
+#define FS_FULL_SIZE_INFORMATION_SIZE 32
+#define FS_SECTOR_SIZE_INFORMATION_SIZE 28
+#define FS_OBJECT_ID_INFORMATION_SIZE 64
+#define FS_CONTROL_INFORMATION_SIZE 48
+#define FS_POSIX_INFORMATION_SIZE 56
+
+/* FS_ATTRIBUTE_File_System_Name */
+#define FS_TYPE_SUPPORT_SIZE 44
+struct fs_type_info {
+ char *fs_name;
+ long magic_number;
+} __packed;
+
+struct smb2_oplock_break {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 24 */
+ __u8 OplockLevel;
+ __u8 Reserved;
+ __le32 Reserved2;
+ __le64 PersistentFid;
+ __le64 VolatileFid;
+} __packed;
+
+#define SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED cpu_to_le32(0x01)
+
+struct smb2_lease_break {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 44 */
+ __le16 Epoch;
+ __le32 Flags;
+ __u8 LeaseKey[16];
+ __le32 CurrentLeaseState;
+ __le32 NewLeaseState;
+ __le32 BreakReason;
+ __le32 AccessMaskHint;
+ __le32 ShareMaskHint;
+} __packed;
+
+struct smb2_lease_ack {
+ struct smb2_hdr hdr;
+ __le16 StructureSize; /* Must be 36 */
+ __le16 Reserved;
+ __le32 Flags;
+ __u8 LeaseKey[16];
+ __le32 LeaseState;
+ __le64 LeaseDuration;
+} __packed;
+
+/*
+ * PDU infolevel structure definitions
+ * BB consider moving to a different header
+ */
+
+/* File System Information Classes */
+#define FS_VOLUME_INFORMATION 1 /* Query */
+#define FS_LABEL_INFORMATION 2 /* Set */
+#define FS_SIZE_INFORMATION 3 /* Query */
+#define FS_DEVICE_INFORMATION 4 /* Query */
+#define FS_ATTRIBUTE_INFORMATION 5 /* Query */
+#define FS_CONTROL_INFORMATION 6 /* Query, Set */
+#define FS_FULL_SIZE_INFORMATION 7 /* Query */
+#define FS_OBJECT_ID_INFORMATION 8 /* Query, Set */
+#define FS_DRIVER_PATH_INFORMATION 9 /* Query */
+#define FS_SECTOR_SIZE_INFORMATION 11 /* SMB3 or later. Query */
+#define FS_POSIX_INFORMATION 100 /* SMB3.1.1 POSIX. Query */
+
+struct smb2_fs_full_size_info {
+ __le64 TotalAllocationUnits;
+ __le64 CallerAvailableAllocationUnits;
+ __le64 ActualAvailableAllocationUnits;
+ __le32 SectorsPerAllocationUnit;
+ __le32 BytesPerSector;
+} __packed;
+
+#define SSINFO_FLAGS_ALIGNED_DEVICE 0x00000001
+#define SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE 0x00000002
+#define SSINFO_FLAGS_NO_SEEK_PENALTY 0x00000004
+#define SSINFO_FLAGS_TRIM_ENABLED 0x00000008
+
+/* sector size info struct */
+struct smb3_fs_ss_info {
+ __le32 LogicalBytesPerSector;
+ __le32 PhysicalBytesPerSectorForAtomicity;
+ __le32 PhysicalBytesPerSectorForPerf;
+ __le32 FSEffPhysicalBytesPerSectorForAtomicity;
+ __le32 Flags;
+ __le32 ByteOffsetForSectorAlignment;
+ __le32 ByteOffsetForPartitionAlignment;
+} __packed;
+
+/* File System Control Information */
+struct smb2_fs_control_info {
+ __le64 FreeSpaceStartFiltering;
+ __le64 FreeSpaceThreshold;
+ __le64 FreeSpaceStopFiltering;
+ __le64 DefaultQuotaThreshold;
+ __le64 DefaultQuotaLimit;
+ __le32 FileSystemControlFlags;
+ __le32 Padding;
+} __packed;
+
+/* partial list of QUERY INFO levels */
+#define FILE_DIRECTORY_INFORMATION 1
+#define FILE_FULL_DIRECTORY_INFORMATION 2
+#define FILE_BOTH_DIRECTORY_INFORMATION 3
+#define FILE_BASIC_INFORMATION 4
+#define FILE_STANDARD_INFORMATION 5
+#define FILE_INTERNAL_INFORMATION 6
+#define FILE_EA_INFORMATION 7
+#define FILE_ACCESS_INFORMATION 8
+#define FILE_NAME_INFORMATION 9
+#define FILE_RENAME_INFORMATION 10
+#define FILE_LINK_INFORMATION 11
+#define FILE_NAMES_INFORMATION 12
+#define FILE_DISPOSITION_INFORMATION 13
+#define FILE_POSITION_INFORMATION 14
+#define FILE_FULL_EA_INFORMATION 15
+#define FILE_MODE_INFORMATION 16
+#define FILE_ALIGNMENT_INFORMATION 17
+#define FILE_ALL_INFORMATION 18
+#define FILE_ALLOCATION_INFORMATION 19
+#define FILE_END_OF_FILE_INFORMATION 20
+#define FILE_ALTERNATE_NAME_INFORMATION 21
+#define FILE_STREAM_INFORMATION 22
+#define FILE_PIPE_INFORMATION 23
+#define FILE_PIPE_LOCAL_INFORMATION 24
+#define FILE_PIPE_REMOTE_INFORMATION 25
+#define FILE_MAILSLOT_QUERY_INFORMATION 26
+#define FILE_MAILSLOT_SET_INFORMATION 27
+#define FILE_COMPRESSION_INFORMATION 28
+#define FILE_OBJECT_ID_INFORMATION 29
+/* Number 30 not defined in documents */
+#define FILE_MOVE_CLUSTER_INFORMATION 31
+#define FILE_QUOTA_INFORMATION 32
+#define FILE_REPARSE_POINT_INFORMATION 33
+#define FILE_NETWORK_OPEN_INFORMATION 34
+#define FILE_ATTRIBUTE_TAG_INFORMATION 35
+#define FILE_TRACKING_INFORMATION 36
+#define FILEID_BOTH_DIRECTORY_INFORMATION 37
+#define FILEID_FULL_DIRECTORY_INFORMATION 38
+#define FILE_VALID_DATA_LENGTH_INFORMATION 39
+#define FILE_SHORT_NAME_INFORMATION 40
+#define FILE_SFIO_RESERVE_INFORMATION 44
+#define FILE_SFIO_VOLUME_INFORMATION 45
+#define FILE_HARD_LINK_INFORMATION 46
+#define FILE_NORMALIZED_NAME_INFORMATION 48
+#define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50
+#define FILE_STANDARD_LINK_INFORMATION 54
+
+#define OP_BREAK_STRUCT_SIZE_20 24
+#define OP_BREAK_STRUCT_SIZE_21 36
+
+struct smb2_file_access_info {
+ __le32 AccessFlags;
+} __packed;
+
+struct smb2_file_alignment_info {
+ __le32 AlignmentRequirement;
+} __packed;
+
+struct smb2_file_internal_info {
+ __le64 IndexNumber;
+} __packed; /* level 6 Query */
+
+struct smb2_file_rename_info { /* encoding of request for level 10 */
+ __u8 ReplaceIfExists; /* 1 = replace existing target with new */
+ /* 0 = fail if target already exists */
+ __u8 Reserved[7];
+ __u64 RootDirectory; /* MBZ for network operations (the spec does not say why) */
+ __le32 FileNameLength;
+ char FileName[0]; /* New name to be assigned */
+} __packed; /* level 10 Set */
+
+struct smb2_file_link_info { /* encoding of request for level 11 */
+ __u8 ReplaceIfExists; /* 1 = replace existing link with new */
+ /* 0 = fail if link already exists */
+ __u8 Reserved[7];
+ __u64 RootDirectory; /* MBZ for network operations (the spec does not say why) */
+ __le32 FileNameLength;
+ char FileName[0]; /* Name to be assigned to new link */
+} __packed; /* level 11 Set */
+
+/*
+ * Level 18, although it uses a struct with the same name, is different
+ * from cifs level 0x107. Level 0x107 has an extra u64 between AccessFlags
+ * and CurrentByteOffset.
+ */
+struct smb2_file_all_info { /* data block encoding of response to level 18 */
+ __le64 CreationTime; /* Beginning of FILE_BASIC_INFO equivalent */
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le32 Attributes;
+ __u32 Pad1; /* End of FILE_BASIC_INFO equivalent */
+ __le64 AllocationSize; /* Beginning of FILE_STANDARD_INFO equivalent */
+ __le64 EndOfFile; /* size ie offset to first free byte in file */
+ __le32 NumberOfLinks; /* hard links */
+ __u8 DeletePending;
+ __u8 Directory;
+ __u16 Pad2; /* End of FILE_STANDARD_INFO equivalent */
+ __le64 IndexNumber;
+ __le32 EASize;
+ __le32 AccessFlags;
+ __le64 CurrentByteOffset;
+ __le32 Mode;
+ __le32 AlignmentRequirement;
+ __le32 FileNameLength;
+ char FileName[1];
+} __packed; /* level 18 Query */
+
+struct smb2_file_alt_name_info {
+ __le32 FileNameLength;
+ char FileName[0];
+} __packed;
+
+struct smb2_file_stream_info {
+ __le32 NextEntryOffset;
+ __le32 StreamNameLength;
+ __le64 StreamSize;
+ __le64 StreamAllocationSize;
+ char StreamName[0];
+} __packed;
+
+struct smb2_file_eof_info { /* encoding of request for level 20 */
+ __le64 EndOfFile; /* new end of file value */
+} __packed; /* level 20 Set */
+
+struct smb2_file_ntwrk_info {
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 AllocationSize;
+ __le64 EndOfFile;
+ __le32 Attributes;
+ __le32 Reserved;
+} __packed;
+
+struct smb2_file_standard_info {
+ __le64 AllocationSize;
+ __le64 EndOfFile;
+ __le32 NumberOfLinks; /* hard links */
+ __u8 DeletePending;
+ __u8 Directory;
+ __le16 Reserved;
+} __packed; /* level 5 Query */
+
+struct smb2_file_ea_info {
+ __le32 EASize;
+} __packed;
+
+struct smb2_file_alloc_info {
+ __le64 AllocationSize;
+} __packed;
+
+struct smb2_file_disposition_info {
+ __u8 DeletePending;
+} __packed;
+
+struct smb2_file_pos_info {
+ __le64 CurrentByteOffset;
+} __packed;
+
+#define FILE_MODE_INFO_MASK cpu_to_le32(0x0000103e)
+
+struct smb2_file_mode_info {
+ __le32 Mode;
+} __packed;
+
+#define COMPRESSION_FORMAT_NONE 0x0000
+#define COMPRESSION_FORMAT_LZNT1 0x0002
+
+struct smb2_file_comp_info {
+ __le64 CompressedFileSize;
+ __le16 CompressionFormat;
+ __u8 CompressionUnitShift;
+ __u8 ChunkShift;
+ __u8 ClusterShift;
+ __u8 Reserved[3];
+} __packed;
+
+struct smb2_file_attr_tag_info {
+ __le32 FileAttributes;
+ __le32 ReparseTag;
+} __packed;
+
+#define SL_RESTART_SCAN 0x00000001
+#define SL_RETURN_SINGLE_ENTRY 0x00000002
+#define SL_INDEX_SPECIFIED 0x00000004
+
+struct smb2_ea_info_req {
+ __le32 NextEntryOffset;
+ __u8 EaNameLength;
+ char name[1];
+} __packed; /* level 15 Query */
+
+struct smb2_ea_info {
+ __le32 NextEntryOffset;
+ __u8 Flags;
+ __u8 EaNameLength;
+ __le16 EaValueLength;
+ char name[1];
+ /* optionally followed by value */
+} __packed; /* level 15 Query */
+
+struct create_ea_buf_req {
+ struct create_context ccontext;
+ __u8 Name[8];
+ struct smb2_ea_info ea;
+} __packed;
+
+struct create_sd_buf_req {
+ struct create_context ccontext;
+ __u8 Name[8];
+ struct smb_ntsd ntsd;
+} __packed;
+
+/* Find File infolevels */
+#define SMB_FIND_FILE_POSIX_INFO 0x064
+
+/* Level 100 query info */
+struct smb311_posix_qinfo {
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 DosAttributes;
+ __le64 Inode;
+ __le32 DeviceId;
+ __le32 Zero;
+ /* beginning of POSIX Create Context Response */
+ __le32 HardLinks;
+ __le32 ReparseTag;
+ __le32 Mode;
+ u8 Sids[];
+ /*
+ * var sized owner SID
+ * var sized group SID
+ * le32 filenamelength
+ * u8 filename[]
+ */
+} __packed;
+
+struct smb2_posix_info {
+ __le32 NextEntryOffset;
+ __u32 Ignored;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 DosAttributes;
+ __le64 Inode;
+ __le32 DeviceId;
+ __le32 Zero;
+ /* beginning of POSIX Create Context Response */
+ __le32 HardLinks;
+ __le32 ReparseTag;
+ __le32 Mode;
+ u8 SidBuffer[40];
+ __le32 name_len;
+ u8 name[1];
+ /*
+ * var sized owner SID
+ * var sized group SID
+ * le32 filenamelength
+ * u8 filename[]
+ */
+} __packed;
+
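+/*
+ * Illustrative sketch (consume_entry() is hypothetical and buf is assumed
+ * to point at the first entry): query_directory responses chain these
+ * variable-sized entries via NextEntryOffset, with zero marking the last:
+ *
+ *	struct smb2_posix_info *e = (struct smb2_posix_info *)buf;
+ *
+ *	for (;;) {
+ *		consume_entry(e);
+ *		if (!le32_to_cpu(e->NextEntryOffset))
+ *			break;
+ *		e = (void *)e + le32_to_cpu(e->NextEntryOffset);
+ *	}
+ */
+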
+/* functions */
+int init_smb2_0_server(struct ksmbd_conn *conn);
+void init_smb2_1_server(struct ksmbd_conn *conn);
+void init_smb3_0_server(struct ksmbd_conn *conn);
+void init_smb3_02_server(struct ksmbd_conn *conn);
+int init_smb3_11_server(struct ksmbd_conn *conn);
+
+void init_smb2_max_read_size(unsigned int sz);
+void init_smb2_max_write_size(unsigned int sz);
+void init_smb2_max_trans_size(unsigned int sz);
+
+bool is_smb2_neg_cmd(struct ksmbd_work *work);
+bool is_smb2_rsp(struct ksmbd_work *work);
+
+u16 get_smb2_cmd_val(struct ksmbd_work *work);
+void set_smb2_rsp_status(struct ksmbd_work *work, __le32 err);
+int init_smb2_rsp_hdr(struct ksmbd_work *work);
+int smb2_allocate_rsp_buf(struct ksmbd_work *work);
+bool is_chained_smb2_message(struct ksmbd_work *work);
+int init_smb2_neg_rsp(struct ksmbd_work *work);
+void smb2_set_err_rsp(struct ksmbd_work *work);
+int smb2_check_user_session(struct ksmbd_work *work);
+int smb2_get_ksmbd_tcon(struct ksmbd_work *work);
+bool smb2_is_sign_req(struct ksmbd_work *work, unsigned int command);
+int smb2_check_sign_req(struct ksmbd_work *work);
+void smb2_set_sign_rsp(struct ksmbd_work *work);
+int smb3_check_sign_req(struct ksmbd_work *work);
+void smb3_set_sign_rsp(struct ksmbd_work *work);
+int find_matching_smb2_dialect(int start_index, __le16 *cli_dialects,
+ __le16 dialects_count);
+struct file_lock *smb_flock_init(struct file *f);
+int setup_async_work(struct ksmbd_work *work, void (*fn)(void **),
+ void **arg);
+void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status);
+struct channel *lookup_chann_list(struct ksmbd_session *sess,
+ struct ksmbd_conn *conn);
+void smb3_preauth_hash_rsp(struct ksmbd_work *work);
+bool smb3_is_transform_hdr(void *buf);
+int smb3_decrypt_req(struct ksmbd_work *work);
+int smb3_encrypt_resp(struct ksmbd_work *work);
+bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work);
+int smb2_set_rsp_credits(struct ksmbd_work *work);
+
+/* smb2 misc functions */
+int ksmbd_smb2_check_message(struct ksmbd_work *work);
+
+/* smb2 command handlers */
+int smb2_handle_negotiate(struct ksmbd_work *work);
+int smb2_negotiate_request(struct ksmbd_work *work);
+int smb2_sess_setup(struct ksmbd_work *work);
+int smb2_tree_connect(struct ksmbd_work *work);
+int smb2_tree_disconnect(struct ksmbd_work *work);
+int smb2_session_logoff(struct ksmbd_work *work);
+int smb2_open(struct ksmbd_work *work);
+int smb2_query_info(struct ksmbd_work *work);
+int smb2_query_dir(struct ksmbd_work *work);
+int smb2_close(struct ksmbd_work *work);
+int smb2_echo(struct ksmbd_work *work);
+int smb2_set_info(struct ksmbd_work *work);
+int smb2_read(struct ksmbd_work *work);
+int smb2_write(struct ksmbd_work *work);
+int smb2_flush(struct ksmbd_work *work);
+int smb2_cancel(struct ksmbd_work *work);
+int smb2_lock(struct ksmbd_work *work);
+int smb2_ioctl(struct ksmbd_work *work);
+int smb2_oplock_break(struct ksmbd_work *work);
+int smb2_notify(struct ksmbd_work *work);
+
+#endif /* _SMB2PDU_H */
diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c
new file mode 100644
index 000000000000..b108b918ec84
--- /dev/null
+++ b/fs/ksmbd/smb_common.c
@@ -0,0 +1,674 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ * Copyright (C) 2018 Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+#include "smb_common.h"
+#include "server.h"
+#include "misc.h"
+#include "smbstatus.h"
+#include "connection.h"
+#include "ksmbd_work.h"
+#include "mgmt/user_session.h"
+#include "mgmt/user_config.h"
+#include "mgmt/tree_connect.h"
+#include "mgmt/share_config.h"
+
+/* for shortname implementation */
+static const char basechars[43] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_-!@#$%";
+#define MANGLE_BASE (sizeof(basechars) / sizeof(char) - 1)
+#define MAGIC_CHAR '~'
+#define PERIOD '.'
+#define mangle(V) ((char)(basechars[(V) % MANGLE_BASE]))
+#define KSMBD_MIN_SUPPORTED_HEADER_SIZE (sizeof(struct smb2_hdr))
+
+struct smb_protocol {
+ int index;
+ char *name;
+ char *prot;
+ __u16 prot_id;
+};
+
+static struct smb_protocol smb1_protos[] = {
+ {
+ SMB21_PROT,
+ "\2SMB 2.1",
+ "SMB2_10",
+ SMB21_PROT_ID
+ },
+ {
+ SMB2X_PROT,
+ "\2SMB 2.???",
+ "SMB2_22",
+ SMB2X_PROT_ID
+ },
+};
+
+static struct smb_protocol smb2_protos[] = {
+ {
+ SMB21_PROT,
+ "\2SMB 2.1",
+ "SMB2_10",
+ SMB21_PROT_ID
+ },
+ {
+ SMB30_PROT,
+ "\2SMB 3.0",
+ "SMB3_00",
+ SMB30_PROT_ID
+ },
+ {
+ SMB302_PROT,
+ "\2SMB 3.02",
+ "SMB3_02",
+ SMB302_PROT_ID
+ },
+ {
+ SMB311_PROT,
+ "\2SMB 3.1.1",
+ "SMB3_11",
+ SMB311_PROT_ID
+ },
+};
+
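+/*
+ * Server-side copy limits: at most 256 chunks per request; each chunk and
+ * the total transfer are capped at (2U << 30) - 1 bytes, i.e. 2 GiB - 1.
+ */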
+unsigned int ksmbd_server_side_copy_max_chunk_count(void)
+{
+ return 256;
+}
+
+unsigned int ksmbd_server_side_copy_max_chunk_size(void)
+{
+ return (2U << 30) - 1;
+}
+
+unsigned int ksmbd_server_side_copy_max_total_size(void)
+{
+ return (2U << 30) - 1;
+}
+
+inline int ksmbd_min_protocol(void)
+{
+ return SMB2_PROT;
+}
+
+inline int ksmbd_max_protocol(void)
+{
+ return SMB311_PROT;
+}
+
+int ksmbd_lookup_protocol_idx(char *str)
+{
+ int offt = ARRAY_SIZE(smb1_protos) - 1;
+ int len = strlen(str);
+
+ while (offt >= 0) {
+ if (!strncmp(str, smb1_protos[offt].prot, len)) {
+ ksmbd_debug(SMB, "selected %s dialect idx = %d\n",
+ smb1_protos[offt].prot, offt);
+ return smb1_protos[offt].index;
+ }
+ offt--;
+ }
+
+ offt = ARRAY_SIZE(smb2_protos) - 1;
+ while (offt >= 0) {
+ if (!strncmp(str, smb2_protos[offt].prot, len)) {
+ ksmbd_debug(SMB, "selected %s dialect idx = %d\n",
+ smb2_protos[offt].prot, offt);
+ return smb2_protos[offt].index;
+ }
+ offt--;
+ }
+ return -1;
+}
+
+/**
+ * ksmbd_verify_smb_message() - check for valid smb2 request header
+ * @work: smb work
+ *
+ * check for valid smb signature and packet direction (request/response)
+ *
+ * Return: 0 on success, otherwise 1
+ */
+int ksmbd_verify_smb_message(struct ksmbd_work *work)
+{
+ struct smb2_hdr *smb2_hdr = work->request_buf;
+
+ if (smb2_hdr->ProtocolId == SMB2_PROTO_NUMBER)
+ return ksmbd_smb2_check_message(work);
+
+ return 0;
+}
+
+/**
+ * ksmbd_smb_request() - check for valid smb request type
+ * @conn: connection instance
+ *
+ * Return: true on success, otherwise false
+ */
+bool ksmbd_smb_request(struct ksmbd_conn *conn)
+{
+ int type = *(char *)conn->request_buf;
+
+ switch (type) {
+ case RFC1002_SESSION_MESSAGE:
+ /* Regular SMB request */
+ return true;
+ case RFC1002_SESSION_KEEP_ALIVE:
+ ksmbd_debug(SMB, "RFC 1002 session keep alive\n");
+ break;
+ default:
+ ksmbd_debug(SMB, "RFC 1002 unknown request type 0x%x\n", type);
+ }
+
+ return false;
+}
+
+static bool supported_protocol(int idx)
+{
+ if (idx == SMB2X_PROT &&
+ (server_conf.min_protocol >= SMB21_PROT ||
+ server_conf.max_protocol <= SMB311_PROT))
+ return true;
+
+ return (server_conf.min_protocol <= idx &&
+ idx <= server_conf.max_protocol);
+}
+
+static char *next_dialect(char *dialect, int *next_off)
+{
+ dialect = dialect + *next_off;
+ *next_off = strlen(dialect);
+ return dialect;
+}
+
+static int ksmbd_lookup_dialect_by_name(char *cli_dialects, __le16 byte_count)
+{
+ int i, seq_num, bcount, next;
+ char *dialect;
+
+ for (i = ARRAY_SIZE(smb1_protos) - 1; i >= 0; i--) {
+ seq_num = 0;
+ next = 0;
+ dialect = cli_dialects;
+ bcount = le16_to_cpu(byte_count);
+ do {
+ dialect = next_dialect(dialect, &next);
+ ksmbd_debug(SMB, "client requested dialect %s\n",
+ dialect);
+ if (!strcmp(dialect, smb1_protos[i].name)) {
+ if (supported_protocol(smb1_protos[i].index)) {
+ ksmbd_debug(SMB,
+ "selected %s dialect\n",
+ smb1_protos[i].name);
+ if (smb1_protos[i].index == SMB1_PROT)
+ return seq_num;
+ return smb1_protos[i].prot_id;
+ }
+ }
+ seq_num++;
+ bcount -= (++next);
+ } while (bcount > 0);
+ }
+
+ return BAD_PROT_ID;
+}
+
+int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count)
+{
+ int i;
+ int count;
+
+ for (i = ARRAY_SIZE(smb2_protos) - 1; i >= 0; i--) {
+ count = le16_to_cpu(dialects_count);
+ while (--count >= 0) {
+ ksmbd_debug(SMB, "client requested dialect 0x%x\n",
+ le16_to_cpu(cli_dialects[count]));
+ if (le16_to_cpu(cli_dialects[count]) !=
+ smb2_protos[i].prot_id)
+ continue;
+
+ if (supported_protocol(smb2_protos[i].index)) {
+ ksmbd_debug(SMB, "selected %s dialect\n",
+ smb2_protos[i].name);
+ return smb2_protos[i].prot_id;
+ }
+ }
+ }
+
+ return BAD_PROT_ID;
+}
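+
+/*
+ * Example (illustrative): for a client dialect array of
+ * { 0x0202, 0x0210, 0x0311 } the loop above walks the server table from
+ * the highest dialect down, so 0x0311 (SMB 3.1.1) is selected when the
+ * configured min/max protocol range allows it.
+ */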
+
+static int ksmbd_negotiate_smb_dialect(void *buf)
+{
+ __le32 proto;
+
+ proto = ((struct smb2_hdr *)buf)->ProtocolId;
+ if (proto == SMB2_PROTO_NUMBER) {
+ struct smb2_negotiate_req *req;
+
+ req = (struct smb2_negotiate_req *)buf;
+ return ksmbd_lookup_dialect_by_id(req->Dialects,
+ req->DialectCount);
+ }
+
+ proto = *(__le32 *)((struct smb_hdr *)buf)->Protocol;
+ if (proto == SMB1_PROTO_NUMBER) {
+ struct smb_negotiate_req *req;
+
+ req = (struct smb_negotiate_req *)buf;
+ return ksmbd_lookup_dialect_by_name(req->DialectsArray,
+ req->ByteCount);
+ }
+
+ return BAD_PROT_ID;
+}
+
+#define SMB_COM_NEGOTIATE 0x72
+int ksmbd_init_smb_server(struct ksmbd_work *work)
+{
+ struct ksmbd_conn *conn = work->conn;
+
+ if (!conn->need_neg)
+ return 0;
+
+ init_smb3_11_server(conn);
+
+ if (conn->ops->get_cmd_val(work) != SMB_COM_NEGOTIATE)
+ conn->need_neg = false;
+ return 0;
+}
+
+bool ksmbd_pdu_size_has_room(unsigned int pdu)
+{
+ return (pdu >= KSMBD_MIN_SUPPORTED_HEADER_SIZE - 4);
+}
+
+int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
+ struct ksmbd_file *dir,
+ struct ksmbd_dir_info *d_info,
+ char *search_pattern,
+ int (*fn)(struct ksmbd_conn *, int,
+ struct ksmbd_dir_info *,
+ struct user_namespace *,
+ struct ksmbd_kstat *))
+{
+ int i, rc = 0;
+ struct ksmbd_conn *conn = work->conn;
+ struct user_namespace *user_ns = file_mnt_user_ns(dir->filp);
+
+ for (i = 0; i < 2; i++) {
+ struct kstat kstat;
+ struct ksmbd_kstat ksmbd_kstat;
+
+ if (!dir->dot_dotdot[i]) { /* fill dot entry info */
+ if (i == 0) {
+ d_info->name = ".";
+ d_info->name_len = 1;
+ } else {
+ d_info->name = "..";
+ d_info->name_len = 2;
+ }
+
+ if (!match_pattern(d_info->name, d_info->name_len,
+ search_pattern)) {
+ dir->dot_dotdot[i] = 1;
+ continue;
+ }
+
+ ksmbd_kstat.kstat = &kstat;
+ ksmbd_vfs_fill_dentry_attrs(work,
+ user_ns,
+ dir->filp->f_path.dentry->d_parent,
+ &ksmbd_kstat);
+ rc = fn(conn, info_level, d_info,
+ user_ns, &ksmbd_kstat);
+ if (rc)
+ break;
+ if (d_info->out_buf_len <= 0)
+ break;
+
+ dir->dot_dotdot[i] = 1;
+ if (d_info->flags & SMB2_RETURN_SINGLE_ENTRY) {
+ d_info->out_buf_len = 0;
+ break;
+ }
+ }
+ }
+
+ return rc;
+}
+
+/**
+ * ksmbd_extract_shortname() - get shortname from long filename
+ * @conn: connection instance
+ * @longname: source long filename
+ * @shortname: destination short filename
+ *
+ * Return: shortname length or 0 when source long name is '.' or '..'
+ * TODO: Although this function conforms to the 8.3 filename restrictions,
+ * the result differs from what Windows 7 produces; this needs checking.
+ */
+int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname,
+ char *shortname)
+{
+ const char *p;
+ char base[9], extension[4];
+ char out[13] = {0};
+ int baselen = 0;
+ int extlen = 0, len = 0;
+ unsigned int csum = 0;
+ const unsigned char *ptr;
+ bool dot_present = true;
+
+ p = longname;
+ if ((*p == '.') || (!(strcmp(p, "..")))) {
+ /* no mangling required */
+ return 0;
+ }
+
+ p = strrchr(longname, '.');
+ if (p == longname) { /* name starts with a dot */
+ strscpy(extension, "___", sizeof(extension));
+ } else {
+ if (p) {
+ p++;
+ while (*p && extlen < 3) {
+ if (*p != '.')
+ extension[extlen++] = toupper(*p);
+ p++;
+ }
+ extension[extlen] = '\0';
+ } else {
+ dot_present = false;
+ }
+ }
+
+ p = longname;
+ if (*p == '.') {
+ p++;
+ longname++;
+ }
+ while (*p && (baselen < 5)) {
+ if (*p != '.')
+ base[baselen++] = toupper(*p);
+ p++;
+ }
+
+ base[baselen] = MAGIC_CHAR;
+ memcpy(out, base, baselen + 1);
+
+ ptr = longname;
+ len = strlen(longname);
+ for (; len > 0; len--, ptr++)
+ csum += *ptr;
+
+ csum = csum % (MANGLE_BASE * MANGLE_BASE);
+ out[baselen + 1] = mangle(csum / MANGLE_BASE);
+ out[baselen + 2] = mangle(csum);
+ out[baselen + 3] = PERIOD;
+
+ if (dot_present)
+ memcpy(&out[baselen + 4], extension, 4);
+ else
+ out[baselen + 4] = '\0';
+ smbConvertToUTF16((__le16 *)shortname, out, PATH_MAX,
+ conn->local_nls, 0);
+ len = strlen(out) * 2;
+ return len;
+}
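+
+/*
+ * Example (illustrative, output not verified against a real Windows
+ * server): for longname "documentation.html" the 8.3 name built in 'out'
+ * has the shape "DOCUM~XY.HTM", where X and Y encode the byte sum of the
+ * long name in base MANGLE_BASE.
+ */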
+
+static int __smb2_negotiate(struct ksmbd_conn *conn)
+{
+ return (conn->dialect >= SMB20_PROT_ID &&
+ conn->dialect <= SMB311_PROT_ID);
+}
+
+static int smb_handle_negotiate(struct ksmbd_work *work)
+{
+ struct smb_negotiate_rsp *neg_rsp = work->response_buf;
+
+ ksmbd_debug(SMB, "Unsupported SMB protocol\n");
+ neg_rsp->hdr.Status.CifsError = STATUS_INVALID_LOGON_TYPE;
+ return -EINVAL;
+}
+
+int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command)
+{
+ struct ksmbd_conn *conn = work->conn;
+ int ret;
+
+ conn->dialect = ksmbd_negotiate_smb_dialect(work->request_buf);
+ ksmbd_debug(SMB, "conn->dialect 0x%x\n", conn->dialect);
+
+ if (command == SMB2_NEGOTIATE_HE) {
+ struct smb2_hdr *smb2_hdr = work->request_buf;
+
+ if (smb2_hdr->ProtocolId != SMB2_PROTO_NUMBER) {
+ ksmbd_debug(SMB, "Downgrade to SMB1 negotiation\n");
+ command = SMB_COM_NEGOTIATE;
+ }
+ }
+
+ if (command == SMB2_NEGOTIATE_HE) {
+ ret = smb2_handle_negotiate(work);
+ init_smb2_neg_rsp(work);
+ return ret;
+ }
+
+ if (command == SMB_COM_NEGOTIATE) {
+ if (__smb2_negotiate(conn)) {
+ conn->need_neg = true;
+ init_smb3_11_server(conn);
+ init_smb2_neg_rsp(work);
+ ksmbd_debug(SMB, "Upgrade to SMB2 negotiation\n");
+ return 0;
+ }
+ return smb_handle_negotiate(work);
+ }
+
+ pr_err("Unknown SMB negotiation command: %u\n", command);
+ return -EINVAL;
+}
+
+enum SHARED_MODE_ERRORS {
+ SHARE_DELETE_ERROR,
+ SHARE_READ_ERROR,
+ SHARE_WRITE_ERROR,
+ FILE_READ_ERROR,
+ FILE_WRITE_ERROR,
+ FILE_DELETE_ERROR,
+};
+
+static const char * const shared_mode_errors[] = {
+ "Current access mode does not permit SHARE_DELETE",
+ "Current access mode does not permit SHARE_READ",
+ "Current access mode does not permit SHARE_WRITE",
+ "Desired access mode does not permit FILE_READ",
+ "Desired access mode does not permit FILE_WRITE",
+ "Desired access mode does not permit FILE_DELETE",
+};
+
+static void smb_shared_mode_error(int error, struct ksmbd_file *prev_fp,
+ struct ksmbd_file *curr_fp)
+{
+ ksmbd_debug(SMB, "%s\n", shared_mode_errors[error]);
+ ksmbd_debug(SMB, "Current mode: 0x%x Desired mode: 0x%x\n",
+ prev_fp->saccess, curr_fp->daccess);
+}
+
+int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp)
+{
+ int rc = 0;
+ struct ksmbd_file *prev_fp;
+
+ /*
+ * Lookup fp in master fp list, and check desired access and
+ * shared mode between previous open and current open.
+ */
+ read_lock(&curr_fp->f_ci->m_lock);
+ list_for_each_entry(prev_fp, &curr_fp->f_ci->m_fp_list, node) {
+ if (file_inode(filp) != file_inode(prev_fp->filp))
+ continue;
+
+ if (filp == prev_fp->filp)
+ continue;
+
+ if (ksmbd_stream_fd(prev_fp) && ksmbd_stream_fd(curr_fp))
+ if (strcmp(prev_fp->stream.name, curr_fp->stream.name))
+ continue;
+
+ if (prev_fp->attrib_only != curr_fp->attrib_only)
+ continue;
+
+ if (!(prev_fp->saccess & FILE_SHARE_DELETE_LE) &&
+ curr_fp->daccess & FILE_DELETE_LE) {
+ smb_shared_mode_error(SHARE_DELETE_ERROR,
+ prev_fp,
+ curr_fp);
+ rc = -EPERM;
+ break;
+ }
+
+ /*
+ * Only check FILE_SHARE_DELETE if stream opened and
+ * normal file opened.
+ */
+ if (ksmbd_stream_fd(prev_fp) && !ksmbd_stream_fd(curr_fp))
+ continue;
+
+ if (!(prev_fp->saccess & FILE_SHARE_READ_LE) &&
+ curr_fp->daccess & (FILE_EXECUTE_LE | FILE_READ_DATA_LE)) {
+ smb_shared_mode_error(SHARE_READ_ERROR,
+ prev_fp,
+ curr_fp);
+ rc = -EPERM;
+ break;
+ }
+
+ if (!(prev_fp->saccess & FILE_SHARE_WRITE_LE) &&
+ curr_fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE)) {
+ smb_shared_mode_error(SHARE_WRITE_ERROR,
+ prev_fp,
+ curr_fp);
+ rc = -EPERM;
+ break;
+ }
+
+ if (prev_fp->daccess & (FILE_EXECUTE_LE | FILE_READ_DATA_LE) &&
+ !(curr_fp->saccess & FILE_SHARE_READ_LE)) {
+ smb_shared_mode_error(FILE_READ_ERROR,
+ prev_fp,
+ curr_fp);
+ rc = -EPERM;
+ break;
+ }
+
+ if (prev_fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE) &&
+ !(curr_fp->saccess & FILE_SHARE_WRITE_LE)) {
+ smb_shared_mode_error(FILE_WRITE_ERROR,
+ prev_fp,
+ curr_fp);
+ rc = -EPERM;
+ break;
+ }
+
+ if (prev_fp->daccess & FILE_DELETE_LE &&
+ !(curr_fp->saccess & FILE_SHARE_DELETE_LE)) {
+ smb_shared_mode_error(FILE_DELETE_ERROR,
+ prev_fp,
+ curr_fp);
+ rc = -EPERM;
+ break;
+ }
+ }
+ read_unlock(&curr_fp->f_ci->m_lock);
+
+ return rc;
+}
+
+bool is_asterisk(char *p)
+{
+ return p && p[0] == '*';
+}
+
+int ksmbd_override_fsids(struct ksmbd_work *work)
+{
+ struct ksmbd_session *sess = work->sess;
+ struct ksmbd_share_config *share = work->tcon->share_conf;
+ struct cred *cred;
+ struct group_info *gi;
+ unsigned int uid;
+ unsigned int gid;
+
+ uid = user_uid(sess->user);
+ gid = user_gid(sess->user);
+ if (share->force_uid != KSMBD_SHARE_INVALID_UID)
+ uid = share->force_uid;
+ if (share->force_gid != KSMBD_SHARE_INVALID_GID)
+ gid = share->force_gid;
+
+ cred = prepare_kernel_cred(NULL);
+ if (!cred)
+ return -ENOMEM;
+
+ cred->fsuid = make_kuid(current_user_ns(), uid);
+ cred->fsgid = make_kgid(current_user_ns(), gid);
+
+ gi = groups_alloc(0);
+ if (!gi) {
+ abort_creds(cred);
+ return -ENOMEM;
+ }
+ set_groups(cred, gi);
+ put_group_info(gi);
+
+ if (!uid_eq(cred->fsuid, GLOBAL_ROOT_UID))
+ cred->cap_effective = cap_drop_fs_set(cred->cap_effective);
+
+ WARN_ON(work->saved_cred);
+ work->saved_cred = override_creds(cred);
+ if (!work->saved_cred) {
+ abort_creds(cred);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+void ksmbd_revert_fsids(struct ksmbd_work *work)
+{
+ const struct cred *cred;
+
+ WARN_ON(!work->saved_cred);
+
+ cred = current_cred();
+ revert_creds(work->saved_cred);
+ put_cred(cred);
+ work->saved_cred = NULL;
+}
+
+__le32 smb_map_generic_desired_access(__le32 daccess)
+{
+ if (daccess & FILE_GENERIC_READ_LE) {
+ daccess |= cpu_to_le32(GENERIC_READ_FLAGS);
+ daccess &= ~FILE_GENERIC_READ_LE;
+ }
+
+ if (daccess & FILE_GENERIC_WRITE_LE) {
+ daccess |= cpu_to_le32(GENERIC_WRITE_FLAGS);
+ daccess &= ~FILE_GENERIC_WRITE_LE;
+ }
+
+ if (daccess & FILE_GENERIC_EXECUTE_LE) {
+ daccess |= cpu_to_le32(GENERIC_EXECUTE_FLAGS);
+ daccess &= ~FILE_GENERIC_EXECUTE_LE;
+ }
+
+ if (daccess & FILE_GENERIC_ALL_LE) {
+ daccess |= cpu_to_le32(GENERIC_ALL_FLAGS);
+ daccess &= ~FILE_GENERIC_ALL_LE;
+ }
+
+ return daccess;
+}
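+
+/*
+ * Usage sketch (assuming FILE_GENERIC_READ_LE/FILE_GENERIC_WRITE_LE carry
+ * the GENERIC_READ/GENERIC_WRITE bits):
+ *
+ *	__le32 daccess = cpu_to_le32(GENERIC_READ | GENERIC_WRITE);
+ *
+ *	daccess = smb_map_generic_desired_access(daccess);
+ *
+ * daccess now holds GENERIC_READ_FLAGS | GENERIC_WRITE_FLAGS and the
+ * generic bits themselves are cleared.
+ */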
diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h
new file mode 100644
index 000000000000..eb667d85558e
--- /dev/null
+++ b/fs/ksmbd/smb_common.h
@@ -0,0 +1,542 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __SMB_COMMON_H__
+#define __SMB_COMMON_H__
+
+#include <linux/kernel.h>
+
+#include "glob.h"
+#include "nterr.h"
+#include "smb2pdu.h"
+
+/* ksmbd-specific errno values */
+#define ESHARE 50000
+
+#define SMB1_PROT 0
+#define SMB2_PROT 1
+#define SMB21_PROT 2
+/* multi-protocol negotiate request */
+#define SMB2X_PROT 3
+#define SMB30_PROT 4
+#define SMB302_PROT 5
+#define SMB311_PROT 6
+#define BAD_PROT 0xFFFF
+
+#define SMB1_VERSION_STRING "1.0"
+#define SMB20_VERSION_STRING "2.0"
+#define SMB21_VERSION_STRING "2.1"
+#define SMB30_VERSION_STRING "3.0"
+#define SMB302_VERSION_STRING "3.02"
+#define SMB311_VERSION_STRING "3.1.1"
+
+/* Dialects */
+#define SMB10_PROT_ID 0x00
+#define SMB20_PROT_ID 0x0202
+#define SMB21_PROT_ID 0x0210
+/* multi-protocol negotiate request */
+#define SMB2X_PROT_ID 0x02FF
+#define SMB30_PROT_ID 0x0300
+#define SMB302_PROT_ID 0x0302
+#define SMB311_PROT_ID 0x0311
+#define BAD_PROT_ID 0xFFFF
+
+#define SMB_ECHO_INTERVAL (60 * HZ)
+
+#define CIFS_DEFAULT_IOSIZE (64 * 1024)
+#define MAX_CIFS_SMALL_BUFFER_SIZE 448 /* big enough for most */
+
+/* RFC 1002 session packet types */
+#define RFC1002_SESSION_MESSAGE 0x00
+#define RFC1002_SESSION_REQUEST 0x81
+#define RFC1002_POSITIVE_SESSION_RESPONSE 0x82
+#define RFC1002_NEGATIVE_SESSION_RESPONSE 0x83
+#define RFC1002_RETARGET_SESSION_RESPONSE 0x84
+#define RFC1002_SESSION_KEEP_ALIVE 0x85
+
+/* Responses when opening a file. */
+#define F_SUPERSEDED 0
+#define F_OPENED 1
+#define F_CREATED 2
+#define F_OVERWRITTEN 3
+
+/*
+ * File Attribute flags
+ */
+#define ATTR_READONLY 0x0001
+#define ATTR_HIDDEN 0x0002
+#define ATTR_SYSTEM 0x0004
+#define ATTR_VOLUME 0x0008
+#define ATTR_DIRECTORY 0x0010
+#define ATTR_ARCHIVE 0x0020
+#define ATTR_DEVICE 0x0040
+#define ATTR_NORMAL 0x0080
+#define ATTR_TEMPORARY 0x0100
+#define ATTR_SPARSE 0x0200
+#define ATTR_REPARSE 0x0400
+#define ATTR_COMPRESSED 0x0800
+#define ATTR_OFFLINE 0x1000
+#define ATTR_NOT_CONTENT_INDEXED 0x2000
+#define ATTR_ENCRYPTED 0x4000
+#define ATTR_POSIX_SEMANTICS 0x01000000
+#define ATTR_BACKUP_SEMANTICS 0x02000000
+#define ATTR_DELETE_ON_CLOSE 0x04000000
+#define ATTR_SEQUENTIAL_SCAN 0x08000000
+#define ATTR_RANDOM_ACCESS 0x10000000
+#define ATTR_NO_BUFFERING 0x20000000
+#define ATTR_WRITE_THROUGH 0x80000000
+
+#define ATTR_READONLY_LE cpu_to_le32(ATTR_READONLY)
+#define ATTR_HIDDEN_LE cpu_to_le32(ATTR_HIDDEN)
+#define ATTR_SYSTEM_LE cpu_to_le32(ATTR_SYSTEM)
+#define ATTR_DIRECTORY_LE cpu_to_le32(ATTR_DIRECTORY)
+#define ATTR_ARCHIVE_LE cpu_to_le32(ATTR_ARCHIVE)
+#define ATTR_NORMAL_LE cpu_to_le32(ATTR_NORMAL)
+#define ATTR_TEMPORARY_LE cpu_to_le32(ATTR_TEMPORARY)
+#define ATTR_SPARSE_FILE_LE cpu_to_le32(ATTR_SPARSE)
+#define ATTR_REPARSE_POINT_LE cpu_to_le32(ATTR_REPARSE)
+#define ATTR_COMPRESSED_LE cpu_to_le32(ATTR_COMPRESSED)
+#define ATTR_OFFLINE_LE cpu_to_le32(ATTR_OFFLINE)
+#define ATTR_NOT_CONTENT_INDEXED_LE cpu_to_le32(ATTR_NOT_CONTENT_INDEXED)
+#define ATTR_ENCRYPTED_LE cpu_to_le32(ATTR_ENCRYPTED)
+#define ATTR_INTEGRITY_STREAML_LE cpu_to_le32(0x00008000)
+#define ATTR_NO_SCRUB_DATA_LE cpu_to_le32(0x00020000)
+#define ATTR_MASK_LE cpu_to_le32(0x00007FB7)
+
+/* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */
+#define FILE_SUPPORTS_SPARSE_VDL 0x10000000 /* faster nonsparse extend */
+#define FILE_SUPPORTS_BLOCK_REFCOUNTING 0x08000000 /* allow ioctl dup extents */
+#define FILE_SUPPORT_INTEGRITY_STREAMS 0x04000000
+#define FILE_SUPPORTS_USN_JOURNAL 0x02000000
+#define FILE_SUPPORTS_OPEN_BY_FILE_ID 0x01000000
+#define FILE_SUPPORTS_EXTENDED_ATTRIBUTES 0x00800000
+#define FILE_SUPPORTS_HARD_LINKS 0x00400000
+#define FILE_SUPPORTS_TRANSACTIONS 0x00200000
+#define FILE_SEQUENTIAL_WRITE_ONCE 0x00100000
+#define FILE_READ_ONLY_VOLUME 0x00080000
+#define FILE_NAMED_STREAMS 0x00040000
+#define FILE_SUPPORTS_ENCRYPTION 0x00020000
+#define FILE_SUPPORTS_OBJECT_IDS 0x00010000
+#define FILE_VOLUME_IS_COMPRESSED 0x00008000
+#define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100
+#define FILE_SUPPORTS_REPARSE_POINTS 0x00000080
+#define FILE_SUPPORTS_SPARSE_FILES 0x00000040
+#define FILE_VOLUME_QUOTAS 0x00000020
+#define FILE_FILE_COMPRESSION 0x00000010
+#define FILE_PERSISTENT_ACLS 0x00000008
+#define FILE_UNICODE_ON_DISK 0x00000004
+#define FILE_CASE_PRESERVED_NAMES 0x00000002
+#define FILE_CASE_SENSITIVE_SEARCH 0x00000001
+
+#define FILE_READ_DATA 0x00000001 /* Data can be read from the file */
+#define FILE_WRITE_DATA 0x00000002 /* Data can be written to the file */
+#define FILE_APPEND_DATA 0x00000004 /* Data can be appended to the file */
+#define FILE_READ_EA 0x00000008 /* Extended attributes associated */
+/* with the file can be read */
+#define FILE_WRITE_EA 0x00000010 /* Extended attributes associated */
+/* with the file can be written */
+#define FILE_EXECUTE 0x00000020 /*Data can be read into memory from */
+/* the file using system paging I/O */
+#define FILE_DELETE_CHILD 0x00000040
+#define FILE_READ_ATTRIBUTES 0x00000080 /* Attributes associated with the */
+/* file can be read */
+#define FILE_WRITE_ATTRIBUTES 0x00000100 /* Attributes associated with the */
+/* file can be written */
+#define DELETE 0x00010000 /* The file can be deleted */
+#define READ_CONTROL 0x00020000 /* The access control list and */
+/* ownership associated with the */
+/* file can be read */
+#define WRITE_DAC 0x00040000 /* The access control list and */
+/* ownership associated with the */
+/* file can be written. */
+#define WRITE_OWNER 0x00080000 /* Ownership information associated */
+/* with the file can be written */
+#define SYNCHRONIZE 0x00100000 /* The file handle can be waited on to */
+/* synchronize with the completion */
+/* of an input/output request */
+#define GENERIC_ALL 0x10000000
+#define GENERIC_EXECUTE 0x20000000
+#define GENERIC_WRITE 0x40000000
+#define GENERIC_READ 0x80000000
+/* In summary - Relevant file */
+/* access flags from CIFS are */
+/* file_read_data, file_write_data */
+/* file_execute, file_read_attributes*/
+/* write_dac, and delete. */
+
+#define FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA | FILE_READ_ATTRIBUTES)
+#define FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \
+ | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES)
+#define FILE_EXEC_RIGHTS (FILE_EXECUTE)
+
+#define SET_FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA \
+ | FILE_READ_ATTRIBUTES \
+ | DELETE | READ_CONTROL | WRITE_DAC \
+ | WRITE_OWNER | SYNCHRONIZE)
+#define SET_FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \
+ | FILE_WRITE_EA \
+ | FILE_DELETE_CHILD \
+ | FILE_WRITE_ATTRIBUTES \
+ | DELETE | READ_CONTROL | WRITE_DAC \
+ | WRITE_OWNER | SYNCHRONIZE)
+#define SET_FILE_EXEC_RIGHTS (FILE_READ_EA | FILE_WRITE_EA | FILE_EXECUTE \
+ | FILE_READ_ATTRIBUTES \
+ | FILE_WRITE_ATTRIBUTES \
+ | DELETE | READ_CONTROL | WRITE_DAC \
+ | WRITE_OWNER | SYNCHRONIZE)
+
+#define SET_MINIMUM_RIGHTS (FILE_READ_EA | FILE_READ_ATTRIBUTES \
+ | READ_CONTROL | SYNCHRONIZE)
+
+/* generic flags for file open */
+#define GENERIC_READ_FLAGS (READ_CONTROL | FILE_READ_DATA | \
+ FILE_READ_ATTRIBUTES | \
+ FILE_READ_EA | SYNCHRONIZE)
+
+#define GENERIC_WRITE_FLAGS (READ_CONTROL | FILE_WRITE_DATA | \
+ FILE_WRITE_ATTRIBUTES | FILE_WRITE_EA | \
+ FILE_APPEND_DATA | SYNCHRONIZE)
+
+#define GENERIC_EXECUTE_FLAGS (READ_CONTROL | FILE_EXECUTE | \
+ FILE_READ_ATTRIBUTES | SYNCHRONIZE)
+
+#define GENERIC_ALL_FLAGS (DELETE | READ_CONTROL | WRITE_DAC | \
+ WRITE_OWNER | SYNCHRONIZE | FILE_READ_DATA | \
+ FILE_WRITE_DATA | FILE_APPEND_DATA | \
+ FILE_READ_EA | FILE_WRITE_EA | \
+ FILE_EXECUTE | FILE_DELETE_CHILD | \
+ FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES)
+
+#define SMB1_PROTO_NUMBER cpu_to_le32(0x424d53ff)
+
+#define SMB1_CLIENT_GUID_SIZE (16)
+struct smb_hdr {
+ __be32 smb_buf_length;
+ __u8 Protocol[4];
+ __u8 Command;
+ union {
+ struct {
+ __u8 ErrorClass;
+ __u8 Reserved;
+ __le16 Error;
+ } __packed DosError;
+ __le32 CifsError;
+ } __packed Status;
+ __u8 Flags;
+ __le16 Flags2; /* note: le */
+ __le16 PidHigh;
+ union {
+ struct {
+ __le32 SequenceNumber; /* le */
+ __u32 Reserved; /* zero */
+ } __packed Sequence;
+ __u8 SecuritySignature[8]; /* le */
+ } __packed Signature;
+ __u8 pad[2];
+ __le16 Tid;
+ __le16 Pid;
+ __le16 Uid;
+ __le16 Mid;
+ __u8 WordCount;
+} __packed;
+
+struct smb_negotiate_req {
+ struct smb_hdr hdr; /* wct = 0 */
+ __le16 ByteCount;
+ unsigned char DialectsArray[1];
+} __packed;
+
+struct smb_negotiate_rsp {
+ struct smb_hdr hdr; /* wct = 17 */
+ __le16 DialectIndex; /* 0xFFFF = no dialect acceptable */
+ __u8 SecurityMode;
+ __le16 MaxMpxCount;
+ __le16 MaxNumberVcs;
+ __le32 MaxBufferSize;
+ __le32 MaxRawSize;
+ __le32 SessionKey;
+ __le32 Capabilities; /* see below */
+ __le32 SystemTimeLow;
+ __le32 SystemTimeHigh;
+ __le16 ServerTimeZone;
+ __u8 EncryptionKeyLength;
+ __le16 ByteCount;
+ union {
+ unsigned char EncryptionKey[8]; /* cap extended security off */
+ /* followed by Domain name - if extended security is off */
+ /* followed by 16 bytes of server GUID */
+ /* then security blob if cap_extended_security negotiated */
+ struct {
+ unsigned char GUID[SMB1_CLIENT_GUID_SIZE];
+ unsigned char SecurityBlob[1];
+ } __packed extended_response;
+ } __packed u;
+} __packed;
+
+struct filesystem_attribute_info {
+ __le32 Attributes;
+ __le32 MaxPathNameComponentLength;
+ __le32 FileSystemNameLen;
+ __le16 FileSystemName[1]; /* do not have to save this - get subset? */
+} __packed;
+
+struct filesystem_device_info {
+ __le32 DeviceType;
+ __le32 DeviceCharacteristics;
+} __packed; /* device info level 0x104 */
+
+struct filesystem_vol_info {
+ __le64 VolumeCreationTime;
+ __le32 SerialNumber;
+ __le32 VolumeLabelSize;
+ __le16 Reserved;
+ __le16 VolumeLabel[1];
+} __packed;
+
+struct filesystem_info {
+ __le64 TotalAllocationUnits;
+ __le64 FreeAllocationUnits;
+ __le32 SectorsPerAllocationUnit;
+ __le32 BytesPerSector;
+} __packed; /* size info, level 0x103 */
+
+#define EXTENDED_INFO_MAGIC 0x43667364 /* Cfsd */
+#define STRING_LENGTH 28
+
+struct fs_extended_info {
+ __le32 magic;
+ __le32 version;
+ __le32 release;
+ __u64 rel_date;
+ char version_string[STRING_LENGTH];
+} __packed;
+
+struct object_id_info {
+ char objid[16];
+ struct fs_extended_info extended_info;
+} __packed;
+
+struct file_directory_info {
+ __le32 NextEntryOffset;
+ __u32 FileIndex;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 ExtFileAttributes;
+ __le32 FileNameLength;
+ char FileName[1];
+} __packed; /* level 0x101 FF resp data */
+
+struct file_names_info {
+ __le32 NextEntryOffset;
+ __u32 FileIndex;
+ __le32 FileNameLength;
+ char FileName[1];
+} __packed; /* level 0xc FF resp data */
+
+struct file_full_directory_info {
+ __le32 NextEntryOffset;
+ __u32 FileIndex;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 ExtFileAttributes;
+ __le32 FileNameLength;
+ __le32 EaSize;
+ char FileName[1];
+} __packed; /* level 0x102 FF resp */
+
+struct file_both_directory_info {
+ __le32 NextEntryOffset;
+ __u32 FileIndex;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 ExtFileAttributes;
+ __le32 FileNameLength;
+ __le32 EaSize; /* length of the xattrs */
+ __u8 ShortNameLength;
+ __u8 Reserved;
+ __u8 ShortName[24];
+ char FileName[1];
+} __packed; /* level 0x104 FFrsp data */
+
+struct file_id_both_directory_info {
+ __le32 NextEntryOffset;
+ __u32 FileIndex;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 ExtFileAttributes;
+ __le32 FileNameLength;
+ __le32 EaSize; /* length of the xattrs */
+ __u8 ShortNameLength;
+ __u8 Reserved;
+ __u8 ShortName[24];
+ __le16 Reserved2;
+ __le64 UniqueId;
+ char FileName[1];
+} __packed;
+
+struct file_id_full_dir_info {
+ __le32 NextEntryOffset;
+ __u32 FileIndex;
+ __le64 CreationTime;
+ __le64 LastAccessTime;
+ __le64 LastWriteTime;
+ __le64 ChangeTime;
+ __le64 EndOfFile;
+ __le64 AllocationSize;
+ __le32 ExtFileAttributes;
+ __le32 FileNameLength;
+ __le32 EaSize; /* EA size */
+ __le32 Reserved;
+ __le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bits */
+ char FileName[1];
+} __packed; /* level 0x105 FF rsp data */
+
+struct smb_version_values {
+ char *version_string;
+ __u16 protocol_id;
+ __le16 lock_cmd;
+ __u32 capabilities;
+ __u32 max_read_size;
+ __u32 max_write_size;
+ __u32 max_trans_size;
+ __u32 large_lock_type;
+ __u32 exclusive_lock_type;
+ __u32 shared_lock_type;
+ __u32 unlock_lock_type;
+ size_t header_size;
+ size_t max_header_size;
+ size_t read_rsp_size;
+ unsigned int cap_unix;
+ unsigned int cap_nt_find;
+ unsigned int cap_large_files;
+ __u16 signing_enabled;
+ __u16 signing_required;
+ size_t create_lease_size;
+ size_t create_durable_size;
+ size_t create_durable_v2_size;
+ size_t create_mxac_size;
+ size_t create_disk_id_size;
+ size_t create_posix_size;
+};
+
+struct filesystem_posix_info {
+ /* For undefined recommended transfer size return -1 in that field */
+ __le32 OptimalTransferSize; /* bsize on some os, iosize on other os */
+ __le32 BlockSize;
+ /* The next three fields are in terms of the block size above.
+ * If the block size is unknown, 4096 would be a reasonable
+ * block size for a server to report.
+ * Note that returning the blocks/blocksavail removes the need
+ * to make a second call (to QFSInfo level 0x103) to get this info.
+ * UserBlockAvail is typically less than or equal to BlocksAvail;
+ * if no distinction is made, return the same value in each.
+ */
+ __le64 TotalBlocks;
+ __le64 BlocksAvail; /* bfree */
+ __le64 UserBlocksAvail; /* bavail */
+ /* For undefined Node fields or FSID return -1 */
+ __le64 TotalFileNodes;
+ __le64 FreeFileNodes;
+ __le64 FileSysIdentifier; /* fsid */
+ /* NB Namelen comes from FILE_SYSTEM_ATTRIBUTE_INFO call */
+ /* NB flags can come from FILE_SYSTEM_DEVICE_INFO call */
+} __packed;
+
+struct smb_version_ops {
+ u16 (*get_cmd_val)(struct ksmbd_work *swork);
+ int (*init_rsp_hdr)(struct ksmbd_work *swork);
+ void (*set_rsp_status)(struct ksmbd_work *swork, __le32 err);
+ int (*allocate_rsp_buf)(struct ksmbd_work *work);
+ int (*set_rsp_credits)(struct ksmbd_work *work);
+ int (*check_user_session)(struct ksmbd_work *work);
+ int (*get_ksmbd_tcon)(struct ksmbd_work *work);
+ bool (*is_sign_req)(struct ksmbd_work *work, unsigned int command);
+ int (*check_sign_req)(struct ksmbd_work *work);
+ void (*set_sign_rsp)(struct ksmbd_work *work);
+ int (*generate_signingkey)(struct ksmbd_session *sess, struct ksmbd_conn *conn);
+ int (*generate_encryptionkey)(struct ksmbd_session *sess);
+ bool (*is_transform_hdr)(void *buf);
+ int (*decrypt_req)(struct ksmbd_work *work);
+ int (*encrypt_resp)(struct ksmbd_work *work);
+};
+
+struct smb_version_cmds {
+ int (*proc)(struct ksmbd_work *swork);
+};
+
+static inline size_t
+smb2_hdr_size_no_buflen(struct smb_version_values *vals)
+{
+ return vals->header_size - 4;
+}
+
+int ksmbd_min_protocol(void);
+int ksmbd_max_protocol(void);
+
+int ksmbd_lookup_protocol_idx(char *str);
+
+int ksmbd_verify_smb_message(struct ksmbd_work *work);
+bool ksmbd_smb_request(struct ksmbd_conn *conn);
+
+int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count);
+
+int ksmbd_init_smb_server(struct ksmbd_work *work);
+
+bool ksmbd_pdu_size_has_room(unsigned int pdu);
+
+struct ksmbd_kstat;
+int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work,
+ int info_level,
+ struct ksmbd_file *dir,
+ struct ksmbd_dir_info *d_info,
+ char *search_pattern,
+ int (*fn)(struct ksmbd_conn *,
+ int,
+ struct ksmbd_dir_info *,
+ struct user_namespace *,
+ struct ksmbd_kstat *));
+
+int ksmbd_extract_shortname(struct ksmbd_conn *conn,
+ const char *longname,
+ char *shortname);
+
+int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command);
+
+int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp);
+int ksmbd_override_fsids(struct ksmbd_work *work);
+void ksmbd_revert_fsids(struct ksmbd_work *work);
+
+unsigned int ksmbd_server_side_copy_max_chunk_count(void);
+unsigned int ksmbd_server_side_copy_max_chunk_size(void);
+unsigned int ksmbd_server_side_copy_max_total_size(void);
+bool is_asterisk(char *p);
+__le32 smb_map_generic_desired_access(__le32 daccess);
+
+static inline unsigned int get_rfc1002_len(void *buf)
+{
+ return be32_to_cpu(*((__be32 *)buf)) & 0xffffff;
+}
+
+static inline void inc_rfc1001_len(void *buf, int count)
+{
+ be32_add_cpu((__be32 *)buf, count);
+}
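+
+/*
+ * Usage sketch: every SMB PDU is preceded by a 4-byte RFC 1002 header
+ * whose low 24 bits carry the PDU length, e.g.
+ *
+ *	u8 pkt[4] = { 0x00, 0x00, 0x00, 0x0a };
+ *
+ *	get_rfc1002_len(pkt);	 returns 10
+ *	inc_rfc1001_len(pkt, 6); the length field becomes 16
+ */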
+#endif /* __SMB_COMMON_H__ */
diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c
new file mode 100644
index 000000000000..5456e3ad943e
--- /dev/null
+++ b/fs/ksmbd/smbacl.c
@@ -0,0 +1,1366 @@
+// SPDX-License-Identifier: LGPL-2.1+
+/*
+ * Copyright (C) International Business Machines Corp., 2007,2008
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ * Copyright (C) 2020 Samsung Electronics Co., Ltd.
+ * Author(s): Namjae Jeon <linkinjeon@kernel.org>
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "smbacl.h"
+#include "smb_common.h"
+#include "server.h"
+#include "misc.h"
+#include "mgmt/share_config.h"
+
+static const struct smb_sid domain = {1, 4, {0, 0, 0, 0, 0, 5},
+ {cpu_to_le32(21), cpu_to_le32(1), cpu_to_le32(2), cpu_to_le32(3),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* security id for the CREATOR OWNER well-known SID (S-1-3-0) */
+static const struct smb_sid creator_owner = {
+ 1, 1, {0, 0, 0, 0, 0, 3}, {0} };
+/* security id for the CREATOR GROUP well-known SID (S-1-3-1) */
+static const struct smb_sid creator_group = {
+ 1, 1, {0, 0, 0, 0, 0, 3}, {cpu_to_le32(1)} };
+
+/* security id for everyone/world system group */
+static const struct smb_sid sid_everyone = {
+ 1, 1, {0, 0, 0, 0, 0, 1}, {0} };
+/* security id for Authenticated Users system group */
+static const struct smb_sid sid_authusers = {
+ 1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} };
+
+/* S-1-22-1 Unmapped Unix users */
+static const struct smb_sid sid_unix_users = {1, 1, {0, 0, 0, 0, 0, 22},
+ {cpu_to_le32(1), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-22-2 Unmapped Unix groups */
+static const struct smb_sid sid_unix_groups = { 1, 1, {0, 0, 0, 0, 0, 22},
+ {cpu_to_le32(2), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/*
+ * See http://technet.microsoft.com/en-us/library/hh509017(v=ws.10).aspx
+ */
+
+/* S-1-5-88 MS NFS and Apple style UID/GID/mode */
+
+/* S-1-5-88-1 Unix uid */
+static const struct smb_sid sid_unix_NFS_users = { 1, 2, {0, 0, 0, 0, 0, 5},
+ {cpu_to_le32(88),
+ cpu_to_le32(1), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-5-88-2 Unix gid */
+static const struct smb_sid sid_unix_NFS_groups = { 1, 2, {0, 0, 0, 0, 0, 5},
+ {cpu_to_le32(88),
+ cpu_to_le32(2), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-5-88-3 Unix mode */
+static const struct smb_sid sid_unix_NFS_mode = { 1, 2, {0, 0, 0, 0, 0, 5},
+ {cpu_to_le32(88),
+ cpu_to_le32(3), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/*
+ * Compare two SIDs (roughly equivalent to a UUID for a user or group):
+ * returns zero if they are the same, non-zero if they differ.
+ */
+int compare_sids(const struct smb_sid *ctsid, const struct smb_sid *cwsid)
+{
+ int i;
+ int num_subauth, num_sat, num_saw;
+
+ if (!ctsid || !cwsid)
+ return 1;
+
+ /* compare the revision */
+ if (ctsid->revision != cwsid->revision) {
+ if (ctsid->revision > cwsid->revision)
+ return 1;
+ else
+ return -1;
+ }
+
+ /* compare all of the six auth values */
+ for (i = 0; i < NUM_AUTHS; ++i) {
+ if (ctsid->authority[i] != cwsid->authority[i]) {
+ if (ctsid->authority[i] > cwsid->authority[i])
+ return 1;
+ else
+ return -1;
+ }
+ }
+
+ /* compare all of the subauth values if any */
+ num_sat = ctsid->num_subauth;
+ num_saw = cwsid->num_subauth;
+ num_subauth = num_sat < num_saw ? num_sat : num_saw;
+ if (num_subauth) {
+ for (i = 0; i < num_subauth; ++i) {
+ if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) {
+ if (le32_to_cpu(ctsid->sub_auth[i]) >
+ le32_to_cpu(cwsid->sub_auth[i]))
+ return 1;
+ else
+ return -1;
+ }
+ }
+ }
+
+ return 0; /* sids compare/match */
+}
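+
+/*
+ * Usage note: callers test for zero to mean "same SID", e.g. (ace being a
+ * hypothetical struct smb_ace pointer):
+ *
+ *	if (!compare_sids(&ace->sid, &sid_unix_NFS_mode))
+ *		the ACE carries the NFS mode SID
+ */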
+
+static void smb_copy_sid(struct smb_sid *dst, const struct smb_sid *src)
+{
+ int i;
+
+ dst->revision = src->revision;
+ dst->num_subauth = min_t(u8, src->num_subauth, SID_MAX_SUB_AUTHORITIES);
+ for (i = 0; i < NUM_AUTHS; ++i)
+ dst->authority[i] = src->authority[i];
+ for (i = 0; i < dst->num_subauth; ++i)
+ dst->sub_auth[i] = src->sub_auth[i];
+}
+
+/*
+ * Change the posix mode to reflect the ACE permissions. Only part of the
+ * existing mode is overwritten; the bits to set can be S_IRWXU, S_IRWXG or
+ * S_IRWXO, i.e. 00700, 00070 or 00007.
+ */
+static umode_t access_flags_to_mode(struct smb_fattr *fattr, __le32 ace_flags,
+ int type)
+{
+ __u32 flags = le32_to_cpu(ace_flags);
+ umode_t mode = 0;
+
+ if (flags & GENERIC_ALL) {
+ mode = 0777;
+ ksmbd_debug(SMB, "all perms\n");
+ return mode;
+ }
+
+ if ((flags & GENERIC_READ) || (flags & FILE_READ_RIGHTS))
+ mode = 0444;
+ if ((flags & GENERIC_WRITE) || (flags & FILE_WRITE_RIGHTS)) {
+ mode |= 0222;
+ if (S_ISDIR(fattr->cf_mode))
+ mode |= 0111;
+ }
+ if ((flags & GENERIC_EXECUTE) || (flags & FILE_EXEC_RIGHTS))
+ mode |= 0111;
+
+ if (type == ACCESS_DENIED_ACE_TYPE || type == ACCESS_DENIED_OBJECT_ACE_TYPE)
+ mode = ~mode;
+
+ ksmbd_debug(SMB, "access flags 0x%x mode now %04o\n", flags, mode);
+
+ return mode;
+}
+
+/*
+ * Generate access flags to reflect permissions mode is the existing mode.
+ * This function is called for every ACE in the DACL whose SID matches
+ * with either owner or group or everyone.
+ */
+static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
+ __u32 *pace_flags)
+{
+ /* reset access mask */
+ *pace_flags = 0x0;
+
+ /* bits to use are either S_IRWXU or S_IRWXG or S_IRWXO */
+ mode &= bits_to_use;
+
+ /*
+ * check for R/W/X UGO since we do not know whose flags
+ * these are; all bits except RWX for user, group or other
+ * (as selected by bits_to_use) have already been cleared
+ */
+ if (mode & 0444)
+ *pace_flags |= SET_FILE_READ_RIGHTS;
+ if (mode & 0222)
+ *pace_flags |= FILE_WRITE_RIGHTS;
+ if (mode & 0111)
+ *pace_flags |= SET_FILE_EXEC_RIGHTS;
+
+ ksmbd_debug(SMB, "mode: %o, access flags now 0x%x\n",
+ mode, *pace_flags);
+}
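+
+/*
+ * Example (illustrative): for mode 0644, bits_to_use == S_IRWXU yields
+ * SET_FILE_READ_RIGHTS | FILE_WRITE_RIGHTS for the owner ACE, while
+ * bits_to_use == S_IRWXO yields SET_FILE_READ_RIGHTS only for the
+ * everyone ACE.
+ */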
+
+static __u16 fill_ace_for_sid(struct smb_ace *pntace,
+ const struct smb_sid *psid, int type, int flags,
+ umode_t mode, umode_t bits)
+{
+ int i;
+ __u16 size = 0;
+ __u32 access_req = 0;
+
+ pntace->type = type;
+ pntace->flags = flags;
+ mode_to_access_flags(mode, bits, &access_req);
+ if (!access_req)
+ access_req = SET_MINIMUM_RIGHTS;
+ pntace->access_req = cpu_to_le32(access_req);
+
+ pntace->sid.revision = psid->revision;
+ pntace->sid.num_subauth = psid->num_subauth;
+ for (i = 0; i < NUM_AUTHS; i++)
+ pntace->sid.authority[i] = psid->authority[i];
+ for (i = 0; i < psid->num_subauth; i++)
+ pntace->sid.sub_auth[i] = psid->sub_auth[i];
+
+ size = 1 + 1 + 2 + 4 + 1 + 1 + 6 + (psid->num_subauth * 4);
+ pntace->size = cpu_to_le16(size);
+
+ return size;
+}
+
+void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid)
+{
+ switch (sidtype) {
+ case SIDOWNER:
+ smb_copy_sid(ssid, &server_conf.domain_sid);
+ break;
+ case SIDUNIX_USER:
+ smb_copy_sid(ssid, &sid_unix_users);
+ break;
+ case SIDUNIX_GROUP:
+ smb_copy_sid(ssid, &sid_unix_groups);
+ break;
+ case SIDCREATOR_OWNER:
+ smb_copy_sid(ssid, &creator_owner);
+ return;
+ case SIDCREATOR_GROUP:
+ smb_copy_sid(ssid, &creator_group);
+ return;
+ case SIDNFS_USER:
+ smb_copy_sid(ssid, &sid_unix_NFS_users);
+ break;
+ case SIDNFS_GROUP:
+ smb_copy_sid(ssid, &sid_unix_NFS_groups);
+ break;
+ case SIDNFS_MODE:
+ smb_copy_sid(ssid, &sid_unix_NFS_mode);
+ break;
+ default:
+ return;
+ }
+
+ /* RID */
+ ssid->sub_auth[ssid->num_subauth] = cpu_to_le32(cid);
+ ssid->num_subauth++;
+}
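+
+/*
+ * Example (illustrative): id_to_sid(1000, SIDUNIX_USER, &sid) produces
+ * S-1-22-1-1000: the sid_unix_users template with the uid appended as a
+ * final RID subauthority.
+ */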
+
+static int sid_to_id(struct user_namespace *user_ns,
+ struct smb_sid *psid, uint sidtype,
+ struct smb_fattr *fattr)
+{
+ int rc = -EINVAL;
+
+ /*
+ * If we have too many subauthorities, then something is really wrong.
+ * Just return an error.
+ */
+ if (unlikely(psid->num_subauth > SID_MAX_SUB_AUTHORITIES)) {
+ pr_err("%s: %u subauthorities is too many!\n",
+ __func__, psid->num_subauth);
+ return -EIO;
+ }
+
+ if (sidtype == SIDOWNER) {
+ kuid_t uid;
+ uid_t id;
+
+ id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
+ if (id > 0) {
+ uid = make_kuid(user_ns, id);
+ if (uid_valid(uid) && kuid_has_mapping(user_ns, uid)) {
+ fattr->cf_uid = uid;
+ rc = 0;
+ }
+ }
+ } else {
+ kgid_t gid;
+ gid_t id;
+
+ id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
+ if (id > 0) {
+ gid = make_kgid(user_ns, id);
+ if (gid_valid(gid) && kgid_has_mapping(user_ns, gid)) {
+ fattr->cf_gid = gid;
+ rc = 0;
+ }
+ }
+ }
+
+ return rc;
+}
+
+void posix_state_to_acl(struct posix_acl_state *state,
+ struct posix_acl_entry *pace)
+{
+ int i;
+
+ pace->e_tag = ACL_USER_OBJ;
+ pace->e_perm = state->owner.allow;
+ for (i = 0; i < state->users->n; i++) {
+ pace++;
+ pace->e_tag = ACL_USER;
+ pace->e_uid = state->users->aces[i].uid;
+ pace->e_perm = state->users->aces[i].perms.allow;
+ }
+
+ pace++;
+ pace->e_tag = ACL_GROUP_OBJ;
+ pace->e_perm = state->group.allow;
+
+ for (i = 0; i < state->groups->n; i++) {
+ pace++;
+ pace->e_tag = ACL_GROUP;
+ pace->e_gid = state->groups->aces[i].gid;
+ pace->e_perm = state->groups->aces[i].perms.allow;
+ }
+
+ if (state->users->n || state->groups->n) {
+ pace++;
+ pace->e_tag = ACL_MASK;
+ pace->e_perm = state->mask.allow;
+ }
+
+ pace++;
+ pace->e_tag = ACL_OTHER;
+ pace->e_perm = state->other.allow;
+}
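+
+/*
+ * Illustrative result for one named user and one named group:
+ * USER_OBJ, USER, GROUP_OBJ, GROUP, MASK, OTHER - i.e.
+ * users->n + groups->n + 4 entries, matching the sizes the callers
+ * pass to posix_acl_alloc().
+ */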
+
+int init_acl_state(struct posix_acl_state *state, int cnt)
+{
+ int alloc;
+
+ memset(state, 0, sizeof(struct posix_acl_state));
+ /*
+ * In the worst case, each individual acl could be for a distinct
+ * named user or group, but we don't know which, so we allocate
+ * enough space for either:
+ */
+ alloc = sizeof(struct posix_ace_state_array)
+ + cnt * sizeof(struct posix_user_ace_state);
+ state->users = kzalloc(alloc, GFP_KERNEL);
+ if (!state->users)
+ return -ENOMEM;
+ state->groups = kzalloc(alloc, GFP_KERNEL);
+ if (!state->groups) {
+ kfree(state->users);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void free_acl_state(struct posix_acl_state *state)
+{
+ kfree(state->users);
+ kfree(state->groups);
+}
+
+static void parse_dacl(struct user_namespace *user_ns,
+ struct smb_acl *pdacl, char *end_of_acl,
+ struct smb_sid *pownersid, struct smb_sid *pgrpsid,
+ struct smb_fattr *fattr)
+{
+ int i, ret;
+ int num_aces = 0;
+ int acl_size;
+ char *acl_base;
+ struct smb_ace **ppace;
+ struct posix_acl_entry *cf_pace, *cf_pdace;
+ struct posix_acl_state acl_state, default_acl_state;
+ umode_t mode = 0, acl_mode;
+ bool owner_found = false, group_found = false, others_found = false;
+
+ if (!pdacl)
+ return;
+
+ /* validate that we do not go past end of acl */
+ if (end_of_acl <= (char *)pdacl ||
+ end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
+ pr_err("ACL too small to parse DACL\n");
+ return;
+ }
+
+ ksmbd_debug(SMB, "DACL revision %d size %d num aces %d\n",
+ le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
+ le32_to_cpu(pdacl->num_aces));
+
+ acl_base = (char *)pdacl;
+ acl_size = sizeof(struct smb_acl);
+
+ num_aces = le32_to_cpu(pdacl->num_aces);
+ if (num_aces <= 0)
+ return;
+
+ if (num_aces > ULONG_MAX / sizeof(struct smb_ace *))
+ return;
+
+ ppace = kmalloc_array(num_aces, sizeof(struct smb_ace *), GFP_KERNEL);
+ if (!ppace)
+ return;
+
+ ret = init_acl_state(&acl_state, num_aces);
+ if (ret) {
+ kfree(ppace);
+ return;
+ }
+ ret = init_acl_state(&default_acl_state, num_aces);
+ if (ret) {
+ free_acl_state(&acl_state);
+ kfree(ppace);
+ return;
+ }
+
+ /*
+ * reset rwx permissions for user/group/other.
+ * Note: if num_aces were 0, i.e. the DACL had no ACEs,
+ * user/group/other would have no permissions; that case
+ * was handled by the early return above.
+ */
+ for (i = 0; i < num_aces; ++i) {
+ ppace[i] = (struct smb_ace *)(acl_base + acl_size);
+ acl_base = (char *)ppace[i];
+ acl_size = le16_to_cpu(ppace[i]->size);
+ ppace[i]->access_req =
+ smb_map_generic_desired_access(ppace[i]->access_req);
+
+ if (!(compare_sids(&ppace[i]->sid, &sid_unix_NFS_mode))) {
+ fattr->cf_mode =
+ le32_to_cpu(ppace[i]->sid.sub_auth[2]);
+ break;
+ } else if (!compare_sids(&ppace[i]->sid, pownersid)) {
+ acl_mode = access_flags_to_mode(fattr,
+ ppace[i]->access_req,
+ ppace[i]->type);
+ acl_mode &= 0700;
+
+ if (!owner_found) {
+ mode &= ~(0700);
+ mode |= acl_mode;
+ }
+ owner_found = true;
+ } else if (!compare_sids(&ppace[i]->sid, pgrpsid) ||
+ ppace[i]->sid.sub_auth[ppace[i]->sid.num_subauth - 1] ==
+ DOMAIN_USER_RID_LE) {
+ acl_mode = access_flags_to_mode(fattr,
+ ppace[i]->access_req,
+ ppace[i]->type);
+ acl_mode &= 0070;
+ if (!group_found) {
+ mode &= ~(0070);
+ mode |= acl_mode;
+ }
+ group_found = true;
+ } else if (!compare_sids(&ppace[i]->sid, &sid_everyone)) {
+ acl_mode = access_flags_to_mode(fattr,
+ ppace[i]->access_req,
+ ppace[i]->type);
+ acl_mode &= 0007;
+ if (!others_found) {
+ mode &= ~(0007);
+ mode |= acl_mode;
+ }
+ others_found = true;
+ } else if (!compare_sids(&ppace[i]->sid, &creator_owner)) {
+ continue;
+ } else if (!compare_sids(&ppace[i]->sid, &creator_group)) {
+ continue;
+ } else if (!compare_sids(&ppace[i]->sid, &sid_authusers)) {
+ continue;
+ } else {
+ struct smb_fattr temp_fattr;
+
+ acl_mode = access_flags_to_mode(fattr, ppace[i]->access_req,
+ ppace[i]->type);
+ temp_fattr.cf_uid = INVALID_UID;
+ ret = sid_to_id(user_ns, &ppace[i]->sid, SIDOWNER, &temp_fattr);
+ if (ret || uid_eq(temp_fattr.cf_uid, INVALID_UID)) {
+ pr_err("%s: Error %d mapping Owner SID to uid\n",
+ __func__, ret);
+ continue;
+ }
+
+ acl_state.owner.allow = ((acl_mode & 0700) >> 6) | 0004;
+ acl_state.users->aces[acl_state.users->n].uid =
+ temp_fattr.cf_uid;
+ acl_state.users->aces[acl_state.users->n++].perms.allow =
+ ((acl_mode & 0700) >> 6) | 0004;
+ default_acl_state.owner.allow = ((acl_mode & 0700) >> 6) | 0004;
+ default_acl_state.users->aces[default_acl_state.users->n].uid =
+ temp_fattr.cf_uid;
+ default_acl_state.users->aces[default_acl_state.users->n++].perms.allow =
+ ((acl_mode & 0700) >> 6) | 0004;
+ }
+ }
+ kfree(ppace);
+
+ if (owner_found) {
+ /* The owner must be set to at least read-only. */
+ acl_state.owner.allow = ((mode & 0700) >> 6) | 0004;
+ acl_state.users->aces[acl_state.users->n].uid = fattr->cf_uid;
+ acl_state.users->aces[acl_state.users->n++].perms.allow =
+ ((mode & 0700) >> 6) | 0004;
+ default_acl_state.owner.allow = ((mode & 0700) >> 6) | 0004;
+ default_acl_state.users->aces[default_acl_state.users->n].uid =
+ fattr->cf_uid;
+ default_acl_state.users->aces[default_acl_state.users->n++].perms.allow =
+ ((mode & 0700) >> 6) | 0004;
+ }
+
+ if (group_found) {
+ acl_state.group.allow = (mode & 0070) >> 3;
+ acl_state.groups->aces[acl_state.groups->n].gid =
+ fattr->cf_gid;
+ acl_state.groups->aces[acl_state.groups->n++].perms.allow =
+ (mode & 0070) >> 3;
+ default_acl_state.group.allow = (mode & 0070) >> 3;
+ default_acl_state.groups->aces[default_acl_state.groups->n].gid =
+ fattr->cf_gid;
+ default_acl_state.groups->aces[default_acl_state.groups->n++].perms.allow =
+ (mode & 0070) >> 3;
+ }
+
+ if (others_found) {
+ fattr->cf_mode &= ~(0007);
+ fattr->cf_mode |= mode & 0007;
+
+ acl_state.other.allow = mode & 0007;
+ default_acl_state.other.allow = mode & 0007;
+ }
+
+ if (acl_state.users->n || acl_state.groups->n) {
+ acl_state.mask.allow = 0x07;
+
+ if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) {
+ fattr->cf_acls =
+ posix_acl_alloc(acl_state.users->n +
+ acl_state.groups->n + 4, GFP_KERNEL);
+ if (fattr->cf_acls) {
+ cf_pace = fattr->cf_acls->a_entries;
+ posix_state_to_acl(&acl_state, cf_pace);
+ }
+ }
+ }
+
+ if (default_acl_state.users->n || default_acl_state.groups->n) {
+ default_acl_state.mask.allow = 0x07;
+
+ if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) {
+ fattr->cf_dacls =
+ posix_acl_alloc(default_acl_state.users->n +
+ default_acl_state.groups->n + 4, GFP_KERNEL);
+ if (fattr->cf_dacls) {
+ cf_pdace = fattr->cf_dacls->a_entries;
+ posix_state_to_acl(&default_acl_state, cf_pdace);
+ }
+ }
+ }
+ free_acl_state(&acl_state);
+ free_acl_state(&default_acl_state);
+}
+
+static void set_posix_acl_entries_dacl(struct user_namespace *user_ns,
+ struct smb_ace *pndace,
+ struct smb_fattr *fattr, u32 *num_aces,
+ u16 *size, u32 nt_aces_num)
+{
+ struct posix_acl_entry *pace;
+ struct smb_sid *sid;
+ struct smb_ace *ntace;
+ int i, j;
+
+ if (!fattr->cf_acls)
+ goto posix_default_acl;
+
+ pace = fattr->cf_acls->a_entries;
+ for (i = 0; i < fattr->cf_acls->a_count; i++, pace++) {
+ int flags = 0;
+
+ sid = kmalloc(sizeof(struct smb_sid), GFP_KERNEL);
+ if (!sid)
+ break;
+
+ if (pace->e_tag == ACL_USER) {
+ uid_t uid;
+ unsigned int sid_type = SIDOWNER;
+
+ uid = from_kuid(user_ns, pace->e_uid);
+ if (!uid)
+ sid_type = SIDUNIX_USER;
+ id_to_sid(uid, sid_type, sid);
+ } else if (pace->e_tag == ACL_GROUP) {
+ gid_t gid;
+
+ gid = from_kgid(user_ns, pace->e_gid);
+ id_to_sid(gid, SIDUNIX_GROUP, sid);
+ } else if (pace->e_tag == ACL_OTHER && !nt_aces_num) {
+ smb_copy_sid(sid, &sid_everyone);
+ } else {
+ kfree(sid);
+ continue;
+ }
+ ntace = pndace;
+ for (j = 0; j < nt_aces_num; j++) {
+ if (ntace->sid.sub_auth[ntace->sid.num_subauth - 1] ==
+ sid->sub_auth[sid->num_subauth - 1])
+ goto pass_same_sid;
+ ntace = (struct smb_ace *)((char *)ntace +
+ le16_to_cpu(ntace->size));
+ }
+
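+ /*
+ * 0x03 = OBJECT_INHERIT_ACE | CONTAINER_INHERIT_ACE; the 0x0b
+ * used below additionally sets INHERIT_ONLY_ACE.
+ */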
+ if (S_ISDIR(fattr->cf_mode) && pace->e_tag == ACL_OTHER)
+ flags = 0x03;
+
+ ntace = (struct smb_ace *)((char *)pndace + *size);
+ *size += fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED, flags,
+ pace->e_perm, 0777);
+ (*num_aces)++;
+ if (pace->e_tag == ACL_USER)
+ ntace->access_req |=
+ FILE_DELETE_LE | FILE_DELETE_CHILD_LE;
+
+ if (S_ISDIR(fattr->cf_mode) &&
+ (pace->e_tag == ACL_USER || pace->e_tag == ACL_GROUP)) {
+ ntace = (struct smb_ace *)((char *)pndace + *size);
+ *size += fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED,
+ 0x03, pace->e_perm, 0777);
+ (*num_aces)++;
+ if (pace->e_tag == ACL_USER)
+ ntace->access_req |=
+ FILE_DELETE_LE | FILE_DELETE_CHILD_LE;
+ }
+
+pass_same_sid:
+ kfree(sid);
+ }
+
+ if (nt_aces_num)
+ return;
+
+posix_default_acl:
+ if (!fattr->cf_dacls)
+ return;
+
+ pace = fattr->cf_dacls->a_entries;
+ for (i = 0; i < fattr->cf_dacls->a_count; i++, pace++) {
+ sid = kmalloc(sizeof(struct smb_sid), GFP_KERNEL);
+ if (!sid)
+ break;
+
+ if (pace->e_tag == ACL_USER) {
+ uid_t uid;
+
+ uid = from_kuid(user_ns, pace->e_uid);
+ id_to_sid(uid, SIDCREATOR_OWNER, sid);
+ } else if (pace->e_tag == ACL_GROUP) {
+ gid_t gid;
+
+ gid = from_kgid(user_ns, pace->e_gid);
+ id_to_sid(gid, SIDCREATOR_GROUP, sid);
+ } else {
+ kfree(sid);
+ continue;
+ }
+
+ ntace = (struct smb_ace *)((char *)pndace + *size);
+ *size += fill_ace_for_sid(ntace, sid, ACCESS_ALLOWED, 0x0b,
+ pace->e_perm, 0777);
+ (*num_aces)++;
+ if (pace->e_tag == ACL_USER)
+ ntace->access_req |=
+ FILE_DELETE_LE | FILE_DELETE_CHILD_LE;
+ kfree(sid);
+ }
+}
+
+static void set_ntacl_dacl(struct user_namespace *user_ns,
+ struct smb_acl *pndacl,
+ struct smb_acl *nt_dacl,
+ const struct smb_sid *pownersid,
+ const struct smb_sid *pgrpsid,
+ struct smb_fattr *fattr)
+{
+ struct smb_ace *ntace, *pndace;
+ int nt_num_aces = le32_to_cpu(nt_dacl->num_aces), num_aces = 0;
+ unsigned short size = 0;
+ int i;
+
+ pndace = (struct smb_ace *)((char *)pndacl + sizeof(struct smb_acl));
+ if (nt_num_aces) {
+ ntace = (struct smb_ace *)((char *)nt_dacl + sizeof(struct smb_acl));
+ for (i = 0; i < nt_num_aces; i++) {
+ memcpy((char *)pndace + size, ntace, le16_to_cpu(ntace->size));
+ size += le16_to_cpu(ntace->size);
+ ntace = (struct smb_ace *)((char *)ntace + le16_to_cpu(ntace->size));
+ num_aces++;
+ }
+ }
+
+ set_posix_acl_entries_dacl(user_ns, pndace, fattr,
+ &num_aces, &size, nt_num_aces);
+ pndacl->num_aces = cpu_to_le32(num_aces);
+ pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size);
+}
+
+static void set_mode_dacl(struct user_namespace *user_ns,
+ struct smb_acl *pndacl, struct smb_fattr *fattr)
+{
+ struct smb_ace *pace, *pndace;
+ u32 num_aces = 0;
+ u16 size = 0, ace_size = 0;
+ uid_t uid;
+ const struct smb_sid *sid;
+
+ pace = pndace = (struct smb_ace *)((char *)pndacl + sizeof(struct smb_acl));
+
+ if (fattr->cf_acls) {
+ set_posix_acl_entries_dacl(user_ns, pndace, fattr,
+ &num_aces, &size, num_aces);
+ goto out;
+ }
+
+ /* owner RID */
+ uid = from_kuid(user_ns, fattr->cf_uid);
+ if (uid)
+ sid = &server_conf.domain_sid;
+ else
+ sid = &sid_unix_users;
+ ace_size = fill_ace_for_sid(pace, sid, ACCESS_ALLOWED, 0,
+ fattr->cf_mode, 0700);
+ pace->sid.sub_auth[pace->sid.num_subauth++] = cpu_to_le32(uid);
+ pace->size = cpu_to_le16(ace_size + 4);
+ size += le16_to_cpu(pace->size);
+ pace = (struct smb_ace *)((char *)pndace + size);
+
+ /* Group RID */
+ ace_size = fill_ace_for_sid(pace, &sid_unix_groups,
+ ACCESS_ALLOWED, 0, fattr->cf_mode, 0070);
+ pace->sid.sub_auth[pace->sid.num_subauth++] =
+ cpu_to_le32(from_kgid(user_ns, fattr->cf_gid));
+ pace->size = cpu_to_le16(ace_size + 4);
+ size += le16_to_cpu(pace->size);
+ pace = (struct smb_ace *)((char *)pndace + size);
+ num_aces = 3;
+
+ if (S_ISDIR(fattr->cf_mode)) {
+ pace = (struct smb_ace *)((char *)pndace + size);
+
+ /* creator owner */
+ size += fill_ace_for_sid(pace, &creator_owner, ACCESS_ALLOWED,
+ 0x0b, fattr->cf_mode, 0700);
+ pace = (struct smb_ace *)((char *)pndace + size);
+
+ /* creator group */
+ size += fill_ace_for_sid(pace, &creator_group, ACCESS_ALLOWED,
+ 0x0b, fattr->cf_mode, 0070);
+ pace = (struct smb_ace *)((char *)pndace + size);
+ num_aces = 5;
+ }
+
+ /* other */
+ size += fill_ace_for_sid(pace, &sid_everyone, ACCESS_ALLOWED, 0,
+ fattr->cf_mode, 0007);
+
+out:
+ pndacl->num_aces = cpu_to_le32(num_aces);
+ pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size);
+}
+
+static int parse_sid(struct smb_sid *psid, char *end_of_acl)
+{
+ /*
+ * validate that we do not go past end of ACL - a SID must be at least
+ * 8 bytes long (assuming no sub-auths, e.g. the null SID):
+ * revision (1) + num_subauth (1) + authority (6)
+ */
+ if (end_of_acl < (char *)psid + 8) {
+ pr_err("ACL too small to parse SID %p\n", psid);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Convert CIFS ACL to POSIX form */
+int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+ int acl_len, struct smb_fattr *fattr)
+{
+ int rc = 0;
+ struct smb_sid *owner_sid_ptr, *group_sid_ptr;
+ struct smb_acl *dacl_ptr; /* no need for SACL ptr */
+ char *end_of_acl = ((char *)pntsd) + acl_len;
+ __u32 dacloffset;
+ int pntsd_type;
+
+ if (!pntsd)
+ return -EIO;
+
+ owner_sid_ptr = (struct smb_sid *)((char *)pntsd +
+ le32_to_cpu(pntsd->osidoffset));
+ group_sid_ptr = (struct smb_sid *)((char *)pntsd +
+ le32_to_cpu(pntsd->gsidoffset));
+ dacloffset = le32_to_cpu(pntsd->dacloffset);
+ dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset);
+ ksmbd_debug(SMB,
+ "revision %d type 0x%x ooffset 0x%x goffset 0x%x sacloffset 0x%x dacloffset 0x%x\n",
+ pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset),
+ le32_to_cpu(pntsd->gsidoffset),
+ le32_to_cpu(pntsd->sacloffset), dacloffset);
+
+ pntsd_type = le16_to_cpu(pntsd->type);
+ if (!(pntsd_type & DACL_PRESENT)) {
+ ksmbd_debug(SMB, "DACL_PRESENT in DACL type is not set\n");
+ return rc;
+ }
+
+ pntsd->type = cpu_to_le16(DACL_PRESENT);
+
+ if (pntsd->osidoffset) {
+ rc = parse_sid(owner_sid_ptr, end_of_acl);
+ if (rc) {
+ pr_err("%s: Error %d parsing Owner SID\n", __func__, rc);
+ return rc;
+ }
+
+ rc = sid_to_id(user_ns, owner_sid_ptr, SIDOWNER, fattr);
+ if (rc) {
+ pr_err("%s: Error %d mapping Owner SID to uid\n",
+ __func__, rc);
+ owner_sid_ptr = NULL;
+ }
+ }
+
+ if (pntsd->gsidoffset) {
+ rc = parse_sid(group_sid_ptr, end_of_acl);
+ if (rc) {
+ pr_err("%s: Error %d mapping Owner SID to gid\n",
+ __func__, rc);
+ return rc;
+ }
+ rc = sid_to_id(user_ns, group_sid_ptr, SIDUNIX_GROUP, fattr);
+ if (rc) {
+ pr_err("%s: Error %d mapping Group SID to gid\n",
+ __func__, rc);
+ group_sid_ptr = NULL;
+ }
+ }
+
+ if ((pntsd_type & (DACL_AUTO_INHERITED | DACL_AUTO_INHERIT_REQ)) ==
+ (DACL_AUTO_INHERITED | DACL_AUTO_INHERIT_REQ))
+ pntsd->type |= cpu_to_le16(DACL_AUTO_INHERITED);
+ if (pntsd_type & DACL_PROTECTED)
+ pntsd->type |= cpu_to_le16(DACL_PROTECTED);
+
+ if (dacloffset) {
+ parse_dacl(user_ns, dacl_ptr, end_of_acl,
+ owner_sid_ptr, group_sid_ptr, fattr);
+ }
+
+ return 0;
+}
+
+/* Convert permission bits from mode to equivalent CIFS ACL */
+int build_sec_desc(struct user_namespace *user_ns,
+ struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd,
+ int addition_info, __u32 *secdesclen,
+ struct smb_fattr *fattr)
+{
+ int rc = 0;
+ __u32 offset;
+ struct smb_sid *owner_sid_ptr, *group_sid_ptr;
+ struct smb_sid *nowner_sid_ptr, *ngroup_sid_ptr;
+ struct smb_acl *dacl_ptr = NULL; /* no need for SACL ptr */
+ uid_t uid;
+ gid_t gid;
+ unsigned int sid_type = SIDOWNER;
+
+ nowner_sid_ptr = kmalloc(sizeof(struct smb_sid), GFP_KERNEL);
+ if (!nowner_sid_ptr)
+ return -ENOMEM;
+
+ uid = from_kuid(user_ns, fattr->cf_uid);
+ if (!uid)
+ sid_type = SIDUNIX_USER;
+ id_to_sid(uid, sid_type, nowner_sid_ptr);
+
+ ngroup_sid_ptr = kmalloc(sizeof(struct smb_sid), GFP_KERNEL);
+ if (!ngroup_sid_ptr) {
+ kfree(nowner_sid_ptr);
+ return -ENOMEM;
+ }
+
+ gid = from_kgid(user_ns, fattr->cf_gid);
+ id_to_sid(gid, SIDUNIX_GROUP, ngroup_sid_ptr);
+
+ offset = sizeof(struct smb_ntsd);
+ pntsd->sacloffset = 0;
+ pntsd->revision = cpu_to_le16(1);
+ pntsd->type = cpu_to_le16(SELF_RELATIVE);
+ if (ppntsd)
+ pntsd->type |= ppntsd->type;
+
+ if (addition_info & OWNER_SECINFO) {
+ pntsd->osidoffset = cpu_to_le32(offset);
+ owner_sid_ptr = (struct smb_sid *)((char *)pntsd + offset);
+ smb_copy_sid(owner_sid_ptr, nowner_sid_ptr);
+ offset += 1 + 1 + 6 + (nowner_sid_ptr->num_subauth * 4);
+ }
+
+ if (addition_info & GROUP_SECINFO) {
+ pntsd->gsidoffset = cpu_to_le32(offset);
+ group_sid_ptr = (struct smb_sid *)((char *)pntsd + offset);
+ smb_copy_sid(group_sid_ptr, ngroup_sid_ptr);
+ offset += 1 + 1 + 6 + (ngroup_sid_ptr->num_subauth * 4);
+ }
+
+ if (addition_info & DACL_SECINFO) {
+ pntsd->type |= cpu_to_le16(DACL_PRESENT);
+ dacl_ptr = (struct smb_acl *)((char *)pntsd + offset);
+ dacl_ptr->revision = cpu_to_le16(2);
+ dacl_ptr->size = cpu_to_le16(sizeof(struct smb_acl));
+ dacl_ptr->num_aces = 0;
+
+ if (!ppntsd) {
+ set_mode_dacl(user_ns, dacl_ptr, fattr);
+ } else if (!ppntsd->dacloffset) {
+ goto out;
+ } else {
+ struct smb_acl *ppdacl_ptr;
+
+ ppdacl_ptr = (struct smb_acl *)((char *)ppntsd +
+ le32_to_cpu(ppntsd->dacloffset));
+ set_ntacl_dacl(user_ns, dacl_ptr, ppdacl_ptr,
+ nowner_sid_ptr, ngroup_sid_ptr, fattr);
+ }
+ pntsd->dacloffset = cpu_to_le32(offset);
+ offset += le16_to_cpu(dacl_ptr->size);
+ }
+
+out:
+ kfree(nowner_sid_ptr);
+ kfree(ngroup_sid_ptr);
+ *secdesclen = offset;
+ return rc;
+}
+
+static void smb_set_ace(struct smb_ace *ace, const struct smb_sid *sid, u8 type,
+ u8 flags, __le32 access_req)
+{
+ ace->type = type;
+ ace->flags = flags;
+ ace->access_req = access_req;
+ smb_copy_sid(&ace->sid, sid);
+ ace->size = cpu_to_le16(1 + 1 + 2 + 4 + 1 + 1 + 6 + (sid->num_subauth * 4));
+}
+
+int smb_inherit_dacl(struct ksmbd_conn *conn,
+ struct path *path,
+ unsigned int uid, unsigned int gid)
+{
+ const struct smb_sid *psid, *creator = NULL;
+ struct smb_ace *parent_aces, *aces;
+ struct smb_acl *parent_pdacl;
+ struct smb_ntsd *parent_pntsd = NULL;
+ struct smb_sid owner_sid, group_sid;
+ struct dentry *parent = path->dentry->d_parent;
+ struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0;
+ int rc = 0, num_aces, dacloffset, pntsd_type, acl_len;
+ char *aces_base;
+ bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode);
+
+ acl_len = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+ parent, &parent_pntsd);
+ if (acl_len <= 0)
+ return -ENOENT;
+ dacloffset = le32_to_cpu(parent_pntsd->dacloffset);
+ if (!dacloffset) {
+ rc = -EINVAL;
+ goto free_parent_pntsd;
+ }
+
+ parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset);
+ num_aces = le32_to_cpu(parent_pdacl->num_aces);
+ pntsd_type = le16_to_cpu(parent_pntsd->type);
+
+ aces_base = kmalloc_array(num_aces, 2 * sizeof(struct smb_ace), GFP_KERNEL);
+ if (!aces_base) {
+ rc = -ENOMEM;
+ goto free_parent_pntsd;
+ }
+
+ aces = (struct smb_ace *)aces_base;
+ parent_aces = (struct smb_ace *)((char *)parent_pdacl +
+ sizeof(struct smb_acl));
+
+ if (pntsd_type & DACL_AUTO_INHERITED)
+ inherited_flags = INHERITED_ACE;
+
+ for (i = 0; i < num_aces; i++) {
+ flags = parent_aces->flags;
+ if (!smb_inherit_flags(flags, is_dir))
+ goto pass;
+ if (is_dir) {
+ flags &= ~(INHERIT_ONLY_ACE | INHERITED_ACE);
+ if (!(flags & CONTAINER_INHERIT_ACE))
+ flags |= INHERIT_ONLY_ACE;
+ if (flags & NO_PROPAGATE_INHERIT_ACE)
+ flags = 0;
+ } else {
+ flags = 0;
+ }
+
+ if (!compare_sids(&creator_owner, &parent_aces->sid)) {
+ creator = &creator_owner;
+ id_to_sid(uid, SIDOWNER, &owner_sid);
+ psid = &owner_sid;
+ } else if (!compare_sids(&creator_group, &parent_aces->sid)) {
+ creator = &creator_group;
+ id_to_sid(gid, SIDUNIX_GROUP, &group_sid);
+ psid = &group_sid;
+ } else {
+ creator = NULL;
+ psid = &parent_aces->sid;
+ }
+
+ if (is_dir && creator && flags & CONTAINER_INHERIT_ACE) {
+ smb_set_ace(aces, psid, parent_aces->type, inherited_flags,
+ parent_aces->access_req);
+ nt_size += le16_to_cpu(aces->size);
+ ace_cnt++;
+ aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size));
+ flags |= INHERIT_ONLY_ACE;
+ psid = creator;
+ } else if (is_dir && !(parent_aces->flags & NO_PROPAGATE_INHERIT_ACE)) {
+ psid = &parent_aces->sid;
+ }
+
+ smb_set_ace(aces, psid, parent_aces->type, flags | inherited_flags,
+ parent_aces->access_req);
+ nt_size += le16_to_cpu(aces->size);
+ aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size));
+ ace_cnt++;
+pass:
+ parent_aces =
+ (struct smb_ace *)((char *)parent_aces + le16_to_cpu(parent_aces->size));
+ }
+
+ if (nt_size > 0) {
+ struct smb_ntsd *pntsd;
+ struct smb_acl *pdacl;
+ struct smb_sid *powner_sid = NULL, *pgroup_sid = NULL;
+ int powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size;
+
+ if (parent_pntsd->osidoffset) {
+ powner_sid = (struct smb_sid *)((char *)parent_pntsd +
+ le32_to_cpu(parent_pntsd->osidoffset));
+ powner_sid_size = 1 + 1 + 6 + (powner_sid->num_subauth * 4);
+ }
+ if (parent_pntsd->gsidoffset) {
+ pgroup_sid = (struct smb_sid *)((char *)parent_pntsd +
+ le32_to_cpu(parent_pntsd->gsidoffset));
+ pgroup_sid_size = 1 + 1 + 6 + (pgroup_sid->num_subauth * 4);
+ }
+
+ pntsd = kzalloc(sizeof(struct smb_ntsd) + powner_sid_size +
+ pgroup_sid_size + sizeof(struct smb_acl) +
+ nt_size, GFP_KERNEL);
+ if (!pntsd) {
+ rc = -ENOMEM;
+ goto free_aces_base;
+ }
+
+ pntsd->revision = cpu_to_le16(1);
+ pntsd->type = cpu_to_le16(SELF_RELATIVE | DACL_PRESENT);
+ if (le16_to_cpu(parent_pntsd->type) & DACL_AUTO_INHERITED)
+ pntsd->type |= cpu_to_le16(DACL_AUTO_INHERITED);
+ pntsd_size = sizeof(struct smb_ntsd);
+ pntsd->osidoffset = parent_pntsd->osidoffset;
+ pntsd->gsidoffset = parent_pntsd->gsidoffset;
+ pntsd->dacloffset = parent_pntsd->dacloffset;
+
+ if (pntsd->osidoffset) {
+ struct smb_sid *owner_sid = (struct smb_sid *)((char *)pntsd +
+ le32_to_cpu(pntsd->osidoffset));
+ memcpy(owner_sid, powner_sid, powner_sid_size);
+ pntsd_size += powner_sid_size;
+ }
+
+ if (pntsd->gsidoffset) {
+ struct smb_sid *group_sid = (struct smb_sid *)((char *)pntsd +
+ le32_to_cpu(pntsd->gsidoffset));
+ memcpy(group_sid, pgroup_sid, pgroup_sid_size);
+ pntsd_size += pgroup_sid_size;
+ }
+
+ if (pntsd->dacloffset) {
+ struct smb_ace *pace;
+
+ pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset));
+ pdacl->revision = cpu_to_le16(2);
+ pdacl->size = cpu_to_le16(sizeof(struct smb_acl) + nt_size);
+ pdacl->num_aces = cpu_to_le32(ace_cnt);
+ pace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
+ memcpy(pace, aces_base, nt_size);
+ pntsd_size += sizeof(struct smb_acl) + nt_size;
+ }
+
+ ksmbd_vfs_set_sd_xattr(conn, user_ns,
+ path->dentry, pntsd, pntsd_size);
+ kfree(pntsd);
+ }
+
+free_aces_base:
+ kfree(aces_base);
+free_parent_pntsd:
+ kfree(parent_pntsd);
+ return rc;
+}
+
+bool smb_inherit_flags(int flags, bool is_dir)
+{
+ if (!is_dir)
+ return (flags & OBJECT_INHERIT_ACE) != 0;
+
+ if (flags & OBJECT_INHERIT_ACE && !(flags & NO_PROPAGATE_INHERIT_ACE))
+ return true;
+
+ if (flags & CONTAINER_INHERIT_ACE)
+ return true;
+ return false;
+}
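+
+/*
+ * Summary (illustrative): files inherit only through OBJECT_INHERIT_ACE;
+ * directories inherit through CONTAINER_INHERIT_ACE unconditionally and
+ * through OBJECT_INHERIT_ACE unless NO_PROPAGATE_INHERIT_ACE is also set.
+ */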
+
+int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+ __le32 *pdaccess, int uid)
+{
+ struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ struct smb_ntsd *pntsd = NULL;
+ struct smb_acl *pdacl;
+ struct posix_acl *posix_acls;
+ int rc = 0, acl_size;
+ struct smb_sid sid;
+ int granted = le32_to_cpu(*pdaccess & ~FILE_MAXIMAL_ACCESS_LE);
+ struct smb_ace *ace;
+ int i, found = 0;
+ unsigned int access_bits = 0;
+ struct smb_ace *others_ace = NULL;
+ struct posix_acl_entry *pa_entry;
+ unsigned int sid_type = SIDOWNER;
+ char *end_of_acl;
+
+ ksmbd_debug(SMB, "check permission using windows acl\n");
+ acl_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+ path->dentry, &pntsd);
+ if (acl_size <= 0 || !pntsd || !pntsd->dacloffset) {
+ kfree(pntsd);
+ return 0;
+ }
+
+ pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset));
+ end_of_acl = ((char *)pntsd) + acl_size;
+ if (end_of_acl <= (char *)pdacl) {
+ kfree(pntsd);
+ return 0;
+ }
+
+ if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size) ||
+ le16_to_cpu(pdacl->size) < sizeof(struct smb_acl)) {
+ kfree(pntsd);
+ return 0;
+ }
+
+ if (!pdacl->num_aces) {
+ if (!(le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) &&
+ *pdaccess & ~(FILE_READ_CONTROL_LE | FILE_WRITE_DAC_LE)) {
+ rc = -EACCES;
+ goto err_out;
+ }
+ kfree(pntsd);
+ return 0;
+ }
+
+ if (*pdaccess & FILE_MAXIMAL_ACCESS_LE) {
+ granted = READ_CONTROL | WRITE_DAC | FILE_READ_ATTRIBUTES |
+ DELETE;
+
+ ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
+ for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
+ granted |= le32_to_cpu(ace->access_req);
+ ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
+ if (end_of_acl < (char *)ace)
+ goto err_out;
+ }
+
+ if (!pdacl->num_aces)
+ granted = GENERIC_ALL_FLAGS;
+ }
+
+ if (!uid)
+ sid_type = SIDUNIX_USER;
+ id_to_sid(uid, sid_type, &sid);
+
+ ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
+ for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
+ if (!compare_sids(&sid, &ace->sid) ||
+ !compare_sids(&sid_unix_NFS_mode, &ace->sid)) {
+ found = 1;
+ break;
+ }
+ if (!compare_sids(&sid_everyone, &ace->sid))
+ others_ace = ace;
+
+ ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
+ if (end_of_acl < (char *)ace)
+ goto err_out;
+ }
+
+ if (*pdaccess & FILE_MAXIMAL_ACCESS_LE && found) {
+ granted = READ_CONTROL | WRITE_DAC | FILE_READ_ATTRIBUTES |
+ DELETE;
+
+ granted |= le32_to_cpu(ace->access_req);
+
+ if (!pdacl->num_aces)
+ granted = GENERIC_ALL_FLAGS;
+ }
+
+ if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) {
+ posix_acls = get_acl(d_inode(path->dentry), ACL_TYPE_ACCESS);
+ if (posix_acls && !found) {
+ unsigned int id = -1;
+
+ pa_entry = posix_acls->a_entries;
+ for (i = 0; i < posix_acls->a_count; i++, pa_entry++) {
+ if (pa_entry->e_tag == ACL_USER)
+ id = from_kuid(user_ns,
+ pa_entry->e_uid);
+ else if (pa_entry->e_tag == ACL_GROUP)
+ id = from_kgid(user_ns,
+ pa_entry->e_gid);
+ else
+ continue;
+
+ if (id == uid) {
+ mode_to_access_flags(pa_entry->e_perm,
+ 0777,
+ &access_bits);
+ if (!access_bits)
+ access_bits =
+ SET_MINIMUM_RIGHTS;
+ goto check_access_bits;
+ }
+ }
+ }
+ if (posix_acls)
+ posix_acl_release(posix_acls);
+ }
+
+ if (!found) {
+ if (others_ace) {
+ ace = others_ace;
+ } else {
+ ksmbd_debug(SMB, "Can't find corresponding sid\n");
+ rc = -EACCES;
+ goto err_out;
+ }
+ }
+
+ switch (ace->type) {
+ case ACCESS_ALLOWED_ACE_TYPE:
+ access_bits = le32_to_cpu(ace->access_req);
+ break;
+ case ACCESS_DENIED_ACE_TYPE:
+ case ACCESS_DENIED_CALLBACK_ACE_TYPE:
+ access_bits = ~le32_to_cpu(ace->access_req);
+ break;
+ }
+
+check_access_bits:
+ if (granted &
+ ~(access_bits | FILE_READ_ATTRIBUTES | READ_CONTROL | WRITE_DAC | DELETE)) {
+ ksmbd_debug(SMB, "Access denied with winACL, granted : %x, access_req : %x\n",
+ granted, le32_to_cpu(ace->access_req));
+ rc = -EACCES;
+ goto err_out;
+ }
+
+ *pdaccess = cpu_to_le32(granted);
+err_out:
+ kfree(pntsd);
+ return rc;
+}
+
+int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
+ struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
+ bool type_check)
+{
+ int rc;
+ struct smb_fattr fattr = {{0}};
+ struct inode *inode = d_inode(path->dentry);
+ struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+ struct iattr newattrs;
+
+ fattr.cf_uid = INVALID_UID;
+ fattr.cf_gid = INVALID_GID;
+ fattr.cf_mode = inode->i_mode;
+
+ rc = parse_sec_desc(user_ns, pntsd, ntsd_len, &fattr);
+ if (rc)
+ goto out;
+
+ newattrs.ia_valid = ATTR_CTIME;
+ if (!uid_eq(fattr.cf_uid, INVALID_UID)) {
+ newattrs.ia_valid |= ATTR_UID;
+ newattrs.ia_uid = fattr.cf_uid;
+ }
+ if (!gid_eq(fattr.cf_gid, INVALID_GID)) {
+ newattrs.ia_valid |= ATTR_GID;
+ newattrs.ia_gid = fattr.cf_gid;
+ }
+ newattrs.ia_valid |= ATTR_MODE;
+ newattrs.ia_mode = (inode->i_mode & ~0777) | (fattr.cf_mode & 0777);
+
+ inode_lock(inode);
+ rc = notify_change(user_ns, path->dentry, &newattrs, NULL);
+ inode_unlock(inode);
+ if (rc)
+ goto out;
+
+ ksmbd_vfs_remove_acl_xattrs(user_ns, path->dentry);
+ /* Update posix acls */
+ if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && fattr.cf_acls) {
+ rc = set_posix_acl(user_ns, inode,
+ ACL_TYPE_ACCESS, fattr.cf_acls);
+ if (S_ISDIR(inode->i_mode) && fattr.cf_dacls)
+ rc = set_posix_acl(user_ns, inode,
+ ACL_TYPE_DEFAULT, fattr.cf_dacls);
+ }
+
+ /* Do this check only when called from the SD BUFFER context */
+ if (type_check && !(le16_to_cpu(pntsd->type) & DACL_PRESENT))
+ goto out;
+
+ if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) {
+ /* Update WinACL in xattr */
+ ksmbd_vfs_remove_sd_xattrs(user_ns, path->dentry);
+ ksmbd_vfs_set_sd_xattr(conn, user_ns,
+ path->dentry, pntsd, ntsd_len);
+ }
+
+out:
+ posix_acl_release(fattr.cf_acls);
+ posix_acl_release(fattr.cf_dacls);
+ mark_inode_dirty(inode);
+ return rc;
+}
+
+void ksmbd_init_domain(u32 *sub_auth)
+{
+ int i;
+
+ memcpy(&server_conf.domain_sid, &domain, sizeof(struct smb_sid));
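+ /*
+ * Append the three machine-specific sub-authorities after the
+ * fixed base (assumed here to be S-1-5-21), forming the domain
+ * SID S-1-5-21-a-b-c.
+ */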
+ for (i = 0; i < 3; ++i)
+ server_conf.domain_sid.sub_auth[i + 1] = cpu_to_le32(sub_auth[i]);
+}
diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h
new file mode 100644
index 000000000000..940f686a1d95
--- /dev/null
+++ b/fs/ksmbd/smbacl.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * Copyright (c) International Business Machines Corp., 2007
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ * Modified by Namjae Jeon (linkinjeon@kernel.org)
+ */
+
+#ifndef _SMBACL_H
+#define _SMBACL_H
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/posix_acl.h>
+
+#include "mgmt/tree_connect.h"
+
+#define NUM_AUTHS (6) /* number of authority fields */
+#define SID_MAX_SUB_AUTHORITIES (15) /* max number of sub authority fields */
+
+/*
+ * ACE types - see MS-DTYP 2.4.4.1
+ */
+enum {
+ ACCESS_ALLOWED,
+ ACCESS_DENIED,
+};
+
+/*
+ * Security ID types
+ */
+enum {
+ SIDOWNER = 1,
+ SIDGROUP,
+ SIDCREATOR_OWNER,
+ SIDCREATOR_GROUP,
+ SIDUNIX_USER,
+ SIDUNIX_GROUP,
+ SIDNFS_USER,
+ SIDNFS_GROUP,
+ SIDNFS_MODE,
+};
+
+/* Revision for ACLs */
+#define SD_REVISION 1
+
+/* Control flags for Security Descriptor */
+#define OWNER_DEFAULTED 0x0001
+#define GROUP_DEFAULTED 0x0002
+#define DACL_PRESENT 0x0004
+#define DACL_DEFAULTED 0x0008
+#define SACL_PRESENT 0x0010
+#define SACL_DEFAULTED 0x0020
+#define DACL_TRUSTED 0x0040
+#define SERVER_SECURITY 0x0080
+#define DACL_AUTO_INHERIT_REQ 0x0100
+#define SACL_AUTO_INHERIT_REQ 0x0200
+#define DACL_AUTO_INHERITED 0x0400
+#define SACL_AUTO_INHERITED 0x0800
+#define DACL_PROTECTED 0x1000
+#define SACL_PROTECTED 0x2000
+#define RM_CONTROL_VALID 0x4000
+#define SELF_RELATIVE 0x8000
+
+/* ACE types - see MS-DTYP 2.4.4.1 */
+#define ACCESS_ALLOWED_ACE_TYPE 0x00
+#define ACCESS_DENIED_ACE_TYPE 0x01
+#define SYSTEM_AUDIT_ACE_TYPE 0x02
+#define SYSTEM_ALARM_ACE_TYPE 0x03
+#define ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04
+#define ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05
+#define ACCESS_DENIED_OBJECT_ACE_TYPE 0x06
+#define SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07
+#define SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08
+#define ACCESS_ALLOWED_CALLBACK_ACE_TYPE 0x09
+#define ACCESS_DENIED_CALLBACK_ACE_TYPE 0x0A
+#define ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE 0x0B
+#define ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE 0x0C
+#define SYSTEM_AUDIT_CALLBACK_ACE_TYPE 0x0D
+#define SYSTEM_ALARM_CALLBACK_ACE_TYPE 0x0E /* Reserved */
+#define SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE 0x0F
+#define SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE 0x10 /* reserved */
+#define SYSTEM_MANDATORY_LABEL_ACE_TYPE 0x11
+#define SYSTEM_RESOURCE_ATTRIBUTE_ACE_TYPE 0x12
+#define SYSTEM_SCOPED_POLICY_ID_ACE_TYPE 0x13
+
+/* ACE flags */
+#define OBJECT_INHERIT_ACE 0x01
+#define CONTAINER_INHERIT_ACE 0x02
+#define NO_PROPAGATE_INHERIT_ACE 0x04
+#define INHERIT_ONLY_ACE 0x08
+#define INHERITED_ACE 0x10
+#define SUCCESSFUL_ACCESS_ACE_FLAG 0x40
+#define FAILED_ACCESS_ACE_FLAG 0x80
+
+/*
+ * Maximum size of a string representation of a SID:
+ *
+ * The fields are unsigned values in decimal. So:
+ *
+ * u8: max 3 bytes in decimal
+ * u32: max 10 bytes in decimal
+ *
+ * "S-" + 3 bytes for version field + 15 for authority field + NULL terminator
+ *
+ * For authority field, max is when all 6 values are non-zero and it must be
+ * represented in hex. So "-0x" + 12 hex digits.
+ *
+ * Add 11 bytes for each subauthority field (10 bytes each + 1 for '-')
+ */
+#define SID_STRING_BASE_SIZE (2 + 3 + 15 + 1)
+#define SID_STRING_SUBAUTH_SIZE (11) /* size of a single subauth string */
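+
+/*
+ * Illustrative worst case: SID_STRING_BASE_SIZE +
+ * SID_MAX_SUB_AUTHORITIES * SID_STRING_SUBAUTH_SIZE = 21 + 15 * 11 =
+ * 186 bytes for the longest SID string, including the NUL terminator.
+ */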
+
+#define DOMAIN_USER_RID_LE cpu_to_le32(513)
+
+struct ksmbd_conn;
+
+struct smb_ntsd {
+ __le16 revision; /* revision level */
+ __le16 type;
+ __le32 osidoffset;
+ __le32 gsidoffset;
+ __le32 sacloffset;
+ __le32 dacloffset;
+} __packed;
+
+struct smb_sid {
+ __u8 revision; /* revision level */
+ __u8 num_subauth;
+ __u8 authority[NUM_AUTHS];
+ __le32 sub_auth[SID_MAX_SUB_AUTHORITIES]; /* sub_auth[num_subauth] */
+} __packed;
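+
+/*
+ * Illustrative encoding: S-1-5-21-x-y-z is revision 1, num_subauth 4,
+ * authority {0, 0, 0, 0, 0, 5} and sub_auth {21, x, y, z}, with each
+ * sub-authority little-endian on the wire.
+ */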
+
+/* size of a struct smb_sid, sans sub_auth array */
+#define CIFS_SID_BASE_SIZE (1 + 1 + NUM_AUTHS)
+
+struct smb_acl {
+ __le16 revision; /* revision level */
+ __le16 size;
+ __le32 num_aces;
+} __packed;
+
+struct smb_ace {
+ __u8 type;
+ __u8 flags;
+ __le16 size;
+ __le32 access_req;
+ struct smb_sid sid; /* ie UUID of user or group who gets these perms */
+} __packed;
+
+struct smb_fattr {
+ kuid_t cf_uid;
+ kgid_t cf_gid;
+ umode_t cf_mode;
+ __le32 daccess;
+ struct posix_acl *cf_acls;
+ struct posix_acl *cf_dacls;
+};
+
+struct posix_ace_state {
+ u32 allow;
+ u32 deny;
+};
+
+struct posix_user_ace_state {
+ union {
+ kuid_t uid;
+ kgid_t gid;
+ };
+ struct posix_ace_state perms;
+};
+
+struct posix_ace_state_array {
+ int n;
+ struct posix_user_ace_state aces[];
+};
+
+/*
+ * While processing the ACEs, this maintains the partial permissions
+ * calculated so far:
+ */
+
+struct posix_acl_state {
+ struct posix_ace_state owner;
+ struct posix_ace_state group;
+ struct posix_ace_state other;
+ struct posix_ace_state everyone;
+ struct posix_ace_state mask; /* deny unused in this case */
+ struct posix_ace_state_array *users;
+ struct posix_ace_state_array *groups;
+};
+
+int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+ int acl_len, struct smb_fattr *fattr);
+int build_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+ struct smb_ntsd *ppntsd, int addition_info,
+ __u32 *secdesclen, struct smb_fattr *fattr);
+int init_acl_state(struct posix_acl_state *state, int cnt);
+void free_acl_state(struct posix_acl_state *state);
+void posix_state_to_acl(struct posix_acl_state *state,
+ struct posix_acl_entry *pace);
+int compare_sids(const struct smb_sid *ctsid, const struct smb_sid *cwsid);
+bool smb_inherit_flags(int flags, bool is_dir);
+int smb_inherit_dacl(struct ksmbd_conn *conn, struct path *path,
+ unsigned int uid, unsigned int gid);
+int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+ __le32 *pdaccess, int uid);
+int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
+ struct path *path, struct smb_ntsd *pntsd, int ntsd_len,
+ bool type_check);
+void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid);
+void ksmbd_init_domain(u32 *sub_auth);
+#endif /* _SMBACL_H */
diff --git a/fs/ksmbd/smbfsctl.h b/fs/ksmbd/smbfsctl.h
new file mode 100644
index 000000000000..b98418aae20c
--- /dev/null
+++ b/fs/ksmbd/smbfsctl.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * fs/ksmbd/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions (from fs/cifs/smbfsctl.h)
+ *
+ * Copyright (c) International Business Machines Corp., 2002,2009
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ */
+
+/* IOCTL information */
+/*
+ * List of ioctl/fsctl function codes that are or could be useful in the
+ * future to remote clients such as the cifs or SMB2 clients. There is
+ * probably a slightly larger set of fsctls that the NTFS local filesystem
+ * could handle,
+ * including the seven below that we do not have struct definitions for.
+ * Even with protocol definitions for most of these now available, we still
+ * need to do some experimentation to identify which are practical to do
+ * remotely. Some of the following, such as the encryption/compression ones
+ * could be invoked from tools via a specialized hook into the VFS rather
+ * than via the standard vfs entry points
+ */
+
+#ifndef __KSMBD_SMBFSCTL_H
+#define __KSMBD_SMBFSCTL_H
+
+#define FSCTL_DFS_GET_REFERRALS 0x00060194
+#define FSCTL_DFS_GET_REFERRALS_EX 0x000601B0
+#define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000
+#define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004
+#define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008
+#define FSCTL_LOCK_VOLUME 0x00090018
+#define FSCTL_UNLOCK_VOLUME 0x0009001C
+#define FSCTL_IS_PATHNAME_VALID 0x0009002C /* BB add struct */
+#define FSCTL_GET_COMPRESSION 0x0009003C /* BB add struct */
+#define FSCTL_SET_COMPRESSION 0x0009C040 /* BB add struct */
+#define FSCTL_QUERY_FAT_BPB 0x00090058 /* BB add struct */
+/* Verify the next FSCTL number, we had it as 0x00090090 before */
+#define FSCTL_FILESYSTEM_GET_STATS 0x00090060 /* BB add struct */
+#define FSCTL_GET_NTFS_VOLUME_DATA 0x00090064 /* BB add struct */
+#define FSCTL_GET_RETRIEVAL_POINTERS 0x00090073 /* BB add struct */
+#define FSCTL_IS_VOLUME_DIRTY 0x00090078 /* BB add struct */
+#define FSCTL_ALLOW_EXTENDED_DASD_IO 0x00090083 /* BB add struct */
+#define FSCTL_REQUEST_FILTER_OPLOCK 0x0009008C
+#define FSCTL_FIND_FILES_BY_SID 0x0009008F /* BB add struct */
+#define FSCTL_SET_OBJECT_ID 0x00090098 /* BB add struct */
+#define FSCTL_GET_OBJECT_ID 0x0009009C /* BB add struct */
+#define FSCTL_DELETE_OBJECT_ID 0x000900A0 /* BB add struct */
+#define FSCTL_SET_REPARSE_POINT 0x000900A4 /* BB add struct */
+#define FSCTL_GET_REPARSE_POINT 0x000900A8 /* BB add struct */
+#define FSCTL_DELETE_REPARSE_POINT 0x000900AC /* BB add struct */
+#define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */
+#define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */
+#define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */
+#define FSCTL_SET_ZERO_DATA 0x000980C8 /* BB add struct */
+#define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */
+#define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */
+#define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */
+#define FSCTL_READ_RAW_ENCRYPTED 0x000900E3 /* BB add struct */
+#define FSCTL_READ_FILE_USN_DATA 0x000900EB /* BB add struct */
+#define FSCTL_WRITE_USN_CLOSE_RECORD 0x000900EF /* BB add struct */
+#define FSCTL_SIS_COPYFILE 0x00090100 /* BB add struct */
+#define FSCTL_RECALL_FILE 0x00090117 /* BB add struct */
+#define FSCTL_QUERY_SPARING_INFO 0x00090138 /* BB add struct */
+#define FSCTL_SET_ZERO_ON_DEALLOC 0x00090194 /* BB add struct */
+#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
+#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */
+#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
+#define FSCTL_DUPLICATE_EXTENTS_TO_FILE 0x00098344
+#define FSCTL_SIS_LINK_FILES 0x0009C104
+#define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */
+#define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */
+/* strange that the number for this op is not sequential with previous op */
+#define FSCTL_PIPE_WAIT 0x00110018 /* BB add struct */
+#define FSCTL_REQUEST_RESUME_KEY 0x00140078
+#define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */
+#define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */
+#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204
+#define FSCTL_QUERY_NETWORK_INTERFACE_INFO 0x001401FC
+#define FSCTL_COPYCHUNK 0x001440F2
+#define FSCTL_COPYCHUNK_WRITE 0x001480F2
+
+#define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003
+#define IO_REPARSE_TAG_HSM 0xC0000004
+#define IO_REPARSE_TAG_SIS 0x80000007
+
+/* WSL reparse tags */
+#define IO_REPARSE_TAG_LX_SYMLINK_LE cpu_to_le32(0xA000001D)
+#define IO_REPARSE_TAG_AF_UNIX_LE cpu_to_le32(0x80000023)
+#define IO_REPARSE_TAG_LX_FIFO_LE cpu_to_le32(0x80000024)
+#define IO_REPARSE_TAG_LX_CHR_LE cpu_to_le32(0x80000025)
+#define IO_REPARSE_TAG_LX_BLK_LE cpu_to_le32(0x80000026)
+#endif /* __KSMBD_SMBFSCTL_H */
diff --git a/fs/ksmbd/smbstatus.h b/fs/ksmbd/smbstatus.h
new file mode 100644
index 000000000000..108a8b6ed24a
--- /dev/null
+++ b/fs/ksmbd/smbstatus.h
@@ -0,0 +1,1822 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * fs/ksmbd/smbstatus.h (derived from fs/cifs/smb2status.h)
+ *
+ * SMB2 Status code (network error) definitions
+ * Definitions are from MS-ERREF
+ *
+ * Copyright (c) International Business Machines Corp., 2009,2011
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ */
+
+/*
+ * 0 1 2 3 4 5 6 7 8 9 0 A B C D E F 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ * SEV C N <-------Facility--------> <------Error Status Code------>
+ *
+ * C is set if "customer defined" error, N bit is reserved and MBZ
+ */
+
+#define STATUS_SEVERITY_SUCCESS cpu_to_le32(0x0000)
+#define STATUS_SEVERITY_INFORMATIONAL cpu_to_le32(0x0001)
+#define STATUS_SEVERITY_WARNING cpu_to_le32(0x0002)
+#define STATUS_SEVERITY_ERROR cpu_to_le32(0x0003)
+
+struct ntstatus {
+ /* Facility is the high 12 bits of the following field */
+ __le32 Facility; /* low 2 bits Severity, next is Customer, then rsrvd */
+ __le32 Code;
+};
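+
+/*
+ * Sketch (illustrative, not used by this code) of decoding the layout
+ * above from a CPU-endian status value:
+ *
+ *   severity = status >> 30;
+ *   customer = (status >> 29) & 1;
+ *   facility = (status >> 16) & 0x0fff;
+ *   code     = status & 0xffff;
+ */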
+
+#define STATUS_SUCCESS 0x00000000
+#define STATUS_WAIT_0 cpu_to_le32(0x00000000)
+#define STATUS_WAIT_1 cpu_to_le32(0x00000001)
+#define STATUS_WAIT_2 cpu_to_le32(0x00000002)
+#define STATUS_WAIT_3 cpu_to_le32(0x00000003)
+#define STATUS_WAIT_63 cpu_to_le32(0x0000003F)
+#define STATUS_ABANDONED cpu_to_le32(0x00000080)
+#define STATUS_ABANDONED_WAIT_0 cpu_to_le32(0x00000080)
+#define STATUS_ABANDONED_WAIT_63 cpu_to_le32(0x000000BF)
+#define STATUS_USER_APC cpu_to_le32(0x000000C0)
+#define STATUS_KERNEL_APC cpu_to_le32(0x00000100)
+#define STATUS_ALERTED cpu_to_le32(0x00000101)
+#define STATUS_TIMEOUT cpu_to_le32(0x00000102)
+#define STATUS_PENDING cpu_to_le32(0x00000103)
+#define STATUS_REPARSE cpu_to_le32(0x00000104)
+#define STATUS_MORE_ENTRIES cpu_to_le32(0x00000105)
+#define STATUS_NOT_ALL_ASSIGNED cpu_to_le32(0x00000106)
+#define STATUS_SOME_NOT_MAPPED cpu_to_le32(0x00000107)
+#define STATUS_OPLOCK_BREAK_IN_PROGRESS cpu_to_le32(0x00000108)
+#define STATUS_VOLUME_MOUNTED cpu_to_le32(0x00000109)
+#define STATUS_RXACT_COMMITTED cpu_to_le32(0x0000010A)
+#define STATUS_NOTIFY_CLEANUP cpu_to_le32(0x0000010B)
+#define STATUS_NOTIFY_ENUM_DIR cpu_to_le32(0x0000010C)
+#define STATUS_NO_QUOTAS_FOR_ACCOUNT cpu_to_le32(0x0000010D)
+#define STATUS_PRIMARY_TRANSPORT_CONNECT_FAILED cpu_to_le32(0x0000010E)
+#define STATUS_PAGE_FAULT_TRANSITION cpu_to_le32(0x00000110)
+#define STATUS_PAGE_FAULT_DEMAND_ZERO cpu_to_le32(0x00000111)
+#define STATUS_PAGE_FAULT_COPY_ON_WRITE cpu_to_le32(0x00000112)
+#define STATUS_PAGE_FAULT_GUARD_PAGE cpu_to_le32(0x00000113)
+#define STATUS_PAGE_FAULT_PAGING_FILE cpu_to_le32(0x00000114)
+#define STATUS_CACHE_PAGE_LOCKED cpu_to_le32(0x00000115)
+#define STATUS_CRASH_DUMP cpu_to_le32(0x00000116)
+#define STATUS_BUFFER_ALL_ZEROS cpu_to_le32(0x00000117)
+#define STATUS_REPARSE_OBJECT cpu_to_le32(0x00000118)
+#define STATUS_RESOURCE_REQUIREMENTS_CHANGED cpu_to_le32(0x00000119)
+#define STATUS_TRANSLATION_COMPLETE cpu_to_le32(0x00000120)
+#define STATUS_DS_MEMBERSHIP_EVALUATED_LOCALLY cpu_to_le32(0x00000121)
+#define STATUS_NOTHING_TO_TERMINATE cpu_to_le32(0x00000122)
+#define STATUS_PROCESS_NOT_IN_JOB cpu_to_le32(0x00000123)
+#define STATUS_PROCESS_IN_JOB cpu_to_le32(0x00000124)
+#define STATUS_VOLSNAP_HIBERNATE_READY cpu_to_le32(0x00000125)
+#define STATUS_FSFILTER_OP_COMPLETED_SUCCESSFULLY cpu_to_le32(0x00000126)
+#define STATUS_INTERRUPT_VECTOR_ALREADY_CONNECTED cpu_to_le32(0x00000127)
+#define STATUS_INTERRUPT_STILL_CONNECTED cpu_to_le32(0x00000128)
+#define STATUS_PROCESS_CLONED cpu_to_le32(0x00000129)
+#define STATUS_FILE_LOCKED_WITH_ONLY_READERS cpu_to_le32(0x0000012A)
+#define STATUS_FILE_LOCKED_WITH_WRITERS cpu_to_le32(0x0000012B)
+#define STATUS_RESOURCEMANAGER_READ_ONLY cpu_to_le32(0x00000202)
+#define STATUS_WAIT_FOR_OPLOCK cpu_to_le32(0x00000367)
+#define DBG_EXCEPTION_HANDLED cpu_to_le32(0x00010001)
+#define DBG_CONTINUE cpu_to_le32(0x00010002)
+#define STATUS_FLT_IO_COMPLETE cpu_to_le32(0x001C0001)
+#define STATUS_OBJECT_NAME_EXISTS cpu_to_le32(0x40000000)
+#define STATUS_THREAD_WAS_SUSPENDED cpu_to_le32(0x40000001)
+#define STATUS_WORKING_SET_LIMIT_RANGE cpu_to_le32(0x40000002)
+#define STATUS_IMAGE_NOT_AT_BASE cpu_to_le32(0x40000003)
+#define STATUS_RXACT_STATE_CREATED cpu_to_le32(0x40000004)
+#define STATUS_SEGMENT_NOTIFICATION cpu_to_le32(0x40000005)
+#define STATUS_LOCAL_USER_SESSION_KEY cpu_to_le32(0x40000006)
+#define STATUS_BAD_CURRENT_DIRECTORY cpu_to_le32(0x40000007)
+#define STATUS_SERIAL_MORE_WRITES cpu_to_le32(0x40000008)
+#define STATUS_REGISTRY_RECOVERED cpu_to_le32(0x40000009)
+#define STATUS_FT_READ_RECOVERY_FROM_BACKUP cpu_to_le32(0x4000000A)
+#define STATUS_FT_WRITE_RECOVERY cpu_to_le32(0x4000000B)
+#define STATUS_SERIAL_COUNTER_TIMEOUT cpu_to_le32(0x4000000C)
+#define STATUS_NULL_LM_PASSWORD cpu_to_le32(0x4000000D)
+#define STATUS_IMAGE_MACHINE_TYPE_MISMATCH cpu_to_le32(0x4000000E)
+#define STATUS_RECEIVE_PARTIAL cpu_to_le32(0x4000000F)
+#define STATUS_RECEIVE_EXPEDITED cpu_to_le32(0x40000010)
+#define STATUS_RECEIVE_PARTIAL_EXPEDITED cpu_to_le32(0x40000011)
+#define STATUS_EVENT_DONE cpu_to_le32(0x40000012)
+#define STATUS_EVENT_PENDING cpu_to_le32(0x40000013)
+#define STATUS_CHECKING_FILE_SYSTEM cpu_to_le32(0x40000014)
+#define STATUS_FATAL_APP_EXIT cpu_to_le32(0x40000015)
+#define STATUS_PREDEFINED_HANDLE cpu_to_le32(0x40000016)
+#define STATUS_WAS_UNLOCKED cpu_to_le32(0x40000017)
+#define STATUS_SERVICE_NOTIFICATION cpu_to_le32(0x40000018)
+#define STATUS_WAS_LOCKED cpu_to_le32(0x40000019)
+#define STATUS_LOG_HARD_ERROR cpu_to_le32(0x4000001A)
+#define STATUS_ALREADY_WIN32 cpu_to_le32(0x4000001B)
+#define STATUS_WX86_UNSIMULATE cpu_to_le32(0x4000001C)
+#define STATUS_WX86_CONTINUE cpu_to_le32(0x4000001D)
+#define STATUS_WX86_SINGLE_STEP cpu_to_le32(0x4000001E)
+#define STATUS_WX86_BREAKPOINT cpu_to_le32(0x4000001F)
+#define STATUS_WX86_EXCEPTION_CONTINUE cpu_to_le32(0x40000020)
+#define STATUS_WX86_EXCEPTION_LASTCHANCE cpu_to_le32(0x40000021)
+#define STATUS_WX86_EXCEPTION_CHAIN cpu_to_le32(0x40000022)
+#define STATUS_IMAGE_MACHINE_TYPE_MISMATCH_EXE cpu_to_le32(0x40000023)
+#define STATUS_NO_YIELD_PERFORMED cpu_to_le32(0x40000024)
+#define STATUS_TIMER_RESUME_IGNORED cpu_to_le32(0x40000025)
+#define STATUS_ARBITRATION_UNHANDLED cpu_to_le32(0x40000026)
+#define STATUS_CARDBUS_NOT_SUPPORTED cpu_to_le32(0x40000027)
+#define STATUS_WX86_CREATEWX86TIB cpu_to_le32(0x40000028)
+#define STATUS_MP_PROCESSOR_MISMATCH cpu_to_le32(0x40000029)
+#define STATUS_HIBERNATED cpu_to_le32(0x4000002A)
+#define STATUS_RESUME_HIBERNATION cpu_to_le32(0x4000002B)
+#define STATUS_FIRMWARE_UPDATED cpu_to_le32(0x4000002C)
+#define STATUS_DRIVERS_LEAKING_LOCKED_PAGES cpu_to_le32(0x4000002D)
+#define STATUS_MESSAGE_RETRIEVED cpu_to_le32(0x4000002E)
+#define STATUS_SYSTEM_POWERSTATE_TRANSITION cpu_to_le32(0x4000002F)
+#define STATUS_ALPC_CHECK_COMPLETION_LIST cpu_to_le32(0x40000030)
+#define STATUS_SYSTEM_POWERSTATE_COMPLEX_TRANSITION cpu_to_le32(0x40000031)
+#define STATUS_ACCESS_AUDIT_BY_POLICY cpu_to_le32(0x40000032)
+#define STATUS_ABANDON_HIBERFILE cpu_to_le32(0x40000033)
+#define STATUS_BIZRULES_NOT_ENABLED cpu_to_le32(0x40000034)
+#define STATUS_WAKE_SYSTEM cpu_to_le32(0x40000294)
+#define STATUS_DS_SHUTTING_DOWN cpu_to_le32(0x40000370)
+#define DBG_REPLY_LATER cpu_to_le32(0x40010001)
+#define DBG_UNABLE_TO_PROVIDE_HANDLE cpu_to_le32(0x40010002)
+#define DBG_TERMINATE_THREAD cpu_to_le32(0x40010003)
+#define DBG_TERMINATE_PROCESS cpu_to_le32(0x40010004)
+#define DBG_CONTROL_C cpu_to_le32(0x40010005)
+#define DBG_PRINTEXCEPTION_C cpu_to_le32(0x40010006)
+#define DBG_RIPEXCEPTION cpu_to_le32(0x40010007)
+#define DBG_CONTROL_BREAK cpu_to_le32(0x40010008)
+#define DBG_COMMAND_EXCEPTION cpu_to_le32(0x40010009)
+#define RPC_NT_UUID_LOCAL_ONLY cpu_to_le32(0x40020056)
+#define RPC_NT_SEND_INCOMPLETE cpu_to_le32(0x400200AF)
+#define STATUS_CTX_CDM_CONNECT cpu_to_le32(0x400A0004)
+#define STATUS_CTX_CDM_DISCONNECT cpu_to_le32(0x400A0005)
+#define STATUS_SXS_RELEASE_ACTIVATION_CONTEXT cpu_to_le32(0x4015000D)
+#define STATUS_RECOVERY_NOT_NEEDED cpu_to_le32(0x40190034)
+#define STATUS_RM_ALREADY_STARTED cpu_to_le32(0x40190035)
+#define STATUS_LOG_NO_RESTART cpu_to_le32(0x401A000C)
+#define STATUS_VIDEO_DRIVER_DEBUG_REPORT_REQUEST cpu_to_le32(0x401B00EC)
+#define STATUS_GRAPHICS_PARTIAL_DATA_POPULATED cpu_to_le32(0x401E000A)
+#define STATUS_GRAPHICS_DRIVER_MISMATCH cpu_to_le32(0x401E0117)
+#define STATUS_GRAPHICS_MODE_NOT_PINNED cpu_to_le32(0x401E0307)
+#define STATUS_GRAPHICS_NO_PREFERRED_MODE cpu_to_le32(0x401E031E)
+#define STATUS_GRAPHICS_DATASET_IS_EMPTY cpu_to_le32(0x401E034B)
+#define STATUS_GRAPHICS_NO_MORE_ELEMENTS_IN_DATASET cpu_to_le32(0x401E034C)
+#define STATUS_GRAPHICS_PATH_CONTENT_GEOMETRY_TRANSFORMATION_NOT_PINNED \
+ cpu_to_le32(0x401E0351)
+#define STATUS_GRAPHICS_UNKNOWN_CHILD_STATUS cpu_to_le32(0x401E042F)
+#define STATUS_GRAPHICS_LEADLINK_START_DEFERRED cpu_to_le32(0x401E0437)
+#define STATUS_GRAPHICS_POLLING_TOO_FREQUENTLY cpu_to_le32(0x401E0439)
+#define STATUS_GRAPHICS_START_DEFERRED cpu_to_le32(0x401E043A)
+#define STATUS_NDIS_INDICATION_REQUIRED cpu_to_le32(0x40230001)
+#define STATUS_GUARD_PAGE_VIOLATION cpu_to_le32(0x80000001)
+#define STATUS_DATATYPE_MISALIGNMENT cpu_to_le32(0x80000002)
+#define STATUS_BREAKPOINT cpu_to_le32(0x80000003)
+#define STATUS_SINGLE_STEP cpu_to_le32(0x80000004)
+#define STATUS_BUFFER_OVERFLOW cpu_to_le32(0x80000005)
+#define STATUS_NO_MORE_FILES cpu_to_le32(0x80000006)
+#define STATUS_WAKE_SYSTEM_DEBUGGER cpu_to_le32(0x80000007)
+#define STATUS_HANDLES_CLOSED cpu_to_le32(0x8000000A)
+#define STATUS_NO_INHERITANCE cpu_to_le32(0x8000000B)
+#define STATUS_GUID_SUBSTITUTION_MADE cpu_to_le32(0x8000000C)
+#define STATUS_PARTIAL_COPY cpu_to_le32(0x8000000D)
+#define STATUS_DEVICE_PAPER_EMPTY cpu_to_le32(0x8000000E)
+#define STATUS_DEVICE_POWERED_OFF cpu_to_le32(0x8000000F)
+#define STATUS_DEVICE_OFF_LINE cpu_to_le32(0x80000010)
+#define STATUS_DEVICE_BUSY cpu_to_le32(0x80000011)
+#define STATUS_NO_MORE_EAS cpu_to_le32(0x80000012)
+#define STATUS_INVALID_EA_NAME cpu_to_le32(0x80000013)
+#define STATUS_EA_LIST_INCONSISTENT cpu_to_le32(0x80000014)
+#define STATUS_INVALID_EA_FLAG cpu_to_le32(0x80000015)
+#define STATUS_VERIFY_REQUIRED cpu_to_le32(0x80000016)
+#define STATUS_EXTRANEOUS_INFORMATION cpu_to_le32(0x80000017)
+#define STATUS_RXACT_COMMIT_NECESSARY cpu_to_le32(0x80000018)
+#define STATUS_NO_MORE_ENTRIES cpu_to_le32(0x8000001A)
+#define STATUS_FILEMARK_DETECTED cpu_to_le32(0x8000001B)
+#define STATUS_MEDIA_CHANGED cpu_to_le32(0x8000001C)
+#define STATUS_BUS_RESET cpu_to_le32(0x8000001D)
+#define STATUS_END_OF_MEDIA cpu_to_le32(0x8000001E)
+#define STATUS_BEGINNING_OF_MEDIA cpu_to_le32(0x8000001F)
+#define STATUS_MEDIA_CHECK cpu_to_le32(0x80000020)
+#define STATUS_SETMARK_DETECTED cpu_to_le32(0x80000021)
+#define STATUS_NO_DATA_DETECTED cpu_to_le32(0x80000022)
+#define STATUS_REDIRECTOR_HAS_OPEN_HANDLES cpu_to_le32(0x80000023)
+#define STATUS_SERVER_HAS_OPEN_HANDLES cpu_to_le32(0x80000024)
+#define STATUS_ALREADY_DISCONNECTED cpu_to_le32(0x80000025)
+#define STATUS_LONGJUMP cpu_to_le32(0x80000026)
+#define STATUS_CLEANER_CARTRIDGE_INSTALLED cpu_to_le32(0x80000027)
+#define STATUS_PLUGPLAY_QUERY_VETOED cpu_to_le32(0x80000028)
+#define STATUS_UNWIND_CONSOLIDATE cpu_to_le32(0x80000029)
+#define STATUS_REGISTRY_HIVE_RECOVERED cpu_to_le32(0x8000002A)
+#define STATUS_DLL_MIGHT_BE_INSECURE cpu_to_le32(0x8000002B)
+#define STATUS_DLL_MIGHT_BE_INCOMPATIBLE cpu_to_le32(0x8000002C)
+#define STATUS_STOPPED_ON_SYMLINK cpu_to_le32(0x8000002D)
+#define STATUS_DEVICE_REQUIRES_CLEANING cpu_to_le32(0x80000288)
+#define STATUS_DEVICE_DOOR_OPEN cpu_to_le32(0x80000289)
+#define STATUS_DATA_LOST_REPAIR cpu_to_le32(0x80000803)
+#define DBG_EXCEPTION_NOT_HANDLED cpu_to_le32(0x80010001)
+#define STATUS_CLUSTER_NODE_ALREADY_UP cpu_to_le32(0x80130001)
+#define STATUS_CLUSTER_NODE_ALREADY_DOWN cpu_to_le32(0x80130002)
+#define STATUS_CLUSTER_NETWORK_ALREADY_ONLINE cpu_to_le32(0x80130003)
+#define STATUS_CLUSTER_NETWORK_ALREADY_OFFLINE cpu_to_le32(0x80130004)
+#define STATUS_CLUSTER_NODE_ALREADY_MEMBER cpu_to_le32(0x80130005)
+#define STATUS_COULD_NOT_RESIZE_LOG cpu_to_le32(0x80190009)
+#define STATUS_NO_TXF_METADATA cpu_to_le32(0x80190029)
+#define STATUS_CANT_RECOVER_WITH_HANDLE_OPEN cpu_to_le32(0x80190031)
+#define STATUS_TXF_METADATA_ALREADY_PRESENT cpu_to_le32(0x80190041)
+#define STATUS_TRANSACTION_SCOPE_CALLBACKS_NOT_SET cpu_to_le32(0x80190042)
+#define STATUS_VIDEO_HUNG_DISPLAY_DRIVER_THREAD_RECOVERED \
+ cpu_to_le32(0x801B00EB)
+#define STATUS_FLT_BUFFER_TOO_SMALL cpu_to_le32(0x801C0001)
+#define STATUS_FVE_PARTIAL_METADATA cpu_to_le32(0x80210001)
+#define STATUS_UNSUCCESSFUL cpu_to_le32(0xC0000001)
+#define STATUS_NOT_IMPLEMENTED cpu_to_le32(0xC0000002)
+#define STATUS_INVALID_INFO_CLASS cpu_to_le32(0xC0000003)
+#define STATUS_INFO_LENGTH_MISMATCH cpu_to_le32(0xC0000004)
+#define STATUS_ACCESS_VIOLATION cpu_to_le32(0xC0000005)
+#define STATUS_IN_PAGE_ERROR cpu_to_le32(0xC0000006)
+#define STATUS_PAGEFILE_QUOTA cpu_to_le32(0xC0000007)
+#define STATUS_INVALID_HANDLE cpu_to_le32(0xC0000008)
+#define STATUS_BAD_INITIAL_STACK cpu_to_le32(0xC0000009)
+#define STATUS_BAD_INITIAL_PC cpu_to_le32(0xC000000A)
+#define STATUS_INVALID_CID cpu_to_le32(0xC000000B)
+#define STATUS_TIMER_NOT_CANCELED cpu_to_le32(0xC000000C)
+#define STATUS_INVALID_PARAMETER cpu_to_le32(0xC000000D)
+#define STATUS_NO_SUCH_DEVICE cpu_to_le32(0xC000000E)
+#define STATUS_NO_SUCH_FILE cpu_to_le32(0xC000000F)
+#define STATUS_INVALID_DEVICE_REQUEST cpu_to_le32(0xC0000010)
+#define STATUS_END_OF_FILE cpu_to_le32(0xC0000011)
+#define STATUS_WRONG_VOLUME cpu_to_le32(0xC0000012)
+#define STATUS_NO_MEDIA_IN_DEVICE cpu_to_le32(0xC0000013)
+#define STATUS_UNRECOGNIZED_MEDIA cpu_to_le32(0xC0000014)
+#define STATUS_NONEXISTENT_SECTOR cpu_to_le32(0xC0000015)
+#define STATUS_MORE_PROCESSING_REQUIRED cpu_to_le32(0xC0000016)
+#define STATUS_NO_MEMORY cpu_to_le32(0xC0000017)
+#define STATUS_CONFLICTING_ADDRESSES cpu_to_le32(0xC0000018)
+#define STATUS_NOT_MAPPED_VIEW cpu_to_le32(0xC0000019)
+#define STATUS_UNABLE_TO_FREE_VM cpu_to_le32(0xC000001A)
+#define STATUS_UNABLE_TO_DELETE_SECTION cpu_to_le32(0xC000001B)
+#define STATUS_INVALID_SYSTEM_SERVICE cpu_to_le32(0xC000001C)
+#define STATUS_ILLEGAL_INSTRUCTION cpu_to_le32(0xC000001D)
+#define STATUS_INVALID_LOCK_SEQUENCE cpu_to_le32(0xC000001E)
+#define STATUS_INVALID_VIEW_SIZE cpu_to_le32(0xC000001F)
+#define STATUS_INVALID_FILE_FOR_SECTION cpu_to_le32(0xC0000020)
+#define STATUS_ALREADY_COMMITTED cpu_to_le32(0xC0000021)
+#define STATUS_ACCESS_DENIED cpu_to_le32(0xC0000022)
+#define STATUS_BUFFER_TOO_SMALL cpu_to_le32(0xC0000023)
+#define STATUS_OBJECT_TYPE_MISMATCH cpu_to_le32(0xC0000024)
+#define STATUS_NONCONTINUABLE_EXCEPTION cpu_to_le32(0xC0000025)
+#define STATUS_INVALID_DISPOSITION cpu_to_le32(0xC0000026)
+#define STATUS_UNWIND cpu_to_le32(0xC0000027)
+#define STATUS_BAD_STACK cpu_to_le32(0xC0000028)
+#define STATUS_INVALID_UNWIND_TARGET cpu_to_le32(0xC0000029)
+#define STATUS_NOT_LOCKED cpu_to_le32(0xC000002A)
+#define STATUS_PARITY_ERROR cpu_to_le32(0xC000002B)
+#define STATUS_UNABLE_TO_DECOMMIT_VM cpu_to_le32(0xC000002C)
+#define STATUS_NOT_COMMITTED cpu_to_le32(0xC000002D)
+#define STATUS_INVALID_PORT_ATTRIBUTES cpu_to_le32(0xC000002E)
+#define STATUS_PORT_MESSAGE_TOO_LONG cpu_to_le32(0xC000002F)
+#define STATUS_INVALID_PARAMETER_MIX cpu_to_le32(0xC0000030)
+#define STATUS_INVALID_QUOTA_LOWER cpu_to_le32(0xC0000031)
+#define STATUS_DISK_CORRUPT_ERROR cpu_to_le32(0xC0000032)
+#define STATUS_OBJECT_NAME_INVALID cpu_to_le32(0xC0000033)
+#define STATUS_OBJECT_NAME_NOT_FOUND cpu_to_le32(0xC0000034)
+#define STATUS_OBJECT_NAME_COLLISION cpu_to_le32(0xC0000035)
+#define STATUS_PORT_DISCONNECTED cpu_to_le32(0xC0000037)
+#define STATUS_DEVICE_ALREADY_ATTACHED cpu_to_le32(0xC0000038)
+#define STATUS_OBJECT_PATH_INVALID cpu_to_le32(0xC0000039)
+#define STATUS_OBJECT_PATH_NOT_FOUND cpu_to_le32(0xC000003A)
+#define STATUS_OBJECT_PATH_SYNTAX_BAD cpu_to_le32(0xC000003B)
+#define STATUS_DATA_OVERRUN cpu_to_le32(0xC000003C)
+#define STATUS_DATA_LATE_ERROR cpu_to_le32(0xC000003D)
+#define STATUS_DATA_ERROR cpu_to_le32(0xC000003E)
+#define STATUS_CRC_ERROR cpu_to_le32(0xC000003F)
+#define STATUS_SECTION_TOO_BIG cpu_to_le32(0xC0000040)
+#define STATUS_PORT_CONNECTION_REFUSED cpu_to_le32(0xC0000041)
+#define STATUS_INVALID_PORT_HANDLE cpu_to_le32(0xC0000042)
+#define STATUS_SHARING_VIOLATION cpu_to_le32(0xC0000043)
+#define STATUS_QUOTA_EXCEEDED cpu_to_le32(0xC0000044)
+#define STATUS_INVALID_PAGE_PROTECTION cpu_to_le32(0xC0000045)
+#define STATUS_MUTANT_NOT_OWNED cpu_to_le32(0xC0000046)
+#define STATUS_SEMAPHORE_LIMIT_EXCEEDED cpu_to_le32(0xC0000047)
+#define STATUS_PORT_ALREADY_SET cpu_to_le32(0xC0000048)
+#define STATUS_SECTION_NOT_IMAGE cpu_to_le32(0xC0000049)
+#define STATUS_SUSPEND_COUNT_EXCEEDED cpu_to_le32(0xC000004A)
+#define STATUS_THREAD_IS_TERMINATING cpu_to_le32(0xC000004B)
+#define STATUS_BAD_WORKING_SET_LIMIT cpu_to_le32(0xC000004C)
+#define STATUS_INCOMPATIBLE_FILE_MAP cpu_to_le32(0xC000004D)
+#define STATUS_SECTION_PROTECTION cpu_to_le32(0xC000004E)
+#define STATUS_EAS_NOT_SUPPORTED cpu_to_le32(0xC000004F)
+#define STATUS_EA_TOO_LARGE cpu_to_le32(0xC0000050)
+#define STATUS_NONEXISTENT_EA_ENTRY cpu_to_le32(0xC0000051)
+#define STATUS_NO_EAS_ON_FILE cpu_to_le32(0xC0000052)
+#define STATUS_EA_CORRUPT_ERROR cpu_to_le32(0xC0000053)
+#define STATUS_FILE_LOCK_CONFLICT cpu_to_le32(0xC0000054)
+#define STATUS_LOCK_NOT_GRANTED cpu_to_le32(0xC0000055)
+#define STATUS_DELETE_PENDING cpu_to_le32(0xC0000056)
+#define STATUS_CTL_FILE_NOT_SUPPORTED cpu_to_le32(0xC0000057)
+#define STATUS_UNKNOWN_REVISION cpu_to_le32(0xC0000058)
+#define STATUS_REVISION_MISMATCH cpu_to_le32(0xC0000059)
+#define STATUS_INVALID_OWNER cpu_to_le32(0xC000005A)
+#define STATUS_INVALID_PRIMARY_GROUP cpu_to_le32(0xC000005B)
+#define STATUS_NO_IMPERSONATION_TOKEN cpu_to_le32(0xC000005C)
+#define STATUS_CANT_DISABLE_MANDATORY cpu_to_le32(0xC000005D)
+#define STATUS_NO_LOGON_SERVERS cpu_to_le32(0xC000005E)
+#define STATUS_NO_SUCH_LOGON_SESSION cpu_to_le32(0xC000005F)
+#define STATUS_NO_SUCH_PRIVILEGE cpu_to_le32(0xC0000060)
+#define STATUS_PRIVILEGE_NOT_HELD cpu_to_le32(0xC0000061)
+#define STATUS_INVALID_ACCOUNT_NAME cpu_to_le32(0xC0000062)
+#define STATUS_USER_EXISTS cpu_to_le32(0xC0000063)
+#define STATUS_NO_SUCH_USER cpu_to_le32(0xC0000064)
+#define STATUS_GROUP_EXISTS cpu_to_le32(0xC0000065)
+#define STATUS_NO_SUCH_GROUP cpu_to_le32(0xC0000066)
+#define STATUS_MEMBER_IN_GROUP cpu_to_le32(0xC0000067)
+#define STATUS_MEMBER_NOT_IN_GROUP cpu_to_le32(0xC0000068)
+#define STATUS_LAST_ADMIN cpu_to_le32(0xC0000069)
+#define STATUS_WRONG_PASSWORD cpu_to_le32(0xC000006A)
+#define STATUS_ILL_FORMED_PASSWORD cpu_to_le32(0xC000006B)
+#define STATUS_PASSWORD_RESTRICTION cpu_to_le32(0xC000006C)
+#define STATUS_LOGON_FAILURE cpu_to_le32(0xC000006D)
+#define STATUS_ACCOUNT_RESTRICTION cpu_to_le32(0xC000006E)
+#define STATUS_INVALID_LOGON_HOURS cpu_to_le32(0xC000006F)
+#define STATUS_INVALID_WORKSTATION cpu_to_le32(0xC0000070)
+#define STATUS_PASSWORD_EXPIRED cpu_to_le32(0xC0000071)
+#define STATUS_ACCOUNT_DISABLED cpu_to_le32(0xC0000072)
+#define STATUS_NONE_MAPPED cpu_to_le32(0xC0000073)
+#define STATUS_TOO_MANY_LUIDS_REQUESTED cpu_to_le32(0xC0000074)
+#define STATUS_LUIDS_EXHAUSTED cpu_to_le32(0xC0000075)
+#define STATUS_INVALID_SUB_AUTHORITY cpu_to_le32(0xC0000076)
+#define STATUS_INVALID_ACL cpu_to_le32(0xC0000077)
+#define STATUS_INVALID_SID cpu_to_le32(0xC0000078)
+#define STATUS_INVALID_SECURITY_DESCR cpu_to_le32(0xC0000079)
+#define STATUS_PROCEDURE_NOT_FOUND cpu_to_le32(0xC000007A)
+#define STATUS_INVALID_IMAGE_FORMAT cpu_to_le32(0xC000007B)
+#define STATUS_NO_TOKEN cpu_to_le32(0xC000007C)
+#define STATUS_BAD_INHERITANCE_ACL cpu_to_le32(0xC000007D)
+#define STATUS_RANGE_NOT_LOCKED cpu_to_le32(0xC000007E)
+#define STATUS_DISK_FULL cpu_to_le32(0xC000007F)
+#define STATUS_SERVER_DISABLED cpu_to_le32(0xC0000080)
+#define STATUS_SERVER_NOT_DISABLED cpu_to_le32(0xC0000081)
+#define STATUS_TOO_MANY_GUIDS_REQUESTED cpu_to_le32(0xC0000082)
+#define STATUS_GUIDS_EXHAUSTED cpu_to_le32(0xC0000083)
+#define STATUS_INVALID_ID_AUTHORITY cpu_to_le32(0xC0000084)
+#define STATUS_AGENTS_EXHAUSTED cpu_to_le32(0xC0000085)
+#define STATUS_INVALID_VOLUME_LABEL cpu_to_le32(0xC0000086)
+#define STATUS_SECTION_NOT_EXTENDED cpu_to_le32(0xC0000087)
+#define STATUS_NOT_MAPPED_DATA cpu_to_le32(0xC0000088)
+#define STATUS_RESOURCE_DATA_NOT_FOUND cpu_to_le32(0xC0000089)
+#define STATUS_RESOURCE_TYPE_NOT_FOUND cpu_to_le32(0xC000008A)
+#define STATUS_RESOURCE_NAME_NOT_FOUND cpu_to_le32(0xC000008B)
+#define STATUS_ARRAY_BOUNDS_EXCEEDED cpu_to_le32(0xC000008C)
+#define STATUS_FLOAT_DENORMAL_OPERAND cpu_to_le32(0xC000008D)
+#define STATUS_FLOAT_DIVIDE_BY_ZERO cpu_to_le32(0xC000008E)
+#define STATUS_FLOAT_INEXACT_RESULT cpu_to_le32(0xC000008F)
+#define STATUS_FLOAT_INVALID_OPERATION cpu_to_le32(0xC0000090)
+#define STATUS_FLOAT_OVERFLOW cpu_to_le32(0xC0000091)
+#define STATUS_FLOAT_STACK_CHECK cpu_to_le32(0xC0000092)
+#define STATUS_FLOAT_UNDERFLOW cpu_to_le32(0xC0000093)
+#define STATUS_INTEGER_DIVIDE_BY_ZERO cpu_to_le32(0xC0000094)
+#define STATUS_INTEGER_OVERFLOW cpu_to_le32(0xC0000095)
+#define STATUS_PRIVILEGED_INSTRUCTION cpu_to_le32(0xC0000096)
+#define STATUS_TOO_MANY_PAGING_FILES cpu_to_le32(0xC0000097)
+#define STATUS_FILE_INVALID cpu_to_le32(0xC0000098)
+#define STATUS_ALLOTTED_SPACE_EXCEEDED cpu_to_le32(0xC0000099)
+#define STATUS_INSUFFICIENT_RESOURCES cpu_to_le32(0xC000009A)
+#define STATUS_DFS_EXIT_PATH_FOUND cpu_to_le32(0xC000009B)
+#define STATUS_DEVICE_DATA_ERROR cpu_to_le32(0xC000009C)
+#define STATUS_DEVICE_NOT_CONNECTED cpu_to_le32(0xC000009D)
+#define STATUS_DEVICE_POWER_FAILURE cpu_to_le32(0xC000009E)
+#define STATUS_FREE_VM_NOT_AT_BASE cpu_to_le32(0xC000009F)
+#define STATUS_MEMORY_NOT_ALLOCATED cpu_to_le32(0xC00000A0)
+#define STATUS_WORKING_SET_QUOTA cpu_to_le32(0xC00000A1)
+#define STATUS_MEDIA_WRITE_PROTECTED cpu_to_le32(0xC00000A2)
+#define STATUS_DEVICE_NOT_READY cpu_to_le32(0xC00000A3)
+#define STATUS_INVALID_GROUP_ATTRIBUTES cpu_to_le32(0xC00000A4)
+#define STATUS_BAD_IMPERSONATION_LEVEL cpu_to_le32(0xC00000A5)
+#define STATUS_CANT_OPEN_ANONYMOUS cpu_to_le32(0xC00000A6)
+#define STATUS_BAD_VALIDATION_CLASS cpu_to_le32(0xC00000A7)
+#define STATUS_BAD_TOKEN_TYPE cpu_to_le32(0xC00000A8)
+#define STATUS_BAD_MASTER_BOOT_RECORD cpu_to_le32(0xC00000A9)
+#define STATUS_INSTRUCTION_MISALIGNMENT cpu_to_le32(0xC00000AA)
+#define STATUS_INSTANCE_NOT_AVAILABLE cpu_to_le32(0xC00000AB)
+#define STATUS_PIPE_NOT_AVAILABLE cpu_to_le32(0xC00000AC)
+#define STATUS_INVALID_PIPE_STATE cpu_to_le32(0xC00000AD)
+#define STATUS_PIPE_BUSY cpu_to_le32(0xC00000AE)
+#define STATUS_ILLEGAL_FUNCTION cpu_to_le32(0xC00000AF)
+#define STATUS_PIPE_DISCONNECTED cpu_to_le32(0xC00000B0)
+#define STATUS_PIPE_CLOSING cpu_to_le32(0xC00000B1)
+#define STATUS_PIPE_CONNECTED cpu_to_le32(0xC00000B2)
+#define STATUS_PIPE_LISTENING cpu_to_le32(0xC00000B3)
+#define STATUS_INVALID_READ_MODE cpu_to_le32(0xC00000B4)
+#define STATUS_IO_TIMEOUT cpu_to_le32(0xC00000B5)
+#define STATUS_FILE_FORCED_CLOSED cpu_to_le32(0xC00000B6)
+#define STATUS_PROFILING_NOT_STARTED cpu_to_le32(0xC00000B7)
+#define STATUS_PROFILING_NOT_STOPPED cpu_to_le32(0xC00000B8)
+#define STATUS_COULD_NOT_INTERPRET cpu_to_le32(0xC00000B9)
+#define STATUS_FILE_IS_A_DIRECTORY cpu_to_le32(0xC00000BA)
+#define STATUS_NOT_SUPPORTED cpu_to_le32(0xC00000BB)
+#define STATUS_REMOTE_NOT_LISTENING cpu_to_le32(0xC00000BC)
+#define STATUS_DUPLICATE_NAME cpu_to_le32(0xC00000BD)
+#define STATUS_BAD_NETWORK_PATH cpu_to_le32(0xC00000BE)
+#define STATUS_NETWORK_BUSY cpu_to_le32(0xC00000BF)
+#define STATUS_DEVICE_DOES_NOT_EXIST cpu_to_le32(0xC00000C0)
+#define STATUS_TOO_MANY_COMMANDS cpu_to_le32(0xC00000C1)
+#define STATUS_ADAPTER_HARDWARE_ERROR cpu_to_le32(0xC00000C2)
+#define STATUS_INVALID_NETWORK_RESPONSE cpu_to_le32(0xC00000C3)
+#define STATUS_UNEXPECTED_NETWORK_ERROR cpu_to_le32(0xC00000C4)
+#define STATUS_BAD_REMOTE_ADAPTER cpu_to_le32(0xC00000C5)
+#define STATUS_PRINT_QUEUE_FULL cpu_to_le32(0xC00000C6)
+#define STATUS_NO_SPOOL_SPACE cpu_to_le32(0xC00000C7)
+#define STATUS_PRINT_CANCELLED cpu_to_le32(0xC00000C8)
+#define STATUS_NETWORK_NAME_DELETED cpu_to_le32(0xC00000C9)
+#define STATUS_NETWORK_ACCESS_DENIED cpu_to_le32(0xC00000CA)
+#define STATUS_BAD_DEVICE_TYPE cpu_to_le32(0xC00000CB)
+#define STATUS_BAD_NETWORK_NAME cpu_to_le32(0xC00000CC)
+#define STATUS_TOO_MANY_NAMES cpu_to_le32(0xC00000CD)
+#define STATUS_TOO_MANY_SESSIONS cpu_to_le32(0xC00000CE)
+#define STATUS_SHARING_PAUSED cpu_to_le32(0xC00000CF)
+#define STATUS_REQUEST_NOT_ACCEPTED cpu_to_le32(0xC00000D0)
+#define STATUS_REDIRECTOR_PAUSED cpu_to_le32(0xC00000D1)
+#define STATUS_NET_WRITE_FAULT cpu_to_le32(0xC00000D2)
+#define STATUS_PROFILING_AT_LIMIT cpu_to_le32(0xC00000D3)
+#define STATUS_NOT_SAME_DEVICE cpu_to_le32(0xC00000D4)
+#define STATUS_FILE_RENAMED cpu_to_le32(0xC00000D5)
+#define STATUS_VIRTUAL_CIRCUIT_CLOSED cpu_to_le32(0xC00000D6)
+#define STATUS_NO_SECURITY_ON_OBJECT cpu_to_le32(0xC00000D7)
+#define STATUS_CANT_WAIT cpu_to_le32(0xC00000D8)
+#define STATUS_PIPE_EMPTY cpu_to_le32(0xC00000D9)
+#define STATUS_CANT_ACCESS_DOMAIN_INFO cpu_to_le32(0xC00000DA)
+#define STATUS_CANT_TERMINATE_SELF cpu_to_le32(0xC00000DB)
+#define STATUS_INVALID_SERVER_STATE cpu_to_le32(0xC00000DC)
+#define STATUS_INVALID_DOMAIN_STATE cpu_to_le32(0xC00000DD)
+#define STATUS_INVALID_DOMAIN_ROLE cpu_to_le32(0xC00000DE)
+#define STATUS_NO_SUCH_DOMAIN cpu_to_le32(0xC00000DF)
+#define STATUS_DOMAIN_EXISTS cpu_to_le32(0xC00000E0)
+#define STATUS_DOMAIN_LIMIT_EXCEEDED cpu_to_le32(0xC00000E1)
+#define STATUS_OPLOCK_NOT_GRANTED cpu_to_le32(0xC00000E2)
+#define STATUS_INVALID_OPLOCK_PROTOCOL cpu_to_le32(0xC00000E3)
+#define STATUS_INTERNAL_DB_CORRUPTION cpu_to_le32(0xC00000E4)
+#define STATUS_INTERNAL_ERROR cpu_to_le32(0xC00000E5)
+#define STATUS_GENERIC_NOT_MAPPED cpu_to_le32(0xC00000E6)
+#define STATUS_BAD_DESCRIPTOR_FORMAT cpu_to_le32(0xC00000E7)
+#define STATUS_INVALID_USER_BUFFER cpu_to_le32(0xC00000E8)
+#define STATUS_UNEXPECTED_IO_ERROR cpu_to_le32(0xC00000E9)
+#define STATUS_UNEXPECTED_MM_CREATE_ERR cpu_to_le32(0xC00000EA)
+#define STATUS_UNEXPECTED_MM_MAP_ERROR cpu_to_le32(0xC00000EB)
+#define STATUS_UNEXPECTED_MM_EXTEND_ERR cpu_to_le32(0xC00000EC)
+#define STATUS_NOT_LOGON_PROCESS cpu_to_le32(0xC00000ED)
+#define STATUS_LOGON_SESSION_EXISTS cpu_to_le32(0xC00000EE)
+#define STATUS_INVALID_PARAMETER_1 cpu_to_le32(0xC00000EF)
+#define STATUS_INVALID_PARAMETER_2 cpu_to_le32(0xC00000F0)
+#define STATUS_INVALID_PARAMETER_3 cpu_to_le32(0xC00000F1)
+#define STATUS_INVALID_PARAMETER_4 cpu_to_le32(0xC00000F2)
+#define STATUS_INVALID_PARAMETER_5 cpu_to_le32(0xC00000F3)
+#define STATUS_INVALID_PARAMETER_6 cpu_to_le32(0xC00000F4)
+#define STATUS_INVALID_PARAMETER_7 cpu_to_le32(0xC00000F5)
+#define STATUS_INVALID_PARAMETER_8 cpu_to_le32(0xC00000F6)
+#define STATUS_INVALID_PARAMETER_9 cpu_to_le32(0xC00000F7)
+#define STATUS_INVALID_PARAMETER_10 cpu_to_le32(0xC00000F8)
+#define STATUS_INVALID_PARAMETER_11 cpu_to_le32(0xC00000F9)
+#define STATUS_INVALID_PARAMETER_12 cpu_to_le32(0xC00000FA)
+#define STATUS_REDIRECTOR_NOT_STARTED cpu_to_le32(0xC00000FB)
+#define STATUS_REDIRECTOR_STARTED cpu_to_le32(0xC00000FC)
+#define STATUS_STACK_OVERFLOW cpu_to_le32(0xC00000FD)
+#define STATUS_NO_SUCH_PACKAGE cpu_to_le32(0xC00000FE)
+#define STATUS_BAD_FUNCTION_TABLE cpu_to_le32(0xC00000FF)
+#define STATUS_VARIABLE_NOT_FOUND cpu_to_le32(0xC0000100)
+#define STATUS_DIRECTORY_NOT_EMPTY cpu_to_le32(0xC0000101)
+#define STATUS_FILE_CORRUPT_ERROR cpu_to_le32(0xC0000102)
+#define STATUS_NOT_A_DIRECTORY cpu_to_le32(0xC0000103)
+#define STATUS_BAD_LOGON_SESSION_STATE cpu_to_le32(0xC0000104)
+#define STATUS_LOGON_SESSION_COLLISION cpu_to_le32(0xC0000105)
+#define STATUS_NAME_TOO_LONG cpu_to_le32(0xC0000106)
+#define STATUS_FILES_OPEN cpu_to_le32(0xC0000107)
+#define STATUS_CONNECTION_IN_USE cpu_to_le32(0xC0000108)
+#define STATUS_MESSAGE_NOT_FOUND cpu_to_le32(0xC0000109)
+#define STATUS_PROCESS_IS_TERMINATING cpu_to_le32(0xC000010A)
+#define STATUS_INVALID_LOGON_TYPE cpu_to_le32(0xC000010B)
+#define STATUS_NO_GUID_TRANSLATION cpu_to_le32(0xC000010C)
+#define STATUS_CANNOT_IMPERSONATE cpu_to_le32(0xC000010D)
+#define STATUS_IMAGE_ALREADY_LOADED cpu_to_le32(0xC000010E)
+#define STATUS_ABIOS_NOT_PRESENT cpu_to_le32(0xC000010F)
+#define STATUS_ABIOS_LID_NOT_EXIST cpu_to_le32(0xC0000110)
+#define STATUS_ABIOS_LID_ALREADY_OWNED cpu_to_le32(0xC0000111)
+#define STATUS_ABIOS_NOT_LID_OWNER cpu_to_le32(0xC0000112)
+#define STATUS_ABIOS_INVALID_COMMAND cpu_to_le32(0xC0000113)
+#define STATUS_ABIOS_INVALID_LID cpu_to_le32(0xC0000114)
+#define STATUS_ABIOS_SELECTOR_NOT_AVAILABLE cpu_to_le32(0xC0000115)
+#define STATUS_ABIOS_INVALID_SELECTOR cpu_to_le32(0xC0000116)
+#define STATUS_NO_LDT cpu_to_le32(0xC0000117)
+#define STATUS_INVALID_LDT_SIZE cpu_to_le32(0xC0000118)
+#define STATUS_INVALID_LDT_OFFSET cpu_to_le32(0xC0000119)
+#define STATUS_INVALID_LDT_DESCRIPTOR cpu_to_le32(0xC000011A)
+#define STATUS_INVALID_IMAGE_NE_FORMAT cpu_to_le32(0xC000011B)
+#define STATUS_RXACT_INVALID_STATE cpu_to_le32(0xC000011C)
+#define STATUS_RXACT_COMMIT_FAILURE cpu_to_le32(0xC000011D)
+#define STATUS_MAPPED_FILE_SIZE_ZERO cpu_to_le32(0xC000011E)
+#define STATUS_TOO_MANY_OPENED_FILES cpu_to_le32(0xC000011F)
+#define STATUS_CANCELLED cpu_to_le32(0xC0000120)
+#define STATUS_CANNOT_DELETE cpu_to_le32(0xC0000121)
+#define STATUS_INVALID_COMPUTER_NAME cpu_to_le32(0xC0000122)
+#define STATUS_FILE_DELETED cpu_to_le32(0xC0000123)
+#define STATUS_SPECIAL_ACCOUNT cpu_to_le32(0xC0000124)
+#define STATUS_SPECIAL_GROUP cpu_to_le32(0xC0000125)
+#define STATUS_SPECIAL_USER cpu_to_le32(0xC0000126)
+#define STATUS_MEMBERS_PRIMARY_GROUP cpu_to_le32(0xC0000127)
+#define STATUS_FILE_CLOSED cpu_to_le32(0xC0000128)
+#define STATUS_TOO_MANY_THREADS cpu_to_le32(0xC0000129)
+#define STATUS_THREAD_NOT_IN_PROCESS cpu_to_le32(0xC000012A)
+#define STATUS_TOKEN_ALREADY_IN_USE cpu_to_le32(0xC000012B)
+#define STATUS_PAGEFILE_QUOTA_EXCEEDED cpu_to_le32(0xC000012C)
+#define STATUS_COMMITMENT_LIMIT cpu_to_le32(0xC000012D)
+#define STATUS_INVALID_IMAGE_LE_FORMAT cpu_to_le32(0xC000012E)
+#define STATUS_INVALID_IMAGE_NOT_MZ cpu_to_le32(0xC000012F)
+#define STATUS_INVALID_IMAGE_PROTECT cpu_to_le32(0xC0000130)
+#define STATUS_INVALID_IMAGE_WIN_16 cpu_to_le32(0xC0000131)
+#define STATUS_LOGON_SERVER_CONFLICT cpu_to_le32(0xC0000132)
+#define STATUS_TIME_DIFFERENCE_AT_DC cpu_to_le32(0xC0000133)
+#define STATUS_SYNCHRONIZATION_REQUIRED cpu_to_le32(0xC0000134)
+#define STATUS_DLL_NOT_FOUND cpu_to_le32(0xC0000135)
+#define STATUS_OPEN_FAILED cpu_to_le32(0xC0000136)
+#define STATUS_IO_PRIVILEGE_FAILED cpu_to_le32(0xC0000137)
+#define STATUS_ORDINAL_NOT_FOUND cpu_to_le32(0xC0000138)
+#define STATUS_ENTRYPOINT_NOT_FOUND cpu_to_le32(0xC0000139)
+#define STATUS_CONTROL_C_EXIT cpu_to_le32(0xC000013A)
+#define STATUS_LOCAL_DISCONNECT cpu_to_le32(0xC000013B)
+#define STATUS_REMOTE_DISCONNECT cpu_to_le32(0xC000013C)
+#define STATUS_REMOTE_RESOURCES cpu_to_le32(0xC000013D)
+#define STATUS_LINK_FAILED cpu_to_le32(0xC000013E)
+#define STATUS_LINK_TIMEOUT cpu_to_le32(0xC000013F)
+#define STATUS_INVALID_CONNECTION cpu_to_le32(0xC0000140)
+#define STATUS_INVALID_ADDRESS cpu_to_le32(0xC0000141)
+#define STATUS_DLL_INIT_FAILED cpu_to_le32(0xC0000142)
+#define STATUS_MISSING_SYSTEMFILE cpu_to_le32(0xC0000143)
+#define STATUS_UNHANDLED_EXCEPTION cpu_to_le32(0xC0000144)
+#define STATUS_APP_INIT_FAILURE cpu_to_le32(0xC0000145)
+#define STATUS_PAGEFILE_CREATE_FAILED cpu_to_le32(0xC0000146)
+#define STATUS_NO_PAGEFILE cpu_to_le32(0xC0000147)
+#define STATUS_INVALID_LEVEL cpu_to_le32(0xC0000148)
+#define STATUS_WRONG_PASSWORD_CORE cpu_to_le32(0xC0000149)
+#define STATUS_ILLEGAL_FLOAT_CONTEXT cpu_to_le32(0xC000014A)
+#define STATUS_PIPE_BROKEN cpu_to_le32(0xC000014B)
+#define STATUS_REGISTRY_CORRUPT cpu_to_le32(0xC000014C)
+#define STATUS_REGISTRY_IO_FAILED cpu_to_le32(0xC000014D)
+#define STATUS_NO_EVENT_PAIR cpu_to_le32(0xC000014E)
+#define STATUS_UNRECOGNIZED_VOLUME cpu_to_le32(0xC000014F)
+#define STATUS_SERIAL_NO_DEVICE_INITED cpu_to_le32(0xC0000150)
+#define STATUS_NO_SUCH_ALIAS cpu_to_le32(0xC0000151)
+#define STATUS_MEMBER_NOT_IN_ALIAS cpu_to_le32(0xC0000152)
+#define STATUS_MEMBER_IN_ALIAS cpu_to_le32(0xC0000153)
+#define STATUS_ALIAS_EXISTS cpu_to_le32(0xC0000154)
+#define STATUS_LOGON_NOT_GRANTED cpu_to_le32(0xC0000155)
+#define STATUS_TOO_MANY_SECRETS cpu_to_le32(0xC0000156)
+#define STATUS_SECRET_TOO_LONG cpu_to_le32(0xC0000157)
+#define STATUS_INTERNAL_DB_ERROR cpu_to_le32(0xC0000158)
+#define STATUS_FULLSCREEN_MODE cpu_to_le32(0xC0000159)
+#define STATUS_TOO_MANY_CONTEXT_IDS cpu_to_le32(0xC000015A)
+#define STATUS_LOGON_TYPE_NOT_GRANTED cpu_to_le32(0xC000015B)
+#define STATUS_NOT_REGISTRY_FILE cpu_to_le32(0xC000015C)
+#define STATUS_NT_CROSS_ENCRYPTION_REQUIRED cpu_to_le32(0xC000015D)
+#define STATUS_DOMAIN_CTRLR_CONFIG_ERROR cpu_to_le32(0xC000015E)
+#define STATUS_FT_MISSING_MEMBER cpu_to_le32(0xC000015F)
+#define STATUS_ILL_FORMED_SERVICE_ENTRY cpu_to_le32(0xC0000160)
+#define STATUS_ILLEGAL_CHARACTER cpu_to_le32(0xC0000161)
+#define STATUS_UNMAPPABLE_CHARACTER cpu_to_le32(0xC0000162)
+#define STATUS_UNDEFINED_CHARACTER cpu_to_le32(0xC0000163)
+#define STATUS_FLOPPY_VOLUME cpu_to_le32(0xC0000164)
+#define STATUS_FLOPPY_ID_MARK_NOT_FOUND cpu_to_le32(0xC0000165)
+#define STATUS_FLOPPY_WRONG_CYLINDER cpu_to_le32(0xC0000166)
+#define STATUS_FLOPPY_UNKNOWN_ERROR cpu_to_le32(0xC0000167)
+#define STATUS_FLOPPY_BAD_REGISTERS cpu_to_le32(0xC0000168)
+#define STATUS_DISK_RECALIBRATE_FAILED cpu_to_le32(0xC0000169)
+#define STATUS_DISK_OPERATION_FAILED cpu_to_le32(0xC000016A)
+#define STATUS_DISK_RESET_FAILED cpu_to_le32(0xC000016B)
+#define STATUS_SHARED_IRQ_BUSY cpu_to_le32(0xC000016C)
+#define STATUS_FT_ORPHANING cpu_to_le32(0xC000016D)
+#define STATUS_BIOS_FAILED_TO_CONNECT_INTERRUPT cpu_to_le32(0xC000016E)
+#define STATUS_PARTITION_FAILURE cpu_to_le32(0xC0000172)
+#define STATUS_INVALID_BLOCK_LENGTH cpu_to_le32(0xC0000173)
+#define STATUS_DEVICE_NOT_PARTITIONED cpu_to_le32(0xC0000174)
+#define STATUS_UNABLE_TO_LOCK_MEDIA cpu_to_le32(0xC0000175)
+#define STATUS_UNABLE_TO_UNLOAD_MEDIA cpu_to_le32(0xC0000176)
+#define STATUS_EOM_OVERFLOW cpu_to_le32(0xC0000177)
+#define STATUS_NO_MEDIA cpu_to_le32(0xC0000178)
+#define STATUS_NO_SUCH_MEMBER cpu_to_le32(0xC000017A)
+#define STATUS_INVALID_MEMBER cpu_to_le32(0xC000017B)
+#define STATUS_KEY_DELETED cpu_to_le32(0xC000017C)
+#define STATUS_NO_LOG_SPACE cpu_to_le32(0xC000017D)
+#define STATUS_TOO_MANY_SIDS cpu_to_le32(0xC000017E)
+#define STATUS_LM_CROSS_ENCRYPTION_REQUIRED cpu_to_le32(0xC000017F)
+#define STATUS_KEY_HAS_CHILDREN cpu_to_le32(0xC0000180)
+#define STATUS_CHILD_MUST_BE_VOLATILE cpu_to_le32(0xC0000181)
+#define STATUS_DEVICE_CONFIGURATION_ERROR cpu_to_le32(0xC0000182)
+#define STATUS_DRIVER_INTERNAL_ERROR cpu_to_le32(0xC0000183)
+#define STATUS_INVALID_DEVICE_STATE cpu_to_le32(0xC0000184)
+#define STATUS_IO_DEVICE_ERROR cpu_to_le32(0xC0000185)
+#define STATUS_DEVICE_PROTOCOL_ERROR cpu_to_le32(0xC0000186)
+#define STATUS_BACKUP_CONTROLLER cpu_to_le32(0xC0000187)
+#define STATUS_LOG_FILE_FULL cpu_to_le32(0xC0000188)
+#define STATUS_TOO_LATE cpu_to_le32(0xC0000189)
+#define STATUS_NO_TRUST_LSA_SECRET cpu_to_le32(0xC000018A)
+#define STATUS_NO_TRUST_SAM_ACCOUNT cpu_to_le32(0xC000018B)
+#define STATUS_TRUSTED_DOMAIN_FAILURE cpu_to_le32(0xC000018C)
+#define STATUS_TRUSTED_RELATIONSHIP_FAILURE cpu_to_le32(0xC000018D)
+#define STATUS_EVENTLOG_FILE_CORRUPT cpu_to_le32(0xC000018E)
+#define STATUS_EVENTLOG_CANT_START cpu_to_le32(0xC000018F)
+#define STATUS_TRUST_FAILURE cpu_to_le32(0xC0000190)
+#define STATUS_MUTANT_LIMIT_EXCEEDED cpu_to_le32(0xC0000191)
+#define STATUS_NETLOGON_NOT_STARTED cpu_to_le32(0xC0000192)
+#define STATUS_ACCOUNT_EXPIRED cpu_to_le32(0xC0000193)
+#define STATUS_POSSIBLE_DEADLOCK cpu_to_le32(0xC0000194)
+#define STATUS_NETWORK_CREDENTIAL_CONFLICT cpu_to_le32(0xC0000195)
+#define STATUS_REMOTE_SESSION_LIMIT cpu_to_le32(0xC0000196)
+#define STATUS_EVENTLOG_FILE_CHANGED cpu_to_le32(0xC0000197)
+#define STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT cpu_to_le32(0xC0000198)
+#define STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT cpu_to_le32(0xC0000199)
+#define STATUS_NOLOGON_SERVER_TRUST_ACCOUNT cpu_to_le32(0xC000019A)
+#define STATUS_DOMAIN_TRUST_INCONSISTENT cpu_to_le32(0xC000019B)
+#define STATUS_FS_DRIVER_REQUIRED cpu_to_le32(0xC000019C)
+#define STATUS_IMAGE_ALREADY_LOADED_AS_DLL cpu_to_le32(0xC000019D)
+#define STATUS_NETWORK_OPEN_RESTRICTION cpu_to_le32(0xC0000201)
+#define STATUS_NO_USER_SESSION_KEY cpu_to_le32(0xC0000202)
+#define STATUS_USER_SESSION_DELETED cpu_to_le32(0xC0000203)
+#define STATUS_RESOURCE_LANG_NOT_FOUND cpu_to_le32(0xC0000204)
+#define STATUS_INSUFF_SERVER_RESOURCES cpu_to_le32(0xC0000205)
+#define STATUS_INVALID_BUFFER_SIZE cpu_to_le32(0xC0000206)
+#define STATUS_INVALID_ADDRESS_COMPONENT cpu_to_le32(0xC0000207)
+#define STATUS_INVALID_ADDRESS_WILDCARD cpu_to_le32(0xC0000208)
+#define STATUS_TOO_MANY_ADDRESSES cpu_to_le32(0xC0000209)
+#define STATUS_ADDRESS_ALREADY_EXISTS cpu_to_le32(0xC000020A)
+#define STATUS_ADDRESS_CLOSED cpu_to_le32(0xC000020B)
+#define STATUS_CONNECTION_DISCONNECTED cpu_to_le32(0xC000020C)
+#define STATUS_CONNECTION_RESET cpu_to_le32(0xC000020D)
+#define STATUS_TOO_MANY_NODES cpu_to_le32(0xC000020E)
+#define STATUS_TRANSACTION_ABORTED cpu_to_le32(0xC000020F)
+#define STATUS_TRANSACTION_TIMED_OUT cpu_to_le32(0xC0000210)
+#define STATUS_TRANSACTION_NO_RELEASE cpu_to_le32(0xC0000211)
+#define STATUS_TRANSACTION_NO_MATCH cpu_to_le32(0xC0000212)
+#define STATUS_TRANSACTION_RESPONDED cpu_to_le32(0xC0000213)
+#define STATUS_TRANSACTION_INVALID_ID cpu_to_le32(0xC0000214)
+#define STATUS_TRANSACTION_INVALID_TYPE cpu_to_le32(0xC0000215)
+#define STATUS_NOT_SERVER_SESSION cpu_to_le32(0xC0000216)
+#define STATUS_NOT_CLIENT_SESSION cpu_to_le32(0xC0000217)
+#define STATUS_CANNOT_LOAD_REGISTRY_FILE cpu_to_le32(0xC0000218)
+#define STATUS_DEBUG_ATTACH_FAILED cpu_to_le32(0xC0000219)
+#define STATUS_SYSTEM_PROCESS_TERMINATED cpu_to_le32(0xC000021A)
+#define STATUS_DATA_NOT_ACCEPTED cpu_to_le32(0xC000021B)
+#define STATUS_NO_BROWSER_SERVERS_FOUND cpu_to_le32(0xC000021C)
+#define STATUS_VDM_HARD_ERROR cpu_to_le32(0xC000021D)
+#define STATUS_DRIVER_CANCEL_TIMEOUT cpu_to_le32(0xC000021E)
+#define STATUS_REPLY_MESSAGE_MISMATCH cpu_to_le32(0xC000021F)
+#define STATUS_MAPPED_ALIGNMENT cpu_to_le32(0xC0000220)
+#define STATUS_IMAGE_CHECKSUM_MISMATCH cpu_to_le32(0xC0000221)
+#define STATUS_LOST_WRITEBEHIND_DATA cpu_to_le32(0xC0000222)
+#define STATUS_CLIENT_SERVER_PARAMETERS_INVALID cpu_to_le32(0xC0000223)
+#define STATUS_PASSWORD_MUST_CHANGE cpu_to_le32(0xC0000224)
+#define STATUS_NOT_FOUND cpu_to_le32(0xC0000225)
+#define STATUS_NOT_TINY_STREAM cpu_to_le32(0xC0000226)
+#define STATUS_RECOVERY_FAILURE cpu_to_le32(0xC0000227)
+#define STATUS_STACK_OVERFLOW_READ cpu_to_le32(0xC0000228)
+#define STATUS_FAIL_CHECK cpu_to_le32(0xC0000229)
+#define STATUS_DUPLICATE_OBJECTID cpu_to_le32(0xC000022A)
+#define STATUS_OBJECTID_EXISTS cpu_to_le32(0xC000022B)
+#define STATUS_CONVERT_TO_LARGE cpu_to_le32(0xC000022C)
+#define STATUS_RETRY cpu_to_le32(0xC000022D)
+#define STATUS_FOUND_OUT_OF_SCOPE cpu_to_le32(0xC000022E)
+#define STATUS_ALLOCATE_BUCKET cpu_to_le32(0xC000022F)
+#define STATUS_PROPSET_NOT_FOUND cpu_to_le32(0xC0000230)
+#define STATUS_MARSHALL_OVERFLOW cpu_to_le32(0xC0000231)
+#define STATUS_INVALID_VARIANT cpu_to_le32(0xC0000232)
+#define STATUS_DOMAIN_CONTROLLER_NOT_FOUND cpu_to_le32(0xC0000233)
+#define STATUS_ACCOUNT_LOCKED_OUT cpu_to_le32(0xC0000234)
+#define STATUS_HANDLE_NOT_CLOSABLE cpu_to_le32(0xC0000235)
+#define STATUS_CONNECTION_REFUSED cpu_to_le32(0xC0000236)
+#define STATUS_GRACEFUL_DISCONNECT cpu_to_le32(0xC0000237)
+#define STATUS_ADDRESS_ALREADY_ASSOCIATED cpu_to_le32(0xC0000238)
+#define STATUS_ADDRESS_NOT_ASSOCIATED cpu_to_le32(0xC0000239)
+#define STATUS_CONNECTION_INVALID cpu_to_le32(0xC000023A)
+#define STATUS_CONNECTION_ACTIVE cpu_to_le32(0xC000023B)
+#define STATUS_NETWORK_UNREACHABLE cpu_to_le32(0xC000023C)
+#define STATUS_HOST_UNREACHABLE cpu_to_le32(0xC000023D)
+#define STATUS_PROTOCOL_UNREACHABLE cpu_to_le32(0xC000023E)
+#define STATUS_PORT_UNREACHABLE cpu_to_le32(0xC000023F)
+#define STATUS_REQUEST_ABORTED cpu_to_le32(0xC0000240)
+#define STATUS_CONNECTION_ABORTED cpu_to_le32(0xC0000241)
+#define STATUS_BAD_COMPRESSION_BUFFER cpu_to_le32(0xC0000242)
+#define STATUS_USER_MAPPED_FILE cpu_to_le32(0xC0000243)
+#define STATUS_AUDIT_FAILED cpu_to_le32(0xC0000244)
+#define STATUS_TIMER_RESOLUTION_NOT_SET cpu_to_le32(0xC0000245)
+#define STATUS_CONNECTION_COUNT_LIMIT cpu_to_le32(0xC0000246)
+#define STATUS_LOGIN_TIME_RESTRICTION cpu_to_le32(0xC0000247)
+#define STATUS_LOGIN_WKSTA_RESTRICTION cpu_to_le32(0xC0000248)
+#define STATUS_IMAGE_MP_UP_MISMATCH cpu_to_le32(0xC0000249)
+#define STATUS_INSUFFICIENT_LOGON_INFO cpu_to_le32(0xC0000250)
+#define STATUS_BAD_DLL_ENTRYPOINT cpu_to_le32(0xC0000251)
+#define STATUS_BAD_SERVICE_ENTRYPOINT cpu_to_le32(0xC0000252)
+#define STATUS_LPC_REPLY_LOST cpu_to_le32(0xC0000253)
+#define STATUS_IP_ADDRESS_CONFLICT1 cpu_to_le32(0xC0000254)
+#define STATUS_IP_ADDRESS_CONFLICT2 cpu_to_le32(0xC0000255)
+#define STATUS_REGISTRY_QUOTA_LIMIT cpu_to_le32(0xC0000256)
+#define STATUS_PATH_NOT_COVERED cpu_to_le32(0xC0000257)
+#define STATUS_NO_CALLBACK_ACTIVE cpu_to_le32(0xC0000258)
+#define STATUS_LICENSE_QUOTA_EXCEEDED cpu_to_le32(0xC0000259)
+#define STATUS_PWD_TOO_SHORT cpu_to_le32(0xC000025A)
+#define STATUS_PWD_TOO_RECENT cpu_to_le32(0xC000025B)
+#define STATUS_PWD_HISTORY_CONFLICT cpu_to_le32(0xC000025C)
+#define STATUS_PLUGPLAY_NO_DEVICE cpu_to_le32(0xC000025E)
+#define STATUS_UNSUPPORTED_COMPRESSION cpu_to_le32(0xC000025F)
+#define STATUS_INVALID_HW_PROFILE cpu_to_le32(0xC0000260)
+#define STATUS_INVALID_PLUGPLAY_DEVICE_PATH cpu_to_le32(0xC0000261)
+#define STATUS_DRIVER_ORDINAL_NOT_FOUND cpu_to_le32(0xC0000262)
+#define STATUS_DRIVER_ENTRYPOINT_NOT_FOUND cpu_to_le32(0xC0000263)
+#define STATUS_RESOURCE_NOT_OWNED cpu_to_le32(0xC0000264)
+#define STATUS_TOO_MANY_LINKS cpu_to_le32(0xC0000265)
+#define STATUS_QUOTA_LIST_INCONSISTENT cpu_to_le32(0xC0000266)
+#define STATUS_FILE_IS_OFFLINE cpu_to_le32(0xC0000267)
+#define STATUS_EVALUATION_EXPIRATION cpu_to_le32(0xC0000268)
+#define STATUS_ILLEGAL_DLL_RELOCATION cpu_to_le32(0xC0000269)
+#define STATUS_LICENSE_VIOLATION cpu_to_le32(0xC000026A)
+#define STATUS_DLL_INIT_FAILED_LOGOFF cpu_to_le32(0xC000026B)
+#define STATUS_DRIVER_UNABLE_TO_LOAD cpu_to_le32(0xC000026C)
+#define STATUS_DFS_UNAVAILABLE cpu_to_le32(0xC000026D)
+#define STATUS_VOLUME_DISMOUNTED cpu_to_le32(0xC000026E)
+#define STATUS_WX86_INTERNAL_ERROR cpu_to_le32(0xC000026F)
+#define STATUS_WX86_FLOAT_STACK_CHECK cpu_to_le32(0xC0000270)
+#define STATUS_VALIDATE_CONTINUE cpu_to_le32(0xC0000271)
+#define STATUS_NO_MATCH cpu_to_le32(0xC0000272)
+#define STATUS_NO_MORE_MATCHES cpu_to_le32(0xC0000273)
+#define STATUS_NOT_A_REPARSE_POINT cpu_to_le32(0xC0000275)
+#define STATUS_IO_REPARSE_TAG_INVALID cpu_to_le32(0xC0000276)
+#define STATUS_IO_REPARSE_TAG_MISMATCH cpu_to_le32(0xC0000277)
+#define STATUS_IO_REPARSE_DATA_INVALID cpu_to_le32(0xC0000278)
+#define STATUS_IO_REPARSE_TAG_NOT_HANDLED cpu_to_le32(0xC0000279)
+#define STATUS_REPARSE_POINT_NOT_RESOLVED cpu_to_le32(0xC0000280)
+#define STATUS_DIRECTORY_IS_A_REPARSE_POINT cpu_to_le32(0xC0000281)
+#define STATUS_RANGE_LIST_CONFLICT cpu_to_le32(0xC0000282)
+#define STATUS_SOURCE_ELEMENT_EMPTY cpu_to_le32(0xC0000283)
+#define STATUS_DESTINATION_ELEMENT_FULL cpu_to_le32(0xC0000284)
+#define STATUS_ILLEGAL_ELEMENT_ADDRESS cpu_to_le32(0xC0000285)
+#define STATUS_MAGAZINE_NOT_PRESENT cpu_to_le32(0xC0000286)
+#define STATUS_REINITIALIZATION_NEEDED cpu_to_le32(0xC0000287)
+#define STATUS_ENCRYPTION_FAILED cpu_to_le32(0xC000028A)
+#define STATUS_DECRYPTION_FAILED cpu_to_le32(0xC000028B)
+#define STATUS_RANGE_NOT_FOUND cpu_to_le32(0xC000028C)
+#define STATUS_NO_RECOVERY_POLICY cpu_to_le32(0xC000028D)
+#define STATUS_NO_EFS cpu_to_le32(0xC000028E)
+#define STATUS_WRONG_EFS cpu_to_le32(0xC000028F)
+#define STATUS_NO_USER_KEYS cpu_to_le32(0xC0000290)
+#define STATUS_FILE_NOT_ENCRYPTED cpu_to_le32(0xC0000291)
+#define STATUS_NOT_EXPORT_FORMAT cpu_to_le32(0xC0000292)
+#define STATUS_FILE_ENCRYPTED cpu_to_le32(0xC0000293)
+#define STATUS_WMI_GUID_NOT_FOUND cpu_to_le32(0xC0000295)
+#define STATUS_WMI_INSTANCE_NOT_FOUND cpu_to_le32(0xC0000296)
+#define STATUS_WMI_ITEMID_NOT_FOUND cpu_to_le32(0xC0000297)
+#define STATUS_WMI_TRY_AGAIN cpu_to_le32(0xC0000298)
+#define STATUS_SHARED_POLICY cpu_to_le32(0xC0000299)
+#define STATUS_POLICY_OBJECT_NOT_FOUND cpu_to_le32(0xC000029A)
+#define STATUS_POLICY_ONLY_IN_DS cpu_to_le32(0xC000029B)
+#define STATUS_VOLUME_NOT_UPGRADED cpu_to_le32(0xC000029C)
+#define STATUS_REMOTE_STORAGE_NOT_ACTIVE cpu_to_le32(0xC000029D)
+#define STATUS_REMOTE_STORAGE_MEDIA_ERROR cpu_to_le32(0xC000029E)
+#define STATUS_NO_TRACKING_SERVICE cpu_to_le32(0xC000029F)
+#define STATUS_SERVER_SID_MISMATCH cpu_to_le32(0xC00002A0)
+#define STATUS_DS_NO_ATTRIBUTE_OR_VALUE cpu_to_le32(0xC00002A1)
+#define STATUS_DS_INVALID_ATTRIBUTE_SYNTAX cpu_to_le32(0xC00002A2)
+#define STATUS_DS_ATTRIBUTE_TYPE_UNDEFINED cpu_to_le32(0xC00002A3)
+#define STATUS_DS_ATTRIBUTE_OR_VALUE_EXISTS cpu_to_le32(0xC00002A4)
+#define STATUS_DS_BUSY cpu_to_le32(0xC00002A5)
+#define STATUS_DS_UNAVAILABLE cpu_to_le32(0xC00002A6)
+#define STATUS_DS_NO_RIDS_ALLOCATED cpu_to_le32(0xC00002A7)
+#define STATUS_DS_NO_MORE_RIDS cpu_to_le32(0xC00002A8)
+#define STATUS_DS_INCORRECT_ROLE_OWNER cpu_to_le32(0xC00002A9)
+#define STATUS_DS_RIDMGR_INIT_ERROR cpu_to_le32(0xC00002AA)
+#define STATUS_DS_OBJ_CLASS_VIOLATION cpu_to_le32(0xC00002AB)
+#define STATUS_DS_CANT_ON_NON_LEAF cpu_to_le32(0xC00002AC)
+#define STATUS_DS_CANT_ON_RDN cpu_to_le32(0xC00002AD)
+#define STATUS_DS_CANT_MOD_OBJ_CLASS cpu_to_le32(0xC00002AE)
+#define STATUS_DS_CROSS_DOM_MOVE_FAILED cpu_to_le32(0xC00002AF)
+#define STATUS_DS_GC_NOT_AVAILABLE cpu_to_le32(0xC00002B0)
+#define STATUS_DIRECTORY_SERVICE_REQUIRED cpu_to_le32(0xC00002B1)
+#define STATUS_REPARSE_ATTRIBUTE_CONFLICT cpu_to_le32(0xC00002B2)
+#define STATUS_CANT_ENABLE_DENY_ONLY cpu_to_le32(0xC00002B3)
+#define STATUS_FLOAT_MULTIPLE_FAULTS cpu_to_le32(0xC00002B4)
+#define STATUS_FLOAT_MULTIPLE_TRAPS cpu_to_le32(0xC00002B5)
+#define STATUS_DEVICE_REMOVED cpu_to_le32(0xC00002B6)
+#define STATUS_JOURNAL_DELETE_IN_PROGRESS cpu_to_le32(0xC00002B7)
+#define STATUS_JOURNAL_NOT_ACTIVE cpu_to_le32(0xC00002B8)
+#define STATUS_NOINTERFACE cpu_to_le32(0xC00002B9)
+#define STATUS_DS_ADMIN_LIMIT_EXCEEDED cpu_to_le32(0xC00002C1)
+#define STATUS_DRIVER_FAILED_SLEEP cpu_to_le32(0xC00002C2)
+#define STATUS_MUTUAL_AUTHENTICATION_FAILED cpu_to_le32(0xC00002C3)
+#define STATUS_CORRUPT_SYSTEM_FILE cpu_to_le32(0xC00002C4)
+#define STATUS_DATATYPE_MISALIGNMENT_ERROR cpu_to_le32(0xC00002C5)
+#define STATUS_WMI_READ_ONLY cpu_to_le32(0xC00002C6)
+#define STATUS_WMI_SET_FAILURE cpu_to_le32(0xC00002C7)
+#define STATUS_COMMITMENT_MINIMUM cpu_to_le32(0xC00002C8)
+#define STATUS_REG_NAT_CONSUMPTION cpu_to_le32(0xC00002C9)
+#define STATUS_TRANSPORT_FULL cpu_to_le32(0xC00002CA)
+#define STATUS_DS_SAM_INIT_FAILURE cpu_to_le32(0xC00002CB)
+#define STATUS_ONLY_IF_CONNECTED cpu_to_le32(0xC00002CC)
+#define STATUS_DS_SENSITIVE_GROUP_VIOLATION cpu_to_le32(0xC00002CD)
+#define STATUS_PNP_RESTART_ENUMERATION cpu_to_le32(0xC00002CE)
+#define STATUS_JOURNAL_ENTRY_DELETED cpu_to_le32(0xC00002CF)
+#define STATUS_DS_CANT_MOD_PRIMARYGROUPID cpu_to_le32(0xC00002D0)
+#define STATUS_SYSTEM_IMAGE_BAD_SIGNATURE cpu_to_le32(0xC00002D1)
+#define STATUS_PNP_REBOOT_REQUIRED cpu_to_le32(0xC00002D2)
+#define STATUS_POWER_STATE_INVALID cpu_to_le32(0xC00002D3)
+#define STATUS_DS_INVALID_GROUP_TYPE cpu_to_le32(0xC00002D4)
+#define STATUS_DS_NO_NEST_GLOBALGROUP_IN_MIXEDDOMAIN cpu_to_le32(0xC00002D5)
+#define STATUS_DS_NO_NEST_LOCALGROUP_IN_MIXEDDOMAIN cpu_to_le32(0xC00002D6)
+#define STATUS_DS_GLOBAL_CANT_HAVE_LOCAL_MEMBER cpu_to_le32(0xC00002D7)
+#define STATUS_DS_GLOBAL_CANT_HAVE_UNIVERSAL_MEMBER cpu_to_le32(0xC00002D8)
+#define STATUS_DS_UNIVERSAL_CANT_HAVE_LOCAL_MEMBER cpu_to_le32(0xC00002D9)
+#define STATUS_DS_GLOBAL_CANT_HAVE_CROSSDOMAIN_MEMBER cpu_to_le32(0xC00002DA)
+#define STATUS_DS_LOCAL_CANT_HAVE_CROSSDOMAIN_LOCAL_MEMBER \
+ cpu_to_le32(0xC00002DB)
+#define STATUS_DS_HAVE_PRIMARY_MEMBERS cpu_to_le32(0xC00002DC)
+#define STATUS_WMI_NOT_SUPPORTED cpu_to_le32(0xC00002DD)
+#define STATUS_INSUFFICIENT_POWER cpu_to_le32(0xC00002DE)
+#define STATUS_SAM_NEED_BOOTKEY_PASSWORD cpu_to_le32(0xC00002DF)
+#define STATUS_SAM_NEED_BOOTKEY_FLOPPY cpu_to_le32(0xC00002E0)
+#define STATUS_DS_CANT_START cpu_to_le32(0xC00002E1)
+#define STATUS_DS_INIT_FAILURE cpu_to_le32(0xC00002E2)
+#define STATUS_SAM_INIT_FAILURE cpu_to_le32(0xC00002E3)
+#define STATUS_DS_GC_REQUIRED cpu_to_le32(0xC00002E4)
+#define STATUS_DS_LOCAL_MEMBER_OF_LOCAL_ONLY cpu_to_le32(0xC00002E5)
+#define STATUS_DS_NO_FPO_IN_UNIVERSAL_GROUPS cpu_to_le32(0xC00002E6)
+#define STATUS_DS_MACHINE_ACCOUNT_QUOTA_EXCEEDED cpu_to_le32(0xC00002E7)
+#define STATUS_MULTIPLE_FAULT_VIOLATION cpu_to_le32(0xC00002E8)
+#define STATUS_CURRENT_DOMAIN_NOT_ALLOWED cpu_to_le32(0xC00002E9)
+#define STATUS_CANNOT_MAKE cpu_to_le32(0xC00002EA)
+#define STATUS_SYSTEM_SHUTDOWN cpu_to_le32(0xC00002EB)
+#define STATUS_DS_INIT_FAILURE_CONSOLE cpu_to_le32(0xC00002EC)
+#define STATUS_DS_SAM_INIT_FAILURE_CONSOLE cpu_to_le32(0xC00002ED)
+#define STATUS_UNFINISHED_CONTEXT_DELETED cpu_to_le32(0xC00002EE)
+#define STATUS_NO_TGT_REPLY cpu_to_le32(0xC00002EF)
+#define STATUS_OBJECTID_NOT_FOUND cpu_to_le32(0xC00002F0)
+#define STATUS_NO_IP_ADDRESSES cpu_to_le32(0xC00002F1)
+#define STATUS_WRONG_CREDENTIAL_HANDLE cpu_to_le32(0xC00002F2)
+#define STATUS_CRYPTO_SYSTEM_INVALID cpu_to_le32(0xC00002F3)
+#define STATUS_MAX_REFERRALS_EXCEEDED cpu_to_le32(0xC00002F4)
+#define STATUS_MUST_BE_KDC cpu_to_le32(0xC00002F5)
+#define STATUS_STRONG_CRYPTO_NOT_SUPPORTED cpu_to_le32(0xC00002F6)
+#define STATUS_TOO_MANY_PRINCIPALS cpu_to_le32(0xC00002F7)
+#define STATUS_NO_PA_DATA cpu_to_le32(0xC00002F8)
+#define STATUS_PKINIT_NAME_MISMATCH cpu_to_le32(0xC00002F9)
+#define STATUS_SMARTCARD_LOGON_REQUIRED cpu_to_le32(0xC00002FA)
+#define STATUS_KDC_INVALID_REQUEST cpu_to_le32(0xC00002FB)
+#define STATUS_KDC_UNABLE_TO_REFER cpu_to_le32(0xC00002FC)
+#define STATUS_KDC_UNKNOWN_ETYPE cpu_to_le32(0xC00002FD)
+#define STATUS_SHUTDOWN_IN_PROGRESS cpu_to_le32(0xC00002FE)
+#define STATUS_SERVER_SHUTDOWN_IN_PROGRESS cpu_to_le32(0xC00002FF)
+#define STATUS_NOT_SUPPORTED_ON_SBS cpu_to_le32(0xC0000300)
+#define STATUS_WMI_GUID_DISCONNECTED cpu_to_le32(0xC0000301)
+#define STATUS_WMI_ALREADY_DISABLED cpu_to_le32(0xC0000302)
+#define STATUS_WMI_ALREADY_ENABLED cpu_to_le32(0xC0000303)
+#define STATUS_MFT_TOO_FRAGMENTED cpu_to_le32(0xC0000304)
+#define STATUS_COPY_PROTECTION_FAILURE cpu_to_le32(0xC0000305)
+#define STATUS_CSS_AUTHENTICATION_FAILURE cpu_to_le32(0xC0000306)
+#define STATUS_CSS_KEY_NOT_PRESENT cpu_to_le32(0xC0000307)
+#define STATUS_CSS_KEY_NOT_ESTABLISHED cpu_to_le32(0xC0000308)
+#define STATUS_CSS_SCRAMBLED_SECTOR cpu_to_le32(0xC0000309)
+#define STATUS_CSS_REGION_MISMATCH cpu_to_le32(0xC000030A)
+#define STATUS_CSS_RESETS_EXHAUSTED cpu_to_le32(0xC000030B)
+#define STATUS_PKINIT_FAILURE cpu_to_le32(0xC0000320)
+#define STATUS_SMARTCARD_SUBSYSTEM_FAILURE cpu_to_le32(0xC0000321)
+#define STATUS_NO_KERB_KEY cpu_to_le32(0xC0000322)
+#define STATUS_HOST_DOWN cpu_to_le32(0xC0000350)
+#define STATUS_UNSUPPORTED_PREAUTH cpu_to_le32(0xC0000351)
+#define STATUS_EFS_ALG_BLOB_TOO_BIG cpu_to_le32(0xC0000352)
+#define STATUS_PORT_NOT_SET cpu_to_le32(0xC0000353)
+#define STATUS_DEBUGGER_INACTIVE cpu_to_le32(0xC0000354)
+#define STATUS_DS_VERSION_CHECK_FAILURE cpu_to_le32(0xC0000355)
+#define STATUS_AUDITING_DISABLED cpu_to_le32(0xC0000356)
+#define STATUS_PRENT4_MACHINE_ACCOUNT cpu_to_le32(0xC0000357)
+#define STATUS_DS_AG_CANT_HAVE_UNIVERSAL_MEMBER cpu_to_le32(0xC0000358)
+#define STATUS_INVALID_IMAGE_WIN_32 cpu_to_le32(0xC0000359)
+#define STATUS_INVALID_IMAGE_WIN_64 cpu_to_le32(0xC000035A)
+#define STATUS_BAD_BINDINGS cpu_to_le32(0xC000035B)
+#define STATUS_NETWORK_SESSION_EXPIRED cpu_to_le32(0xC000035C)
+#define STATUS_APPHELP_BLOCK cpu_to_le32(0xC000035D)
+#define STATUS_ALL_SIDS_FILTERED cpu_to_le32(0xC000035E)
+#define STATUS_NOT_SAFE_MODE_DRIVER cpu_to_le32(0xC000035F)
+#define STATUS_ACCESS_DISABLED_BY_POLICY_DEFAULT cpu_to_le32(0xC0000361)
+#define STATUS_ACCESS_DISABLED_BY_POLICY_PATH cpu_to_le32(0xC0000362)
+#define STATUS_ACCESS_DISABLED_BY_POLICY_PUBLISHER cpu_to_le32(0xC0000363)
+#define STATUS_ACCESS_DISABLED_BY_POLICY_OTHER cpu_to_le32(0xC0000364)
+#define STATUS_FAILED_DRIVER_ENTRY cpu_to_le32(0xC0000365)
+#define STATUS_DEVICE_ENUMERATION_ERROR cpu_to_le32(0xC0000366)
+#define STATUS_MOUNT_POINT_NOT_RESOLVED cpu_to_le32(0xC0000368)
+#define STATUS_INVALID_DEVICE_OBJECT_PARAMETER cpu_to_le32(0xC0000369)
+#define STATUS_MCA_OCCURRED cpu_to_le32(0xC000036A)
+#define STATUS_DRIVER_BLOCKED_CRITICAL cpu_to_le32(0xC000036B)
+#define STATUS_DRIVER_BLOCKED cpu_to_le32(0xC000036C)
+#define STATUS_DRIVER_DATABASE_ERROR cpu_to_le32(0xC000036D)
+#define STATUS_SYSTEM_HIVE_TOO_LARGE cpu_to_le32(0xC000036E)
+#define STATUS_INVALID_IMPORT_OF_NON_DLL cpu_to_le32(0xC000036F)
+#define STATUS_NO_SECRETS cpu_to_le32(0xC0000371)
+#define STATUS_ACCESS_DISABLED_NO_SAFER_UI_BY_POLICY cpu_to_le32(0xC0000372)
+#define STATUS_FAILED_STACK_SWITCH cpu_to_le32(0xC0000373)
+#define STATUS_HEAP_CORRUPTION cpu_to_le32(0xC0000374)
+#define STATUS_SMARTCARD_WRONG_PIN cpu_to_le32(0xC0000380)
+#define STATUS_SMARTCARD_CARD_BLOCKED cpu_to_le32(0xC0000381)
+#define STATUS_SMARTCARD_CARD_NOT_AUTHENTICATED cpu_to_le32(0xC0000382)
+#define STATUS_SMARTCARD_NO_CARD cpu_to_le32(0xC0000383)
+#define STATUS_SMARTCARD_NO_KEY_CONTAINER cpu_to_le32(0xC0000384)
+#define STATUS_SMARTCARD_NO_CERTIFICATE cpu_to_le32(0xC0000385)
+#define STATUS_SMARTCARD_NO_KEYSET cpu_to_le32(0xC0000386)
+#define STATUS_SMARTCARD_IO_ERROR cpu_to_le32(0xC0000387)
+#define STATUS_DOWNGRADE_DETECTED cpu_to_le32(0xC0000388)
+#define STATUS_SMARTCARD_CERT_REVOKED cpu_to_le32(0xC0000389)
+#define STATUS_ISSUING_CA_UNTRUSTED cpu_to_le32(0xC000038A)
+#define STATUS_REVOCATION_OFFLINE_C cpu_to_le32(0xC000038B)
+#define STATUS_PKINIT_CLIENT_FAILURE cpu_to_le32(0xC000038C)
+#define STATUS_SMARTCARD_CERT_EXPIRED cpu_to_le32(0xC000038D)
+#define STATUS_DRIVER_FAILED_PRIOR_UNLOAD cpu_to_le32(0xC000038E)
+#define STATUS_SMARTCARD_SILENT_CONTEXT cpu_to_le32(0xC000038F)
+#define STATUS_PER_USER_TRUST_QUOTA_EXCEEDED cpu_to_le32(0xC0000401)
+#define STATUS_ALL_USER_TRUST_QUOTA_EXCEEDED cpu_to_le32(0xC0000402)
+#define STATUS_USER_DELETE_TRUST_QUOTA_EXCEEDED cpu_to_le32(0xC0000403)
+#define STATUS_DS_NAME_NOT_UNIQUE cpu_to_le32(0xC0000404)
+#define STATUS_DS_DUPLICATE_ID_FOUND cpu_to_le32(0xC0000405)
+#define STATUS_DS_GROUP_CONVERSION_ERROR cpu_to_le32(0xC0000406)
+#define STATUS_VOLSNAP_PREPARE_HIBERNATE cpu_to_le32(0xC0000407)
+#define STATUS_USER2USER_REQUIRED cpu_to_le32(0xC0000408)
+#define STATUS_STACK_BUFFER_OVERRUN cpu_to_le32(0xC0000409)
+#define STATUS_NO_S4U_PROT_SUPPORT cpu_to_le32(0xC000040A)
+#define STATUS_CROSSREALM_DELEGATION_FAILURE cpu_to_le32(0xC000040B)
+#define STATUS_REVOCATION_OFFLINE_KDC cpu_to_le32(0xC000040C)
+#define STATUS_ISSUING_CA_UNTRUSTED_KDC cpu_to_le32(0xC000040D)
+#define STATUS_KDC_CERT_EXPIRED cpu_to_le32(0xC000040E)
+#define STATUS_KDC_CERT_REVOKED cpu_to_le32(0xC000040F)
+#define STATUS_PARAMETER_QUOTA_EXCEEDED cpu_to_le32(0xC0000410)
+#define STATUS_HIBERNATION_FAILURE cpu_to_le32(0xC0000411)
+#define STATUS_DELAY_LOAD_FAILED cpu_to_le32(0xC0000412)
+#define STATUS_AUTHENTICATION_FIREWALL_FAILED cpu_to_le32(0xC0000413)
+#define STATUS_VDM_DISALLOWED cpu_to_le32(0xC0000414)
+#define STATUS_HUNG_DISPLAY_DRIVER_THREAD cpu_to_le32(0xC0000415)
+#define STATUS_INSUFFICIENT_RESOURCE_FOR_SPECIFIED_SHARED_SECTION_SIZE \
+ cpu_to_le32(0xC0000416)
+#define STATUS_INVALID_CRUNTIME_PARAMETER cpu_to_le32(0xC0000417)
+#define STATUS_NTLM_BLOCKED cpu_to_le32(0xC0000418)
+#define STATUS_ASSERTION_FAILURE cpu_to_le32(0xC0000420)
+#define STATUS_VERIFIER_STOP cpu_to_le32(0xC0000421)
+#define STATUS_CALLBACK_POP_STACK cpu_to_le32(0xC0000423)
+#define STATUS_INCOMPATIBLE_DRIVER_BLOCKED cpu_to_le32(0xC0000424)
+#define STATUS_HIVE_UNLOADED cpu_to_le32(0xC0000425)
+#define STATUS_COMPRESSION_DISABLED cpu_to_le32(0xC0000426)
+#define STATUS_FILE_SYSTEM_LIMITATION cpu_to_le32(0xC0000427)
+#define STATUS_INVALID_IMAGE_HASH cpu_to_le32(0xC0000428)
+#define STATUS_NOT_CAPABLE cpu_to_le32(0xC0000429)
+#define STATUS_REQUEST_OUT_OF_SEQUENCE cpu_to_le32(0xC000042A)
+#define STATUS_IMPLEMENTATION_LIMIT cpu_to_le32(0xC000042B)
+#define STATUS_ELEVATION_REQUIRED cpu_to_le32(0xC000042C)
+#define STATUS_BEYOND_VDL cpu_to_le32(0xC0000432)
+#define STATUS_ENCOUNTERED_WRITE_IN_PROGRESS cpu_to_le32(0xC0000433)
+#define STATUS_PTE_CHANGED cpu_to_le32(0xC0000434)
+#define STATUS_PURGE_FAILED cpu_to_le32(0xC0000435)
+#define STATUS_CRED_REQUIRES_CONFIRMATION cpu_to_le32(0xC0000440)
+#define STATUS_CS_ENCRYPTION_INVALID_SERVER_RESPONSE cpu_to_le32(0xC0000441)
+#define STATUS_CS_ENCRYPTION_UNSUPPORTED_SERVER cpu_to_le32(0xC0000442)
+#define STATUS_CS_ENCRYPTION_EXISTING_ENCRYPTED_FILE cpu_to_le32(0xC0000443)
+#define STATUS_CS_ENCRYPTION_NEW_ENCRYPTED_FILE cpu_to_le32(0xC0000444)
+#define STATUS_CS_ENCRYPTION_FILE_NOT_CSE cpu_to_le32(0xC0000445)
+#define STATUS_INVALID_LABEL cpu_to_le32(0xC0000446)
+#define STATUS_DRIVER_PROCESS_TERMINATED cpu_to_le32(0xC0000450)
+#define STATUS_AMBIGUOUS_SYSTEM_DEVICE cpu_to_le32(0xC0000451)
+#define STATUS_SYSTEM_DEVICE_NOT_FOUND cpu_to_le32(0xC0000452)
+#define STATUS_RESTART_BOOT_APPLICATION cpu_to_le32(0xC0000453)
+#define STATUS_INVALID_TASK_NAME cpu_to_le32(0xC0000500)
+#define STATUS_INVALID_TASK_INDEX cpu_to_le32(0xC0000501)
+#define STATUS_THREAD_ALREADY_IN_TASK cpu_to_le32(0xC0000502)
+#define STATUS_CALLBACK_BYPASS cpu_to_le32(0xC0000503)
+#define STATUS_PORT_CLOSED cpu_to_le32(0xC0000700)
+#define STATUS_MESSAGE_LOST cpu_to_le32(0xC0000701)
+#define STATUS_INVALID_MESSAGE cpu_to_le32(0xC0000702)
+#define STATUS_REQUEST_CANCELED cpu_to_le32(0xC0000703)
+#define STATUS_RECURSIVE_DISPATCH cpu_to_le32(0xC0000704)
+#define STATUS_LPC_RECEIVE_BUFFER_EXPECTED cpu_to_le32(0xC0000705)
+#define STATUS_LPC_INVALID_CONNECTION_USAGE cpu_to_le32(0xC0000706)
+#define STATUS_LPC_REQUESTS_NOT_ALLOWED cpu_to_le32(0xC0000707)
+#define STATUS_RESOURCE_IN_USE cpu_to_le32(0xC0000708)
+#define STATUS_HARDWARE_MEMORY_ERROR cpu_to_le32(0xC0000709)
+#define STATUS_THREADPOOL_HANDLE_EXCEPTION cpu_to_le32(0xC000070A)
+#define STATUS_THREADPOOL_SET_EVENT_ON_COMPLETION_FAILED cpu_to_le32(0xC000070B)
+#define STATUS_THREADPOOL_RELEASE_SEMAPHORE_ON_COMPLETION_FAILED \
+ cpu_to_le32(0xC000070C)
+#define STATUS_THREADPOOL_RELEASE_MUTEX_ON_COMPLETION_FAILED \
+ cpu_to_le32(0xC000070D)
+#define STATUS_THREADPOOL_FREE_LIBRARY_ON_COMPLETION_FAILED \
+ cpu_to_le32(0xC000070E)
+#define STATUS_THREADPOOL_RELEASED_DURING_OPERATION cpu_to_le32(0xC000070F)
+#define STATUS_CALLBACK_RETURNED_WHILE_IMPERSONATING cpu_to_le32(0xC0000710)
+#define STATUS_APC_RETURNED_WHILE_IMPERSONATING cpu_to_le32(0xC0000711)
+#define STATUS_PROCESS_IS_PROTECTED cpu_to_le32(0xC0000712)
+#define STATUS_MCA_EXCEPTION cpu_to_le32(0xC0000713)
+#define STATUS_CERTIFICATE_MAPPING_NOT_UNIQUE cpu_to_le32(0xC0000714)
+#define STATUS_SYMLINK_CLASS_DISABLED cpu_to_le32(0xC0000715)
+#define STATUS_INVALID_IDN_NORMALIZATION cpu_to_le32(0xC0000716)
+#define STATUS_NO_UNICODE_TRANSLATION cpu_to_le32(0xC0000717)
+#define STATUS_ALREADY_REGISTERED cpu_to_le32(0xC0000718)
+#define STATUS_CONTEXT_MISMATCH cpu_to_le32(0xC0000719)
+#define STATUS_PORT_ALREADY_HAS_COMPLETION_LIST cpu_to_le32(0xC000071A)
+#define STATUS_CALLBACK_RETURNED_THREAD_PRIORITY cpu_to_le32(0xC000071B)
+#define STATUS_INVALID_THREAD cpu_to_le32(0xC000071C)
+#define STATUS_CALLBACK_RETURNED_TRANSACTION cpu_to_le32(0xC000071D)
+#define STATUS_CALLBACK_RETURNED_LDR_LOCK cpu_to_le32(0xC000071E)
+#define STATUS_CALLBACK_RETURNED_LANG cpu_to_le32(0xC000071F)
+#define STATUS_CALLBACK_RETURNED_PRI_BACK cpu_to_le32(0xC0000720)
+#define STATUS_CALLBACK_RETURNED_THREAD_AFFINITY cpu_to_le32(0xC0000721)
+#define STATUS_DISK_REPAIR_DISABLED cpu_to_le32(0xC0000800)
+#define STATUS_DS_DOMAIN_RENAME_IN_PROGRESS cpu_to_le32(0xC0000801)
+#define STATUS_DISK_QUOTA_EXCEEDED cpu_to_le32(0xC0000802)
+#define STATUS_CONTENT_BLOCKED cpu_to_le32(0xC0000804)
+#define STATUS_BAD_CLUSTERS cpu_to_le32(0xC0000805)
+#define STATUS_VOLUME_DIRTY cpu_to_le32(0xC0000806)
+#define STATUS_FILE_CHECKED_OUT cpu_to_le32(0xC0000901)
+#define STATUS_CHECKOUT_REQUIRED cpu_to_le32(0xC0000902)
+#define STATUS_BAD_FILE_TYPE cpu_to_le32(0xC0000903)
+#define STATUS_FILE_TOO_LARGE cpu_to_le32(0xC0000904)
+#define STATUS_FORMS_AUTH_REQUIRED cpu_to_le32(0xC0000905)
+#define STATUS_VIRUS_INFECTED cpu_to_le32(0xC0000906)
+#define STATUS_VIRUS_DELETED cpu_to_le32(0xC0000907)
+#define STATUS_BAD_MCFG_TABLE cpu_to_le32(0xC0000908)
+#define STATUS_WOW_ASSERTION cpu_to_le32(0xC0009898)
+#define STATUS_INVALID_SIGNATURE cpu_to_le32(0xC000A000)
+#define STATUS_HMAC_NOT_SUPPORTED cpu_to_le32(0xC000A001)
+#define STATUS_IPSEC_QUEUE_OVERFLOW cpu_to_le32(0xC000A010)
+#define STATUS_ND_QUEUE_OVERFLOW cpu_to_le32(0xC000A011)
+#define STATUS_HOPLIMIT_EXCEEDED cpu_to_le32(0xC000A012)
+#define STATUS_PROTOCOL_NOT_SUPPORTED cpu_to_le32(0xC000A013)
+#define STATUS_LOST_WRITEBEHIND_DATA_NETWORK_DISCONNECTED \
+ cpu_to_le32(0xC000A080)
+#define STATUS_LOST_WRITEBEHIND_DATA_NETWORK_SERVER_ERROR \
+ cpu_to_le32(0xC000A081)
+#define STATUS_LOST_WRITEBEHIND_DATA_LOCAL_DISK_ERROR cpu_to_le32(0xC000A082)
+#define STATUS_XML_PARSE_ERROR cpu_to_le32(0xC000A083)
+#define STATUS_XMLDSIG_ERROR cpu_to_le32(0xC000A084)
+#define STATUS_WRONG_COMPARTMENT cpu_to_le32(0xC000A085)
+#define STATUS_AUTHIP_FAILURE cpu_to_le32(0xC000A086)
+#define DBG_NO_STATE_CHANGE cpu_to_le32(0xC0010001)
+#define DBG_APP_NOT_IDLE cpu_to_le32(0xC0010002)
+#define RPC_NT_INVALID_STRING_BINDING cpu_to_le32(0xC0020001)
+#define RPC_NT_WRONG_KIND_OF_BINDING cpu_to_le32(0xC0020002)
+#define RPC_NT_INVALID_BINDING cpu_to_le32(0xC0020003)
+#define RPC_NT_PROTSEQ_NOT_SUPPORTED cpu_to_le32(0xC0020004)
+#define RPC_NT_INVALID_RPC_PROTSEQ cpu_to_le32(0xC0020005)
+#define RPC_NT_INVALID_STRING_UUID cpu_to_le32(0xC0020006)
+#define RPC_NT_INVALID_ENDPOINT_FORMAT cpu_to_le32(0xC0020007)
+#define RPC_NT_INVALID_NET_ADDR cpu_to_le32(0xC0020008)
+#define RPC_NT_NO_ENDPOINT_FOUND cpu_to_le32(0xC0020009)
+#define RPC_NT_INVALID_TIMEOUT cpu_to_le32(0xC002000A)
+#define RPC_NT_OBJECT_NOT_FOUND cpu_to_le32(0xC002000B)
+#define RPC_NT_ALREADY_REGISTERED cpu_to_le32(0xC002000C)
+#define RPC_NT_TYPE_ALREADY_REGISTERED cpu_to_le32(0xC002000D)
+#define RPC_NT_ALREADY_LISTENING cpu_to_le32(0xC002000E)
+#define RPC_NT_NO_PROTSEQS_REGISTERED cpu_to_le32(0xC002000F)
+#define RPC_NT_NOT_LISTENING cpu_to_le32(0xC0020010)
+#define RPC_NT_UNKNOWN_MGR_TYPE cpu_to_le32(0xC0020011)
+#define RPC_NT_UNKNOWN_IF cpu_to_le32(0xC0020012)
+#define RPC_NT_NO_BINDINGS cpu_to_le32(0xC0020013)
+#define RPC_NT_NO_PROTSEQS cpu_to_le32(0xC0020014)
+#define RPC_NT_CANT_CREATE_ENDPOINT cpu_to_le32(0xC0020015)
+#define RPC_NT_OUT_OF_RESOURCES cpu_to_le32(0xC0020016)
+#define RPC_NT_SERVER_UNAVAILABLE cpu_to_le32(0xC0020017)
+#define RPC_NT_SERVER_TOO_BUSY cpu_to_le32(0xC0020018)
+#define RPC_NT_INVALID_NETWORK_OPTIONS cpu_to_le32(0xC0020019)
+#define RPC_NT_NO_CALL_ACTIVE cpu_to_le32(0xC002001A)
+#define RPC_NT_CALL_FAILED cpu_to_le32(0xC002001B)
+#define RPC_NT_CALL_FAILED_DNE cpu_to_le32(0xC002001C)
+#define RPC_NT_PROTOCOL_ERROR cpu_to_le32(0xC002001D)
+#define RPC_NT_UNSUPPORTED_TRANS_SYN cpu_to_le32(0xC002001F)
+#define RPC_NT_UNSUPPORTED_TYPE cpu_to_le32(0xC0020021)
+#define RPC_NT_INVALID_TAG cpu_to_le32(0xC0020022)
+#define RPC_NT_INVALID_BOUND cpu_to_le32(0xC0020023)
+#define RPC_NT_NO_ENTRY_NAME cpu_to_le32(0xC0020024)
+#define RPC_NT_INVALID_NAME_SYNTAX cpu_to_le32(0xC0020025)
+#define RPC_NT_UNSUPPORTED_NAME_SYNTAX cpu_to_le32(0xC0020026)
+#define RPC_NT_UUID_NO_ADDRESS cpu_to_le32(0xC0020028)
+#define RPC_NT_DUPLICATE_ENDPOINT cpu_to_le32(0xC0020029)
+#define RPC_NT_UNKNOWN_AUTHN_TYPE cpu_to_le32(0xC002002A)
+#define RPC_NT_MAX_CALLS_TOO_SMALL cpu_to_le32(0xC002002B)
+#define RPC_NT_STRING_TOO_LONG cpu_to_le32(0xC002002C)
+#define RPC_NT_PROTSEQ_NOT_FOUND cpu_to_le32(0xC002002D)
+#define RPC_NT_PROCNUM_OUT_OF_RANGE cpu_to_le32(0xC002002E)
+#define RPC_NT_BINDING_HAS_NO_AUTH cpu_to_le32(0xC002002F)
+#define RPC_NT_UNKNOWN_AUTHN_SERVICE cpu_to_le32(0xC0020030)
+#define RPC_NT_UNKNOWN_AUTHN_LEVEL cpu_to_le32(0xC0020031)
+#define RPC_NT_INVALID_AUTH_IDENTITY cpu_to_le32(0xC0020032)
+#define RPC_NT_UNKNOWN_AUTHZ_SERVICE cpu_to_le32(0xC0020033)
+#define EPT_NT_INVALID_ENTRY cpu_to_le32(0xC0020034)
+#define EPT_NT_CANT_PERFORM_OP cpu_to_le32(0xC0020035)
+#define EPT_NT_NOT_REGISTERED cpu_to_le32(0xC0020036)
+#define RPC_NT_NOTHING_TO_EXPORT cpu_to_le32(0xC0020037)
+#define RPC_NT_INCOMPLETE_NAME cpu_to_le32(0xC0020038)
+#define RPC_NT_INVALID_VERS_OPTION cpu_to_le32(0xC0020039)
+#define RPC_NT_NO_MORE_MEMBERS cpu_to_le32(0xC002003A)
+#define RPC_NT_NOT_ALL_OBJS_UNEXPORTED cpu_to_le32(0xC002003B)
+#define RPC_NT_INTERFACE_NOT_FOUND cpu_to_le32(0xC002003C)
+#define RPC_NT_ENTRY_ALREADY_EXISTS cpu_to_le32(0xC002003D)
+#define RPC_NT_ENTRY_NOT_FOUND cpu_to_le32(0xC002003E)
+#define RPC_NT_NAME_SERVICE_UNAVAILABLE cpu_to_le32(0xC002003F)
+#define RPC_NT_INVALID_NAF_ID cpu_to_le32(0xC0020040)
+#define RPC_NT_CANNOT_SUPPORT cpu_to_le32(0xC0020041)
+#define RPC_NT_NO_CONTEXT_AVAILABLE cpu_to_le32(0xC0020042)
+#define RPC_NT_INTERNAL_ERROR cpu_to_le32(0xC0020043)
+#define RPC_NT_ZERO_DIVIDE cpu_to_le32(0xC0020044)
+#define RPC_NT_ADDRESS_ERROR cpu_to_le32(0xC0020045)
+#define RPC_NT_FP_DIV_ZERO cpu_to_le32(0xC0020046)
+#define RPC_NT_FP_UNDERFLOW cpu_to_le32(0xC0020047)
+#define RPC_NT_FP_OVERFLOW cpu_to_le32(0xC0020048)
+#define RPC_NT_CALL_IN_PROGRESS cpu_to_le32(0xC0020049)
+#define RPC_NT_NO_MORE_BINDINGS cpu_to_le32(0xC002004A)
+#define RPC_NT_GROUP_MEMBER_NOT_FOUND cpu_to_le32(0xC002004B)
+#define EPT_NT_CANT_CREATE cpu_to_le32(0xC002004C)
+#define RPC_NT_INVALID_OBJECT cpu_to_le32(0xC002004D)
+#define RPC_NT_NO_INTERFACES cpu_to_le32(0xC002004F)
+#define RPC_NT_CALL_CANCELLED cpu_to_le32(0xC0020050)
+#define RPC_NT_BINDING_INCOMPLETE cpu_to_le32(0xC0020051)
+#define RPC_NT_COMM_FAILURE cpu_to_le32(0xC0020052)
+#define RPC_NT_UNSUPPORTED_AUTHN_LEVEL cpu_to_le32(0xC0020053)
+#define RPC_NT_NO_PRINC_NAME cpu_to_le32(0xC0020054)
+#define RPC_NT_NOT_RPC_ERROR cpu_to_le32(0xC0020055)
+#define RPC_NT_SEC_PKG_ERROR cpu_to_le32(0xC0020057)
+#define RPC_NT_NOT_CANCELLED cpu_to_le32(0xC0020058)
+#define RPC_NT_INVALID_ASYNC_HANDLE cpu_to_le32(0xC0020062)
+#define RPC_NT_INVALID_ASYNC_CALL cpu_to_le32(0xC0020063)
+#define RPC_NT_PROXY_ACCESS_DENIED cpu_to_le32(0xC0020064)
+#define RPC_NT_NO_MORE_ENTRIES cpu_to_le32(0xC0030001)
+#define RPC_NT_SS_CHAR_TRANS_OPEN_FAIL cpu_to_le32(0xC0030002)
+#define RPC_NT_SS_CHAR_TRANS_SHORT_FILE cpu_to_le32(0xC0030003)
+#define RPC_NT_SS_IN_NULL_CONTEXT cpu_to_le32(0xC0030004)
+#define RPC_NT_SS_CONTEXT_MISMATCH cpu_to_le32(0xC0030005)
+#define RPC_NT_SS_CONTEXT_DAMAGED cpu_to_le32(0xC0030006)
+#define RPC_NT_SS_HANDLES_MISMATCH cpu_to_le32(0xC0030007)
+#define RPC_NT_SS_CANNOT_GET_CALL_HANDLE cpu_to_le32(0xC0030008)
+#define RPC_NT_NULL_REF_POINTER cpu_to_le32(0xC0030009)
+#define RPC_NT_ENUM_VALUE_OUT_OF_RANGE cpu_to_le32(0xC003000A)
+#define RPC_NT_BYTE_COUNT_TOO_SMALL cpu_to_le32(0xC003000B)
+#define RPC_NT_BAD_STUB_DATA cpu_to_le32(0xC003000C)
+#define RPC_NT_INVALID_ES_ACTION cpu_to_le32(0xC0030059)
+#define RPC_NT_WRONG_ES_VERSION cpu_to_le32(0xC003005A)
+#define RPC_NT_WRONG_STUB_VERSION cpu_to_le32(0xC003005B)
+#define RPC_NT_INVALID_PIPE_OBJECT cpu_to_le32(0xC003005C)
+#define RPC_NT_INVALID_PIPE_OPERATION cpu_to_le32(0xC003005D)
+#define RPC_NT_WRONG_PIPE_VERSION cpu_to_le32(0xC003005E)
+#define RPC_NT_PIPE_CLOSED cpu_to_le32(0xC003005F)
+#define RPC_NT_PIPE_DISCIPLINE_ERROR cpu_to_le32(0xC0030060)
+#define RPC_NT_PIPE_EMPTY cpu_to_le32(0xC0030061)
+#define STATUS_PNP_BAD_MPS_TABLE cpu_to_le32(0xC0040035)
+#define STATUS_PNP_TRANSLATION_FAILED cpu_to_le32(0xC0040036)
+#define STATUS_PNP_IRQ_TRANSLATION_FAILED cpu_to_le32(0xC0040037)
+#define STATUS_PNP_INVALID_ID cpu_to_le32(0xC0040038)
+#define STATUS_IO_REISSUE_AS_CACHED cpu_to_le32(0xC0040039)
+#define STATUS_CTX_WINSTATION_NAME_INVALID cpu_to_le32(0xC00A0001)
+#define STATUS_CTX_INVALID_PD cpu_to_le32(0xC00A0002)
+#define STATUS_CTX_PD_NOT_FOUND cpu_to_le32(0xC00A0003)
+#define STATUS_CTX_CLOSE_PENDING cpu_to_le32(0xC00A0006)
+#define STATUS_CTX_NO_OUTBUF cpu_to_le32(0xC00A0007)
+#define STATUS_CTX_MODEM_INF_NOT_FOUND cpu_to_le32(0xC00A0008)
+#define STATUS_CTX_INVALID_MODEMNAME cpu_to_le32(0xC00A0009)
+#define STATUS_CTX_RESPONSE_ERROR cpu_to_le32(0xC00A000A)
+#define STATUS_CTX_MODEM_RESPONSE_TIMEOUT cpu_to_le32(0xC00A000B)
+#define STATUS_CTX_MODEM_RESPONSE_NO_CARRIER cpu_to_le32(0xC00A000C)
+#define STATUS_CTX_MODEM_RESPONSE_NO_DIALTONE cpu_to_le32(0xC00A000D)
+#define STATUS_CTX_MODEM_RESPONSE_BUSY cpu_to_le32(0xC00A000E)
+#define STATUS_CTX_MODEM_RESPONSE_VOICE cpu_to_le32(0xC00A000F)
+#define STATUS_CTX_TD_ERROR cpu_to_le32(0xC00A0010)
+#define STATUS_CTX_LICENSE_CLIENT_INVALID cpu_to_le32(0xC00A0012)
+#define STATUS_CTX_LICENSE_NOT_AVAILABLE cpu_to_le32(0xC00A0013)
+#define STATUS_CTX_LICENSE_EXPIRED cpu_to_le32(0xC00A0014)
+#define STATUS_CTX_WINSTATION_NOT_FOUND cpu_to_le32(0xC00A0015)
+#define STATUS_CTX_WINSTATION_NAME_COLLISION cpu_to_le32(0xC00A0016)
+#define STATUS_CTX_WINSTATION_BUSY cpu_to_le32(0xC00A0017)
+#define STATUS_CTX_BAD_VIDEO_MODE cpu_to_le32(0xC00A0018)
+#define STATUS_CTX_GRAPHICS_INVALID cpu_to_le32(0xC00A0022)
+#define STATUS_CTX_NOT_CONSOLE cpu_to_le32(0xC00A0024)
+#define STATUS_CTX_CLIENT_QUERY_TIMEOUT cpu_to_le32(0xC00A0026)
+#define STATUS_CTX_CONSOLE_DISCONNECT cpu_to_le32(0xC00A0027)
+#define STATUS_CTX_CONSOLE_CONNECT cpu_to_le32(0xC00A0028)
+#define STATUS_CTX_SHADOW_DENIED cpu_to_le32(0xC00A002A)
+#define STATUS_CTX_WINSTATION_ACCESS_DENIED cpu_to_le32(0xC00A002B)
+#define STATUS_CTX_INVALID_WD cpu_to_le32(0xC00A002E)
+#define STATUS_CTX_WD_NOT_FOUND cpu_to_le32(0xC00A002F)
+#define STATUS_CTX_SHADOW_INVALID cpu_to_le32(0xC00A0030)
+#define STATUS_CTX_SHADOW_DISABLED cpu_to_le32(0xC00A0031)
+#define STATUS_RDP_PROTOCOL_ERROR cpu_to_le32(0xC00A0032)
+#define STATUS_CTX_CLIENT_LICENSE_NOT_SET cpu_to_le32(0xC00A0033)
+#define STATUS_CTX_CLIENT_LICENSE_IN_USE cpu_to_le32(0xC00A0034)
+#define STATUS_CTX_SHADOW_ENDED_BY_MODE_CHANGE cpu_to_le32(0xC00A0035)
+#define STATUS_CTX_SHADOW_NOT_RUNNING cpu_to_le32(0xC00A0036)
+#define STATUS_CTX_LOGON_DISABLED cpu_to_le32(0xC00A0037)
+#define STATUS_CTX_SECURITY_LAYER_ERROR cpu_to_le32(0xC00A0038)
+#define STATUS_TS_INCOMPATIBLE_SESSIONS cpu_to_le32(0xC00A0039)
+#define STATUS_MUI_FILE_NOT_FOUND cpu_to_le32(0xC00B0001)
+#define STATUS_MUI_INVALID_FILE cpu_to_le32(0xC00B0002)
+#define STATUS_MUI_INVALID_RC_CONFIG cpu_to_le32(0xC00B0003)
+#define STATUS_MUI_INVALID_LOCALE_NAME cpu_to_le32(0xC00B0004)
+#define STATUS_MUI_INVALID_ULTIMATEFALLBACK_NAME cpu_to_le32(0xC00B0005)
+#define STATUS_MUI_FILE_NOT_LOADED cpu_to_le32(0xC00B0006)
+#define STATUS_RESOURCE_ENUM_USER_STOP cpu_to_le32(0xC00B0007)
+#define STATUS_CLUSTER_INVALID_NODE cpu_to_le32(0xC0130001)
+#define STATUS_CLUSTER_NODE_EXISTS cpu_to_le32(0xC0130002)
+#define STATUS_CLUSTER_JOIN_IN_PROGRESS cpu_to_le32(0xC0130003)
+#define STATUS_CLUSTER_NODE_NOT_FOUND cpu_to_le32(0xC0130004)
+#define STATUS_CLUSTER_LOCAL_NODE_NOT_FOUND cpu_to_le32(0xC0130005)
+#define STATUS_CLUSTER_NETWORK_EXISTS cpu_to_le32(0xC0130006)
+#define STATUS_CLUSTER_NETWORK_NOT_FOUND cpu_to_le32(0xC0130007)
+#define STATUS_CLUSTER_NETINTERFACE_EXISTS cpu_to_le32(0xC0130008)
+#define STATUS_CLUSTER_NETINTERFACE_NOT_FOUND cpu_to_le32(0xC0130009)
+#define STATUS_CLUSTER_INVALID_REQUEST cpu_to_le32(0xC013000A)
+#define STATUS_CLUSTER_INVALID_NETWORK_PROVIDER cpu_to_le32(0xC013000B)
+#define STATUS_CLUSTER_NODE_DOWN cpu_to_le32(0xC013000C)
+#define STATUS_CLUSTER_NODE_UNREACHABLE cpu_to_le32(0xC013000D)
+#define STATUS_CLUSTER_NODE_NOT_MEMBER cpu_to_le32(0xC013000E)
+#define STATUS_CLUSTER_JOIN_NOT_IN_PROGRESS cpu_to_le32(0xC013000F)
+#define STATUS_CLUSTER_INVALID_NETWORK cpu_to_le32(0xC0130010)
+#define STATUS_CLUSTER_NO_NET_ADAPTERS cpu_to_le32(0xC0130011)
+#define STATUS_CLUSTER_NODE_UP cpu_to_le32(0xC0130012)
+#define STATUS_CLUSTER_NODE_PAUSED cpu_to_le32(0xC0130013)
+#define STATUS_CLUSTER_NODE_NOT_PAUSED cpu_to_le32(0xC0130014)
+#define STATUS_CLUSTER_NO_SECURITY_CONTEXT cpu_to_le32(0xC0130015)
+#define STATUS_CLUSTER_NETWORK_NOT_INTERNAL cpu_to_le32(0xC0130016)
+#define STATUS_CLUSTER_POISONED cpu_to_le32(0xC0130017)
+#define STATUS_ACPI_INVALID_OPCODE cpu_to_le32(0xC0140001)
+#define STATUS_ACPI_STACK_OVERFLOW cpu_to_le32(0xC0140002)
+#define STATUS_ACPI_ASSERT_FAILED cpu_to_le32(0xC0140003)
+#define STATUS_ACPI_INVALID_INDEX cpu_to_le32(0xC0140004)
+#define STATUS_ACPI_INVALID_ARGUMENT cpu_to_le32(0xC0140005)
+#define STATUS_ACPI_FATAL cpu_to_le32(0xC0140006)
+#define STATUS_ACPI_INVALID_SUPERNAME cpu_to_le32(0xC0140007)
+#define STATUS_ACPI_INVALID_ARGTYPE cpu_to_le32(0xC0140008)
+#define STATUS_ACPI_INVALID_OBJTYPE cpu_to_le32(0xC0140009)
+#define STATUS_ACPI_INVALID_TARGETTYPE cpu_to_le32(0xC014000A)
+#define STATUS_ACPI_INCORRECT_ARGUMENT_COUNT cpu_to_le32(0xC014000B)
+#define STATUS_ACPI_ADDRESS_NOT_MAPPED cpu_to_le32(0xC014000C)
+#define STATUS_ACPI_INVALID_EVENTTYPE cpu_to_le32(0xC014000D)
+#define STATUS_ACPI_HANDLER_COLLISION cpu_to_le32(0xC014000E)
+#define STATUS_ACPI_INVALID_DATA cpu_to_le32(0xC014000F)
+#define STATUS_ACPI_INVALID_REGION cpu_to_le32(0xC0140010)
+#define STATUS_ACPI_INVALID_ACCESS_SIZE cpu_to_le32(0xC0140011)
+#define STATUS_ACPI_ACQUIRE_GLOBAL_LOCK cpu_to_le32(0xC0140012)
+#define STATUS_ACPI_ALREADY_INITIALIZED cpu_to_le32(0xC0140013)
+#define STATUS_ACPI_NOT_INITIALIZED cpu_to_le32(0xC0140014)
+#define STATUS_ACPI_INVALID_MUTEX_LEVEL cpu_to_le32(0xC0140015)
+#define STATUS_ACPI_MUTEX_NOT_OWNED cpu_to_le32(0xC0140016)
+#define STATUS_ACPI_MUTEX_NOT_OWNER cpu_to_le32(0xC0140017)
+#define STATUS_ACPI_RS_ACCESS cpu_to_le32(0xC0140018)
+#define STATUS_ACPI_INVALID_TABLE cpu_to_le32(0xC0140019)
+#define STATUS_ACPI_REG_HANDLER_FAILED cpu_to_le32(0xC0140020)
+#define STATUS_ACPI_POWER_REQUEST_FAILED cpu_to_le32(0xC0140021)
+#define STATUS_SXS_SECTION_NOT_FOUND cpu_to_le32(0xC0150001)
+#define STATUS_SXS_CANT_GEN_ACTCTX cpu_to_le32(0xC0150002)
+#define STATUS_SXS_INVALID_ACTCTXDATA_FORMAT cpu_to_le32(0xC0150003)
+#define STATUS_SXS_ASSEMBLY_NOT_FOUND cpu_to_le32(0xC0150004)
+#define STATUS_SXS_MANIFEST_FORMAT_ERROR cpu_to_le32(0xC0150005)
+#define STATUS_SXS_MANIFEST_PARSE_ERROR cpu_to_le32(0xC0150006)
+#define STATUS_SXS_ACTIVATION_CONTEXT_DISABLED cpu_to_le32(0xC0150007)
+#define STATUS_SXS_KEY_NOT_FOUND cpu_to_le32(0xC0150008)
+#define STATUS_SXS_VERSION_CONFLICT cpu_to_le32(0xC0150009)
+#define STATUS_SXS_WRONG_SECTION_TYPE cpu_to_le32(0xC015000A)
+#define STATUS_SXS_THREAD_QUERIES_DISABLED cpu_to_le32(0xC015000B)
+#define STATUS_SXS_ASSEMBLY_MISSING cpu_to_le32(0xC015000C)
+#define STATUS_SXS_PROCESS_DEFAULT_ALREADY_SET cpu_to_le32(0xC015000E)
+#define STATUS_SXS_EARLY_DEACTIVATION cpu_to_le32(0xC015000F)
+#define STATUS_SXS_INVALID_DEACTIVATION cpu_to_le32(0xC0150010)
+#define STATUS_SXS_MULTIPLE_DEACTIVATION cpu_to_le32(0xC0150011)
+#define STATUS_SXS_SYSTEM_DEFAULT_ACTIVATION_CONTEXT_EMPTY \
+ cpu_to_le32(0xC0150012)
+#define STATUS_SXS_PROCESS_TERMINATION_REQUESTED cpu_to_le32(0xC0150013)
+#define STATUS_SXS_CORRUPT_ACTIVATION_STACK cpu_to_le32(0xC0150014)
+#define STATUS_SXS_CORRUPTION cpu_to_le32(0xC0150015)
+#define STATUS_SXS_INVALID_IDENTITY_ATTRIBUTE_VALUE cpu_to_le32(0xC0150016)
+#define STATUS_SXS_INVALID_IDENTITY_ATTRIBUTE_NAME cpu_to_le32(0xC0150017)
+#define STATUS_SXS_IDENTITY_DUPLICATE_ATTRIBUTE cpu_to_le32(0xC0150018)
+#define STATUS_SXS_IDENTITY_PARSE_ERROR cpu_to_le32(0xC0150019)
+#define STATUS_SXS_COMPONENT_STORE_CORRUPT cpu_to_le32(0xC015001A)
+#define STATUS_SXS_FILE_HASH_MISMATCH cpu_to_le32(0xC015001B)
+#define STATUS_SXS_MANIFEST_IDENTITY_SAME_BUT_CONTENTS_DIFFERENT \
+ cpu_to_le32(0xC015001C)
+#define STATUS_SXS_IDENTITIES_DIFFERENT cpu_to_le32(0xC015001D)
+#define STATUS_SXS_ASSEMBLY_IS_NOT_A_DEPLOYMENT cpu_to_le32(0xC015001E)
+#define STATUS_SXS_FILE_NOT_PART_OF_ASSEMBLY cpu_to_le32(0xC015001F)
+#define STATUS_ADVANCED_INSTALLER_FAILED cpu_to_le32(0xC0150020)
+#define STATUS_XML_ENCODING_MISMATCH cpu_to_le32(0xC0150021)
+#define STATUS_SXS_MANIFEST_TOO_BIG cpu_to_le32(0xC0150022)
+#define STATUS_SXS_SETTING_NOT_REGISTERED cpu_to_le32(0xC0150023)
+#define STATUS_SXS_TRANSACTION_CLOSURE_INCOMPLETE cpu_to_le32(0xC0150024)
+#define STATUS_SMI_PRIMITIVE_INSTALLER_FAILED cpu_to_le32(0xC0150025)
+#define STATUS_GENERIC_COMMAND_FAILED cpu_to_le32(0xC0150026)
+#define STATUS_SXS_FILE_HASH_MISSING cpu_to_le32(0xC0150027)
+#define STATUS_TRANSACTIONAL_CONFLICT cpu_to_le32(0xC0190001)
+#define STATUS_INVALID_TRANSACTION cpu_to_le32(0xC0190002)
+#define STATUS_TRANSACTION_NOT_ACTIVE cpu_to_le32(0xC0190003)
+#define STATUS_TM_INITIALIZATION_FAILED cpu_to_le32(0xC0190004)
+#define STATUS_RM_NOT_ACTIVE cpu_to_le32(0xC0190005)
+#define STATUS_RM_METADATA_CORRUPT cpu_to_le32(0xC0190006)
+#define STATUS_TRANSACTION_NOT_JOINED cpu_to_le32(0xC0190007)
+#define STATUS_DIRECTORY_NOT_RM cpu_to_le32(0xC0190008)
+#define STATUS_TRANSACTIONS_UNSUPPORTED_REMOTE cpu_to_le32(0xC019000A)
+#define STATUS_LOG_RESIZE_INVALID_SIZE cpu_to_le32(0xC019000B)
+#define STATUS_REMOTE_FILE_VERSION_MISMATCH cpu_to_le32(0xC019000C)
+#define STATUS_CRM_PROTOCOL_ALREADY_EXISTS cpu_to_le32(0xC019000F)
+#define STATUS_TRANSACTION_PROPAGATION_FAILED cpu_to_le32(0xC0190010)
+#define STATUS_CRM_PROTOCOL_NOT_FOUND cpu_to_le32(0xC0190011)
+#define STATUS_TRANSACTION_SUPERIOR_EXISTS cpu_to_le32(0xC0190012)
+#define STATUS_TRANSACTION_REQUEST_NOT_VALID cpu_to_le32(0xC0190013)
+#define STATUS_TRANSACTION_NOT_REQUESTED cpu_to_le32(0xC0190014)
+#define STATUS_TRANSACTION_ALREADY_ABORTED cpu_to_le32(0xC0190015)
+#define STATUS_TRANSACTION_ALREADY_COMMITTED cpu_to_le32(0xC0190016)
+#define STATUS_TRANSACTION_INVALID_MARSHALL_BUFFER cpu_to_le32(0xC0190017)
+#define STATUS_CURRENT_TRANSACTION_NOT_VALID cpu_to_le32(0xC0190018)
+#define STATUS_LOG_GROWTH_FAILED cpu_to_le32(0xC0190019)
+#define STATUS_OBJECT_NO_LONGER_EXISTS cpu_to_le32(0xC0190021)
+#define STATUS_STREAM_MINIVERSION_NOT_FOUND cpu_to_le32(0xC0190022)
+#define STATUS_STREAM_MINIVERSION_NOT_VALID cpu_to_le32(0xC0190023)
+#define STATUS_MINIVERSION_INACCESSIBLE_FROM_SPECIFIED_TRANSACTION \
+ cpu_to_le32(0xC0190024)
+#define STATUS_CANT_OPEN_MINIVERSION_WITH_MODIFY_INTENT cpu_to_le32(0xC0190025)
+#define STATUS_CANT_CREATE_MORE_STREAM_MINIVERSIONS cpu_to_le32(0xC0190026)
+#define STATUS_HANDLE_NO_LONGER_VALID cpu_to_le32(0xC0190028)
+#define STATUS_LOG_CORRUPTION_DETECTED cpu_to_le32(0xC0190030)
+#define STATUS_RM_DISCONNECTED cpu_to_le32(0xC0190032)
+#define STATUS_ENLISTMENT_NOT_SUPERIOR cpu_to_le32(0xC0190033)
+#define STATUS_FILE_IDENTITY_NOT_PERSISTENT cpu_to_le32(0xC0190036)
+#define STATUS_CANT_BREAK_TRANSACTIONAL_DEPENDENCY cpu_to_le32(0xC0190037)
+#define STATUS_CANT_CROSS_RM_BOUNDARY cpu_to_le32(0xC0190038)
+#define STATUS_TXF_DIR_NOT_EMPTY cpu_to_le32(0xC0190039)
+#define STATUS_INDOUBT_TRANSACTIONS_EXIST cpu_to_le32(0xC019003A)
+#define STATUS_TM_VOLATILE cpu_to_le32(0xC019003B)
+#define STATUS_ROLLBACK_TIMER_EXPIRED cpu_to_le32(0xC019003C)
+#define STATUS_TXF_ATTRIBUTE_CORRUPT cpu_to_le32(0xC019003D)
+#define STATUS_EFS_NOT_ALLOWED_IN_TRANSACTION cpu_to_le32(0xC019003E)
+#define STATUS_TRANSACTIONAL_OPEN_NOT_ALLOWED cpu_to_le32(0xC019003F)
+#define STATUS_TRANSACTED_MAPPING_UNSUPPORTED_REMOTE cpu_to_le32(0xC0190040)
+#define STATUS_TRANSACTION_REQUIRED_PROMOTION cpu_to_le32(0xC0190043)
+#define STATUS_CANNOT_EXECUTE_FILE_IN_TRANSACTION cpu_to_le32(0xC0190044)
+#define STATUS_TRANSACTIONS_NOT_FROZEN cpu_to_le32(0xC0190045)
+#define STATUS_TRANSACTION_FREEZE_IN_PROGRESS cpu_to_le32(0xC0190046)
+#define STATUS_NOT_SNAPSHOT_VOLUME cpu_to_le32(0xC0190047)
+#define STATUS_NO_SAVEPOINT_WITH_OPEN_FILES cpu_to_le32(0xC0190048)
+#define STATUS_SPARSE_NOT_ALLOWED_IN_TRANSACTION cpu_to_le32(0xC0190049)
+#define STATUS_TM_IDENTITY_MISMATCH cpu_to_le32(0xC019004A)
+#define STATUS_FLOATED_SECTION cpu_to_le32(0xC019004B)
+#define STATUS_CANNOT_ACCEPT_TRANSACTED_WORK cpu_to_le32(0xC019004C)
+#define STATUS_CANNOT_ABORT_TRANSACTIONS cpu_to_le32(0xC019004D)
+#define STATUS_TRANSACTION_NOT_FOUND cpu_to_le32(0xC019004E)
+#define STATUS_RESOURCEMANAGER_NOT_FOUND cpu_to_le32(0xC019004F)
+#define STATUS_ENLISTMENT_NOT_FOUND cpu_to_le32(0xC0190050)
+#define STATUS_TRANSACTIONMANAGER_NOT_FOUND cpu_to_le32(0xC0190051)
+#define STATUS_TRANSACTIONMANAGER_NOT_ONLINE cpu_to_le32(0xC0190052)
+#define STATUS_TRANSACTIONMANAGER_RECOVERY_NAME_COLLISION \
+ cpu_to_le32(0xC0190053)
+#define STATUS_TRANSACTION_NOT_ROOT cpu_to_le32(0xC0190054)
+#define STATUS_TRANSACTION_OBJECT_EXPIRED cpu_to_le32(0xC0190055)
+#define STATUS_COMPRESSION_NOT_ALLOWED_IN_TRANSACTION cpu_to_le32(0xC0190056)
+#define STATUS_TRANSACTION_RESPONSE_NOT_ENLISTED cpu_to_le32(0xC0190057)
+#define STATUS_TRANSACTION_RECORD_TOO_LONG cpu_to_le32(0xC0190058)
+#define STATUS_NO_LINK_TRACKING_IN_TRANSACTION cpu_to_le32(0xC0190059)
+#define STATUS_OPERATION_NOT_SUPPORTED_IN_TRANSACTION cpu_to_le32(0xC019005A)
+#define STATUS_TRANSACTION_INTEGRITY_VIOLATED cpu_to_le32(0xC019005B)
+#define STATUS_LOG_SECTOR_INVALID cpu_to_le32(0xC01A0001)
+#define STATUS_LOG_SECTOR_PARITY_INVALID cpu_to_le32(0xC01A0002)
+#define STATUS_LOG_SECTOR_REMAPPED cpu_to_le32(0xC01A0003)
+#define STATUS_LOG_BLOCK_INCOMPLETE cpu_to_le32(0xC01A0004)
+#define STATUS_LOG_INVALID_RANGE cpu_to_le32(0xC01A0005)
+#define STATUS_LOG_BLOCKS_EXHAUSTED cpu_to_le32(0xC01A0006)
+#define STATUS_LOG_READ_CONTEXT_INVALID cpu_to_le32(0xC01A0007)
+#define STATUS_LOG_RESTART_INVALID cpu_to_le32(0xC01A0008)
+#define STATUS_LOG_BLOCK_VERSION cpu_to_le32(0xC01A0009)
+#define STATUS_LOG_BLOCK_INVALID cpu_to_le32(0xC01A000A)
+#define STATUS_LOG_READ_MODE_INVALID cpu_to_le32(0xC01A000B)
+#define STATUS_LOG_METADATA_CORRUPT cpu_to_le32(0xC01A000D)
+#define STATUS_LOG_METADATA_INVALID cpu_to_le32(0xC01A000E)
+#define STATUS_LOG_METADATA_INCONSISTENT cpu_to_le32(0xC01A000F)
+#define STATUS_LOG_RESERVATION_INVALID cpu_to_le32(0xC01A0010)
+#define STATUS_LOG_CANT_DELETE cpu_to_le32(0xC01A0011)
+#define STATUS_LOG_CONTAINER_LIMIT_EXCEEDED cpu_to_le32(0xC01A0012)
+#define STATUS_LOG_START_OF_LOG cpu_to_le32(0xC01A0013)
+#define STATUS_LOG_POLICY_ALREADY_INSTALLED cpu_to_le32(0xC01A0014)
+#define STATUS_LOG_POLICY_NOT_INSTALLED cpu_to_le32(0xC01A0015)
+#define STATUS_LOG_POLICY_INVALID cpu_to_le32(0xC01A0016)
+#define STATUS_LOG_POLICY_CONFLICT cpu_to_le32(0xC01A0017)
+#define STATUS_LOG_PINNED_ARCHIVE_TAIL cpu_to_le32(0xC01A0018)
+#define STATUS_LOG_RECORD_NONEXISTENT cpu_to_le32(0xC01A0019)
+#define STATUS_LOG_RECORDS_RESERVED_INVALID cpu_to_le32(0xC01A001A)
+#define STATUS_LOG_SPACE_RESERVED_INVALID cpu_to_le32(0xC01A001B)
+#define STATUS_LOG_TAIL_INVALID cpu_to_le32(0xC01A001C)
+#define STATUS_LOG_FULL cpu_to_le32(0xC01A001D)
+#define STATUS_LOG_MULTIPLEXED cpu_to_le32(0xC01A001E)
+#define STATUS_LOG_DEDICATED cpu_to_le32(0xC01A001F)
+#define STATUS_LOG_ARCHIVE_NOT_IN_PROGRESS cpu_to_le32(0xC01A0020)
+#define STATUS_LOG_ARCHIVE_IN_PROGRESS cpu_to_le32(0xC01A0021)
+#define STATUS_LOG_EPHEMERAL cpu_to_le32(0xC01A0022)
+#define STATUS_LOG_NOT_ENOUGH_CONTAINERS cpu_to_le32(0xC01A0023)
+#define STATUS_LOG_CLIENT_ALREADY_REGISTERED cpu_to_le32(0xC01A0024)
+#define STATUS_LOG_CLIENT_NOT_REGISTERED cpu_to_le32(0xC01A0025)
+#define STATUS_LOG_FULL_HANDLER_IN_PROGRESS cpu_to_le32(0xC01A0026)
+#define STATUS_LOG_CONTAINER_READ_FAILED cpu_to_le32(0xC01A0027)
+#define STATUS_LOG_CONTAINER_WRITE_FAILED cpu_to_le32(0xC01A0028)
+#define STATUS_LOG_CONTAINER_OPEN_FAILED cpu_to_le32(0xC01A0029)
+#define STATUS_LOG_CONTAINER_STATE_INVALID cpu_to_le32(0xC01A002A)
+#define STATUS_LOG_STATE_INVALID cpu_to_le32(0xC01A002B)
+#define STATUS_LOG_PINNED cpu_to_le32(0xC01A002C)
+#define STATUS_LOG_METADATA_FLUSH_FAILED cpu_to_le32(0xC01A002D)
+#define STATUS_LOG_INCONSISTENT_SECURITY cpu_to_le32(0xC01A002E)
+#define STATUS_LOG_APPENDED_FLUSH_FAILED cpu_to_le32(0xC01A002F)
+#define STATUS_LOG_PINNED_RESERVATION cpu_to_le32(0xC01A0030)
+#define STATUS_VIDEO_HUNG_DISPLAY_DRIVER_THREAD cpu_to_le32(0xC01B00EA)
+#define STATUS_FLT_NO_HANDLER_DEFINED cpu_to_le32(0xC01C0001)
+#define STATUS_FLT_CONTEXT_ALREADY_DEFINED cpu_to_le32(0xC01C0002)
+#define STATUS_FLT_INVALID_ASYNCHRONOUS_REQUEST cpu_to_le32(0xC01C0003)
+#define STATUS_FLT_DISALLOW_FAST_IO cpu_to_le32(0xC01C0004)
+#define STATUS_FLT_INVALID_NAME_REQUEST cpu_to_le32(0xC01C0005)
+#define STATUS_FLT_NOT_SAFE_TO_POST_OPERATION cpu_to_le32(0xC01C0006)
+#define STATUS_FLT_NOT_INITIALIZED cpu_to_le32(0xC01C0007)
+#define STATUS_FLT_FILTER_NOT_READY cpu_to_le32(0xC01C0008)
+#define STATUS_FLT_POST_OPERATION_CLEANUP cpu_to_le32(0xC01C0009)
+#define STATUS_FLT_INTERNAL_ERROR cpu_to_le32(0xC01C000A)
+#define STATUS_FLT_DELETING_OBJECT cpu_to_le32(0xC01C000B)
+#define STATUS_FLT_MUST_BE_NONPAGED_POOL cpu_to_le32(0xC01C000C)
+#define STATUS_FLT_DUPLICATE_ENTRY cpu_to_le32(0xC01C000D)
+#define STATUS_FLT_CBDQ_DISABLED cpu_to_le32(0xC01C000E)
+#define STATUS_FLT_DO_NOT_ATTACH cpu_to_le32(0xC01C000F)
+#define STATUS_FLT_DO_NOT_DETACH cpu_to_le32(0xC01C0010)
+#define STATUS_FLT_INSTANCE_ALTITUDE_COLLISION cpu_to_le32(0xC01C0011)
+#define STATUS_FLT_INSTANCE_NAME_COLLISION cpu_to_le32(0xC01C0012)
+#define STATUS_FLT_FILTER_NOT_FOUND cpu_to_le32(0xC01C0013)
+#define STATUS_FLT_VOLUME_NOT_FOUND cpu_to_le32(0xC01C0014)
+#define STATUS_FLT_INSTANCE_NOT_FOUND cpu_to_le32(0xC01C0015)
+#define STATUS_FLT_CONTEXT_ALLOCATION_NOT_FOUND cpu_to_le32(0xC01C0016)
+#define STATUS_FLT_INVALID_CONTEXT_REGISTRATION cpu_to_le32(0xC01C0017)
+#define STATUS_FLT_NAME_CACHE_MISS cpu_to_le32(0xC01C0018)
+#define STATUS_FLT_NO_DEVICE_OBJECT cpu_to_le32(0xC01C0019)
+#define STATUS_FLT_VOLUME_ALREADY_MOUNTED cpu_to_le32(0xC01C001A)
+#define STATUS_FLT_ALREADY_ENLISTED cpu_to_le32(0xC01C001B)
+#define STATUS_FLT_CONTEXT_ALREADY_LINKED cpu_to_le32(0xC01C001C)
+#define STATUS_FLT_NO_WAITER_FOR_REPLY cpu_to_le32(0xC01C0020)
+#define STATUS_MONITOR_NO_DESCRIPTOR cpu_to_le32(0xC01D0001)
+#define STATUS_MONITOR_UNKNOWN_DESCRIPTOR_FORMAT cpu_to_le32(0xC01D0002)
+#define STATUS_MONITOR_INVALID_DESCRIPTOR_CHECKSUM cpu_to_le32(0xC01D0003)
+#define STATUS_MONITOR_INVALID_STANDARD_TIMING_BLOCK cpu_to_le32(0xC01D0004)
+#define STATUS_MONITOR_WMI_DATABLOCK_REGISTRATION_FAILED cpu_to_le32(0xC01D0005)
+#define STATUS_MONITOR_INVALID_SERIAL_NUMBER_MONDSC_BLOCK \
+ cpu_to_le32(0xC01D0006)
+#define STATUS_MONITOR_INVALID_USER_FRIENDLY_MONDSC_BLOCK \
+ cpu_to_le32(0xC01D0007)
+#define STATUS_MONITOR_NO_MORE_DESCRIPTOR_DATA cpu_to_le32(0xC01D0008)
+#define STATUS_MONITOR_INVALID_DETAILED_TIMING_BLOCK cpu_to_le32(0xC01D0009)
+#define STATUS_GRAPHICS_NOT_EXCLUSIVE_MODE_OWNER cpu_to_le32(0xC01E0000)
+#define STATUS_GRAPHICS_INSUFFICIENT_DMA_BUFFER cpu_to_le32(0xC01E0001)
+#define STATUS_GRAPHICS_INVALID_DISPLAY_ADAPTER cpu_to_le32(0xC01E0002)
+#define STATUS_GRAPHICS_ADAPTER_WAS_RESET cpu_to_le32(0xC01E0003)
+#define STATUS_GRAPHICS_INVALID_DRIVER_MODEL cpu_to_le32(0xC01E0004)
+#define STATUS_GRAPHICS_PRESENT_MODE_CHANGED cpu_to_le32(0xC01E0005)
+#define STATUS_GRAPHICS_PRESENT_OCCLUDED cpu_to_le32(0xC01E0006)
+#define STATUS_GRAPHICS_PRESENT_DENIED cpu_to_le32(0xC01E0007)
+#define STATUS_GRAPHICS_CANNOTCOLORCONVERT cpu_to_le32(0xC01E0008)
+#define STATUS_GRAPHICS_NO_VIDEO_MEMORY cpu_to_le32(0xC01E0100)
+#define STATUS_GRAPHICS_CANT_LOCK_MEMORY cpu_to_le32(0xC01E0101)
+#define STATUS_GRAPHICS_ALLOCATION_BUSY cpu_to_le32(0xC01E0102)
+#define STATUS_GRAPHICS_TOO_MANY_REFERENCES cpu_to_le32(0xC01E0103)
+#define STATUS_GRAPHICS_TRY_AGAIN_LATER cpu_to_le32(0xC01E0104)
+#define STATUS_GRAPHICS_TRY_AGAIN_NOW cpu_to_le32(0xC01E0105)
+#define STATUS_GRAPHICS_ALLOCATION_INVALID cpu_to_le32(0xC01E0106)
+#define STATUS_GRAPHICS_UNSWIZZLING_APERTURE_UNAVAILABLE cpu_to_le32(0xC01E0107)
+#define STATUS_GRAPHICS_UNSWIZZLING_APERTURE_UNSUPPORTED cpu_to_le32(0xC01E0108)
+#define STATUS_GRAPHICS_CANT_EVICT_PINNED_ALLOCATION cpu_to_le32(0xC01E0109)
+#define STATUS_GRAPHICS_INVALID_ALLOCATION_USAGE cpu_to_le32(0xC01E0110)
+#define STATUS_GRAPHICS_CANT_RENDER_LOCKED_ALLOCATION cpu_to_le32(0xC01E0111)
+#define STATUS_GRAPHICS_ALLOCATION_CLOSED cpu_to_le32(0xC01E0112)
+#define STATUS_GRAPHICS_INVALID_ALLOCATION_INSTANCE cpu_to_le32(0xC01E0113)
+#define STATUS_GRAPHICS_INVALID_ALLOCATION_HANDLE cpu_to_le32(0xC01E0114)
+#define STATUS_GRAPHICS_WRONG_ALLOCATION_DEVICE cpu_to_le32(0xC01E0115)
+#define STATUS_GRAPHICS_ALLOCATION_CONTENT_LOST cpu_to_le32(0xC01E0116)
+#define STATUS_GRAPHICS_GPU_EXCEPTION_ON_DEVICE cpu_to_le32(0xC01E0200)
+#define STATUS_GRAPHICS_INVALID_VIDPN_TOPOLOGY cpu_to_le32(0xC01E0300)
+#define STATUS_GRAPHICS_VIDPN_TOPOLOGY_NOT_SUPPORTED cpu_to_le32(0xC01E0301)
+#define STATUS_GRAPHICS_VIDPN_TOPOLOGY_CURRENTLY_NOT_SUPPORTED \
+ cpu_to_le32(0xC01E0302)
+#define STATUS_GRAPHICS_INVALID_VIDPN cpu_to_le32(0xC01E0303)
+#define STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_SOURCE cpu_to_le32(0xC01E0304)
+#define STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_TARGET cpu_to_le32(0xC01E0305)
+#define STATUS_GRAPHICS_VIDPN_MODALITY_NOT_SUPPORTED cpu_to_le32(0xC01E0306)
+#define STATUS_GRAPHICS_INVALID_VIDPN_SOURCEMODESET cpu_to_le32(0xC01E0308)
+#define STATUS_GRAPHICS_INVALID_VIDPN_TARGETMODESET cpu_to_le32(0xC01E0309)
+#define STATUS_GRAPHICS_INVALID_FREQUENCY cpu_to_le32(0xC01E030A)
+#define STATUS_GRAPHICS_INVALID_ACTIVE_REGION cpu_to_le32(0xC01E030B)
+#define STATUS_GRAPHICS_INVALID_TOTAL_REGION cpu_to_le32(0xC01E030C)
+#define STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_SOURCE_MODE \
+ cpu_to_le32(0xC01E0310)
+#define STATUS_GRAPHICS_INVALID_VIDEO_PRESENT_TARGET_MODE \
+ cpu_to_le32(0xC01E0311)
+#define STATUS_GRAPHICS_PINNED_MODE_MUST_REMAIN_IN_SET cpu_to_le32(0xC01E0312)
+#define STATUS_GRAPHICS_PATH_ALREADY_IN_TOPOLOGY cpu_to_le32(0xC01E0313)
+#define STATUS_GRAPHICS_MODE_ALREADY_IN_MODESET cpu_to_le32(0xC01E0314)
+#define STATUS_GRAPHICS_INVALID_VIDEOPRESENTSOURCESET cpu_to_le32(0xC01E0315)
+#define STATUS_GRAPHICS_INVALID_VIDEOPRESENTTARGETSET cpu_to_le32(0xC01E0316)
+#define STATUS_GRAPHICS_SOURCE_ALREADY_IN_SET cpu_to_le32(0xC01E0317)
+#define STATUS_GRAPHICS_TARGET_ALREADY_IN_SET cpu_to_le32(0xC01E0318)
+#define STATUS_GRAPHICS_INVALID_VIDPN_PRESENT_PATH cpu_to_le32(0xC01E0319)
+#define STATUS_GRAPHICS_NO_RECOMMENDED_VIDPN_TOPOLOGY cpu_to_le32(0xC01E031A)
+#define STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGESET \
+ cpu_to_le32(0xC01E031B)
+#define STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGE cpu_to_le32(0xC01E031C)
+#define STATUS_GRAPHICS_FREQUENCYRANGE_NOT_IN_SET cpu_to_le32(0xC01E031D)
+#define STATUS_GRAPHICS_FREQUENCYRANGE_ALREADY_IN_SET cpu_to_le32(0xC01E031F)
+#define STATUS_GRAPHICS_STALE_MODESET cpu_to_le32(0xC01E0320)
+#define STATUS_GRAPHICS_INVALID_MONITOR_SOURCEMODESET cpu_to_le32(0xC01E0321)
+#define STATUS_GRAPHICS_INVALID_MONITOR_SOURCE_MODE cpu_to_le32(0xC01E0322)
+#define STATUS_GRAPHICS_NO_RECOMMENDED_FUNCTIONAL_VIDPN cpu_to_le32(0xC01E0323)
+#define STATUS_GRAPHICS_MODE_ID_MUST_BE_UNIQUE cpu_to_le32(0xC01E0324)
+#define STATUS_GRAPHICS_EMPTY_ADAPTER_MONITOR_MODE_SUPPORT_INTERSECTION \
+ cpu_to_le32(0xC01E0325)
+#define STATUS_GRAPHICS_VIDEO_PRESENT_TARGETS_LESS_THAN_SOURCES \
+ cpu_to_le32(0xC01E0326)
+#define STATUS_GRAPHICS_PATH_NOT_IN_TOPOLOGY cpu_to_le32(0xC01E0327)
+#define STATUS_GRAPHICS_ADAPTER_MUST_HAVE_AT_LEAST_ONE_SOURCE \
+ cpu_to_le32(0xC01E0328)
+#define STATUS_GRAPHICS_ADAPTER_MUST_HAVE_AT_LEAST_ONE_TARGET \
+ cpu_to_le32(0xC01E0329)
+#define STATUS_GRAPHICS_INVALID_MONITORDESCRIPTORSET cpu_to_le32(0xC01E032A)
+#define STATUS_GRAPHICS_INVALID_MONITORDESCRIPTOR cpu_to_le32(0xC01E032B)
+#define STATUS_GRAPHICS_MONITORDESCRIPTOR_NOT_IN_SET cpu_to_le32(0xC01E032C)
+#define STATUS_GRAPHICS_MONITORDESCRIPTOR_ALREADY_IN_SET cpu_to_le32(0xC01E032D)
+#define STATUS_GRAPHICS_MONITORDESCRIPTOR_ID_MUST_BE_UNIQUE \
+ cpu_to_le32(0xC01E032E)
+#define STATUS_GRAPHICS_INVALID_VIDPN_TARGET_SUBSET_TYPE cpu_to_le32(0xC01E032F)
+#define STATUS_GRAPHICS_RESOURCES_NOT_RELATED cpu_to_le32(0xC01E0330)
+#define STATUS_GRAPHICS_SOURCE_ID_MUST_BE_UNIQUE cpu_to_le32(0xC01E0331)
+#define STATUS_GRAPHICS_TARGET_ID_MUST_BE_UNIQUE cpu_to_le32(0xC01E0332)
+#define STATUS_GRAPHICS_NO_AVAILABLE_VIDPN_TARGET cpu_to_le32(0xC01E0333)
+#define STATUS_GRAPHICS_MONITOR_COULD_NOT_BE_ASSOCIATED_WITH_ADAPTER \
+ cpu_to_le32(0xC01E0334)
+#define STATUS_GRAPHICS_NO_VIDPNMGR cpu_to_le32(0xC01E0335)
+#define STATUS_GRAPHICS_NO_ACTIVE_VIDPN cpu_to_le32(0xC01E0336)
+#define STATUS_GRAPHICS_STALE_VIDPN_TOPOLOGY cpu_to_le32(0xC01E0337)
+#define STATUS_GRAPHICS_MONITOR_NOT_CONNECTED cpu_to_le32(0xC01E0338)
+#define STATUS_GRAPHICS_SOURCE_NOT_IN_TOPOLOGY cpu_to_le32(0xC01E0339)
+#define STATUS_GRAPHICS_INVALID_PRIMARYSURFACE_SIZE cpu_to_le32(0xC01E033A)
+#define STATUS_GRAPHICS_INVALID_VISIBLEREGION_SIZE cpu_to_le32(0xC01E033B)
+#define STATUS_GRAPHICS_INVALID_STRIDE cpu_to_le32(0xC01E033C)
+#define STATUS_GRAPHICS_INVALID_PIXELFORMAT cpu_to_le32(0xC01E033D)
+#define STATUS_GRAPHICS_INVALID_COLORBASIS cpu_to_le32(0xC01E033E)
+#define STATUS_GRAPHICS_INVALID_PIXELVALUEACCESSMODE cpu_to_le32(0xC01E033F)
+#define STATUS_GRAPHICS_TARGET_NOT_IN_TOPOLOGY cpu_to_le32(0xC01E0340)
+#define STATUS_GRAPHICS_NO_DISPLAY_MODE_MANAGEMENT_SUPPORT \
+ cpu_to_le32(0xC01E0341)
+#define STATUS_GRAPHICS_VIDPN_SOURCE_IN_USE cpu_to_le32(0xC01E0342)
+#define STATUS_GRAPHICS_CANT_ACCESS_ACTIVE_VIDPN cpu_to_le32(0xC01E0343)
+#define STATUS_GRAPHICS_INVALID_PATH_IMPORTANCE_ORDINAL cpu_to_le32(0xC01E0344)
+#define STATUS_GRAPHICS_INVALID_PATH_CONTENT_GEOMETRY_TRANSFORMATION \
+ cpu_to_le32(0xC01E0345)
+#define STATUS_GRAPHICS_PATH_CONTENT_GEOMETRY_TRANSFORMATION_NOT_SUPPORTED \
+ cpu_to_le32(0xC01E0346)
+#define STATUS_GRAPHICS_INVALID_GAMMA_RAMP cpu_to_le32(0xC01E0347)
+#define STATUS_GRAPHICS_GAMMA_RAMP_NOT_SUPPORTED cpu_to_le32(0xC01E0348)
+#define STATUS_GRAPHICS_MULTISAMPLING_NOT_SUPPORTED cpu_to_le32(0xC01E0349)
+#define STATUS_GRAPHICS_MODE_NOT_IN_MODESET cpu_to_le32(0xC01E034A)
+#define STATUS_GRAPHICS_INVALID_VIDPN_TOPOLOGY_RECOMMENDATION_REASON \
+ cpu_to_le32(0xC01E034D)
+#define STATUS_GRAPHICS_INVALID_PATH_CONTENT_TYPE cpu_to_le32(0xC01E034E)
+#define STATUS_GRAPHICS_INVALID_COPYPROTECTION_TYPE cpu_to_le32(0xC01E034F)
+#define STATUS_GRAPHICS_UNASSIGNED_MODESET_ALREADY_EXISTS \
+ cpu_to_le32(0xC01E0350)
+#define STATUS_GRAPHICS_INVALID_SCANLINE_ORDERING cpu_to_le32(0xC01E0352)
+#define STATUS_GRAPHICS_TOPOLOGY_CHANGES_NOT_ALLOWED cpu_to_le32(0xC01E0353)
+#define STATUS_GRAPHICS_NO_AVAILABLE_IMPORTANCE_ORDINALS cpu_to_le32(0xC01E0354)
+#define STATUS_GRAPHICS_INCOMPATIBLE_PRIVATE_FORMAT cpu_to_le32(0xC01E0355)
+#define STATUS_GRAPHICS_INVALID_MODE_PRUNING_ALGORITHM cpu_to_le32(0xC01E0356)
+#define STATUS_GRAPHICS_INVALID_MONITOR_CAPABILITY_ORIGIN \
+ cpu_to_le32(0xC01E0357)
+#define STATUS_GRAPHICS_INVALID_MONITOR_FREQUENCYRANGE_CONSTRAINT \
+ cpu_to_le32(0xC01E0358)
+#define STATUS_GRAPHICS_MAX_NUM_PATHS_REACHED cpu_to_le32(0xC01E0359)
+#define STATUS_GRAPHICS_CANCEL_VIDPN_TOPOLOGY_AUGMENTATION \
+ cpu_to_le32(0xC01E035A)
+#define STATUS_GRAPHICS_INVALID_CLIENT_TYPE cpu_to_le32(0xC01E035B)
+#define STATUS_GRAPHICS_CLIENTVIDPN_NOT_SET cpu_to_le32(0xC01E035C)
+#define STATUS_GRAPHICS_SPECIFIED_CHILD_ALREADY_CONNECTED \
+ cpu_to_le32(0xC01E0400)
+#define STATUS_GRAPHICS_CHILD_DESCRIPTOR_NOT_SUPPORTED cpu_to_le32(0xC01E0401)
+#define STATUS_GRAPHICS_NOT_A_LINKED_ADAPTER cpu_to_le32(0xC01E0430)
+#define STATUS_GRAPHICS_LEADLINK_NOT_ENUMERATED cpu_to_le32(0xC01E0431)
+#define STATUS_GRAPHICS_CHAINLINKS_NOT_ENUMERATED cpu_to_le32(0xC01E0432)
+#define STATUS_GRAPHICS_ADAPTER_CHAIN_NOT_READY cpu_to_le32(0xC01E0433)
+#define STATUS_GRAPHICS_CHAINLINKS_NOT_STARTED cpu_to_le32(0xC01E0434)
+#define STATUS_GRAPHICS_CHAINLINKS_NOT_POWERED_ON cpu_to_le32(0xC01E0435)
+#define STATUS_GRAPHICS_INCONSISTENT_DEVICE_LINK_STATE cpu_to_le32(0xC01E0436)
+#define STATUS_GRAPHICS_NOT_POST_DEVICE_DRIVER cpu_to_le32(0xC01E0438)
+#define STATUS_GRAPHICS_ADAPTER_ACCESS_NOT_EXCLUDED cpu_to_le32(0xC01E043B)
+#define STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_DOES_NOT_HAVE_COPP_SEMANTICS \
+ cpu_to_le32(0xC01E051C)
+#define STATUS_GRAPHICS_OPM_INVALID_INFORMATION_REQUEST cpu_to_le32(0xC01E051D)
+#define STATUS_GRAPHICS_OPM_DRIVER_INTERNAL_ERROR cpu_to_le32(0xC01E051E)
+#define STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_DOES_NOT_HAVE_OPM_SEMANTICS \
+ cpu_to_le32(0xC01E051F)
+#define STATUS_GRAPHICS_OPM_SIGNALING_NOT_SUPPORTED cpu_to_le32(0xC01E0520)
+#define STATUS_GRAPHICS_OPM_INVALID_CONFIGURATION_REQUEST \
+ cpu_to_le32(0xC01E0521)
+#define STATUS_GRAPHICS_OPM_NOT_SUPPORTED cpu_to_le32(0xC01E0500)
+#define STATUS_GRAPHICS_COPP_NOT_SUPPORTED cpu_to_le32(0xC01E0501)
+#define STATUS_GRAPHICS_UAB_NOT_SUPPORTED cpu_to_le32(0xC01E0502)
+#define STATUS_GRAPHICS_OPM_INVALID_ENCRYPTED_PARAMETERS cpu_to_le32(0xC01E0503)
+#define STATUS_GRAPHICS_OPM_PARAMETER_ARRAY_TOO_SMALL cpu_to_le32(0xC01E0504)
+#define STATUS_GRAPHICS_OPM_NO_PROTECTED_OUTPUTS_EXIST cpu_to_le32(0xC01E0505)
+#define STATUS_GRAPHICS_PVP_NO_DISPLAY_DEVICE_CORRESPONDS_TO_NAME \
+ cpu_to_le32(0xC01E0506)
+#define STATUS_GRAPHICS_PVP_DISPLAY_DEVICE_NOT_ATTACHED_TO_DESKTOP \
+ cpu_to_le32(0xC01E0507)
+#define STATUS_GRAPHICS_PVP_MIRRORING_DEVICES_NOT_SUPPORTED \
+ cpu_to_le32(0xC01E0508)
+#define STATUS_GRAPHICS_OPM_INVALID_POINTER cpu_to_le32(0xC01E050A)
+#define STATUS_GRAPHICS_OPM_INTERNAL_ERROR cpu_to_le32(0xC01E050B)
+#define STATUS_GRAPHICS_OPM_INVALID_HANDLE cpu_to_le32(0xC01E050C)
+#define STATUS_GRAPHICS_PVP_NO_MONITORS_CORRESPOND_TO_DISPLAY_DEVICE \
+ cpu_to_le32(0xC01E050D)
+#define STATUS_GRAPHICS_PVP_INVALID_CERTIFICATE_LENGTH cpu_to_le32(0xC01E050E)
+#define STATUS_GRAPHICS_OPM_SPANNING_MODE_ENABLED cpu_to_le32(0xC01E050F)
+#define STATUS_GRAPHICS_OPM_THEATER_MODE_ENABLED cpu_to_le32(0xC01E0510)
+#define STATUS_GRAPHICS_PVP_HFS_FAILED cpu_to_le32(0xC01E0511)
+#define STATUS_GRAPHICS_OPM_INVALID_SRM cpu_to_le32(0xC01E0512)
+#define STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_HDCP cpu_to_le32(0xC01E0513)
+#define STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_ACP cpu_to_le32(0xC01E0514)
+#define STATUS_GRAPHICS_OPM_OUTPUT_DOES_NOT_SUPPORT_CGMSA \
+ cpu_to_le32(0xC01E0515)
+#define STATUS_GRAPHICS_OPM_HDCP_SRM_NEVER_SET cpu_to_le32(0xC01E0516)
+#define STATUS_GRAPHICS_OPM_RESOLUTION_TOO_HIGH cpu_to_le32(0xC01E0517)
+#define STATUS_GRAPHICS_OPM_ALL_HDCP_HARDWARE_ALREADY_IN_USE \
+ cpu_to_le32(0xC01E0518)
+#define STATUS_GRAPHICS_OPM_PROTECTED_OUTPUT_NO_LONGER_EXISTS \
+ cpu_to_le32(0xC01E051A)
+#define STATUS_GRAPHICS_OPM_SESSION_TYPE_CHANGE_IN_PROGRESS \
+ cpu_to_le32(0xC01E051B)
+#define STATUS_GRAPHICS_I2C_NOT_SUPPORTED cpu_to_le32(0xC01E0580)
+#define STATUS_GRAPHICS_I2C_DEVICE_DOES_NOT_EXIST cpu_to_le32(0xC01E0581)
+#define STATUS_GRAPHICS_I2C_ERROR_TRANSMITTING_DATA cpu_to_le32(0xC01E0582)
+#define STATUS_GRAPHICS_I2C_ERROR_RECEIVING_DATA cpu_to_le32(0xC01E0583)
+#define STATUS_GRAPHICS_DDCCI_VCP_NOT_SUPPORTED cpu_to_le32(0xC01E0584)
+#define STATUS_GRAPHICS_DDCCI_INVALID_DATA cpu_to_le32(0xC01E0585)
+#define STATUS_GRAPHICS_DDCCI_MONITOR_RETURNED_INVALID_TIMING_STATUS_BYTE \
+ cpu_to_le32(0xC01E0586)
+#define STATUS_GRAPHICS_DDCCI_INVALID_CAPABILITIES_STRING \
+ cpu_to_le32(0xC01E0587)
+#define STATUS_GRAPHICS_MCA_INTERNAL_ERROR cpu_to_le32(0xC01E0588)
+#define STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_COMMAND cpu_to_le32(0xC01E0589)
+#define STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_LENGTH cpu_to_le32(0xC01E058A)
+#define STATUS_GRAPHICS_DDCCI_INVALID_MESSAGE_CHECKSUM cpu_to_le32(0xC01E058B)
+#define STATUS_GRAPHICS_INVALID_PHYSICAL_MONITOR_HANDLE cpu_to_le32(0xC01E058C)
+#define STATUS_GRAPHICS_MONITOR_NO_LONGER_EXISTS cpu_to_le32(0xC01E058D)
+#define STATUS_GRAPHICS_ONLY_CONSOLE_SESSION_SUPPORTED cpu_to_le32(0xC01E05E0)
+#define STATUS_GRAPHICS_NO_DISPLAY_DEVICE_CORRESPONDS_TO_NAME \
+ cpu_to_le32(0xC01E05E1)
+#define STATUS_GRAPHICS_DISPLAY_DEVICE_NOT_ATTACHED_TO_DESKTOP \
+ cpu_to_le32(0xC01E05E2)
+#define STATUS_GRAPHICS_MIRRORING_DEVICES_NOT_SUPPORTED cpu_to_le32(0xC01E05E3)
+#define STATUS_GRAPHICS_INVALID_POINTER cpu_to_le32(0xC01E05E4)
+#define STATUS_GRAPHICS_NO_MONITORS_CORRESPOND_TO_DISPLAY_DEVICE \
+ cpu_to_le32(0xC01E05E5)
+#define STATUS_GRAPHICS_PARAMETER_ARRAY_TOO_SMALL cpu_to_le32(0xC01E05E6)
+#define STATUS_GRAPHICS_INTERNAL_ERROR cpu_to_le32(0xC01E05E7)
+#define STATUS_GRAPHICS_SESSION_TYPE_CHANGE_IN_PROGRESS cpu_to_le32(0xC01E05E8)
+#define STATUS_FVE_LOCKED_VOLUME cpu_to_le32(0xC0210000)
+#define STATUS_FVE_NOT_ENCRYPTED cpu_to_le32(0xC0210001)
+#define STATUS_FVE_BAD_INFORMATION cpu_to_le32(0xC0210002)
+#define STATUS_FVE_TOO_SMALL cpu_to_le32(0xC0210003)
+#define STATUS_FVE_FAILED_WRONG_FS cpu_to_le32(0xC0210004)
+#define STATUS_FVE_FAILED_BAD_FS cpu_to_le32(0xC0210005)
+#define STATUS_FVE_FS_NOT_EXTENDED cpu_to_le32(0xC0210006)
+#define STATUS_FVE_FS_MOUNTED cpu_to_le32(0xC0210007)
+#define STATUS_FVE_NO_LICENSE cpu_to_le32(0xC0210008)
+#define STATUS_FVE_ACTION_NOT_ALLOWED cpu_to_le32(0xC0210009)
+#define STATUS_FVE_BAD_DATA cpu_to_le32(0xC021000A)
+#define STATUS_FVE_VOLUME_NOT_BOUND cpu_to_le32(0xC021000B)
+#define STATUS_FVE_NOT_DATA_VOLUME cpu_to_le32(0xC021000C)
+#define STATUS_FVE_CONV_READ_ERROR cpu_to_le32(0xC021000D)
+#define STATUS_FVE_CONV_WRITE_ERROR cpu_to_le32(0xC021000E)
+#define STATUS_FVE_OVERLAPPED_UPDATE cpu_to_le32(0xC021000F)
+#define STATUS_FVE_FAILED_SECTOR_SIZE cpu_to_le32(0xC0210010)
+#define STATUS_FVE_FAILED_AUTHENTICATION cpu_to_le32(0xC0210011)
+#define STATUS_FVE_NOT_OS_VOLUME cpu_to_le32(0xC0210012)
+#define STATUS_FVE_KEYFILE_NOT_FOUND cpu_to_le32(0xC0210013)
+#define STATUS_FVE_KEYFILE_INVALID cpu_to_le32(0xC0210014)
+#define STATUS_FVE_KEYFILE_NO_VMK cpu_to_le32(0xC0210015)
+#define STATUS_FVE_TPM_DISABLED cpu_to_le32(0xC0210016)
+#define STATUS_FVE_TPM_SRK_AUTH_NOT_ZERO cpu_to_le32(0xC0210017)
+#define STATUS_FVE_TPM_INVALID_PCR cpu_to_le32(0xC0210018)
+#define STATUS_FVE_TPM_NO_VMK cpu_to_le32(0xC0210019)
+#define STATUS_FVE_PIN_INVALID cpu_to_le32(0xC021001A)
+#define STATUS_FVE_AUTH_INVALID_APPLICATION cpu_to_le32(0xC021001B)
+#define STATUS_FVE_AUTH_INVALID_CONFIG cpu_to_le32(0xC021001C)
+#define STATUS_FVE_DEBUGGER_ENABLED cpu_to_le32(0xC021001D)
+#define STATUS_FVE_DRY_RUN_FAILED cpu_to_le32(0xC021001E)
+#define STATUS_FVE_BAD_METADATA_POINTER cpu_to_le32(0xC021001F)
+#define STATUS_FVE_OLD_METADATA_COPY cpu_to_le32(0xC0210020)
+#define STATUS_FVE_REBOOT_REQUIRED cpu_to_le32(0xC0210021)
+#define STATUS_FVE_RAW_ACCESS cpu_to_le32(0xC0210022)
+#define STATUS_FVE_RAW_BLOCKED cpu_to_le32(0xC0210023)
+#define STATUS_FWP_CALLOUT_NOT_FOUND cpu_to_le32(0xC0220001)
+#define STATUS_FWP_CONDITION_NOT_FOUND cpu_to_le32(0xC0220002)
+#define STATUS_FWP_FILTER_NOT_FOUND cpu_to_le32(0xC0220003)
+#define STATUS_FWP_LAYER_NOT_FOUND cpu_to_le32(0xC0220004)
+#define STATUS_FWP_PROVIDER_NOT_FOUND cpu_to_le32(0xC0220005)
+#define STATUS_FWP_PROVIDER_CONTEXT_NOT_FOUND cpu_to_le32(0xC0220006)
+#define STATUS_FWP_SUBLAYER_NOT_FOUND cpu_to_le32(0xC0220007)
+#define STATUS_FWP_NOT_FOUND cpu_to_le32(0xC0220008)
+#define STATUS_FWP_ALREADY_EXISTS cpu_to_le32(0xC0220009)
+#define STATUS_FWP_IN_USE cpu_to_le32(0xC022000A)
+#define STATUS_FWP_DYNAMIC_SESSION_IN_PROGRESS cpu_to_le32(0xC022000B)
+#define STATUS_FWP_WRONG_SESSION cpu_to_le32(0xC022000C)
+#define STATUS_FWP_NO_TXN_IN_PROGRESS cpu_to_le32(0xC022000D)
+#define STATUS_FWP_TXN_IN_PROGRESS cpu_to_le32(0xC022000E)
+#define STATUS_FWP_TXN_ABORTED cpu_to_le32(0xC022000F)
+#define STATUS_FWP_SESSION_ABORTED cpu_to_le32(0xC0220010)
+#define STATUS_FWP_INCOMPATIBLE_TXN cpu_to_le32(0xC0220011)
+#define STATUS_FWP_TIMEOUT cpu_to_le32(0xC0220012)
+#define STATUS_FWP_NET_EVENTS_DISABLED cpu_to_le32(0xC0220013)
+#define STATUS_FWP_INCOMPATIBLE_LAYER cpu_to_le32(0xC0220014)
+#define STATUS_FWP_KM_CLIENTS_ONLY cpu_to_le32(0xC0220015)
+#define STATUS_FWP_LIFETIME_MISMATCH cpu_to_le32(0xC0220016)
+#define STATUS_FWP_BUILTIN_OBJECT cpu_to_le32(0xC0220017)
+#define STATUS_FWP_TOO_MANY_BOOTTIME_FILTERS cpu_to_le32(0xC0220018)
+#define STATUS_FWP_TOO_MANY_CALLOUTS cpu_to_le32(0xC0220018)
+#define STATUS_FWP_NOTIFICATION_DROPPED cpu_to_le32(0xC0220019)
+#define STATUS_FWP_TRAFFIC_MISMATCH cpu_to_le32(0xC022001A)
+#define STATUS_FWP_INCOMPATIBLE_SA_STATE cpu_to_le32(0xC022001B)
+#define STATUS_FWP_NULL_POINTER cpu_to_le32(0xC022001C)
+#define STATUS_FWP_INVALID_ENUMERATOR cpu_to_le32(0xC022001D)
+#define STATUS_FWP_INVALID_FLAGS cpu_to_le32(0xC022001E)
+#define STATUS_FWP_INVALID_NET_MASK cpu_to_le32(0xC022001F)
+#define STATUS_FWP_INVALID_RANGE cpu_to_le32(0xC0220020)
+#define STATUS_FWP_INVALID_INTERVAL cpu_to_le32(0xC0220021)
+#define STATUS_FWP_ZERO_LENGTH_ARRAY cpu_to_le32(0xC0220022)
+#define STATUS_FWP_NULL_DISPLAY_NAME cpu_to_le32(0xC0220023)
+#define STATUS_FWP_INVALID_ACTION_TYPE cpu_to_le32(0xC0220024)
+#define STATUS_FWP_INVALID_WEIGHT cpu_to_le32(0xC0220025)
+#define STATUS_FWP_MATCH_TYPE_MISMATCH cpu_to_le32(0xC0220026)
+#define STATUS_FWP_TYPE_MISMATCH cpu_to_le32(0xC0220027)
+#define STATUS_FWP_OUT_OF_BOUNDS cpu_to_le32(0xC0220028)
+#define STATUS_FWP_RESERVED cpu_to_le32(0xC0220029)
+#define STATUS_FWP_DUPLICATE_CONDITION cpu_to_le32(0xC022002A)
+#define STATUS_FWP_DUPLICATE_KEYMOD cpu_to_le32(0xC022002B)
+#define STATUS_FWP_ACTION_INCOMPATIBLE_WITH_LAYER cpu_to_le32(0xC022002C)
+#define STATUS_FWP_ACTION_INCOMPATIBLE_WITH_SUBLAYER cpu_to_le32(0xC022002D)
+#define STATUS_FWP_CONTEXT_INCOMPATIBLE_WITH_LAYER cpu_to_le32(0xC022002E)
+#define STATUS_FWP_CONTEXT_INCOMPATIBLE_WITH_CALLOUT cpu_to_le32(0xC022002F)
+#define STATUS_FWP_INCOMPATIBLE_AUTH_METHOD cpu_to_le32(0xC0220030)
+#define STATUS_FWP_INCOMPATIBLE_DH_GROUP cpu_to_le32(0xC0220031)
+#define STATUS_FWP_EM_NOT_SUPPORTED cpu_to_le32(0xC0220032)
+#define STATUS_FWP_NEVER_MATCH cpu_to_le32(0xC0220033)
+#define STATUS_FWP_PROVIDER_CONTEXT_MISMATCH cpu_to_le32(0xC0220034)
+#define STATUS_FWP_INVALID_PARAMETER cpu_to_le32(0xC0220035)
+#define STATUS_FWP_TOO_MANY_SUBLAYERS cpu_to_le32(0xC0220036)
+#define STATUS_FWP_CALLOUT_NOTIFICATION_FAILED cpu_to_le32(0xC0220037)
+#define STATUS_FWP_INCOMPATIBLE_AUTH_CONFIG cpu_to_le32(0xC0220038)
+#define STATUS_FWP_INCOMPATIBLE_CIPHER_CONFIG cpu_to_le32(0xC0220039)
+#define STATUS_FWP_TCPIP_NOT_READY cpu_to_le32(0xC0220100)
+#define STATUS_FWP_INJECT_HANDLE_CLOSING cpu_to_le32(0xC0220101)
+#define STATUS_FWP_INJECT_HANDLE_STALE cpu_to_le32(0xC0220102)
+#define STATUS_FWP_CANNOT_PEND cpu_to_le32(0xC0220103)
+#define STATUS_NDIS_CLOSING cpu_to_le32(0xC0230002)
+#define STATUS_NDIS_BAD_VERSION cpu_to_le32(0xC0230004)
+#define STATUS_NDIS_BAD_CHARACTERISTICS cpu_to_le32(0xC0230005)
+#define STATUS_NDIS_ADAPTER_NOT_FOUND cpu_to_le32(0xC0230006)
+#define STATUS_NDIS_OPEN_FAILED cpu_to_le32(0xC0230007)
+#define STATUS_NDIS_DEVICE_FAILED cpu_to_le32(0xC0230008)
+#define STATUS_NDIS_MULTICAST_FULL cpu_to_le32(0xC0230009)
+#define STATUS_NDIS_MULTICAST_EXISTS cpu_to_le32(0xC023000A)
+#define STATUS_NDIS_MULTICAST_NOT_FOUND cpu_to_le32(0xC023000B)
+#define STATUS_NDIS_REQUEST_ABORTED cpu_to_le32(0xC023000C)
+#define STATUS_NDIS_RESET_IN_PROGRESS cpu_to_le32(0xC023000D)
+#define STATUS_NDIS_INVALID_PACKET cpu_to_le32(0xC023000F)
+#define STATUS_NDIS_INVALID_DEVICE_REQUEST cpu_to_le32(0xC0230010)
+#define STATUS_NDIS_ADAPTER_NOT_READY cpu_to_le32(0xC0230011)
+#define STATUS_NDIS_INVALID_LENGTH cpu_to_le32(0xC0230014)
+#define STATUS_NDIS_INVALID_DATA cpu_to_le32(0xC0230015)
+#define STATUS_NDIS_BUFFER_TOO_SHORT cpu_to_le32(0xC0230016)
+#define STATUS_NDIS_INVALID_OID cpu_to_le32(0xC0230017)
+#define STATUS_NDIS_ADAPTER_REMOVED cpu_to_le32(0xC0230018)
+#define STATUS_NDIS_UNSUPPORTED_MEDIA cpu_to_le32(0xC0230019)
+#define STATUS_NDIS_GROUP_ADDRESS_IN_USE cpu_to_le32(0xC023001A)
+#define STATUS_NDIS_FILE_NOT_FOUND cpu_to_le32(0xC023001B)
+#define STATUS_NDIS_ERROR_READING_FILE cpu_to_le32(0xC023001C)
+#define STATUS_NDIS_ALREADY_MAPPED cpu_to_le32(0xC023001D)
+#define STATUS_NDIS_RESOURCE_CONFLICT cpu_to_le32(0xC023001E)
+#define STATUS_NDIS_MEDIA_DISCONNECTED cpu_to_le32(0xC023001F)
+#define STATUS_NDIS_INVALID_ADDRESS cpu_to_le32(0xC0230022)
+#define STATUS_NDIS_PAUSED cpu_to_le32(0xC023002A)
+#define STATUS_NDIS_INTERFACE_NOT_FOUND cpu_to_le32(0xC023002B)
+#define STATUS_NDIS_UNSUPPORTED_REVISION cpu_to_le32(0xC023002C)
+#define STATUS_NDIS_INVALID_PORT cpu_to_le32(0xC023002D)
+#define STATUS_NDIS_INVALID_PORT_STATE cpu_to_le32(0xC023002E)
+#define STATUS_NDIS_LOW_POWER_STATE cpu_to_le32(0xC023002F)
+#define STATUS_NDIS_NOT_SUPPORTED cpu_to_le32(0xC02300BB)
+#define STATUS_NDIS_DOT11_AUTO_CONFIG_ENABLED cpu_to_le32(0xC0232000)
+#define STATUS_NDIS_DOT11_MEDIA_IN_USE cpu_to_le32(0xC0232001)
+#define STATUS_NDIS_DOT11_POWER_STATE_INVALID cpu_to_le32(0xC0232002)
+#define STATUS_IPSEC_BAD_SPI cpu_to_le32(0xC0360001)
+#define STATUS_IPSEC_SA_LIFETIME_EXPIRED cpu_to_le32(0xC0360002)
+#define STATUS_IPSEC_WRONG_SA cpu_to_le32(0xC0360003)
+#define STATUS_IPSEC_REPLAY_CHECK_FAILED cpu_to_le32(0xC0360004)
+#define STATUS_IPSEC_INVALID_PACKET cpu_to_le32(0xC0360005)
+#define STATUS_IPSEC_INTEGRITY_CHECK_FAILED cpu_to_le32(0xC0360006)
+#define STATUS_IPSEC_CLEAR_TEXT_DROP cpu_to_le32(0xC0360007)
+
+#define STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP cpu_to_le32(0xC05D0000)
+#define STATUS_INVALID_LOCK_RANGE cpu_to_le32(0xC00001A1)
diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c
new file mode 100644
index 000000000000..44aea33a67fa
--- /dev/null
+++ b/fs/ksmbd/transport_ipc.c
@@ -0,0 +1,874 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/hashtable.h>
+#include <net/net_namespace.h>
+#include <net/genetlink.h>
+#include <linux/socket.h>
+#include <linux/workqueue.h>
+
+#include "vfs_cache.h"
+#include "transport_ipc.h"
+#include "server.h"
+#include "smb_common.h"
+
+#include "mgmt/user_config.h"
+#include "mgmt/share_config.h"
+#include "mgmt/user_session.h"
+#include "mgmt/tree_connect.h"
+#include "mgmt/ksmbd_ida.h"
+#include "connection.h"
+#include "transport_tcp.h"
+
+#define IPC_WAIT_TIMEOUT (2 * HZ)
+
+#define IPC_MSG_HASH_BITS 3
+static DEFINE_HASHTABLE(ipc_msg_table, IPC_MSG_HASH_BITS);
+static DECLARE_RWSEM(ipc_msg_table_lock);
+static DEFINE_MUTEX(startup_lock);
+
+static DEFINE_IDA(ipc_ida);
+
+static unsigned int ksmbd_tools_pid;
+
+static bool ksmbd_ipc_validate_version(struct genl_info *m)
+{
+ if (m->genlhdr->version != KSMBD_GENL_VERSION) {
+ pr_err("%s. ksmbd: %d, kernel module: %d. %s.\n",
+ "Daemon and kernel module version mismatch",
+ m->genlhdr->version,
+ KSMBD_GENL_VERSION,
+ "User-space ksmbd should terminate");
+ return false;
+ }
+ return true;
+}
+
+struct ksmbd_ipc_msg {
+ unsigned int type;
+ unsigned int sz;
+ unsigned char payload[];
+};
+
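+/*
+ * A pending request parked in ipc_msg_table, keyed by handle. The sender
+ * sleeps on @wait until handle_response() fills in @response or the
+ * IPC_WAIT_TIMEOUT expires.
+ */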
+struct ipc_msg_table_entry {
+ unsigned int handle;
+ unsigned int type;
+ wait_queue_head_t wait;
+ struct hlist_node ipc_table_hlist;
+
+ void *response;
+};
+
+static struct delayed_work ipc_timer_work;
+
+static int handle_startup_event(struct sk_buff *skb, struct genl_info *info);
+static int handle_unsupported_event(struct sk_buff *skb, struct genl_info *info);
+static int handle_generic_event(struct sk_buff *skb, struct genl_info *info);
+static int ksmbd_ipc_heartbeat_request(void);
+
+/* Indexed by event type up to .maxattr, so the array needs max + 1 slots */
+static const struct nla_policy ksmbd_nl_policy[KSMBD_EVENT_MAX + 1] = {
+ [KSMBD_EVENT_UNSPEC] = {
+ .len = 0,
+ },
+ [KSMBD_EVENT_HEARTBEAT_REQUEST] = {
+ .len = sizeof(struct ksmbd_heartbeat),
+ },
+ [KSMBD_EVENT_STARTING_UP] = {
+ .len = sizeof(struct ksmbd_startup_request),
+ },
+ [KSMBD_EVENT_SHUTTING_DOWN] = {
+ .len = sizeof(struct ksmbd_shutdown_request),
+ },
+ [KSMBD_EVENT_LOGIN_REQUEST] = {
+ .len = sizeof(struct ksmbd_login_request),
+ },
+ [KSMBD_EVENT_LOGIN_RESPONSE] = {
+ .len = sizeof(struct ksmbd_login_response),
+ },
+ [KSMBD_EVENT_SHARE_CONFIG_REQUEST] = {
+ .len = sizeof(struct ksmbd_share_config_request),
+ },
+ [KSMBD_EVENT_SHARE_CONFIG_RESPONSE] = {
+ .len = sizeof(struct ksmbd_share_config_response),
+ },
+ [KSMBD_EVENT_TREE_CONNECT_REQUEST] = {
+ .len = sizeof(struct ksmbd_tree_connect_request),
+ },
+ [KSMBD_EVENT_TREE_CONNECT_RESPONSE] = {
+ .len = sizeof(struct ksmbd_tree_connect_response),
+ },
+ [KSMBD_EVENT_TREE_DISCONNECT_REQUEST] = {
+ .len = sizeof(struct ksmbd_tree_disconnect_request),
+ },
+ [KSMBD_EVENT_LOGOUT_REQUEST] = {
+ .len = sizeof(struct ksmbd_logout_request),
+ },
+ [KSMBD_EVENT_RPC_REQUEST] = {
+ },
+ [KSMBD_EVENT_RPC_RESPONSE] = {
+ },
+ [KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST] = {
+ },
+ [KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE] = {
+ },
+};
+
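+/*
+ * Requests originate in the kernel, so only the *_RESPONSE events and
+ * KSMBD_EVENT_STARTING_UP are accepted from user space; any
+ * kernel-originated request type arriving here is rejected.
+ */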
+static struct genl_ops ksmbd_genl_ops[] = {
+ {
+ .cmd = KSMBD_EVENT_UNSPEC,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_HEARTBEAT_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_STARTING_UP,
+ .doit = handle_startup_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_SHUTTING_DOWN,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_LOGIN_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_LOGIN_RESPONSE,
+ .doit = handle_generic_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_SHARE_CONFIG_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_SHARE_CONFIG_RESPONSE,
+ .doit = handle_generic_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_TREE_CONNECT_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_TREE_CONNECT_RESPONSE,
+ .doit = handle_generic_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_TREE_DISCONNECT_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_LOGOUT_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_RPC_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_RPC_RESPONSE,
+ .doit = handle_generic_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST,
+ .doit = handle_unsupported_event,
+ },
+ {
+ .cmd = KSMBD_EVENT_SPNEGO_AUTHEN_RESPONSE,
+ .doit = handle_generic_event,
+ },
+};
+
+static struct genl_family ksmbd_genl_family = {
+ .name = KSMBD_GENL_NAME,
+ .version = KSMBD_GENL_VERSION,
+ .hdrsize = 0,
+ .maxattr = KSMBD_EVENT_MAX,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = ksmbd_genl_ops,
+ .n_ops = ARRAY_SIZE(ksmbd_genl_ops),
+};
+
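+/*
+ * Run before genl_register_family(): disable strict/dump validation on
+ * every op and attach the attribute policy to the family at runtime.
+ */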
+static void ksmbd_nl_init_fixup(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ksmbd_genl_ops); i++)
+ ksmbd_genl_ops[i].validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP;
+
+ ksmbd_genl_family.policy = ksmbd_nl_policy;
+}
+
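+/* Guest sessions are marked so the daemon can restrict the RPC pipes */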
+static int rpc_context_flags(struct ksmbd_session *sess)
+{
+ if (user_guest(sess->user))
+ return KSMBD_RPC_RESTRICTED_CONTEXT;
+ return 0;
+}
+
+static void ipc_update_last_active(void)
+{
+ if (server_conf.ipc_timeout)
+ server_conf.ipc_last_active = jiffies;
+}
+
+static struct ksmbd_ipc_msg *ipc_msg_alloc(size_t sz)
+{
+ struct ksmbd_ipc_msg *msg;
+ size_t msg_sz = sz + sizeof(struct ksmbd_ipc_msg);
+
+ msg = kvmalloc(msg_sz, GFP_KERNEL | __GFP_ZERO);
+ if (msg)
+ msg->sz = sz;
+ return msg;
+}
+
+static void ipc_msg_free(struct ksmbd_ipc_msg *msg)
+{
+ kvfree(msg);
+}
+
+static void ipc_msg_handle_free(int handle)
+{
+ if (handle >= 0)
+ ksmbd_release_id(&ipc_ida, handle);
+}
+
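+/*
+ * Match a response to the pending request with the same handle (the
+ * handle is the first field of every payload), copy the payload for the
+ * waiter and wake it up.
+ */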
+static int handle_response(int type, void *payload, size_t sz)
+{
+ unsigned int handle = *(unsigned int *)payload;
+ struct ipc_msg_table_entry *entry;
+ int ret = 0;
+
+ ipc_update_last_active();
+ down_read(&ipc_msg_table_lock);
+ hash_for_each_possible(ipc_msg_table, entry, ipc_table_hlist, handle) {
+ if (handle != entry->handle)
+ continue;
+
+ entry->response = NULL;
+ /*
+ * Response message type value should be equal to
+ * request message type + 1.
+ */
+ if (entry->type + 1 != type) {
+ pr_err("Waiting for IPC type %d, got %d. Ignore.\n",
+ entry->type + 1, type);
+ }
+
+ entry->response = kvmalloc(sz, GFP_KERNEL | __GFP_ZERO);
+ if (!entry->response) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ memcpy(entry->response, payload, sz);
+ wake_up_interruptible(&entry->wait);
+ ret = 0;
+ break;
+ }
+ up_read(&ipc_msg_table_lock);
+
+ return ret;
+}
+
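+/*
+ * Apply the configuration carried by KSMBD_EVENT_STARTING_UP: limits,
+ * ports, timeouts, I/O sizes, protocol range and network interfaces.
+ */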
+static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
+{
+ int ret;
+
+ ksmbd_set_fd_limit(req->file_max);
+ server_conf.flags = req->flags;
+ server_conf.signing = req->signing;
+ server_conf.tcp_port = req->tcp_port;
+ server_conf.ipc_timeout = req->ipc_timeout * HZ;
+ server_conf.deadtime = req->deadtime * SMB_ECHO_INTERVAL;
+ server_conf.share_fake_fscaps = req->share_fake_fscaps;
+ ksmbd_init_domain(req->sub_auth);
+
+ if (req->smb2_max_read)
+ init_smb2_max_read_size(req->smb2_max_read);
+ if (req->smb2_max_write)
+ init_smb2_max_write_size(req->smb2_max_write);
+ if (req->smb2_max_trans)
+ init_smb2_max_trans_size(req->smb2_max_trans);
+
+ ret = ksmbd_set_netbios_name(req->netbios_name);
+ ret |= ksmbd_set_server_string(req->server_string);
+ ret |= ksmbd_set_work_group(req->work_group);
+ ret |= ksmbd_tcp_set_interfaces(KSMBD_STARTUP_CONFIG_INTERFACES(req),
+ req->ifc_list_sz);
+ if (ret) {
+ pr_err("Server configuration error: %s %s %s\n",
+ req->netbios_name, req->server_string,
+ req->work_group);
+ return ret;
+ }
+
+ if (req->min_prot[0]) {
+ ret = ksmbd_lookup_protocol_idx(req->min_prot);
+ if (ret >= 0)
+ server_conf.min_protocol = ret;
+ }
+ if (req->max_prot[0]) {
+ ret = ksmbd_lookup_protocol_idx(req->max_prot);
+ if (ret >= 0)
+ server_conf.max_protocol = ret;
+ }
+
+ if (server_conf.ipc_timeout)
+ schedule_delayed_work(&ipc_timer_work, server_conf.ipc_timeout);
+ return 0;
+}
+
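+/*
+ * First contact from the user-space daemon: check the version and the
+ * attribute, then either probe an already registered daemon with a
+ * heartbeat or apply the startup config, and remember the sender's
+ * portid as ksmbd_tools_pid.
+ */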
+static int handle_startup_event(struct sk_buff *skb, struct genl_info *info)
+{
+ int ret = 0;
+
+#ifdef CONFIG_SMB_SERVER_CHECK_CAP_NET_ADMIN
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
+ return -EPERM;
+#endif
+
+ if (!ksmbd_ipc_validate_version(info))
+ return -EINVAL;
+
+ if (!info->attrs[KSMBD_EVENT_STARTING_UP])
+ return -EINVAL;
+
+ mutex_lock(&startup_lock);
+ if (!ksmbd_server_configurable()) {
+ mutex_unlock(&startup_lock);
+ pr_err("Server reset is in progress, can't start daemon\n");
+ return -EINVAL;
+ }
+
+ if (ksmbd_tools_pid) {
+ if (ksmbd_ipc_heartbeat_request() == 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ pr_err("Reconnect to a new user space daemon\n");
+ } else {
+ struct ksmbd_startup_request *req;
+
+ req = nla_data(info->attrs[info->genlhdr->cmd]);
+ ret = ipc_server_config_on_startup(req);
+ if (ret)
+ goto out;
+ server_queue_ctrl_init_work();
+ }
+
+ ksmbd_tools_pid = info->snd_portid;
+ ipc_update_last_active();
+
+out:
+ mutex_unlock(&startup_lock);
+ return ret;
+}
+
+static int handle_unsupported_event(struct sk_buff *skb, struct genl_info *info)
+{
+ pr_err("Unknown IPC event: %d, ignore.\n", info->genlhdr->cmd);
+ return -EINVAL;
+}
+
+static int handle_generic_event(struct sk_buff *skb, struct genl_info *info)
+{
+ void *payload;
+ int sz;
+ int type = info->genlhdr->cmd;
+
+#ifdef CONFIG_SMB_SERVER_CHECK_CAP_NET_ADMIN
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
+ return -EPERM;
+#endif
+
+ if (type >= KSMBD_EVENT_MAX) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ if (!ksmbd_ipc_validate_version(info))
+ return -EINVAL;
+
+ if (!info->attrs[type])
+ return -EINVAL;
+
+ payload = nla_data(info->attrs[info->genlhdr->cmd]);
+ sz = nla_len(info->attrs[info->genlhdr->cmd]);
+ return handle_response(type, payload, sz);
+}
+
+static int ipc_msg_send(struct ksmbd_ipc_msg *msg)
+{
+ struct genlmsghdr *nlh;
+ struct sk_buff *skb;
+ int ret = -EINVAL;
+
+ if (!ksmbd_tools_pid)
+ return ret;
+
+ skb = genlmsg_new(msg->sz, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ nlh = genlmsg_put(skb, 0, 0, &ksmbd_genl_family, 0, msg->type);
+ if (!nlh)
+ goto out;
+
+ ret = nla_put(skb, msg->type, msg->sz, msg->payload);
+ if (ret) {
+ genlmsg_cancel(skb, nlh);
+ goto out;
+ }
+
+ genlmsg_end(skb, nlh);
+ ret = genlmsg_unicast(&init_net, skb, ksmbd_tools_pid);
+ if (!ret)
+ ipc_update_last_active();
+ return ret;
+
+out:
+ nlmsg_free(skb);
+ return ret;
+}
+
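+/*
+ * Send @msg and sleep until handle_response() posts a reply with a
+ * matching handle. Returns the kvmalloc'ed response payload, or NULL on
+ * send failure or timeout; the caller owns (and frees) the response.
+ */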
+static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle)
+{
+ struct ipc_msg_table_entry entry;
+ int ret;
+
+ if ((int)handle < 0)
+ return NULL;
+
+ entry.type = msg->type;
+ entry.response = NULL;
+ init_waitqueue_head(&entry.wait);
+
+ down_write(&ipc_msg_table_lock);
+ entry.handle = handle;
+ hash_add(ipc_msg_table, &entry.ipc_table_hlist, entry.handle);
+ up_write(&ipc_msg_table_lock);
+
+ ret = ipc_msg_send(msg);
+ if (ret)
+ goto out;
+
+ ret = wait_event_interruptible_timeout(entry.wait,
+ entry.response != NULL,
+ IPC_WAIT_TIMEOUT);
+out:
+ down_write(&ipc_msg_table_lock);
+ hash_del(&entry.ipc_table_hlist);
+ up_write(&ipc_msg_table_lock);
+ return entry.response;
+}
+
+static int ksmbd_ipc_heartbeat_request(void)
+{
+ struct ksmbd_ipc_msg *msg;
+ int ret;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_heartbeat));
+ if (!msg)
+ return -EINVAL;
+
+ msg->type = KSMBD_EVENT_HEARTBEAT_REQUEST;
+ ret = ipc_msg_send(msg);
+ ipc_msg_free(msg);
+ return ret;
+}
+
+struct ksmbd_login_response *ksmbd_ipc_login_request(const char *account)
+{
+ struct ksmbd_ipc_msg *msg;
+ struct ksmbd_login_request *req;
+ struct ksmbd_login_response *resp;
+
+ if (strlen(account) >= KSMBD_REQ_MAX_ACCOUNT_NAME_SZ)
+ return NULL;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_login_request));
+ if (!msg)
+ return NULL;
+
+ msg->type = KSMBD_EVENT_LOGIN_REQUEST;
+ req = (struct ksmbd_login_request *)msg->payload;
+ req->handle = ksmbd_acquire_id(&ipc_ida);
+ strscpy(req->account, account, KSMBD_REQ_MAX_ACCOUNT_NAME_SZ);
+
+ resp = ipc_msg_send_request(msg, req->handle);
+ ipc_msg_handle_free(req->handle);
+ ipc_msg_free(msg);
+ return resp;
+}
+
+struct ksmbd_spnego_authen_response *
+ksmbd_ipc_spnego_authen_request(const char *spnego_blob, int blob_len)
+{
+ struct ksmbd_ipc_msg *msg;
+ struct ksmbd_spnego_authen_request *req;
+ struct ksmbd_spnego_authen_response *resp;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_spnego_authen_request) +
+ blob_len + 1);
+ if (!msg)
+ return NULL;
+
+ msg->type = KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST;
+ req = (struct ksmbd_spnego_authen_request *)msg->payload;
+ req->handle = ksmbd_acquire_id(&ipc_ida);
+ req->spnego_blob_len = blob_len;
+ memcpy(req->spnego_blob, spnego_blob, blob_len);
+
+ resp = ipc_msg_send_request(msg, req->handle);
+ ipc_msg_handle_free(req->handle);
+ ipc_msg_free(msg);
+ return resp;
+}
+
+struct ksmbd_tree_connect_response *
+ksmbd_ipc_tree_connect_request(struct ksmbd_session *sess,
+ struct ksmbd_share_config *share,
+ struct ksmbd_tree_connect *tree_conn,
+ struct sockaddr *peer_addr)
+{
+ struct ksmbd_ipc_msg *msg;
+ struct ksmbd_tree_connect_request *req;
+ struct ksmbd_tree_connect_response *resp;
+
+ if (strlen(user_name(sess->user)) >= KSMBD_REQ_MAX_ACCOUNT_NAME_SZ)
+ return NULL;
+
+ if (strlen(share->name) >= KSMBD_REQ_MAX_SHARE_NAME)
+ return NULL;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_tree_connect_request));
+ if (!msg)
+ return NULL;
+
+ msg->type = KSMBD_EVENT_TREE_CONNECT_REQUEST;
+ req = (struct ksmbd_tree_connect_request *)msg->payload;
+
+ req->handle = ksmbd_acquire_id(&ipc_ida);
+ req->account_flags = sess->user->flags;
+ req->session_id = sess->id;
+ req->connect_id = tree_conn->id;
+ strscpy(req->account, user_name(sess->user), KSMBD_REQ_MAX_ACCOUNT_NAME_SZ);
+ strscpy(req->share, share->name, KSMBD_REQ_MAX_SHARE_NAME);
+ snprintf(req->peer_addr, sizeof(req->peer_addr), "%pIS", peer_addr);
+
+ if (peer_addr->sa_family == AF_INET6)
+ req->flags |= KSMBD_TREE_CONN_FLAG_REQUEST_IPV6;
+ if (test_session_flag(sess, CIFDS_SESSION_FLAG_SMB2))
+ req->flags |= KSMBD_TREE_CONN_FLAG_REQUEST_SMB2;
+
+ resp = ipc_msg_send_request(msg, req->handle);
+ ipc_msg_handle_free(req->handle);
+ ipc_msg_free(msg);
+ return resp;
+}
+
+int ksmbd_ipc_tree_disconnect_request(unsigned long long session_id,
+ unsigned long long connect_id)
+{
+ struct ksmbd_ipc_msg *msg;
+ struct ksmbd_tree_disconnect_request *req;
+ int ret;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_tree_disconnect_request));
+ if (!msg)
+ return -ENOMEM;
+
+ msg->type = KSMBD_EVENT_TREE_DISCONNECT_REQUEST;
+ req = (struct ksmbd_tree_disconnect_request *)msg->payload;
+ req->session_id = session_id;
+ req->connect_id = connect_id;
+
+ ret = ipc_msg_send(msg);
+ ipc_msg_free(msg);
+ return ret;
+}
+
+int ksmbd_ipc_logout_request(const char *account)
+{
+ struct ksmbd_ipc_msg *msg;
+ struct ksmbd_logout_request *req;
+ int ret;
+
+ if (strlen(account) >= KSMBD_REQ_MAX_ACCOUNT_NAME_SZ)
+ return -EINVAL;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_logout_request));
+ if (!msg)
+ return -ENOMEM;
+
+ msg->type = KSMBD_EVENT_LOGOUT_REQUEST;
+ req = (struct ksmbd_logout_request *)msg->payload;
+ strscpy(req->account, account, KSMBD_REQ_MAX_ACCOUNT_NAME_SZ);
+
+ ret = ipc_msg_send(msg);
+ ipc_msg_free(msg);
+ return ret;
+}
+
+struct ksmbd_share_config_response *
+ksmbd_ipc_share_config_request(const char *name)
+{
+ struct ksmbd_ipc_msg *msg;
+ struct ksmbd_share_config_request *req;
+ struct ksmbd_share_config_response *resp;
+
+ if (strlen(name) >= KSMBD_REQ_MAX_SHARE_NAME)
+ return NULL;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_share_config_request));
+ if (!msg)
+ return NULL;
+
+ msg->type = KSMBD_EVENT_SHARE_CONFIG_REQUEST;
+ req = (struct ksmbd_share_config_request *)msg->payload;
+ req->handle = ksmbd_acquire_id(&ipc_ida);
+ strscpy(req->share_name, name, KSMBD_REQ_MAX_SHARE_NAME);
+
+ resp = ipc_msg_send_request(msg, req->handle);
+ ipc_msg_handle_free(req->handle);
+ ipc_msg_free(msg);
+ return resp;
+}
+
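+/*
+ * The ksmbd_rpc_* helpers below marshal pipe open/close/read/write/
+ * ioctl/rap operations into KSMBD_EVENT_RPC_REQUEST messages; the
+ * method and context are encoded in req->flags.
+ */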
+struct ksmbd_rpc_command *ksmbd_rpc_open(struct ksmbd_session *sess, int handle)
+{
+ struct ksmbd_ipc_msg *msg;
+ struct ksmbd_rpc_command *req;
+ struct ksmbd_rpc_command *resp;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command));
+ if (!msg)
+ return NULL;
+
+ msg->type = KSMBD_EVENT_RPC_REQUEST;
+ req = (struct ksmbd_rpc_command *)msg->payload;
+ req->handle = handle;
+ req->flags = ksmbd_session_rpc_method(sess, handle);
+ req->flags |= KSMBD_RPC_OPEN_METHOD;
+ req->payload_sz = 0;
+
+ resp = ipc_msg_send_request(msg, req->handle);
+ ipc_msg_free(msg);
+ return resp;
+}
+
+struct ksmbd_rpc_command *ksmbd_rpc_close(struct ksmbd_session *sess, int handle)
+{
+ struct ksmbd_ipc_msg *msg;
+ struct ksmbd_rpc_command *req;
+ struct ksmbd_rpc_command *resp;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command));
+ if (!msg)
+ return NULL;
+
+ msg->type = KSMBD_EVENT_RPC_REQUEST;
+ req = (struct ksmbd_rpc_command *)msg->payload;
+ req->handle = handle;
+ req->flags = ksmbd_session_rpc_method(sess, handle);
+ req->flags |= KSMBD_RPC_CLOSE_METHOD;
+ req->payload_sz = 0;
+
+ resp = ipc_msg_send_request(msg, req->handle);
+ ipc_msg_free(msg);
+ return resp;
+}
+
+struct ksmbd_rpc_command *ksmbd_rpc_write(struct ksmbd_session *sess, int handle,
+ void *payload, size_t payload_sz)
+{
+ struct ksmbd_ipc_msg *msg;
+ struct ksmbd_rpc_command *req;
+ struct ksmbd_rpc_command *resp;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command) + payload_sz + 1);
+ if (!msg)
+ return NULL;
+
+ msg->type = KSMBD_EVENT_RPC_REQUEST;
+ req = (struct ksmbd_rpc_command *)msg->payload;
+ req->handle = handle;
+ req->flags = ksmbd_session_rpc_method(sess, handle);
+ req->flags |= rpc_context_flags(sess);
+ req->flags |= KSMBD_RPC_WRITE_METHOD;
+ req->payload_sz = payload_sz;
+ memcpy(req->payload, payload, payload_sz);
+
+ resp = ipc_msg_send_request(msg, req->handle);
+ ipc_msg_free(msg);
+ return resp;
+}
+
+struct ksmbd_rpc_command *ksmbd_rpc_read(struct ksmbd_session *sess, int handle)
+{
+ struct ksmbd_ipc_msg *msg;
+ struct ksmbd_rpc_command *req;
+ struct ksmbd_rpc_command *resp;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command));
+ if (!msg)
+ return NULL;
+
+ msg->type = KSMBD_EVENT_RPC_REQUEST;
+ req = (struct ksmbd_rpc_command *)msg->payload;
+ req->handle = handle;
+ req->flags = ksmbd_session_rpc_method(sess, handle);
+ req->flags |= rpc_context_flags(sess);
+ req->flags |= KSMBD_RPC_READ_METHOD;
+ req->payload_sz = 0;
+
+ resp = ipc_msg_send_request(msg, req->handle);
+ ipc_msg_free(msg);
+ return resp;
+}
+
+struct ksmbd_rpc_command *ksmbd_rpc_ioctl(struct ksmbd_session *sess, int handle,
+ void *payload, size_t payload_sz)
+{
+ struct ksmbd_ipc_msg *msg;
+ struct ksmbd_rpc_command *req;
+ struct ksmbd_rpc_command *resp;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command) + payload_sz + 1);
+ if (!msg)
+ return NULL;
+
+ msg->type = KSMBD_EVENT_RPC_REQUEST;
+ req = (struct ksmbd_rpc_command *)msg->payload;
+ req->handle = handle;
+ req->flags = ksmbd_session_rpc_method(sess, handle);
+ req->flags |= rpc_context_flags(sess);
+ req->flags |= KSMBD_RPC_IOCTL_METHOD;
+ req->payload_sz = payload_sz;
+ memcpy(req->payload, payload, payload_sz);
+
+ resp = ipc_msg_send_request(msg, req->handle);
+ ipc_msg_free(msg);
+ return resp;
+}
+
+struct ksmbd_rpc_command *ksmbd_rpc_rap(struct ksmbd_session *sess, void *payload,
+ size_t payload_sz)
+{
+ struct ksmbd_ipc_msg *msg;
+ struct ksmbd_rpc_command *req;
+ struct ksmbd_rpc_command *resp;
+
+ msg = ipc_msg_alloc(sizeof(struct ksmbd_rpc_command) + payload_sz + 1);
+ if (!msg)
+ return NULL;
+
+ msg->type = KSMBD_EVENT_RPC_REQUEST;
+ req = (struct ksmbd_rpc_command *)msg->payload;
+ req->handle = ksmbd_acquire_id(&ipc_ida);
+ req->flags = rpc_context_flags(sess);
+ req->flags |= KSMBD_RPC_RAP_METHOD;
+ req->payload_sz = payload_sz;
+ memcpy(req->payload, payload, payload_sz);
+
+ resp = ipc_msg_send_request(msg, req->handle);
+ ipc_msg_handle_free(req->handle);
+ ipc_msg_free(msg);
+ return resp;
+}
+
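+/*
+ * Check that the user-space daemon is still responding over IPC. If it
+ * was active within ipc_timeout, simply re-arm the timer; otherwise send
+ * an explicit heartbeat request. If even that gets no response, mark the
+ * server state as resetting so the caller can queue a full reset.
+ */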
+static int __ipc_heartbeat(void)
+{
+ unsigned long delta;
+
+ if (!ksmbd_server_running())
+ return 0;
+
+ if (time_after(jiffies, server_conf.ipc_last_active)) {
+ delta = (jiffies - server_conf.ipc_last_active);
+ } else {
+ ipc_update_last_active();
+ schedule_delayed_work(&ipc_timer_work,
+ server_conf.ipc_timeout);
+ return 0;
+ }
+
+ if (delta < server_conf.ipc_timeout) {
+ schedule_delayed_work(&ipc_timer_work,
+ server_conf.ipc_timeout - delta);
+ return 0;
+ }
+
+ if (ksmbd_ipc_heartbeat_request() == 0) {
+ schedule_delayed_work(&ipc_timer_work,
+ server_conf.ipc_timeout);
+ return 0;
+ }
+
+ mutex_lock(&startup_lock);
+ WRITE_ONCE(server_conf.state, SERVER_STATE_RESETTING);
+ server_conf.ipc_last_active = 0;
+ ksmbd_tools_pid = 0;
+ pr_err("No IPC daemon response for %lus\n", delta / HZ);
+ mutex_unlock(&startup_lock);
+ return -EINVAL;
+}
+
+static void ipc_timer_heartbeat(struct work_struct *w)
+{
+ if (__ipc_heartbeat())
+ server_queue_ctrl_reset_work();
+}
+
+int ksmbd_ipc_id_alloc(void)
+{
+ return ksmbd_acquire_id(&ipc_ida);
+}
+
+void ksmbd_rpc_id_free(int handle)
+{
+ ksmbd_release_id(&ipc_ida, handle);
+}
+
+void ksmbd_ipc_release(void)
+{
+ cancel_delayed_work_sync(&ipc_timer_work);
+ genl_unregister_family(&ksmbd_genl_family);
+}
+
+void ksmbd_ipc_soft_reset(void)
+{
+ mutex_lock(&startup_lock);
+ ksmbd_tools_pid = 0;
+ cancel_delayed_work_sync(&ipc_timer_work);
+ mutex_unlock(&startup_lock);
+}
+
+int ksmbd_ipc_init(void)
+{
+ int ret = 0;
+
+ ksmbd_nl_init_fixup();
+ INIT_DELAYED_WORK(&ipc_timer_work, ipc_timer_heartbeat);
+
+ ret = genl_register_family(&ksmbd_genl_family);
+ if (ret) {
+ pr_err("Failed to register KSMBD netlink interface %d\n", ret);
+ cancel_delayed_work_sync(&ipc_timer_work);
+ }
+
+ return ret;
+}
diff --git a/fs/ksmbd/transport_ipc.h b/fs/ksmbd/transport_ipc.h
new file mode 100644
index 000000000000..9eacc895ffdb
--- /dev/null
+++ b/fs/ksmbd/transport_ipc.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_TRANSPORT_IPC_H__
+#define __KSMBD_TRANSPORT_IPC_H__
+
+#include <linux/wait.h>
+
+#define KSMBD_IPC_MAX_PAYLOAD 4096
+
+struct ksmbd_login_response *
+ksmbd_ipc_login_request(const char *account);
+
+struct ksmbd_session;
+struct ksmbd_share_config;
+struct ksmbd_tree_connect;
+struct sockaddr;
+
+struct ksmbd_tree_connect_response *
+ksmbd_ipc_tree_connect_request(struct ksmbd_session *sess,
+ struct ksmbd_share_config *share,
+ struct ksmbd_tree_connect *tree_conn,
+ struct sockaddr *peer_addr);
+int ksmbd_ipc_tree_disconnect_request(unsigned long long session_id,
+ unsigned long long connect_id);
+int ksmbd_ipc_logout_request(const char *account);
+struct ksmbd_share_config_response *
+ksmbd_ipc_share_config_request(const char *name);
+struct ksmbd_spnego_authen_response *
+ksmbd_ipc_spnego_authen_request(const char *spnego_blob, int blob_len);
+int ksmbd_ipc_id_alloc(void);
+void ksmbd_rpc_id_free(int handle);
+struct ksmbd_rpc_command *ksmbd_rpc_open(struct ksmbd_session *sess, int handle);
+struct ksmbd_rpc_command *ksmbd_rpc_close(struct ksmbd_session *sess, int handle);
+struct ksmbd_rpc_command *ksmbd_rpc_write(struct ksmbd_session *sess, int handle,
+ void *payload, size_t payload_sz);
+struct ksmbd_rpc_command *ksmbd_rpc_read(struct ksmbd_session *sess, int handle);
+struct ksmbd_rpc_command *ksmbd_rpc_ioctl(struct ksmbd_session *sess, int handle,
+ void *payload, size_t payload_sz);
+struct ksmbd_rpc_command *ksmbd_rpc_rap(struct ksmbd_session *sess, void *payload,
+ size_t payload_sz);
+void ksmbd_ipc_release(void);
+void ksmbd_ipc_soft_reset(void);
+int ksmbd_ipc_init(void);
+#endif /* __KSMBD_TRANSPORT_IPC_H__ */
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
new file mode 100644
index 000000000000..58f530056ac0
--- /dev/null
+++ b/fs/ksmbd/transport_rdma.c
@@ -0,0 +1,2058 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2017, Microsoft Corporation.
+ * Copyright (C) 2018, LG Electronics.
+ *
+ * Author(s): Long Li <longli@microsoft.com>,
+ * Hyunchul Lee <hyc.lee@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#define SUBMOD_NAME "smb_direct"
+
+#include <linux/kthread.h>
+#include <linux/rwlock.h>
+#include <linux/list.h>
+#include <linux/mempool.h>
+#include <linux/highmem.h>
+#include <linux/scatterlist.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/rw.h>
+
+#include "glob.h"
+#include "connection.h"
+#include "smb_common.h"
+#include "smbstatus.h"
+#include "transport_rdma.h"
+
+#define SMB_DIRECT_PORT 5445
+
+#define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100)
+
+/* SMB_DIRECT negotiation timeout in seconds */
+#define SMB_DIRECT_NEGOTIATE_TIMEOUT 120
+
+#define SMB_DIRECT_MAX_SEND_SGES 8
+#define SMB_DIRECT_MAX_RECV_SGES 1
+
+/*
+ * Default maximum number of outstanding RDMA read/write operations on
+ * this connection. This value may be lowered during QP creation if the
+ * hardware imposes a smaller limit.
+ */
+#define SMB_DIRECT_CM_INITIATOR_DEPTH 8
+
+/* Maximum number of retries on data transfer operations */
+#define SMB_DIRECT_CM_RETRY 6
+/* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
+#define SMB_DIRECT_CM_RNR_RETRY 0
+
+/*
+ * User-configurable initial values per SMB_DIRECT transport connection,
+ * as defined in [MS-SMBD] 3.1.1.1.
+ * These may change after SMB_DIRECT negotiation.
+ */
+/* The local peer's maximum number of credits to grant to the peer */
+static int smb_direct_receive_credit_max = 255;
+
+/* The number of credits the local peer requests from the remote peer */
+static int smb_direct_send_credit_target = 255;
+
+/* The maximum size of a single message that can be sent to the remote peer */
+static int smb_direct_max_send_size = 8192;
+
+/* The maximum fragmented upper-layer payload receive size supported */
+static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
+
+/* The maximum single-message size which can be received */
+static int smb_direct_max_receive_size = 8192;
+
+static int smb_direct_max_read_write_size = 1024 * 1024;
+
+static int smb_direct_max_outstanding_rw_ops = 8;
+
+static struct smb_direct_listener {
+ struct rdma_cm_id *cm_id;
+} smb_direct_listener;
+
+static struct workqueue_struct *smb_direct_wq;
+
+enum smb_direct_status {
+ SMB_DIRECT_CS_NEW = 0,
+ SMB_DIRECT_CS_CONNECTED,
+ SMB_DIRECT_CS_DISCONNECTING,
+ SMB_DIRECT_CS_DISCONNECTED,
+};
+
+struct smb_direct_transport {
+ struct ksmbd_transport transport;
+
+ enum smb_direct_status status;
+ bool full_packet_received;
+ wait_queue_head_t wait_status;
+
+ struct rdma_cm_id *cm_id;
+ struct ib_cq *send_cq;
+ struct ib_cq *recv_cq;
+ struct ib_pd *pd;
+ struct ib_qp *qp;
+
+ int max_send_size;
+ int max_recv_size;
+ int max_fragmented_send_size;
+ int max_fragmented_recv_size;
+ int max_rdma_rw_size;
+
+ spinlock_t reassembly_queue_lock;
+ struct list_head reassembly_queue;
+ int reassembly_data_length;
+ int reassembly_queue_length;
+ int first_entry_offset;
+ wait_queue_head_t wait_reassembly_queue;
+
+ spinlock_t receive_credit_lock;
+ int recv_credits;
+ int count_avail_recvmsg;
+ int recv_credit_max;
+ int recv_credit_target;
+
+ spinlock_t recvmsg_queue_lock;
+ struct list_head recvmsg_queue;
+
+ spinlock_t empty_recvmsg_queue_lock;
+ struct list_head empty_recvmsg_queue;
+
+ int send_credit_target;
+ atomic_t send_credits;
+ spinlock_t lock_new_recv_credits;
+ int new_recv_credits;
+ atomic_t rw_avail_ops;
+
+ wait_queue_head_t wait_send_credits;
+ wait_queue_head_t wait_rw_avail_ops;
+
+ mempool_t *sendmsg_mempool;
+ struct kmem_cache *sendmsg_cache;
+ mempool_t *recvmsg_mempool;
+ struct kmem_cache *recvmsg_cache;
+
+ wait_queue_head_t wait_send_payload_pending;
+ atomic_t send_payload_pending;
+ wait_queue_head_t wait_send_pending;
+ atomic_t send_pending;
+
+ struct delayed_work post_recv_credits_work;
+ struct work_struct send_immediate_work;
+ struct work_struct disconnect_work;
+
+ bool negotiation_requested;
+};
+
+#define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
+
+enum {
+ SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
+ SMB_DIRECT_MSG_DATA_TRANSFER
+};
+
+static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
+
+struct smb_direct_send_ctx {
+ struct list_head msg_list;
+ int wr_cnt;
+ bool need_invalidate_rkey;
+ unsigned int remote_key;
+};
+
+struct smb_direct_sendmsg {
+ struct smb_direct_transport *transport;
+ struct ib_send_wr wr;
+ struct list_head list;
+ int num_sge;
+ struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES];
+ struct ib_cqe cqe;
+ u8 packet[];
+};
+
+struct smb_direct_recvmsg {
+ struct smb_direct_transport *transport;
+ struct list_head list;
+ int type;
+ struct ib_sge sge;
+ struct ib_cqe cqe;
+ bool first_segment;
+ u8 packet[];
+};
+
+struct smb_direct_rdma_rw_msg {
+ struct smb_direct_transport *t;
+ struct ib_cqe cqe;
+ struct completion *completion;
+ struct rdma_rw_ctx rw_ctx;
+ struct sg_table sgt;
+ struct scatterlist sg_list[];
+};
+
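+/* Number of pages spanned by @buf, counting partial first and last pages */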
+static inline int get_buf_page_count(void *buf, int size)
+{
+ return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
+ (uintptr_t)buf / PAGE_SIZE;
+}
+
+static void smb_direct_destroy_pools(struct smb_direct_transport *transport);
+static void smb_direct_post_recv_credits(struct work_struct *work);
+static int smb_direct_post_send_data(struct smb_direct_transport *t,
+ struct smb_direct_send_ctx *send_ctx,
+ struct kvec *iov, int niov,
+ int remaining_data_length);
+
+static inline struct smb_direct_transport *
+smb_trans_direct_transport(struct ksmbd_transport *t)
+{
+ return container_of(t, struct smb_direct_transport, transport);
+}
+
+static inline void *smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg)
+{
+ return (void *)recvmsg->packet;
+}
+
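+/*
+ * Replenish posted receives only when the granted credits have dropped to
+ * 1/8 of the maximum and the available recvmsg pool covers at least a
+ * quarter of the remaining credits.
+ */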
+static inline bool is_receive_credit_post_required(int receive_credits,
+ int avail_recvmsg_count)
+{
+ return receive_credits <= (smb_direct_receive_credit_max >> 3) &&
+ avail_recvmsg_count >= (receive_credits >> 2);
+}
+
+static struct smb_direct_recvmsg *
+get_free_recvmsg(struct smb_direct_transport *t)
+{
+ struct smb_direct_recvmsg *recvmsg = NULL;
+
+ spin_lock(&t->recvmsg_queue_lock);
+ if (!list_empty(&t->recvmsg_queue)) {
+ recvmsg = list_first_entry(&t->recvmsg_queue,
+ struct smb_direct_recvmsg,
+ list);
+ list_del(&recvmsg->list);
+ }
+ spin_unlock(&t->recvmsg_queue_lock);
+ return recvmsg;
+}
+
+static void put_recvmsg(struct smb_direct_transport *t,
+ struct smb_direct_recvmsg *recvmsg)
+{
+ ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
+ recvmsg->sge.length, DMA_FROM_DEVICE);
+
+ spin_lock(&t->recvmsg_queue_lock);
+ list_add(&recvmsg->list, &t->recvmsg_queue);
+ spin_unlock(&t->recvmsg_queue_lock);
+}
+
+static struct smb_direct_recvmsg *
+get_empty_recvmsg(struct smb_direct_transport *t)
+{
+ struct smb_direct_recvmsg *recvmsg = NULL;
+
+ spin_lock(&t->empty_recvmsg_queue_lock);
+ if (!list_empty(&t->empty_recvmsg_queue)) {
+ recvmsg = list_first_entry(&t->empty_recvmsg_queue,
+ struct smb_direct_recvmsg, list);
+ list_del(&recvmsg->list);
+ }
+ spin_unlock(&t->empty_recvmsg_queue_lock);
+ return recvmsg;
+}
+
+static void put_empty_recvmsg(struct smb_direct_transport *t,
+ struct smb_direct_recvmsg *recvmsg)
+{
+ ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
+ recvmsg->sge.length, DMA_FROM_DEVICE);
+
+ spin_lock(&t->empty_recvmsg_queue_lock);
+ list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue);
+ spin_unlock(&t->empty_recvmsg_queue_lock);
+}
+
+static void enqueue_reassembly(struct smb_direct_transport *t,
+ struct smb_direct_recvmsg *recvmsg,
+ int data_length)
+{
+ spin_lock(&t->reassembly_queue_lock);
+ list_add_tail(&recvmsg->list, &t->reassembly_queue);
+ t->reassembly_queue_length++;
+ /*
+ * Make sure reassembly_data_length is updated after list and
+ * reassembly_queue_length are updated. On the dequeue side,
+ * reassembly_data_length is checked without a lock to determine
+ * whether reassembly_queue_length and the list are up to date.
+ */
+ virt_wmb();
+ t->reassembly_data_length += data_length;
+ spin_unlock(&t->reassembly_queue_lock);
+}
+
+static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t)
+{
+ if (!list_empty(&t->reassembly_queue))
+ return list_first_entry(&t->reassembly_queue,
+ struct smb_direct_recvmsg, list);
+ else
+ return NULL;
+}
+
+static void smb_direct_disconnect_rdma_work(struct work_struct *work)
+{
+ struct smb_direct_transport *t =
+ container_of(work, struct smb_direct_transport,
+ disconnect_work);
+
+ if (t->status == SMB_DIRECT_CS_CONNECTED) {
+ t->status = SMB_DIRECT_CS_DISCONNECTING;
+ rdma_disconnect(t->cm_id);
+ }
+}
+
+static void
+smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t)
+{
+ if (t->status == SMB_DIRECT_CS_CONNECTED)
+ queue_work(smb_direct_wq, &t->disconnect_work);
+}
+
+static void smb_direct_send_immediate_work(struct work_struct *work)
+{
+ struct smb_direct_transport *t = container_of(work,
+ struct smb_direct_transport, send_immediate_work);
+
+ if (t->status != SMB_DIRECT_CS_CONNECTED)
+ return;
+
+ smb_direct_post_send_data(t, NULL, NULL, 0, 0);
+}
+
+static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
+{
+ struct smb_direct_transport *t;
+ struct ksmbd_conn *conn;
+
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return NULL;
+
+ t->cm_id = cm_id;
+ cm_id->context = t;
+
+ t->status = SMB_DIRECT_CS_NEW;
+ init_waitqueue_head(&t->wait_status);
+
+ spin_lock_init(&t->reassembly_queue_lock);
+ INIT_LIST_HEAD(&t->reassembly_queue);
+ t->reassembly_data_length = 0;
+ t->reassembly_queue_length = 0;
+ init_waitqueue_head(&t->wait_reassembly_queue);
+ init_waitqueue_head(&t->wait_send_credits);
+ init_waitqueue_head(&t->wait_rw_avail_ops);
+
+ spin_lock_init(&t->receive_credit_lock);
+ spin_lock_init(&t->recvmsg_queue_lock);
+ INIT_LIST_HEAD(&t->recvmsg_queue);
+
+ spin_lock_init(&t->empty_recvmsg_queue_lock);
+ INIT_LIST_HEAD(&t->empty_recvmsg_queue);
+
+ init_waitqueue_head(&t->wait_send_payload_pending);
+ atomic_set(&t->send_payload_pending, 0);
+ init_waitqueue_head(&t->wait_send_pending);
+ atomic_set(&t->send_pending, 0);
+
+ spin_lock_init(&t->lock_new_recv_credits);
+
+ INIT_DELAYED_WORK(&t->post_recv_credits_work,
+ smb_direct_post_recv_credits);
+ INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work);
+ INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work);
+
+ conn = ksmbd_conn_alloc();
+ if (!conn)
+ goto err;
+ conn->transport = KSMBD_TRANS(t);
+ KSMBD_TRANS(t)->conn = conn;
+ KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
+ return t;
+err:
+ kfree(t);
+ return NULL;
+}
+
+static void free_transport(struct smb_direct_transport *t)
+{
+ struct smb_direct_recvmsg *recvmsg;
+
+ wake_up_interruptible(&t->wait_send_credits);
+
+ ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
+ wait_event(t->wait_send_payload_pending,
+ atomic_read(&t->send_payload_pending) == 0);
+ wait_event(t->wait_send_pending,
+ atomic_read(&t->send_pending) == 0);
+
+ cancel_work_sync(&t->disconnect_work);
+ cancel_delayed_work_sync(&t->post_recv_credits_work);
+ cancel_work_sync(&t->send_immediate_work);
+
+ if (t->qp) {
+ ib_drain_qp(t->qp);
+ ib_destroy_qp(t->qp);
+ }
+
+ ksmbd_debug(RDMA, "drain the reassembly queue\n");
+ do {
+ spin_lock(&t->reassembly_queue_lock);
+ recvmsg = get_first_reassembly(t);
+ if (recvmsg) {
+ list_del(&recvmsg->list);
+ spin_unlock(&t->reassembly_queue_lock);
+ put_recvmsg(t, recvmsg);
+ } else {
+ spin_unlock(&t->reassembly_queue_lock);
+ }
+ } while (recvmsg);
+ t->reassembly_data_length = 0;
+
+ if (t->send_cq)
+ ib_free_cq(t->send_cq);
+ if (t->recv_cq)
+ ib_free_cq(t->recv_cq);
+ if (t->pd)
+ ib_dealloc_pd(t->pd);
+ if (t->cm_id)
+ rdma_destroy_id(t->cm_id);
+
+ smb_direct_destroy_pools(t);
+ ksmbd_conn_free(KSMBD_TRANS(t)->conn);
+ kfree(t);
+}
+
+static struct smb_direct_sendmsg *
+smb_direct_alloc_sendmsg(struct smb_direct_transport *t)
+{
+ struct smb_direct_sendmsg *msg;
+
+ msg = mempool_alloc(t->sendmsg_mempool, GFP_KERNEL);
+ if (!msg)
+ return ERR_PTR(-ENOMEM);
+ msg->transport = t;
+ INIT_LIST_HEAD(&msg->list);
+ msg->num_sge = 0;
+ return msg;
+}
+
+static void smb_direct_free_sendmsg(struct smb_direct_transport *t,
+ struct smb_direct_sendmsg *msg)
+{
+ int i;
+
+ if (msg->num_sge > 0) {
+ ib_dma_unmap_single(t->cm_id->device,
+ msg->sge[0].addr, msg->sge[0].length,
+ DMA_TO_DEVICE);
+ for (i = 1; i < msg->num_sge; i++)
+ ib_dma_unmap_page(t->cm_id->device,
+ msg->sge[i].addr, msg->sge[i].length,
+ DMA_TO_DEVICE);
+ }
+ mempool_free(msg, t->sendmsg_mempool);
+}
+
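+/*
+ * Sanity-check a received SMB_DIRECT message: log its header fields and,
+ * for a negotiate request, reject protocol versions other than 1.0 as
+ * well as parameters below the minimums this implementation accepts.
+ */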
+static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
+{
+ switch (recvmsg->type) {
+ case SMB_DIRECT_MSG_DATA_TRANSFER: {
+ struct smb_direct_data_transfer *req =
+ (struct smb_direct_data_transfer *)recvmsg->packet;
+ struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
+ + le32_to_cpu(req->data_offset) - 4);
+ ksmbd_debug(RDMA,
+ "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
+ le16_to_cpu(req->credits_granted),
+ le16_to_cpu(req->credits_requested),
+ req->data_length, req->remaining_data_length,
+ hdr->ProtocolId, hdr->Command);
+ break;
+ }
+ case SMB_DIRECT_MSG_NEGOTIATE_REQ: {
+ struct smb_direct_negotiate_req *req =
+ (struct smb_direct_negotiate_req *)recvmsg->packet;
+ ksmbd_debug(RDMA,
+ "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
+ le16_to_cpu(req->min_version),
+ le16_to_cpu(req->max_version),
+ le16_to_cpu(req->credits_requested),
+ le32_to_cpu(req->preferred_send_size),
+ le32_to_cpu(req->max_receive_size),
+ le32_to_cpu(req->max_fragmented_size));
+ if (le16_to_cpu(req->min_version) > 0x0100 ||
+ le16_to_cpu(req->max_version) < 0x0100)
+ return -EOPNOTSUPP;
+ if (le16_to_cpu(req->credits_requested) <= 0 ||
+ le32_to_cpu(req->max_receive_size) <= 128 ||
+ le32_to_cpu(req->max_fragmented_size) <=
+ 128 * 1024)
+ return -ECONNABORTED;
+
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct smb_direct_recvmsg *recvmsg;
+ struct smb_direct_transport *t;
+
+ recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe);
+ t = recvmsg->transport;
+
+ if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+ pr_err("Recv error. status='%s (%d)' opcode=%d\n",
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->opcode);
+ smb_direct_disconnect_rdma_connection(t);
+ }
+ put_empty_recvmsg(t, recvmsg);
+ return;
+ }
+
+ ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->opcode);
+
+ ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
+ recvmsg->sge.length, DMA_FROM_DEVICE);
+
+ switch (recvmsg->type) {
+ case SMB_DIRECT_MSG_NEGOTIATE_REQ:
+ t->negotiation_requested = true;
+ t->full_packet_received = true;
+ wake_up_interruptible(&t->wait_status);
+ break;
+ case SMB_DIRECT_MSG_DATA_TRANSFER: {
+ struct smb_direct_data_transfer *data_transfer =
+ (struct smb_direct_data_transfer *)recvmsg->packet;
+ int data_length = le32_to_cpu(data_transfer->data_length);
+ int avail_recvmsg_count, receive_credits;
+
+ if (data_length) {
+ if (t->full_packet_received)
+ recvmsg->first_segment = true;
+
+ if (le32_to_cpu(data_transfer->remaining_data_length))
+ t->full_packet_received = false;
+ else
+ t->full_packet_received = true;
+
+ enqueue_reassembly(t, recvmsg, data_length);
+ wake_up_interruptible(&t->wait_reassembly_queue);
+
+ spin_lock(&t->receive_credit_lock);
+ receive_credits = --(t->recv_credits);
+ avail_recvmsg_count = t->count_avail_recvmsg;
+ spin_unlock(&t->receive_credit_lock);
+ } else {
+ put_empty_recvmsg(t, recvmsg);
+
+ spin_lock(&t->receive_credit_lock);
+ receive_credits = --(t->recv_credits);
+ avail_recvmsg_count = ++(t->count_avail_recvmsg);
+ spin_unlock(&t->receive_credit_lock);
+ }
+
+ t->recv_credit_target =
+ le16_to_cpu(data_transfer->credits_requested);
+ atomic_add(le16_to_cpu(data_transfer->credits_granted),
+ &t->send_credits);
+
+ if (le16_to_cpu(data_transfer->flags) &
+ SMB_DIRECT_RESPONSE_REQUESTED)
+ queue_work(smb_direct_wq, &t->send_immediate_work);
+
+ if (atomic_read(&t->send_credits) > 0)
+ wake_up_interruptible(&t->wait_send_credits);
+
+ if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
+ mod_delayed_work(smb_direct_wq,
+ &t->post_recv_credits_work, 0);
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+static int smb_direct_post_recv(struct smb_direct_transport *t,
+ struct smb_direct_recvmsg *recvmsg)
+{
+ struct ib_recv_wr wr;
+ int ret;
+
+ recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device,
+ recvmsg->packet, t->max_recv_size,
+ DMA_FROM_DEVICE);
+ ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr);
+ if (ret)
+ return ret;
+ recvmsg->sge.length = t->max_recv_size;
+ recvmsg->sge.lkey = t->pd->local_dma_lkey;
+ recvmsg->cqe.done = recv_done;
+
+ wr.wr_cqe = &recvmsg->cqe;
+ wr.next = NULL;
+ wr.sg_list = &recvmsg->sge;
+ wr.num_sge = 1;
+
+ ret = ib_post_recv(t->qp, &wr, NULL);
+ if (ret) {
+ pr_err("Can't post recv: %d\n", ret);
+ ib_dma_unmap_single(t->cm_id->device,
+ recvmsg->sge.addr, recvmsg->sge.length,
+ DMA_FROM_DEVICE);
+ smb_direct_disconnect_rdma_connection(t);
+ return ret;
+ }
+ return ret;
+}
+
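+/*
+ * Copy up to @size bytes from the reassembly queue into @buf, releasing
+ * fully consumed segments as it goes. A 4-byte read of a first segment is
+ * answered with a synthesized RFC1002 length so the upper layer can keep
+ * its existing framing logic. If not enough data has been reassembled
+ * yet, sleep until more arrives.
+ */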
+static int smb_direct_read(struct ksmbd_transport *t, char *buf,
+ unsigned int size)
+{
+ struct smb_direct_recvmsg *recvmsg;
+ struct smb_direct_data_transfer *data_transfer;
+ int to_copy, to_read, data_read, offset;
+ u32 data_length, remaining_data_length, data_offset;
+ int rc;
+ struct smb_direct_transport *st = smb_trans_direct_transport(t);
+
+again:
+ if (st->status != SMB_DIRECT_CS_CONNECTED) {
+ pr_err("disconnected\n");
+ return -ENOTCONN;
+ }
+
+ /*
+ * No need to hold the reassembly queue lock all the time as we are
+ * the only one reading from the front of the queue. The transport
+ * may add more entries to the back of the queue at the same time
+ */
+ if (st->reassembly_data_length >= size) {
+ int queue_length;
+ int queue_removed = 0;
+
+ /*
+ * Need to make sure reassembly_data_length is read before
+ * reading reassembly_queue_length and calling
+ * get_first_reassembly. This call is lock-free because we never
+ * read the entries at the end of the queue, which are being updated
+ * in softirq context as more data is received.
+ */
+ virt_rmb();
+ queue_length = st->reassembly_queue_length;
+ data_read = 0;
+ to_read = size;
+ offset = st->first_entry_offset;
+ while (data_read < size) {
+ recvmsg = get_first_reassembly(st);
+ data_transfer = smb_direct_recvmsg_payload(recvmsg);
+ data_length = le32_to_cpu(data_transfer->data_length);
+ remaining_data_length =
+ le32_to_cpu(data_transfer->remaining_data_length);
+ data_offset = le32_to_cpu(data_transfer->data_offset);
+
+ /*
+ * The upper layer expects RFC1002 length at the
+ * beginning of the payload. Return it to indicate
+ * the total length of the packet. This minimizes the changes to
+ * the upper-layer packet processing logic and will eventually be
+ * removed when an intermediate transport layer is added.
+ */
+ if (recvmsg->first_segment && size == 4) {
+ unsigned int rfc1002_len =
+ data_length + remaining_data_length;
+ *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
+ data_read = 4;
+ recvmsg->first_segment = false;
+ ksmbd_debug(RDMA,
+ "returning rfc1002 length %d\n",
+ rfc1002_len);
+ goto read_rfc1002_done;
+ }
+
+ to_copy = min_t(int, data_length - offset, to_read);
+ memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
+ to_copy);
+
+ /* move on to the next buffer? */
+ if (to_copy == data_length - offset) {
+ queue_length--;
+ /*
+ * No need to lock if we are not at the
+ * end of the queue
+ */
+ if (queue_length) {
+ list_del(&recvmsg->list);
+ } else {
+ spin_lock_irq(&st->reassembly_queue_lock);
+ list_del(&recvmsg->list);
+ spin_unlock_irq(&st->reassembly_queue_lock);
+ }
+ queue_removed++;
+ put_recvmsg(st, recvmsg);
+ offset = 0;
+ } else {
+ offset += to_copy;
+ }
+
+ to_read -= to_copy;
+ data_read += to_copy;
+ }
+
+ spin_lock_irq(&st->reassembly_queue_lock);
+ st->reassembly_data_length -= data_read;
+ st->reassembly_queue_length -= queue_removed;
+ spin_unlock_irq(&st->reassembly_queue_lock);
+
+ spin_lock(&st->receive_credit_lock);
+ st->count_avail_recvmsg += queue_removed;
+ if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) {
+ spin_unlock(&st->receive_credit_lock);
+ mod_delayed_work(smb_direct_wq,
+ &st->post_recv_credits_work, 0);
+ } else {
+ spin_unlock(&st->receive_credit_lock);
+ }
+
+ st->first_entry_offset = offset;
+ ksmbd_debug(RDMA,
+ "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
+ data_read, st->reassembly_data_length,
+ st->first_entry_offset);
+read_rfc1002_done:
+ return data_read;
+ }
+
+ ksmbd_debug(RDMA, "wait_event on more data\n");
+ rc = wait_event_interruptible(st->wait_reassembly_queue,
+ st->reassembly_data_length >= size ||
+ st->status != SMB_DIRECT_CS_CONNECTED);
+ if (rc)
+ return -EINTR;
+
+ goto again;
+}
+
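+/*
+ * Work item that tops up posted receive buffers toward recv_credit_target,
+ * preferring the free recvmsg queue over the empty one, and accounts the
+ * newly posted buffers as credits to be granted to the peer.
+ */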
+static void smb_direct_post_recv_credits(struct work_struct *work)
+{
+ struct smb_direct_transport *t = container_of(work,
+ struct smb_direct_transport, post_recv_credits_work.work);
+ struct smb_direct_recvmsg *recvmsg;
+ int receive_credits, credits = 0;
+ int ret;
+ int use_free = 1;
+
+ spin_lock(&t->receive_credit_lock);
+ receive_credits = t->recv_credits;
+ spin_unlock(&t->receive_credit_lock);
+
+ if (receive_credits < t->recv_credit_target) {
+ while (true) {
+ if (use_free)
+ recvmsg = get_free_recvmsg(t);
+ else
+ recvmsg = get_empty_recvmsg(t);
+ if (!recvmsg) {
+ if (use_free) {
+ use_free = 0;
+ continue;
+ } else {
+ break;
+ }
+ }
+
+ recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
+ recvmsg->first_segment = false;
+
+ ret = smb_direct_post_recv(t, recvmsg);
+ if (ret) {
+ pr_err("Can't post recv: %d\n", ret);
+ put_recvmsg(t, recvmsg);
+ break;
+ }
+ credits++;
+ }
+ }
+
+ spin_lock(&t->receive_credit_lock);
+ t->recv_credits += credits;
+ t->count_avail_recvmsg -= credits;
+ spin_unlock(&t->receive_credit_lock);
+
+ spin_lock(&t->lock_new_recv_credits);
+ t->new_recv_credits += credits;
+ spin_unlock(&t->lock_new_recv_credits);
+
+ if (credits)
+ queue_work(smb_direct_wq, &t->send_immediate_work);
+}
+
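+/*
+ * Send completion handler: wake up any waiters once the pending-send
+ * counters reach zero, then free the entire chain of sendmsgs that was
+ * posted as one linked work request, walking the list in reverse.
+ */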
+static void send_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct smb_direct_sendmsg *sendmsg, *sibling;
+ struct smb_direct_transport *t;
+ struct list_head *pos, *prev, *end;
+
+ sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe);
+ t = sendmsg->transport;
+
+ ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->opcode);
+
+ if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
+ pr_err("Send error. status='%s (%d)', opcode=%d\n",
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->opcode);
+ smb_direct_disconnect_rdma_connection(t);
+ }
+
+ if (sendmsg->num_sge > 1) {
+ if (atomic_dec_and_test(&t->send_payload_pending))
+ wake_up(&t->wait_send_payload_pending);
+ } else {
+ if (atomic_dec_and_test(&t->send_pending))
+ wake_up(&t->wait_send_pending);
+ }
+
+ /*
+ * Iterate the list of messages in reverse and free them; the list's
+ * head is invalid here, so it cannot serve as the loop sentinel.
+ */
+ for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next;
+ prev != end; pos = prev, prev = prev->prev) {
+ sibling = container_of(pos, struct smb_direct_sendmsg, list);
+ smb_direct_free_sendmsg(t, sibling);
+ }
+
+ sibling = container_of(pos, struct smb_direct_sendmsg, list);
+ smb_direct_free_sendmsg(t, sibling);
+}
+
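+/*
+ * Atomically take the receive credits accumulated since the last send so
+ * they can be granted to the peer via the next packet's credits_granted
+ * field.
+ */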
+static int manage_credits_prior_sending(struct smb_direct_transport *t)
+{
+ int new_credits;
+
+ spin_lock(&t->lock_new_recv_credits);
+ new_credits = t->new_recv_credits;
+ t->new_recv_credits = 0;
+ spin_unlock(&t->lock_new_recv_credits);
+
+ return new_credits;
+}
+
+static int smb_direct_post_send(struct smb_direct_transport *t,
+ struct ib_send_wr *wr)
+{
+ int ret;
+
+ if (wr->num_sge > 1)
+ atomic_inc(&t->send_payload_pending);
+ else
+ atomic_inc(&t->send_pending);
+
+ ret = ib_post_send(t->qp, wr, NULL);
+ if (ret) {
+ pr_err("failed to post send: %d\n", ret);
+ if (wr->num_sge > 1) {
+ if (atomic_dec_and_test(&t->send_payload_pending))
+ wake_up(&t->wait_send_payload_pending);
+ } else {
+ if (atomic_dec_and_test(&t->send_pending))
+ wake_up(&t->wait_send_pending);
+ }
+ smb_direct_disconnect_rdma_connection(t);
+ }
+ return ret;
+}
+
+static void smb_direct_send_ctx_init(struct smb_direct_transport *t,
+ struct smb_direct_send_ctx *send_ctx,
+ bool need_invalidate_rkey,
+ unsigned int remote_key)
+{
+ INIT_LIST_HEAD(&send_ctx->msg_list);
+ send_ctx->wr_cnt = 0;
+ send_ctx->need_invalidate_rkey = need_invalidate_rkey;
+ send_ctx->remote_key = remote_key;
+}
+
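+/*
+ * Post the accumulated send list as a single chained work request. Only
+ * the last WR is signaled, and if this is the final flush with remote
+ * invalidation requested, the last send is turned into SEND_WITH_INV.
+ * On failure the reserved send credits are returned and the list freed.
+ */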
+static int smb_direct_flush_send_list(struct smb_direct_transport *t,
+ struct smb_direct_send_ctx *send_ctx,
+ bool is_last)
+{
+ struct smb_direct_sendmsg *first, *last;
+ int ret;
+
+ if (list_empty(&send_ctx->msg_list))
+ return 0;
+
+ first = list_first_entry(&send_ctx->msg_list,
+ struct smb_direct_sendmsg,
+ list);
+ last = list_last_entry(&send_ctx->msg_list,
+ struct smb_direct_sendmsg,
+ list);
+
+ last->wr.send_flags = IB_SEND_SIGNALED;
+ last->wr.wr_cqe = &last->cqe;
+ if (is_last && send_ctx->need_invalidate_rkey) {
+ last->wr.opcode = IB_WR_SEND_WITH_INV;
+ last->wr.ex.invalidate_rkey = send_ctx->remote_key;
+ }
+
+ ret = smb_direct_post_send(t, &first->wr);
+ if (!ret) {
+ smb_direct_send_ctx_init(t, send_ctx,
+ send_ctx->need_invalidate_rkey,
+ send_ctx->remote_key);
+ } else {
+ atomic_add(send_ctx->wr_cnt, &t->send_credits);
+ wake_up(&t->wait_send_credits);
+ list_for_each_entry_safe(first, last, &send_ctx->msg_list,
+ list) {
+ smb_direct_free_sendmsg(t, first);
+ }
+ }
+ return ret;
+}
+
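+/*
+ * Consume one credit, or sleep until one becomes available. Returns
+ * -ENOTCONN if the connection drops while waiting.
+ */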
+static int wait_for_credits(struct smb_direct_transport *t,
+ wait_queue_head_t *waitq, atomic_t *credits)
+{
+ int ret;
+
+ do {
+ if (atomic_dec_return(credits) >= 0)
+ return 0;
+
+ atomic_inc(credits);
+ ret = wait_event_interruptible(*waitq,
+ atomic_read(credits) > 0 ||
+ t->status != SMB_DIRECT_CS_CONNECTED);
+
+ if (t->status != SMB_DIRECT_CS_CONNECTED)
+ return -ENOTCONN;
+ else if (ret < 0)
+ return ret;
+ } while (true);
+}
+
+static int wait_for_send_credits(struct smb_direct_transport *t,
+ struct smb_direct_send_ctx *send_ctx)
+{
+ int ret;
+
+ if (send_ctx &&
+ (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) {
+ ret = smb_direct_flush_send_list(t, send_ctx, false);
+ if (ret)
+ return ret;
+ }
+
+ return wait_for_credits(t, &t->wait_send_credits, &t->send_credits);
+}
+
+static int smb_direct_create_header(struct smb_direct_transport *t,
+ int size, int remaining_data_length,
+ struct smb_direct_sendmsg **sendmsg_out)
+{
+ struct smb_direct_sendmsg *sendmsg;
+ struct smb_direct_data_transfer *packet;
+ int header_length;
+ int ret;
+
+ sendmsg = smb_direct_alloc_sendmsg(t);
+ if (IS_ERR(sendmsg))
+ return PTR_ERR(sendmsg);
+
+ /* Fill in the packet header */
+ packet = (struct smb_direct_data_transfer *)sendmsg->packet;
+ packet->credits_requested = cpu_to_le16(t->send_credit_target);
+ packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
+
+ packet->flags = 0;
+ packet->reserved = 0;
+ if (!size)
+ packet->data_offset = 0;
+ else
+ packet->data_offset = cpu_to_le32(24);
+ packet->data_length = cpu_to_le32(size);
+ packet->remaining_data_length = cpu_to_le32(remaining_data_length);
+ packet->padding = 0;
+
+ ksmbd_debug(RDMA,
+ "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
+ le16_to_cpu(packet->credits_requested),
+ le16_to_cpu(packet->credits_granted),
+ le32_to_cpu(packet->data_offset),
+ le32_to_cpu(packet->data_length),
+ le32_to_cpu(packet->remaining_data_length));
+
+ /* Map the packet to DMA */
+ header_length = sizeof(struct smb_direct_data_transfer);
+ /* If this is a packet without payload, don't send padding */
+ if (!size)
+ header_length =
+ offsetof(struct smb_direct_data_transfer, padding);
+
+ sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
+ (void *)packet,
+ header_length,
+ DMA_TO_DEVICE);
+ ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
+ if (ret) {
+ smb_direct_free_sendmsg(t, sendmsg);
+ return ret;
+ }
+
+ sendmsg->num_sge = 1;
+ sendmsg->sge[0].length = header_length;
+ sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
+
+ *sendmsg_out = sendmsg;
+ return 0;
+}
+
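+/*
+ * Build a scatterlist over @buf, which may be a vmalloc or kmap address,
+ * splitting it at page boundaries. Returns the number of entries used, or
+ * -EINVAL if @nentries is too small for the buffer.
+ */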
+static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
+{
+ bool high = is_vmalloc_addr(buf);
+ struct page *page;
+ int offset, len;
+ int i = 0;
+
+ if (nentries < get_buf_page_count(buf, size))
+ return -EINVAL;
+
+ offset = offset_in_page(buf);
+ buf -= offset;
+ while (size > 0) {
+ len = min_t(int, PAGE_SIZE - offset, size);
+ if (high)
+ page = vmalloc_to_page(buf);
+ else
+ page = kmap_to_page(buf);
+
+ if (!sg_list)
+ return -EINVAL;
+ sg_set_page(sg_list, page, len, offset);
+ sg_list = sg_next(sg_list);
+
+ buf += PAGE_SIZE;
+ size -= len;
+ offset = 0;
+ i++;
+ }
+ return i;
+}
+
+static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
+ struct scatterlist *sg_list, int nentries,
+ enum dma_data_direction dir)
+{
+ int npages;
+
+ npages = get_sg_list(buf, size, sg_list, nentries);
+ if (npages <= 0)
+ return -EINVAL;
+ return ib_dma_map_sg(device, sg_list, npages, dir);
+}
+
+static int post_sendmsg(struct smb_direct_transport *t,
+ struct smb_direct_send_ctx *send_ctx,
+ struct smb_direct_sendmsg *msg)
+{
+ int i;
+
+ for (i = 0; i < msg->num_sge; i++)
+ ib_dma_sync_single_for_device(t->cm_id->device,
+ msg->sge[i].addr, msg->sge[i].length,
+ DMA_TO_DEVICE);
+
+ msg->cqe.done = send_done;
+ msg->wr.opcode = IB_WR_SEND;
+ msg->wr.sg_list = &msg->sge[0];
+ msg->wr.num_sge = msg->num_sge;
+ msg->wr.next = NULL;
+
+ if (send_ctx) {
+ msg->wr.wr_cqe = NULL;
+ msg->wr.send_flags = 0;
+ if (!list_empty(&send_ctx->msg_list)) {
+ struct smb_direct_sendmsg *last;
+
+ last = list_last_entry(&send_ctx->msg_list,
+ struct smb_direct_sendmsg,
+ list);
+ last->wr.next = &msg->wr;
+ }
+ list_add_tail(&msg->list, &send_ctx->msg_list);
+ send_ctx->wr_cnt++;
+ return 0;
+ }
+
+ msg->wr.wr_cqe = &msg->cqe;
+ msg->wr.send_flags = IB_SEND_SIGNALED;
+ return smb_direct_post_send(t, &msg->wr);
+}
+
+static int smb_direct_post_send_data(struct smb_direct_transport *t,
+ struct smb_direct_send_ctx *send_ctx,
+ struct kvec *iov, int niov,
+ int remaining_data_length)
+{
+ int i, j, ret;
+ struct smb_direct_sendmsg *msg;
+ int data_length;
+ struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1];
+
+ ret = wait_for_send_credits(t, send_ctx);
+ if (ret)
+ return ret;
+
+ data_length = 0;
+ for (i = 0; i < niov; i++)
+ data_length += iov[i].iov_len;
+
+ ret = smb_direct_create_header(t, data_length, remaining_data_length,
+ &msg);
+ if (ret) {
+ atomic_inc(&t->send_credits);
+ return ret;
+ }
+
+ for (i = 0; i < niov; i++) {
+ struct ib_sge *sge;
+ int sg_cnt;
+
+ sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1);
+ sg_cnt = get_mapped_sg_list(t->cm_id->device,
+ iov[i].iov_base, iov[i].iov_len,
+ sg, SMB_DIRECT_MAX_SEND_SGES - 1,
+ DMA_TO_DEVICE);
+ if (sg_cnt <= 0) {
+ pr_err("failed to map buffer\n");
+ ret = -ENOMEM;
+ goto err;
+ } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES - 1) {
+ pr_err("buffer not fitted into sges\n");
+ ret = -E2BIG;
+ ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
+ DMA_TO_DEVICE);
+ goto err;
+ }
+
+ for (j = 0; j < sg_cnt; j++) {
+ sge = &msg->sge[msg->num_sge];
+ sge->addr = sg_dma_address(&sg[j]);
+ sge->length = sg_dma_len(&sg[j]);
+ sge->lkey = t->pd->local_dma_lkey;
+ msg->num_sge++;
+ }
+ }
+
+ ret = post_sendmsg(t, send_ctx, msg);
+ if (ret)
+ goto err;
+ return 0;
+err:
+ smb_direct_free_sendmsg(t, msg);
+ atomic_inc(&t->send_credits);
+ return ret;
+}
+
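+/*
+ * Send an iovec over SMB_DIRECT: strip the RFC1002 length prefix, then
+ * batch the vectors so no single data-transfer message exceeds
+ * max_iov_size, splitting any oversized vector into max_iov_size chunks.
+ * The batched messages are flushed as chained WRs, and the function waits
+ * until all payload sends have completed.
+ */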
+static int smb_direct_writev(struct ksmbd_transport *t,
+ struct kvec *iov, int niovs, int buflen,
+ bool need_invalidate, unsigned int remote_key)
+{
+ struct smb_direct_transport *st = smb_trans_direct_transport(t);
+ int remaining_data_length;
+ int start, i, j;
+ int max_iov_size = st->max_send_size -
+ sizeof(struct smb_direct_data_transfer);
+ int ret;
+ struct kvec vec;
+ struct smb_direct_send_ctx send_ctx;
+
+ if (st->status != SMB_DIRECT_CS_CONNECTED)
+ return -ENOTCONN;
+
+ /* FIXME: skip the RFC1002 header */
+ buflen -= 4;
+ iov[0].iov_base += 4;
+ iov[0].iov_len -= 4;
+
+ remaining_data_length = buflen;
+ ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
+
+ smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
+ start = i = 0;
+ buflen = 0;
+ while (true) {
+ buflen += iov[i].iov_len;
+ if (buflen > max_iov_size) {
+ if (i > start) {
+ remaining_data_length -=
+ (buflen - iov[i].iov_len);
+ ret = smb_direct_post_send_data(st, &send_ctx,
+ &iov[start], i - start,
+ remaining_data_length);
+ if (ret)
+ goto done;
+ } else {
+ /* iov[start] is too big, break it */
+ int nvec = (buflen + max_iov_size - 1) /
+ max_iov_size;
+
+ for (j = 0; j < nvec; j++) {
+ vec.iov_base =
+ (char *)iov[start].iov_base +
+ j * max_iov_size;
+ vec.iov_len =
+ min_t(int, max_iov_size,
+ buflen - max_iov_size * j);
+ remaining_data_length -= vec.iov_len;
+ ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
+ remaining_data_length);
+ if (ret)
+ goto done;
+ }
+ i++;
+ if (i == niovs)
+ break;
+ }
+ start = i;
+ buflen = 0;
+ } else {
+ i++;
+ if (i == niovs) {
+ /* send out all remaining vecs */
+ remaining_data_length -= buflen;
+ ret = smb_direct_post_send_data(st, &send_ctx,
+ &iov[start], i - start,
+ remaining_data_length);
+ if (ret)
+ goto done;
+ break;
+ }
+ }
+ }
+
+done:
+ ret = smb_direct_flush_send_list(st, &send_ctx, true);
+
+ /*
+ * As an optimization, we don't wait for each individual I/O to
+ * finish before sending the next one. Send them all, then wait for
+ * the pending send count to reach 0, which means every I/O has been
+ * posted and we are good to return.
+ */
+
+ wait_event(st->wait_send_payload_pending,
+ atomic_read(&st->send_payload_pending) == 0);
+ return ret;
+}
+
+static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
+ enum dma_data_direction dir)
+{
+ struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe,
+ struct smb_direct_rdma_rw_msg, cqe);
+ struct smb_direct_transport *t = msg->t;
+
+ if (wc->status != IB_WC_SUCCESS) {
+ pr_err("read/write error. opcode = %d, status = %s(%d)\n",
+ wc->opcode, ib_wc_status_msg(wc->status), wc->status);
+ smb_direct_disconnect_rdma_connection(t);
+ }
+
+ if (atomic_inc_return(&t->rw_avail_ops) > 0)
+ wake_up(&t->wait_rw_avail_ops);
+
+ rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
+ msg->sg_list, msg->sgt.nents, dir);
+ sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
+ complete(msg->completion);
+ kfree(msg);
+}
+
+static void read_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ read_write_done(cq, wc, DMA_FROM_DEVICE);
+}
+
+static void write_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ read_write_done(cq, wc, DMA_TO_DEVICE);
+}
+
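+/*
+ * Perform one RDMA read or write against the peer buffer described by
+ * (remote_key, remote_offset, remote_len): map the local buffer into a
+ * scatterlist, set up an rdma_rw context, post the work requests, and
+ * block until the completion handler reports the transfer finished.
+ */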
+static int smb_direct_rdma_xmit(struct smb_direct_transport *t, void *buf,
+ int buf_len, u32 remote_key, u64 remote_offset,
+ u32 remote_len, bool is_read)
+{
+ struct smb_direct_rdma_rw_msg *msg;
+ int ret;
+ DECLARE_COMPLETION_ONSTACK(completion);
+ struct ib_send_wr *first_wr = NULL;
+
+ ret = wait_for_credits(t, &t->wait_rw_avail_ops, &t->rw_avail_ops);
+ if (ret < 0)
+ return ret;
+
+ /* TODO: mempool */
+ msg = kmalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) +
+ sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL);
+ if (!msg) {
+ atomic_inc(&t->rw_avail_ops);
+ return -ENOMEM;
+ }
+
+ msg->sgt.sgl = &msg->sg_list[0];
+ ret = sg_alloc_table_chained(&msg->sgt,
+ get_buf_page_count(buf, buf_len),
+ msg->sg_list, SG_CHUNK_SIZE);
+ if (ret) {
+ atomic_inc(&t->rw_avail_ops);
+ kfree(msg);
+ return -ENOMEM;
+ }
+
+ ret = get_sg_list(buf, buf_len, msg->sgt.sgl, msg->sgt.orig_nents);
+ if (ret <= 0) {
+ pr_err("failed to get pages\n");
+ goto err;
+ }
+
+ ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
+ msg->sg_list, get_buf_page_count(buf, buf_len),
+ 0, remote_offset, remote_key,
+ is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ if (ret < 0) {
+ pr_err("failed to init rdma_rw_ctx: %d\n", ret);
+ goto err;
+ }
+
+ msg->t = t;
+ msg->cqe.done = is_read ? read_done : write_done;
+ msg->completion = &completion;
+ first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
+ &msg->cqe, NULL);
+
+ ret = ib_post_send(t->qp, first_wr, NULL);
+ if (ret) {
+ pr_err("failed to post send wr: %d\n", ret);
+ goto err;
+ }
+
+ wait_for_completion(&completion);
+ return 0;
+
+err:
+ atomic_inc(&t->rw_avail_ops);
+ if (first_wr)
+ rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
+ msg->sg_list, msg->sgt.nents,
+ is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
+ kfree(msg);
+ return ret;
+}
+
+static int smb_direct_rdma_write(struct ksmbd_transport *t, void *buf,
+ unsigned int buflen, u32 remote_key,
+ u64 remote_offset, u32 remote_len)
+{
+ return smb_direct_rdma_xmit(smb_trans_direct_transport(t), buf, buflen,
+ remote_key, remote_offset,
+ remote_len, false);
+}
+
+static int smb_direct_rdma_read(struct ksmbd_transport *t, void *buf,
+ unsigned int buflen, u32 remote_key,
+ u64 remote_offset, u32 remote_len)
+{
+ return smb_direct_rdma_xmit(smb_trans_direct_transport(t), buf, buflen,
+ remote_key, remote_offset,
+ remote_len, true);
+}
+
+static void smb_direct_disconnect(struct ksmbd_transport *t)
+{
+ struct smb_direct_transport *st = smb_trans_direct_transport(t);
+
+ ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id);
+
+ smb_direct_disconnect_rdma_work(&st->disconnect_work);
+ wait_event_interruptible(st->wait_status,
+ st->status == SMB_DIRECT_CS_DISCONNECTED);
+ free_transport(st);
+}
+
+static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
+{
+ struct smb_direct_transport *t = cm_id->context;
+
+ ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
+ cm_id, rdma_event_msg(event->event), event->event);
+
+ switch (event->event) {
+ case RDMA_CM_EVENT_ESTABLISHED: {
+ t->status = SMB_DIRECT_CS_CONNECTED;
+ wake_up_interruptible(&t->wait_status);
+ break;
+ }
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ case RDMA_CM_EVENT_DISCONNECTED: {
+ t->status = SMB_DIRECT_CS_DISCONNECTED;
+ wake_up_interruptible(&t->wait_status);
+ wake_up_interruptible(&t->wait_reassembly_queue);
+ wake_up(&t->wait_send_credits);
+ break;
+ }
+ case RDMA_CM_EVENT_CONNECT_ERROR: {
+ t->status = SMB_DIRECT_CS_DISCONNECTED;
+ wake_up_interruptible(&t->wait_status);
+ break;
+ }
+ default:
+ pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
+ cm_id, rdma_event_msg(event->event),
+ event->event);
+ break;
+ }
+ return 0;
+}
+
+static void smb_direct_qpair_handler(struct ib_event *event, void *context)
+{
+ struct smb_direct_transport *t = context;
+
+ ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
+ t->cm_id, ib_event_msg(event->event), event->event);
+
+ switch (event->event) {
+ case IB_EVENT_CQ_ERR:
+ case IB_EVENT_QP_FATAL:
+ smb_direct_disconnect_rdma_connection(t);
+ break;
+ default:
+ break;
+ }
+}
+
+static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
+ int failed)
+{
+ struct smb_direct_sendmsg *sendmsg;
+ struct smb_direct_negotiate_resp *resp;
+ int ret;
+
+ sendmsg = smb_direct_alloc_sendmsg(t);
+ if (IS_ERR(sendmsg))
+ return -ENOMEM;
+
+ resp = (struct smb_direct_negotiate_resp *)sendmsg->packet;
+ if (failed) {
+ memset(resp, 0, sizeof(*resp));
+ resp->min_version = cpu_to_le16(0x0100);
+ resp->max_version = cpu_to_le16(0x0100);
+ resp->status = STATUS_NOT_SUPPORTED;
+ } else {
+ resp->status = STATUS_SUCCESS;
+ resp->min_version = SMB_DIRECT_VERSION_LE;
+ resp->max_version = SMB_DIRECT_VERSION_LE;
+ resp->negotiated_version = SMB_DIRECT_VERSION_LE;
+ resp->reserved = 0;
+ resp->credits_requested =
+ cpu_to_le16(t->send_credit_target);
+ resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
+ resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size);
+ resp->preferred_send_size = cpu_to_le32(t->max_send_size);
+ resp->max_receive_size = cpu_to_le32(t->max_recv_size);
+ resp->max_fragmented_size =
+ cpu_to_le32(t->max_fragmented_recv_size);
+ }
+
+ sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
+ (void *)resp, sizeof(*resp),
+ DMA_TO_DEVICE);
+ ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
+ if (ret) {
+ smb_direct_free_sendmsg(t, sendmsg);
+ return ret;
+ }
+
+ sendmsg->num_sge = 1;
+ sendmsg->sge[0].length = sizeof(*resp);
+ sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
+
+ ret = post_sendmsg(t, NULL, sendmsg);
+ if (ret) {
+ smb_direct_free_sendmsg(t, sendmsg);
+ return ret;
+ }
+
+ wait_event(t->wait_send_pending,
+ atomic_read(&t->send_pending) == 0);
+ return 0;
+}
+
+static int smb_direct_accept_client(struct smb_direct_transport *t)
+{
+ struct rdma_conn_param conn_param;
+ struct ib_port_immutable port_immutable;
+ u32 ird_ord_hdr[2];
+ int ret;
+
+ memset(&conn_param, 0, sizeof(conn_param));
+ conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
+ SMB_DIRECT_CM_INITIATOR_DEPTH);
+ conn_param.responder_resources = 0;
+
+ t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
+ t->cm_id->port_num,
+ &port_immutable);
+ if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
+ ird_ord_hdr[0] = conn_param.responder_resources;
+ ird_ord_hdr[1] = 1;
+ conn_param.private_data = ird_ord_hdr;
+ conn_param.private_data_len = sizeof(ird_ord_hdr);
+ } else {
+ conn_param.private_data = NULL;
+ conn_param.private_data_len = 0;
+ }
+ conn_param.retry_count = SMB_DIRECT_CM_RETRY;
+ conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
+ conn_param.flow_control = 0;
+
+ ret = rdma_accept(t->cm_id, &conn_param);
+ if (ret) {
+ pr_err("error at rdma_accept: %d\n", ret);
+ return ret;
+ }
+
+ wait_event_interruptible(t->wait_status,
+ t->status != SMB_DIRECT_CS_NEW);
+ if (t->status != SMB_DIRECT_CS_CONNECTED)
+ return -ENOTCONN;
+ return 0;
+}
+
+static int smb_direct_negotiate(struct smb_direct_transport *t)
+{
+ int ret;
+ struct smb_direct_recvmsg *recvmsg;
+ struct smb_direct_negotiate_req *req;
+
+ recvmsg = get_free_recvmsg(t);
+ if (!recvmsg)
+ return -ENOMEM;
+ recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;
+
+ ret = smb_direct_post_recv(t, recvmsg);
+ if (ret) {
+ pr_err("Can't post recv: %d\n", ret);
+ goto out;
+ }
+
+ t->negotiation_requested = false;
+ ret = smb_direct_accept_client(t);
+ if (ret) {
+ pr_err("Can't accept client\n");
+ goto out;
+ }
+
+ smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
+
+ ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
+ ret = wait_event_interruptible_timeout(t->wait_status,
+ t->negotiation_requested ||
+ t->status == SMB_DIRECT_CS_DISCONNECTED,
+ SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
+ if (ret <= 0 || t->status == SMB_DIRECT_CS_DISCONNECTED) {
+ ret = ret < 0 ? ret : -ETIMEDOUT;
+ goto out;
+ }
+
+ ret = smb_direct_check_recvmsg(recvmsg);
+ if (ret == -ECONNABORTED)
+ goto out;
+
+ req = (struct smb_direct_negotiate_req *)recvmsg->packet;
+ t->max_recv_size = min_t(int, t->max_recv_size,
+ le32_to_cpu(req->preferred_send_size));
+ t->max_send_size = min_t(int, t->max_send_size,
+ le32_to_cpu(req->max_receive_size));
+ t->max_fragmented_send_size =
+ le32_to_cpu(req->max_fragmented_size);
+
+ ret = smb_direct_send_negotiate_response(t, ret);
+out:
+ if (recvmsg)
+ put_recvmsg(t, recvmsg);
+ return ret;
+}
+
+static int smb_direct_init_params(struct smb_direct_transport *t,
+ struct ib_qp_cap *cap)
+{
+ struct ib_device *device = t->cm_id->device;
+ int max_send_sges, max_pages, max_rw_wrs, max_send_wrs;
+
+ /*
+ * Two extra SGEs are needed: one because the SMB_DIRECT header is
+ * mapped separately, and one because a send buffer may not be page
+ * aligned.
+ */
+ t->max_send_size = smb_direct_max_send_size;
+ max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 2;
+ if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
+ pr_err("max_send_size %d is too large\n", t->max_send_size);
+ return -EINVAL;
+ }
+
+ /*
+ * Allow smb_direct_max_outstanding_rw_ops in-flight RDMA
+ * read/writes. The HCA guarantees at least max_send_sge SGEs for an
+ * RDMA read/write work request, and if memory registration is used,
+ * we need reg_mr and local_inv WRs for each read/write.
+ */
+ t->max_rdma_rw_size = smb_direct_max_read_write_size;
+ max_pages = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
+ max_rw_wrs = DIV_ROUND_UP(max_pages, SMB_DIRECT_MAX_SEND_SGES);
+ max_rw_wrs += rdma_rw_mr_factor(device, t->cm_id->port_num,
+ max_pages) * 2;
+ max_rw_wrs *= smb_direct_max_outstanding_rw_ops;
+
+ max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
+ if (max_send_wrs > device->attrs.max_cqe ||
+ max_send_wrs > device->attrs.max_qp_wr) {
+ pr_err("consider lowering send_credit_target = %d, or max_outstanding_rw_ops = %d\n",
+ smb_direct_send_credit_target,
+ smb_direct_max_outstanding_rw_ops);
+ pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
+ device->attrs.max_cqe, device->attrs.max_qp_wr);
+ return -EINVAL;
+ }
+
+ if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
+ smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
+ pr_err("consider lowering receive_credit_max = %d\n",
+ smb_direct_receive_credit_max);
+ pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n",
+ device->attrs.max_cqe, device->attrs.max_qp_wr);
+ return -EINVAL;
+ }
+
+ if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) {
+ pr_err("warning: device max_send_sge = %d too small\n",
+ device->attrs.max_send_sge);
+ return -EINVAL;
+ }
+ if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
+ pr_err("warning: device max_recv_sge = %d too small\n",
+ device->attrs.max_recv_sge);
+ return -EINVAL;
+ }
+
+ t->recv_credits = 0;
+ t->count_avail_recvmsg = 0;
+
+ t->recv_credit_max = smb_direct_receive_credit_max;
+ t->recv_credit_target = 10;
+ t->new_recv_credits = 0;
+
+ t->send_credit_target = smb_direct_send_credit_target;
+ atomic_set(&t->send_credits, 0);
+ atomic_set(&t->rw_avail_ops, smb_direct_max_outstanding_rw_ops);
+
+ t->max_send_size = smb_direct_max_send_size;
+ t->max_recv_size = smb_direct_max_receive_size;
+ t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;
+
+ cap->max_send_wr = max_send_wrs;
+ cap->max_recv_wr = t->recv_credit_max;
+ cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
+ cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
+ cap->max_inline_data = 0;
+ cap->max_rdma_ctxs = 0;
+ return 0;
+}
+
+static void smb_direct_destroy_pools(struct smb_direct_transport *t)
+{
+ struct smb_direct_recvmsg *recvmsg;
+
+ while ((recvmsg = get_free_recvmsg(t)))
+ mempool_free(recvmsg, t->recvmsg_mempool);
+ while ((recvmsg = get_empty_recvmsg(t)))
+ mempool_free(recvmsg, t->recvmsg_mempool);
+
+ mempool_destroy(t->recvmsg_mempool);
+ t->recvmsg_mempool = NULL;
+
+ kmem_cache_destroy(t->recvmsg_cache);
+ t->recvmsg_cache = NULL;
+
+ mempool_destroy(t->sendmsg_mempool);
+ t->sendmsg_mempool = NULL;
+
+ kmem_cache_destroy(t->sendmsg_cache);
+ t->sendmsg_cache = NULL;
+}
+
+static int smb_direct_create_pools(struct smb_direct_transport *t)
+{
+ char name[80];
+ int i;
+ struct smb_direct_recvmsg *recvmsg;
+
+ snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
+ t->sendmsg_cache = kmem_cache_create(name,
+ sizeof(struct smb_direct_sendmsg) +
+ sizeof(struct smb_direct_negotiate_resp),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!t->sendmsg_cache)
+ return -ENOMEM;
+
+ t->sendmsg_mempool = mempool_create(t->send_credit_target,
+ mempool_alloc_slab, mempool_free_slab,
+ t->sendmsg_cache);
+ if (!t->sendmsg_mempool)
+ goto err;
+
+ snprintf(name, sizeof(name), "smb_direct_resp_%p", t);
+ t->recvmsg_cache = kmem_cache_create(name,
+ sizeof(struct smb_direct_recvmsg) +
+ t->max_recv_size,
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!t->recvmsg_cache)
+ goto err;
+
+ t->recvmsg_mempool =
+ mempool_create(t->recv_credit_max, mempool_alloc_slab,
+ mempool_free_slab, t->recvmsg_cache);
+ if (!t->recvmsg_mempool)
+ goto err;
+
+ INIT_LIST_HEAD(&t->recvmsg_queue);
+
+ for (i = 0; i < t->recv_credit_max; i++) {
+ recvmsg = mempool_alloc(t->recvmsg_mempool, GFP_KERNEL);
+ if (!recvmsg)
+ goto err;
+ recvmsg->transport = t;
+ list_add(&recvmsg->list, &t->recvmsg_queue);
+ }
+ t->count_avail_recvmsg = t->recv_credit_max;
+
+ return 0;
+err:
+ smb_direct_destroy_pools(t);
+ return -ENOMEM;
+}
+
+static int smb_direct_create_qpair(struct smb_direct_transport *t,
+ struct ib_qp_cap *cap)
+{
+ int ret;
+ struct ib_qp_init_attr qp_attr;
+
+ t->pd = ib_alloc_pd(t->cm_id->device, 0);
+ if (IS_ERR(t->pd)) {
+ pr_err("Can't create RDMA PD\n");
+ ret = PTR_ERR(t->pd);
+ t->pd = NULL;
+ return ret;
+ }
+
+ t->send_cq = ib_alloc_cq(t->cm_id->device, t,
+ t->send_credit_target, 0, IB_POLL_WORKQUEUE);
+ if (IS_ERR(t->send_cq)) {
+ pr_err("Can't create RDMA send CQ\n");
+ ret = PTR_ERR(t->send_cq);
+ t->send_cq = NULL;
+ goto err;
+ }
+
+ t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
+ cap->max_send_wr + cap->max_rdma_ctxs,
+ 0, IB_POLL_WORKQUEUE);
+ if (IS_ERR(t->recv_cq)) {
+ pr_err("Can't create RDMA recv CQ\n");
+ ret = PTR_ERR(t->recv_cq);
+ t->recv_cq = NULL;
+ goto err;
+ }
+
+ memset(&qp_attr, 0, sizeof(qp_attr));
+ qp_attr.event_handler = smb_direct_qpair_handler;
+ qp_attr.qp_context = t;
+ qp_attr.cap = *cap;
+ qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+ qp_attr.qp_type = IB_QPT_RC;
+ qp_attr.send_cq = t->send_cq;
+ qp_attr.recv_cq = t->recv_cq;
+ qp_attr.port_num = ~0;
+
+ ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr);
+ if (ret) {
+ pr_err("Can't create RDMA QP: %d\n", ret);
+ goto err;
+ }
+
+ t->qp = t->cm_id->qp;
+ t->cm_id->event_handler = smb_direct_cm_handler;
+
+ return 0;
+err:
+ if (t->qp) {
+ ib_destroy_qp(t->qp);
+ t->qp = NULL;
+ }
+ if (t->recv_cq) {
+ /* CQs from ib_alloc_cq() must be released with ib_free_cq() */
+ ib_free_cq(t->recv_cq);
+ t->recv_cq = NULL;
+ }
+ if (t->send_cq) {
+ ib_free_cq(t->send_cq);
+ t->send_cq = NULL;
+ }
+ if (t->pd) {
+ ib_dealloc_pd(t->pd);
+ t->pd = NULL;
+ }
+ return ret;
+}
+
+static int smb_direct_prepare(struct ksmbd_transport *t)
+{
+ struct smb_direct_transport *st = smb_trans_direct_transfort(t);
+ int ret;
+ struct ib_qp_cap qp_cap;
+
+ ret = smb_direct_init_params(st, &qp_cap);
+ if (ret) {
+ pr_err("Can't configure RDMA parameters\n");
+ return ret;
+ }
+
+ ret = smb_direct_create_pools(st);
+ if (ret) {
+ pr_err("Can't init RDMA pool: %d\n", ret);
+ return ret;
+ }
+
+ ret = smb_direct_create_qpair(st, &qp_cap);
+ if (ret) {
+ pr_err("Can't accept RDMA client: %d\n", ret);
+ return ret;
+ }
+
+ ret = smb_direct_negotiate(st);
+ if (ret) {
+ pr_err("Can't negotiate: %d\n", ret);
+ return ret;
+ }
+
+ st->status = SMB_DIRECT_CS_CONNECTED;
+ return 0;
+}
+
+static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
+{
+ if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
+ return false;
+ if (attrs->max_fast_reg_page_list_len == 0)
+ return false;
+ return true;
+}
+
+static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
+{
+ struct smb_direct_transport *t;
+
+ if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
+ ksmbd_debug(RDMA,
+ "Fast Registration Work Requests is not supported. device capabilities=%llx\n",
+ new_cm_id->device->attrs.device_cap_flags);
+ return -EPROTONOSUPPORT;
+ }
+
+ t = alloc_transport(new_cm_id);
+ if (!t)
+ return -ENOMEM;
+
+ KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
+ KSMBD_TRANS(t)->conn, "ksmbd:r%u",
+ SMB_DIRECT_PORT);
+ if (IS_ERR(KSMBD_TRANS(t)->handler)) {
+ int ret = PTR_ERR(KSMBD_TRANS(t)->handler);
+
+ pr_err("Can't start thread\n");
+ free_transport(t);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
+{
+ switch (event->event) {
+ case RDMA_CM_EVENT_CONNECT_REQUEST: {
+ int ret = smb_direct_handle_connect_request(cm_id);
+
+ if (ret) {
+ pr_err("Can't create transport: %d\n", ret);
+ return ret;
+ }
+
+ ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
+ cm_id);
+ break;
+ }
+ default:
+ pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
+ cm_id, rdma_event_msg(event->event), event->event);
+ break;
+ }
+ return 0;
+}
+
+static int smb_direct_listen(int port)
+{
+ int ret;
+ struct rdma_cm_id *cm_id;
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ .sin_port = htons(port),
+ };
+
+ cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
+ &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
+ if (IS_ERR(cm_id)) {
+ pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
+ return PTR_ERR(cm_id);
+ }
+
+ ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
+ if (ret) {
+ pr_err("Can't bind: %d\n", ret);
+ goto err;
+ }
+
+ smb_direct_listener.cm_id = cm_id;
+
+ ret = rdma_listen(cm_id, 10);
+ if (ret) {
+ pr_err("Can't listen: %d\n", ret);
+ goto err;
+ }
+ return 0;
+err:
+ smb_direct_listener.cm_id = NULL;
+ rdma_destroy_id(cm_id);
+ return ret;
+}
+
+int ksmbd_rdma_init(void)
+{
+ int ret;
+
+ smb_direct_listener.cm_id = NULL;
+
+	/* When a client is running out of send credits, the server grants
+	 * credits by sending a packet through this queue. This prevents
+	 * a client from being unable to send packets for lack of credits.
+	 */
+ smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
+ WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
+ if (!smb_direct_wq)
+ return -ENOMEM;
+
+ ret = smb_direct_listen(SMB_DIRECT_PORT);
+ if (ret) {
+ destroy_workqueue(smb_direct_wq);
+ smb_direct_wq = NULL;
+ pr_err("Can't listen: %d\n", ret);
+ return ret;
+ }
+
+ ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
+ smb_direct_listener.cm_id);
+ return 0;
+}
+
+int ksmbd_rdma_destroy(void)
+{
+ if (smb_direct_listener.cm_id)
+ rdma_destroy_id(smb_direct_listener.cm_id);
+ smb_direct_listener.cm_id = NULL;
+
+ if (smb_direct_wq) {
+ flush_workqueue(smb_direct_wq);
+ destroy_workqueue(smb_direct_wq);
+ smb_direct_wq = NULL;
+ }
+ return 0;
+}
+
+bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
+{
+ struct ib_device *ibdev;
+ bool rdma_capable = false;
+
+ ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
+ if (ibdev) {
+ if (rdma_frwr_is_supported(&ibdev->attrs))
+ rdma_capable = true;
+ ib_device_put(ibdev);
+ }
+ return rdma_capable;
+}
+
+static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
+ .prepare = smb_direct_prepare,
+ .disconnect = smb_direct_disconnect,
+ .writev = smb_direct_writev,
+ .read = smb_direct_read,
+ .rdma_read = smb_direct_rdma_read,
+ .rdma_write = smb_direct_rdma_write,
+};
diff --git a/fs/ksmbd/transport_rdma.h b/fs/ksmbd/transport_rdma.h
new file mode 100644
index 000000000000..0fa8adc0776f
--- /dev/null
+++ b/fs/ksmbd/transport_rdma.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2017, Microsoft Corporation.
+ * Copyright (C) 2018, LG Electronics.
+ */
+
+#ifndef __KSMBD_TRANSPORT_RDMA_H__
+#define __KSMBD_TRANSPORT_RDMA_H__
+
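+/* ksmbd serves SMB Direct on a dedicated port, distinct from the
+ * regular SMB port 445
+ */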
+#define SMB_DIRECT_PORT 5445
+
+/* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */
+struct smb_direct_negotiate_req {
+ __le16 min_version;
+ __le16 max_version;
+ __le16 reserved;
+ __le16 credits_requested;
+ __le32 preferred_send_size;
+ __le32 max_receive_size;
+ __le32 max_fragmented_size;
+} __packed;
+
+/* SMB DIRECT negotiation response packet [MS-SMBD] 2.2.2 */
+struct smb_direct_negotiate_resp {
+ __le16 min_version;
+ __le16 max_version;
+ __le16 negotiated_version;
+ __le16 reserved;
+ __le16 credits_requested;
+ __le16 credits_granted;
+ __le32 status;
+ __le32 max_readwrite_size;
+ __le32 preferred_send_size;
+ __le32 max_receive_size;
+ __le32 max_fragmented_size;
+} __packed;
+
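+/* Flags bit of the data transfer packet: the sender requests that the
+ * receiver respond (used for keep-alive), per [MS-SMBD] 2.2.3
+ */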
+#define SMB_DIRECT_RESPONSE_REQUESTED 0x0001
+
+/* SMB DIRECT data transfer packet with payload [MS-SMBD] 2.2.3 */
+struct smb_direct_data_transfer {
+ __le16 credits_requested;
+ __le16 credits_granted;
+ __le16 flags;
+ __le16 reserved;
+ __le32 remaining_data_length;
+ __le32 data_offset;
+ __le32 data_length;
+ __le32 padding;
+ __u8 buffer[];
+} __packed;
+
+#ifdef CONFIG_SMB_SERVER_SMBDIRECT
+int ksmbd_rdma_init(void);
+int ksmbd_rdma_destroy(void);
+bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
+#else
+static inline int ksmbd_rdma_init(void) { return 0; }
+static inline int ksmbd_rdma_destroy(void) { return 0; }
+static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; }
+#endif
+
+#endif /* __KSMBD_TRANSPORT_RDMA_H__ */
diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c
new file mode 100644
index 000000000000..dc15a5ecd2e0
--- /dev/null
+++ b/fs/ksmbd/transport_tcp.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/freezer.h>
+
+#include "smb_common.h"
+#include "server.h"
+#include "auth.h"
+#include "connection.h"
+#include "transport_tcp.h"
+
+#define IFACE_STATE_DOWN BIT(0)
+#define IFACE_STATE_CONFIGURED BIT(1)
+
+struct interface {
+ struct task_struct *ksmbd_kthread;
+ struct socket *ksmbd_socket;
+ struct list_head entry;
+ char *name;
+ struct mutex sock_release_lock;
+ int state;
+};
+
+static LIST_HEAD(iface_list);
+
+static int bind_additional_ifaces;
+
+struct tcp_transport {
+ struct ksmbd_transport transport;
+ struct socket *sock;
+ struct kvec *iov;
+ unsigned int nr_iov;
+};
+
+static struct ksmbd_transport_ops ksmbd_tcp_transport_ops;
+
+static void tcp_stop_kthread(struct task_struct *kthread);
+static struct interface *alloc_iface(char *ifname);
+
+#define KSMBD_TRANS(t) (&(t)->transport)
+#define TCP_TRANS(t) ((struct tcp_transport *)container_of(t, \
+ struct tcp_transport, transport))
+
+static inline void ksmbd_tcp_nodelay(struct socket *sock)
+{
+ tcp_sock_set_nodelay(sock->sk);
+}
+
+static inline void ksmbd_tcp_reuseaddr(struct socket *sock)
+{
+ sock_set_reuseaddr(sock->sk);
+}
+
+static inline void ksmbd_tcp_rcv_timeout(struct socket *sock, s64 secs)
+{
+ lock_sock(sock->sk);
+ if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
+ sock->sk->sk_rcvtimeo = secs * HZ;
+ else
+ sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+ release_sock(sock->sk);
+}
+
+static inline void ksmbd_tcp_snd_timeout(struct socket *sock, s64 secs)
+{
+ sock_set_sndtimeo(sock->sk, secs);
+}
+
+static struct tcp_transport *alloc_transport(struct socket *client_sk)
+{
+ struct tcp_transport *t;
+ struct ksmbd_conn *conn;
+
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return NULL;
+ t->sock = client_sk;
+
+ conn = ksmbd_conn_alloc();
+ if (!conn) {
+ kfree(t);
+ return NULL;
+ }
+
+ conn->transport = KSMBD_TRANS(t);
+ KSMBD_TRANS(t)->conn = conn;
+ KSMBD_TRANS(t)->ops = &ksmbd_tcp_transport_ops;
+ return t;
+}
+
+static void free_transport(struct tcp_transport *t)
+{
+ kernel_sock_shutdown(t->sock, SHUT_RDWR);
+ sock_release(t->sock);
+ t->sock = NULL;
+
+ ksmbd_conn_free(KSMBD_TRANS(t)->conn);
+ kfree(t->iov);
+ kfree(t);
+}
+
+/**
+ * kvec_array_init() - initialize an IO vector segment
+ * @new: IO vector to be initialized
+ * @iov: base IO vector
+ * @nr_segs: number of segments in base iov
+ * @bytes: number of bytes already consumed from @iov
+ *
+ * Return: number of remaining IO segments
+ */
+static unsigned int kvec_array_init(struct kvec *new, struct kvec *iov,
+ unsigned int nr_segs, size_t bytes)
+{
+ size_t base = 0;
+
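+	/* skip the segments (and any zero-length entries) that were already
+	 * fully consumed, stopping inside the first partially-read segment
+	 */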
+ while (bytes || !iov->iov_len) {
+ int copy = min(bytes, iov->iov_len);
+
+ bytes -= copy;
+ base += copy;
+ if (iov->iov_len == base) {
+ iov++;
+ nr_segs--;
+ base = 0;
+ }
+ }
+
+ memcpy(new, iov, sizeof(*iov) * nr_segs);
+ new->iov_base += base;
+ new->iov_len -= base;
+ return nr_segs;
+}
+
+/**
+ * get_conn_iovec() - get connection iovec for reading from socket
+ * @t: TCP transport instance
+ * @nr_segs: number of segments in iov
+ *
+ * Return: existing or newly allocated iovec, or NULL on allocation failure
+ */
+static struct kvec *get_conn_iovec(struct tcp_transport *t, unsigned int nr_segs)
+{
+ struct kvec *new_iov;
+
+ if (t->iov && nr_segs <= t->nr_iov)
+ return t->iov;
+
+ /* not big enough -- allocate a new one and release the old */
+ new_iov = kmalloc_array(nr_segs, sizeof(*new_iov), GFP_KERNEL);
+ if (new_iov) {
+ kfree(t->iov);
+ t->iov = new_iov;
+ t->nr_iov = nr_segs;
+ }
+ return new_iov;
+}
+
+static unsigned short ksmbd_tcp_get_port(const struct sockaddr *sa)
+{
+ switch (sa->sa_family) {
+ case AF_INET:
+ return ntohs(((struct sockaddr_in *)sa)->sin_port);
+ case AF_INET6:
+ return ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
+ }
+ return 0;
+}
+
+/**
+ * ksmbd_tcp_new_connection() - create a new tcp connection
+ * @client_sk: socket associated with new connection
+ *
+ * whenever a new connection is requested, create a conn thread
+ * (session thread) to handle new incoming smb requests from the connection
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int ksmbd_tcp_new_connection(struct socket *client_sk)
+{
+ struct sockaddr *csin;
+ int rc = 0;
+ struct tcp_transport *t;
+
+ t = alloc_transport(client_sk);
+ if (!t)
+ return -ENOMEM;
+
+ csin = KSMBD_TCP_PEER_SOCKADDR(KSMBD_TRANS(t)->conn);
+ if (kernel_getpeername(client_sk, csin) < 0) {
+ pr_err("client ip resolution failed\n");
+ rc = -EINVAL;
+ goto out_error;
+ }
+
+ KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
+ KSMBD_TRANS(t)->conn,
+ "ksmbd:%u",
+ ksmbd_tcp_get_port(csin));
+ if (IS_ERR(KSMBD_TRANS(t)->handler)) {
+ pr_err("cannot start conn thread\n");
+ rc = PTR_ERR(KSMBD_TRANS(t)->handler);
+ free_transport(t);
+ }
+ return rc;
+
+out_error:
+ free_transport(t);
+ return rc;
+}
+
+/**
+ * ksmbd_kthread_fn() - listen to new SMB connections and callback server
+ * @p: pointer to the interface to accept connections on
+ *
+ * Return: 0 on kthread exit
+ */
+static int ksmbd_kthread_fn(void *p)
+{
+ struct socket *client_sk = NULL;
+ struct interface *iface = (struct interface *)p;
+ int ret;
+
+ while (!kthread_should_stop()) {
+ mutex_lock(&iface->sock_release_lock);
+ if (!iface->ksmbd_socket) {
+ mutex_unlock(&iface->sock_release_lock);
+ break;
+ }
+ ret = kernel_accept(iface->ksmbd_socket, &client_sk,
+ O_NONBLOCK);
+ mutex_unlock(&iface->sock_release_lock);
+ if (ret) {
+ if (ret == -EAGAIN)
+ /* check for new connections every 100 msecs */
+ schedule_timeout_interruptible(HZ / 10);
+ continue;
+ }
+
+ ksmbd_debug(CONN, "connect success: accepted new connection\n");
+ client_sk->sk->sk_rcvtimeo = KSMBD_TCP_RECV_TIMEOUT;
+ client_sk->sk->sk_sndtimeo = KSMBD_TCP_SEND_TIMEOUT;
+
+ ksmbd_tcp_new_connection(client_sk);
+ }
+
+ ksmbd_debug(CONN, "releasing socket\n");
+ return 0;
+}
+
+/**
+ * ksmbd_tcp_run_kthread() - start forker thread
+ * @iface: pointer to struct interface
+ *
+ * start forker thread (ksmbd-<ifname>) when an interface comes up to
+ * listen on the configured port for new SMB connection requests. It
+ * creates per connection server threads (ksmbd:<port>)
+ *
+ * Return: 0 on success or error number
+ */
+static int ksmbd_tcp_run_kthread(struct interface *iface)
+{
+ int rc;
+ struct task_struct *kthread;
+
+ kthread = kthread_run(ksmbd_kthread_fn, (void *)iface, "ksmbd-%s",
+ iface->name);
+ if (IS_ERR(kthread)) {
+ rc = PTR_ERR(kthread);
+ return rc;
+ }
+ iface->ksmbd_kthread = kthread;
+
+ return 0;
+}
+
+/**
+ * ksmbd_tcp_readv() - read data from socket in given iovec
+ * @t: TCP transport instance
+ * @iov_orig: base IO vector
+ * @nr_segs: number of segments in base iov
+ * @to_read: number of bytes to read from socket
+ *
+ * Return: on success return number of bytes read from socket,
+ * otherwise return error number
+ */
+static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig,
+ unsigned int nr_segs, unsigned int to_read)
+{
+ int length = 0;
+ int total_read;
+ unsigned int segs;
+ struct msghdr ksmbd_msg;
+ struct kvec *iov;
+ struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn;
+
+ iov = get_conn_iovec(t, nr_segs);
+ if (!iov)
+ return -ENOMEM;
+
+ ksmbd_msg.msg_control = NULL;
+ ksmbd_msg.msg_controllen = 0;
+
+ for (total_read = 0; to_read; total_read += length, to_read -= length) {
+ try_to_freeze();
+
+ if (!ksmbd_conn_alive(conn)) {
+ total_read = -ESHUTDOWN;
+ break;
+ }
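+		/* rebuild the iovec so it starts at the first unread byte */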
+ segs = kvec_array_init(iov, iov_orig, nr_segs, total_read);
+
+ length = kernel_recvmsg(t->sock, &ksmbd_msg,
+ iov, segs, to_read, 0);
+
+ if (length == -EINTR) {
+ total_read = -ESHUTDOWN;
+ break;
+ } else if (conn->status == KSMBD_SESS_NEED_RECONNECT) {
+ total_read = -EAGAIN;
+ break;
+ } else if (length == -ERESTARTSYS || length == -EAGAIN) {
+ usleep_range(1000, 2000);
+ length = 0;
+ continue;
+ } else if (length <= 0) {
+ total_read = -EAGAIN;
+ break;
+ }
+ }
+ return total_read;
+}
+
+/**
+ * ksmbd_tcp_read() - read data from socket in given buffer
+ * @t: TCP transport instance
+ * @buf: buffer to store read data from socket
+ * @to_read: number of bytes to read from socket
+ *
+ * Return: on success return number of bytes read from socket,
+ * otherwise return error number
+ */
+static int ksmbd_tcp_read(struct ksmbd_transport *t, char *buf, unsigned int to_read)
+{
+ struct kvec iov;
+
+ iov.iov_base = buf;
+ iov.iov_len = to_read;
+
+ return ksmbd_tcp_readv(TCP_TRANS(t), &iov, 1, to_read);
+}
+
+static int ksmbd_tcp_writev(struct ksmbd_transport *t, struct kvec *iov,
+ int nvecs, int size, bool need_invalidate,
+ unsigned int remote_key)
+{
+ struct msghdr smb_msg = {.msg_flags = MSG_NOSIGNAL};
+
+ return kernel_sendmsg(TCP_TRANS(t)->sock, &smb_msg, iov, nvecs, size);
+}
+
+static void ksmbd_tcp_disconnect(struct ksmbd_transport *t)
+{
+ free_transport(TCP_TRANS(t));
+}
+
+static void tcp_destroy_socket(struct socket *ksmbd_socket)
+{
+ int ret;
+
+ if (!ksmbd_socket)
+ return;
+
+	/* set the timeouts to zero, i.e. no timeout */
+ ksmbd_tcp_rcv_timeout(ksmbd_socket, 0);
+ ksmbd_tcp_snd_timeout(ksmbd_socket, 0);
+
+ ret = kernel_sock_shutdown(ksmbd_socket, SHUT_RDWR);
+ if (ret)
+ pr_err("Failed to shutdown socket: %d\n", ret);
+ sock_release(ksmbd_socket);
+}
+
+/**
+ * create_socket - create and listen on a socket for an interface
+ * @iface: interface to bind the listening socket to
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int create_socket(struct interface *iface)
+{
+ int ret;
+ struct sockaddr_in6 sin6;
+ struct sockaddr_in sin;
+ struct socket *ksmbd_socket;
+ bool ipv4 = false;
+
+ ret = sock_create(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &ksmbd_socket);
+ if (ret) {
+ pr_err("Can't create socket for ipv6, try ipv4: %d\n", ret);
+ ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP,
+ &ksmbd_socket);
+ if (ret) {
+ pr_err("Can't create socket for ipv4: %d\n", ret);
+ goto out_error;
+ }
+
+ sin.sin_family = PF_INET;
+ sin.sin_addr.s_addr = htonl(INADDR_ANY);
+ sin.sin_port = htons(server_conf.tcp_port);
+ ipv4 = true;
+ } else {
+ sin6.sin6_family = PF_INET6;
+ sin6.sin6_addr = in6addr_any;
+ sin6.sin6_port = htons(server_conf.tcp_port);
+ }
+
+ ksmbd_tcp_nodelay(ksmbd_socket);
+ ksmbd_tcp_reuseaddr(ksmbd_socket);
+
+ ret = sock_setsockopt(ksmbd_socket,
+ SOL_SOCKET,
+ SO_BINDTODEVICE,
+ KERNEL_SOCKPTR(iface->name),
+ strlen(iface->name));
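+	/* an -ENODEV result is deliberately treated as non-fatal here */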
+ if (ret != -ENODEV && ret < 0) {
+ pr_err("Failed to set SO_BINDTODEVICE: %d\n", ret);
+ goto out_error;
+ }
+
+ if (ipv4)
+ ret = kernel_bind(ksmbd_socket, (struct sockaddr *)&sin,
+ sizeof(sin));
+ else
+ ret = kernel_bind(ksmbd_socket, (struct sockaddr *)&sin6,
+ sizeof(sin6));
+ if (ret) {
+ pr_err("Failed to bind socket: %d\n", ret);
+ goto out_error;
+ }
+
+ ksmbd_socket->sk->sk_rcvtimeo = KSMBD_TCP_RECV_TIMEOUT;
+ ksmbd_socket->sk->sk_sndtimeo = KSMBD_TCP_SEND_TIMEOUT;
+
+ ret = kernel_listen(ksmbd_socket, KSMBD_SOCKET_BACKLOG);
+ if (ret) {
+ pr_err("Port listen() error: %d\n", ret);
+ goto out_error;
+ }
+
+ iface->ksmbd_socket = ksmbd_socket;
+ ret = ksmbd_tcp_run_kthread(iface);
+ if (ret) {
+ pr_err("Can't start ksmbd main kthread: %d\n", ret);
+ goto out_error;
+ }
+ iface->state = IFACE_STATE_CONFIGURED;
+
+ return 0;
+
+out_error:
+ tcp_destroy_socket(ksmbd_socket);
+ iface->ksmbd_socket = NULL;
+ return ret;
+}
+
+static int ksmbd_netdev_event(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct interface *iface;
+ int ret, found = 0;
+
+ switch (event) {
+ case NETDEV_UP:
+ if (netdev->priv_flags & IFF_BRIDGE_PORT)
+ return NOTIFY_OK;
+
+ list_for_each_entry(iface, &iface_list, entry) {
+ if (!strcmp(iface->name, netdev->name)) {
+ found = 1;
+ if (iface->state != IFACE_STATE_DOWN)
+ break;
+ ret = create_socket(iface);
+ if (ret)
+ return NOTIFY_OK;
+ break;
+ }
+ }
+ if (!found && bind_additional_ifaces) {
+ iface = alloc_iface(kstrdup(netdev->name, GFP_KERNEL));
+ if (!iface)
+ return NOTIFY_OK;
+ ret = create_socket(iface);
+ if (ret)
+ break;
+ }
+ break;
+ case NETDEV_DOWN:
+ list_for_each_entry(iface, &iface_list, entry) {
+ if (!strcmp(iface->name, netdev->name) &&
+ iface->state == IFACE_STATE_CONFIGURED) {
+ tcp_stop_kthread(iface->ksmbd_kthread);
+ iface->ksmbd_kthread = NULL;
+ mutex_lock(&iface->sock_release_lock);
+ tcp_destroy_socket(iface->ksmbd_socket);
+ iface->ksmbd_socket = NULL;
+ mutex_unlock(&iface->sock_release_lock);
+
+ iface->state = IFACE_STATE_DOWN;
+ break;
+ }
+ }
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ksmbd_netdev_notifier = {
+ .notifier_call = ksmbd_netdev_event,
+};
+
+int ksmbd_tcp_init(void)
+{
+ register_netdevice_notifier(&ksmbd_netdev_notifier);
+
+ return 0;
+}
+
+static void tcp_stop_kthread(struct task_struct *kthread)
+{
+ int ret;
+
+ if (!kthread)
+ return;
+
+ ret = kthread_stop(kthread);
+ if (ret)
+ pr_err("failed to stop forker thread\n");
+}
+
+void ksmbd_tcp_destroy(void)
+{
+ struct interface *iface, *tmp;
+
+ unregister_netdevice_notifier(&ksmbd_netdev_notifier);
+
+ list_for_each_entry_safe(iface, tmp, &iface_list, entry) {
+ list_del(&iface->entry);
+ kfree(iface->name);
+ kfree(iface);
+ }
+}
+
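+/*
+ * alloc_iface() - allocate and register a listening interface
+ * @ifname: heap-allocated interface name; ownership is taken and it is
+ *          freed if the allocation fails
+ */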
+static struct interface *alloc_iface(char *ifname)
+{
+ struct interface *iface;
+
+ if (!ifname)
+ return NULL;
+
+ iface = kzalloc(sizeof(struct interface), GFP_KERNEL);
+ if (!iface) {
+ kfree(ifname);
+ return NULL;
+ }
+
+ iface->name = ifname;
+ iface->state = IFACE_STATE_DOWN;
+ list_add(&iface->entry, &iface_list);
+ mutex_init(&iface->sock_release_lock);
+ return iface;
+}
+
+int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz)
+{
+ int sz = 0;
+
+ if (!ifc_list_sz) {
+ struct net_device *netdev;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, netdev) {
+ if (netdev->priv_flags & IFF_BRIDGE_PORT)
+ continue;
+ if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL)))
+ return -ENOMEM;
+ }
+ rtnl_unlock();
+ bind_additional_ifaces = 1;
+ return 0;
+ }
+
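+	/*
+	 * @ifc_list is a packed sequence of NUL-terminated interface names,
+	 * e.g. "eth0\0eth1\0"; walk it one entry at a time.
+	 */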
+ while (ifc_list_sz > 0) {
+ if (!alloc_iface(kstrdup(ifc_list, GFP_KERNEL)))
+ return -ENOMEM;
+
+ sz = strlen(ifc_list);
+ if (!sz)
+ break;
+
+ ifc_list += sz + 1;
+ ifc_list_sz -= (sz + 1);
+ }
+
+ bind_additional_ifaces = 0;
+
+ return 0;
+}
+
+static struct ksmbd_transport_ops ksmbd_tcp_transport_ops = {
+ .read = ksmbd_tcp_read,
+ .writev = ksmbd_tcp_writev,
+ .disconnect = ksmbd_tcp_disconnect,
+};
diff --git a/fs/ksmbd/transport_tcp.h b/fs/ksmbd/transport_tcp.h
new file mode 100644
index 000000000000..e338bebe322f
--- /dev/null
+++ b/fs/ksmbd/transport_tcp.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_TRANSPORT_TCP_H__
+#define __KSMBD_TRANSPORT_TCP_H__
+
+int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz);
+int ksmbd_tcp_init(void);
+void ksmbd_tcp_destroy(void);
+
+#endif /* __KSMBD_TRANSPORT_TCP_H__ */
diff --git a/fs/ksmbd/unicode.c b/fs/ksmbd/unicode.c
new file mode 100644
index 000000000000..a0db699ddafd
--- /dev/null
+++ b/fs/ksmbd/unicode.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Some of the source code in this file came from fs/cifs/cifs_unicode.c
+ *
+ * Copyright (c) International Business Machines Corp., 2000,2009
+ * Modified by Steve French (sfrench@us.ibm.com)
+ * Modified by Namjae Jeon (linkinjeon@kernel.org)
+ */
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+#include "glob.h"
+#include "unicode.h"
+#include "uniupr.h"
+#include "smb_common.h"
+
+/*
+ * smb_utf16_bytes() - how long will a string be after conversion?
+ * @from: pointer to input string
+ * @maxbytes: don't go past this many bytes of input string
+ * @codepage: destination codepage
+ *
+ * Walk a utf16le string and return the number of bytes that the string will
+ * be after being converted to the given charset, not including any null
+ * termination required. Don't walk past maxbytes in the source buffer.
+ *
+ * Return: string length after conversion
+ */
+static int smb_utf16_bytes(const __le16 *from, int maxbytes,
+ const struct nls_table *codepage)
+{
+ int i;
+ int charlen, outlen = 0;
+ int maxwords = maxbytes / 2;
+ char tmp[NLS_MAX_CHARSET_SIZE];
+ __u16 ftmp;
+
+ for (i = 0; i < maxwords; i++) {
+ ftmp = get_unaligned_le16(&from[i]);
+ if (ftmp == 0)
+ break;
+
+ charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
+ if (charlen > 0)
+ outlen += charlen;
+ else
+ outlen++;
+ }
+
+ return outlen;
+}
+
+/*
+ * cifs_mapchar() - convert a host-endian char to proper char in codepage
+ * @target: where converted character should be copied
+ * @src_char: 2 byte host-endian source character
+ * @cp: codepage to which character should be converted
+ * @mapchar: should character be mapped according to mapchars mount option?
+ *
+ * This function handles the conversion of a single character. It is the
+ * responsibility of the caller to ensure that the target buffer is large
+ * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
+ *
+ * Return: length of the converted character in bytes
+ */
+static int
+cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
+ bool mapchar)
+{
+ int len = 1;
+
+ if (!mapchar)
+ goto cp_convert;
+
+ /*
+ * BB: Cannot handle remapping UNI_SLASH until all the calls to
+ * build_path_from_dentry are modified, as they use slash as
+ * separator.
+ */
+ switch (src_char) {
+ case UNI_COLON:
+ *target = ':';
+ break;
+ case UNI_ASTERISK:
+ *target = '*';
+ break;
+ case UNI_QUESTION:
+ *target = '?';
+ break;
+ case UNI_PIPE:
+ *target = '|';
+ break;
+ case UNI_GRTRTHAN:
+ *target = '>';
+ break;
+ case UNI_LESSTHAN:
+ *target = '<';
+ break;
+ default:
+ goto cp_convert;
+ }
+
+out:
+ return len;
+
+cp_convert:
+ len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
+ if (len <= 0) {
+ *target = '?';
+ len = 1;
+ }
+
+ goto out;
+}
+
+/*
+ * is_char_allowed() - check for valid character
+ * @ch: input character to be checked
+ *
+ * Return: 1 if char is allowed, otherwise 0
+ */
+static inline int is_char_allowed(char *ch)
+{
+ /* check for control chars, wildcards etc. */
+ if (!(*ch & 0x80) &&
+ (*ch <= 0x1f ||
+ *ch == '?' || *ch == '"' || *ch == '<' ||
+ *ch == '>' || *ch == '|'))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * smb_from_utf16() - convert utf16le string to local charset
+ * @to: destination buffer
+ * @from: source buffer
+ * @tolen: destination buffer size (in bytes)
+ * @fromlen: source buffer size (in bytes)
+ * @codepage: codepage to which characters should be converted
+ * @mapchar: should characters be remapped according to the mapchars option?
+ *
+ * Convert a little-endian utf16le string (as sent by the server) to a string
+ * in the provided codepage. The tolen and fromlen parameters are to ensure
+ * that the code doesn't walk off of the end of the buffer (which is always
+ * a danger if the alignment of the source buffer is off). The destination
+ * string is always properly null terminated and fits in the destination
+ * buffer. Returns the length of the destination string in bytes (including
+ * null terminator).
+ *
+ * Note that some windows versions actually send multiword UTF-16 characters
+ * instead of straight UTF16-2. The linux nls routines however aren't able to
+ * deal with those characters properly. In the event that we get some of
+ * those characters, they won't be translated properly.
+ *
+ * Return: string length after conversion
+ */
+static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
+ const struct nls_table *codepage, bool mapchar)
+{
+ int i, charlen, safelen;
+ int outlen = 0;
+ int nullsize = nls_nullsize(codepage);
+ int fromwords = fromlen / 2;
+ char tmp[NLS_MAX_CHARSET_SIZE];
+ __u16 ftmp;
+
+ /*
+ * because the chars can be of varying widths, we need to take care
+ * not to overflow the destination buffer when we get close to the
+ * end of it. Until we get to this offset, we don't need to check
+ * for overflow however.
+ */
+ safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
+
+ for (i = 0; i < fromwords; i++) {
+ ftmp = get_unaligned_le16(&from[i]);
+ if (ftmp == 0)
+ break;
+
+ /*
+ * check to see if converting this character might make the
+ * conversion bleed into the null terminator
+ */
+ if (outlen >= safelen) {
+ charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar);
+ if ((outlen + charlen) > (tolen - nullsize))
+ break;
+ }
+
+ /* put converted char into 'to' buffer */
+ charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);
+ outlen += charlen;
+ }
+
+ /* properly null-terminate string */
+ for (i = 0; i < nullsize; i++)
+ to[outlen++] = 0;
+
+ return outlen;
+}
+
+/*
+ * smb_strtoUTF16() - Convert character string to unicode string
+ * @to: destination buffer
+ * @from: source buffer
+ * @len: destination buffer size (in bytes)
+ * @codepage: codepage to which characters should be converted
+ *
+ * Return: string length after conversion
+ */
+int smb_strtoUTF16(__le16 *to, const char *from, int len,
+ const struct nls_table *codepage)
+{
+ int charlen;
+ int i;
+ wchar_t wchar_to; /* needed to quiet sparse */
+
+	/* special case for utf8 to handle chars outside plane 0 (the BMP) */
+	if (!strcmp(codepage->charset, "utf8")) {
+		/*
+		 * convert utf8 -> utf16; we assume the caller has ensured the
+		 * destination is large enough, so the conversion cannot
+		 * overflow. Here len is treated as a length in wchar_t
+		 * (16 bit) units.
+		 */
+ i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
+ (wchar_t *)to, len);
+
+ /* if success terminate and exit */
+ if (i >= 0)
+ goto success;
+		/*
+		 * if it fails, fall back to UCS encoding as this function
+		 * should not return negative values; currently it can fail
+		 * only if the source contains invalidly encoded characters
+		 */
+ }
+
+ for (i = 0; len > 0 && *from; i++, from += charlen, len -= charlen) {
+ charlen = codepage->char2uni(from, len, &wchar_to);
+ if (charlen < 1) {
+ /* A question mark */
+ wchar_to = 0x003f;
+ charlen = 1;
+ }
+ put_unaligned_le16(wchar_to, &to[i]);
+ }
+
+success:
+ put_unaligned_le16(0, &to[i]);
+ return i;
+}
+
+/*
+ * smb_strndup_from_utf16() - copy a string from wire format to the local
+ * codepage
+ * @src: source string
+ * @maxlen: don't walk past this many bytes in the source string
+ * @is_unicode: is this a unicode string?
+ * @codepage: destination codepage
+ *
+ * Take a string given by the server, convert it to the local codepage and
+ * put it in a new buffer. Returns a pointer to the new string or NULL on
+ * error.
+ *
+ * Return: destination string buffer or error ptr
+ */
+char *smb_strndup_from_utf16(const char *src, const int maxlen,
+ const bool is_unicode,
+ const struct nls_table *codepage)
+{
+ int len, ret;
+ char *dst;
+
+ if (is_unicode) {
+ len = smb_utf16_bytes((__le16 *)src, maxlen, codepage);
+ len += nls_nullsize(codepage);
+ dst = kmalloc(len, GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+ ret = smb_from_utf16(dst, (__le16 *)src, len, maxlen, codepage,
+ false);
+ if (ret < 0) {
+ kfree(dst);
+ return ERR_PTR(-EINVAL);
+ }
+ } else {
+ len = strnlen(src, maxlen);
+ len++;
+ dst = kmalloc(len, GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+ strscpy(dst, src, len);
+ }
+
+ return dst;
+}
+
+/*
+ * smbConvertToUTF16() - convert string from local charset to utf16
+ * @target: destination buffer
+ * @source: source buffer
+ * @srclen: source buffer size (in bytes)
+ * @cp: codepage to which characters should be converted
+ * @mapchars: should characters be remapped according to the mapchars option?
+ *
+ * Convert a 16 bit Unicode pathname to wire format from a string in the
+ * current code page. Conversion may involve remapping the six characters
+ * that are only legal in POSIX-like OSes (if they are present in the
+ * string). Path names are little endian 16 bit Unicode on the wire.
+ *
+ * Return: length of the converted string in 16 bit units
+ */
+int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
+ const struct nls_table *cp, int mapchars)
+{
+ int i, j, charlen;
+ char src_char;
+ __le16 dst_char;
+ wchar_t tmp;
+
+ if (!mapchars)
+ return smb_strtoUTF16(target, source, srclen, cp);
+
+ for (i = 0, j = 0; i < srclen; j++) {
+ src_char = source[i];
+ charlen = 1;
+ switch (src_char) {
+ case 0:
+ put_unaligned(0, &target[j]);
+ return j;
+ case ':':
+ dst_char = cpu_to_le16(UNI_COLON);
+ break;
+ case '*':
+ dst_char = cpu_to_le16(UNI_ASTERISK);
+ break;
+ case '?':
+ dst_char = cpu_to_le16(UNI_QUESTION);
+ break;
+ case '<':
+ dst_char = cpu_to_le16(UNI_LESSTHAN);
+ break;
+ case '>':
+ dst_char = cpu_to_le16(UNI_GRTRTHAN);
+ break;
+ case '|':
+ dst_char = cpu_to_le16(UNI_PIPE);
+ break;
+ /*
+ * FIXME: We can not handle remapping backslash (UNI_SLASH)
+ * until all the calls to build_path_from_dentry are modified,
+ * as they use backslash as separator.
+ */
+ default:
+ charlen = cp->char2uni(source + i, srclen - i, &tmp);
+ dst_char = cpu_to_le16(tmp);
+
+ /*
+ * if no match, use question mark, which at least in
+ * some cases serves as wild card
+ */
+ if (charlen < 1) {
+ dst_char = cpu_to_le16(0x003f);
+ charlen = 1;
+ }
+ }
+ /*
+ * character may take more than one byte in the source string,
+ * but will take exactly two bytes in the target string
+ */
+ i += charlen;
+ put_unaligned(dst_char, &target[j]);
+ }
+
+ return j;
+}
diff --git a/fs/ksmbd/unicode.h b/fs/ksmbd/unicode.h
new file mode 100644
index 000000000000..5593024230ae
--- /dev/null
+++ b/fs/ksmbd/unicode.h
@@ -0,0 +1,357 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Some of the source code in this file came from fs/cifs/cifs_unicode.c
+ * cifs_unicode: Unicode kernel case support
+ *
+ * Function:
+ * Convert a unicode character to upper or lower case using
+ * compressed tables.
+ *
+ * Copyright (c) International Business Machines Corp., 2000,2009
+ *
+ *
+ * Notes:
+ * These APIs are based on the C library functions. The semantics
+ * should match the C functions but with expanded size operands.
+ *
+ * The upper/lower functions are based on a table created by mkupr.
+ * This is a compressed table of upper and lower case conversion.
+ *
+ */
+#ifndef _CIFS_UNICODE_H
+#define _CIFS_UNICODE_H
+
+#include <asm/byteorder.h>
+#include <linux/types.h>
+#include <linux/nls.h>
+
+#define UNIUPR_NOLOWER		/* defined to avoid expanding the lower case tables */
+
+/*
+ * Windows maps these to the user defined 16 bit Unicode range since they are
+ * reserved symbols (along with \ and /), otherwise illegal to store
+ * in filenames in NTFS
+ */
+#define UNI_ASTERISK ((__u16)('*' + 0xF000))
+#define UNI_QUESTION ((__u16)('?' + 0xF000))
+#define UNI_COLON ((__u16)(':' + 0xF000))
+#define UNI_GRTRTHAN ((__u16)('>' + 0xF000))
+#define UNI_LESSTHAN ((__u16)('<' + 0xF000))
+#define UNI_PIPE ((__u16)('|' + 0xF000))
+#define UNI_SLASH ((__u16)('\\' + 0xF000))
+
+/* Just define what we want from uniupr.h. We don't want to define the tables
+ * in each source file.
+ */
+#ifndef UNICASERANGE_DEFINED
+struct UniCaseRange {
+ wchar_t start;
+ wchar_t end;
+ signed char *table;
+};
+#endif /* UNICASERANGE_DEFINED */
+
+#ifndef UNIUPR_NOUPPER
+extern signed char SmbUniUpperTable[512];
+extern const struct UniCaseRange SmbUniUpperRange[];
+#endif /* UNIUPR_NOUPPER */
+
+#ifndef UNIUPR_NOLOWER
+extern signed char CifsUniLowerTable[512];
+extern const struct UniCaseRange CifsUniLowerRange[];
+#endif /* UNIUPR_NOLOWER */
+
+#ifdef __KERNEL__
+int smb_strtoUTF16(__le16 *to, const char *from, int len,
+ const struct nls_table *codepage);
+char *smb_strndup_from_utf16(const char *src, const int maxlen,
+ const bool is_unicode,
+ const struct nls_table *codepage);
+int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
+ const struct nls_table *cp, int mapchars);
+char *ksmbd_extract_sharename(char *treename);
+#endif
+
+/*
+ * UniStrcat: Concatenate the second string to the first
+ *
+ * Returns:
+ * Address of the first string
+ */
+static inline wchar_t *UniStrcat(wchar_t *ucs1, const wchar_t *ucs2)
+{
+ wchar_t *anchor = ucs1; /* save a pointer to start of ucs1 */
+
+ while (*ucs1++)
+ /*NULL*/; /* To end of first string */
+ ucs1--; /* Return to the null */
+ while ((*ucs1++ = *ucs2++))
+ /*NULL*/; /* copy string 2 over */
+ return anchor;
+}
+
+/*
+ * UniStrchr: Find a character in a string
+ *
+ * Returns:
+ * Address of first occurrence of character in string
+ * or NULL if the character is not in the string
+ */
+static inline wchar_t *UniStrchr(const wchar_t *ucs, wchar_t uc)
+{
+ while ((*ucs != uc) && *ucs)
+ ucs++;
+
+ if (*ucs == uc)
+ return (wchar_t *)ucs;
+ return NULL;
+}
+
+/*
+ * UniStrcmp: Compare two strings
+ *
+ * Returns:
+ * < 0: First string is less than second
+ * = 0: Strings are equal
+ * > 0: First string is greater than second
+ */
+static inline int UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
+{
+ while ((*ucs1 == *ucs2) && *ucs1) {
+ ucs1++;
+ ucs2++;
+ }
+ return (int)*ucs1 - (int)*ucs2;
+}
+
+/*
+ * UniStrcpy: Copy a string
+ */
+static inline wchar_t *UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
+{
+ wchar_t *anchor = ucs1; /* save the start of result string */
+
+ while ((*ucs1++ = *ucs2++))
+ /*NULL*/;
+ return anchor;
+}
+
+/*
+ * UniStrlen: Return the length of a string (in 16 bit Unicode chars not bytes)
+ */
+static inline size_t UniStrlen(const wchar_t *ucs1)
+{
+ int i = 0;
+
+ while (*ucs1++)
+ i++;
+ return i;
+}
+
+/*
+ * UniStrnlen: Return the length (in 16 bit Unicode chars not bytes) of a
+ * string (length limited)
+ */
+static inline size_t UniStrnlen(const wchar_t *ucs1, int maxlen)
+{
+ int i = 0;
+
+ while (*ucs1++) {
+ i++;
+ if (i >= maxlen)
+ break;
+ }
+ return i;
+}
+
+/*
+ * UniStrncat: Concatenate length limited string
+ */
+static inline wchar_t *UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+ wchar_t *anchor = ucs1; /* save pointer to string 1 */
+
+ while (*ucs1++)
+ /*NULL*/;
+ ucs1--; /* point to null terminator of s1 */
+ while (n-- && (*ucs1 = *ucs2)) { /* copy s2 after s1 */
+ ucs1++;
+ ucs2++;
+ }
+ *ucs1 = 0; /* Null terminate the result */
+ return anchor;
+}
+
+/*
+ * UniStrncmp: Compare length limited string
+ */
+static inline int UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+ if (!n)
+ return 0; /* Null strings are equal */
+ while ((*ucs1 == *ucs2) && *ucs1 && --n) {
+ ucs1++;
+ ucs2++;
+ }
+ return (int)*ucs1 - (int)*ucs2;
+}
+
+/*
+ * UniStrncmp_le: Compare length limited string - native to little-endian
+ */
+static inline int
+UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+ if (!n)
+ return 0; /* Null strings are equal */
+ while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) {
+ ucs1++;
+ ucs2++;
+ }
+ return (int)*ucs1 - (int)__le16_to_cpu(*ucs2);
+}
+
+/*
+ * UniStrncpy: Copy length limited string with pad
+ */
+static inline wchar_t *UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+ wchar_t *anchor = ucs1;
+
+ while (n-- && *ucs2) /* Copy the strings */
+ *ucs1++ = *ucs2++;
+
+ n++;
+ while (n--) /* Pad with nulls */
+ *ucs1++ = 0;
+ return anchor;
+}
+
+/*
+ * UniStrncpy_le: Copy length limited string with pad from little-endian
+ */
+static inline wchar_t *UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
+{
+ wchar_t *anchor = ucs1;
+
+ while (n-- && *ucs2) /* Copy the strings */
+ *ucs1++ = __le16_to_cpu(*ucs2++);
+
+ n++;
+ while (n--) /* Pad with nulls */
+ *ucs1++ = 0;
+ return anchor;
+}
+
+/*
+ * UniStrstr: Find a string in a string
+ *
+ * Returns:
+ * Address of first match found
+ * NULL if no matching string is found
+ */
+static inline wchar_t *UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
+{
+ const wchar_t *anchor1 = ucs1;
+ const wchar_t *anchor2 = ucs2;
+
+ while (*ucs1) {
+ if (*ucs1 == *ucs2) {
+ /* Partial match found */
+ ucs1++;
+ ucs2++;
+ } else {
+ if (!*ucs2) /* Match found */
+ return (wchar_t *)anchor1;
+ ucs1 = ++anchor1; /* No match */
+ ucs2 = anchor2;
+ }
+ }
+
+ if (!*ucs2) /* Both end together */
+ return (wchar_t *)anchor1; /* Match found */
+ return NULL; /* No match */
+}
+
+#ifndef UNIUPR_NOUPPER
+/*
+ * UniToupper: Convert a unicode character to upper case
+ */
+static inline wchar_t UniToupper(register wchar_t uc)
+{
+ register const struct UniCaseRange *rp;
+
+ if (uc < sizeof(SmbUniUpperTable)) {
+ /* Latin characters */
+ return uc + SmbUniUpperTable[uc]; /* Use base tables */
+ }
+
+ rp = SmbUniUpperRange; /* Use range tables */
+ while (rp->start) {
+ if (uc < rp->start) /* Before start of range */
+ return uc; /* Uppercase = input */
+ if (uc <= rp->end) /* In range */
+ return uc + rp->table[uc - rp->start];
+ rp++; /* Try next range */
+ }
+ return uc; /* Past last range */
+}
+
+/*
+ * UniStrupr: Upper case a unicode string
+ */
+static inline __le16 *UniStrupr(register __le16 *upin)
+{
+ register __le16 *up;
+
+ up = upin;
+ while (*up) { /* For all characters */
+ *up = cpu_to_le16(UniToupper(le16_to_cpu(*up)));
+ up++;
+ }
+ return upin; /* Return input pointer */
+}
+#endif /* UNIUPR_NOUPPER */
+
+#ifndef UNIUPR_NOLOWER
+/*
+ * UniTolower: Convert a unicode character to lower case
+ */
+static inline wchar_t UniTolower(register wchar_t uc)
+{
+ register const struct UniCaseRange *rp;
+
+ if (uc < sizeof(CifsUniLowerTable)) {
+ /* Latin characters */
+ return uc + CifsUniLowerTable[uc]; /* Use base tables */
+ }
+
+ rp = CifsUniLowerRange; /* Use range tables */
+ while (rp->start) {
+ if (uc < rp->start) /* Before start of range */
+			return uc;	/* Lowercase = input */
+ if (uc <= rp->end) /* In range */
+ return uc + rp->table[uc - rp->start];
+ rp++; /* Try next range */
+ }
+ return uc; /* Past last range */
+}
+
+/*
+ * UniStrlwr: Lower case a unicode string
+ */
+static inline wchar_t *UniStrlwr(register wchar_t *upin)
+{
+ register wchar_t *up;
+
+ up = upin;
+ while (*up) { /* For all characters */
+ *up = UniTolower(*up);
+ up++;
+ }
+ return upin; /* Return input pointer */
+}
+
+#endif
+
+#endif /* _CIFS_UNICODE_H */
diff --git a/fs/ksmbd/uniupr.h b/fs/ksmbd/uniupr.h
new file mode 100644
index 000000000000..26583b776897
--- /dev/null
+++ b/fs/ksmbd/uniupr.h
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Some of the source code in this file came from fs/cifs/uniupr.h
+ * Copyright (c) International Business Machines Corp., 2000,2002
+ *
+ * uniupr.h - Unicode compressed case ranges
+ *
+ */
+#ifndef __KSMBD_UNIUPR_H
+#define __KSMBD_UNIUPR_H
+
+#ifndef UNIUPR_NOUPPER
+/*
+ * Latin upper case
+ */
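+/* delta table: for ch < 0x200, uppercase = ch + SmbUniUpperTable[ch] */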
+signed char SmbUniUpperTable[512] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 030-03f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 040-04f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 050-05f */
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, /* 060-06f */
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, 0, 0, 0, 0, 0, /* 070-07f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 080-08f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 090-09f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0a0-0af */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0b0-0bf */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0c0-0cf */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0d0-0df */
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, /* 0e0-0ef */
+ -32, -32, -32, -32, -32, -32, -32, 0, -32, -32,
+ -32, -32, -32, -32, -32, 121, /* 0f0-0ff */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 100-10f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 110-11f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 120-12f */
+ 0, 0, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 130-13f */
+ -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, /* 140-14f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 150-15f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 160-16f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 170-17f */
+ 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, /* 180-18f */
+ 0, 0, -1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, /* 190-19f */
+ 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, /* 1a0-1af */
+ -1, 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, /* 1b0-1bf */
+ 0, 0, 0, 0, 0, -1, -2, 0, -1, -2, 0, -1, -2, 0, -1, 0, /* 1c0-1cf */
+ -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -79, 0, -1, /* 1d0-1df */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e0-1ef */
+ 0, 0, -1, -2, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, /* 1f0-1ff */
+};
+
+/* Upper case range - Greek */
+static signed char UniCaseRangeU03a0[47] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -38, -37, -37, -37, /* 3a0-3af */
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, /* 3b0-3bf */
+ -32, -32, -31, -32, -32, -32, -32, -32, -32, -32, -32, -32, -64,
+ -63, -63,
+};
+
+/* Upper case range - Cyrillic */
+static signed char UniCaseRangeU0430[48] = {
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, /* 430-43f */
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, /* 440-44f */
+ 0, -80, -80, -80, -80, -80, -80, -80, -80, -80, -80,
+ -80, -80, 0, -80, -80, /* 450-45f */
+};
+
+/* Upper case range - Extended cyrillic */
+static signed char UniCaseRangeU0490[61] = {
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 490-49f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4a0-4af */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4b0-4bf */
+ 0, 0, -1, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1,
+};
+
+/* Upper case range - Extended latin and greek */
+static signed char UniCaseRangeU1e00[509] = {
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e00-1e0f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e10-1e1f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e20-1e2f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e30-1e3f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e40-1e4f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e50-1e5f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e60-1e6f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e70-1e7f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e80-1e8f */
+ 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, -59, 0, -1, 0, -1, /* 1e90-1e9f */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ea0-1eaf */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1eb0-1ebf */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ec0-1ecf */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ed0-1edf */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ee0-1eef */
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, /* 1ef0-1eff */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f00-1f0f */
+ 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f10-1f1f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f20-1f2f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f30-1f3f */
+ 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f40-1f4f */
+ 0, 8, 0, 8, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f50-1f5f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f60-1f6f */
+ 74, 74, 86, 86, 86, 86, 100, 100, 0, 0, 112, 112,
+ 126, 126, 0, 0, /* 1f70-1f7f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f80-1f8f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f90-1f9f */
+ 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fa0-1faf */
+ 8, 8, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fb0-1fbf */
+ 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fc0-1fcf */
+ 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fd0-1fdf */
+ 8, 8, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fe0-1fef */
+ 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* Upper case range - Wide latin */
+static signed char UniCaseRangeUff40[27] = {
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, /* ff40-ff4f */
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+};
+
+/*
+ * Upper Case Range
+ */
+const struct UniCaseRange SmbUniUpperRange[] = {
+ {0x03a0, 0x03ce, UniCaseRangeU03a0},
+ {0x0430, 0x045f, UniCaseRangeU0430},
+ {0x0490, 0x04cc, UniCaseRangeU0490},
+ {0x1e00, 0x1ffc, UniCaseRangeU1e00},
+ {0xff40, 0xff5a, UniCaseRangeUff40},
+ {0}
+};
+#endif
+
+#ifndef UNIUPR_NOLOWER
+/*
+ * Latin lower case
+ */
+signed char CifsUniLowerTable[512] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 030-03f */
+ 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, /* 040-04f */
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0,
+ 0, 0, 0, /* 050-05f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 060-06f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 070-07f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 080-08f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 090-09f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0a0-0af */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0b0-0bf */
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, /* 0c0-0cf */
+ 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32,
+ 32, 32, 32, 0, /* 0d0-0df */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0e0-0ef */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0f0-0ff */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 100-10f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 110-11f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 120-12f */
+ 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, /* 130-13f */
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, /* 140-14f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 150-15f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 160-16f */
+ 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0,
+ 0, /* 170-17f */
+ 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 79,
+ 0, /* 180-18f */
+ 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 190-19f */
+ 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, /* 1a0-1af */
+ 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, /* 1b0-1bf */
+ 0, 0, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 1, /* 1c0-1cf */
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, /* 1d0-1df */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e0-1ef */
+ 0, 2, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1f0-1ff */
+};
+
+/* Lower case range - Greek */
+static signed char UniCaseRangeL0380[44] = {
+ 0, 0, 0, 0, 0, 0, 38, 0, 37, 37, 37, 0, 64, 0, 63, 63, /* 380-38f */
+ 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, /* 390-39f */
+ 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+};
+
+/* Lower case range - Cyrillic */
+static signed char UniCaseRangeL0400[48] = {
+ 0, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
+ 0, 80, 80, /* 400-40f */
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, /* 410-41f */
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, /* 420-42f */
+};
+
+/* Lower case range - Extended cyrillic */
+static signed char UniCaseRangeL0490[60] = {
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 490-49f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 4a0-4af */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 4b0-4bf */
+ 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
+};
+
+/* Lower case range - Extended latin and greek */
+static signed char UniCaseRangeL1e00[504] = {
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e00-1e0f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e10-1e1f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e20-1e2f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e30-1e3f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e40-1e4f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e50-1e5f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e60-1e6f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e70-1e7f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1e80-1e8f */
+ 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* 1e90-1e9f */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1ea0-1eaf */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1eb0-1ebf */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1ec0-1ecf */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1ed0-1edf */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, /* 1ee0-1eef */
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 1ef0-1eff */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1f00-1f0f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, 0, 0, /* 1f10-1f1f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1f20-1f2f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1f30-1f3f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, 0, 0, /* 1f40-1f4f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, -8, 0, -8, 0, -8, 0, -8, /* 1f50-1f5f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1f60-1f6f */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f70-1f7f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1f80-1f8f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1f90-1f9f */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -8, -8, -8, -8, -8, -8, /* 1fa0-1faf */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -74, -74, -9, 0, 0, 0, /* 1fb0-1fbf */
+ 0, 0, 0, 0, 0, 0, 0, 0, -86, -86, -86, -86, -9, 0,
+ 0, 0, /* 1fc0-1fcf */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -100, -100, 0, 0, 0, 0, /* 1fd0-1fdf */
+ 0, 0, 0, 0, 0, 0, 0, 0, -8, -8, -112, -112, -7, 0,
+ 0, 0, /* 1fe0-1fef */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* Lower case range - Wide latin */
+static signed char UniCaseRangeLff20[27] = {
+ 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, /* ff20-ff2f */
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+};
+
+/*
+ * Lower Case Range
+ */
+const struct UniCaseRange CifsUniLowerRange[] = {
+ {0x0380, 0x03ab, UniCaseRangeL0380},
+ {0x0400, 0x042f, UniCaseRangeL0400},
+ {0x0490, 0x04cb, UniCaseRangeL0490},
+ {0x1e00, 0x1ff7, UniCaseRangeL1e00},
+ {0xff20, 0xff3a, UniCaseRangeLff20},
+ {0}
+};
+#endif
+
+#endif /* __KSMBD_UNIUPR_H */
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
new file mode 100644
index 000000000000..aee28ee6b19c
--- /dev/null
+++ b/fs/ksmbd/vfs.c
@@ -0,0 +1,1895 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/backing-dev.h>
+#include <linux/writeback.h>
+#include <linux/xattr.h>
+#include <linux/falloc.h>
+#include <linux/genhd.h>
+#include <linux/fsnotify.h>
+#include <linux/dcache.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched/xacct.h>
+#include <linux/crc32c.h>
+
+#include "glob.h"
+#include "oplock.h"
+#include "connection.h"
+#include "vfs.h"
+#include "vfs_cache.h"
+#include "smbacl.h"
+#include "ndr.h"
+#include "auth.h"
+#include "misc.h"
+
+#include "smb_common.h"
+#include "mgmt/share_config.h"
+#include "mgmt/tree_connect.h"
+#include "mgmt/user_session.h"
+#include "mgmt/user_config.h"
+
+static char *extract_last_component(char *path)
+{
+ char *p = strrchr(path, '/');
+
+ if (p && p[1] != '\0') {
+ *p = '\0';
+ p++;
+ } else {
+ p = NULL;
+ pr_err("Invalid path %s\n", path);
+ }
+ return p;
+}
+
+static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work,
+ struct inode *parent_inode,
+ struct inode *inode)
+{
+ if (!test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_INHERIT_OWNER))
+ return;
+
+ i_uid_write(inode, i_uid_read(parent_inode));
+}
+
+/**
+ * ksmbd_vfs_lock_parent() - lock parent dentry if it is stable
+ * @parent: parent dentry
+ * @child: child dentry
+ *
+ * The parent dentry obtained with dget_parent(), or passed in as @parent,
+ * may be unstable, so lock the parent inode and look up the child dentry
+ * again to verify it. On success the parent inode is left locked and the
+ * caller must unlock it. The reference count of @parent is not
+ * incremented.
+ */
+int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child)
+{
+ struct dentry *dentry;
+ int ret = 0;
+
+ inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
+ dentry = lookup_one_len(child->d_name.name, parent,
+ child->d_name.len);
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ goto out_err;
+ }
+
+ if (dentry != child) {
+ ret = -ESTALE;
+ dput(dentry);
+ goto out_err;
+ }
+
+ dput(dentry);
+ return 0;
+out_err:
+ inode_unlock(d_inode(parent));
+ return ret;
+}
+
+int ksmbd_vfs_may_delete(struct user_namespace *user_ns,
+ struct dentry *dentry)
+{
+ struct dentry *parent;
+ int ret;
+
+ parent = dget_parent(dentry);
+ ret = ksmbd_vfs_lock_parent(parent, dentry);
+ if (ret) {
+ dput(parent);
+ return ret;
+ }
+
+ ret = inode_permission(user_ns, d_inode(parent),
+ MAY_EXEC | MAY_WRITE);
+
+ inode_unlock(d_inode(parent));
+ dput(parent);
+ return ret;
+}
+
+int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
+ struct dentry *dentry, __le32 *daccess)
+{
+ struct dentry *parent;
+ int ret = 0;
+
+ *daccess = cpu_to_le32(FILE_READ_ATTRIBUTES | READ_CONTROL);
+
+ if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_WRITE))
+ *daccess |= cpu_to_le32(WRITE_DAC | WRITE_OWNER | SYNCHRONIZE |
+ FILE_WRITE_DATA | FILE_APPEND_DATA |
+ FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES |
+ FILE_DELETE_CHILD);
+
+ if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_READ))
+ *daccess |= FILE_READ_DATA_LE | FILE_READ_EA_LE;
+
+ if (!inode_permission(user_ns, d_inode(dentry), MAY_OPEN | MAY_EXEC))
+ *daccess |= FILE_EXECUTE_LE;
+
+ parent = dget_parent(dentry);
+ ret = ksmbd_vfs_lock_parent(parent, dentry);
+ if (ret) {
+ dput(parent);
+ return ret;
+ }
+
+ if (!inode_permission(user_ns, d_inode(parent), MAY_EXEC | MAY_WRITE))
+ *daccess |= FILE_DELETE_LE;
+
+ inode_unlock(d_inode(parent));
+ dput(parent);
+ return ret;
+}
+
+/**
+ * ksmbd_vfs_create() - vfs helper for smb create file
+ * @work: work
+ * @name: file name
+ * @mode: file create mode
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
+{
+ struct path path;
+ struct dentry *dentry;
+ int err;
+
+ dentry = kern_path_create(AT_FDCWD, name, &path, 0);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ if (err != -ENOENT)
+ pr_err("path create failed for %s, err %d\n",
+ name, err);
+ return err;
+ }
+
+ mode |= S_IFREG;
+ err = vfs_create(mnt_user_ns(path.mnt), d_inode(path.dentry),
+ dentry, mode, true);
+ if (!err) {
+ ksmbd_vfs_inherit_owner(work, d_inode(path.dentry),
+ d_inode(dentry));
+ } else {
+ pr_err("File(%s): creation failed (err:%d)\n", name, err);
+ }
+ done_path_create(&path, dentry);
+ return err;
+}
+
+/**
+ * ksmbd_vfs_mkdir() - vfs helper for smb create directory
+ * @work: work
+ * @name: directory name
+ * @mode: directory create mode
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
+{
+ struct path path;
+ struct dentry *dentry;
+ int err;
+
+ dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ if (err != -EEXIST)
+ ksmbd_debug(VFS, "path create failed for %s, err %d\n",
+ name, err);
+ return err;
+ }
+
+ mode |= S_IFDIR;
+ err = vfs_mkdir(mnt_user_ns(path.mnt), d_inode(path.dentry),
+ dentry, mode);
+ if (err) {
+ goto out;
+ } else if (d_unhashed(dentry)) {
+ struct dentry *d;
+
+ d = lookup_one_len(dentry->d_name.name, dentry->d_parent,
+ dentry->d_name.len);
+ if (IS_ERR(d)) {
+ err = PTR_ERR(d);
+ goto out;
+ }
+ if (unlikely(d_is_negative(d))) {
+ dput(d);
+ err = -ENOENT;
+ goto out;
+ }
+
+ ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d));
+ dput(d);
+ }
+out:
+ done_path_create(&path, dentry);
+ if (err)
+ pr_err("mkdir(%s): creation failed (err:%d)\n", name, err);
+ return err;
+}
+
+static ssize_t ksmbd_vfs_getcasexattr(struct user_namespace *user_ns,
+ struct dentry *dentry, char *attr_name,
+ int attr_name_len, char **attr_value)
+{
+ char *name, *xattr_list = NULL;
+ ssize_t value_len = -ENOENT, xattr_list_len;
+
+ xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+ if (xattr_list_len <= 0)
+ goto out;
+
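+ /* scan the NUL-separated xattr name list for a case-insensitive match */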
+ for (name = xattr_list; name - xattr_list < xattr_list_len;
+ name += strlen(name) + 1) {
+ ksmbd_debug(VFS, "%s, len %zd\n", name, strlen(name));
+ if (strncasecmp(attr_name, name, attr_name_len))
+ continue;
+
+ value_len = ksmbd_vfs_getxattr(user_ns,
+ dentry,
+ name,
+ attr_value);
+ if (value_len < 0)
+ pr_err("failed to get xattr in file\n");
+ break;
+ }
+
+out:
+ kvfree(xattr_list);
+ return value_len;
+}
+
+static int ksmbd_vfs_stream_read(struct ksmbd_file *fp, char *buf, loff_t *pos,
+ size_t count)
+{
+ ssize_t v_len;
+ char *stream_buf = NULL;
+
+ ksmbd_debug(VFS, "read stream data pos : %llu, count : %zd\n",
+ *pos, count);
+
+ v_len = ksmbd_vfs_getcasexattr(file_mnt_user_ns(fp->filp),
+ fp->filp->f_path.dentry,
+ fp->stream.name,
+ fp->stream.size,
+ &stream_buf);
+ if ((int)v_len <= 0)
+ return (int)v_len;
+
+ if (v_len <= *pos) {
+ count = -EINVAL;
+ goto free_buf;
+ }
+
+ if (v_len - *pos < count)
+ count = v_len - *pos;
+
+ memcpy(buf, &stream_buf[*pos], count);
+
+free_buf:
+ kvfree(stream_buf);
+ return count;
+}
+
+/**
+ * check_lock_range() - vfs helper for smb byte range file locking
+ * @filp: the file to check byte range locks on
+ * @start: lock start byte offset
+ * @end: lock end byte offset
+ * @type: lock type (READ or WRITE)
+ *
+ * Return: 0 if no conflicting lock exists, 1 on conflict
+ */
+static int check_lock_range(struct file *filp, loff_t start, loff_t end,
+ unsigned char type)
+{
+ struct file_lock *flock;
+ struct file_lock_context *ctx = file_inode(filp)->i_flctx;
+ int error = 0;
+
+ if (!ctx || list_empty_careful(&ctx->flc_posix))
+ return 0;
+
+ spin_lock(&ctx->flc_lock);
+ list_for_each_entry(flock, &ctx->flc_posix, fl_list) {
+ /* check conflict locks */
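+ /* ranges [start, end] and [fl_start, fl_end] overlap */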
+ if (flock->fl_end >= start && end >= flock->fl_start) {
+ if (flock->fl_type == F_RDLCK) {
+ if (type == WRITE) {
+ pr_err("write not allowed due to shared lock\n");
+ error = 1;
+ goto out;
+ }
+ } else if (flock->fl_type == F_WRLCK) {
+ /* check owner in lock */
+ if (flock->fl_file != filp) {
+ error = 1;
+ pr_err("rw access not allowed due to exclusive lock held by another open\n");
+ goto out;
+ }
+ }
+ }
+ }
+out:
+ spin_unlock(&ctx->flc_lock);
+ return error;
+}
+
+/**
+ * ksmbd_vfs_read() - vfs helper for smb file read
+ * @work: smb work
+ * @fp: ksmbd file pointer of the open file
+ * @count: read byte count
+ * @pos: file pos
+ *
+ * Return: number of read bytes on success, otherwise error
+ */
+int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count,
+ loff_t *pos)
+{
+ struct file *filp = fp->filp;
+ ssize_t nbytes = 0;
+ char *rbuf = work->aux_payload_buf;
+ struct inode *inode = file_inode(filp);
+
+ if (S_ISDIR(inode->i_mode))
+ return -EISDIR;
+
+ if (unlikely(count == 0))
+ return 0;
+
+ if (work->conn->connection_type) {
+ if (!(fp->daccess & (FILE_READ_DATA_LE | FILE_EXECUTE_LE))) {
+ pr_err("no right to read(%pd)\n",
+ fp->filp->f_path.dentry);
+ return -EACCES;
+ }
+ }
+
+ if (ksmbd_stream_fd(fp))
+ return ksmbd_vfs_stream_read(fp, rbuf, pos, count);
+
+ if (!work->tcon->posix_extensions) {
+ int ret;
+
+ ret = check_lock_range(filp, *pos, *pos + count - 1, READ);
+ if (ret) {
+ pr_err("unable to read due to lock\n");
+ return -EAGAIN;
+ }
+ }
+
+ nbytes = kernel_read(filp, rbuf, count, pos);
+ if (nbytes < 0) {
+ pr_err("smb read failed for (%s), err = %zd\n",
+ fp->filename, nbytes);
+ return nbytes;
+ }
+
+ filp->f_pos = *pos;
+ return nbytes;
+}
+
+static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
+ size_t count)
+{
+ char *stream_buf = NULL, *wbuf;
+ struct user_namespace *user_ns = file_mnt_user_ns(fp->filp);
+ size_t size, v_len;
+ int err = 0;
+
+ ksmbd_debug(VFS, "write stream data pos : %llu, count : %zd\n",
+ *pos, count);
+
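+ /* a stream is stored in an xattr, so writes are capped at XATTR_SIZE_MAX */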
+ if (*pos >= XATTR_SIZE_MAX)
+ return -EINVAL;
+
+ size = *pos + count;
+ if (size > XATTR_SIZE_MAX) {
+ size = XATTR_SIZE_MAX;
+ count = XATTR_SIZE_MAX - *pos;
+ }
+
+ v_len = ksmbd_vfs_getcasexattr(user_ns,
+ fp->filp->f_path.dentry,
+ fp->stream.name,
+ fp->stream.size,
+ &stream_buf);
+ if ((int)v_len < 0) {
+ pr_err("stream not found in xattr : %zd\n", v_len);
+ err = (int)v_len;
+ goto out;
+ }
+
+ if (v_len < size) {
+ wbuf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ if (!wbuf) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (v_len > 0)
+ memcpy(wbuf, stream_buf, v_len);
+ kvfree(stream_buf);
+ stream_buf = wbuf;
+ }
+
+ memcpy(&stream_buf[*pos], buf, count);
+
+ err = ksmbd_vfs_setxattr(user_ns,
+ fp->filp->f_path.dentry,
+ fp->stream.name,
+ (void *)stream_buf,
+ size,
+ 0);
+ if (err < 0)
+ goto out;
+
+ fp->filp->f_pos = *pos;
+ err = 0;
+out:
+ kvfree(stream_buf);
+ return err;
+}
+
+/**
+ * ksmbd_vfs_write() - vfs helper for smb file write
+ * @work: work
+ * @fp: ksmbd file pointer of the open file
+ * @buf: buf containing data for writing
+ * @count: write byte count
+ * @pos: file pos
+ * @sync: fsync after write
+ * @written: number of bytes written
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
+ char *buf, size_t count, loff_t *pos, bool sync,
+ ssize_t *written)
+{
+ struct ksmbd_session *sess = work->sess;
+ struct file *filp;
+ loff_t offset = *pos;
+ int err = 0;
+
+ if (sess->conn->connection_type) {
+ if (!(fp->daccess & FILE_WRITE_DATA_LE)) {
+ pr_err("no right to write(%pd)\n",
+ fp->filp->f_path.dentry);
+ err = -EACCES;
+ goto out;
+ }
+ }
+
+ filp = fp->filp;
+
+ if (ksmbd_stream_fd(fp)) {
+ err = ksmbd_vfs_stream_write(fp, buf, pos, count);
+ if (!err)
+ *written = count;
+ goto out;
+ }
+
+ if (!work->tcon->posix_extensions) {
+ err = check_lock_range(filp, *pos, *pos + count - 1, WRITE);
+ if (err) {
+ pr_err("unable to write due to lock\n");
+ err = -EAGAIN;
+ goto out;
+ }
+ }
+
+ /* Do we need to break any of a levelII oplock? */
+ smb_break_all_levII_oplock(work, fp, 1);
+
+ err = kernel_write(filp, buf, count, pos);
+ if (err < 0) {
+ ksmbd_debug(VFS, "smb write failed, err = %d\n", err);
+ goto out;
+ }
+
+ filp->f_pos = *pos;
+ *written = err;
+ err = 0;
+ if (sync) {
+ err = vfs_fsync_range(filp, offset, offset + *written, 0);
+ if (err < 0)
+ pr_err("fsync failed for filename = %pd, err = %d\n",
+ fp->filp->f_path.dentry, err);
+ }
+
+out:
+ return err;
+}
+
+/**
+ * ksmbd_vfs_getattr() - vfs helper for smb getattr
+ * @path: path of the file to stat
+ * @stat: destination kstat buffer
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_getattr(struct path *path, struct kstat *stat)
+{
+ int err;
+
+ err = vfs_getattr(path, stat, STATX_BTIME, AT_STATX_SYNC_AS_STAT);
+ if (err)
+ pr_err("getattr failed, err %d\n", err);
+ return err;
+}
+
+/**
+ * ksmbd_vfs_fsync() - vfs helper for smb fsync
+ * @work: work
+ * @fid: file id of open file
+ * @p_id: persistent file id of open file
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id)
+{
+ struct ksmbd_file *fp;
+ int err;
+
+ fp = ksmbd_lookup_fd_slow(work, fid, p_id);
+ if (!fp) {
+ pr_err("failed to get filp for fid %llu\n", fid);
+ return -ENOENT;
+ }
+ err = vfs_fsync(fp->filp, 0);
+ if (err < 0)
+ pr_err("smb fsync failed, err = %d\n", err);
+ ksmbd_fd_put(work, fp);
+ return err;
+}
+
+/**
+ * ksmbd_vfs_remove_file() - vfs helper for smb rmdir or unlink
+ * @work: work
+ * @name: absolute directory or file name
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
+{
+ struct path path;
+ struct dentry *parent;
+ int err;
+ int flags = 0;
+
+ if (ksmbd_override_fsids(work))
+ return -ENOMEM;
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS))
+ flags = LOOKUP_FOLLOW;
+
+ err = kern_path(name, flags, &path);
+ if (err) {
+ ksmbd_debug(VFS, "can't get %s, err %d\n", name, err);
+ ksmbd_revert_fsids(work);
+ return err;
+ }
+
+ parent = dget_parent(path.dentry);
+ err = ksmbd_vfs_lock_parent(parent, path.dentry);
+ if (err) {
+ dput(parent);
+ path_put(&path);
+ ksmbd_revert_fsids(work);
+ return err;
+ }
+
+ if (!d_inode(path.dentry)->i_nlink) {
+ err = -ENOENT;
+ goto out_err;
+ }
+
+ if (S_ISDIR(d_inode(path.dentry)->i_mode)) {
+ err = vfs_rmdir(mnt_user_ns(path.mnt), d_inode(parent),
+ path.dentry);
+ if (err && err != -ENOTEMPTY)
+ ksmbd_debug(VFS, "%s: rmdir failed, err %d\n", name,
+ err);
+ } else {
+ err = vfs_unlink(mnt_user_ns(path.mnt), d_inode(parent),
+ path.dentry, NULL);
+ if (err)
+ ksmbd_debug(VFS, "%s: unlink failed, err %d\n", name,
+ err);
+ }
+
+out_err:
+ inode_unlock(d_inode(parent));
+ dput(parent);
+ path_put(&path);
+ ksmbd_revert_fsids(work);
+ return err;
+}
+
+/**
+ * ksmbd_vfs_link() - vfs helper for creating smb hardlink
+ * @work: work
+ * @oldname: source file name
+ * @newname: hardlink name
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname,
+ const char *newname)
+{
+ struct path oldpath, newpath;
+ struct dentry *dentry;
+ int err;
+ int flags = 0;
+
+ if (ksmbd_override_fsids(work))
+ return -ENOMEM;
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS))
+ flags = LOOKUP_FOLLOW;
+
+ err = kern_path(oldname, flags, &oldpath);
+ if (err) {
+ pr_err("cannot get linux path for %s, err = %d\n",
+ oldname, err);
+ goto out1;
+ }
+
+ dentry = kern_path_create(AT_FDCWD, newname, &newpath,
+ flags | LOOKUP_REVAL);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ pr_err("path create err for %s, err %d\n", newname, err);
+ goto out2;
+ }
+
+ err = -EXDEV;
+ if (oldpath.mnt != newpath.mnt) {
+ pr_err("vfs_link failed err %d\n", err);
+ goto out3;
+ }
+
+ err = vfs_link(oldpath.dentry, mnt_user_ns(newpath.mnt),
+ d_inode(newpath.dentry),
+ dentry, NULL);
+ if (err)
+ ksmbd_debug(VFS, "vfs_link failed err %d\n", err);
+
+out3:
+ done_path_create(&newpath, dentry);
+out2:
+ path_put(&oldpath);
+out1:
+ ksmbd_revert_fsids(work);
+ return err;
+}
+
+static int ksmbd_validate_entry_in_use(struct dentry *src_dent)
+{
+ struct dentry *dst_dent;
+
+ spin_lock(&src_dent->d_lock);
+ list_for_each_entry(dst_dent, &src_dent->d_subdirs, d_child) {
+ struct ksmbd_file *child_fp;
+
+ if (d_really_is_negative(dst_dent))
+ continue;
+
+ child_fp = ksmbd_lookup_fd_inode(d_inode(dst_dent));
+ if (child_fp) {
+ spin_unlock(&src_dent->d_lock);
+ ksmbd_debug(VFS, "Forbid rename, sub file/dir is in use\n");
+ return -EACCES;
+ }
+ }
+ spin_unlock(&src_dent->d_lock);
+
+ return 0;
+}
+
+static int __ksmbd_vfs_rename(struct ksmbd_work *work,
+ struct user_namespace *src_user_ns,
+ struct dentry *src_dent_parent,
+ struct dentry *src_dent,
+ struct user_namespace *dst_user_ns,
+ struct dentry *dst_dent_parent,
+ struct dentry *trap_dent,
+ char *dst_name)
+{
+ struct dentry *dst_dent;
+ int err;
+
+ if (!work->tcon->posix_extensions) {
+ err = ksmbd_validate_entry_in_use(src_dent);
+ if (err)
+ return err;
+ }
+
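+ /* bail out if any dentry went negative under us (concurrent unlink) */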
+ if (d_really_is_negative(src_dent_parent))
+ return -ENOENT;
+ if (d_really_is_negative(dst_dent_parent))
+ return -ENOENT;
+ if (d_really_is_negative(src_dent))
+ return -ENOENT;
+ if (src_dent == trap_dent)
+ return -EINVAL;
+
+ if (ksmbd_override_fsids(work))
+ return -ENOMEM;
+
+ dst_dent = lookup_one_len(dst_name, dst_dent_parent, strlen(dst_name));
+ err = PTR_ERR(dst_dent);
+ if (IS_ERR(dst_dent)) {
+ pr_err("lookup failed %s [%d]\n", dst_name, err);
+ goto out;
+ }
+
+ err = -ENOTEMPTY;
+ if (dst_dent != trap_dent && !d_really_is_positive(dst_dent)) {
+ struct renamedata rd = {
+ .old_mnt_userns = src_user_ns,
+ .old_dir = d_inode(src_dent_parent),
+ .old_dentry = src_dent,
+ .new_mnt_userns = dst_user_ns,
+ .new_dir = d_inode(dst_dent_parent),
+ .new_dentry = dst_dent,
+ };
+ err = vfs_rename(&rd);
+ }
+ if (err)
+ pr_err("vfs_rename failed err %d\n", err);
+ if (dst_dent)
+ dput(dst_dent);
+out:
+ ksmbd_revert_fsids(work);
+ return err;
+}
+
+int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
+ char *newname)
+{
+ struct path dst_path;
+ struct dentry *src_dent_parent, *dst_dent_parent;
+ struct dentry *src_dent, *trap_dent, *src_child;
+ char *dst_name;
+ int err;
+ int flags;
+
+ dst_name = extract_last_component(newname);
+ if (!dst_name)
+ return -EINVAL;
+
+ src_dent_parent = dget_parent(fp->filp->f_path.dentry);
+ src_dent = fp->filp->f_path.dentry;
+
+ flags = LOOKUP_DIRECTORY;
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS))
+ flags |= LOOKUP_FOLLOW;
+
+ err = kern_path(newname, flags, &dst_path);
+ if (err) {
+ ksmbd_debug(VFS, "Cannot get path for %s [%d]\n", newname, err);
+ goto out;
+ }
+ dst_dent_parent = dst_path.dentry;
+
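+ /* lock_rename() returns the trap dentry; renaming it would loop the tree */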
+ trap_dent = lock_rename(src_dent_parent, dst_dent_parent);
+ dget(src_dent);
+ dget(dst_dent_parent);
+ src_child = lookup_one_len(src_dent->d_name.name, src_dent_parent,
+ src_dent->d_name.len);
+ if (IS_ERR(src_child)) {
+ err = PTR_ERR(src_child);
+ goto out_lock;
+ }
+
+ if (src_child != src_dent) {
+ err = -ESTALE;
+ dput(src_child);
+ goto out_lock;
+ }
+ dput(src_child);
+
+ err = __ksmbd_vfs_rename(work,
+ file_mnt_user_ns(fp->filp),
+ src_dent_parent,
+ src_dent,
+ mnt_user_ns(dst_path.mnt),
+ dst_dent_parent,
+ trap_dent,
+ dst_name);
+out_lock:
+ dput(src_dent);
+ dput(dst_dent_parent);
+ unlock_rename(src_dent_parent, dst_dent_parent);
+ path_put(&dst_path);
+out:
+ dput(src_dent_parent);
+ return err;
+}
+
+/**
+ * ksmbd_vfs_truncate() - vfs helper for smb file truncate
+ * @work: work
+ * @name: path of the file to truncate, or NULL to truncate via @fp
+ * @fp: ksmbd file pointer of the open file, used when @name is NULL
+ * @size: truncate to given size
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_truncate(struct ksmbd_work *work, const char *name,
+ struct ksmbd_file *fp, loff_t size)
+{
+ struct path path;
+ int err = 0;
+
+ if (name) {
+ err = kern_path(name, 0, &path);
+ if (err) {
+ pr_err("cannot get linux path for %s, err %d\n",
+ name, err);
+ return err;
+ }
+ err = vfs_truncate(&path, size);
+ if (err)
+ pr_err("truncate failed for %s err %d\n",
+ name, err);
+ path_put(&path);
+ } else {
+ struct file *filp;
+
+ filp = fp->filp;
+
+ /* Do we need to break any of a levelII oplock? */
+ smb_break_all_levII_oplock(work, fp, 1);
+
+ if (!work->tcon->posix_extensions) {
+ struct inode *inode = file_inode(filp);
+
+ if (size < inode->i_size) {
+ err = check_lock_range(filp, size,
+ inode->i_size - 1, WRITE);
+ } else {
+ err = check_lock_range(filp, inode->i_size,
+ size - 1, WRITE);
+ }
+
+ if (err) {
+ pr_err("failed due to lock\n");
+ return -EAGAIN;
+ }
+ }
+
+ err = vfs_truncate(&filp->f_path, size);
+ if (err)
+ pr_err("truncate failed for filename : %s err %d\n",
+ fp->filename, err);
+ }
+
+ return err;
+}
+
+/**
+ * ksmbd_vfs_listxattr() - vfs helper for smb list extended attributes
+ * @dentry: dentry of file for listing xattrs
+ * @list: on success, set to a kvmalloc'd buffer holding the xattr name
+ *        list; the caller must kvfree() it
+ *
+ * Return: xattr list length on success, otherwise error
+ */
+ssize_t ksmbd_vfs_listxattr(struct dentry *dentry, char **list)
+{
+ ssize_t size;
+ char *vlist = NULL;
+
+ size = vfs_listxattr(dentry, NULL, 0);
+ if (size <= 0)
+ return size;
+
+ vlist = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ if (!vlist)
+ return -ENOMEM;
+
+ *list = vlist;
+ size = vfs_listxattr(dentry, vlist, size);
+ if (size < 0) {
+ ksmbd_debug(VFS, "listxattr failed\n");
+ kvfree(vlist);
+ *list = NULL;
+ }
+
+ return size;
+}
+
+static ssize_t ksmbd_vfs_xattr_len(struct user_namespace *user_ns,
+ struct dentry *dentry, char *xattr_name)
+{
+ return vfs_getxattr(user_ns, dentry, xattr_name, NULL, 0);
+}
+
+/**
+ * ksmbd_vfs_getxattr() - vfs helper for smb get extended attributes value
+ * @user_ns: user namespace
+ * @dentry: dentry of file for getting xattrs
+ * @xattr_name: name of the xattr to query
+ * @xattr_buf: on success, set to a kmalloc'd buffer holding the xattr
+ *             value; the caller must kfree() it
+ *
+ * Return: read xattr value length on success, otherwise error
+ */
+ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
+ struct dentry *dentry,
+ char *xattr_name, char **xattr_buf)
+{
+ ssize_t xattr_len;
+ char *buf;
+
+ *xattr_buf = NULL;
+ xattr_len = ksmbd_vfs_xattr_len(user_ns, dentry, xattr_name);
+ if (xattr_len < 0)
+ return xattr_len;
+
+ buf = kmalloc(xattr_len + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ xattr_len = vfs_getxattr(user_ns, dentry, xattr_name,
+ (void *)buf, xattr_len);
+ if (xattr_len > 0)
+ *xattr_buf = buf;
+ else
+ kfree(buf);
+ return xattr_len;
+}
+
+/**
+ * ksmbd_vfs_setxattr() - vfs helper for smb set extended attributes value
+ * @user_ns: user namespace
+ * @dentry: dentry to set XATTR at
+ * @attr_name: xattr name for setxattr
+ * @attr_value: xattr value to set
+ * @attr_size: size of xattr value
+ * @flags: setxattr flags (e.g. XATTR_CREATE, XATTR_REPLACE)
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_setxattr(struct user_namespace *user_ns,
+ struct dentry *dentry, const char *attr_name,
+ const void *attr_value, size_t attr_size, int flags)
+{
+ int err;
+
+ err = vfs_setxattr(user_ns,
+ dentry,
+ attr_name,
+ attr_value,
+ attr_size,
+ flags);
+ if (err)
+ ksmbd_debug(VFS, "setxattr failed, err %d\n", err);
+ return err;
+}
+
+/**
+ * ksmbd_vfs_set_fadvise() - convert smb IO caching options to linux options
+ * @filp: file pointer for IO
+ * @option: smb IO options
+ */
+void ksmbd_vfs_set_fadvise(struct file *filp, __le32 option)
+{
+ struct address_space *mapping;
+
+ mapping = filp->f_mapping;
+
+ if (!option || !mapping)
+ return;
+
+ if (option & FILE_WRITE_THROUGH_LE) {
+ filp->f_flags |= O_SYNC;
+ } else if (option & FILE_SEQUENTIAL_ONLY_LE) {
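+ /* double the readahead window for sequential-only access */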
+ filp->f_ra.ra_pages = inode_to_bdi(mapping->host)->ra_pages * 2;
+ spin_lock(&filp->f_lock);
+ filp->f_mode &= ~FMODE_RANDOM;
+ spin_unlock(&filp->f_lock);
+ } else if (option & FILE_RANDOM_ACCESS_LE) {
+ spin_lock(&filp->f_lock);
+ filp->f_mode |= FMODE_RANDOM;
+ spin_unlock(&filp->f_lock);
+ }
+}
+
+int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp,
+ loff_t off, loff_t len)
+{
+ smb_break_all_levII_oplock(work, fp, 1);
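+ /* sparse files can punch a hole; otherwise zero the range in place */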
+ if (fp->f_ci->m_fattr & ATTR_SPARSE_FILE_LE)
+ return vfs_fallocate(fp->filp,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ off, len);
+
+ return vfs_fallocate(fp->filp, FALLOC_FL_ZERO_RANGE, off, len);
+}
+
+int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
+ struct file_allocated_range_buffer *ranges,
+ int in_count, int *out_count)
+{
+ struct file *f = fp->filp;
+ struct inode *inode = file_inode(fp->filp);
+ loff_t maxbytes = (u64)inode->i_sb->s_maxbytes, end;
+ loff_t extent_start, extent_end;
+ int ret = 0;
+
+ if (start > maxbytes)
+ return -EFBIG;
+
+ if (!in_count)
+ return 0;
+
+ /*
+ * Shrink request scope to what the fs can actually handle.
+ */
+ if (length > maxbytes || (maxbytes - length) < start)
+ length = maxbytes - start;
+
+ if (start + length > inode->i_size)
+ length = inode->i_size - start;
+
+ *out_count = 0;
+ end = start + length;
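+ /* walk the file with SEEK_DATA/SEEK_HOLE to enumerate allocated extents */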
+ while (start < end && *out_count < in_count) {
+ extent_start = f->f_op->llseek(f, start, SEEK_DATA);
+ if (extent_start < 0) {
+ if (extent_start != -ENXIO)
+ ret = (int)extent_start;
+ break;
+ }
+
+ if (extent_start >= end)
+ break;
+
+ extent_end = f->f_op->llseek(f, extent_start, SEEK_HOLE);
+ if (extent_end < 0) {
+ if (extent_end != -ENXIO)
+ ret = (int)extent_end;
+ break;
+ } else if (extent_start >= extent_end) {
+ break;
+ }
+
+ ranges[*out_count].file_offset = cpu_to_le64(extent_start);
+ ranges[(*out_count)++].length =
+ cpu_to_le64(min(extent_end, end) - extent_start);
+
+ start = extent_end;
+ }
+
+ return ret;
+}
+
+int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns,
+ struct dentry *dentry, char *attr_name)
+{
+ return vfs_removexattr(user_ns, dentry, attr_name);
+}
+
+int ksmbd_vfs_unlink(struct user_namespace *user_ns,
+ struct dentry *dir, struct dentry *dentry)
+{
+ int err = 0;
+
+ err = ksmbd_vfs_lock_parent(dir, dentry);
+ if (err)
+ return err;
+ dget(dentry);
+
+ if (S_ISDIR(d_inode(dentry)->i_mode))
+ err = vfs_rmdir(user_ns, d_inode(dir), dentry);
+ else
+ err = vfs_unlink(user_ns, d_inode(dir), dentry, NULL);
+
+ dput(dentry);
+ inode_unlock(d_inode(dir));
+ if (err)
+ ksmbd_debug(VFS, "failed to delete, err %d\n", err);
+
+ return err;
+}
+
+static int __dir_empty(struct dir_context *ctx, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type)
+{
+ struct ksmbd_readdir_data *buf;
+
+ buf = container_of(ctx, struct ksmbd_readdir_data, ctx);
+ buf->dirent_count++;
+
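+ /* "." and ".." always exist, so a third entry means non-empty */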
+ if (buf->dirent_count > 2)
+ return -ENOTEMPTY;
+ return 0;
+}
+
+/**
+ * ksmbd_vfs_empty_dir() - check for empty directory
+ * @fp: ksmbd file pointer
+ *
+ * Return: 0 if the directory is empty, otherwise -ENOTEMPTY
+ */
+int ksmbd_vfs_empty_dir(struct ksmbd_file *fp)
+{
+ int err;
+ struct ksmbd_readdir_data readdir_data;
+
+ memset(&readdir_data, 0, sizeof(struct ksmbd_readdir_data));
+
+ set_ctx_actor(&readdir_data.ctx, __dir_empty);
+ readdir_data.dirent_count = 0;
+
+ err = iterate_dir(fp->filp, &readdir_data.ctx);
+ if (readdir_data.dirent_count > 2)
+ err = -ENOTEMPTY;
+ else
+ err = 0;
+ return err;
+}
+
+static int __caseless_lookup(struct dir_context *ctx, const char *name,
+ int namlen, loff_t offset, u64 ino,
+ unsigned int d_type)
+{
+ struct ksmbd_readdir_data *buf;
+
+ buf = container_of(ctx, struct ksmbd_readdir_data, ctx);
+
+ if (buf->used != namlen)
+ return 0;
+ if (!strncasecmp((char *)buf->private, name, namlen)) {
+ memcpy((char *)buf->private, name, namlen);
+ buf->dirent_count = 1;
+ return -EEXIST;
+ }
+ return 0;
+}
+
+/**
+ * ksmbd_vfs_lookup_in_dir() - lookup a file in a directory
+ * @dir: path of the directory to search
+ * @name: filename to look up; on a match it is overwritten with the
+ *        on-disk case of the name
+ * @namelen: filename length
+ *
+ * Return: 0 on success, otherwise error
+ */
+static int ksmbd_vfs_lookup_in_dir(struct path *dir, char *name, size_t namelen)
+{
+ int ret;
+ struct file *dfilp;
+ int flags = O_RDONLY | O_LARGEFILE;
+ struct ksmbd_readdir_data readdir_data = {
+ .ctx.actor = __caseless_lookup,
+ .private = name,
+ .used = namelen,
+ .dirent_count = 0,
+ };
+
+ dfilp = dentry_open(dir, flags, current_cred());
+ if (IS_ERR(dfilp))
+ return PTR_ERR(dfilp);
+
+ ret = iterate_dir(dfilp, &readdir_data.ctx);
+ if (readdir_data.dirent_count > 0)
+ ret = 0;
+ fput(dfilp);
+ return ret;
+}
+
+/**
+ * ksmbd_vfs_kern_path() - lookup a file and get path info
+ * @name: name of file for lookup
+ * @flags: lookup flags
+ * @path: on success, filled with the resolved path info
+ * @caseless: caseless filename lookup
+ *
+ * Return: 0 on success, otherwise error
+ */
+int ksmbd_vfs_kern_path(char *name, unsigned int flags, struct path *path,
+ bool caseless)
+{
+ int err;
+
+ if (name[0] != '/')
+ return -EINVAL;
+
+ err = kern_path(name, flags, path);
+ if (!err)
+ return 0;
+
+ if (caseless) {
+ char *filepath;
+ struct path parent;
+ size_t path_len, remain_len;
+
+ filepath = kstrdup(name, GFP_KERNEL);
+ if (!filepath)
+ return -ENOMEM;
+
+ path_len = strlen(filepath);
+ remain_len = path_len - 1;
+
+ err = kern_path("/", flags, &parent);
+ if (err)
+ goto out;
+
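+ /*
+ * walk the path one component at a time, matching each component
+ * case-insensitively in its parent directory
+ */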
+ while (d_can_lookup(parent.dentry)) {
+ char *filename = filepath + path_len - remain_len;
+ char *next = strchrnul(filename, '/');
+ size_t filename_len = next - filename;
+ bool is_last = !next[0];
+
+ if (filename_len == 0)
+ break;
+
+ err = ksmbd_vfs_lookup_in_dir(&parent, filename,
+ filename_len);
+ if (err) {
+ path_put(&parent);
+ goto out;
+ }
+
+ path_put(&parent);
+ next[0] = '\0';
+
+ err = kern_path(filepath, flags, &parent);
+ if (err)
+ goto out;
+
+ if (is_last) {
+ path->mnt = parent.mnt;
+ path->dentry = parent.dentry;
+ goto out;
+ }
+
+ next[0] = '/';
+ remain_len -= filename_len + 1;
+ }
+
+ path_put(&parent);
+ err = -EINVAL;
+out:
+ kfree(filepath);
+ }
+ return err;
+}
+
+int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns,
+ struct dentry *dentry)
+{
+ char *name, *xattr_list = NULL;
+ ssize_t xattr_list_len;
+ int err = 0;
+
+ xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+ if (xattr_list_len < 0) {
+ goto out;
+ } else if (!xattr_list_len) {
+ ksmbd_debug(SMB, "empty xattr in the file\n");
+ goto out;
+ }
+
+ for (name = xattr_list; name - xattr_list < xattr_list_len;
+ name += strlen(name) + 1) {
+ ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
+
+ if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1) ||
+ !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1)) {
+ err = ksmbd_vfs_remove_xattr(user_ns, dentry, name);
+ if (err)
+ ksmbd_debug(SMB,
+ "remove acl xattr failed : %s\n", name);
+ }
+ }
+out:
+ kvfree(xattr_list);
+ return err;
+}
+
+int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns,
+ struct dentry *dentry)
+{
+ char *name, *xattr_list = NULL;
+ ssize_t xattr_list_len;
+ int err = 0;
+
+ xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+ if (xattr_list_len < 0) {
+ goto out;
+ } else if (!xattr_list_len) {
+ ksmbd_debug(SMB, "empty xattr in the file\n");
+ goto out;
+ }
+
+ for (name = xattr_list; name - xattr_list < xattr_list_len;
+ name += strlen(name) + 1) {
+ ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
+
+ if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) {
+ err = ksmbd_vfs_remove_xattr(user_ns, dentry, name);
+ if (err)
+ ksmbd_debug(SMB, "remove xattr failed : %s\n", name);
+ }
+ }
+out:
+ kvfree(xattr_list);
+ return err;
+}
+
+static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct user_namespace *user_ns,
+ struct inode *inode,
+ int acl_type)
+{
+ struct xattr_smb_acl *smb_acl = NULL;
+ struct posix_acl *posix_acls;
+ struct posix_acl_entry *pa_entry;
+ struct xattr_acl_entry *xa_entry;
+ int i;
+
+ if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
+ return NULL;
+
+ posix_acls = get_acl(inode, acl_type);
+ if (!posix_acls)
+ return NULL;
+
+ smb_acl = kzalloc(sizeof(struct xattr_smb_acl) +
+ sizeof(struct xattr_acl_entry) * posix_acls->a_count,
+ GFP_KERNEL);
+ if (!smb_acl)
+ goto out;
+
+ smb_acl->count = posix_acls->a_count;
+ pa_entry = posix_acls->a_entries;
+ xa_entry = smb_acl->entries;
+ for (i = 0; i < posix_acls->a_count; i++, pa_entry++, xa_entry++) {
+ switch (pa_entry->e_tag) {
+ case ACL_USER:
+ xa_entry->type = SMB_ACL_USER;
+ xa_entry->uid = from_kuid(user_ns, pa_entry->e_uid);
+ break;
+ case ACL_USER_OBJ:
+ xa_entry->type = SMB_ACL_USER_OBJ;
+ break;
+ case ACL_GROUP:
+ xa_entry->type = SMB_ACL_GROUP;
+ xa_entry->gid = from_kgid(user_ns, pa_entry->e_gid);
+ break;
+ case ACL_GROUP_OBJ:
+ xa_entry->type = SMB_ACL_GROUP_OBJ;
+ break;
+ case ACL_OTHER:
+ xa_entry->type = SMB_ACL_OTHER;
+ break;
+ case ACL_MASK:
+ xa_entry->type = SMB_ACL_MASK;
+ break;
+ default:
+ pr_err("unknown type : 0x%x\n", pa_entry->e_tag);
+ goto out;
+ }
+
+ if (pa_entry->e_perm & ACL_READ)
+ xa_entry->perm |= SMB_ACL_READ;
+ if (pa_entry->e_perm & ACL_WRITE)
+ xa_entry->perm |= SMB_ACL_WRITE;
+ if (pa_entry->e_perm & ACL_EXECUTE)
+ xa_entry->perm |= SMB_ACL_EXECUTE;
+ }
+out:
+ posix_acl_release(posix_acls);
+ return smb_acl;
+}
+
+int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
+ struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct smb_ntsd *pntsd, int len)
+{
+ int rc;
+ struct ndr sd_ndr = {0}, acl_ndr = {0};
+ struct xattr_ntacl acl = {0};
+ struct xattr_smb_acl *smb_acl, *def_smb_acl = NULL;
+ struct inode *inode = d_inode(dentry);
+
+ acl.version = 4;
+ acl.hash_type = XATTR_SD_HASH_TYPE_SHA256;
+ acl.current_time = ksmbd_UnixTimeToNT(current_time(inode));
+
+ memcpy(acl.desc, "posix_acl", 9);
+ acl.desc_len = 10;
+
+ pntsd->osidoffset =
+ cpu_to_le32(le32_to_cpu(pntsd->osidoffset) + NDR_NTSD_OFFSETOF);
+ pntsd->gsidoffset =
+ cpu_to_le32(le32_to_cpu(pntsd->gsidoffset) + NDR_NTSD_OFFSETOF);
+ pntsd->dacloffset =
+ cpu_to_le32(le32_to_cpu(pntsd->dacloffset) + NDR_NTSD_OFFSETOF);
+
+ acl.sd_buf = (char *)pntsd;
+ acl.sd_size = len;
+
+ rc = ksmbd_gen_sd_hash(conn, acl.sd_buf, acl.sd_size, acl.hash);
+ if (rc) {
+ pr_err("failed to generate hash for ndr acl\n");
+ return rc;
+ }
+
+ smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode,
+ ACL_TYPE_ACCESS);
+ if (S_ISDIR(inode->i_mode))
+ def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode,
+ ACL_TYPE_DEFAULT);
+
+ rc = ndr_encode_posix_acl(&acl_ndr, user_ns, inode,
+ smb_acl, def_smb_acl);
+ if (rc) {
+ pr_err("failed to encode ndr to posix acl\n");
+ goto out;
+ }
+
+ rc = ksmbd_gen_sd_hash(conn, acl_ndr.data, acl_ndr.offset,
+ acl.posix_acl_hash);
+ if (rc) {
+ pr_err("failed to generate hash for ndr acl\n");
+ goto out;
+ }
+
+ rc = ndr_encode_v4_ntacl(&sd_ndr, &acl);
+ if (rc) {
+ pr_err("failed to encode ndr to posix acl\n");
+ goto out;
+ }
+
+ rc = ksmbd_vfs_setxattr(user_ns, dentry,
+ XATTR_NAME_SD, sd_ndr.data,
+ sd_ndr.offset, 0);
+ if (rc < 0)
+ pr_err("Failed to store XATTR ntacl :%d\n", rc);
+
+ kfree(sd_ndr.data);
+out:
+ kfree(acl_ndr.data);
+ kfree(smb_acl);
+ kfree(def_smb_acl);
+ return rc;
+}
+
+int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
+ struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct smb_ntsd **pntsd)
+{
+ int rc;
+ struct ndr n;
+ struct inode *inode = d_inode(dentry);
+ struct ndr acl_ndr = {0};
+ struct xattr_ntacl acl;
+ struct xattr_smb_acl *smb_acl = NULL, *def_smb_acl = NULL;
+ __u8 cmp_hash[XATTR_SD_HASH_SIZE] = {0};
+
+ rc = ksmbd_vfs_getxattr(user_ns, dentry, XATTR_NAME_SD, &n.data);
+ if (rc <= 0)
+ return rc;
+
+ n.length = rc;
+ rc = ndr_decode_v4_ntacl(&n, &acl);
+ if (rc)
+ goto free_n_data;
+
+ smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode,
+ ACL_TYPE_ACCESS);
+ if (S_ISDIR(inode->i_mode))
+ def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(user_ns, inode,
+ ACL_TYPE_DEFAULT);
+
+ rc = ndr_encode_posix_acl(&acl_ndr, user_ns, inode, smb_acl,
+ def_smb_acl);
+ if (rc) {
+ pr_err("failed to encode ndr to posix acl\n");
+ goto out_free;
+ }
+
+ rc = ksmbd_gen_sd_hash(conn, acl_ndr.data, acl_ndr.offset, cmp_hash);
+ if (rc) {
+ pr_err("failed to generate hash for ndr acl\n");
+ goto out_free;
+ }
+
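+ /* the cached NT SD is valid only if the posix acl hash still matches */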
+ if (memcmp(cmp_hash, acl.posix_acl_hash, XATTR_SD_HASH_SIZE)) {
+ pr_err("hash value diff\n");
+ rc = -EINVAL;
+ goto out_free;
+ }
+
+ *pntsd = acl.sd_buf;
+ (*pntsd)->osidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->osidoffset) -
+ NDR_NTSD_OFFSETOF);
+ (*pntsd)->gsidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->gsidoffset) -
+ NDR_NTSD_OFFSETOF);
+ (*pntsd)->dacloffset = cpu_to_le32(le32_to_cpu((*pntsd)->dacloffset) -
+ NDR_NTSD_OFFSETOF);
+
+ rc = acl.sd_size;
+out_free:
+ kfree(acl_ndr.data);
+ kfree(smb_acl);
+ kfree(def_smb_acl);
+ if (rc < 0) {
+ kfree(acl.sd_buf);
+ *pntsd = NULL;
+ }
+
+free_n_data:
+ kfree(n.data);
+ return rc;
+}
+
+int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct xattr_dos_attrib *da)
+{
+ struct ndr n;
+ int err;
+
+ err = ndr_encode_dos_attr(&n, da);
+ if (err)
+ return err;
+
+ err = ksmbd_vfs_setxattr(user_ns, dentry, XATTR_NAME_DOS_ATTRIBUTE,
+ (void *)n.data, n.offset, 0);
+ if (err)
+ ksmbd_debug(SMB, "failed to store dos attribute in xattr\n");
+ kfree(n.data);
+
+ return err;
+}
+
+int ksmbd_vfs_get_dos_attrib_xattr(struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct xattr_dos_attrib *da)
+{
+ struct ndr n;
+ int err;
+
+ err = ksmbd_vfs_getxattr(user_ns, dentry, XATTR_NAME_DOS_ATTRIBUTE,
+ (char **)&n.data);
+ if (err > 0) {
+ n.length = err;
+ if (ndr_decode_dos_attr(&n, da))
+ err = -EINVAL;
+ kfree(n.data);
+ } else {
+ ksmbd_debug(SMB, "failed to load dos attribute in xattr\n");
+ }
+
+ return err;
+}
+
+/**
+ * ksmbd_vfs_init_kstat() - convert unix stat information to smb stat format
+ * @p: destination buffer
+ * @ksmbd_kstat: ksmbd kstat wrapper
+ */
+void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat)
+{
+ struct file_directory_info *info = (struct file_directory_info *)(*p);
+ struct kstat *kstat = ksmbd_kstat->kstat;
+ u64 time;
+
+ info->FileIndex = 0;
+ info->CreationTime = cpu_to_le64(ksmbd_kstat->create_time);
+ time = ksmbd_UnixTimeToNT(kstat->atime);
+ info->LastAccessTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(kstat->mtime);
+ info->LastWriteTime = cpu_to_le64(time);
+ time = ksmbd_UnixTimeToNT(kstat->ctime);
+ info->ChangeTime = cpu_to_le64(time);
+
+ if (ksmbd_kstat->file_attributes & ATTR_DIRECTORY_LE) {
+ info->EndOfFile = 0;
+ info->AllocationSize = 0;
+ } else {
+ info->EndOfFile = cpu_to_le64(kstat->size);
+ info->AllocationSize = cpu_to_le64(kstat->blocks << 9);
+ }
+ info->ExtFileAttributes = ksmbd_kstat->file_attributes;
+
+ return info;
+}
+
+int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
+ struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct ksmbd_kstat *ksmbd_kstat)
+{
+ u64 time;
+ int rc;
+
+ generic_fillattr(user_ns, d_inode(dentry), ksmbd_kstat->kstat);
+
+ time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->ctime);
+ ksmbd_kstat->create_time = time;
+
+ /*
+ * Set a default value in case storing dos attributes is disabled,
+ * or ACLs are disabled on the server's filesystem while the config
+ * enables them.
+ */
+ if (S_ISDIR(ksmbd_kstat->kstat->mode))
+ ksmbd_kstat->file_attributes = ATTR_DIRECTORY_LE;
+ else
+ ksmbd_kstat->file_attributes = ATTR_ARCHIVE_LE;
+
+ if (test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_STORE_DOS_ATTRS)) {
+ struct xattr_dos_attrib da;
+
+ rc = ksmbd_vfs_get_dos_attrib_xattr(user_ns, dentry, &da);
+ if (rc > 0) {
+ ksmbd_kstat->file_attributes = cpu_to_le32(da.attr);
+ ksmbd_kstat->create_time = da.create_time;
+ } else {
+ ksmbd_debug(VFS, "fail to load dos attribute.\n");
+ }
+ }
+
+ return 0;
+}
+
+ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns,
+ struct dentry *dentry, char *attr_name,
+ int attr_name_len)
+{
+ char *name, *xattr_list = NULL;
+ ssize_t value_len = -ENOENT, xattr_list_len;
+
+ xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+ if (xattr_list_len <= 0)
+ goto out;
+
+ for (name = xattr_list; name - xattr_list < xattr_list_len;
+ name += strlen(name) + 1) {
+ ksmbd_debug(VFS, "%s, len %zd\n", name, strlen(name));
+ if (strncasecmp(attr_name, name, attr_name_len))
+ continue;
+
+ value_len = ksmbd_vfs_xattr_len(user_ns, dentry, name);
+ break;
+ }
+
+out:
+ kvfree(xattr_list);
+ return value_len;
+}
+
+int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
+ size_t *xattr_stream_name_size, int s_type)
+{
+ char *type, *buf;
+
+ if (s_type == DIR_STREAM)
+ type = ":$INDEX_ALLOCATION";
+ else
+ type = ":$DATA";
+
+ buf = kasprintf(GFP_KERNEL, "%s%s%s",
+ XATTR_NAME_STREAM, stream_name, type);
+ if (!buf)
+ return -ENOMEM;
+
+ *xattr_stream_name = buf;
+ *xattr_stream_name_size = strlen(buf) + 1;
+
+ return 0;
+}
+
+int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
+ struct ksmbd_file *src_fp,
+ struct ksmbd_file *dst_fp,
+ struct srv_copychunk *chunks,
+ unsigned int chunk_count,
+ unsigned int *chunk_count_written,
+ unsigned int *chunk_size_written,
+ loff_t *total_size_written)
+{
+ unsigned int i;
+ loff_t src_off, dst_off, src_file_size;
+ size_t len;
+ int ret;
+
+ *chunk_count_written = 0;
+ *chunk_size_written = 0;
+ *total_size_written = 0;
+
+ if (!(src_fp->daccess & (FILE_READ_DATA_LE | FILE_EXECUTE_LE))) {
+ pr_err("no right to read(%pd)\n", src_fp->filp->f_path.dentry);
+ return -EACCES;
+ }
+ if (!(dst_fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE))) {
+ pr_err("no right to write(%pd)\n", dst_fp->filp->f_path.dentry);
+ return -EACCES;
+ }
+
+ if (ksmbd_stream_fd(src_fp) || ksmbd_stream_fd(dst_fp))
+ return -EBADF;
+
+ smb_break_all_levII_oplock(work, dst_fp, 1);
+
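+ /* reject the whole request up front if any chunk conflicts with a lock */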
+ if (!work->tcon->posix_extensions) {
+ for (i = 0; i < chunk_count; i++) {
+ src_off = le64_to_cpu(chunks[i].SourceOffset);
+ dst_off = le64_to_cpu(chunks[i].TargetOffset);
+ len = le32_to_cpu(chunks[i].Length);
+
+ if (check_lock_range(src_fp->filp, src_off,
+ src_off + len - 1, READ))
+ return -EAGAIN;
+ if (check_lock_range(dst_fp->filp, dst_off,
+ dst_off + len - 1, WRITE))
+ return -EAGAIN;
+ }
+ }
+
+ src_file_size = i_size_read(file_inode(src_fp->filp));
+
+ for (i = 0; i < chunk_count; i++) {
+ src_off = le64_to_cpu(chunks[i].SourceOffset);
+ dst_off = le64_to_cpu(chunks[i].TargetOffset);
+ len = le32_to_cpu(chunks[i].Length);
+
+ if (src_off + len > src_file_size)
+ return -E2BIG;
+
+ ret = vfs_copy_file_range(src_fp->filp, src_off,
+ dst_fp->filp, dst_off, len, 0);
+ if (ret < 0)
+ return ret;
+
+ *chunk_count_written += 1;
+ *total_size_written += ret;
+ }
+ return 0;
+}
+
+void ksmbd_vfs_posix_lock_wait(struct file_lock *flock)
+{
+ wait_event(flock->fl_wait, !flock->fl_blocker);
+}
+
+int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout)
+{
+ return wait_event_interruptible_timeout(flock->fl_wait,
+ !flock->fl_blocker,
+ timeout);
+}
+
+void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock)
+{
+ locks_delete_block(flock);
+}
+
+int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns,
+ struct inode *inode)
+{
+ struct posix_acl_state acl_state;
+ struct posix_acl *acls;
+ int rc;
+
+ if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
+ return -EOPNOTSUPP;
+
+ ksmbd_debug(SMB, "Set posix acls\n");
+ rc = init_acl_state(&acl_state, 1);
+ if (rc)
+ return rc;
+
+ /* Set default owner group */
+ acl_state.owner.allow = (inode->i_mode & 0700) >> 6;
+ acl_state.group.allow = (inode->i_mode & 0070) >> 3;
+ acl_state.other.allow = inode->i_mode & 0007;
+ acl_state.users->aces[acl_state.users->n].uid = inode->i_uid;
+ acl_state.users->aces[acl_state.users->n++].perms.allow =
+ acl_state.owner.allow;
+ acl_state.groups->aces[acl_state.groups->n].gid = inode->i_gid;
+ acl_state.groups->aces[acl_state.groups->n++].perms.allow =
+ acl_state.group.allow;
+ acl_state.mask.allow = 0x07;
+
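+ /* six ACEs: USER_OBJ, USER, GROUP_OBJ, GROUP, MASK and OTHER */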
+ acls = posix_acl_alloc(6, GFP_KERNEL);
+ if (!acls) {
+ free_acl_state(&acl_state);
+ return -ENOMEM;
+ }
+ posix_state_to_acl(&acl_state, acls->a_entries);
+ rc = set_posix_acl(user_ns, inode, ACL_TYPE_ACCESS, acls);
+ if (rc < 0)
+ ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
+ rc);
+ else if (S_ISDIR(inode->i_mode)) {
+ posix_state_to_acl(&acl_state, acls->a_entries);
+ rc = set_posix_acl(user_ns, inode, ACL_TYPE_DEFAULT,
+ acls);
+ if (rc < 0)
+ ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
+ rc);
+ }
+ free_acl_state(&acl_state);
+ posix_acl_release(acls);
+ return rc;
+}
+
+int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns,
+ struct inode *inode, struct inode *parent_inode)
+{
+ struct posix_acl *acls;
+ struct posix_acl_entry *pace;
+ int rc, i;
+
+ if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
+ return -EOPNOTSUPP;
+
+ acls = get_acl(parent_inode, ACL_TYPE_DEFAULT);
+ if (!acls)
+ return -ENOENT;
+ pace = acls->a_entries;
+
+ for (i = 0; i < acls->a_count; i++, pace++) {
+ if (pace->e_tag == ACL_MASK) {
+ pace->e_perm = 0x07;
+ break;
+ }
+ }
+
+ rc = set_posix_acl(user_ns, inode, ACL_TYPE_ACCESS, acls);
+ if (rc < 0)
+ ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
+ rc);
+ if (S_ISDIR(inode->i_mode)) {
+ rc = set_posix_acl(user_ns, inode, ACL_TYPE_DEFAULT,
+ acls);
+ if (rc < 0)
+ ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
+ rc);
+ }
+ posix_acl_release(acls);
+ return rc;
+}
diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
new file mode 100644
index 000000000000..cb0cba0d5d07
--- /dev/null
+++ b/fs/ksmbd/vfs.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __KSMBD_VFS_H__
+#define __KSMBD_VFS_H__
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <uapi/linux/xattr.h>
+#include <linux/posix_acl.h>
+
+#include "smbacl.h"
+#include "xattr.h"
+
+/*
+ * Enumeration for stream type.
+ */
+enum {
+ DATA_STREAM = 1, /* type $DATA */
+ DIR_STREAM /* type $INDEX_ALLOCATION */
+};
+
+/* CreateOptions */
+/* If the flag is set, it must not be a file; valid for directories only */
+#define FILE_DIRECTORY_FILE_LE cpu_to_le32(0x00000001)
+#define FILE_WRITE_THROUGH_LE cpu_to_le32(0x00000002)
+#define FILE_SEQUENTIAL_ONLY_LE cpu_to_le32(0x00000004)
+
+/* Should not buffer on server */
+#define FILE_NO_INTERMEDIATE_BUFFERING_LE cpu_to_le32(0x00000008)
+/* MBZ */
+#define FILE_SYNCHRONOUS_IO_ALERT_LE cpu_to_le32(0x00000010)
+/* MBZ */
+#define FILE_SYNCHRONOUS_IO_NONALERT_LE cpu_to_le32(0x00000020)
+
+/* Flag must not be set for a directory */
+#define FILE_NON_DIRECTORY_FILE_LE cpu_to_le32(0x00000040)
+
+/* Should be zero */
+#define CREATE_TREE_CONNECTION cpu_to_le32(0x00000080)
+#define FILE_COMPLETE_IF_OPLOCKED_LE cpu_to_le32(0x00000100)
+#define FILE_NO_EA_KNOWLEDGE_LE cpu_to_le32(0x00000200)
+#define FILE_OPEN_REMOTE_INSTANCE cpu_to_le32(0x00000400)
+
+/*
+ * Doc says this is an obsolete "open for recovery" flag that should be
+ * zero in any case.
+ */
+#define CREATE_OPEN_FOR_RECOVERY cpu_to_le32(0x00000400)
+#define FILE_RANDOM_ACCESS_LE cpu_to_le32(0x00000800)
+#define FILE_DELETE_ON_CLOSE_LE cpu_to_le32(0x00001000)
+#define FILE_OPEN_BY_FILE_ID_LE cpu_to_le32(0x00002000)
+#define FILE_OPEN_FOR_BACKUP_INTENT_LE cpu_to_le32(0x00004000)
+#define FILE_NO_COMPRESSION_LE cpu_to_le32(0x00008000)
+
+/* Should be zero */
+#define FILE_OPEN_REQUIRING_OPLOCK cpu_to_le32(0x00010000)
+#define FILE_DISALLOW_EXCLUSIVE cpu_to_le32(0x00020000)
+#define FILE_RESERVE_OPFILTER_LE cpu_to_le32(0x00100000)
+#define FILE_OPEN_REPARSE_POINT_LE cpu_to_le32(0x00200000)
+#define FILE_OPEN_NO_RECALL_LE cpu_to_le32(0x00400000)
+
+/* Should be zero */
+#define FILE_OPEN_FOR_FREE_SPACE_QUERY_LE cpu_to_le32(0x00800000)
+#define CREATE_OPTIONS_MASK cpu_to_le32(0x00FFFFFF)
+#define CREATE_OPTION_READONLY 0x10000000
+/* system. NB not sent over wire */
+#define CREATE_OPTION_SPECIAL 0x20000000
+
+struct ksmbd_work;
+struct ksmbd_file;
+struct ksmbd_conn;
+
+struct ksmbd_dir_info {
+ const char *name;
+ char *wptr;
+ char *rptr;
+ int name_len;
+ int out_buf_len;
+ int num_entry;
+ int data_count;
+ int last_entry_offset;
+ bool hide_dot_file;
+ int flags;
+};
+
+struct ksmbd_readdir_data {
+ struct dir_context ctx;
+ union {
+ void *private;
+ char *dirent;
+ };
+
+ unsigned int used;
+ unsigned int dirent_count;
+ unsigned int file_attr;
+};
+
+/* ksmbd kstat wrapper to get valid create time when reading dir entry */
+struct ksmbd_kstat {
+ struct kstat *kstat;
+ unsigned long long create_time;
+ __le32 file_attributes;
+};
+
+int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child);
+int ksmbd_vfs_may_delete(struct user_namespace *user_ns, struct dentry *dentry);
+int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
+ struct dentry *dentry, __le32 *daccess);
+int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode);
+int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode);
+int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp,
+ size_t count, loff_t *pos);
+int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
+ char *buf, size_t count, loff_t *pos, bool sync,
+ ssize_t *written);
+int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id);
+int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name);
+int ksmbd_vfs_link(struct ksmbd_work *work,
+ const char *oldname, const char *newname);
+int ksmbd_vfs_getattr(struct path *path, struct kstat *stat);
+int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
+ char *newname);
+int ksmbd_vfs_truncate(struct ksmbd_work *work, const char *name,
+ struct ksmbd_file *fp, loff_t size);
+struct srv_copychunk;
+int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
+ struct ksmbd_file *src_fp,
+ struct ksmbd_file *dst_fp,
+ struct srv_copychunk *chunks,
+ unsigned int chunk_count,
+ unsigned int *chunk_count_written,
+ unsigned int *chunk_size_written,
+ loff_t *total_size_written);
+ssize_t ksmbd_vfs_listxattr(struct dentry *dentry, char **list);
+ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns,
+ struct dentry *dentry,
+ char *xattr_name,
+ char **xattr_buf);
+ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns,
+ struct dentry *dentry, char *attr_name,
+ int attr_name_len);
+int ksmbd_vfs_setxattr(struct user_namespace *user_ns,
+ struct dentry *dentry, const char *attr_name,
+ const void *attr_value, size_t attr_size, int flags);
+int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
+ size_t *xattr_stream_name_size, int s_type);
+int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns,
+ struct dentry *dentry, char *attr_name);
+int ksmbd_vfs_kern_path(char *name, unsigned int flags, struct path *path,
+ bool caseless);
+int ksmbd_vfs_empty_dir(struct ksmbd_file *fp);
+void ksmbd_vfs_set_fadvise(struct file *filp, __le32 option);
+int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp,
+ loff_t off, loff_t len);
+struct file_allocated_range_buffer;
+int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
+ struct file_allocated_range_buffer *ranges,
+ int in_count, int *out_count);
+int ksmbd_vfs_unlink(struct user_namespace *user_ns,
+ struct dentry *dir, struct dentry *dentry);
+void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat);
+int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
+ struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct ksmbd_kstat *ksmbd_kstat);
+void ksmbd_vfs_posix_lock_wait(struct file_lock *flock);
+int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout);
+void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock);
+int ksmbd_vfs_remove_acl_xattrs(struct user_namespace *user_ns,
+ struct dentry *dentry);
+int ksmbd_vfs_remove_sd_xattrs(struct user_namespace *user_ns,
+ struct dentry *dentry);
+int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
+ struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct smb_ntsd *pntsd, int len);
+int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
+ struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct smb_ntsd **pntsd);
+int ksmbd_vfs_set_dos_attrib_xattr(struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct xattr_dos_attrib *da);
+int ksmbd_vfs_get_dos_attrib_xattr(struct user_namespace *user_ns,
+ struct dentry *dentry,
+ struct xattr_dos_attrib *da);
+int ksmbd_vfs_set_init_posix_acl(struct user_namespace *user_ns,
+ struct inode *inode);
+int ksmbd_vfs_inherit_posix_acl(struct user_namespace *user_ns,
+ struct inode *inode,
+ struct inode *parent_inode);
+#endif /* __KSMBD_VFS_H__ */
diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c
new file mode 100644
index 000000000000..92d8c61ffd2a
--- /dev/null
+++ b/fs/ksmbd/vfs_cache.c
@@ -0,0 +1,725 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include "glob.h"
+#include "vfs_cache.h"
+#include "oplock.h"
+#include "vfs.h"
+#include "connection.h"
+#include "mgmt/tree_connect.h"
+#include "mgmt/user_session.h"
+#include "smb_common.h"
+
+#define S_DEL_PENDING 1
+#define S_DEL_ON_CLS 2
+#define S_DEL_ON_CLS_STREAM 8
+
+static unsigned int inode_hash_mask __read_mostly;
+static unsigned int inode_hash_shift __read_mostly;
+static struct hlist_head *inode_hashtable __read_mostly;
+static DEFINE_RWLOCK(inode_hash_lock);
+
+static struct ksmbd_file_table global_ft;
+static atomic_long_t fd_limit;
+static struct kmem_cache *filp_cache;
+
+void ksmbd_set_fd_limit(unsigned long limit)
+{
+ limit = min(limit, get_max_files());
+ atomic_long_set(&fd_limit, limit);
+}
+
+static bool fd_limit_depleted(void)
+{
+ long v = atomic_long_dec_return(&fd_limit);
+
+ if (v >= 0)
+ return false;
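+ /* dropped below zero: undo the decrement and report depletion */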
+ atomic_long_inc(&fd_limit);
+ return true;
+}
+
+static void fd_limit_close(void)
+{
+ atomic_long_inc(&fd_limit);
+}
+
+/*
+ * INODE hash
+ */
+
+static unsigned long inode_hash(struct super_block *sb, unsigned long hashval)
+{
+ unsigned long tmp;
+
+ tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
+ L1_CACHE_BYTES;
+ tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> inode_hash_shift);
+ return tmp & inode_hash_mask;
+}
+
+static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode)
+{
+ struct hlist_head *head = inode_hashtable +
+ inode_hash(inode->i_sb, inode->i_ino);
+ struct ksmbd_inode *ci = NULL, *ret_ci = NULL;
+
+ hlist_for_each_entry(ci, head, m_hash) {
+ if (ci->m_inode == inode) {
+ if (atomic_inc_not_zero(&ci->m_count))
+ ret_ci = ci;
+ break;
+ }
+ }
+ return ret_ci;
+}
+
+static struct ksmbd_inode *ksmbd_inode_lookup(struct ksmbd_file *fp)
+{
+ return __ksmbd_inode_lookup(file_inode(fp->filp));
+}
+
+static struct ksmbd_inode *ksmbd_inode_lookup_by_vfsinode(struct inode *inode)
+{
+ struct ksmbd_inode *ci;
+
+ read_lock(&inode_hash_lock);
+ ci = __ksmbd_inode_lookup(inode);
+ read_unlock(&inode_hash_lock);
+ return ci;
+}
+
+int ksmbd_query_inode_status(struct inode *inode)
+{
+ struct ksmbd_inode *ci;
+ int ret = KSMBD_INODE_STATUS_UNKNOWN;
+
+ read_lock(&inode_hash_lock);
+ ci = __ksmbd_inode_lookup(inode);
+ if (ci) {
+ ret = KSMBD_INODE_STATUS_OK;
+ if (ci->m_flags & S_DEL_PENDING)
+ ret = KSMBD_INODE_STATUS_PENDING_DELETE;
+ atomic_dec(&ci->m_count);
+ }
+ read_unlock(&inode_hash_lock);
+ return ret;
+}
+
+bool ksmbd_inode_pending_delete(struct ksmbd_file *fp)
+{
+ return (fp->f_ci->m_flags & S_DEL_PENDING);
+}
+
+void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp)
+{
+ fp->f_ci->m_flags |= S_DEL_PENDING;
+}
+
+void ksmbd_clear_inode_pending_delete(struct ksmbd_file *fp)
+{
+ fp->f_ci->m_flags &= ~S_DEL_PENDING;
+}
+
+void ksmbd_fd_set_delete_on_close(struct ksmbd_file *fp,
+ int file_info)
+{
+ if (ksmbd_stream_fd(fp)) {
+ fp->f_ci->m_flags |= S_DEL_ON_CLS_STREAM;
+ return;
+ }
+
+ fp->f_ci->m_flags |= S_DEL_ON_CLS;
+}
+
+static void ksmbd_inode_hash(struct ksmbd_inode *ci)
+{
+ struct hlist_head *b = inode_hashtable +
+ inode_hash(ci->m_inode->i_sb, ci->m_inode->i_ino);
+
+ hlist_add_head(&ci->m_hash, b);
+}
+
+static void ksmbd_inode_unhash(struct ksmbd_inode *ci)
+{
+ write_lock(&inode_hash_lock);
+ hlist_del_init(&ci->m_hash);
+ write_unlock(&inode_hash_lock);
+}
+
+static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp)
+{
+ ci->m_inode = file_inode(fp->filp);
+ atomic_set(&ci->m_count, 1);
+ atomic_set(&ci->op_count, 0);
+ atomic_set(&ci->sop_count, 0);
+ ci->m_flags = 0;
+ ci->m_fattr = 0;
+ INIT_LIST_HEAD(&ci->m_fp_list);
+ INIT_LIST_HEAD(&ci->m_op_list);
+ rwlock_init(&ci->m_lock);
+ return 0;
+}
+
+static struct ksmbd_inode *ksmbd_inode_get(struct ksmbd_file *fp)
+{
+ struct ksmbd_inode *ci, *tmpci;
+ int rc;
+
+ read_lock(&inode_hash_lock);
+ ci = ksmbd_inode_lookup(fp);
+ read_unlock(&inode_hash_lock);
+ if (ci)
+ return ci;
+
+ ci = kmalloc(sizeof(struct ksmbd_inode), GFP_KERNEL);
+ if (!ci)
+ return NULL;
+
+ rc = ksmbd_inode_init(ci, fp);
+ if (rc) {
+ pr_err("inode initialization failed\n");
+ kfree(ci);
+ return NULL;
+ }
+
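+ /* re-check under the write lock in case another thread raced us */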
+ write_lock(&inode_hash_lock);
+ tmpci = ksmbd_inode_lookup(fp);
+ if (!tmpci) {
+ ksmbd_inode_hash(ci);
+ } else {
+ kfree(ci);
+ ci = tmpci;
+ }
+ write_unlock(&inode_hash_lock);
+ return ci;
+}
+
+static void ksmbd_inode_free(struct ksmbd_inode *ci)
+{
+ ksmbd_inode_unhash(ci);
+ kfree(ci);
+}
+
+static void ksmbd_inode_put(struct ksmbd_inode *ci)
+{
+ if (atomic_dec_and_test(&ci->m_count))
+ ksmbd_inode_free(ci);
+}
+
+int __init ksmbd_inode_hash_init(void)
+{
+ unsigned int loop;
+ unsigned long numentries = 16384;
+ unsigned long bucketsize = sizeof(struct hlist_head);
+ unsigned long size;
+
+ inode_hash_shift = ilog2(numentries);
+ inode_hash_mask = (1 << inode_hash_shift) - 1;
+
+ size = bucketsize << inode_hash_shift;
+
+ /* init master fp hash table */
+ inode_hashtable = vmalloc(size);
+ if (!inode_hashtable)
+ return -ENOMEM;
+
+ for (loop = 0; loop < (1U << inode_hash_shift); loop++)
+ INIT_HLIST_HEAD(&inode_hashtable[loop]);
+ return 0;
+}
+
+void ksmbd_release_inode_hash(void)
+{
+ vfree(inode_hashtable);
+}
+
+static void __ksmbd_inode_close(struct ksmbd_file *fp)
+{
+ struct dentry *dir, *dentry;
+ struct ksmbd_inode *ci = fp->f_ci;
+ int err;
+ struct file *filp;
+
+ filp = fp->filp;
+ if (ksmbd_stream_fd(fp) && (ci->m_flags & S_DEL_ON_CLS_STREAM)) {
+ ci->m_flags &= ~S_DEL_ON_CLS_STREAM;
+ err = ksmbd_vfs_remove_xattr(file_mnt_user_ns(filp),
+ filp->f_path.dentry,
+ fp->stream.name);
+ if (err)
+ pr_err("remove xattr failed : %s\n",
+ fp->stream.name);
+ }
+
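+ /* on the last reference, honour any pending delete-on-close */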
+ if (atomic_dec_and_test(&ci->m_count)) {
+ write_lock(&ci->m_lock);
+ if (ci->m_flags & (S_DEL_ON_CLS | S_DEL_PENDING)) {
+ dentry = filp->f_path.dentry;
+ dir = dentry->d_parent;
+ ci->m_flags &= ~(S_DEL_ON_CLS | S_DEL_PENDING);
+ write_unlock(&ci->m_lock);
+ ksmbd_vfs_unlink(file_mnt_user_ns(filp), dir, dentry);
+ write_lock(&ci->m_lock);
+ }
+ write_unlock(&ci->m_lock);
+
+ ksmbd_inode_free(ci);
+ }
+}
+
+static void __ksmbd_remove_durable_fd(struct ksmbd_file *fp)
+{
+ if (!has_file_id(fp->persistent_id))
+ return;
+
+ write_lock(&global_ft.lock);
+ idr_remove(global_ft.idr, fp->persistent_id);
+ write_unlock(&global_ft.lock);
+}
+
+static void __ksmbd_remove_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
+{
+ if (!has_file_id(fp->volatile_id))
+ return;
+
+ write_lock(&fp->f_ci->m_lock);
+ list_del_init(&fp->node);
+ write_unlock(&fp->f_ci->m_lock);
+
+ write_lock(&ft->lock);
+ idr_remove(ft->idr, fp->volatile_id);
+ write_unlock(&ft->lock);
+}
+
+static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
+{
+ struct file *filp;
+ struct ksmbd_lock *smb_lock, *tmp_lock;
+
+ fd_limit_close();
+ __ksmbd_remove_durable_fd(fp);
+ __ksmbd_remove_fd(ft, fp);
+
+ close_id_del_oplock(fp);
+ filp = fp->filp;
+
+ __ksmbd_inode_close(fp);
+ if (!IS_ERR_OR_NULL(filp))
+ fput(filp);
+
+ /* because the reference count of fp is 0, it is guaranteed that
+ * there are no accesses to fp->lock_list.
+ */
+ list_for_each_entry_safe(smb_lock, tmp_lock, &fp->lock_list, flist) {
+ spin_lock(&fp->conn->llist_lock);
+ list_del(&smb_lock->clist);
+ spin_unlock(&fp->conn->llist_lock);
+
+ list_del(&smb_lock->flist);
+ locks_free_lock(smb_lock->fl);
+ kfree(smb_lock);
+ }
+
+ kfree(fp->filename);
+ if (ksmbd_stream_fd(fp))
+ kfree(fp->stream.name);
+ kmem_cache_free(filp_cache, fp);
+}
+
+static struct ksmbd_file *ksmbd_fp_get(struct ksmbd_file *fp)
+{
+ if (!atomic_inc_not_zero(&fp->refcount))
+ return NULL;
+ return fp;
+}
+
+static struct ksmbd_file *__ksmbd_lookup_fd(struct ksmbd_file_table *ft,
+ u64 id)
+{
+ struct ksmbd_file *fp;
+
+ if (!has_file_id(id))
+ return NULL;
+
+ read_lock(&ft->lock);
+ fp = idr_find(ft->idr, id);
+ if (fp)
+ fp = ksmbd_fp_get(fp);
+ read_unlock(&ft->lock);
+ return fp;
+}
+
+static void __put_fd_final(struct ksmbd_work *work, struct ksmbd_file *fp)
+{
+ __ksmbd_close_fd(&work->sess->file_table, fp);
+ atomic_dec(&work->conn->stats.open_files_count);
+}
+
+static void set_close_state_blocked_works(struct ksmbd_file *fp)
+{
+ struct ksmbd_work *cancel_work, *ctmp;
+
+ spin_lock(&fp->f_lock);
+ list_for_each_entry_safe(cancel_work, ctmp, &fp->blocked_works,
+ fp_entry) {
+ list_del(&cancel_work->fp_entry);
+ cancel_work->state = KSMBD_WORK_CLOSED;
+ cancel_work->cancel_fn(cancel_work->cancel_argv);
+ }
+ spin_unlock(&fp->f_lock);
+}
+
+int ksmbd_close_fd(struct ksmbd_work *work, u64 id)
+{
+ struct ksmbd_file *fp;
+ struct ksmbd_file_table *ft;
+
+ if (!has_file_id(id))
+ return 0;
+
+ ft = &work->sess->file_table;
+ read_lock(&ft->lock);
+ fp = idr_find(ft->idr, id);
+ if (fp) {
+ set_close_state_blocked_works(fp);
+
+ if (!atomic_dec_and_test(&fp->refcount))
+ fp = NULL;
+ }
+ read_unlock(&ft->lock);
+
+ if (!fp)
+ return -EINVAL;
+
+ __put_fd_final(work, fp);
+ return 0;
+}
+
+void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp)
+{
+ if (!fp)
+ return;
+
+ if (!atomic_dec_and_test(&fp->refcount))
+ return;
+ __put_fd_final(work, fp);
+}
+
+static bool __sanity_check(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp)
+{
+ if (!fp)
+ return false;
+ if (fp->tcon != tcon)
+ return false;
+ return true;
+}
+
+struct ksmbd_file *ksmbd_lookup_foreign_fd(struct ksmbd_work *work, u64 id)
+{
+ return __ksmbd_lookup_fd(&work->sess->file_table, id);
+}
+
+struct ksmbd_file *ksmbd_lookup_fd_fast(struct ksmbd_work *work, u64 id)
+{
+ struct ksmbd_file *fp = __ksmbd_lookup_fd(&work->sess->file_table, id);
+
+ if (__sanity_check(work->tcon, fp))
+ return fp;
+
+ ksmbd_fd_put(work, fp);
+ return NULL;
+}
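
/*
 * Usage sketch (illustrative only, not part of this patch): each lookup
 * helper above returns an fp holding an extra reference taken via
 * atomic_inc_not_zero(), so a request handler must pair the lookup with
 * ksmbd_fd_put() once it is done with the file.
 */
static int example_handler(struct ksmbd_work *work, u64 id)
{
	struct ksmbd_file *fp = ksmbd_lookup_fd_fast(work, id);

	if (!fp)
		return -ENOENT;
	/* ... operate on fp->filp while the reference is held ... */
	ksmbd_fd_put(work, fp);
	return 0;
}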
+
+struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id,
+ u64 pid)
+{
+ struct ksmbd_file *fp;
+
+ if (!has_file_id(id)) {
+ id = work->compound_fid;
+ pid = work->compound_pfid;
+ }
+
+ fp = __ksmbd_lookup_fd(&work->sess->file_table, id);
+ if (!__sanity_check(work->tcon, fp)) {
+ ksmbd_fd_put(work, fp);
+ return NULL;
+ }
+ if (fp->persistent_id != pid) {
+ ksmbd_fd_put(work, fp);
+ return NULL;
+ }
+ return fp;
+}
+
+struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id)
+{
+ return __ksmbd_lookup_fd(&global_ft, id);
+}
+
+struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid)
+{
+ struct ksmbd_file *fp = NULL;
+ unsigned int id;
+
+ read_lock(&global_ft.lock);
+ idr_for_each_entry(global_ft.idr, fp, id) {
+ if (!memcmp(fp->create_guid,
+ cguid,
+ SMB2_CREATE_GUID_SIZE)) {
+ fp = ksmbd_fp_get(fp);
+ break;
+ }
+ }
+ read_unlock(&global_ft.lock);
+
+ return fp;
+}
+
+struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode)
+{
+ struct ksmbd_file *lfp;
+ struct ksmbd_inode *ci;
+
+ ci = ksmbd_inode_lookup_by_vfsinode(inode);
+ if (!ci)
+ return NULL;
+
+ read_lock(&ci->m_lock);
+ list_for_each_entry(lfp, &ci->m_fp_list, node) {
+ if (inode == file_inode(lfp->filp)) {
+ atomic_dec(&ci->m_count);
+ read_unlock(&ci->m_lock);
+ return lfp;
+ }
+ }
+ atomic_dec(&ci->m_count);
+ read_unlock(&ci->m_lock);
+ return NULL;
+}
+
+#define OPEN_ID_TYPE_VOLATILE_ID (0)
+#define OPEN_ID_TYPE_PERSISTENT_ID (1)
+
+static void __open_id_set(struct ksmbd_file *fp, u64 id, int type)
+{
+ if (type == OPEN_ID_TYPE_VOLATILE_ID)
+ fp->volatile_id = id;
+ if (type == OPEN_ID_TYPE_PERSISTENT_ID)
+ fp->persistent_id = id;
+}
+
+static int __open_id(struct ksmbd_file_table *ft, struct ksmbd_file *fp,
+ int type)
+{
+ u64 id = 0;
+ int ret;
+
+ if (type == OPEN_ID_TYPE_VOLATILE_ID && fd_limit_depleted()) {
+ __open_id_set(fp, KSMBD_NO_FID, type);
+ return -EMFILE;
+ }
+
+ idr_preload(GFP_KERNEL);
+ write_lock(&ft->lock);
+ ret = idr_alloc_cyclic(ft->idr, fp, 0, INT_MAX - 1, GFP_NOWAIT);
+ if (ret >= 0) {
+ id = ret;
+ ret = 0;
+ } else {
+ id = KSMBD_NO_FID;
+ fd_limit_close();
+ }
+
+ __open_id_set(fp, id, type);
+ write_unlock(&ft->lock);
+ idr_preload_end();
+ return ret;
+}
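
/*
 * Minimal sketch (illustrative only) of the idr_preload() pattern used
 * in __open_id() above: the allocation that may sleep happens with
 * GFP_KERNEL before the write lock is taken, so idr_alloc_cyclic() can
 * run with GFP_NOWAIT while the lock is held.
 */
static int example_alloc_id(struct idr *idr, rwlock_t *lock, void *ptr)
{
	int id;

	idr_preload(GFP_KERNEL);	/* may sleep; no locks held yet */
	write_lock(lock);
	id = idr_alloc_cyclic(idr, ptr, 0, INT_MAX - 1, GFP_NOWAIT);
	write_unlock(lock);
	idr_preload_end();
	return id;	/* >= 0 on success, -ENOMEM/-ENOSPC on failure */
}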
+
+unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp)
+{
+ __open_id(&global_ft, fp, OPEN_ID_TYPE_PERSISTENT_ID);
+ return fp->persistent_id;
+}
+
+struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp)
+{
+ struct ksmbd_file *fp;
+ int ret;
+
+ fp = kmem_cache_zalloc(filp_cache, GFP_KERNEL);
+ if (!fp) {
+ pr_err("Failed to allocate memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ INIT_LIST_HEAD(&fp->blocked_works);
+ INIT_LIST_HEAD(&fp->node);
+ INIT_LIST_HEAD(&fp->lock_list);
+ spin_lock_init(&fp->f_lock);
+ atomic_set(&fp->refcount, 1);
+
+ fp->filp = filp;
+ fp->conn = work->sess->conn;
+ fp->tcon = work->tcon;
+ fp->volatile_id = KSMBD_NO_FID;
+ fp->persistent_id = KSMBD_NO_FID;
+ fp->f_ci = ksmbd_inode_get(fp);
+
+ if (!fp->f_ci) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ ret = __open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID);
+ if (ret) {
+ ksmbd_inode_put(fp->f_ci);
+ goto err_out;
+ }
+
+ atomic_inc(&work->conn->stats.open_files_count);
+ return fp;
+
+err_out:
+ kmem_cache_free(filp_cache, fp);
+ return ERR_PTR(ret);
+}
+
+static int
+__close_file_table_ids(struct ksmbd_file_table *ft,
+ struct ksmbd_tree_connect *tcon,
+ bool (*skip)(struct ksmbd_tree_connect *tcon,
+ struct ksmbd_file *fp))
+{
+ unsigned int id;
+ struct ksmbd_file *fp;
+ int num = 0;
+
+ idr_for_each_entry(ft->idr, fp, id) {
+ if (skip(tcon, fp))
+ continue;
+
+ set_close_state_blocked_works(fp);
+
+ if (!atomic_dec_and_test(&fp->refcount))
+ continue;
+ __ksmbd_close_fd(ft, fp);
+ num++;
+ }
+ return num;
+}
+
+static bool tree_conn_fd_check(struct ksmbd_tree_connect *tcon,
+ struct ksmbd_file *fp)
+{
+ return fp->tcon != tcon;
+}
+
+static bool session_fd_check(struct ksmbd_tree_connect *tcon,
+ struct ksmbd_file *fp)
+{
+ return false;
+}
+
+void ksmbd_close_tree_conn_fds(struct ksmbd_work *work)
+{
+ int num = __close_file_table_ids(&work->sess->file_table,
+ work->tcon,
+ tree_conn_fd_check);
+
+ atomic_sub(num, &work->conn->stats.open_files_count);
+}
+
+void ksmbd_close_session_fds(struct ksmbd_work *work)
+{
+ int num = __close_file_table_ids(&work->sess->file_table,
+ work->tcon,
+ session_fd_check);
+
+ atomic_sub(num, &work->conn->stats.open_files_count);
+}
+
+int ksmbd_init_global_file_table(void)
+{
+ return ksmbd_init_file_table(&global_ft);
+}
+
+void ksmbd_free_global_file_table(void)
+{
+ struct ksmbd_file *fp = NULL;
+ unsigned int id;
+
+ idr_for_each_entry(global_ft.idr, fp, id) {
+ __ksmbd_remove_durable_fd(fp);
+ kmem_cache_free(filp_cache, fp);
+ }
+
+ ksmbd_destroy_file_table(&global_ft);
+}
+
+int ksmbd_file_table_flush(struct ksmbd_work *work)
+{
+ struct ksmbd_file *fp = NULL;
+ unsigned int id;
+ int ret = 0;
+
+ read_lock(&work->sess->file_table.lock);
+ idr_for_each_entry(work->sess->file_table.idr, fp, id) {
+ ret = ksmbd_vfs_fsync(work, fp->volatile_id, KSMBD_NO_FID);
+ if (ret)
+ break;
+ }
+ read_unlock(&work->sess->file_table.lock);
+ return ret;
+}
+
+int ksmbd_init_file_table(struct ksmbd_file_table *ft)
+{
+ ft->idr = kzalloc(sizeof(struct idr), GFP_KERNEL);
+ if (!ft->idr)
+ return -ENOMEM;
+
+ idr_init(ft->idr);
+ rwlock_init(&ft->lock);
+ return 0;
+}
+
+void ksmbd_destroy_file_table(struct ksmbd_file_table *ft)
+{
+ if (!ft->idr)
+ return;
+
+ __close_file_table_ids(ft, NULL, session_fd_check);
+ idr_destroy(ft->idr);
+ kfree(ft->idr);
+ ft->idr = NULL;
+}
+
+int ksmbd_init_file_cache(void)
+{
+ filp_cache = kmem_cache_create("ksmbd_file_cache",
+ sizeof(struct ksmbd_file), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!filp_cache)
+ goto out;
+
+ return 0;
+
+out:
+ pr_err("failed to allocate file cache\n");
+ return -ENOMEM;
+}
+
+void ksmbd_exit_file_cache(void)
+{
+ kmem_cache_destroy(filp_cache);
+}
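
/*
 * Illustrative sketch (hypothetical init function, not part of this
 * patch): the file-cache and inode-hash helpers above pair naturally as
 * module init steps, with the error path releasing them in reverse.
 */
static int __init example_vfs_cache_init(void)
{
	int ret;

	ret = ksmbd_init_file_cache();
	if (ret)
		return ret;

	ret = ksmbd_inode_hash_init();
	if (ret)
		ksmbd_exit_file_cache();
	return ret;
}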
diff --git a/fs/ksmbd/vfs_cache.h b/fs/ksmbd/vfs_cache.h
new file mode 100644
index 000000000000..70dfe6a99f13
--- /dev/null
+++ b/fs/ksmbd/vfs_cache.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __VFS_CACHE_H__
+#define __VFS_CACHE_H__
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/rwsem.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/workqueue.h>
+
+#include "vfs.h"
+
+/* Windows style file permissions for extended response */
+#define FILE_GENERIC_ALL 0x1F01FF
+#define FILE_GENERIC_READ 0x120089
+#define FILE_GENERIC_WRITE 0x120116
+#define FILE_GENERIC_EXECUTE 0x1200a0
+
+#define KSMBD_START_FID 0
+#define KSMBD_NO_FID (INT_MAX)
+#define SMB2_NO_FID (0xFFFFFFFFFFFFFFFFULL)
+
+struct ksmbd_conn;
+struct ksmbd_session;
+
+struct ksmbd_lock {
+ struct file_lock *fl;
+ struct list_head clist;
+ struct list_head flist;
+ struct list_head llist;
+ unsigned int flags;
+ int cmd;
+ int zero_len;
+ unsigned long long start;
+ unsigned long long end;
+};
+
+struct stream {
+ char *name;
+ ssize_t size;
+};
+
+struct ksmbd_inode {
+ rwlock_t m_lock;
+ atomic_t m_count;
+ atomic_t op_count;
+ /* opinfo count for streams */
+ atomic_t sop_count;
+ struct inode *m_inode;
+ unsigned int m_flags;
+ struct hlist_node m_hash;
+ struct list_head m_fp_list;
+ struct list_head m_op_list;
+ struct oplock_info *m_opinfo;
+ __le32 m_fattr;
+};
+
+struct ksmbd_file {
+ struct file *filp;
+ char *filename;
+ u64 persistent_id;
+ u64 volatile_id;
+
+ spinlock_t f_lock;
+
+ struct ksmbd_inode *f_ci;
+ struct ksmbd_inode *f_parent_ci;
+ struct oplock_info __rcu *f_opinfo;
+ struct ksmbd_conn *conn;
+ struct ksmbd_tree_connect *tcon;
+
+ atomic_t refcount;
+ __le32 daccess;
+ __le32 saccess;
+ __le32 coption;
+ __le32 cdoption;
+ __u64 create_time;
+ __u64 itime;
+
+ bool is_nt_open;
+ bool attrib_only;
+
+ char client_guid[16];
+ char create_guid[16];
+ char app_instance_id[16];
+
+ struct stream stream;
+ struct list_head node;
+ struct list_head blocked_works;
+ struct list_head lock_list;
+
+ int durable_timeout;
+
+ /* for SMB1 */
+ int pid;
+
+ /* conflict lock fail count for SMB1 */
+ unsigned int cflock_cnt;
+ /* last lock failure start offset for SMB1 */
+ unsigned long long llock_fstart;
+
+ int dirent_offset;
+
+ /* if a directory listing is in progress, the fields below are valid */
+ struct ksmbd_readdir_data readdir_data;
+ int dot_dotdot[2];
+};
+
+static inline void set_ctx_actor(struct dir_context *ctx,
+ filldir_t actor)
+{
+ ctx->actor = actor;
+}
+
+#define KSMBD_NR_OPEN_DEFAULT BITS_PER_LONG
+
+struct ksmbd_file_table {
+ rwlock_t lock;
+ struct idr *idr;
+};
+
+static inline bool has_file_id(u64 id)
+{
+ return id < KSMBD_NO_FID;
+}
+
+static inline bool ksmbd_stream_fd(struct ksmbd_file *fp)
+{
+ return fp->stream.name != NULL;
+}
+
+int ksmbd_init_file_table(struct ksmbd_file_table *ft);
+void ksmbd_destroy_file_table(struct ksmbd_file_table *ft);
+int ksmbd_close_fd(struct ksmbd_work *work, u64 id);
+struct ksmbd_file *ksmbd_lookup_fd_fast(struct ksmbd_work *work, u64 id);
+struct ksmbd_file *ksmbd_lookup_foreign_fd(struct ksmbd_work *work, u64 id);
+struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id,
+ u64 pid);
+void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp);
+struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id);
+struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid);
+struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode);
+unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp);
+struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp);
+void ksmbd_close_tree_conn_fds(struct ksmbd_work *work);
+void ksmbd_close_session_fds(struct ksmbd_work *work);
+int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode);
+int ksmbd_init_global_file_table(void);
+void ksmbd_free_global_file_table(void);
+int ksmbd_file_table_flush(struct ksmbd_work *work);
+void ksmbd_set_fd_limit(unsigned long limit);
+
+/*
+ * INODE hash
+ */
+int __init ksmbd_inode_hash_init(void);
+void ksmbd_release_inode_hash(void);
+
+enum KSMBD_INODE_STATUS {
+ KSMBD_INODE_STATUS_OK,
+ KSMBD_INODE_STATUS_UNKNOWN,
+ KSMBD_INODE_STATUS_PENDING_DELETE,
+};
+
+int ksmbd_query_inode_status(struct inode *inode);
+bool ksmbd_inode_pending_delete(struct ksmbd_file *fp);
+void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp);
+void ksmbd_clear_inode_pending_delete(struct ksmbd_file *fp);
+void ksmbd_fd_set_delete_on_close(struct ksmbd_file *fp,
+ int file_info);
+int ksmbd_init_file_cache(void);
+void ksmbd_exit_file_cache(void);
+#endif /* __VFS_CACHE_H__ */
diff --git a/fs/ksmbd/xattr.h b/fs/ksmbd/xattr.h
new file mode 100644
index 000000000000..8857c01093d9
--- /dev/null
+++ b/fs/ksmbd/xattr.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef __XATTR_H__
+#define __XATTR_H__
+
+/*
+ * These are on-disk structures that store additional metadata in xattrs
+ * to reproduce Windows filesystem semantics. They are NDR-encoded to be
+ * compatible with Samba's xattr metadata format. Compatibility with
+ * Samba matters because existing files would otherwise lose metadata
+ * (file attributes, creation time, ACLs) when switching between ksmbd
+ * and Samba.
+ */
+
+/*
+ * DOS attribute flags indicating which of the fields below are valid.
+ */
+enum {
+ XATTR_DOSINFO_ATTRIB = 0x00000001,
+ XATTR_DOSINFO_EA_SIZE = 0x00000002,
+ XATTR_DOSINFO_SIZE = 0x00000004,
+ XATTR_DOSINFO_ALLOC_SIZE = 0x00000008,
+ XATTR_DOSINFO_CREATE_TIME = 0x00000010,
+ XATTR_DOSINFO_CHANGE_TIME = 0x00000020,
+ XATTR_DOSINFO_ITIME = 0x00000040
+};
+
+/*
+ * DOS attribute structure compatible with Samba's. Storing it in the
+ * xattr named "DOSATTRIB", separately from the inode, allows ksmbd to
+ * faithfully reproduce Windows filesystem semantics on top of a POSIX
+ * filesystem.
+ */
+struct xattr_dos_attrib {
+ __u16 version; /* version 3 or version 4 */
+ __u32 flags; /* valid flags */
+ __u32 attr; /* Dos attribute */
+ __u32 ea_size; /* EA size */
+ __u64 size;
+ __u64 alloc_size;
+ __u64 create_time; /* File creation time */
+ __u64 change_time; /* File change time */
+ __u64 itime; /* Invented/Initial time */
+};
+
+/*
+ * Enumeration used for computing the POSIX ACL hash.
+ */
+enum {
+ SMB_ACL_TAG_INVALID = 0,
+ SMB_ACL_USER,
+ SMB_ACL_USER_OBJ,
+ SMB_ACL_GROUP,
+ SMB_ACL_GROUP_OBJ,
+ SMB_ACL_OTHER,
+ SMB_ACL_MASK
+};
+
+#define SMB_ACL_READ 4
+#define SMB_ACL_WRITE 2
+#define SMB_ACL_EXECUTE 1
+
+struct xattr_acl_entry {
+ int type;
+ uid_t uid;
+ gid_t gid;
+ mode_t perm;
+};
+
+/*
+ * The xattr_smb_acl structure is used for computing the POSIX ACL hash.
+ */
+struct xattr_smb_acl {
+ int count;
+ int next;
+ struct xattr_acl_entry entries[];
+};
+
+/* the 64-byte hash in xattr_ntacl is computed with SHA-256 */
+#define XATTR_SD_HASH_TYPE_SHA256 0x1
+#define XATTR_SD_HASH_SIZE 64
+
+/*
+ * xattr_ntacl stores the NT ACL together with hashes. The hashes are
+ * used to validate the POSIX ACL and NT ACL stored in xattrs.
+ */
+struct xattr_ntacl {
+ __u16 version; /* version 4 */
+ void *sd_buf;
+ __u32 sd_size;
+ __u16 hash_type; /* hash type */
+ __u8 desc[10]; /* posix_acl description */
+ __u16 desc_len;
+ __u64 current_time;
+ __u8 hash[XATTR_SD_HASH_SIZE]; /* 64-byte hash for the NT ACL */
+ __u8 posix_acl_hash[XATTR_SD_HASH_SIZE]; /* 64-byte hash for the POSIX ACL */
+};
+
+/* DOS ATTRIBUTE XATTR PREFIX */
+#define DOS_ATTRIBUTE_PREFIX "DOSATTRIB"
+#define DOS_ATTRIBUTE_PREFIX_LEN (sizeof(DOS_ATTRIBUTE_PREFIX) - 1)
+#define XATTR_NAME_DOS_ATTRIBUTE (XATTR_USER_PREFIX DOS_ATTRIBUTE_PREFIX)
+#define XATTR_NAME_DOS_ATTRIBUTE_LEN \
+ (sizeof(XATTR_USER_PREFIX DOS_ATTRIBUTE_PREFIX) - 1)
+
+/* STREAM XATTR PREFIX */
+#define STREAM_PREFIX "DosStream."
+#define STREAM_PREFIX_LEN (sizeof(STREAM_PREFIX) - 1)
+#define XATTR_NAME_STREAM (XATTR_USER_PREFIX STREAM_PREFIX)
+#define XATTR_NAME_STREAM_LEN (sizeof(XATTR_NAME_STREAM) - 1)
+
+/* SECURITY DESCRIPTOR(NTACL) XATTR PREFIX */
+#define SD_PREFIX "NTACL"
+#define SD_PREFIX_LEN (sizeof(SD_PREFIX) - 1)
+#define XATTR_NAME_SD (XATTR_SECURITY_PREFIX SD_PREFIX)
+#define XATTR_NAME_SD_LEN \
+ (sizeof(XATTR_SECURITY_PREFIX SD_PREFIX) - 1)
+
+#endif /* __XATTR_H__ */
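
/*
 * Illustrative sketch (not part of this patch; assumes linux/xattr.h is
 * included): the name macros above are what a consumer of the
 * Samba-compatible metadata would pass to the VFS xattr API, e.g. to
 * fetch the raw NDR-encoded DOS attribute blob for decoding.
 */
static ssize_t example_read_dos_attrib(struct user_namespace *mnt_userns,
				       struct dentry *dentry,
				       void *buf, size_t len)
{
	/* reads "user.DOSATTRIB"; the caller decodes the NDR payload */
	return vfs_getxattr(mnt_userns, dentry, XATTR_NAME_DOS_ATTRIBUTE,
			    buf, len);
}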
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 2de048f80eb8..0ab9756ed235 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -584,7 +584,7 @@ static struct ctl_table nlm_sysctls[] = {
.data = &nsm_use_hostnames,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dobool,
},
{
.procname = "nsm_local_state",
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4c10fb5138f1..e10ae2c41279 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -40,12 +40,15 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Obtain file pointer. Not used by FREE_ALL call. */
if (filp != NULL) {
- if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0)
+ int mode = lock_to_openmode(&lock->fl);
+
+ error = nlm_lookup_file(rqstp, &file, lock);
+ if (error)
goto no_locks;
*filp = file;
/* Set up the missing parts of the file_lock structure */
- lock->fl.fl_file = file->f_file;
+ lock->fl.fl_file = file->f_file[mode];
lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 61d3cc2283dc..e9b85d8fd5fe 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -31,6 +31,7 @@
#include <linux/lockd/nlm.h>
#include <linux/lockd/lockd.h>
#include <linux/kthread.h>
+#include <linux/exportfs.h>
#define NLMDBG_FACILITY NLMDBG_SVCLOCK
@@ -395,28 +396,10 @@ nlmsvc_release_lockowner(struct nlm_lock *lock)
nlmsvc_put_lockowner(lock->fl.fl_owner);
}
-static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
-{
- struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner;
- new->fl_owner = nlmsvc_get_lockowner(nlm_lo);
-}
-
-static void nlmsvc_locks_release_private(struct file_lock *fl)
-{
- nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner);
-}
-
-static const struct file_lock_operations nlmsvc_lock_ops = {
- .fl_copy_lock = nlmsvc_locks_copy_lock,
- .fl_release_private = nlmsvc_locks_release_private,
-};
-
void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host,
pid_t pid)
{
fl->fl_owner = nlmsvc_find_lockowner(host, pid);
- if (fl->fl_owner != NULL)
- fl->fl_ops = &nlmsvc_lock_ops;
}
/*
@@ -488,17 +471,24 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_cookie *cookie, int reclaim)
{
struct nlm_block *block = NULL;
+ struct inode *inode = nlmsvc_file_inode(file);
int error;
+ int mode;
+ int async_block = 0;
__be32 ret;
dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
- locks_inode(file->f_file)->i_sb->s_id,
- locks_inode(file->f_file)->i_ino,
+ inode->i_sb->s_id, inode->i_ino,
lock->fl.fl_type, lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end,
wait);
+ if (inode->i_sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS) {
+ async_block = wait;
+ wait = 0;
+ }
+
/* Lock file against concurrent access */
mutex_lock(&file->f_mutex);
/* Get existing block (in case client is busy-waiting)
@@ -542,7 +532,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
if (!wait)
lock->fl.fl_flags &= ~FL_SLEEP;
- error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
+ mode = lock_to_openmode(&lock->fl);
+ error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL);
lock->fl.fl_flags &= ~FL_SLEEP;
dprintk("lockd: vfs_lock_file returned %d\n", error);
@@ -558,7 +549,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
*/
if (wait)
break;
- ret = nlm_lck_denied;
+ ret = async_block ? nlm_lck_blocked : nlm_lck_denied;
goto out;
case FILE_LOCK_DEFERRED:
if (wait)
@@ -595,12 +586,13 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *conflock, struct nlm_cookie *cookie)
{
int error;
+ int mode;
__be32 ret;
struct nlm_lockowner *test_owner;
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
- locks_inode(file->f_file)->i_sb->s_id,
- locks_inode(file->f_file)->i_ino,
+ nlmsvc_file_inode(file)->i_sb->s_id,
+ nlmsvc_file_inode(file)->i_ino,
lock->fl.fl_type,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -613,7 +605,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
/* If there's a conflicting lock, remember to clean up the test lock */
test_owner = (struct nlm_lockowner *)lock->fl.fl_owner;
- error = vfs_test_lock(file->f_file, &lock->fl);
+ mode = lock_to_openmode(&lock->fl);
+ error = vfs_test_lock(file->f_file[mode], &lock->fl);
if (error) {
/* We can't currently deal with deferred test requests */
if (error == FILE_LOCK_DEFERRED)
@@ -634,7 +627,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
conflock->caller = "somehost"; /* FIXME */
conflock->len = strlen(conflock->caller);
conflock->oh.len = 0; /* don't return OH info */
- conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
+ conflock->svid = lock->fl.fl_pid;
conflock->fl.fl_type = lock->fl.fl_type;
conflock->fl.fl_start = lock->fl.fl_start;
conflock->fl.fl_end = lock->fl.fl_end;
@@ -659,11 +652,11 @@ out:
__be32
nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
{
- int error;
+ int error = 0;
dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
- locks_inode(file->f_file)->i_sb->s_id,
- locks_inode(file->f_file)->i_ino,
+ nlmsvc_file_inode(file)->i_sb->s_id,
+ nlmsvc_file_inode(file)->i_ino,
lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -672,7 +665,12 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
nlmsvc_cancel_blocked(net, file, lock);
lock->fl.fl_type = F_UNLCK;
- error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
+ if (file->f_file[O_RDONLY])
+ error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK,
+ &lock->fl, NULL);
+ if (file->f_file[O_WRONLY])
+ error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK,
+ &lock->fl, NULL);
return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
}
@@ -689,10 +687,11 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
{
struct nlm_block *block;
int status = 0;
+ int mode;
dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
- locks_inode(file->f_file)->i_sb->s_id,
- locks_inode(file->f_file)->i_ino,
+ nlmsvc_file_inode(file)->i_sb->s_id,
+ nlmsvc_file_inode(file)->i_ino,
lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -704,7 +703,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
block = nlmsvc_lookup_block(file, lock);
mutex_unlock(&file->f_mutex);
if (block != NULL) {
- vfs_cancel_lock(block->b_file->f_file,
+ mode = lock_to_openmode(&lock->fl);
+ vfs_cancel_lock(block->b_file->f_file[mode],
&block->b_call->a_args.lock.fl);
status = nlmsvc_unlink_block(block);
nlmsvc_release_block(block);
@@ -788,9 +788,21 @@ nlmsvc_notify_blocked(struct file_lock *fl)
printk(KERN_WARNING "lockd: notification for unknown block!\n");
}
+static fl_owner_t nlmsvc_get_owner(fl_owner_t owner)
+{
+ return nlmsvc_get_lockowner(owner);
+}
+
+static void nlmsvc_put_owner(fl_owner_t owner)
+{
+ nlmsvc_put_lockowner(owner);
+}
+
const struct lock_manager_operations nlmsvc_lock_operations = {
.lm_notify = nlmsvc_notify_blocked,
.lm_grant = nlmsvc_grant_deferred,
+ .lm_get_owner = nlmsvc_get_owner,
+ .lm_put_owner = nlmsvc_put_owner,
};
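
/*
 * Conceptual sketch (simplified; not the exact fs/locks.c code): with
 * lm_get_owner/lm_put_owner wired up, the generic lock code can take
 * and drop nlm_lockowner references itself whenever it duplicates or
 * frees a file_lock, which is what lets the per-lock fl_ops above be
 * removed.
 */
static void example_dup_lock_owner(struct file_lock *new,
				   struct file_lock *fl)
{
	new->fl_owner = fl->fl_owner;
	if (fl->fl_lmops && fl->fl_lmops->lm_get_owner)
		new->fl_owner = fl->fl_lmops->lm_get_owner(fl->fl_owner);
}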
/*
@@ -809,6 +821,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
{
struct nlm_file *file = block->b_file;
struct nlm_lock *lock = &block->b_call->a_args.lock;
+ int mode;
int error;
loff_t fl_start, fl_end;
@@ -834,7 +847,8 @@ nlmsvc_grant_blocked(struct nlm_block *block)
lock->fl.fl_flags |= FL_SLEEP;
fl_start = lock->fl.fl_start;
fl_end = lock->fl.fl_end;
- error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
+ mode = lock_to_openmode(&lock->fl);
+ error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL);
lock->fl.fl_flags &= ~FL_SLEEP;
lock->fl.fl_start = fl_start;
lock->fl.fl_end = fl_end;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 4ae4b63b5392..99696d3f6dd6 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -55,6 +55,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
struct nlm_host *host = NULL;
struct nlm_file *file = NULL;
struct nlm_lock *lock = &argp->lock;
+ int mode;
__be32 error = 0;
/* nfsd callbacks must have been installed for this procedure */
@@ -69,13 +70,14 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Obtain file pointer. Not used by FREE_ALL call. */
if (filp != NULL) {
- error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh));
+ error = cast_status(nlm_lookup_file(rqstp, &file, lock));
if (error != 0)
goto no_locks;
*filp = file;
/* Set up the missing parts of the file_lock structure */
- lock->fl.fl_file = file->f_file;
+ mode = lock_to_openmode(&lock->fl);
+ lock->fl.fl_file = file->f_file[mode];
lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 028fc152da22..cb3a7512c33e 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -45,7 +45,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
{
- struct inode *inode = locks_inode(file->f_file);
+ struct inode *inode = nlmsvc_file_inode(file);
dprintk("lockd: %s %s/%ld\n",
msg, inode->i_sb->s_id, inode->i_ino);
@@ -71,56 +71,75 @@ static inline unsigned int file_hash(struct nfs_fh *f)
return tmp & (FILE_NRHASH - 1);
}
+int lock_to_openmode(struct file_lock *lock)
+{
+ return (lock->fl_type == F_WRLCK) ? O_WRONLY : O_RDONLY;
+}
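
/*
 * Usage sketch (illustrative only): O_RDONLY is 0 and O_WRONLY is 1, so
 * the value returned by lock_to_openmode() doubles as an index into the
 * two-slot f_file[] array this series introduces in struct nlm_file.
 */
static struct file *example_file_for_lock(struct nlm_file *file,
					  struct file_lock *lock)
{
	return file->f_file[lock_to_openmode(lock)];
}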
+
+/*
+ * Open the file. Note that if we're reexporting, for example,
+ * this could block the lockd thread for a while.
+ *
+ * We have to make sure we have the right credential to open
+ * the file.
+ */
+static __be32 nlm_do_fopen(struct svc_rqst *rqstp,
+ struct nlm_file *file, int mode)
+{
+ struct file **fp = &file->f_file[mode];
+ __be32 nfserr;
+
+ if (*fp)
+ return 0;
+ nfserr = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode);
+ if (nfserr)
+ dprintk("lockd: open failed (error %d)\n", nfserr);
+ return nfserr;
+}
+
/*
* Lookup file info. If it doesn't exist, create a file info struct
* and open a (VFS) file for the given inode.
- *
- * FIXME:
- * Note that we open the file O_RDONLY even when creating write locks.
- * This is not quite right, but for now, we assume the client performs
- * the proper R/W checking.
*/
__be32
nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
- struct nfs_fh *f)
+ struct nlm_lock *lock)
{
struct nlm_file *file;
unsigned int hash;
__be32 nfserr;
+ int mode;
- nlm_debug_print_fh("nlm_lookup_file", f);
+ nlm_debug_print_fh("nlm_lookup_file", &lock->fh);
- hash = file_hash(f);
+ hash = file_hash(&lock->fh);
+ mode = lock_to_openmode(&lock->fl);
/* Lock file table */
mutex_lock(&nlm_file_mutex);
hlist_for_each_entry(file, &nlm_files[hash], f_list)
- if (!nfs_compare_fh(&file->f_handle, f))
+ if (!nfs_compare_fh(&file->f_handle, &lock->fh)) {
+ mutex_lock(&file->f_mutex);
+ nfserr = nlm_do_fopen(rqstp, file, mode);
+ mutex_unlock(&file->f_mutex);
goto found;
-
- nlm_debug_print_fh("creating file for", f);
+ }
+ nlm_debug_print_fh("creating file for", &lock->fh);
nfserr = nlm_lck_denied_nolocks;
file = kzalloc(sizeof(*file), GFP_KERNEL);
if (!file)
- goto out_unlock;
+ goto out_free;
- memcpy(&file->f_handle, f, sizeof(struct nfs_fh));
+ memcpy(&file->f_handle, &lock->fh, sizeof(struct nfs_fh));
mutex_init(&file->f_mutex);
INIT_HLIST_NODE(&file->f_list);
INIT_LIST_HEAD(&file->f_blocks);
- /* Open the file. Note that this must not sleep for too long, else
- * we would lock up lockd:-) So no NFS re-exports, folks.
- *
- * We have to make sure we have the right credential to open
- * the file.
- */
- if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) {
- dprintk("lockd: open failed (error %d)\n", nfserr);
- goto out_free;
- }
+ nfserr = nlm_do_fopen(rqstp, file, mode);
+ if (nfserr)
+ goto out_unlock;
hlist_add_head(&file->f_list, &nlm_files[hash]);
@@ -128,7 +147,6 @@ found:
dprintk("lockd: found file %p (count %d)\n", file, file->f_count);
*result = file;
file->f_count++;
- nfserr = 0;
out_unlock:
mutex_unlock(&nlm_file_mutex);
@@ -148,13 +166,34 @@ nlm_delete_file(struct nlm_file *file)
nlm_debug_print_file("closing file", file);
if (!hlist_unhashed(&file->f_list)) {
hlist_del(&file->f_list);
- nlmsvc_ops->fclose(file->f_file);
+ if (file->f_file[O_RDONLY])
+ nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
+ if (file->f_file[O_WRONLY])
+ nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
kfree(file);
} else {
printk(KERN_WARNING "lockd: attempt to release unknown file!\n");
}
}
+static int nlm_unlock_files(struct nlm_file *file)
+{
+ struct file_lock lock;
+
+ lock.fl_type = F_UNLCK;
+ lock.fl_start = 0;
+ lock.fl_end = OFFSET_MAX;
+ if (file->f_file[O_RDONLY] &&
+ vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL) < 0)
+ goto err;
+ if (file->f_file[O_WRONLY] &&
+ vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, &lock, NULL) < 0)
+ goto err;
+ return 0;
+err:
+ pr_warn("lockd: unlock failure in %s:%d\n",
+ __FILE__, __LINE__);
+ return 1;
+}
+
/*
* Loop over all locks on the given file and perform the specified
* action.
@@ -182,17 +221,10 @@ again:
lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host;
if (match(lockhost, host)) {
- struct file_lock lock = *fl;
spin_unlock(&flctx->flc_lock);
- lock.fl_type = F_UNLCK;
- lock.fl_start = 0;
- lock.fl_end = OFFSET_MAX;
- if (vfs_lock_file(file->f_file, F_SETLK, &lock, NULL) < 0) {
- printk("lockd: unlock failure in %s:%d\n",
- __FILE__, __LINE__);
+ if (nlm_unlock_files(file))
return 1;
- }
goto again;
}
}
@@ -246,6 +278,15 @@ nlm_file_inuse(struct nlm_file *file)
return 0;
}
+static void nlm_close_files(struct nlm_file *file)
+{
+ if (file->f_file[O_RDONLY])
+ nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
+ if (file->f_file[O_WRONLY])
+ nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
+}
+
/*
* Loop over all files in the file table.
*/
@@ -276,7 +317,7 @@ nlm_traverse_files(void *data, nlm_host_match_fn_t match,
if (list_empty(&file->f_blocks) && !file->f_locks
&& !file->f_shares && !file->f_count) {
hlist_del(&file->f_list);
- nlmsvc_ops->fclose(file->f_file);
+ nlm_close_files(file);
kfree(file);
}
}
@@ -410,12 +451,13 @@ nlmsvc_invalidate_all(void)
nlm_traverse_files(NULL, nlmsvc_is_client, NULL);
}
+
static int
nlmsvc_match_sb(void *datap, struct nlm_file *file)
{
struct super_block *sb = datap;
- return sb == locks_inode(file->f_file)->i_sb;
+ return sb == nlmsvc_file_inode(file)->i_sb;
}
/**
diff --git a/fs/locks.c b/fs/locks.c
index 74b2a1dfe8d8..3d6fb4ae847b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1397,103 +1397,6 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
return error;
}
-#ifdef CONFIG_MANDATORY_FILE_LOCKING
-/**
- * locks_mandatory_locked - Check for an active lock
- * @file: the file to check
- *
- * Searches the inode's list of locks to find any POSIX locks which conflict.
- * This function is called from locks_verify_locked() only.
- */
-int locks_mandatory_locked(struct file *file)
-{
- int ret;
- struct inode *inode = locks_inode(file);
- struct file_lock_context *ctx;
- struct file_lock *fl;
-
- ctx = smp_load_acquire(&inode->i_flctx);
- if (!ctx || list_empty_careful(&ctx->flc_posix))
- return 0;
-
- /*
- * Search the lock list for this inode for any POSIX locks.
- */
- spin_lock(&ctx->flc_lock);
- ret = 0;
- list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
- if (fl->fl_owner != current->files &&
- fl->fl_owner != file) {
- ret = -EAGAIN;
- break;
- }
- }
- spin_unlock(&ctx->flc_lock);
- return ret;
-}
-
-/**
- * locks_mandatory_area - Check for a conflicting lock
- * @inode: the file to check
- * @filp: how the file was opened (if it was)
- * @start: first byte in the file to check
- * @end: lastbyte in the file to check
- * @type: %F_WRLCK for a write lock, else %F_RDLCK
- *
- * Searches the inode's list of locks to find any POSIX locks which conflict.
- */
-int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
- loff_t end, unsigned char type)
-{
- struct file_lock fl;
- int error;
- bool sleep = false;
-
- locks_init_lock(&fl);
- fl.fl_pid = current->tgid;
- fl.fl_file = filp;
- fl.fl_flags = FL_POSIX | FL_ACCESS;
- if (filp && !(filp->f_flags & O_NONBLOCK))
- sleep = true;
- fl.fl_type = type;
- fl.fl_start = start;
- fl.fl_end = end;
-
- for (;;) {
- if (filp) {
- fl.fl_owner = filp;
- fl.fl_flags &= ~FL_SLEEP;
- error = posix_lock_inode(inode, &fl, NULL);
- if (!error)
- break;
- }
-
- if (sleep)
- fl.fl_flags |= FL_SLEEP;
- fl.fl_owner = current->files;
- error = posix_lock_inode(inode, &fl, NULL);
- if (error != FILE_LOCK_DEFERRED)
- break;
- error = wait_event_interruptible(fl.fl_wait,
- list_empty(&fl.fl_blocked_member));
- if (!error) {
- /*
- * If we've been sleeping someone might have
- * changed the permissions behind our back.
- */
- if (__mandatory_lock(inode))
- continue;
- }
-
- break;
- }
- locks_delete_block(&fl);
-
- return error;
-}
-EXPORT_SYMBOL(locks_mandatory_area);
-#endif /* CONFIG_MANDATORY_FILE_LOCKING */
-
static void lease_clear_pending(struct file_lock *fl, int arg)
{
switch (arg) {
@@ -2486,14 +2389,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (file_lock == NULL)
return -ENOLCK;
- /* Don't allow mandatory locks on files that may be memory mapped
- * and shared.
- */
- if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
- error = -EAGAIN;
- goto out;
- }
-
error = flock_to_posix_lock(filp, file_lock, flock);
if (error)
goto out;
@@ -2611,21 +2506,12 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
struct flock64 *flock)
{
struct file_lock *file_lock = locks_alloc_lock();
- struct inode *inode = locks_inode(filp);
struct file *f;
int error;
if (file_lock == NULL)
return -ENOLCK;
- /* Don't allow mandatory locks on files that may be memory mapped
- * and shared.
- */
- if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
- error = -EAGAIN;
- goto out;
- }
-
error = flock64_to_posix_lock(filp, file_lock, flock);
if (error)
goto out;
@@ -2857,8 +2743,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
seq_puts(f, "POSIX ");
seq_printf(f, " %s ",
- (inode == NULL) ? "*NOINODE*" :
- mandatory_lock(inode) ? "MANDATORY" : "ADVISORY ");
+ (inode == NULL) ? "*NOINODE*" : "ADVISORY ");
} else if (IS_FLOCK(fl)) {
if (fl->fl_type & LOCK_MAND) {
seq_puts(f, "FLOCK MSNFS ");
diff --git a/fs/namei.c b/fs/namei.c
index bf6d8a738c59..d049d3972695 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -204,6 +204,14 @@ getname_flags(const char __user *filename, int flags, int *empty)
}
struct filename *
+getname_uflags(const char __user *filename, int uflags)
+{
+ int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+
+ return getname_flags(filename, flags, NULL);
+}
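
/*
 * Usage sketch (hypothetical caller, illustrative only): code that
 * accepts AT_* flags can pass them straight to getname_uflags() instead
 * of open-coding the AT_EMPTY_PATH -> LOOKUP_EMPTY translation.
 */
static int example_at_helper(const char __user *pathname, int at_flags)
{
	struct filename *name = getname_uflags(pathname, at_flags);

	if (IS_ERR(name))
		return PTR_ERR(name);
	/* ... hand name to a consumer that eventually calls putname() ... */
	putname(name);
	return 0;
}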
+
+struct filename *
getname(const char __user * filename)
{
return getname_flags(filename, 0, NULL);
@@ -247,6 +255,9 @@ getname_kernel(const char * filename)
void putname(struct filename *name)
{
+ if (IS_ERR_OR_NULL(name))
+ return;
+
BUG_ON(name->refcnt <= 0);
if (--name->refcnt > 0)
@@ -2456,7 +2467,7 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
return err;
}
-int filename_lookup(int dfd, struct filename *name, unsigned flags,
+static int __filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root)
{
int retval;
@@ -2474,6 +2485,14 @@ int filename_lookup(int dfd, struct filename *name, unsigned flags,
audit_inode(name, path->dentry,
flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0);
restore_nameidata();
+ return retval;
+}
+
+int filename_lookup(int dfd, struct filename *name, unsigned flags,
+ struct path *path, struct path *root)
+{
+ int retval = __filename_lookup(dfd, name, flags, path, root);
+
putname(name);
return retval;
}
@@ -2495,7 +2514,7 @@ static int path_parentat(struct nameidata *nd, unsigned flags,
return err;
}
-static struct filename *filename_parentat(int dfd, struct filename *name,
+static int __filename_parentat(int dfd, struct filename *name,
unsigned int flags, struct path *parent,
struct qstr *last, int *type)
{
@@ -2503,7 +2522,7 @@ static struct filename *filename_parentat(int dfd, struct filename *name,
struct nameidata nd;
if (IS_ERR(name))
- return name;
+ return PTR_ERR(name);
set_nameidata(&nd, dfd, name, NULL);
retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
if (unlikely(retval == -ECHILD))
@@ -2514,29 +2533,34 @@ static struct filename *filename_parentat(int dfd, struct filename *name,
*last = nd.last;
*type = nd.last_type;
audit_inode(name, parent->dentry, AUDIT_INODE_PARENT);
- } else {
- putname(name);
- name = ERR_PTR(retval);
}
restore_nameidata();
- return name;
+ return retval;
+}
+
+static int filename_parentat(int dfd, struct filename *name,
+ unsigned int flags, struct path *parent,
+ struct qstr *last, int *type)
+{
+ int retval = __filename_parentat(dfd, name, flags, parent, last, type);
+
+ putname(name);
+ return retval;
}
/* does lookup, returns the object with parent locked */
struct dentry *kern_path_locked(const char *name, struct path *path)
{
- struct filename *filename;
struct dentry *d;
struct qstr last;
- int type;
+ int type, error;
- filename = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
+ error = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
&last, &type);
- if (IS_ERR(filename))
- return ERR_CAST(filename);
+ if (error)
+ return ERR_PTR(error);
if (unlikely(type != LAST_NORM)) {
path_put(path);
- putname(filename);
return ERR_PTR(-EINVAL);
}
inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
@@ -2545,7 +2569,6 @@ struct dentry *kern_path_locked(const char *name, struct path *path)
inode_unlock(path->dentry->d_inode);
path_put(path);
}
- putname(filename);
return d;
}
@@ -2575,8 +2598,9 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
}
EXPORT_SYMBOL(vfs_path_lookup);
-static int lookup_one_len_common(const char *name, struct dentry *base,
- int len, struct qstr *this)
+static int lookup_one_common(struct user_namespace *mnt_userns,
+ const char *name, struct dentry *base, int len,
+ struct qstr *this)
{
this->name = name;
this->len = len;
@@ -2604,7 +2628,7 @@ static int lookup_one_len_common(const char *name, struct dentry *base,
return err;
}
- return inode_permission(&init_user_ns, base->d_inode, MAY_EXEC);
+ return inode_permission(mnt_userns, base->d_inode, MAY_EXEC);
}
/**
@@ -2628,7 +2652,7 @@ struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len
WARN_ON_ONCE(!inode_is_locked(base->d_inode));
- err = lookup_one_len_common(name, base, len, &this);
+ err = lookup_one_common(&init_user_ns, name, base, len, &this);
if (err)
return ERR_PTR(err);
@@ -2655,7 +2679,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
WARN_ON_ONCE(!inode_is_locked(base->d_inode));
- err = lookup_one_len_common(name, base, len, &this);
+ err = lookup_one_common(&init_user_ns, name, base, len, &this);
if (err)
return ERR_PTR(err);
@@ -2665,6 +2689,36 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
EXPORT_SYMBOL(lookup_one_len);
/**
+ * lookup_one - filesystem helper to lookup single pathname component
+ * @mnt_userns: user namespace of the mount the lookup is performed from
+ * @name: pathname component to lookup
+ * @base: base directory to lookup from
+ * @len: the length of @name
+ *
+ * Note that this routine is purely a helper for filesystem usage and should
+ * not be called by generic code.
+ *
+ * The caller must hold base->i_mutex.
+ */
+struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name,
+ struct dentry *base, int len)
+{
+ struct dentry *dentry;
+ struct qstr this;
+ int err;
+
+ WARN_ON_ONCE(!inode_is_locked(base->d_inode));
+
+ err = lookup_one_common(mnt_userns, name, base, len, &this);
+ if (err)
+ return ERR_PTR(err);
+
+ dentry = lookup_dcache(&this, base, 0);
+ return dentry ? dentry : __lookup_slow(&this, base, 0);
+}
+EXPORT_SYMBOL(lookup_one);
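
/*
 * Usage sketch (hypothetical filesystem caller, illustrative only): an
 * idmapped-mount-aware single-component lookup under a locked parent,
 * as the kerneldoc above requires.
 */
static struct dentry *example_lookup(struct user_namespace *mnt_userns,
				     struct dentry *parent, const char *name)
{
	struct dentry *d;

	inode_lock(d_inode(parent));
	d = lookup_one(mnt_userns, name, parent, strlen(name));
	inode_unlock(d_inode(parent));
	return d;	/* ERR_PTR() on failure */
}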
+
+/**
* lookup_one_len_unlocked - filesystem helper to lookup single pathname component
* @name: pathname component to lookup
* @base: base directory to lookup from
@@ -2683,7 +2737,7 @@ struct dentry *lookup_one_len_unlocked(const char *name,
int err;
struct dentry *ret;
- err = lookup_one_len_common(name, base, len, &this);
+ err = lookup_one_common(&init_user_ns, name, base, len, &this);
if (err)
return ERR_PTR(err);
@@ -3023,9 +3077,7 @@ static int handle_truncate(struct user_namespace *mnt_userns, struct file *filp)
- /*
- * Refuse to truncate files with mandatory locks held on them.
- */
- error = locks_verify_locked(filp);
- if (!error)
- error = security_path_truncate(path);
+ error = security_path_truncate(path);
if (!error) {
error = do_truncate(mnt_userns, path->dentry, 0,
ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
@@ -3566,7 +3618,7 @@ struct file *do_file_open_root(const struct path *root,
return file;
}
-static struct dentry *filename_create(int dfd, struct filename *name,
+static struct dentry *__filename_create(int dfd, struct filename *name,
struct path *path, unsigned int lookup_flags)
{
struct dentry *dentry = ERR_PTR(-EEXIST);
@@ -3582,9 +3634,9 @@ static struct dentry *filename_create(int dfd, struct filename *name,
*/
lookup_flags &= LOOKUP_REVAL;
- name = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
- if (IS_ERR(name))
- return ERR_CAST(name);
+ error = __filename_parentat(dfd, name, lookup_flags, path, &last, &type);
+ if (error)
+ return ERR_PTR(error);
/*
* Yucky last component or no last component at all?
@@ -3622,7 +3674,6 @@ static struct dentry *filename_create(int dfd, struct filename *name,
error = err2;
goto fail;
}
- putname(name);
return dentry;
fail:
dput(dentry);
@@ -3633,10 +3684,18 @@ unlock:
mnt_drop_write(path->mnt);
out:
path_put(path);
- putname(name);
return dentry;
}
+static inline struct dentry *filename_create(int dfd, struct filename *name,
+ struct path *path, unsigned int lookup_flags)
+{
+ struct dentry *res = __filename_create(dfd, name, path, lookup_flags);
+
+ putname(name);
+ return res;
+}
+
struct dentry *kern_path_create(int dfd, const char *pathname,
struct path *path, unsigned int lookup_flags)
{
@@ -3725,7 +3784,7 @@ static int may_mknod(umode_t mode)
}
}
-static long do_mknodat(int dfd, const char __user *filename, umode_t mode,
+static int do_mknodat(int dfd, struct filename *name, umode_t mode,
unsigned int dev)
{
struct user_namespace *mnt_userns;
@@ -3736,17 +3795,18 @@ static long do_mknodat(int dfd, const char __user *filename, umode_t mode,
error = may_mknod(mode);
if (error)
- return error;
+ goto out1;
retry:
- dentry = user_path_create(dfd, filename, &path, lookup_flags);
+ dentry = __filename_create(dfd, name, &path, lookup_flags);
+ error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ goto out1;
if (!IS_POSIXACL(path.dentry->d_inode))
mode &= ~current_umask();
error = security_path_mknod(&path, dentry, mode, dev);
if (error)
- goto out;
+ goto out2;
mnt_userns = mnt_user_ns(path.mnt);
switch (mode & S_IFMT) {
@@ -3765,24 +3825,26 @@ retry:
dentry, mode, 0);
break;
}
-out:
+out2:
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
+out1:
+ putname(name);
return error;
}
SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
unsigned int, dev)
{
- return do_mknodat(dfd, filename, mode, dev);
+ return do_mknodat(dfd, getname(filename), mode, dev);
}
SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
{
- return do_mknodat(AT_FDCWD, filename, mode, dev);
+ return do_mknodat(AT_FDCWD, getname(filename), mode, dev);
}
/**
@@ -3827,7 +3889,7 @@ int vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
}
EXPORT_SYMBOL(vfs_mkdir);
-static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
+int do_mkdirat(int dfd, struct filename *name, umode_t mode)
{
struct dentry *dentry;
struct path path;
@@ -3835,9 +3897,10 @@ static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
unsigned int lookup_flags = LOOKUP_DIRECTORY;
retry:
- dentry = user_path_create(dfd, pathname, &path, lookup_flags);
+ dentry = __filename_create(dfd, name, &path, lookup_flags);
+ error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ goto out_putname;
if (!IS_POSIXACL(path.dentry->d_inode))
mode &= ~current_umask();
@@ -3853,17 +3916,19 @@ retry:
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
+out_putname:
+ putname(name);
return error;
}
SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
{
- return do_mkdirat(dfd, pathname, mode);
+ return do_mkdirat(dfd, getname(pathname), mode);
}
SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
{
- return do_mkdirat(AT_FDCWD, pathname, mode);
+ return do_mkdirat(AT_FDCWD, getname(pathname), mode);
}
/**
@@ -3921,62 +3986,62 @@ out:
}
EXPORT_SYMBOL(vfs_rmdir);
-long do_rmdir(int dfd, struct filename *name)
+int do_rmdir(int dfd, struct filename *name)
{
struct user_namespace *mnt_userns;
- int error = 0;
+ int error;
struct dentry *dentry;
struct path path;
struct qstr last;
int type;
unsigned int lookup_flags = 0;
retry:
- name = filename_parentat(dfd, name, lookup_flags,
- &path, &last, &type);
- if (IS_ERR(name))
- return PTR_ERR(name);
+ error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
+ if (error)
+ goto exit1;
switch (type) {
case LAST_DOTDOT:
error = -ENOTEMPTY;
- goto exit1;
+ goto exit2;
case LAST_DOT:
error = -EINVAL;
- goto exit1;
+ goto exit2;
case LAST_ROOT:
error = -EBUSY;
- goto exit1;
+ goto exit2;
}
error = mnt_want_write(path.mnt);
if (error)
- goto exit1;
+ goto exit2;
inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- goto exit2;
+ goto exit3;
if (!dentry->d_inode) {
error = -ENOENT;
- goto exit3;
+ goto exit4;
}
error = security_path_rmdir(&path, dentry);
if (error)
- goto exit3;
+ goto exit4;
mnt_userns = mnt_user_ns(path.mnt);
error = vfs_rmdir(mnt_userns, path.dentry->d_inode, dentry);
-exit3:
+exit4:
dput(dentry);
-exit2:
+exit3:
inode_unlock(path.dentry->d_inode);
mnt_drop_write(path.mnt);
-exit1:
+exit2:
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
+exit1:
putname(name);
return error;
}
@@ -4059,7 +4124,7 @@ EXPORT_SYMBOL(vfs_unlink);
* writeout happening, and we don't want to prevent access to the directory
* while waiting on the I/O.
*/
-long do_unlinkat(int dfd, struct filename *name)
+int do_unlinkat(int dfd, struct filename *name)
{
int error;
struct dentry *dentry;
@@ -4070,17 +4135,17 @@ long do_unlinkat(int dfd, struct filename *name)
struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
retry:
- name = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
- if (IS_ERR(name))
- return PTR_ERR(name);
+ error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
+ if (error)
+ goto exit1;
error = -EISDIR;
if (type != LAST_NORM)
- goto exit1;
+ goto exit2;
error = mnt_want_write(path.mnt);
if (error)
- goto exit1;
+ goto exit2;
retry_deleg:
inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
@@ -4097,11 +4162,11 @@ retry_deleg:
ihold(inode);
error = security_path_unlink(&path, dentry);
if (error)
- goto exit2;
+ goto exit3;
mnt_userns = mnt_user_ns(path.mnt);
error = vfs_unlink(mnt_userns, path.dentry->d_inode, dentry,
&delegated_inode);
-exit2:
+exit3:
dput(dentry);
}
inode_unlock(path.dentry->d_inode);
@@ -4114,13 +4179,14 @@ exit2:
goto retry_deleg;
}
mnt_drop_write(path.mnt);
-exit1:
+exit2:
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
inode = NULL;
goto retry;
}
+exit1:
putname(name);
return error;
@@ -4131,7 +4197,7 @@ slashes:
error = -EISDIR;
else
error = -ENOTDIR;
- goto exit2;
+ goto exit3;
}
SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
@@ -4186,23 +4252,22 @@ int vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
}
EXPORT_SYMBOL(vfs_symlink);
-static long do_symlinkat(const char __user *oldname, int newdfd,
- const char __user *newname)
+int do_symlinkat(struct filename *from, int newdfd, struct filename *to)
{
int error;
- struct filename *from;
struct dentry *dentry;
struct path path;
unsigned int lookup_flags = 0;
- from = getname(oldname);
- if (IS_ERR(from))
- return PTR_ERR(from);
+ if (IS_ERR(from)) {
+ error = PTR_ERR(from);
+ goto out_putnames;
+ }
retry:
- dentry = user_path_create(newdfd, newname, &path, lookup_flags);
+ dentry = __filename_create(newdfd, to, &path, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
- goto out_putname;
+ goto out_putnames;
error = security_path_symlink(&path, dentry, from->name);
if (!error) {
@@ -4217,7 +4282,8 @@ retry:
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
-out_putname:
+out_putnames:
+ putname(to);
putname(from);
return error;
}
@@ -4225,12 +4291,12 @@ out_putname:
SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
int, newdfd, const char __user *, newname)
{
- return do_symlinkat(oldname, newdfd, newname);
+ return do_symlinkat(getname(oldname), newdfd, getname(newname));
}
SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
{
- return do_symlinkat(oldname, AT_FDCWD, newname);
+ return do_symlinkat(getname(oldname), AT_FDCWD, getname(newname));
}
/**
@@ -4331,8 +4397,8 @@ EXPORT_SYMBOL(vfs_link);
* with linux 2.0, and to avoid hard-linking to directories
* and other special files. --ADM
*/
-static int do_linkat(int olddfd, const char __user *oldname, int newdfd,
- const char __user *newname, int flags)
+int do_linkat(int olddfd, struct filename *old, int newdfd,
+ struct filename *new, int flags)
{
struct user_namespace *mnt_userns;
struct dentry *new_dentry;
@@ -4341,31 +4407,32 @@ static int do_linkat(int olddfd, const char __user *oldname, int newdfd,
int how = 0;
int error;
- if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
- return -EINVAL;
+ if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) {
+ error = -EINVAL;
+ goto out_putnames;
+ }
/*
* To use null names we require CAP_DAC_READ_SEARCH.
* This ensures that not everyone will be able to create
* hard links using the passed file descriptor.
*/
- if (flags & AT_EMPTY_PATH) {
- if (!capable(CAP_DAC_READ_SEARCH))
- return -ENOENT;
- how = LOOKUP_EMPTY;
+ if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) {
+ error = -ENOENT;
+ goto out_putnames;
}
if (flags & AT_SYMLINK_FOLLOW)
how |= LOOKUP_FOLLOW;
retry:
- error = user_path_at(olddfd, oldname, how, &old_path);
+ error = __filename_lookup(olddfd, old, how, &old_path, NULL);
if (error)
- return error;
+ goto out_putnames;
- new_dentry = user_path_create(newdfd, newname, &new_path,
+ new_dentry = __filename_create(newdfd, new, &new_path,
(how & LOOKUP_REVAL));
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
- goto out;
+ goto out_putpath;
error = -EXDEV;
if (old_path.mnt != new_path.mnt)
@@ -4393,8 +4460,11 @@ out_dput:
how |= LOOKUP_REVAL;
goto retry;
}
-out:
+out_putpath:
path_put(&old_path);
+out_putnames:
+ putname(old);
+ putname(new);
return error;
}
@@ -4402,12 +4472,13 @@ out:
SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
int, newdfd, const char __user *, newname, int, flags)
{
- return do_linkat(olddfd, oldname, newdfd, newname, flags);
+ return do_linkat(olddfd, getname_uflags(oldname, flags),
+ newdfd, getname(newname), flags);
}
SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
{
- return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
+ return do_linkat(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0);
}
/**
@@ -4602,29 +4673,25 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
int error = -EINVAL;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
- goto put_both;
+ goto put_names;
if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
(flags & RENAME_EXCHANGE))
- goto put_both;
+ goto put_names;
if (flags & RENAME_EXCHANGE)
target_flags = 0;
retry:
- from = filename_parentat(olddfd, from, lookup_flags, &old_path,
+ error = __filename_parentat(olddfd, from, lookup_flags, &old_path,
&old_last, &old_type);
- if (IS_ERR(from)) {
- error = PTR_ERR(from);
- goto put_new;
- }
+ if (error)
+ goto put_names;
- to = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
+ error = __filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
&new_type);
- if (IS_ERR(to)) {
- error = PTR_ERR(to);
+ if (error)
goto exit1;
- }
error = -EXDEV;
if (old_path.mnt != new_path.mnt)
@@ -4727,12 +4794,9 @@ exit1:
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
-put_both:
- if (!IS_ERR(from))
- putname(from);
-put_new:
- if (!IS_ERR(to))
- putname(to);
+put_names:
+ putname(from);
+ putname(to);
return error;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index ab4174a3c802..20caa4b4c539 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1715,18 +1715,14 @@ static inline bool may_mount(void)
return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
-#ifdef CONFIG_MANDATORY_FILE_LOCKING
-static inline bool may_mandlock(void)
+static void warn_mandlock(void)
{
- return capable(CAP_SYS_ADMIN);
+ pr_warn_once("=======================================================\n"
+ "WARNING: The mand mount option has been deprecated and\n"
+ " and is ignored by this kernel. Remove the mand\n"
+ " option from the mount to silence this warning.\n"
+ "=======================================================\n");
}
-#else
-static inline bool may_mandlock(void)
-{
- pr_warn("VFS: \"mand\" mount option not supported");
- return false;
-}
-#endif
static int can_umount(const struct path *path, int flags)
{
@@ -1938,6 +1934,20 @@ void drop_collected_mounts(struct vfsmount *mnt)
namespace_unlock();
}
+static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+{
+ struct mount *child;
+
+ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+ if (!is_subdir(child->mnt_mountpoint, dentry))
+ continue;
+
+ if (child->mnt.mnt_flags & MNT_LOCKED)
+ return true;
+ }
+ return false;
+}
+
/**
* clone_private_mount - create a private clone of a path
* @path: path to clone
@@ -1953,10 +1963,19 @@ struct vfsmount *clone_private_mount(const struct path *path)
struct mount *old_mnt = real_mount(path->mnt);
struct mount *new_mnt;
+ down_read(&namespace_sem);
if (IS_MNT_UNBINDABLE(old_mnt))
- return ERR_PTR(-EINVAL);
+ goto invalid;
+
+ if (!check_mnt(old_mnt))
+ goto invalid;
+
+ if (has_locked_children(old_mnt, path->dentry))
+ goto invalid;
new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
+ up_read(&namespace_sem);
+
if (IS_ERR(new_mnt))
return ERR_CAST(new_mnt);
@@ -1964,6 +1983,10 @@ struct vfsmount *clone_private_mount(const struct path *path)
new_mnt->mnt_ns = MNT_NS_INTERNAL;
return &new_mnt->mnt;
+
+invalid:
+ up_read(&namespace_sem);
+ return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(clone_private_mount);
@@ -2315,19 +2338,6 @@ static int do_change_type(struct path *path, int ms_flags)
return err;
}
-static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
-{
- struct mount *child;
- list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
- if (!is_subdir(child->mnt_mountpoint, dentry))
- continue;
-
- if (child->mnt.mnt_flags & MNT_LOCKED)
- return true;
- }
- return false;
-}
-
static struct mount *__do_loopback(struct path *old_path, int recurse)
{
struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
@@ -3179,8 +3189,8 @@ int path_mount(const char *dev_name, struct path *path,
return ret;
if (!may_mount())
return -EPERM;
- if ((flags & SB_MANDLOCK) && !may_mandlock())
- return -EPERM;
+ if (flags & SB_MANDLOCK)
+ warn_mandlock();
/* Default to relatime unless overriden */
if (!(flags & MS_NOATIME))
@@ -3563,9 +3573,8 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
goto err_unlock;
- ret = -EPERM;
- if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
- goto err_unlock;
+ if (fc->sb_flags & SB_MANDLOCK)
+ warn_mandlock();
newmount.mnt = vfs_create_mount(fc);
if (IS_ERR(newmount.mnt)) {
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 37a1a88df771..d772c20bbfd1 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -180,5 +180,5 @@ const struct export_operations nfs_export_ops = {
.fetch_iversion = nfs_fetch_iversion,
.flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
- EXPORT_OP_NOATOMIC_ATTR,
+ EXPORT_OP_NOATOMIC_ATTR|EXPORT_OP_SYNC_LOCKS,
};
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 1fef107961bc..aa353fd58240 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -806,9 +806,8 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
nfs_inc_stats(inode, NFSIOS_VFSLOCK);
- /* No mandatory locks over NFS */
- if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
- goto out_err;
+ if (fl->fl_flags & FL_RECLAIM)
+ return -ENOGRACE;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
is_local = 1;
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 3f5b3d7b62b7..606fa155c28a 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -25,9 +25,11 @@
* Note: we hold the dentry use count while the file is open.
*/
static __be32
-nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
+nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
+ int mode)
{
__be32 nfserr;
+ int access;
struct svc_fh fh;
/* must initialize before using! but maxsize doesn't matter */
@@ -36,7 +38,9 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size);
fh.fh_export = NULL;
- nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
+ access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
+ access |= NFSD_MAY_LOCK;
+ nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp);
fh_put(&fh);
/* We return nlm error codes as nlm doesn't know
* about nfsd, but nfsd does know about nlm..
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fa67ecd5fe63..42356416f0a0 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2687,9 +2687,9 @@ static void force_expire_client(struct nfs4_client *clp)
trace_nfsd_clid_admin_expired(&clp->cl_clientid);
- spin_lock(&clp->cl_lock);
+ spin_lock(&nn->client_lock);
clp->cl_time = 0;
- spin_unlock(&clp->cl_lock);
+ spin_unlock(&nn->client_lock);
wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
spin_lock(&nn->client_lock);
@@ -5735,16 +5735,6 @@ check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid,
NFS4_SHARE_DENY_READ);
}
-/*
- * Allow READ/WRITE during grace period on recovered state only for files
- * that are not able to provide mandatory locking.
- */
-static inline int
-grace_disallows_io(struct net *net, struct inode *inode)
-{
- return opens_in_grace(net) && mandatory_lock(inode);
-}
-
static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session)
{
/*
@@ -6026,7 +6016,6 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
stateid_t *stateid, int flags, struct nfsd_file **nfp,
struct nfs4_stid **cstid)
{
- struct inode *ino = d_inode(fhp->fh_dentry);
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct nfs4_stid *s = NULL;
@@ -6035,9 +6024,6 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
if (nfp)
*nfp = NULL;
- if (grace_disallows_io(net, ino))
- return nfserr_grace;
-
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
status = check_special_stateids(net, fhp, stateid, flags);
goto done;
@@ -6835,6 +6821,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
+ struct super_block *sb;
__be32 status = 0;
int lkflg;
int err;
@@ -6856,6 +6843,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: nfsd4_lock: permission denied!\n");
return status;
}
+ sb = cstate->current_fh.fh_dentry->d_sb;
if (lock->lk_is_new) {
if (nfsd4_has_session(cstate))
@@ -6901,10 +6889,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!locks_in_grace(net) && lock->lk_reclaim)
goto out;
+ if (lock->lk_reclaim)
+ fl_flags |= FL_RECLAIM;
+
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
- if (nfsd4_has_session(cstate))
+ if (nfsd4_has_session(cstate) &&
+ !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
@@ -6916,7 +6908,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fl_type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
- if (nfsd4_has_session(cstate))
+ if (nfsd4_has_session(cstate) &&
+ !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
@@ -7036,8 +7029,7 @@ out:
/*
* The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
* so we do a temporary open here just to get an open file to pass to
- * vfs_test_lock. (Arguably perhaps test_lock should be done with an
- * inode operation.)
+ * vfs_test_lock.
*/
static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
@@ -7052,7 +7044,9 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
NFSD_MAY_READ));
if (err)
goto out;
+ lock->fl_file = nf->nf_file;
err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+ lock->fl_file = NULL;
out:
fh_unlock(fhp);
nfsd_file_put(nf);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 60d7c59e7935..90fcd6178823 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -881,6 +881,7 @@ nfserrno (int errno)
{ nfserr_serverfault, -ENFILE },
{ nfserr_io, -EUCLEAN },
{ nfserr_perm, -ENOKEY },
+ { nfserr_no_grace, -ENOGRACE},
};
int i;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index adaec43548d1..538520957a81 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -400,18 +400,16 @@ TRACE_EVENT(nfsd_dirent,
TP_STRUCT__entry(
__field(u32, fh_hash)
__field(u64, ino)
- __field(int, len)
- __dynamic_array(unsigned char, name, namlen)
+ __string_len(name, name, namlen)
),
TP_fast_assign(
__entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0;
__entry->ino = ino;
- __entry->len = namlen;
- memcpy(__get_str(name), name, namlen);
+ __assign_str_len(name, name, namlen)
),
- TP_printk("fh_hash=0x%08x ino=%llu name=%.*s",
- __entry->fh_hash, __entry->ino,
- __entry->len, __get_str(name))
+ TP_printk("fh_hash=0x%08x ino=%llu name=%s",
+ __entry->fh_hash, __entry->ino, __get_str(name)
+ )
)
#include "state.h"
@@ -608,7 +606,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class,
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
__field(unsigned long, flavor)
__array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
- __dynamic_array(char, name, clp->cl_name.len + 1)
+ __string_len(name, name, clp->cl_name.len)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
@@ -618,8 +616,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class,
__entry->flavor = clp->cl_cred.cr_flavor;
memcpy(__entry->verifier, (void *)&clp->cl_verifier,
NFS4_VERIFIER_SIZE);
- memcpy(__get_str(name), clp->cl_name.data, clp->cl_name.len);
- __get_str(name)[clp->cl_name.len] = '\0';
+ __assign_str_len(name, clp->cl_name.data, clp->cl_name.len);
),
TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x",
__entry->addr, __get_str(name),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a224a5e23cc1..738d564ca4ce 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -244,7 +244,6 @@ out_nfserr:
* returned. Otherwise the covered directory is returned.
* NOTE: this mountpoint crossing is not supported properly by all
* clients and is explicitly disallowed for NFSv3
- * NeilBrown <neilb@cse.unsw.edu.au>
*/
__be32
nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
@@ -333,7 +332,6 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct iattr *iap)
{
struct inode *inode = d_inode(fhp->fh_dentry);
- int host_err;
if (iap->ia_size < inode->i_size) {
__be32 err;
@@ -343,20 +341,7 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (err)
return err;
}
-
- host_err = get_write_access(inode);
- if (host_err)
- goto out_nfserrno;
-
- host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
- if (host_err)
- goto out_put_write_access;
- return 0;
-
-out_put_write_access:
- put_write_access(inode);
-out_nfserrno:
- return nfserrno(host_err);
+ return nfserrno(get_write_access(inode));
}
/*
@@ -750,13 +735,6 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
err = nfserr_perm;
if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
goto out;
- /*
- * We must ignore files (but only files) which might have mandatory
- * locks on them because there is no way to know if the accesser has
- * the lock.
- */
- if (S_ISREG((inode)->i_mode) && mandatory_lock(inode))
- goto out;
if (!inode->i_fop)
goto out;
@@ -847,26 +825,16 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct svc_rqst *rqstp = sd->u.data;
struct page **pp = rqstp->rq_next_page;
struct page *page = buf->page;
- size_t size;
-
- size = sd->len;
if (rqstp->rq_res.page_len == 0) {
- get_page(page);
- put_page(*rqstp->rq_next_page);
- *(rqstp->rq_next_page++) = page;
+ svc_rqst_replace_page(rqstp, page);
rqstp->rq_res.page_base = buf->offset;
- rqstp->rq_res.page_len = size;
} else if (page != pp[-1]) {
- get_page(page);
- if (*rqstp->rq_next_page)
- put_page(*rqstp->rq_next_page);
- *(rqstp->rq_next_page++) = page;
- rqstp->rq_res.page_len += size;
- } else
- rqstp->rq_res.page_len += size;
+ svc_rqst_replace_page(rqstp, page);
+ }
+ rqstp->rq_res.page_len += sd->len;
- return size;
+ return sd->len;
}
static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
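For reference, a sketch of what the new svc_rqst_replace_page() helper factors out, reconstructed from the open-coded branches deleted above (the real helper lives in the SUNRPC code and may differ in detail):

void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
{
	get_page(page);
	if (*rqstp->rq_next_page)
		put_page(*rqstp->rq_next_page);
	*(rqstp->rq_next_page++) = page;
}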
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 4abd928b0bc8..f6b2d280aab5 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1053,7 +1053,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_time_gran = 1;
sb->s_max_links = NILFS_LINK_MAX;
- sb->s_bdi = bdi_get(sb->s_bdev->bd_bdi);
+ sb->s_bdi = bdi_get(sb->s_bdev->bd_disk->bdi);
err = load_nilfs(nilfs, sb);
if (err)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 64864fb40b40..6facdf476255 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/fanotify.h>
#include <linux/fcntl.h>
+#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
@@ -54,22 +55,27 @@ static int fanotify_max_queued_events __read_mostly;
#include <linux/sysctl.h>
+static long ft_zero = 0;
+static long ft_int_max = INT_MAX;
+
struct ctl_table fanotify_table[] = {
{
.procname = "max_user_groups",
.data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &ft_zero,
+ .extra2 = &ft_int_max,
},
{
.procname = "max_user_marks",
.data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &ft_zero,
+ .extra2 = &ft_int_max,
},
{
.procname = "max_queued_events",
@@ -104,8 +110,10 @@ struct kmem_cache *fanotify_path_event_cachep __read_mostly;
struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
#define FANOTIFY_EVENT_ALIGN 4
-#define FANOTIFY_INFO_HDR_LEN \
+#define FANOTIFY_FID_INFO_HDR_LEN \
(sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
+#define FANOTIFY_PIDFD_INFO_HDR_LEN \
+ sizeof(struct fanotify_event_info_pidfd)
static int fanotify_fid_info_len(int fh_len, int name_len)
{
@@ -114,10 +122,11 @@ static int fanotify_fid_info_len(int fh_len, int name_len)
if (name_len)
info_len += name_len + 1;
- return roundup(FANOTIFY_INFO_HDR_LEN + info_len, FANOTIFY_EVENT_ALIGN);
+ return roundup(FANOTIFY_FID_INFO_HDR_LEN + info_len,
+ FANOTIFY_EVENT_ALIGN);
}
-static int fanotify_event_info_len(unsigned int fid_mode,
+static int fanotify_event_info_len(unsigned int info_mode,
struct fanotify_event *event)
{
struct fanotify_info *info = fanotify_event_info(event);
@@ -128,7 +137,8 @@ static int fanotify_event_info_len(unsigned int fid_mode,
if (dir_fh_len) {
info_len += fanotify_fid_info_len(dir_fh_len, info->name_len);
- } else if ((fid_mode & FAN_REPORT_NAME) && (event->mask & FAN_ONDIR)) {
+ } else if ((info_mode & FAN_REPORT_NAME) &&
+ (event->mask & FAN_ONDIR)) {
/*
* With group flag FAN_REPORT_NAME, if name was not recorded in
* event on a directory, we will report the name ".".
@@ -136,6 +146,9 @@ static int fanotify_event_info_len(unsigned int fid_mode,
dot_len = 1;
}
+ if (info_mode & FAN_REPORT_PIDFD)
+ info_len += FANOTIFY_PIDFD_INFO_HDR_LEN;
+
if (fh_len)
info_len += fanotify_fid_info_len(fh_len, dot_len);
@@ -171,7 +184,7 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
size_t event_size = FAN_EVENT_METADATA_LEN;
struct fanotify_event *event = NULL;
struct fsnotify_event *fsn_event;
- unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+ unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
@@ -181,8 +194,8 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
goto out;
event = FANOTIFY_E(fsn_event);
- if (fid_mode)
- event_size += fanotify_event_info_len(fid_mode, event);
+ if (info_mode)
+ event_size += fanotify_event_info_len(info_mode, event);
if (event_size > count) {
event = ERR_PTR(-EINVAL);
@@ -303,9 +316,10 @@ static int process_access_response(struct fsnotify_group *group,
return -ENOENT;
}
-static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
- int info_type, const char *name, size_t name_len,
- char __user *buf, size_t count)
+static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
+ int info_type, const char *name,
+ size_t name_len,
+ char __user *buf, size_t count)
{
struct fanotify_event_info_fid info = { };
struct file_handle handle = { };
@@ -398,6 +412,117 @@ static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
return info_len;
}
+static int copy_pidfd_info_to_user(int pidfd,
+ char __user *buf,
+ size_t count)
+{
+ struct fanotify_event_info_pidfd info = { };
+ size_t info_len = FANOTIFY_PIDFD_INFO_HDR_LEN;
+
+ if (WARN_ON_ONCE(info_len > count))
+ return -EFAULT;
+
+ info.hdr.info_type = FAN_EVENT_INFO_TYPE_PIDFD;
+ info.hdr.len = info_len;
+ info.pidfd = pidfd;
+
+ if (copy_to_user(buf, &info, info_len))
+ return -EFAULT;
+
+ return info_len;
+}
+
+static int copy_info_records_to_user(struct fanotify_event *event,
+ struct fanotify_info *info,
+ unsigned int info_mode, int pidfd,
+ char __user *buf, size_t count)
+{
+ int ret, total_bytes = 0, info_type = 0;
+ unsigned int fid_mode = info_mode & FANOTIFY_FID_BITS;
+ unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
+
+ /*
+ * Event info records order is as follows: dir fid + name, child fid.
+ */
+ if (fanotify_event_dir_fh_len(event)) {
+ info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
+ FAN_EVENT_INFO_TYPE_DFID;
+ ret = copy_fid_info_to_user(fanotify_event_fsid(event),
+ fanotify_info_dir_fh(info),
+ info_type,
+ fanotify_info_name(info),
+ info->name_len, buf, count);
+ if (ret < 0)
+ return ret;
+
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
+ if (fanotify_event_object_fh_len(event)) {
+ const char *dot = NULL;
+ int dot_len = 0;
+
+ if (fid_mode == FAN_REPORT_FID || info_type) {
+ /*
+ * With only group flag FAN_REPORT_FID only type FID is
+ * reported. Second info record type is always FID.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_FID;
+ } else if ((fid_mode & FAN_REPORT_NAME) &&
+ (event->mask & FAN_ONDIR)) {
+ /*
+ * With group flag FAN_REPORT_NAME, if name was not
+ * recorded in an event on a directory, report the name
+ * "." with info type DFID_NAME.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_DFID_NAME;
+ dot = ".";
+ dot_len = 1;
+ } else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) ||
+ (event->mask & FAN_ONDIR)) {
+ /*
+ * With group flag FAN_REPORT_DIR_FID, a single info
+ * record has type DFID for directory entry modification
+ * event and for event on a directory.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_DFID;
+ } else {
+ /*
+ * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID,
+ * a single info record has type FID for event on a
+ * non-directory, when there is no directory to report.
+ * For example, on FAN_DELETE_SELF event.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_FID;
+ }
+
+ ret = copy_fid_info_to_user(fanotify_event_fsid(event),
+ fanotify_event_object_fh(event),
+ info_type, dot, dot_len,
+ buf, count);
+ if (ret < 0)
+ return ret;
+
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
+ if (pidfd_mode) {
+ ret = copy_pidfd_info_to_user(pidfd, buf, count);
+ if (ret < 0)
+ return ret;
+
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
+ return total_bytes;
+}
+
static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fanotify_event *event,
char __user *buf, size_t count)
@@ -405,15 +530,15 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fanotify_event_metadata metadata;
struct path *path = fanotify_event_path(event);
struct fanotify_info *info = fanotify_event_info(event);
- unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+ unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
+ unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
struct file *f = NULL;
- int ret, fd = FAN_NOFD;
- int info_type = 0;
+ int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
metadata.event_len = FAN_EVENT_METADATA_LEN +
- fanotify_event_info_len(fid_mode, event);
+ fanotify_event_info_len(info_mode, event);
metadata.metadata_len = FAN_EVENT_METADATA_LEN;
metadata.vers = FANOTIFY_METADATA_VERSION;
metadata.reserved = 0;
@@ -442,6 +567,33 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
}
metadata.fd = fd;
+ if (pidfd_mode) {
+ /*
+ * Complain if the FAN_REPORT_PIDFD and FAN_REPORT_TID mutual
+ * exclusion is ever lifted. At the time of incorporating pidfd
+ * support within fanotify, the pidfd API only supported the
+ * creation of pidfds for thread-group leaders.
+ */
+ WARN_ON_ONCE(FAN_GROUP_FLAG(group, FAN_REPORT_TID));
+
+ /*
+ * The PIDTYPE_TGID check for an event->pid is performed
+ * preemptively in an attempt to catch out cases where the event
+ * listener reads events after the event generating process has
+ * already terminated. Report FAN_NOPIDFD to the event listener
+ * in those cases, with all other pidfd creation errors being
+ * reported as FAN_EPIDFD.
+ */
+ if (metadata.pid == 0 ||
+ !pid_has_task(event->pid, PIDTYPE_TGID)) {
+ pidfd = FAN_NOPIDFD;
+ } else {
+ pidfd = pidfd_create(event->pid, 0);
+ if (pidfd < 0)
+ pidfd = FAN_EPIDFD;
+ }
+ }
+
ret = -EFAULT;
/*
* Sanity check copy size in case get_one_event() and
@@ -462,67 +614,11 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
if (f)
fd_install(fd, f);
- /* Event info records order is: dir fid + name, child fid */
- if (fanotify_event_dir_fh_len(event)) {
- info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
- FAN_EVENT_INFO_TYPE_DFID;
- ret = copy_info_to_user(fanotify_event_fsid(event),
- fanotify_info_dir_fh(info),
- info_type, fanotify_info_name(info),
- info->name_len, buf, count);
+ if (info_mode) {
+ ret = copy_info_records_to_user(event, info, info_mode, pidfd,
+ buf, count);
if (ret < 0)
goto out_close_fd;
-
- buf += ret;
- count -= ret;
- }
-
- if (fanotify_event_object_fh_len(event)) {
- const char *dot = NULL;
- int dot_len = 0;
-
- if (fid_mode == FAN_REPORT_FID || info_type) {
- /*
- * With only group flag FAN_REPORT_FID only type FID is
- * reported. Second info record type is always FID.
- */
- info_type = FAN_EVENT_INFO_TYPE_FID;
- } else if ((fid_mode & FAN_REPORT_NAME) &&
- (event->mask & FAN_ONDIR)) {
- /*
- * With group flag FAN_REPORT_NAME, if name was not
- * recorded in an event on a directory, report the
- * name "." with info type DFID_NAME.
- */
- info_type = FAN_EVENT_INFO_TYPE_DFID_NAME;
- dot = ".";
- dot_len = 1;
- } else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) ||
- (event->mask & FAN_ONDIR)) {
- /*
- * With group flag FAN_REPORT_DIR_FID, a single info
- * record has type DFID for directory entry modification
- * event and for event on a directory.
- */
- info_type = FAN_EVENT_INFO_TYPE_DFID;
- } else {
- /*
- * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID,
- * a single info record has type FID for event on a
- * non-directory, when there is no directory to report.
- * For example, on FAN_DELETE_SELF event.
- */
- info_type = FAN_EVENT_INFO_TYPE_FID;
- }
-
- ret = copy_info_to_user(fanotify_event_fsid(event),
- fanotify_event_object_fh(event),
- info_type, dot, dot_len, buf, count);
- if (ret < 0)
- goto out_close_fd;
-
- buf += ret;
- count -= ret;
}
return metadata.event_len;
@@ -532,6 +628,10 @@ out_close_fd:
put_unused_fd(fd);
fput(f);
}
+
+ if (pidfd >= 0)
+ close_fd(pidfd);
+
return ret;
}
@@ -1077,6 +1177,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
#endif
return -EINVAL;
+ /*
+ * A pidfd can only be returned for a thread-group leader; thus
+ * FAN_REPORT_PIDFD and FAN_REPORT_TID need to remain mutually
+ * exclusive.
+ */
+ if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
+ return -EINVAL;
+
if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
return -EINVAL;
@@ -1478,7 +1586,7 @@ static int __init fanotify_user_setup(void)
FANOTIFY_DEFAULT_MAX_USER_MARKS);
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 11);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
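A minimal userspace sketch of the FAN_REPORT_PIDFD mode added above (mask, path and buffer size are placeholders; requires CAP_SYS_ADMIN, and assumes FAN_REPORT_PIDFD is the only info flag so the pidfd record directly follows the metadata):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/fanotify.h>

int main(void)
{
	char buf[4096];
	int fd = fanotify_init(FAN_CLASS_NOTIF | FAN_REPORT_PIDFD, O_RDONLY);

	fanotify_mark(fd, FAN_MARK_ADD, FAN_OPEN, AT_FDCWD, "/tmp");

	ssize_t len = read(fd, buf, sizeof(buf));
	struct fanotify_event_metadata *md = (void *)buf;

	while (FAN_EVENT_OK(md, len)) {
		struct fanotify_event_info_pidfd *info = (void *)(md + 1);

		/* FAN_NOPIDFD/FAN_EPIDFD mean no pidfd could be made. */
		if (info->hdr.info_type == FAN_EVENT_INFO_TYPE_PIDFD &&
		    info->pidfd >= 0) {
			printf("pid %d pidfd %d\n", md->pid, info->pidfd);
			close(info->pidfd);
		}
		if (md->fd >= 0)
			close(md->fd);
		md = FAN_EVENT_NEXT(md, len);
	}
	close(fd);
	return 0;
}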
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 30d422b8c0fc..963e6ce75b96 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -87,15 +87,15 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
if (iput_inode)
iput(iput_inode);
- /* Wait for outstanding inode references from connectors */
- wait_var_event(&sb->s_fsnotify_inode_refs,
- !atomic_long_read(&sb->s_fsnotify_inode_refs));
}
void fsnotify_sb_delete(struct super_block *sb)
{
fsnotify_unmount_inodes(sb);
fsnotify_clear_marks_by_sb(sb);
+ /* Wait for outstanding object references from connectors */
+ wait_var_event(&sb->s_fsnotify_connectors,
+ !atomic_long_read(&sb->s_fsnotify_connectors));
}
/*
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index ff2063ec6b0f..87d8a50ee803 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -27,6 +27,21 @@ static inline struct super_block *fsnotify_conn_sb(
return container_of(conn->obj, struct super_block, s_fsnotify_marks);
}
+static inline struct super_block *fsnotify_connector_sb(
+ struct fsnotify_mark_connector *conn)
+{
+ switch (conn->type) {
+ case FSNOTIFY_OBJ_TYPE_INODE:
+ return fsnotify_conn_inode(conn)->i_sb;
+ case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
+ return fsnotify_conn_mount(conn)->mnt.mnt_sb;
+ case FSNOTIFY_OBJ_TYPE_SB:
+ return fsnotify_conn_sb(conn);
+ default:
+ return NULL;
+ }
+}
+
/* destroy all events sitting in this groups notification queue */
extern void fsnotify_flush_notify(struct fsnotify_group *group);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 98f61b31745a..62051247f6d2 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -55,22 +55,27 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
#include <linux/sysctl.h>
+static long it_zero = 0;
+static long it_int_max = INT_MAX;
+
struct ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
.data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &it_zero,
+ .extra2 = &it_int_max,
},
{
.procname = "max_user_watches",
.data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &it_zero,
+ .extra2 = &it_int_max,
},
{
.procname = "max_queued_events",
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d32ab349db74..95006d1d29ab 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -169,6 +169,37 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
}
}
+static void fsnotify_get_inode_ref(struct inode *inode)
+{
+ ihold(inode);
+ atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
+}
+
+static void fsnotify_put_inode_ref(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+
+ iput(inode);
+ if (atomic_long_dec_and_test(&sb->s_fsnotify_connectors))
+ wake_up_var(&sb->s_fsnotify_connectors);
+}
+
+static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn)
+{
+ struct super_block *sb = fsnotify_connector_sb(conn);
+
+ if (sb)
+ atomic_long_inc(&sb->s_fsnotify_connectors);
+}
+
+static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn)
+{
+ struct super_block *sb = fsnotify_connector_sb(conn);
+
+ if (sb && atomic_long_dec_and_test(&sb->s_fsnotify_connectors))
+ wake_up_var(&sb->s_fsnotify_connectors);
+}
+
static void *fsnotify_detach_connector_from_object(
struct fsnotify_mark_connector *conn,
unsigned int *type)
@@ -182,13 +213,13 @@ static void *fsnotify_detach_connector_from_object(
if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = fsnotify_conn_inode(conn);
inode->i_fsnotify_mask = 0;
- atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs);
} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
}
+ fsnotify_put_sb_connectors(conn);
rcu_assign_pointer(*(conn->obj), NULL);
conn->obj = NULL;
conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
@@ -209,19 +240,12 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
/* Drop object reference originally held by a connector */
static void fsnotify_drop_object(unsigned int type, void *objp)
{
- struct inode *inode;
- struct super_block *sb;
-
if (!objp)
return;
/* Currently only inode references are passed to be dropped */
if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
return;
- inode = objp;
- sb = inode->i_sb;
- iput(inode);
- if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs))
- wake_up_var(&sb->s_fsnotify_inode_refs);
+ fsnotify_put_inode_ref(objp);
}
void fsnotify_put_mark(struct fsnotify_mark *mark)
@@ -493,8 +517,12 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
conn->fsid.val[0] = conn->fsid.val[1] = 0;
conn->flags = 0;
}
- if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
- inode = igrab(fsnotify_conn_inode(conn));
+ if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
+ inode = fsnotify_conn_inode(conn);
+ fsnotify_get_inode_ref(inode);
+ }
+ fsnotify_get_sb_connectors(conn);
+
/*
* cmpxchg() provides the barrier so that readers of *connp can see
* only initialized structure
@@ -502,7 +530,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
if (cmpxchg(connp, NULL, conn)) {
/* Someone else created list structure for us */
if (inode)
- iput(inode);
+ fsnotify_put_inode_ref(inode);
kmem_cache_free(fsnotify_mark_connector_cachep, conn);
}
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index fab7c6a4a7d0..73a3854b2afb 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -101,8 +101,6 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_FLOCK))
return -ENOLCK;
- if (__mandatory_lock(inode))
- return -ENOLCK;
if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
ocfs2_mount_local(osb))
@@ -121,8 +119,6 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
- if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
- return -ENOLCK;
return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl);
}
diff --git a/fs/open.c b/fs/open.c
index 94bef26ff1b6..daa324606a41 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -105,9 +105,7 @@ long vfs_truncate(const struct path *path, loff_t length)
if (error)
goto put_write_and_out;
- error = locks_verify_truncate(inode, NULL, length);
- if (!error)
- error = security_path_truncate(path);
+ error = security_path_truncate(path);
if (!error)
error = do_truncate(mnt_userns, path->dentry, length, 0, NULL);
@@ -189,9 +187,7 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
if (IS_APPEND(file_inode(f.file)))
goto out_putf;
sb_start_write(inode->i_sb);
- error = locks_verify_truncate(inode, f.file, length);
- if (!error)
- error = security_path_truncate(&f.file->f_path);
+ error = security_path_truncate(&f.file->f_path);
if (!error)
error = do_truncate(file_mnt_user_ns(f.file), dentry, length,
ATTR_MTIME | ATTR_CTIME, f.file);
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 41ebf52f1bbc..ebde05c9cf62 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -392,6 +392,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
*/
take_dentry_name_snapshot(&name, real);
this = lookup_one_len(name.name.name, connected, name.name.len);
+ release_dentry_name_snapshot(&name);
err = PTR_ERR(this);
if (IS_ERR(this)) {
goto fail;
@@ -406,7 +407,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
}
out:
- release_dentry_name_snapshot(&name);
dput(parent);
inode_unlock(dir);
return this;
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 4d53d3b7e5fe..d081faa55e83 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -392,6 +392,51 @@ out_unlock:
return ret;
}
+/*
+ * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
+ * due to lock order inversion between pipe->mutex in iter_file_splice_write()
+ * and file_start_write(real.file) in ovl_write_iter().
+ *
+ * So do everything ovl_write_iter() does and call iter_file_splice_write() on
+ * the real file.
+ */
+static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
+{
+ struct fd real;
+ const struct cred *old_cred;
+ struct inode *inode = file_inode(out);
+ struct inode *realinode = ovl_inode_real(inode);
+ ssize_t ret;
+
+ inode_lock(inode);
+ /* Update mode */
+ ovl_copyattr(realinode, inode);
+ ret = file_remove_privs(out);
+ if (ret)
+ goto out_unlock;
+
+ ret = ovl_real_fdget(out, &real);
+ if (ret)
+ goto out_unlock;
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ file_start_write(real.file);
+
+ ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
+
+ file_end_write(real.file);
+ /* Update size */
+ ovl_copyattr(realinode, inode);
+ revert_creds(old_cred);
+ fdput(real);
+
+out_unlock:
+ inode_unlock(inode);
+
+ return ret;
+}
+
static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct fd real;
@@ -603,7 +648,7 @@ const struct file_operations ovl_file_operations = {
.fadvise = ovl_fadvise,
.flush = ovl_flush,
.splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
+ .splice_write = ovl_splice_write,
.copy_file_range = ovl_copy_file_range,
.remap_file_range = ovl_remap_file_range,
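The inversion ovl_splice_write() avoids, sketched from the comment above (call chains paraphrased, not traced from source):

/*
 * old: splice(2) -> iter_file_splice_write()     takes pipe->mutex
 *                   -> ovl_write_iter()
 *                      -> file_start_write(real) (inverted order)
 *
 * new: splice(2) -> ovl_splice_write()
 *                   -> file_start_write(real)
 *                   -> iter_file_splice_write()  takes pipe->mutex
 */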
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index e8ad2c2c77dd..150fdf3bc68d 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -481,6 +481,8 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
}
this = lookup_one_len(p->name, dir, p->len);
if (IS_ERR_OR_NULL(this) || !this->d_inode) {
+ /* Mark a stale entry */
+ p->is_whiteout = true;
if (IS_ERR(this)) {
err = PTR_ERR(this);
this = NULL;
@@ -776,6 +778,9 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
if (err)
goto out;
}
+ }
+ /* ovl_cache_update_ino() sets is_whiteout on stale entry */
+ if (!p->is_whiteout) {
if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
break;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 9ef4231cce61..6d4342bad9f1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -32,6 +32,21 @@
#include "internal.h"
/*
+ * New pipe buffers will be restricted to this size while the user is exceeding
+ * their pipe buffer quota. The general pipe use case needs at least two
+ * buffers: one for data yet to be read, and one for new data. If this is less
+ * than two, then a write to a non-empty pipe may block even if the pipe is not
+ * full. This can occur with GNU make jobserver or similar uses of pipes as
+ * semaphores: multiple processes may be waiting to write tokens back to the
+ * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
+ *
+ * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
+ * own risk, namely: pipe writes to non-full pipes may block until the pipe is
+ * emptied.
+ */
+#define PIPE_MIN_DEF_BUFFERS 2
+
+/*
* The max size that a non-root user is allowed to grow the pipe. Can
* be set by root in /proc/sys/fs/pipe-max-size
*/
@@ -348,10 +363,9 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
* _very_ unlikely case that the pipe was full, but we got
* no data.
*/
- if (unlikely(was_full)) {
+ if (unlikely(was_full))
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
- kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
- }
+ kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
/*
* But because we didn't read anything, at this point we can
@@ -370,12 +384,11 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
wake_next_reader = false;
__pipe_unlock(pipe);
- if (was_full) {
+ if (was_full)
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
- kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
- }
if (wake_next_reader)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
+ kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
if (ret > 0)
file_accessed(filp);
return ret;
@@ -429,9 +442,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
#endif
/*
- * Epoll nonsensically wants a wakeup whether the pipe
- * was already empty or not.
- *
* If it wasn't empty we try to merge new data into
* the last buffer.
*
@@ -440,9 +450,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* spanning multiple pages.
*/
head = pipe->head;
- was_empty = true;
+ was_empty = pipe_empty(head, pipe->tail);
chars = total_len & (PAGE_SIZE-1);
- if (chars && !pipe_empty(head, pipe->tail)) {
+ if (chars && !was_empty) {
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
int offset = buf->offset + buf->len;
@@ -553,10 +563,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* become empty while we dropped the lock.
*/
__pipe_unlock(pipe);
- if (was_empty) {
+ if (was_empty)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- }
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
__pipe_lock(pipe);
was_empty = pipe_empty(pipe->head, pipe->tail);
@@ -575,11 +584,13 @@ out:
* This is particularly important for small writes, because of
* how (for example) the GNU make jobserver uses small writes to
* wake up pending jobs
+ *
+ * Epoll nonsensically wants a wakeup whether the pipe
+ * was already empty or not.
*/
- if (was_empty) {
+ if (was_empty || pipe->poll_usage)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- }
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
if (wake_next_writer)
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
@@ -639,6 +650,9 @@ pipe_poll(struct file *filp, poll_table *wait)
struct pipe_inode_info *pipe = filp->private_data;
unsigned int head, tail;
+ /* Epoll has some historical nasty semantics, this enables them */
+ pipe->poll_usage = 1;
+
/*
* Reading pipe state only -- no need for acquiring the semaphore.
*
@@ -781,8 +795,8 @@ struct pipe_inode_info *alloc_pipe_info(void)
user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
- user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
- pipe_bufs = 1;
+ user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
+ pipe_bufs = PIPE_MIN_DEF_BUFFERS;
}
if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
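A userspace sketch of the caveat documented above: F_SETPIPE_SZ can still shrink a pipe below PIPE_MIN_DEF_BUFFERS, at the caller's own risk of blocking writes to a non-full pipe (one page assumed as the rounding granularity):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fds[2];

	if (pipe(fds) < 0)
		return 1;

	/* Request a single-page pipe: below the new two-buffer default,
	 * so a write to a non-empty (but not full) pipe may block. */
	int sz = fcntl(fds[1], F_SETPIPE_SZ, 4096);
	printf("pipe buffer size: %d bytes\n", sz);

	close(fds[0]);
	close(fds[1]);
	return 0;
}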
diff --git a/fs/read_write.c b/fs/read_write.c
index 9db7adf160d2..af057c57bdc6 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -365,12 +365,8 @@ out_putf:
int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
{
- struct inode *inode;
- int retval = -EINVAL;
-
- inode = file_inode(file);
if (unlikely((ssize_t) count < 0))
- return retval;
+ return -EINVAL;
/*
* ranged mandatory locking does not apply to streams - it makes sense
@@ -381,19 +377,12 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
if (unlikely(pos < 0)) {
if (!unsigned_offsets(file))
- return retval;
+ return -EINVAL;
if (count >= -pos) /* both values are in 0..LLONG_MAX */
return -EOVERFLOW;
} else if (unlikely((loff_t) (pos + count) < 0)) {
if (!unsigned_offsets(file))
- return retval;
- }
-
- if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
- retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
- read_write == READ ? F_RDLCK : F_WRLCK);
- if (retval < 0)
- return retval;
+ return -EINVAL;
}
}
diff --git a/fs/remap_range.c b/fs/remap_range.c
index e4a5fdd7ad7b..6d4a9beaa097 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -99,24 +99,12 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in,
static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
bool write)
{
- struct inode *inode = file_inode(file);
-
if (unlikely(pos < 0 || len < 0))
return -EINVAL;
if (unlikely((loff_t) (pos + len) < 0))
return -EINVAL;
- if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
- loff_t end = len ? pos + len - 1 : OFFSET_MAX;
- int retval;
-
- retval = locks_mandatory_area(inode, file, pos, end,
- write ? F_WRLCK : F_RDLCK);
- if (retval < 0)
- return retval;
- }
-
return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
}
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 855f0e87066d..2db8bcf7ff85 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -49,8 +49,7 @@ static int copy_bio_to_actor(struct bio *bio,
bytes_to_copy = min_t(int, bytes_to_copy,
req_length - copied_bytes);
- memcpy(actor_addr + actor_offset,
- page_address(bvec->bv_page) + bvec->bv_offset + offset,
+ memcpy(actor_addr + actor_offset, bvec_virt(bvec) + offset,
bytes_to_copy);
actor_offset += bytes_to_copy;
@@ -177,7 +176,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
goto out_free_bio;
}
/* Extract the length of the metadata block */
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length = data[offset];
if (offset < bvec->bv_len - 1) {
length |= data[offset + 1] << 8;
@@ -186,7 +185,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
res = -EIO;
goto out_free_bio;
}
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length |= data[0] << 8;
}
bio_free_pages(bio);
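bvec_virt(), used here and in the decompressor wrappers below, is essentially shorthand for the expression it replaces (definition per include/linux/bvec.h):

static inline void *bvec_virt(struct bio_vec *bvec)
{
	return page_address(bvec->bv_page) + bvec->bv_offset;
}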
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index 233d5582fbee..b685b6238316 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -101,7 +101,7 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
while (bio_next_segment(bio, &iter_all)) {
int avail = min(bytes, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
memcpy(buff, data + offset, avail);
buff += avail;
bytes -= avail;
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 97bb7d92ddcd..cb510a631968 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -76,7 +76,7 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
while (bio_next_segment(bio, &iter_all)) {
int avail = min(bytes, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
memcpy(buff, data + offset, avail);
buff += avail;
bytes -= avail;
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index e80419aed862..68f6d09bb3a2 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -146,7 +146,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
}
avail = min(length, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length -= avail;
stream->buf.in = data + offset;
stream->buf.in_size = avail;
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index bcb881ec47f2..a20e9042146b 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -76,7 +76,7 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
}
avail = min(length, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length -= avail;
stream->next_in = data + offset;
stream->avail_in = avail;
diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c
index b7cb1faa652d..0015cf8b5582 100644
--- a/fs/squashfs/zstd_wrapper.c
+++ b/fs/squashfs/zstd_wrapper.c
@@ -94,7 +94,7 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
}
avail = min(length, ((int)bvec->bv_len) - offset);
- data = page_address(bvec->bv_page) + bvec->bv_offset;
+ data = bvec_virt(bvec);
length -= avail;
in_buf.src = data + offset;
in_buf.size = avail;
diff --git a/fs/super.c b/fs/super.c
index 91b7f156735b..bcef3a6f4c4b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1203,7 +1203,7 @@ static int set_bdev_super(struct super_block *s, void *data)
{
s->s_bdev = data;
s->s_dev = s->s_bdev->bd_dev;
- s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
+ s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
if (blk_queue_stable_writes(s->s_bdev->bd_disk->queue))
s->s_iflags |= SB_I_STABLE_WRITES;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index c5509d2448e3..e9c96a0c79f1 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -115,6 +115,22 @@ void timerfd_clock_was_set(void)
rcu_read_unlock();
}
+static void timerfd_resume_work(struct work_struct *work)
+{
+ timerfd_clock_was_set();
+}
+
+static DECLARE_WORK(timerfd_work, timerfd_resume_work);
+
+/*
+ * Invoked from timekeeping_resume(). Defer the actual update to work so
+ * timerfd_clock_was_set() runs in task context.
+ */
+void timerfd_resume(void)
+{
+ schedule_work(&timerfd_work);
+}
+
static void __timerfd_remove_cancel(struct timerfd_ctx *ctx)
{
if (ctx->might_cancel) {
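The timerfd change above is the usual defer-to-task-context pattern; a generic sketch with illustrative names:

static void my_resume_work(struct work_struct *work)
{
	/* Runs later in process context, where sleeping locks
	 * may be taken. */
}

static DECLARE_WORK(my_work, my_resume_work);

/* Safe to call from the early/atomic resume path. */
void my_resume(void)
{
	schedule_work(&my_work);
}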
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 2e4e1d159969..5cfa28cd00cd 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1630,6 +1630,17 @@ static const char *ubifs_get_link(struct dentry *dentry,
return fscrypt_get_symlink(inode, ui->data, ui->data_len, done);
}
+static int ubifs_symlink_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
+{
+ ubifs_getattr(mnt_userns, path, stat, request_mask, query_flags);
+
+ if (IS_ENCRYPTED(d_inode(path->dentry)))
+ return fscrypt_symlink_getattr(path, stat);
+ return 0;
+}
+
const struct address_space_operations ubifs_file_address_operations = {
.readpage = ubifs_readpage,
.writepage = ubifs_writepage,
@@ -1655,7 +1666,7 @@ const struct inode_operations ubifs_file_inode_operations = {
const struct inode_operations ubifs_symlink_inode_operations = {
.get_link = ubifs_get_link,
.setattr = ubifs_setattr,
- .getattr = ubifs_getattr,
+ .getattr = ubifs_symlink_getattr,
.listxattr = ubifs_listxattr,
.update_time = ubifs_update_time,
};
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index c19dba45aa20..70abdfad2df1 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -35,7 +35,6 @@
#include "udf_i.h"
#include "udf_sb.h"
-
static int udf_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *dir = file_inode(file);
@@ -135,7 +134,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
lfi = cfi.lengthFileIdent;
if (fibh.sbh == fibh.ebh) {
- nameptr = fi->fileIdent + liu;
+ nameptr = udf_get_fi_ident(fi);
} else {
int poffset; /* Unpadded ending offset */
@@ -153,7 +152,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
}
}
nameptr = copy_name;
- memcpy(nameptr, fi->fileIdent + liu,
+ memcpy(nameptr, udf_get_fi_ident(fi),
lfi - poffset);
memcpy(nameptr + lfi - poffset,
fibh.ebh->b_data, poffset);
diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h
index 185c3e247648..de17a97e8667 100644
--- a/fs/udf/ecma_167.h
+++ b/fs/udf/ecma_167.h
@@ -307,14 +307,14 @@ struct logicalVolDesc {
struct regid impIdent;
uint8_t impUse[128];
struct extent_ad integritySeqExt;
- uint8_t partitionMaps[0];
+ uint8_t partitionMaps[];
} __packed;
/* Generic Partition Map (ECMA 167r3 3/10.7.1) */
struct genericPartitionMap {
uint8_t partitionMapType;
uint8_t partitionMapLength;
- uint8_t partitionMapping[0];
+ uint8_t partitionMapping[];
} __packed;
/* Partition Map Type (ECMA 167r3 3/10.7.1.1) */
@@ -342,7 +342,7 @@ struct unallocSpaceDesc {
struct tag descTag;
__le32 volDescSeqNum;
__le32 numAllocDescs;
- struct extent_ad allocDescs[0];
+ struct extent_ad allocDescs[];
} __packed;
/* Terminating Descriptor (ECMA 167r3 3/10.9) */
@@ -360,9 +360,9 @@ struct logicalVolIntegrityDesc {
uint8_t logicalVolContentsUse[32];
__le32 numOfPartitions;
__le32 lengthOfImpUse;
- __le32 freeSpaceTable[0];
- __le32 sizeTable[0];
- uint8_t impUse[0];
+ __le32 freeSpaceTable[];
+ /* __le32 sizeTable[]; */
+ /* uint8_t impUse[]; */
} __packed;
/* Integrity Type (ECMA 167r3 3/10.10.3) */
@@ -471,9 +471,9 @@ struct fileIdentDesc {
uint8_t lengthFileIdent;
struct long_ad icb;
__le16 lengthOfImpUse;
- uint8_t impUse[0];
- uint8_t fileIdent[0];
- uint8_t padding[0];
+ uint8_t impUse[];
+ /* uint8_t fileIdent[]; */
+ /* uint8_t padding[]; */
} __packed;
/* File Characteristics (ECMA 167r3 4/14.4.3) */
@@ -578,8 +578,8 @@ struct fileEntry {
__le64 uniqueID;
__le32 lengthExtendedAttr;
__le32 lengthAllocDescs;
- uint8_t extendedAttr[0];
- uint8_t allocDescs[0];
+ uint8_t extendedAttr[];
+ /* uint8_t allocDescs[]; */
} __packed;
/* Permissions (ECMA 167r3 4/14.9.5) */
@@ -632,7 +632,7 @@ struct genericFormat {
uint8_t attrSubtype;
uint8_t reserved[3];
__le32 attrLength;
- uint8_t attrData[0];
+ uint8_t attrData[];
} __packed;
/* Character Set Information (ECMA 167r3 4/14.10.3) */
@@ -643,7 +643,7 @@ struct charSetInfo {
__le32 attrLength;
__le32 escapeSeqLength;
uint8_t charSetType;
- uint8_t escapeSeq[0];
+ uint8_t escapeSeq[];
} __packed;
/* Alternate Permissions (ECMA 167r3 4/14.10.4) */
@@ -682,7 +682,7 @@ struct infoTimesExtAttr {
__le32 attrLength;
__le32 dataLength;
__le32 infoTimeExistence;
- uint8_t infoTimes[0];
+ uint8_t infoTimes[];
} __packed;
/* Device Specification (ECMA 167r3 4/14.10.7) */
@@ -694,7 +694,7 @@ struct deviceSpec {
__le32 impUseLength;
__le32 majorDeviceIdent;
__le32 minorDeviceIdent;
- uint8_t impUse[0];
+ uint8_t impUse[];
} __packed;
/* Implementation Use Extended Attr (ECMA 167r3 4/14.10.8) */
@@ -705,7 +705,7 @@ struct impUseExtAttr {
__le32 attrLength;
__le32 impUseLength;
struct regid impIdent;
- uint8_t impUse[0];
+ uint8_t impUse[];
} __packed;
/* Application Use Extended Attribute (ECMA 167r3 4/14.10.9) */
@@ -716,7 +716,7 @@ struct appUseExtAttr {
__le32 attrLength;
__le32 appUseLength;
struct regid appIdent;
- uint8_t appUse[0];
+ uint8_t appUse[];
} __packed;
#define EXTATTR_CHAR_SET 1
@@ -733,7 +733,7 @@ struct unallocSpaceEntry {
struct tag descTag;
struct icbtag icbTag;
__le32 lengthAllocDescs;
- uint8_t allocDescs[0];
+ uint8_t allocDescs[];
} __packed;
/* Space Bitmap Descriptor (ECMA 167r3 4/14.12) */
@@ -741,7 +741,7 @@ struct spaceBitmapDesc {
struct tag descTag;
__le32 numOfBits;
__le32 numOfBytes;
- uint8_t bitmap[0];
+ uint8_t bitmap[];
} __packed;
/* Partition Integrity Entry (ECMA 167r3 4/14.13) */
@@ -780,7 +780,7 @@ struct pathComponent {
uint8_t componentType;
uint8_t lengthComponentIdent;
__le16 componentFileVersionNum;
- dchars componentIdent[0];
+ dchars componentIdent[];
} __packed;
/* File Entry (ECMA 167r3 4/14.17) */
@@ -809,8 +809,8 @@ struct extendedFileEntry {
__le64 uniqueID;
__le32 lengthExtendedAttr;
__le32 lengthAllocDescs;
- uint8_t extendedAttr[0];
- uint8_t allocDescs[0];
+ uint8_t extendedAttr[];
+ /* uint8_t allocDescs[]; */
} __packed;
#endif /* _ECMA_167_H */
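The [0]-to-[] conversions above are the standard zero-length-array to C99 flexible-array-member cleanup. Because a struct may have only one flexible array member (and it must come last), the extra trailing regions are kept as comments and reached by pointer arithmetic instead; a sketch of the idiom with an illustrative struct:

struct two_tail_desc {
	__le32 a_len;
	uint8_t a[];		/* b[] follows a[] in memory */
	/* uint8_t b[]; */
};

static inline uint8_t *two_tail_desc_b(struct two_tail_desc *d)
{
	return d->a + le32_to_cpu(d->a_len);
}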
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4917670860a0..1d6b7a50736b 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -390,8 +390,7 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode,
dfibh.eoffset += (sfibh.eoffset - sfibh.soffset);
dfi = (struct fileIdentDesc *)(dbh->b_data + dfibh.soffset);
if (udf_write_fi(inode, sfi, dfi, &dfibh, sfi->impUse,
- sfi->fileIdent +
- le16_to_cpu(sfi->lengthOfImpUse))) {
+ udf_get_fi_ident(sfi))) {
iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
brelse(dbh);
return NULL;
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index eab94527340d..1614d308d0f0 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -173,13 +173,22 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type,
else
offset = le32_to_cpu(eahd->appAttrLocation);
- while (offset < iinfo->i_lenEAttr) {
+ while (offset + sizeof(*gaf) < iinfo->i_lenEAttr) {
+ uint32_t attrLength;
+
gaf = (struct genericFormat *)&ea[offset];
+ attrLength = le32_to_cpu(gaf->attrLength);
+
+ /* Detect undersized elements and buffer overflows */
+ if ((attrLength < sizeof(*gaf)) ||
+ (attrLength > (iinfo->i_lenEAttr - offset)))
+ break;
+
if (le32_to_cpu(gaf->attrType) == type &&
gaf->attrSubtype == subtype)
return gaf;
else
- offset += le32_to_cpu(gaf->attrLength);
+ offset += attrLength;
}
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 7c7c9bbbfa57..caeef08efed2 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -74,12 +74,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
if (fileident) {
if (adinicb || (offset + lfi < 0)) {
- memcpy((uint8_t *)sfi->fileIdent + liu, fileident, lfi);
+ memcpy(udf_get_fi_ident(sfi), fileident, lfi);
} else if (offset >= 0) {
memcpy(fibh->ebh->b_data + offset, fileident, lfi);
} else {
- memcpy((uint8_t *)sfi->fileIdent + liu, fileident,
- -offset);
+ memcpy(udf_get_fi_ident(sfi), fileident, -offset);
memcpy(fibh->ebh->b_data, fileident - offset,
lfi + offset);
}
@@ -88,11 +87,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
offset += lfi;
if (adinicb || (offset + padlen < 0)) {
- memset((uint8_t *)sfi->padding + liu + lfi, 0x00, padlen);
+ memset(udf_get_fi_ident(sfi) + lfi, 0x00, padlen);
} else if (offset >= 0) {
memset(fibh->ebh->b_data + offset, 0x00, padlen);
} else {
- memset((uint8_t *)sfi->padding + liu + lfi, 0x00, -offset);
+ memset(udf_get_fi_ident(sfi) + lfi, 0x00, -offset);
memset(fibh->ebh->b_data, 0x00, padlen + offset);
}
@@ -226,7 +225,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
lfi = cfi->lengthFileIdent;
if (fibh->sbh == fibh->ebh) {
- nameptr = fi->fileIdent + liu;
+ nameptr = udf_get_fi_ident(fi);
} else {
int poffset; /* Unpadded ending offset */
@@ -246,7 +245,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
}
}
nameptr = copy_name;
- memcpy(nameptr, fi->fileIdent + liu,
+ memcpy(nameptr, udf_get_fi_ident(fi),
lfi - poffset);
memcpy(nameptr + lfi - poffset,
fibh->ebh->b_data, poffset);
diff --git a/fs/udf/osta_udf.h b/fs/udf/osta_udf.h
index 22bc4fb2feb9..157de0ec0cd5 100644
--- a/fs/udf/osta_udf.h
+++ b/fs/udf/osta_udf.h
@@ -111,7 +111,7 @@ struct logicalVolIntegrityDescImpUse {
__le16 minUDFReadRev;
__le16 minUDFWriteRev;
__le16 maxUDFWriteRev;
- uint8_t impUse[0];
+ uint8_t impUse[];
} __packed;
/* Implementation Use Volume Descriptor (UDF 2.60 2.2.7) */
@@ -178,15 +178,6 @@ struct metadataPartitionMap {
uint8_t reserved2[5];
} __packed;
-/* Virtual Allocation Table (UDF 1.5 2.2.10) */
-struct virtualAllocationTable15 {
- __le32 vatEntry[0];
- struct regid vatIdent;
- __le32 previousVATICBLoc;
-} __packed;
-
-#define ICBTAG_FILE_TYPE_VAT15 0x00U
-
/* Virtual Allocation Table (UDF 2.60 2.2.11) */
struct virtualAllocationTable20 {
__le16 lengthHeader;
@@ -199,8 +190,8 @@ struct virtualAllocationTable20 {
__le16 minUDFWriteRev;
__le16 maxUDFWriteRev;
__le16 reserved;
- uint8_t impUse[0];
- __le32 vatEntry[0];
+ uint8_t impUse[];
+ /* __le32 vatEntry[]; */
} __packed;
#define ICBTAG_FILE_TYPE_VAT20 0xF8U
@@ -217,8 +208,7 @@ struct sparingTable {
__le16 reallocationTableLen;
__le16 reserved;
__le32 sequenceNum;
- struct sparingEntry
- mapEntry[0];
+ struct sparingEntry mapEntry[];
} __packed;
/* Metadata File (and Metadata Mirror File) (UDF 2.60 2.2.13.1) */
@@ -241,7 +231,7 @@ struct allocDescImpUse {
/* FreeEASpace (UDF 2.60 3.3.4.5.1.1) */
struct freeEaSpace {
__le16 headerChecksum;
- uint8_t freeEASpace[0];
+ uint8_t freeEASpace[];
} __packed;
/* DVD Copyright Management Information (UDF 2.60 3.3.4.5.1.2) */
@@ -265,7 +255,7 @@ struct LVExtensionEA {
/* FreeAppEASpace (UDF 2.60 3.3.4.6.1) */
struct freeAppEASpace {
__le16 headerChecksum;
- uint8_t freeEASpace[0];
+ uint8_t freeEASpace[];
} __packed;
/* UDF Defined System Stream (UDF 2.60 3.3.7) */
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 2f83c1204e20..b2d7c57d0688 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -108,16 +108,10 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb)
return NULL;
lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data;
partnum = le32_to_cpu(lvid->numOfPartitions);
- if ((sb->s_blocksize - sizeof(struct logicalVolIntegrityDescImpUse) -
- offsetof(struct logicalVolIntegrityDesc, impUse)) /
- (2 * sizeof(uint32_t)) < partnum) {
- udf_err(sb, "Logical volume integrity descriptor corrupted "
- "(numOfPartitions = %u)!\n", partnum);
- return NULL;
- }
/* The offset is to skip freeSpaceTable and sizeTable arrays */
offset = partnum * 2 * sizeof(uint32_t);
- return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]);
+ return (struct logicalVolIntegrityDescImpUse *)
+ (((uint8_t *)(lvid + 1)) + offset);
}
/* UDF filesystem type */
@@ -349,10 +343,10 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",lastblock=%u", sbi->s_last_block);
if (sbi->s_anchor != 0)
seq_printf(seq, ",anchor=%u", sbi->s_anchor);
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
- seq_puts(seq, ",utf8");
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP) && sbi->s_nls_map)
+ if (sbi->s_nls_map)
seq_printf(seq, ",iocharset=%s", sbi->s_nls_map->charset);
+ else
+ seq_puts(seq, ",iocharset=utf8");
return 0;
}
@@ -558,19 +552,24 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
/* Ignored (never implemented properly) */
break;
case Opt_utf8:
- uopt->flags |= (1 << UDF_FLAG_UTF8);
+ if (!remount) {
+ unload_nls(uopt->nls_map);
+ uopt->nls_map = NULL;
+ }
break;
case Opt_iocharset:
if (!remount) {
- if (uopt->nls_map)
- unload_nls(uopt->nls_map);
- /*
- * load_nls() failure is handled later in
- * udf_fill_super() after all options are
- * parsed.
- */
+ unload_nls(uopt->nls_map);
+ uopt->nls_map = NULL;
+ }
+ /* When nls_map is not loaded then UTF-8 is used */
+ if (!remount && strcmp(args[0].from, "utf8") != 0) {
uopt->nls_map = load_nls(args[0].from);
- uopt->flags |= (1 << UDF_FLAG_NLS_MAP);
+ if (!uopt->nls_map) {
+ pr_err("iocharset %s not found\n",
+ args[0].from);
+ return 0;
+ }
}
break;
case Opt_uforget:
@@ -1542,6 +1541,7 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
struct udf_sb_info *sbi = UDF_SB(sb);
struct logicalVolIntegrityDesc *lvid;
int indirections = 0;
+ u32 parts, impuselen;
while (++indirections <= UDF_MAX_LVID_NESTING) {
final_bh = NULL;
@@ -1568,15 +1568,27 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
lvid = (struct logicalVolIntegrityDesc *)final_bh->b_data;
if (lvid->nextIntegrityExt.extLength == 0)
- return;
+ goto check;
loc = leea_to_cpu(lvid->nextIntegrityExt);
}
udf_warn(sb, "Too many LVID indirections (max %u), ignoring.\n",
UDF_MAX_LVID_NESTING);
+out_err:
brelse(sbi->s_lvid_bh);
sbi->s_lvid_bh = NULL;
+ return;
+check:
+ parts = le32_to_cpu(lvid->numOfPartitions);
+ impuselen = le32_to_cpu(lvid->lengthOfImpUse);
+ if (parts >= sb->s_blocksize || impuselen >= sb->s_blocksize ||
+ sizeof(struct logicalVolIntegrityDesc) + impuselen +
+ 2 * parts * sizeof(u32) > sb->s_blocksize) {
+ udf_warn(sb, "Corrupted LVID (parts=%u, impuselen=%u), "
+ "ignoring.\n", parts, impuselen);
+ goto out_err;
+ }
}
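The new check bounds parts and impuselen below the block size before summing them, so the final comparison cannot be defeated by arithmetic wrap-around from attacker-controlled 32-bit counts in a crafted LVID. A standalone sketch of the same overflow-safe pattern (HDR_SIZE is a made-up constant here, not sizeof(struct logicalVolIntegrityDesc)):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HDR_SIZE 88u	/* placeholder for the fixed descriptor size */

static bool lvid_sane(uint32_t parts, uint32_t impuselen, uint32_t blocksize)
{
	/* Reject absurd counts first ... */
	if (parts >= blocksize || impuselen >= blocksize)
		return false;
	/* ... so this 64-bit sum is exact and cannot wrap before the
	 * comparison, even for adversarially chosen 32-bit inputs. */
	return (uint64_t)HDR_SIZE + impuselen +
	       2ULL * parts * sizeof(uint32_t) <= blocksize;
}

int main(void)
{
	printf("%d\n", lvid_sane(0xFFFFFFFFu, 8, 2048));	/* 0 */
	printf("%d\n", lvid_sane(2, 8, 2048));			/* 1 */
	return 0;
}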
/*
@@ -2139,21 +2151,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
if (!udf_parse_options((char *)options, &uopt, false))
goto parse_options_failure;
- if (uopt.flags & (1 << UDF_FLAG_UTF8) &&
- uopt.flags & (1 << UDF_FLAG_NLS_MAP)) {
- udf_err(sb, "utf8 cannot be combined with iocharset\n");
- goto parse_options_failure;
- }
- if ((uopt.flags & (1 << UDF_FLAG_NLS_MAP)) && !uopt.nls_map) {
- uopt.nls_map = load_nls_default();
- if (!uopt.nls_map)
- uopt.flags &= ~(1 << UDF_FLAG_NLS_MAP);
- else
- udf_debug("Using default NLS map\n");
- }
- if (!(uopt.flags & (1 << UDF_FLAG_NLS_MAP)))
- uopt.flags |= (1 << UDF_FLAG_UTF8);
-
fileset.logicalBlockNum = 0xFFFFFFFF;
fileset.partitionReferenceNum = 0xFFFF;
@@ -2308,8 +2305,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
error_out:
iput(sbi->s_vat_inode);
parse_options_failure:
- if (uopt.nls_map)
- unload_nls(uopt.nls_map);
+ unload_nls(uopt.nls_map);
if (lvid_open)
udf_close_lvid(sb);
brelse(sbi->s_lvid_bh);
@@ -2359,8 +2355,7 @@ static void udf_put_super(struct super_block *sb)
sbi = UDF_SB(sb);
iput(sbi->s_vat_inode);
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
- unload_nls(sbi->s_nls_map);
+ unload_nls(sbi->s_nls_map);
if (!sb_rdonly(sb))
udf_close_lvid(sb);
brelse(sbi->s_lvid_bh);
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 758efe557a19..4fa620543d30 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -20,8 +20,6 @@
#define UDF_FLAG_UNDELETE 6
#define UDF_FLAG_UNHIDE 7
#define UDF_FLAG_VARCONV 8
-#define UDF_FLAG_NLS_MAP 9
-#define UDF_FLAG_UTF8 10
#define UDF_FLAG_UID_FORGET 11 /* save -1 for uid to disk */
#define UDF_FLAG_GID_FORGET 12
#define UDF_FLAG_UID_SET 13
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 9dd0814f1077..7e258f15b8ef 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -130,6 +130,10 @@ static inline unsigned int udf_dir_entry_len(struct fileIdentDesc *cfi)
le16_to_cpu(cfi->lengthOfImpUse) + cfi->lengthFileIdent,
UDF_NAME_PAD);
}
+static inline uint8_t *udf_get_fi_ident(struct fileIdentDesc *fi)
+{
+ return ((uint8_t *)(fi + 1)) + le16_to_cpu(fi->lengthOfImpUse);
+}
/* file.c */
extern long udf_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 5fcfa96463eb..622569007b53 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -177,7 +177,7 @@ static int udf_name_from_CS0(struct super_block *sb,
return 0;
}
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
+ if (UDF_SB(sb)->s_nls_map)
conv_f = UDF_SB(sb)->s_nls_map->uni2char;
else
conv_f = NULL;
@@ -285,7 +285,7 @@ static int udf_name_to_CS0(struct super_block *sb,
if (ocu_max_len <= 0)
return 0;
- if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
+ if (UDF_SB(sb)->s_nls_map)
conv_f = UDF_SB(sb)->s_nls_map->char2uni;
else
conv_f = NULL;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 213a97a921bb..1cd3f940fa6a 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1626,7 +1626,6 @@ xfs_swap_extents(
struct xfs_bstat *sbp = &sxp->sx_stat;
int src_log_flags, target_log_flags;
int error = 0;
- int lock_flags;
uint64_t f;
int resblks = 0;
unsigned int flags = 0;
@@ -1638,8 +1637,8 @@ xfs_swap_extents(
* do the rest of the checks.
*/
lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
- lock_flags = XFS_MMAPLOCK_EXCL;
- xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL);
+ filemap_invalidate_lock_two(VFS_I(ip)->i_mapping,
+ VFS_I(tip)->i_mapping);
/* Verify that both files have the same format */
if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
@@ -1711,7 +1710,6 @@ xfs_swap_extents(
* or cancel will unlock the inodes from this point onwards.
*/
xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL);
- lock_flags |= XFS_ILOCK_EXCL;
xfs_trans_ijoin(tp, ip, 0);
xfs_trans_ijoin(tp, tip, 0);
@@ -1830,13 +1828,16 @@ xfs_swap_extents(
trace_xfs_swap_extent_after(ip, 0);
trace_xfs_swap_extent_after(tip, 1);
+out_unlock_ilock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(tip, XFS_ILOCK_EXCL);
out_unlock:
- xfs_iunlock(ip, lock_flags);
- xfs_iunlock(tip, lock_flags);
+ filemap_invalidate_unlock_two(VFS_I(ip)->i_mapping,
+ VFS_I(tip)->i_mapping);
unlock_two_nondirectories(VFS_I(ip), VFS_I(tip));
return error;
out_trans_cancel:
xfs_trans_cancel(tp);
- goto out_unlock;
+ goto out_unlock_ilock;
}
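xfs_swap_extents() now double-locks the mapping invalidation locks through the VFS helper instead of the private XFS mmaplock. A userspace analogue built on pthread rwlocks (not the kernel API) of the property the helper provides: a fixed address-based acquisition order, so two tasks locking the same pair of mappings cannot ABBA-deadlock:

#include <pthread.h>
#include <stdint.h>

struct mapping {
	pthread_rwlock_t invalidate_lock;
};

static void invalidate_lock_two(struct mapping *m1, struct mapping *m2)
{
	if ((uintptr_t)m1 > (uintptr_t)m2) {	/* canonical address order */
		struct mapping *tmp = m1;
		m1 = m2;
		m2 = tmp;
	}
	pthread_rwlock_wrlock(&m1->invalidate_lock);
	if (m2 != m1)
		pthread_rwlock_wrlock(&m2->invalidate_lock);
}

static void invalidate_unlock_two(struct mapping *m1, struct mapping *m2)
{
	pthread_rwlock_unlock(&m1->invalidate_lock);
	if (m2 != m1)
		pthread_rwlock_unlock(&m2->invalidate_lock);
}

int main(void)
{
	struct mapping a = { .invalidate_lock = PTHREAD_RWLOCK_INITIALIZER };
	struct mapping b = { .invalidate_lock = PTHREAD_RWLOCK_INITIALIZER };

	invalidate_lock_two(&a, &b);	/* same order from every caller */
	invalidate_unlock_two(&a, &b);
	return 0;
}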
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8ff42b3585e0..3ab73567a0f5 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -844,7 +844,7 @@ xfs_buf_readahead_map(
{
struct xfs_buf *bp;
- if (bdi_read_congested(target->bt_bdev->bd_bdi))
+ if (bdi_read_congested(target->bt_bdev->bd_disk->bdi))
return;
xfs_buf_read_map(target, map, nmaps,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index cc3cfb12df53..3dfbdcdb0d1c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1302,7 +1302,7 @@ xfs_file_llseek(
*
* mmap_lock (MM)
* sb_start_pagefault(vfs, freeze)
- * i_mmaplock (XFS - truncate serialisation)
+ * invalidate_lock (vfs/XFS_MMAPLOCK - truncate serialisation)
* page_lock (MM)
* i_lock (XFS - extent map serialisation)
*/
@@ -1323,24 +1323,27 @@ __xfs_filemap_fault(
file_update_time(vmf->vma->vm_file);
}
- xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
pfn_t pfn;
+ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL,
(write_fault && !vmf->cow_page) ?
&xfs_direct_write_iomap_ops :
&xfs_read_iomap_ops);
if (ret & VM_FAULT_NEEDDSYNC)
ret = dax_finish_sync_fault(vmf, pe_size, pfn);
+ xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
} else {
- if (write_fault)
+ if (write_fault) {
+ xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
ret = iomap_page_mkwrite(vmf,
&xfs_buffered_write_iomap_ops);
- else
+ xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+ } else {
ret = filemap_fault(vmf);
+ }
}
- xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (write_fault)
sb_end_pagefault(inode->i_sb);
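After this restructuring only the paths that need extra serialisation, DAX faults and write faults, take XFS_MMAPLOCK_SHARED themselves; plain read faults go straight to filemap_fault(), which takes the mapping's invalidate_lock internally when it has to. A rough userspace sketch of that dispatch shape (pthread rwlock stand-ins and hypothetical helper names, not the XFS functions):

#include <pthread.h>

struct inode_like {
	pthread_rwlock_t invalidate_lock;
};

static int generic_fault(struct inode_like *ip)
{
	(void)ip;	/* would take invalidate_lock itself if required */
	return 0;
}

static int mkwrite_fault(struct inode_like *ip)
{
	(void)ip;
	return 0;
}

static int fault_dispatch(struct inode_like *ip, int write_fault)
{
	int ret;

	if (write_fault) {
		/* Only the modifying path holds the lock explicitly. */
		pthread_rwlock_rdlock(&ip->invalidate_lock);
		ret = mkwrite_fault(ip);
		pthread_rwlock_unlock(&ip->invalidate_lock);
	} else {
		ret = generic_fault(ip);
	}
	return ret;
}

int main(void)
{
	struct inode_like ip = {
		.invalidate_lock = PTHREAD_RWLOCK_INITIALIZER,
	};

	return fault_dispatch(&ip, 1) | fault_dispatch(&ip, 0);
}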
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 990b72ae3635..f00145e1a976 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -132,7 +132,7 @@ xfs_ilock_attr_map_shared(
/*
* In addition to i_rwsem in the VFS inode, the xfs inode contains 2
- * multi-reader locks: i_mmap_lock and the i_lock. This routine allows
+ * multi-reader locks: invalidate_lock and the i_lock. This routine allows
* various combinations of the locks to be obtained.
*
* The 3 locks should always be ordered so that the IO lock is obtained first,
@@ -140,23 +140,23 @@ xfs_ilock_attr_map_shared(
*
* Basic locking order:
*
- * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
+ * i_rwsem -> invalidate_lock -> page_lock -> i_ilock
*
* mmap_lock locking order:
*
* i_rwsem -> page lock -> mmap_lock
- * mmap_lock -> i_mmap_lock -> page_lock
+ * mmap_lock -> invalidate_lock -> page_lock
*
* The difference in mmap_lock locking order means that we cannot hold the
- * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
- * fault in pages during copy in/out (for buffered IO) or require the mmap_lock
- * in get_user_pages() to map the user pages into the kernel address space for
- * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
- * page faults already hold the mmap_lock.
+ * invalidate_lock over syscall based read(2)/write(2) based IO. These IO paths
+ * can fault in pages during copy in/out (for buffered IO) or require the
+ * mmap_lock in get_user_pages() to map the user pages into the kernel address
+ * space for direct IO. Similarly the i_rwsem cannot be taken inside a page
+ * fault because page faults already hold the mmap_lock.
*
* Hence to serialise fully against both syscall and mmap based IO, we need to
- * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
- * taken in places where we need to invalidate the page cache in a race
+ * take both the i_rwsem and the invalidate_lock. These locks should *only* be
+ * both taken in places where we need to invalidate the page cache in a race
* free manner (e.g. truncate, hole punch and other extent manipulation
* functions).
*/
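The updated comment pins the order i_rwsem -> invalidate_lock -> i_lock for paths that must fence both syscall and mmap IO. A userspace sketch (pthread rwlocks as stand-ins for the kernel locks) of a truncate/hole-punch style path honouring that order:

#include <pthread.h>

struct inode_like {
	pthread_rwlock_t i_rwsem;		/* IO lock, taken first */
	pthread_rwlock_t invalidate_lock;	/* mmap/page cache fence */
	pthread_rwlock_t i_lock;		/* extent map, taken last */
};

static void punch_hole(struct inode_like *ip)
{
	pthread_rwlock_wrlock(&ip->i_rwsem);
	pthread_rwlock_wrlock(&ip->invalidate_lock);
	pthread_rwlock_wrlock(&ip->i_lock);

	/* ... invalidate the page cache and rewrite the extent map ... */

	pthread_rwlock_unlock(&ip->i_lock);
	pthread_rwlock_unlock(&ip->invalidate_lock);
	pthread_rwlock_unlock(&ip->i_rwsem);
}

int main(void)
{
	struct inode_like ip = {
		.i_rwsem = PTHREAD_RWLOCK_INITIALIZER,
		.invalidate_lock = PTHREAD_RWLOCK_INITIALIZER,
		.i_lock = PTHREAD_RWLOCK_INITIALIZER,
	};

	punch_hole(&ip);
	return 0;
}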
@@ -188,10 +188,13 @@ xfs_ilock(
XFS_IOLOCK_DEP(lock_flags));
}
- if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
- else if (lock_flags & XFS_MMAPLOCK_SHARED)
- mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+ if (lock_flags & XFS_MMAPLOCK_EXCL) {
+ down_write_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
+ XFS_MMAPLOCK_DEP(lock_flags));
+ } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
+ down_read_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
+ XFS_MMAPLOCK_DEP(lock_flags));
+ }
if (lock_flags & XFS_ILOCK_EXCL)
mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
@@ -240,10 +243,10 @@ xfs_ilock_nowait(
}
if (lock_flags & XFS_MMAPLOCK_EXCL) {
- if (!mrtryupdate(&ip->i_mmaplock))
+ if (!down_write_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
goto out_undo_iolock;
} else if (lock_flags & XFS_MMAPLOCK_SHARED) {
- if (!mrtryaccess(&ip->i_mmaplock))
+ if (!down_read_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
goto out_undo_iolock;
}
@@ -258,9 +261,9 @@ xfs_ilock_nowait(
out_undo_mmaplock:
if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrunlock_excl(&ip->i_mmaplock);
+ up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
else if (lock_flags & XFS_MMAPLOCK_SHARED)
- mrunlock_shared(&ip->i_mmaplock);
+ up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
out_undo_iolock:
if (lock_flags & XFS_IOLOCK_EXCL)
up_write(&VFS_I(ip)->i_rwsem);
@@ -307,9 +310,9 @@ xfs_iunlock(
up_read(&VFS_I(ip)->i_rwsem);
if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrunlock_excl(&ip->i_mmaplock);
+ up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
else if (lock_flags & XFS_MMAPLOCK_SHARED)
- mrunlock_shared(&ip->i_mmaplock);
+ up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
if (lock_flags & XFS_ILOCK_EXCL)
mrunlock_excl(&ip->i_lock);
@@ -335,7 +338,7 @@ xfs_ilock_demote(
if (lock_flags & XFS_ILOCK_EXCL)
mrdemote(&ip->i_lock);
if (lock_flags & XFS_MMAPLOCK_EXCL)
- mrdemote(&ip->i_mmaplock);
+ downgrade_write(&VFS_I(ip)->i_mapping->invalidate_lock);
if (lock_flags & XFS_IOLOCK_EXCL)
downgrade_write(&VFS_I(ip)->i_rwsem);
@@ -343,9 +346,29 @@ xfs_ilock_demote(
}
#if defined(DEBUG) || defined(XFS_WARN)
-int
+static inline bool
+__xfs_rwsem_islocked(
+ struct rw_semaphore *rwsem,
+ bool shared)
+{
+ if (!debug_locks)
+ return rwsem_is_locked(rwsem);
+
+ if (!shared)
+ return lockdep_is_held_type(rwsem, 0);
+
+ /*
+ * We are checking that the lock is held at least in shared
+ * mode but don't care that it might be held exclusively
+ * (i.e. shared | excl). Hence we check if the lock is held
+ * in any mode rather than an explicit shared mode.
+ */
+ return lockdep_is_held_type(rwsem, -1);
+}
+
+bool
xfs_isilocked(
- xfs_inode_t *ip,
+ struct xfs_inode *ip,
uint lock_flags)
{
if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
@@ -355,20 +378,17 @@ xfs_isilocked(
}
if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
- if (!(lock_flags & XFS_MMAPLOCK_SHARED))
- return !!ip->i_mmaplock.mr_writer;
- return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
+ return __xfs_rwsem_islocked(&VFS_I(ip)->i_mapping->invalidate_lock,
+ (lock_flags & XFS_MMAPLOCK_SHARED));
}
- if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
- if (!(lock_flags & XFS_IOLOCK_SHARED))
- return !debug_locks ||
- lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0);
- return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
+ if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
+ return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
+ (lock_flags & XFS_IOLOCK_SHARED));
}
ASSERT(0);
- return 0;
+ return false;
}
#endif
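__xfs_rwsem_islocked() queries lockdep with -1 ("held in any mode") for shared requests because a caller holding the lock exclusively must still satisfy an at-least-shared assertion. A toy tracked rwlock, standing in for lockdep's bookkeeping, that shows the asymmetry:

#include <stdbool.h>
#include <stdio.h>

struct tracked_rwsem {
	int readers;
	bool writer;
};

static bool rwsem_islocked(const struct tracked_rwsem *rw, bool shared)
{
	if (!shared)
		return rw->writer;	/* exclusive demands a writer */
	/* "At least shared": any holder qualifies, including a writer. */
	return rw->writer || rw->readers > 0;
}

int main(void)
{
	struct tracked_rwsem rw = { .readers = 0, .writer = true };

	printf("%d %d\n", rwsem_islocked(&rw, false),
	       rwsem_islocked(&rw, true));	/* 1 1 */
	return 0;
}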
@@ -532,12 +552,10 @@ again:
}
/*
- * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
- * the mmaplock or the ilock, but not more than one type at a time. If we lock
- * more than one at a time, lockdep will report false positives saying we have
- * violated locking orders. The iolock must be double-locked separately since
- * we use i_rwsem for that. We now support taking one lock EXCL and the other
- * SHARED.
+ * xfs_lock_two_inodes() can only be used to lock ilock. The iolock and
+ * mmaplock must be double-locked separately since we use i_rwsem and
+ * invalidate_lock for that. We now support taking one lock EXCL and the
+ * other SHARED.
*/
void
xfs_lock_two_inodes(
@@ -555,15 +573,8 @@ xfs_lock_two_inodes(
ASSERT(hweight32(ip1_mode) == 1);
ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
- ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
- !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
-
+ ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
+ ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
ASSERT(ip0->i_ino != ip1->i_ino);
if (ip0->i_ino > ip1->i_ino) {
@@ -3741,11 +3752,8 @@ xfs_ilock2_io_mmap(
ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
if (ret)
return ret;
- if (ip1 == ip2)
- xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
- else
- xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL,
- ip2, XFS_MMAPLOCK_EXCL);
+ filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping,
+ VFS_I(ip2)->i_mapping);
return 0;
}
@@ -3755,12 +3763,9 @@ xfs_iunlock2_io_mmap(
struct xfs_inode *ip1,
struct xfs_inode *ip2)
{
- bool same_inode = (ip1 == ip2);
-
- xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
- if (!same_inode)
- xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
+ filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping,
+ VFS_I(ip2)->i_mapping);
inode_unlock(VFS_I(ip2));
- if (!same_inode)
+ if (ip1 != ip2)
inode_unlock(VFS_I(ip1));
}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 4b6703dbffb8..e0ae905554e2 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -40,7 +40,6 @@ typedef struct xfs_inode {
/* Transaction and locking information. */
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
- mrlock_t i_mmaplock; /* inode mmap IO lock */
atomic_t i_pincount; /* inode pin count */
/*
@@ -410,7 +409,7 @@ void xfs_ilock(xfs_inode_t *, uint);
int xfs_ilock_nowait(xfs_inode_t *, uint);
void xfs_iunlock(xfs_inode_t *, uint);
void xfs_ilock_demote(xfs_inode_t *, uint);
-int xfs_isilocked(xfs_inode_t *, uint);
+bool xfs_isilocked(struct xfs_inode *, uint);
uint xfs_ilock_data_map_shared(struct xfs_inode *);
uint xfs_ilock_attr_map_shared(struct xfs_inode *);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 2c9e26a44546..102cbd606633 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -709,8 +709,6 @@ xfs_fs_inode_init_once(
atomic_set(&ip->i_pincount, 0);
spin_lock_init(&ip->i_flags_lock);
- mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
- "xfsino", ip->i_ino);
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
"xfsino", ip->i_ino);
}
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 70055d486bf7..ddc346a9df9b 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -462,7 +462,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
inode_dio_wait(inode);
/* Serialize against page faults */
- down_write(&zi->i_mmap_sem);
+ filemap_invalidate_lock(inode->i_mapping);
/* Serialize against zonefs_iomap_begin() */
mutex_lock(&zi->i_truncate_mutex);
@@ -500,7 +500,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
unlock:
mutex_unlock(&zi->i_truncate_mutex);
- up_write(&zi->i_mmap_sem);
+ filemap_invalidate_unlock(inode->i_mapping);
return ret;
}
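zonefs_file_truncate() keeps the documented nesting with the new lock: the mapping's invalidate_lock (exclusive) is taken first to fence page faults, i_truncate_mutex second to fence the zonefs IO paths. A userspace sketch of that nesting, with pthread primitives as stand-ins for the kernel locks:

#include <pthread.h>

struct zonefs_inode_like {
	pthread_rwlock_t invalidate_lock;	/* replaces i_mmap_sem */
	pthread_mutex_t i_truncate_mutex;
};

static void file_truncate(struct zonefs_inode_like *zi)
{
	pthread_rwlock_wrlock(&zi->invalidate_lock);	/* fence faults */
	pthread_mutex_lock(&zi->i_truncate_mutex);	/* fence zonefs IO */

	/* ... update the zone condition and the file size ... */

	pthread_mutex_unlock(&zi->i_truncate_mutex);
	pthread_rwlock_unlock(&zi->invalidate_lock);
}

int main(void)
{
	struct zonefs_inode_like zi = {
		.invalidate_lock = PTHREAD_RWLOCK_INITIALIZER,
		.i_truncate_mutex = PTHREAD_MUTEX_INITIALIZER,
	};

	file_truncate(&zi);
	return 0;
}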
@@ -575,18 +575,6 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
return ret;
}
-static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf)
-{
- struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file));
- vm_fault_t ret;
-
- down_read(&zi->i_mmap_sem);
- ret = filemap_fault(vmf);
- up_read(&zi->i_mmap_sem);
-
- return ret;
-}
-
static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
@@ -607,16 +595,16 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
file_update_time(vmf->vma->vm_file);
/* Serialize against truncates */
- down_read(&zi->i_mmap_sem);
+ filemap_invalidate_lock_shared(inode->i_mapping);
ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops);
- up_read(&zi->i_mmap_sem);
+ filemap_invalidate_unlock_shared(inode->i_mapping);
sb_end_pagefault(inode->i_sb);
return ret;
}
static const struct vm_operations_struct zonefs_file_vm_ops = {
- .fault = zonefs_filemap_fault,
+ .fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = zonefs_filemap_page_mkwrite,
};
@@ -1155,7 +1143,6 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb)
inode_init_once(&zi->i_vnode);
mutex_init(&zi->i_truncate_mutex);
- init_rwsem(&zi->i_mmap_sem);
zi->i_wr_refcnt = 0;
return &zi->i_vnode;
diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
index 51141907097c..7b147907c328 100644
--- a/fs/zonefs/zonefs.h
+++ b/fs/zonefs/zonefs.h
@@ -70,12 +70,11 @@ struct zonefs_inode_info {
* and changes to the inode private data, and in particular changes to
* a sequential file size on completion of direct IO writes.
* Serialization of mmap read IOs with truncate and syscall IO
- * operations is done with i_mmap_sem in addition to i_truncate_mutex.
- * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first,
- * i_truncate_mutex second).
+ * operations is done with invalidate_lock in addition to
+ * i_truncate_mutex. Only zonefs_seq_file_truncate() takes both locks
+ * (invalidate_lock first, i_truncate_mutex second).
*/
struct mutex i_truncate_mutex;
- struct rw_semaphore i_mmap_sem;
/* guarded by i_truncate_mutex */
unsigned int i_wr_refcnt;