summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/file.c5
-rw-r--r--fs/afs/flock.c4
-rw-r--r--fs/afs/fs_probe.c2
-rw-r--r--fs/afs/inode.c3
-rw-r--r--fs/afs/protocol_yfs.h11
-rw-r--r--fs/afs/rxrpc.c53
-rw-r--r--fs/afs/server_list.c4
-rw-r--r--fs/afs/vl_probe.c2
-rw-r--r--fs/afs/yfsclient.c2
-rw-r--r--fs/autofs/autofs_i.h13
-rw-r--r--fs/autofs/dev-ioctl.c27
-rw-r--r--fs/autofs/init.c2
-rw-r--r--fs/autofs/inode.c67
-rw-r--r--fs/autofs/root.c16
-rw-r--r--fs/autofs/waitq.c10
-rw-r--r--fs/bfs/bfs.h11
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/bfs/inode.c65
-rw-r--r--fs/binfmt_aout.c4
-rw-r--r--fs/binfmt_script.c10
-rw-r--r--fs/btrfs/ctree.c16
-rw-r--r--fs/btrfs/ctree.h11
-rw-r--r--fs/btrfs/disk-io.c12
-rw-r--r--fs/btrfs/extent-tree.c21
-rw-r--r--fs/btrfs/extent_io.c3
-rw-r--r--fs/btrfs/inode.c5
-rw-r--r--fs/btrfs/ioctl.c49
-rw-r--r--fs/btrfs/send.c2
-rw-r--r--fs/btrfs/super.c82
-rw-r--r--fs/btrfs/volumes.c12
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/ceph/addr.c10
-rw-r--r--fs/ceph/caps.c77
-rw-r--r--fs/ceph/inode.c60
-rw-r--r--fs/ceph/mds_client.c129
-rw-r--r--fs/ceph/mds_client.h16
-rw-r--r--fs/ceph/mdsmap.c1
-rw-r--r--fs/ceph/quota.c13
-rw-r--r--fs/ceph/super.c4
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h20
-rw-r--r--fs/cifs/cifssmb.c30
-rw-r--r--fs/cifs/connect.c9
-rw-r--r--fs/cifs/dfs_cache.c1
-rw-r--r--fs/cifs/file.c48
-rw-r--r--fs/cifs/inode.c10
-rw-r--r--fs/cifs/smb2file.c8
-rw-r--r--fs/cifs/smb2ops.c4
-rw-r--r--fs/cifs/smb2pdu.c71
-rw-r--r--fs/cifs/smb2pdu.h2
-rw-r--r--fs/cifs/transport.c122
-rw-r--r--fs/crypto/crypto.c28
-rw-r--r--fs/crypto/fname.c22
-rw-r--r--fs/crypto/fscrypt_private.h67
-rw-r--r--fs/crypto/keyinfo.c351
-rw-r--r--fs/crypto/policy.c5
-rw-r--r--fs/eventpoll.c220
-rw-r--r--fs/exec.c111
-rw-r--r--fs/exofs/super.c37
-rw-r--r--fs/ext4/fsync.c16
-rw-r--r--fs/ext4/inline.c6
-rw-r--r--fs/ext4/inode.c5
-rw-r--r--fs/ext4/readpage.c2
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/fat/cache.c2
-rw-r--r--fs/fat/dir.c8
-rw-r--r--fs/fat/fat.h30
-rw-r--r--fs/fat/fatent.c16
-rw-r--r--fs/fat/inode.c26
-rw-r--r--fs/fat/misc.c2
-rw-r--r--fs/hfsplus/dir.c1
-rw-r--r--fs/hfsplus/hfsplus_fs.h2
-rw-r--r--fs/hfsplus/inode.c21
-rw-r--r--fs/hugetlbfs/inode.c61
-rw-r--r--fs/ioctl.c2
-rw-r--r--fs/locks.c2
-rw-r--r--fs/namespace.c160
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs4file.c8
-rw-r--r--fs/nfs/super.c34
-rw-r--r--fs/notify/inotify/inotify_user.c6
-rw-r--r--fs/ocfs2/aops.c3
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c4
-rw-r--r--fs/orangefs/inode.c2
-rw-r--r--fs/orangefs/orangefs-bufmap.c2
-rw-r--r--fs/pnode.c1
-rw-r--r--fs/proc/base.c18
-rw-r--r--fs/proc/inode.c4
-rw-r--r--fs/proc/util.c1
-rw-r--r--fs/pstore/pmsg.c2
-rw-r--r--fs/pstore/ram.c12
-rw-r--r--fs/pstore/ram_core.c2
-rw-r--r--fs/read_write.c13
-rw-r--r--fs/readdir.c10
-rw-r--r--fs/select.c11
-rw-r--r--fs/super.c24
-rw-r--r--fs/sysfs/dir.c3
-rw-r--r--fs/sysfs/file.c6
-rw-r--r--fs/sysfs/group.c3
-rw-r--r--fs/sysfs/symlink.c3
-rw-r--r--fs/xfs/xfs_buf.c1
-rw-r--r--fs/xfs/xfs_fsops.c1
103 files changed, 1587 insertions, 963 deletions
diff --git a/fs/afs/file.c b/fs/afs/file.c
index d6bc3f5d784b..323ae9912203 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -17,6 +17,7 @@
#include <linux/writeback.h>
#include <linux/gfp.h>
#include <linux/task_io_accounting_ops.h>
+#include <linux/mm.h>
#include "internal.h"
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
@@ -441,7 +442,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
/* Count the number of contiguous pages at the front of the list. Note
* that the list goes prev-wards rather than next-wards.
*/
- first = list_entry(pages->prev, struct page, lru);
+ first = lru_to_page(pages);
index = first->index + 1;
n = 1;
for (p = first->lru.prev; p != pages; p = p->prev) {
@@ -473,7 +474,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
* page at the end of the file.
*/
do {
- page = list_entry(pages->prev, struct page, lru);
+ page = lru_to_page(pages);
list_del(&page->lru);
index = page->index;
if (add_to_page_cache_lru(page, mapping, index,
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 0568fd986821..e432bd27a2e7 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -208,7 +208,7 @@ again:
/* The new front of the queue now owns the state variables. */
next = list_entry(vnode->pending_locks.next,
struct file_lock, fl_u.afs.link);
- vnode->lock_key = afs_file_key(next->fl_file);
+ vnode->lock_key = key_get(afs_file_key(next->fl_file));
vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
goto again;
@@ -413,7 +413,7 @@ static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl)
/* The new front of the queue now owns the state variables. */
next = list_entry(vnode->pending_locks.next,
struct file_lock, fl_u.afs.link);
- vnode->lock_key = afs_file_key(next->fl_file);
+ vnode->lock_key = key_get(afs_file_key(next->fl_file));
vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
afs_lock_may_be_available(vnode);
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index fde6b4d4121e..3a9eaec06756 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -247,7 +247,7 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
}
}
- if (!still_probing || unlikely(signal_pending(current)))
+ if (!still_probing || signal_pending(current))
goto stop;
schedule();
}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 6b17d3620414..1a4ce07fb406 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -414,7 +414,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
valid = true;
} else {
- vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
vnode->cb_v_break = vnode->volume->cb_v_break;
valid = false;
}
@@ -546,6 +545,8 @@ void afs_evict_inode(struct inode *inode)
#endif
afs_put_permits(rcu_access_pointer(vnode->permit_cache));
+ key_put(vnode->lock_key);
+ vnode->lock_key = NULL;
_leave("");
}
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
index 07bc10f076aa..d443e2bfa094 100644
--- a/fs/afs/protocol_yfs.h
+++ b/fs/afs/protocol_yfs.h
@@ -161,3 +161,14 @@ struct yfs_xdr_YFSStoreVolumeStatus {
struct yfs_xdr_u64 max_quota;
struct yfs_xdr_u64 file_quota;
} __packed;
+
+enum yfs_lock_type {
+ yfs_LockNone = -1,
+ yfs_LockRead = 0,
+ yfs_LockWrite = 1,
+ yfs_LockExtend = 2,
+ yfs_LockRelease = 3,
+ yfs_LockMandatoryRead = 0x100,
+ yfs_LockMandatoryWrite = 0x101,
+ yfs_LockMandatoryExtend = 0x102,
+};
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a7b44863d502..2c588f9bbbda 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -23,6 +23,7 @@ struct workqueue_struct *afs_async_calls;
static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_delete_async_call(struct work_struct *);
static void afs_process_async_call(struct work_struct *);
static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
@@ -203,20 +204,26 @@ void afs_put_call(struct afs_call *call)
}
}
+static struct afs_call *afs_get_call(struct afs_call *call,
+ enum afs_call_trace why)
+{
+ int u = atomic_inc_return(&call->usage);
+
+ trace_afs_call(call, why, u,
+ atomic_read(&call->net->nr_outstanding_calls),
+ __builtin_return_address(0));
+ return call;
+}
+
/*
* Queue the call for actual work.
*/
static void afs_queue_call_work(struct afs_call *call)
{
if (call->type->work) {
- int u = atomic_inc_return(&call->usage);
-
- trace_afs_call(call, afs_call_trace_work, u,
- atomic_read(&call->net->nr_outstanding_calls),
- __builtin_return_address(0));
-
INIT_WORK(&call->work, call->type->work);
+ afs_get_call(call, afs_call_trace_work);
if (!queue_work(afs_wq, &call->work))
afs_put_call(call);
}
@@ -398,6 +405,12 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
}
}
+ /* If the call is going to be asynchronous, we need an extra ref for
+ * the call to hold itself so the caller need not hang on to its ref.
+ */
+ if (call->async)
+ afs_get_call(call, afs_call_trace_get);
+
/* create a call */
rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
(unsigned long)call,
@@ -438,15 +451,17 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
goto error_do_abort;
}
- /* at this point, an async call may no longer exist as it may have
- * already completed */
- if (call->async)
+ /* Note that at this point, we may have received the reply or an abort
+ * - and an asynchronous call may already have completed.
+ */
+ if (call->async) {
+ afs_put_call(call);
return -EINPROGRESS;
+ }
return afs_wait_for_call_to_complete(call, ac);
error_do_abort:
- call->state = AFS_CALL_COMPLETE;
if (ret != -ECONNABORTED) {
rxrpc_kernel_abort_call(call->net->socket, rxcall,
RX_USER_ABORT, ret, "KSD");
@@ -463,8 +478,24 @@ error_do_abort:
error_kill_call:
if (call->type->done)
call->type->done(call);
- afs_put_call(call);
+
+ /* We need to dispose of the extra ref we grabbed for an async call.
+ * The call, however, might be queued on afs_async_calls and we need to
+ * make sure we don't get any more notifications that might requeue it.
+ */
+ if (call->rxcall) {
+ rxrpc_kernel_end_call(call->net->socket, call->rxcall);
+ call->rxcall = NULL;
+ }
+ if (call->async) {
+ if (cancel_work_sync(&call->async_work))
+ afs_put_call(call);
+ afs_put_call(call);
+ }
+
ac->error = ret;
+ call->state = AFS_CALL_COMPLETE;
+ afs_put_call(call);
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 95d0761cdb34..155dc14caef9 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -42,9 +42,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
if (vldb->fs_mask[i] & type_mask)
nr_servers++;
- slist = kzalloc(sizeof(struct afs_server_list) +
- sizeof(struct afs_server_entry) * nr_servers,
- GFP_KERNEL);
+ slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL);
if (!slist)
goto error;
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
index f0b032976487..f402ee8171a1 100644
--- a/fs/afs/vl_probe.c
+++ b/fs/afs/vl_probe.c
@@ -248,7 +248,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
}
}
- if (!still_probing || unlikely(signal_pending(current)))
+ if (!still_probing || signal_pending(current))
goto stop;
schedule();
}
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 12658c1363ae..5aa57929e8c2 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -803,7 +803,7 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc,
bp = xdr_encode_YFSFid(bp, &vnode->fid);
bp = xdr_encode_string(bp, name, namesz);
bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
- bp = xdr_encode_u32(bp, 0); /* ViceLockType */
+ bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */
yfs_check_req(call, bp);
afs_use_fs_server(call, fc->cbi);
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 9f9cadbfbd7a..3e59f0ed777b 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -42,6 +42,8 @@
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ":pid:%d:%s: " fmt, current->pid, __func__
+extern struct file_system_type autofs_fs_type;
+
/*
* Unified info structure. This is pointed to by both the dentry and
* inode structures. Each file in the filesystem has an instance of this
@@ -101,16 +103,19 @@ struct autofs_wait_queue {
#define AUTOFS_SBI_MAGIC 0x6d4a556d
+#define AUTOFS_SBI_CATATONIC 0x0001
+#define AUTOFS_SBI_STRICTEXPIRE 0x0002
+
struct autofs_sb_info {
u32 magic;
int pipefd;
struct file *pipe;
struct pid *oz_pgrp;
- int catatonic;
int version;
int sub_version;
int min_proto;
int max_proto;
+ unsigned int flags;
unsigned long exp_timeout;
unsigned int type;
struct super_block *sb;
@@ -126,8 +131,7 @@ struct autofs_sb_info {
static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
{
- return sb->s_magic != AUTOFS_SUPER_MAGIC ?
- NULL : (struct autofs_sb_info *)(sb->s_fs_info);
+ return (struct autofs_sb_info *)(sb->s_fs_info);
}
static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry)
@@ -141,7 +145,8 @@ static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry)
*/
static inline int autofs_oz_mode(struct autofs_sb_info *sbi)
{
- return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
+ return ((sbi->flags & AUTOFS_SBI_CATATONIC) ||
+ task_pgrp(current) == sbi->oz_pgrp);
}
struct inode *autofs_get_inode(struct super_block *, umode_t);
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index 86eafda4a652..e9fe74d1541b 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -151,22 +151,6 @@ out:
return err;
}
-/*
- * Get the autofs super block info struct from the file opened on
- * the autofs mount point.
- */
-static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f)
-{
- struct autofs_sb_info *sbi = NULL;
- struct inode *inode;
-
- if (f) {
- inode = file_inode(f);
- sbi = autofs_sbi(inode->i_sb);
- }
- return sbi;
-}
-
/* Return autofs dev ioctl version */
static int autofs_dev_ioctl_version(struct file *fp,
struct autofs_sb_info *sbi,
@@ -366,7 +350,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
pipefd = param->setpipefd.pipefd;
mutex_lock(&sbi->wq_mutex);
- if (!sbi->catatonic) {
+ if (!(sbi->flags & AUTOFS_SBI_CATATONIC)) {
mutex_unlock(&sbi->wq_mutex);
return -EBUSY;
} else {
@@ -393,7 +377,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
swap(sbi->oz_pgrp, new_pid);
sbi->pipefd = pipefd;
sbi->pipe = pipe;
- sbi->catatonic = 0;
+ sbi->flags &= ~AUTOFS_SBI_CATATONIC;
}
out:
put_pid(new_pid);
@@ -658,6 +642,8 @@ static int _autofs_dev_ioctl(unsigned int command,
if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD &&
cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD &&
cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) {
+ struct super_block *sb;
+
fp = fget(param->ioctlfd);
if (!fp) {
if (cmd == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD)
@@ -666,12 +652,13 @@ static int _autofs_dev_ioctl(unsigned int command,
goto out;
}
- sbi = autofs_dev_ioctl_sbi(fp);
- if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) {
+ sb = file_inode(fp)->i_sb;
+ if (sb->s_type != &autofs_fs_type) {
err = -EINVAL;
fput(fp);
goto out;
}
+ sbi = autofs_sbi(sb);
/*
* Admin needs to be able to set the mount catatonic in
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
index 79ae07d9592f..c0c1db2cc6ea 100644
--- a/fs/autofs/init.c
+++ b/fs/autofs/init.c
@@ -16,7 +16,7 @@ static struct dentry *autofs_mount(struct file_system_type *fs_type,
return mount_nodev(fs_type, flags, data, autofs_fill_super);
}
-static struct file_system_type autofs_fs_type = {
+struct file_system_type autofs_fs_type = {
.owner = THIS_MODULE,
.name = "autofs",
.mount = autofs_mount,
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 846c052569dd..0e8ea2d9a2bb 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -87,6 +87,8 @@ static int autofs_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",direct");
else
seq_printf(m, ",indirect");
+ if (sbi->flags & AUTOFS_SBI_STRICTEXPIRE)
+ seq_printf(m, ",strictexpire");
#ifdef CONFIG_CHECKPOINT_RESTORE
if (sbi->pipe)
seq_printf(m, ",pipe_ino=%ld", file_inode(sbi->pipe)->i_ino);
@@ -109,7 +111,7 @@ static const struct super_operations autofs_sops = {
};
enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto,
- Opt_indirect, Opt_direct, Opt_offset};
+ Opt_indirect, Opt_direct, Opt_offset, Opt_strictexpire};
static const match_table_t tokens = {
{Opt_fd, "fd=%u"},
@@ -121,24 +123,28 @@ static const match_table_t tokens = {
{Opt_indirect, "indirect"},
{Opt_direct, "direct"},
{Opt_offset, "offset"},
+ {Opt_strictexpire, "strictexpire"},
{Opt_err, NULL}
};
-static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
- int *pgrp, bool *pgrp_set, unsigned int *type,
- int *minproto, int *maxproto)
+static int parse_options(char *options,
+ struct inode *root, int *pgrp, bool *pgrp_set,
+ struct autofs_sb_info *sbi)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
+ int pipefd = -1;
+ kuid_t uid;
+ kgid_t gid;
- *uid = current_uid();
- *gid = current_gid();
+ root->i_uid = current_uid();
+ root->i_gid = current_gid();
- *minproto = AUTOFS_MIN_PROTO_VERSION;
- *maxproto = AUTOFS_MAX_PROTO_VERSION;
+ sbi->min_proto = AUTOFS_MIN_PROTO_VERSION;
+ sbi->max_proto = AUTOFS_MAX_PROTO_VERSION;
- *pipefd = -1;
+ sbi->pipefd = -1;
if (!options)
return 1;
@@ -152,22 +158,25 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
token = match_token(p, tokens, args);
switch (token) {
case Opt_fd:
- if (match_int(args, pipefd))
+ if (match_int(args, &pipefd))
return 1;
+ sbi->pipefd = pipefd;
break;
case Opt_uid:
if (match_int(args, &option))
return 1;
- *uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(*uid))
+ uid = make_kuid(current_user_ns(), option);
+ if (!uid_valid(uid))
return 1;
+ root->i_uid = uid;
break;
case Opt_gid:
if (match_int(args, &option))
return 1;
- *gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(*gid))
+ gid = make_kgid(current_user_ns(), option);
+ if (!gid_valid(gid))
return 1;
+ root->i_gid = gid;
break;
case Opt_pgrp:
if (match_int(args, &option))
@@ -178,27 +187,30 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
case Opt_minproto:
if (match_int(args, &option))
return 1;
- *minproto = option;
+ sbi->min_proto = option;
break;
case Opt_maxproto:
if (match_int(args, &option))
return 1;
- *maxproto = option;
+ sbi->max_proto = option;
break;
case Opt_indirect:
- set_autofs_type_indirect(type);
+ set_autofs_type_indirect(&sbi->type);
break;
case Opt_direct:
- set_autofs_type_direct(type);
+ set_autofs_type_direct(&sbi->type);
break;
case Opt_offset:
- set_autofs_type_offset(type);
+ set_autofs_type_offset(&sbi->type);
+ break;
+ case Opt_strictexpire:
+ sbi->flags |= AUTOFS_SBI_STRICTEXPIRE;
break;
default:
return 1;
}
}
- return (*pipefd < 0);
+ return (sbi->pipefd < 0);
}
int autofs_fill_super(struct super_block *s, void *data, int silent)
@@ -206,7 +218,6 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
struct inode *root_inode;
struct dentry *root;
struct file *pipe;
- int pipefd;
struct autofs_sb_info *sbi;
struct autofs_info *ino;
int pgrp = 0;
@@ -222,12 +233,12 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
sbi->magic = AUTOFS_SBI_MAGIC;
sbi->pipefd = -1;
sbi->pipe = NULL;
- sbi->catatonic = 1;
sbi->exp_timeout = 0;
sbi->oz_pgrp = NULL;
sbi->sb = s;
sbi->version = 0;
sbi->sub_version = 0;
+ sbi->flags = AUTOFS_SBI_CATATONIC;
set_autofs_type_indirect(&sbi->type);
sbi->min_proto = 0;
sbi->max_proto = 0;
@@ -262,9 +273,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
root->d_fsdata = ino;
/* Can this call block? */
- if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid,
- &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto,
- &sbi->max_proto)) {
+ if (parse_options(data, root_inode, &pgrp, &pgrp_set, sbi)) {
pr_err("called with bogus options\n");
goto fail_dput;
}
@@ -303,8 +312,9 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
root_inode->i_fop = &autofs_root_operations;
root_inode->i_op = &autofs_dir_inode_operations;
- pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp));
- pipe = fget(pipefd);
+ pr_debug("pipe fd = %d, pgrp = %u\n",
+ sbi->pipefd, pid_nr(sbi->oz_pgrp));
+ pipe = fget(sbi->pipefd);
if (!pipe) {
pr_err("could not open pipe file descriptor\n");
@@ -314,8 +324,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
if (ret < 0)
goto fail_fput;
sbi->pipe = pipe;
- sbi->pipefd = pipefd;
- sbi->catatonic = 0;
+ sbi->flags &= ~AUTOFS_SBI_CATATONIC;
/*
* Success! Install the root dentry now to indicate completion.
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 782e57b911ab..1246f396bf0e 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -275,8 +275,11 @@ static int autofs_mount_wait(const struct path *path, bool rcu_walk)
pr_debug("waiting for mount name=%pd\n", path->dentry);
status = autofs_wait(sbi, path, NFY_MOUNT);
pr_debug("mount wait done status=%d\n", status);
+ ino->last_used = jiffies;
+ return status;
}
- ino->last_used = jiffies;
+ if (!(sbi->flags & AUTOFS_SBI_STRICTEXPIRE))
+ ino->last_used = jiffies;
return status;
}
@@ -510,7 +513,8 @@ static struct dentry *autofs_lookup(struct inode *dir,
sbi = autofs_sbi(dir->i_sb);
pr_debug("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n",
- current->pid, task_pgrp_nr(current), sbi->catatonic,
+ current->pid, task_pgrp_nr(current),
+ sbi->flags & AUTOFS_SBI_CATATONIC,
autofs_oz_mode(sbi));
active = autofs_lookup_active(dentry);
@@ -563,7 +567,7 @@ static int autofs_dir_symlink(struct inode *dir,
* autofs mount is catatonic but the state of an autofs
* file system needs to be preserved over restarts.
*/
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -EACCES;
BUG_ON(!ino);
@@ -626,7 +630,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry)
* autofs mount is catatonic but the state of an autofs
* file system needs to be preserved over restarts.
*/
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -EACCES;
if (atomic_dec_and_test(&ino->count)) {
@@ -714,7 +718,7 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry)
* autofs mount is catatonic but the state of an autofs
* file system needs to be preserved over restarts.
*/
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -EACCES;
spin_lock(&sbi->lookup_lock);
@@ -759,7 +763,7 @@ static int autofs_dir_mkdir(struct inode *dir,
* autofs mount is catatonic but the state of an autofs
* file system needs to be preserved over restarts.
*/
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -EACCES;
pr_debug("dentry %p, creating %pd\n", dentry, dentry);
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index f6385c6ef0a5..15a3e31d0904 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -20,14 +20,14 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
struct autofs_wait_queue *wq, *nwq;
mutex_lock(&sbi->wq_mutex);
- if (sbi->catatonic) {
+ if (sbi->flags & AUTOFS_SBI_CATATONIC) {
mutex_unlock(&sbi->wq_mutex);
return;
}
pr_debug("entering catatonic mode\n");
- sbi->catatonic = 1;
+ sbi->flags |= AUTOFS_SBI_CATATONIC;
wq = sbi->queues;
sbi->queues = NULL; /* Erase all wait queues */
while (wq) {
@@ -255,7 +255,7 @@ static int validate_request(struct autofs_wait_queue **wait,
struct autofs_wait_queue *wq;
struct autofs_info *ino;
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -ENOENT;
/* Wait in progress, continue; */
@@ -290,7 +290,7 @@ static int validate_request(struct autofs_wait_queue **wait,
if (mutex_lock_interruptible(&sbi->wq_mutex))
return -EINTR;
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -ENOENT;
wq = autofs_find_wait(sbi, qstr);
@@ -359,7 +359,7 @@ int autofs_wait(struct autofs_sb_info *sbi,
pid_t tgid;
/* In catatonic mode, we don't wait for nobody */
- if (sbi->catatonic)
+ if (sbi->flags & AUTOFS_SBI_CATATONIC)
return -ENOENT;
/*
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 67aef3bb89e4..606f9378b2f0 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -1,13 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/bfs/bfs.h
- * Copyright (C) 1999 Tigran Aivazian <tigran@veritas.com>
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
*/
#ifndef _FS_BFS_BFS_H
#define _FS_BFS_BFS_H
#include <linux/bfs_fs.h>
+/* In theory BFS supports up to 512 inodes, numbered from 2 (for /) up to 513 inclusive.
+ In actual fact, attempting to create the 512th inode (i.e. inode No. 513 or file No. 511)
+ will fail with ENOSPC in bfs_add_entry(): the root directory cannot contain so many entries, counting '..'.
+ So, mkfs.bfs(8) should really limit its -N option to 511 and not 512. For now, we just print a warning
+ if a filesystem is mounted with such "impossible to fill up" number of inodes */
+#define BFS_MAX_LASTI 513
+
/*
* BFS file system in-core superblock info
*/
@@ -17,7 +24,7 @@ struct bfs_sb_info {
unsigned long si_freei;
unsigned long si_lf_eblk;
unsigned long si_lasti;
- unsigned long *si_imap;
+ DECLARE_BITMAP(si_imap, BFS_MAX_LASTI+1);
struct mutex bfs_lock;
};
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index f32f21c3bbc7..d8dfe3a0cb39 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -2,8 +2,8 @@
/*
* fs/bfs/dir.c
* BFS directory operations.
- * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
- * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
+ * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005
*/
#include <linux/time.h>
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 1476cdd90cfb..0dceefc54b48 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -2,7 +2,7 @@
/*
* fs/bfs/file.c
* BFS file operations.
- * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
*
* Make the file block allocation algorithm understand the size
* of the underlying block device.
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index d81c148682e7..d136b2aaafb3 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -1,10 +1,9 @@
/*
* fs/bfs/inode.c
* BFS superblock and inode operations.
- * Copyright (C) 1999-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
+ * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com>
* From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
- *
- * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
+ * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
*/
#include <linux/module.h>
@@ -118,12 +117,12 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct bfs_sb_info *info = BFS_SB(inode->i_sb);
unsigned int ino = (u16)inode->i_ino;
- unsigned long i_sblock;
+ unsigned long i_sblock;
struct bfs_inode *di;
struct buffer_head *bh;
int err = 0;
- dprintf("ino=%08x\n", ino);
+ dprintf("ino=%08x\n", ino);
di = find_inode(inode->i_sb, ino, &bh);
if (IS_ERR(di))
@@ -144,7 +143,7 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
- i_sblock = BFS_I(inode)->i_sblock;
+ i_sblock = BFS_I(inode)->i_sblock;
di->i_sblock = cpu_to_le32(i_sblock);
di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock);
di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
@@ -188,13 +187,13 @@ static void bfs_evict_inode(struct inode *inode)
mark_buffer_dirty(bh);
brelse(bh);
- if (bi->i_dsk_ino) {
+ if (bi->i_dsk_ino) {
if (bi->i_sblock)
info->si_freeb += bi->i_eblock + 1 - bi->i_sblock;
info->si_freei++;
clear_bit(ino, info->si_imap);
- bfs_dump_imap("delete_inode", s);
- }
+ bfs_dump_imap("evict_inode", s);
+ }
/*
* If this was the last file, make the previous block
@@ -214,7 +213,6 @@ static void bfs_put_super(struct super_block *s)
return;
mutex_destroy(&info->bfs_lock);
- kfree(info->si_imap);
kfree(info);
s->s_fs_info = NULL;
}
@@ -311,8 +309,7 @@ void bfs_dump_imap(const char *prefix, struct super_block *s)
else
strcat(tmpbuf, "0");
}
- printf("BFS-fs: %s: lasti=%08lx <%s>\n",
- prefix, BFS_SB(s)->si_lasti, tmpbuf);
+ printf("%s: lasti=%08lx <%s>\n", prefix, BFS_SB(s)->si_lasti, tmpbuf);
free_page((unsigned long)tmpbuf);
#endif
}
@@ -322,7 +319,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
struct buffer_head *bh, *sbh;
struct bfs_super_block *bfs_sb;
struct inode *inode;
- unsigned i, imap_len;
+ unsigned i;
struct bfs_sb_info *info;
int ret = -EINVAL;
unsigned long i_sblock, i_eblock, i_eoff, s_size;
@@ -341,8 +338,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
bfs_sb = (struct bfs_super_block *)sbh->b_data;
if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
if (!silent)
- printf("No BFS filesystem on %s (magic=%08x)\n",
- s->s_id, le32_to_cpu(bfs_sb->s_magic));
+ printf("No BFS filesystem on %s (magic=%08x)\n", s->s_id, le32_to_cpu(bfs_sb->s_magic));
goto out1;
}
if (BFS_UNCLEAN(bfs_sb, s) && !silent)
@@ -351,18 +347,16 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
s->s_magic = BFS_MAGIC;
if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) ||
- le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) {
- printf("Superblock is corrupted\n");
+ le32_to_cpu(bfs_sb->s_start) < sizeof(struct bfs_super_block) + sizeof(struct bfs_dirent)) {
+ printf("Superblock is corrupted on %s\n", s->s_id);
goto out1;
}
- info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) /
- sizeof(struct bfs_inode)
- + BFS_ROOT_INO - 1;
- imap_len = (info->si_lasti / 8) + 1;
- info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN);
- if (!info->si_imap) {
- printf("Cannot allocate %u bytes\n", imap_len);
+ info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / sizeof(struct bfs_inode) + BFS_ROOT_INO - 1;
+ if (info->si_lasti == BFS_MAX_LASTI)
+ printf("WARNING: filesystem %s was created with 512 inodes, the real maximum is 511, mounting anyway\n", s->s_id);
+ else if (info->si_lasti > BFS_MAX_LASTI) {
+ printf("Impossible last inode number %lu > %d on %s\n", info->si_lasti, BFS_MAX_LASTI, s->s_id);
goto out1;
}
for (i = 0; i < BFS_ROOT_INO; i++)
@@ -372,26 +366,25 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
inode = bfs_iget(s, BFS_ROOT_INO);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
- goto out2;
+ goto out1;
}
s->s_root = d_make_root(inode);
if (!s->s_root) {
ret = -ENOMEM;
- goto out2;
+ goto out1;
}
info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS;
- info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1
- - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS;
+ info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS;
info->si_freei = 0;
info->si_lf_eblk = 0;
/* can we read the last block? */
bh = sb_bread(s, info->si_blocks - 1);
if (!bh) {
- printf("Last block not available: %lu\n", info->si_blocks - 1);
+ printf("Last block not available on %s: %lu\n", s->s_id, info->si_blocks - 1);
ret = -EIO;
- goto out3;
+ goto out2;
}
brelse(bh);
@@ -425,11 +418,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
(i_eoff != le32_to_cpu(-1) && i_eoff > s_size) ||
i_sblock * BFS_BSIZE > i_eoff) {
- printf("Inode 0x%08x corrupted\n", i);
+ printf("Inode 0x%08x corrupted on %s\n", i, s->s_id);
brelse(bh);
ret = -EIO;
- goto out3;
+ goto out2;
}
if (!di->i_ino) {
@@ -445,14 +438,12 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
}
brelse(bh);
brelse(sbh);
- bfs_dump_imap("read_super", s);
+ bfs_dump_imap("fill_super", s);
return 0;
-out3:
+out2:
dput(s->s_root);
s->s_root = NULL;
-out2:
- kfree(info->si_imap);
out1:
brelse(sbh);
out:
@@ -482,7 +473,7 @@ static int __init init_bfs_fs(void)
int err = init_inodecache();
if (err)
goto out1;
- err = register_filesystem(&bfs_fs_type);
+ err = register_filesystem(&bfs_fs_type);
if (err)
goto out;
return 0;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index c3deb2e35f20..ca9725f18e00 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -78,9 +78,9 @@ static int aout_core_dump(struct coredump_params *cprm)
/* make sure we actually have a data and stack area to dump */
set_fs(USER_DS);
- if (!access_ok(VERIFY_READ, START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
+ if (!access_ok(START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
dump.u_dsize = 0;
- if (!access_ok(VERIFY_READ, START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
+ if (!access_ok(START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
dump.u_ssize = 0;
set_fs(KERNEL_DS);
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 7cde3f46ad26..d0078cbb718b 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -42,10 +42,14 @@ static int load_script(struct linux_binprm *bprm)
fput(bprm->file);
bprm->file = NULL;
- bprm->buf[BINPRM_BUF_SIZE - 1] = '\0';
- if ((cp = strchr(bprm->buf, '\n')) == NULL)
- cp = bprm->buf+BINPRM_BUF_SIZE-1;
+ for (cp = bprm->buf+2;; cp++) {
+ if (cp >= bprm->buf + BINPRM_BUF_SIZE)
+ return -ENOEXEC;
+ if (!*cp || (*cp == '\n'))
+ break;
+ }
*cp = '\0';
+
while (cp > bprm->buf) {
cp--;
if ((*cp == ' ') || (*cp == '\t'))
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d92462fe66c8..f64aad613727 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1016,19 +1016,21 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
parent_start = parent->start;
/*
- * If we are COWing a node/leaf from the extent, chunk or device trees,
- * make sure that we do not finish block group creation of pending block
- * groups. We do this to avoid a deadlock.
+ * If we are COWing a node/leaf from the extent, chunk, device or free
+ * space trees, make sure that we do not finish block group creation of
+ * pending block groups. We do this to avoid a deadlock.
* COWing can result in allocation of a new chunk, and flushing pending
* block groups (btrfs_create_pending_block_groups()) can be triggered
* when finishing allocation of a new chunk. Creation of a pending block
- * group modifies the extent, chunk and device trees, therefore we could
- * deadlock with ourselves since we are holding a lock on an extent
- * buffer that btrfs_create_pending_block_groups() may try to COW later.
+ * group modifies the extent, chunk, device and free space trees,
+ * therefore we could deadlock with ourselves since we are holding a
+ * lock on an extent buffer that btrfs_create_pending_block_groups() may
+ * try to COW later.
*/
if (root == fs_info->extent_root ||
root == fs_info->chunk_root ||
- root == fs_info->dev_root)
+ root == fs_info->dev_root ||
+ root == fs_info->free_space_root)
trans->can_flush_pending_bgs = false;
cow = btrfs_alloc_tree_block(trans, root, parent_start,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f031a447a047..7a2a2621f0d9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -35,6 +35,7 @@
struct btrfs_trans_handle;
struct btrfs_transaction;
struct btrfs_pending_snapshot;
+struct btrfs_delayed_ref_root;
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
@@ -786,6 +787,9 @@ enum {
* main phase. The fs_info::balance_ctl is initialized.
*/
BTRFS_FS_BALANCE_RUNNING,
+
+ /* Indicate that the cleaner thread is awake and doing something. */
+ BTRFS_FS_CLEANER_RUNNING,
};
struct btrfs_fs_info {
@@ -1144,9 +1148,6 @@ struct btrfs_fs_info {
struct mutex unused_bg_unpin_mutex;
struct mutex delete_unused_bgs_mutex;
- /* For btrfs to record security options */
- struct security_mnt_opts security_opts;
-
/*
* Chunks that can't be freed yet (under a trim/discard operation)
* and will be latter freed. Protected by fs_info->chunk_mutex.
@@ -2664,6 +2665,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
unsigned long count);
int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
unsigned long count, u64 transid, int wait);
+void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head);
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
@@ -3021,7 +3025,6 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->free_space_root);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
- security_free_mnt_opts(&fs_info->security_opts);
kvfree(fs_info);
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8da2f380d3c0..6a2a2a951705 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1682,6 +1682,8 @@ static int cleaner_kthread(void *arg)
while (1) {
again = 0;
+ set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
+
/* Make the cleaner go to sleep early. */
if (btrfs_need_cleaner_sleep(fs_info))
goto sleep;
@@ -1728,6 +1730,7 @@ static int cleaner_kthread(void *arg)
*/
btrfs_delete_unused_bgs(fs_info);
sleep:
+ clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
if (kthread_should_park())
kthread_parkme();
if (kthread_should_stop())
@@ -4201,6 +4204,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->ordered_root_lock);
}
spin_unlock(&fs_info->ordered_root_lock);
+
+ /*
+ * We need this here because if we've been flipped read-only we won't
+ * get sync() from the umount, so we need to make sure any ordered
+ * extents that haven't had their dirty pages IO start writeout yet
+ * actually get run and error out properly.
+ */
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
}
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
@@ -4265,6 +4276,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
if (pin_bytes)
btrfs_pin_extent(fs_info, head->bytenr,
head->num_bytes, 1);
+ btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
btrfs_put_delayed_ref_head(head);
cond_resched();
spin_lock(&delayed_refs->lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b15afeae16df..d81035b7ea7d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2456,12 +2456,10 @@ static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
return ret ? ret : 1;
}
-static void cleanup_ref_head_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head)
+void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
+ struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head)
{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_delayed_ref_root *delayed_refs =
- &trans->transaction->delayed_refs;
int nr_items = 1; /* Dropping this ref head update. */
if (head->total_ref_mod < 0) {
@@ -2544,7 +2542,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
}
}
- cleanup_ref_head_accounting(trans, head);
+ btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
trace_run_delayed_ref_head(fs_info, head, 0);
btrfs_delayed_ref_unlock(head);
@@ -4954,6 +4952,15 @@ static void flush_space(struct btrfs_fs_info *fs_info,
ret = 0;
break;
case COMMIT_TRANS:
+ /*
+ * If we have pending delayed iputs then we could free up a
+ * bunch of pinned space, so make sure we run the iputs before
+ * we do our pinned bytes check below.
+ */
+ mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
+ btrfs_run_delayed_iputs(fs_info);
+ mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+
ret = may_commit_transaction(fs_info, space_info);
break;
default:
@@ -7188,7 +7195,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
if (head->must_insert_reserved)
ret = 1;
- cleanup_ref_head_accounting(trans, head);
+ btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref_head(head);
return ret;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fc126b92ea59..52abe4082680 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4103,8 +4103,7 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages,
while (!list_empty(pages)) {
for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
- struct page *page = list_entry(pages->prev,
- struct page, lru);
+ struct page *page = lru_to_page(pages);
prefetchw(&page->flags);
list_del(&page->lru);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 43eb4535319d..5c349667c761 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3129,9 +3129,6 @@ out:
/* once for the tree */
btrfs_put_ordered_extent(ordered_extent);
- /* Try to release some metadata so we don't get an OOM but don't wait */
- btrfs_btree_balance_dirty_nodelay(fs_info);
-
return ret;
}
@@ -3254,6 +3251,8 @@ void btrfs_add_delayed_iput(struct inode *inode)
ASSERT(list_empty(&binode->delayed_iput));
list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
spin_unlock(&fs_info->delayed_iput_lock);
+ if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
+ wake_up_process(fs_info->cleaner_kthread);
}
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fab9443f6a42..9c8e1734429c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3221,6 +3221,26 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
inode_lock_nested(inode2, I_MUTEX_CHILD);
}
+static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len)
+{
+ unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+ unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
+static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len)
+{
+ if (inode1 < inode2) {
+ swap(inode1, inode2);
+ swap(loff1, loff2);
+ } else if (inode1 == inode2 && loff2 < loff1) {
+ swap(loff1, loff2);
+ }
+ lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+ lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
struct inode *dst, u64 dst_loff)
{
@@ -3242,11 +3262,12 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
return -EINVAL;
/*
- * Lock destination range to serialize with concurrent readpages().
+ * Lock destination range to serialize with concurrent readpages() and
+ * source range to serialize with relocation.
*/
- lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1);
+ btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
- unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1);
+ btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
return ret;
}
@@ -3905,17 +3926,33 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
len = ALIGN(src->i_size, bs) - off;
if (destoff > inode->i_size) {
+ const u64 wb_start = ALIGN_DOWN(inode->i_size, bs);
+
ret = btrfs_cont_expand(inode, inode->i_size, destoff);
if (ret)
return ret;
+ /*
+ * We may have truncated the last block if the inode's size is
+ * not sector size aligned, so we need to wait for writeback to
+ * complete before proceeding further, otherwise we can race
+ * with cloning and attempt to increment a reference to an
+ * extent that no longer exists (writeback completed right after
+ * we found the previous extent covering eof and before we
+ * attempted to increment its reference count).
+ */
+ ret = btrfs_wait_ordered_range(inode, wb_start,
+ destoff - wb_start);
+ if (ret)
+ return ret;
}
/*
- * Lock destination range to serialize with concurrent readpages().
+ * Lock destination range to serialize with concurrent readpages() and
+ * source range to serialize with relocation.
*/
- lock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1);
+ btrfs_double_extent_lock(src, off, inode, destoff, len);
ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
- unlock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1);
+ btrfs_double_extent_unlock(src, off, inode, destoff, len);
/*
* Truncate page cache pages so that future reads will see the cloned
* data immediately and not the previous data.
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 1b15b43905f8..7ea2d6b1f170 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6646,7 +6646,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
goto out;
}
- if (!access_ok(VERIFY_READ, arg->clone_sources,
+ if (!access_ok(arg->clone_sources,
sizeof(*arg->clone_sources) *
arg->clone_sources_count)) {
ret = -EFAULT;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 368a5b9e6c13..c5586ffd1426 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1458,56 +1458,6 @@ out:
return root;
}
-static int parse_security_options(char *orig_opts,
- struct security_mnt_opts *sec_opts)
-{
- char *secdata = NULL;
- int ret = 0;
-
- secdata = alloc_secdata();
- if (!secdata)
- return -ENOMEM;
- ret = security_sb_copy_data(orig_opts, secdata);
- if (ret) {
- free_secdata(secdata);
- return ret;
- }
- ret = security_sb_parse_opts_str(secdata, sec_opts);
- free_secdata(secdata);
- return ret;
-}
-
-static int setup_security_options(struct btrfs_fs_info *fs_info,
- struct super_block *sb,
- struct security_mnt_opts *sec_opts)
-{
- int ret = 0;
-
- /*
- * Call security_sb_set_mnt_opts() to check whether new sec_opts
- * is valid.
- */
- ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL);
- if (ret)
- return ret;
-
-#ifdef CONFIG_SECURITY
- if (!fs_info->security_opts.num_mnt_opts) {
- /* first time security setup, copy sec_opts to fs_info */
- memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts));
- } else {
- /*
- * Since SELinux (the only one supporting security_mnt_opts)
- * does NOT support changing context during remount/mount of
- * the same sb, this must be the same or part of the same
- * security options, just free it.
- */
- security_free_mnt_opts(sec_opts);
- }
-#endif
- return ret;
-}
-
/*
* Find a superblock for the given device / mount point.
*
@@ -1522,16 +1472,15 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
struct btrfs_device *device = NULL;
struct btrfs_fs_devices *fs_devices = NULL;
struct btrfs_fs_info *fs_info = NULL;
- struct security_mnt_opts new_sec_opts;
+ void *new_sec_opts = NULL;
fmode_t mode = FMODE_READ;
int error = 0;
if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
- security_init_mnt_opts(&new_sec_opts);
if (data) {
- error = parse_security_options(data, &new_sec_opts);
+ error = security_sb_eat_lsm_opts(data, &new_sec_opts);
if (error)
return ERR_PTR(error);
}
@@ -1550,7 +1499,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
- security_init_mnt_opts(&fs_info->security_opts);
if (!fs_info->super_copy || !fs_info->super_for_commit) {
error = -ENOMEM;
goto error_fs_info;
@@ -1601,16 +1549,12 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
btrfs_sb(s)->bdev_holder = fs_type;
error = btrfs_fill_super(s, fs_devices, data);
}
+ if (!error)
+ error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL);
+ security_free_mnt_opts(&new_sec_opts);
if (error) {
deactivate_locked_super(s);
- goto error_sec_opts;
- }
-
- fs_info = btrfs_sb(s);
- error = setup_security_options(fs_info, s, &new_sec_opts);
- if (error) {
- deactivate_locked_super(s);
- goto error_sec_opts;
+ return ERR_PTR(error);
}
return dget(s->s_root);
@@ -1779,18 +1723,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
btrfs_remount_prepare(fs_info);
if (data) {
- struct security_mnt_opts new_sec_opts;
+ void *new_sec_opts = NULL;
- security_init_mnt_opts(&new_sec_opts);
- ret = parse_security_options(data, &new_sec_opts);
+ ret = security_sb_eat_lsm_opts(data, &new_sec_opts);
+ if (!ret)
+ ret = security_sb_remount(sb, new_sec_opts);
+ security_free_mnt_opts(&new_sec_opts);
if (ret)
goto restore;
- ret = setup_security_options(fs_info, sb,
- &new_sec_opts);
- if (ret) {
- security_free_mnt_opts(&new_sec_opts);
- goto restore;
- }
}
ret = btrfs_parse_options(fs_info, data, *flags);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2576b1a379c9..3e4f8f88353e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7825,6 +7825,18 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
ret = -EUCLEAN;
goto out;
}
+
+ /* It's possible this device is a dummy for seed device */
+ if (dev->disk_total_bytes == 0) {
+ dev = find_device(fs_info->fs_devices->seed, devid, NULL);
+ if (!dev) {
+ btrfs_err(fs_info, "failed to find seed devid %llu",
+ devid);
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+
if (physical_offset + physical_len > dev->disk_total_bytes) {
btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
diff --git a/fs/buffer.c b/fs/buffer.c
index d60d61e8ed7d..52d024bfdbc1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2366,7 +2366,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
balance_dirty_pages_ratelimited(mapping);
- if (unlikely(fatal_signal_pending(current))) {
+ if (fatal_signal_pending(current)) {
err = -EINTR;
goto out;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8eade7a993c1..a47c541f8006 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -306,7 +306,7 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct page *page = list_entry(page_list->prev, struct page, lru);
+ struct page *page = lru_to_page(page_list);
struct ceph_vino vino;
struct ceph_osd_request *req;
u64 off;
@@ -333,8 +333,7 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
if (got)
ceph_put_cap_refs(ci, got);
while (!list_empty(page_list)) {
- page = list_entry(page_list->prev,
- struct page, lru);
+ page = lru_to_page(page_list);
list_del(&page->lru);
put_page(page);
}
@@ -1495,10 +1494,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
if (err < 0 || off >= i_size_read(inode)) {
unlock_page(page);
put_page(page);
- if (err == -ENOMEM)
- ret = VM_FAULT_OOM;
- else
- ret = VM_FAULT_SIGBUS;
+ ret = vmf_error(err);
goto out_inline;
}
if (err < PAGE_SIZE)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f3496db4bb3e..bba28a5034ba 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -657,6 +657,9 @@ void ceph_add_cap(struct inode *inode,
session->s_nr_caps++;
spin_unlock(&session->s_cap_lock);
} else {
+ if (cap->cap_gen < session->s_cap_gen)
+ cap->issued = cap->implemented = CEPH_CAP_PIN;
+
/*
* auth mds of the inode changed. we received the cap export
* message, but still haven't received the cap import message.
@@ -1032,6 +1035,8 @@ static void drop_inode_snap_realm(struct ceph_inode_info *ci)
list_del_init(&ci->i_snap_realm_item);
ci->i_snap_realm_counter++;
ci->i_snap_realm = NULL;
+ if (realm->ino == ci->i_vino.ino)
+ realm->inode = NULL;
spin_unlock(&realm->inodes_with_caps_lock);
ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
realm);
@@ -1855,14 +1860,17 @@ retry_locked:
retain |= CEPH_CAP_ANY; /* be greedy */
} else if (S_ISDIR(inode->i_mode) &&
(issued & CEPH_CAP_FILE_SHARED) &&
- __ceph_dir_is_complete(ci)) {
+ __ceph_dir_is_complete(ci)) {
/*
* If a directory is complete, we want to keep
* the exclusive cap. So that MDS does not end up
* revoking the shared cap on every create/unlink
* operation.
*/
- want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
+ if (IS_RDONLY(inode))
+ want = CEPH_CAP_ANY_SHARED;
+ else
+ want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
retain |= want;
} else {
@@ -1970,8 +1978,7 @@ retry_locked:
goto ack;
/* things we might delay */
- if ((cap->issued & ~retain) == 0 &&
- cap->mds_wanted == want)
+ if ((cap->issued & ~retain) == 0)
continue; /* nope, all good */
if (no_delay)
@@ -3048,7 +3055,8 @@ static void handle_cap_grant(struct inode *inode,
int used, wanted, dirty;
u64 size = le64_to_cpu(grant->size);
u64 max_size = le64_to_cpu(grant->max_size);
- int check_caps = 0;
+ unsigned char check_caps = 0;
+ bool was_stale = cap->cap_gen < session->s_cap_gen;
bool wake = false;
bool writeback = false;
bool queue_trunc = false;
@@ -3063,21 +3071,6 @@ static void handle_cap_grant(struct inode *inode,
/*
- * auth mds of the inode changed. we received the cap export message,
- * but still haven't received the cap import message. handle_cap_export
- * updated the new auth MDS' cap.
- *
- * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
- * that was sent before the cap import message. So don't remove caps.
- */
- if (ceph_seq_cmp(seq, cap->seq) <= 0) {
- WARN_ON(cap != ci->i_auth_cap);
- WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
- seq = cap->seq;
- newcaps |= cap->issued;
- }
-
- /*
* If CACHE is being revoked, and we have no dirty buffers,
* try to invalidate (once). (If there are dirty buffers, we
* will invalidate _after_ writeback.)
@@ -3096,6 +3089,24 @@ static void handle_cap_grant(struct inode *inode,
}
}
+ if (was_stale)
+ cap->issued = cap->implemented = CEPH_CAP_PIN;
+
+ /*
+ * auth mds of the inode changed. we received the cap export message,
+ * but still haven't received the cap import message. handle_cap_export
+ * updated the new auth MDS' cap.
+ *
+ * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
+ * that was sent before the cap import message. So don't remove caps.
+ */
+ if (ceph_seq_cmp(seq, cap->seq) <= 0) {
+ WARN_ON(cap != ci->i_auth_cap);
+ WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
+ seq = cap->seq;
+ newcaps |= cap->issued;
+ }
+
/* side effects now are allowed */
cap->cap_gen = session->s_cap_gen;
cap->seq = seq;
@@ -3200,13 +3211,20 @@ static void handle_cap_grant(struct inode *inode,
ceph_cap_string(wanted),
ceph_cap_string(used),
ceph_cap_string(dirty));
- if (wanted != le32_to_cpu(grant->wanted)) {
- dout("mds wanted %s -> %s\n",
- ceph_cap_string(le32_to_cpu(grant->wanted)),
- ceph_cap_string(wanted));
- /* imported cap may not have correct mds_wanted */
- if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT)
- check_caps = 1;
+
+ if ((was_stale || le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) &&
+ (wanted & ~(cap->mds_wanted | newcaps))) {
+ /*
+ * If mds is importing cap, prior cap messages that update
+ * 'wanted' may get dropped by mds (migrate seq mismatch).
+ *
+ * We don't send cap message to update 'wanted' if what we
+ * want are already issued. If mds revokes caps, cap message
+ * that releases caps also tells mds what we want. But if
+ * caps got revoked by mds forcedly (session stale). We may
+ * haven't told mds what we want.
+ */
+ check_caps = 1;
}
/* revocation, grant, or no-op? */
@@ -3539,9 +3557,9 @@ retry:
goto out_unlock;
if (target < 0) {
- __ceph_remove_cap(cap, false);
- if (!ci->i_auth_cap)
+ if (cap->mds_wanted | cap->issued)
ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
+ __ceph_remove_cap(cap, false);
goto out_unlock;
}
@@ -3569,7 +3587,6 @@ retry:
tcap->cap_id = t_cap_id;
tcap->seq = t_seq - 1;
tcap->issue_seq = t_seq - 1;
- tcap->mseq = t_mseq;
tcap->issued |= issued;
tcap->implemented |= issued;
if (cap == ci->i_auth_cap)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 79dd5e6ed755..9d1f34d46627 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1098,8 +1098,9 @@ out_unlock:
* splice a dentry to an inode.
* caller must hold directory i_mutex for this to be safe.
*/
-static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
+static int splice_dentry(struct dentry **pdn, struct inode *in)
{
+ struct dentry *dn = *pdn;
struct dentry *realdn;
BUG_ON(d_inode(dn));
@@ -1132,28 +1133,23 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
if (IS_ERR(realdn)) {
pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
PTR_ERR(realdn), dn, in, ceph_vinop(in));
- dn = realdn;
- /*
- * Caller should release 'dn' in the case of error.
- * If 'req->r_dentry' is passed to this function,
- * caller should leave 'req->r_dentry' untouched.
- */
- goto out;
- } else if (realdn) {
+ return PTR_ERR(realdn);
+ }
+
+ if (realdn) {
dout("dn %p (%d) spliced with %p (%d) "
"inode %p ino %llx.%llx\n",
dn, d_count(dn),
realdn, d_count(realdn),
d_inode(realdn), ceph_vinop(d_inode(realdn)));
dput(dn);
- dn = realdn;
+ *pdn = realdn;
} else {
BUG_ON(!ceph_dentry(dn));
dout("dn %p attached to %p ino %llx.%llx\n",
dn, d_inode(dn), ceph_vinop(d_inode(dn)));
}
-out:
- return dn;
+ return 0;
}
/*
@@ -1340,7 +1336,12 @@ retry_lookup:
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
ceph_dentry(req->r_old_dentry)->offset);
- dn = req->r_old_dentry; /* use old_dentry */
+ /* swap r_dentry and r_old_dentry in case that
+ * splice_dentry() gets called later. This is safe
+ * because no other place will use them */
+ req->r_dentry = req->r_old_dentry;
+ req->r_old_dentry = dn;
+ dn = req->r_dentry;
}
/* null dentry? */
@@ -1365,12 +1366,10 @@ retry_lookup:
if (d_really_is_negative(dn)) {
ceph_dir_clear_ordered(dir);
ihold(in);
- dn = splice_dentry(dn, in);
- if (IS_ERR(dn)) {
- err = PTR_ERR(dn);
+ err = splice_dentry(&req->r_dentry, in);
+ if (err < 0)
goto done;
- }
- req->r_dentry = dn; /* may have spliced */
+ dn = req->r_dentry; /* may have spliced */
} else if (d_really_is_positive(dn) && d_inode(dn) != in) {
dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
dn, d_inode(dn), ceph_vinop(d_inode(dn)),
@@ -1390,22 +1389,18 @@ retry_lookup:
} else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
req->r_op == CEPH_MDS_OP_MKSNAP) &&
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
- struct dentry *dn = req->r_dentry;
struct inode *dir = req->r_parent;
/* fill out a snapdir LOOKUPSNAP dentry */
- BUG_ON(!dn);
BUG_ON(!dir);
BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
- dout(" linking snapped dir %p to dn %p\n", in, dn);
+ BUG_ON(!req->r_dentry);
+ dout(" linking snapped dir %p to dn %p\n", in, req->r_dentry);
ceph_dir_clear_ordered(dir);
ihold(in);
- dn = splice_dentry(dn, in);
- if (IS_ERR(dn)) {
- err = PTR_ERR(dn);
+ err = splice_dentry(&req->r_dentry, in);
+ if (err < 0)
goto done;
- }
- req->r_dentry = dn; /* may have spliced */
} else if (rinfo->head->is_dentry) {
struct ceph_vino *ptvino = NULL;
@@ -1669,8 +1664,6 @@ retry_lookup:
}
if (d_really_is_negative(dn)) {
- struct dentry *realdn;
-
if (ceph_security_xattr_deadlock(in)) {
dout(" skip splicing dn %p to inode %p"
" (security xattr deadlock)\n", dn, in);
@@ -1679,13 +1672,9 @@ retry_lookup:
goto next_item;
}
- realdn = splice_dentry(dn, in);
- if (IS_ERR(realdn)) {
- err = PTR_ERR(realdn);
- d_drop(dn);
+ err = splice_dentry(&dn, in);
+ if (err < 0)
goto next_item;
- }
- dn = realdn;
}
ceph_dentry(dn)->offset = rde->offset;
@@ -1701,8 +1690,7 @@ retry_lookup:
err = ret;
}
next_item:
- if (dn)
- dput(dn);
+ dput(dn);
}
out:
if (err == 0 && skipped == 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index bd13a3267ae0..163fc74bf221 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1232,13 +1232,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
dout("removing cap %p, ci is %p, inode is %p\n",
cap, ci, &ci->vfs_inode);
spin_lock(&ci->i_ceph_lock);
+ if (cap->mds_wanted | cap->issued)
+ ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
__ceph_remove_cap(cap, false);
if (!ci->i_auth_cap) {
struct ceph_cap_flush *cf;
struct ceph_mds_client *mdsc = fsc->mdsc;
- ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
-
if (ci->i_wrbuffer_ref > 0 &&
READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
invalidate = true;
@@ -1355,6 +1355,12 @@ static void remove_session_caps(struct ceph_mds_session *session)
dispose_cap_releases(session->s_mdsc, &dispose);
}
+enum {
+ RECONNECT,
+ RENEWCAPS,
+ FORCE_RO,
+};
+
/*
* wake up any threads waiting on this session's caps. if the cap is
* old (didn't get renewed on the client reconnect), remove it now.
@@ -1365,23 +1371,34 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
void *arg)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ unsigned long ev = (unsigned long)arg;
- if (arg) {
+ if (ev == RECONNECT) {
spin_lock(&ci->i_ceph_lock);
ci->i_wanted_max_size = 0;
ci->i_requested_max_size = 0;
spin_unlock(&ci->i_ceph_lock);
+ } else if (ev == RENEWCAPS) {
+ if (cap->cap_gen < cap->session->s_cap_gen) {
+ /* mds did not re-issue stale cap */
+ spin_lock(&ci->i_ceph_lock);
+ cap->issued = cap->implemented = CEPH_CAP_PIN;
+ /* make sure mds knows what we want */
+ if (__ceph_caps_file_wanted(ci) & ~cap->mds_wanted)
+ ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
+ spin_unlock(&ci->i_ceph_lock);
+ }
+ } else if (ev == FORCE_RO) {
}
wake_up_all(&ci->i_cap_wq);
return 0;
}
-static void wake_up_session_caps(struct ceph_mds_session *session,
- int reconnect)
+static void wake_up_session_caps(struct ceph_mds_session *session, int ev)
{
dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
iterate_session_caps(session, wake_up_session_cb,
- (void *)(unsigned long)reconnect);
+ (void *)(unsigned long)ev);
}
/*
@@ -1466,7 +1483,7 @@ static void renewed_caps(struct ceph_mds_client *mdsc,
spin_unlock(&session->s_cap_lock);
if (wake)
- wake_up_session_caps(session, 0);
+ wake_up_session_caps(session, RENEWCAPS);
}
/*
@@ -2847,7 +2864,7 @@ static void handle_session(struct ceph_mds_session *session,
spin_lock(&session->s_cap_lock);
session->s_readonly = true;
spin_unlock(&session->s_cap_lock);
- wake_up_session_caps(session, 0);
+ wake_up_session_caps(session, FORCE_RO);
break;
case CEPH_SESSION_REJECT:
@@ -2943,11 +2960,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_inode_info *ci = cap->ci;
struct ceph_reconnect_state *recon_state = arg;
struct ceph_pagelist *pagelist = recon_state->pagelist;
- char *path;
- int pathlen, err;
- u64 pathbase;
+ int err;
u64 snap_follows;
- struct dentry *dentry;
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
inode, ceph_vinop(inode), cap, cap->cap_id,
@@ -2956,19 +2970,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
if (err)
return err;
- dentry = d_find_alias(inode);
- if (dentry) {
- path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out_dput;
- }
- } else {
- path = NULL;
- pathlen = 0;
- pathbase = 0;
- }
-
spin_lock(&ci->i_ceph_lock);
cap->seq = 0; /* reset cap seq */
cap->issue_seq = 0; /* and issue_seq */
@@ -2980,7 +2981,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
rec.v2.issued = cpu_to_le32(cap->issued);
rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
- rec.v2.pathbase = cpu_to_le64(pathbase);
+ rec.v2.pathbase = 0;
rec.v2.flock_len = (__force __le32)
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
} else {
@@ -2991,7 +2992,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
- rec.v1.pathbase = cpu_to_le64(pathbase);
+ rec.v1.pathbase = 0;
}
if (list_empty(&ci->i_cap_snaps)) {
@@ -3023,7 +3024,7 @@ encode_again:
GFP_NOFS);
if (!flocks) {
err = -ENOMEM;
- goto out_free;
+ goto out_err;
}
err = ceph_encode_locks_to_buffer(inode, flocks,
num_fcntl_locks,
@@ -3033,7 +3034,7 @@ encode_again:
flocks = NULL;
if (err == -ENOSPC)
goto encode_again;
- goto out_free;
+ goto out_err;
}
} else {
kfree(flocks);
@@ -3053,44 +3054,64 @@ encode_again:
sizeof(struct ceph_filelock);
rec.v2.flock_len = cpu_to_le32(struct_len);
- struct_len += sizeof(rec.v2);
- struct_len += sizeof(u32) + pathlen;
+ struct_len += sizeof(u32) + sizeof(rec.v2);
if (struct_v >= 2)
struct_len += sizeof(u64); /* snap_follows */
total_len += struct_len;
err = ceph_pagelist_reserve(pagelist, total_len);
+ if (err) {
+ kfree(flocks);
+ goto out_err;
+ }
- if (!err) {
- if (recon_state->msg_version >= 3) {
- ceph_pagelist_encode_8(pagelist, struct_v);
- ceph_pagelist_encode_8(pagelist, 1);
- ceph_pagelist_encode_32(pagelist, struct_len);
- }
- ceph_pagelist_encode_string(pagelist, path, pathlen);
- ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
- ceph_locks_to_pagelist(flocks, pagelist,
- num_fcntl_locks,
- num_flock_locks);
- if (struct_v >= 2)
- ceph_pagelist_encode_64(pagelist, snap_follows);
+ if (recon_state->msg_version >= 3) {
+ ceph_pagelist_encode_8(pagelist, struct_v);
+ ceph_pagelist_encode_8(pagelist, 1);
+ ceph_pagelist_encode_32(pagelist, struct_len);
}
+ ceph_pagelist_encode_string(pagelist, NULL, 0);
+ ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
+ ceph_locks_to_pagelist(flocks, pagelist,
+ num_fcntl_locks, num_flock_locks);
+ if (struct_v >= 2)
+ ceph_pagelist_encode_64(pagelist, snap_follows);
+
kfree(flocks);
} else {
- size_t size = sizeof(u32) + pathlen + sizeof(rec.v1);
- err = ceph_pagelist_reserve(pagelist, size);
- if (!err) {
- ceph_pagelist_encode_string(pagelist, path, pathlen);
- ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
+ u64 pathbase = 0;
+ int pathlen = 0;
+ char *path = NULL;
+ struct dentry *dentry;
+
+ dentry = d_find_alias(inode);
+ if (dentry) {
+ path = ceph_mdsc_build_path(dentry,
+ &pathlen, &pathbase, 0);
+ dput(dentry);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out_err;
+ }
+ rec.v1.pathbase = cpu_to_le64(pathbase);
}
+
+ err = ceph_pagelist_reserve(pagelist,
+ pathlen + sizeof(u32) + sizeof(rec.v1));
+ if (err) {
+ kfree(path);
+ goto out_err;
+ }
+
+ ceph_pagelist_encode_string(pagelist, path, pathlen);
+ ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
+
+ kfree(path);
}
recon_state->nr_caps++;
-out_free:
- kfree(path);
-out_dput:
- dput(dentry);
+out_err:
return err;
}
@@ -3339,7 +3360,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
pr_info("mds%d recovery completed\n", s->s_mds);
kick_requests(mdsc, i);
ceph_kick_flushing_caps(mdsc, s);
- wake_up_session_caps(s, 1);
+ wake_up_session_caps(s, RECONNECT);
}
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 32fcce0d4d3c..729da155ebf0 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -17,14 +17,16 @@
#include <linux/ceph/auth.h>
/* The first 8 bits are reserved for old ceph releases */
-#define CEPHFS_FEATURE_MIMIC 8
-
-#define CEPHFS_FEATURES_ALL { \
- 0, 1, 2, 3, 4, 5, 6, 7, \
- CEPHFS_FEATURE_MIMIC, \
+#define CEPHFS_FEATURE_MIMIC 8
+#define CEPHFS_FEATURE_REPLY_ENCODING 9
+#define CEPHFS_FEATURE_RECLAIM_CLIENT 10
+#define CEPHFS_FEATURE_LAZY_CAP_WANTED 11
+
+#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \
+ 0, 1, 2, 3, 4, 5, 6, 7, \
+ CEPHFS_FEATURE_MIMIC, \
+ CEPHFS_FEATURE_LAZY_CAP_WANTED, \
}
-
-#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL
#define CEPHFS_FEATURES_CLIENT_REQUIRED {}
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 44e53abeb32a..1a2c5d390f7f 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -35,7 +35,6 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
/* pick */
n = prandom_u32() % n;
- i = 0;
for (i = 0; n > 0; i++, n--)
while (m->m_info[i].state <= 0)
i++;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 03f4d24db8fe..9455d3aef0c3 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -3,19 +3,6 @@
* quota.c - CephFS quota
*
* Copyright (C) 2017-2018 SUSE
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/statfs.h>
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4e9a7cc488da..da2cd8e89062 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -530,7 +530,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_putc(m, ',');
pos = m->count;
- ret = ceph_print_client_options(m, fsc->client);
+ ret = ceph_print_client_options(m, fsc->client, false);
if (ret)
return ret;
@@ -640,7 +640,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
opt = NULL; /* fsc->client now owns this */
fsc->client->extra_mon_dispatch = extra_mon_dispatch;
- fsc->client->osdc.abort_on_full = true;
+ ceph_set_opt(fsc->client, ABORT_ON_FULL);
if (!fsopt->mds_namespace) {
ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 26776eddd85d..d1f9c2f3f575 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.15"
+#define CIFS_VERSION "2.16"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 01ded7038b19..94dbdbe5be34 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1438,6 +1438,7 @@ struct mid_q_entry {
int mid_state; /* wish this were enum but can not pass to wait_event */
unsigned int mid_flags;
__le16 command; /* smb command code */
+ unsigned int optype; /* operation type */
bool large_buf:1; /* if valid response, is pointer to large buf */
bool multiRsp:1; /* multiple trans2 responses for one request */
bool multiEnd:1; /* both received */
@@ -1574,6 +1575,25 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
kfree(param);
}
+static inline bool is_interrupt_error(int error)
+{
+ switch (error) {
+ case -EINTR:
+ case -ERESTARTSYS:
+ case -ERESTARTNOHAND:
+ case -ERESTARTNOINTR:
+ return true;
+ }
+ return false;
+}
+
+static inline bool is_retryable_error(int error)
+{
+ if (is_interrupt_error(error) || error == -EAGAIN)
+ return true;
+ return false;
+}
+
#define MID_FREE 0
#define MID_REQUEST_ALLOCATED 1
#define MID_REQUEST_SUBMITTED 2
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b1f49c1c543a..e18915415e13 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -128,24 +128,31 @@ static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
int rc;
struct dfs_cache_tgt_list tl;
struct dfs_cache_tgt_iterator *it = NULL;
- char tree[MAX_TREE_SIZE + 1];
+ char *tree;
const char *tcp_host;
size_t tcp_host_len;
const char *dfs_host;
size_t dfs_host_len;
+ tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
+ if (!tree)
+ return -ENOMEM;
+
if (tcon->ipc) {
- snprintf(tree, sizeof(tree), "\\\\%s\\IPC$",
+ snprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$",
tcon->ses->server->hostname);
- return CIFSTCon(0, tcon->ses, tree, tcon, nlsc);
+ rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc);
+ goto out;
}
- if (!tcon->dfs_path)
- return CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nlsc);
+ if (!tcon->dfs_path) {
+ rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nlsc);
+ goto out;
+ }
rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl);
if (rc)
- return rc;
+ goto out;
extract_unc_hostname(tcon->ses->server->hostname, &tcp_host,
&tcp_host_len);
@@ -165,7 +172,7 @@ static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
continue;
}
- snprintf(tree, sizeof(tree), "\\%s", tgt);
+ snprintf(tree, MAX_TREE_SIZE, "\\%s", tgt);
rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc);
if (!rc)
@@ -182,6 +189,8 @@ static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
rc = -ENOENT;
}
dfs_cache_free_tgts(&tl);
+out:
+ kfree(tree);
return rc;
}
#else
@@ -2114,7 +2123,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
for (j = 0; j < nr_pages; j++) {
unlock_page(wdata2->pages[j]);
- if (rc != 0 && rc != -EAGAIN) {
+ if (rc != 0 && !is_retryable_error(rc)) {
SetPageError(wdata2->pages[j]);
end_page_writeback(wdata2->pages[j]);
put_page(wdata2->pages[j]);
@@ -2123,7 +2132,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
if (rc) {
kref_put(&wdata2->refcount, cifs_writedata_release);
- if (rc == -EAGAIN)
+ if (is_retryable_error(rc))
continue;
break;
}
@@ -2132,7 +2141,8 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
i += nr_pages;
} while (i < wdata->nr_pages);
- mapping_set_error(inode->i_mapping, rc);
+ if (rc != 0 && !is_retryable_error(rc))
+ mapping_set_error(inode->i_mapping, rc);
kref_put(&wdata->refcount, cifs_writedata_release);
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 69b9d5606eba..683310f26171 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -433,9 +433,10 @@ static void reconn_inval_dfs_target(struct TCP_Server_Info *server,
kfree(server->hostname);
server->hostname = extract_hostname(name);
- if (!server->hostname) {
- cifs_dbg(FYI, "%s: failed to extract hostname from target: %d\n",
- __func__, -ENOMEM);
+ if (IS_ERR(server->hostname)) {
+ cifs_dbg(FYI,
+ "%s: failed to extract hostname from target: %ld\n",
+ __func__, PTR_ERR(server->hostname));
}
}
@@ -483,7 +484,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
cifs_sb = NULL;
} else {
rc = reconn_setup_dfs_targets(cifs_sb, &tgt_list, &tgt_it);
- if (rc) {
+ if (rc && (rc != -EOPNOTSUPP)) {
cifs_dbg(VFS, "%s: no target servers for DFS failover\n",
__func__);
} else {
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index cd63c4a70875..09b7d0d4f6e4 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -776,6 +776,7 @@ static int get_tgt_list(const struct dfs_cache_entry *ce,
it->it_name = kstrndup(t->t_name, strlen(t->t_name),
GFP_KERNEL);
if (!it->it_name) {
+ kfree(it);
rc = -ENOMEM;
goto err_free_it;
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5e405164394a..2c7689f3998d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -33,6 +33,7 @@
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
+#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
@@ -732,7 +733,8 @@ reopen_success:
if (can_flush) {
rc = filemap_write_and_wait(inode->i_mapping);
- mapping_set_error(inode->i_mapping, rc);
+ if (!is_interrupt_error(rc))
+ mapping_set_error(inode->i_mapping, rc);
if (tcon->unix_ext)
rc = cifs_get_inode_info_unix(&inode, full_path,
@@ -1131,14 +1133,18 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf) {
+ if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
free_xid(xid);
return -EINVAL;
}
+ BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
+ PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
+ PAGE_SIZE);
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -1471,12 +1477,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf)
+ if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
return -EINVAL;
+ BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
+ PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
+ PAGE_SIZE);
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -2109,6 +2119,7 @@ static int cifs_writepages(struct address_space *mapping,
pgoff_t end, index;
struct cifs_writedata *wdata;
int rc = 0;
+ int saved_rc = 0;
unsigned int xid;
/*
@@ -2137,8 +2148,10 @@ retry:
rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
&wsize, &credits);
- if (rc)
+ if (rc != 0) {
+ done = true;
break;
+ }
tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
@@ -2146,6 +2159,7 @@ retry:
&found_pages);
if (!wdata) {
rc = -ENOMEM;
+ done = true;
add_credits_and_wake_if(server, credits, 0);
break;
}
@@ -2174,7 +2188,7 @@ retry:
if (rc != 0) {
add_credits_and_wake_if(server, wdata->credits, 0);
for (i = 0; i < nr_pages; ++i) {
- if (rc == -EAGAIN)
+ if (is_retryable_error(rc))
redirty_page_for_writepage(wbc,
wdata->pages[i]);
else
@@ -2182,7 +2196,7 @@ retry:
end_page_writeback(wdata->pages[i]);
put_page(wdata->pages[i]);
}
- if (rc != -EAGAIN)
+ if (!is_retryable_error(rc))
mapping_set_error(mapping, rc);
}
kref_put(&wdata->refcount, cifs_writedata_release);
@@ -2192,6 +2206,15 @@ retry:
continue;
}
+ /* Return immediately if we received a signal during writing */
+ if (is_interrupt_error(rc)) {
+ done = true;
+ break;
+ }
+
+ if (rc != 0 && saved_rc == 0)
+ saved_rc = rc;
+
wbc->nr_to_write -= nr_pages;
if (wbc->nr_to_write <= 0)
done = true;
@@ -2209,6 +2232,9 @@ retry:
goto retry;
}
+ if (saved_rc != 0)
+ rc = saved_rc;
+
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = index;
@@ -2241,8 +2267,8 @@ cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
set_page_writeback(page);
retry_write:
rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
- if (rc == -EAGAIN) {
- if (wbc->sync_mode == WB_SYNC_ALL)
+ if (is_retryable_error(rc)) {
+ if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
goto retry_write;
redirty_page_for_writepage(wbc, page);
} else if (rc != 0) {
@@ -3964,7 +3990,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
INIT_LIST_HEAD(tmplist);
- page = list_entry(page_list->prev, struct page, lru);
+ page = lru_to_page(page_list);
/*
* Lock the page and put it in the cache. Since no one else
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 13fb59aadebc..478003644916 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2257,6 +2257,11 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
* the flush returns error?
*/
rc = filemap_write_and_wait(inode->i_mapping);
+ if (is_interrupt_error(rc)) {
+ rc = -ERESTARTSYS;
+ goto out;
+ }
+
mapping_set_error(inode->i_mapping, rc);
rc = 0;
@@ -2400,6 +2405,11 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
* the flush returns error?
*/
rc = filemap_write_and_wait(inode->i_mapping);
+ if (is_interrupt_error(rc)) {
+ rc = -ERESTARTSYS;
+ goto cifs_setattr_exit;
+ }
+
mapping_set_error(inode->i_mapping, rc);
rc = 0;
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 4ed10dd086e6..b204e84b87fb 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -122,12 +122,14 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
/*
* Accessing maxBuf is racy with cifs_reconnect - need to store value
- * and check it for zero before using.
+ * and check it before using.
*/
max_buf = tcon->ses->server->maxBuf;
- if (!max_buf)
+ if (max_buf < sizeof(struct smb2_lock_element))
return -EINVAL;
+ BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
max_num = max_buf / sizeof(struct smb2_lock_element);
buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf)
@@ -264,6 +266,8 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
return -EINVAL;
}
+ BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
+ max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
max_num = max_buf / sizeof(struct smb2_lock_element);
buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf) {
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 33100ef74d7f..cf7eb891804f 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3472,8 +3472,10 @@ smb3_receive_transform(struct TCP_Server_Info *server,
}
/* TODO: add support for compounds containing READ. */
- if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
+ if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server)) {
+ *num_mids = 1;
return receive_encrypted_read(server, &mids[0]);
+ }
return receive_encrypted_standard(server, mids, bufs, num_mids);
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e283590955cd..50811a7dc0e0 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -162,24 +162,31 @@ static int __smb2_reconnect(const struct nls_table *nlsc,
int rc;
struct dfs_cache_tgt_list tl;
struct dfs_cache_tgt_iterator *it = NULL;
- char tree[MAX_TREE_SIZE + 1];
+ char *tree;
const char *tcp_host;
size_t tcp_host_len;
const char *dfs_host;
size_t dfs_host_len;
+ tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
+ if (!tree)
+ return -ENOMEM;
+
if (tcon->ipc) {
- snprintf(tree, sizeof(tree), "\\\\%s\\IPC$",
+ snprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$",
tcon->ses->server->hostname);
- return SMB2_tcon(0, tcon->ses, tree, tcon, nlsc);
+ rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc);
+ goto out;
}
- if (!tcon->dfs_path)
- return SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nlsc);
+ if (!tcon->dfs_path) {
+ rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nlsc);
+ goto out;
+ }
rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl);
if (rc)
- return rc;
+ goto out;
extract_unc_hostname(tcon->ses->server->hostname, &tcp_host,
&tcp_host_len);
@@ -199,7 +206,7 @@ static int __smb2_reconnect(const struct nls_table *nlsc,
continue;
}
- snprintf(tree, sizeof(tree), "\\%s", tgt);
+ snprintf(tree, MAX_TREE_SIZE, "\\%s", tgt);
rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc);
if (!rc)
@@ -216,6 +223,8 @@ static int __smb2_reconnect(const struct nls_table *nlsc,
rc = -ENOENT;
}
dfs_cache_free_tgts(&tl);
+out:
+ kfree(tree);
return rc;
}
#else
@@ -451,10 +460,6 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
}
-/* offset is sizeof smb2_negotiate_req but rounded up to 8 bytes */
-#define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) */
-
-
#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1)
#define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2)
#define SMB2_POSIX_EXTENSIONS_AVAILABLE cpu_to_le16(0x100)
@@ -491,10 +496,24 @@ static void
assemble_neg_contexts(struct smb2_negotiate_req *req,
unsigned int *total_len)
{
- char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT;
+ char *pneg_ctxt = (char *)req;
unsigned int ctxt_len;
- *total_len += 2; /* Add 2 due to round to 8 byte boundary for 1st ctxt */
+ if (*total_len > 200) {
+ /* In case length corrupted don't want to overrun smb buffer */
+ cifs_dbg(VFS, "Bad frame length assembling neg contexts\n");
+ return;
+ }
+
+ /*
+ * round up total_len of fixed part of SMB3 negotiate request to 8
+ * byte boundary before adding negotiate contexts
+ */
+ *total_len = roundup(*total_len, 8);
+
+ pneg_ctxt = (*total_len) + (char *)req;
+ req->NegotiateContextOffset = cpu_to_le32(*total_len);
+
build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt);
ctxt_len = DIV_ROUND_UP(sizeof(struct smb2_preauth_neg_context), 8) * 8;
*total_len += ctxt_len;
@@ -508,7 +527,6 @@ assemble_neg_contexts(struct smb2_negotiate_req *req,
build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt);
*total_len += sizeof(struct smb2_posix_neg_context);
- req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
req->NegotiateContextCount = cpu_to_le16(3);
}
@@ -724,8 +742,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
- req->DialectCount = cpu_to_le16(3);
- total_len += 6;
+ req->Dialects[3] = cpu_to_le16(SMB311_PROT_ID);
+ req->DialectCount = cpu_to_le16(4);
+ total_len += 8;
} else {
/* otherwise send specific dialect */
req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
@@ -749,7 +768,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
else {
memcpy(req->ClientGUID, server->client_guid,
SMB2_CLIENT_GUID_SIZE);
- if (ses->server->vals->protocol_id == SMB311_PROT_ID)
+ if ((ses->server->vals->protocol_id == SMB311_PROT_ID) ||
+ (strcmp(ses->server->vals->version_string,
+ SMBDEFAULT_VERSION_STRING) == 0))
assemble_neg_contexts(req, &total_len);
}
iov[0].iov_base = (char *)req;
@@ -794,7 +815,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
} else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
/* ops set to 3.0 by default for default so update */
ses->server->ops = &smb21_operations;
- }
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID))
+ ses->server->ops = &smb311_operations;
} else if (le16_to_cpu(rsp->DialectRevision) !=
ses->server->vals->protocol_id) {
/* if requested single dialect ensure returned dialect matched */
@@ -941,13 +963,14 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
pneg_inbuf->DialectCount = cpu_to_le16(2);
/* structure is big enough for 3 dialects, sending only 2 */
inbuflen = sizeof(*pneg_inbuf) -
- sizeof(pneg_inbuf->Dialects[0]);
+ (2 * sizeof(pneg_inbuf->Dialects[0]));
} else if (strcmp(tcon->ses->server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) {
pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
pneg_inbuf->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
- pneg_inbuf->DialectCount = cpu_to_le16(3);
+ pneg_inbuf->Dialects[3] = cpu_to_le16(SMB311_PROT_ID);
+ pneg_inbuf->DialectCount = cpu_to_le16(4);
/* structure is big enough for 3 dialects */
inbuflen = sizeof(*pneg_inbuf);
} else {
@@ -3264,12 +3287,14 @@ smb2_async_readv(struct cifs_readdata *rdata)
if (rdata->credits) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
SMB2_MAX_BUFFER_SIZE));
- shdr->CreditRequest = shdr->CreditCharge;
+ shdr->CreditRequest =
+ cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1);
spin_lock(&server->req_lock);
server->credits += rdata->credits -
le16_to_cpu(shdr->CreditCharge);
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
+ rdata->credits = le16_to_cpu(shdr->CreditCharge);
flags |= CIFS_HAS_CREDITS;
}
@@ -3541,12 +3566,14 @@ smb2_async_writev(struct cifs_writedata *wdata,
if (wdata->credits) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
SMB2_MAX_BUFFER_SIZE));
- shdr->CreditRequest = shdr->CreditCharge;
+ shdr->CreditRequest =
+ cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1);
spin_lock(&server->req_lock);
server->credits += wdata->credits -
le16_to_cpu(shdr->CreditCharge);
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
+ wdata->credits = le16_to_cpu(shdr->CreditCharge);
flags |= CIFS_HAS_CREDITS;
}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 05dea6750c33..7a2d0a2255e6 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -898,7 +898,7 @@ struct validate_negotiate_info_req {
__u8 Guid[SMB2_CLIENT_GUID_SIZE];
__le16 SecurityMode;
__le16 DialectCount;
- __le16 Dialects[3]; /* BB expand this if autonegotiate > 3 dialects */
+ __le16 Dialects[4]; /* BB expand this if autonegotiate > 4 dialects */
} __packed;
struct validate_negotiate_info_rsp {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 5be7302853b6..202e0e84efdd 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -387,7 +387,7 @@ smbd_done:
if (rc < 0 && rc != -EINTR)
cifs_dbg(VFS, "Error %d sending data on socket to server\n",
rc);
- else
+ else if (rc > 0)
rc = 0;
return rc;
@@ -783,8 +783,34 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
}
static void
-cifs_noop_callback(struct mid_q_entry *mid)
+cifs_compound_callback(struct mid_q_entry *mid)
{
+ struct TCP_Server_Info *server = mid->server;
+ unsigned int optype = mid->optype;
+ unsigned int credits_received = 0;
+
+ if (mid->mid_state == MID_RESPONSE_RECEIVED) {
+ if (mid->resp_buf)
+ credits_received = server->ops->get_credits(mid);
+ else
+ cifs_dbg(FYI, "Bad state for cancelled MID\n");
+ }
+
+ add_credits(server, credits_received, optype);
+}
+
+static void
+cifs_compound_last_callback(struct mid_q_entry *mid)
+{
+ cifs_compound_callback(mid);
+ cifs_wake_up_task(mid);
+}
+
+static void
+cifs_cancelled_callback(struct mid_q_entry *mid)
+{
+ cifs_compound_callback(mid);
+ DeleteMidQEntry(mid);
}
int
@@ -795,7 +821,8 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
int i, j, rc = 0;
int timeout, optype;
struct mid_q_entry *midQ[MAX_COMPOUND];
- unsigned int credits = 0;
+ bool cancelled_mid[MAX_COMPOUND] = {false};
+ unsigned int credits[MAX_COMPOUND] = {0};
char *buf;
timeout = flags & CIFS_TIMEOUT_MASK;
@@ -813,13 +840,31 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
return -ENOENT;
/*
- * Ensure that we do not send more than 50 overlapping requests
- * to the same server. We may make this configurable later or
- * use ses->maxReq.
+ * Ensure we obtain 1 credit per request in the compound chain.
+ * It can be optimized further by waiting for all the credits
+ * at once but this can wait long enough if we don't have enough
+ * credits due to some heavy operations in progress or the server
+ * not granting us much, so a fallback to the current approach is
+ * needed anyway.
*/
- rc = wait_for_free_request(ses->server, timeout, optype);
- if (rc)
- return rc;
+ for (i = 0; i < num_rqst; i++) {
+ rc = wait_for_free_request(ses->server, timeout, optype);
+ if (rc) {
+ /*
+ * We haven't sent an SMB packet to the server yet but
+ * we already obtained credits for i requests in the
+ * compound chain - need to return those credits back
+ * for future use. Note that we need to call add_credits
+ * multiple times to match the way we obtained credits
+ * in the first place and to account for in flight
+ * requests correctly.
+ */
+ for (j = 0; j < i; j++)
+ add_credits(ses->server, 1, optype);
+ return rc;
+ }
+ credits[i] = 1;
+ }
/*
* Make sure that we sign in the same order that we send on this socket
@@ -835,18 +880,24 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
for (j = 0; j < i; j++)
cifs_delete_mid(midQ[j]);
mutex_unlock(&ses->server->srv_mutex);
+
/* Update # of requests on wire to server */
- add_credits(ses->server, 1, optype);
+ for (j = 0; j < num_rqst; j++)
+ add_credits(ses->server, credits[j], optype);
return PTR_ERR(midQ[i]);
}
midQ[i]->mid_state = MID_REQUEST_SUBMITTED;
+ midQ[i]->optype = optype;
/*
- * We don't invoke the callback compounds unless it is the last
- * request.
+ * Invoke callback for every part of the compound chain
+ * to calculate credits properly. Wake up this thread only when
+ * the last element is received.
*/
if (i < num_rqst - 1)
- midQ[i]->callback = cifs_noop_callback;
+ midQ[i]->callback = cifs_compound_callback;
+ else
+ midQ[i]->callback = cifs_compound_last_callback;
}
cifs_in_send_inc(ses->server);
rc = smb_send_rqst(ses->server, num_rqst, rqst, flags);
@@ -860,8 +911,20 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
mutex_unlock(&ses->server->srv_mutex);
- if (rc < 0)
+ if (rc < 0) {
+ /* Sending failed for some reason - return credits back */
+ for (i = 0; i < num_rqst; i++)
+ add_credits(ses->server, credits[i], optype);
goto out;
+ }
+
+ /*
+ * At this point the request is passed to the network stack - we assume
+ * that any credits taken from the server structure on the client have
+ * been spent and we can't return them back. Once we receive responses
+ * we will collect credits granted by the server in the mid callbacks
+ * and add those credits to the server structure.
+ */
/*
* Compounding is never used during session establish.
@@ -875,36 +938,34 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
for (i = 0; i < num_rqst; i++) {
rc = wait_for_response(ses->server, midQ[i]);
- if (rc != 0) {
+ if (rc != 0)
+ break;
+ }
+ if (rc != 0) {
+ for (; i < num_rqst; i++) {
cifs_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n",
midQ[i]->mid, le16_to_cpu(midQ[i]->command));
send_cancel(ses->server, &rqst[i], midQ[i]);
spin_lock(&GlobalMid_Lock);
if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) {
midQ[i]->mid_flags |= MID_WAIT_CANCELLED;
- midQ[i]->callback = DeleteMidQEntry;
- spin_unlock(&GlobalMid_Lock);
- add_credits(ses->server, 1, optype);
- return rc;
+ midQ[i]->callback = cifs_cancelled_callback;
+ cancelled_mid[i] = true;
+ credits[i] = 0;
}
spin_unlock(&GlobalMid_Lock);
}
}
- for (i = 0; i < num_rqst; i++)
- if (midQ[i]->resp_buf)
- credits += ses->server->ops->get_credits(midQ[i]);
- if (!credits)
- credits = 1;
-
for (i = 0; i < num_rqst; i++) {
if (rc < 0)
goto out;
rc = cifs_sync_mid_result(midQ[i], ses->server);
if (rc != 0) {
- add_credits(ses->server, credits, optype);
- return rc;
+ /* mark this mid as cancelled to not free it below */
+ cancelled_mid[i] = true;
+ goto out;
}
if (!midQ[i]->resp_buf ||
@@ -951,9 +1012,10 @@ out:
* This is prevented above by using a noop callback that will not
* wake this thread except for the very last PDU.
*/
- for (i = 0; i < num_rqst; i++)
- cifs_delete_mid(midQ[i]);
- add_credits(ses->server, credits, optype);
+ for (i = 0; i < num_rqst; i++) {
+ if (!cancelled_mid[i])
+ cifs_delete_mid(midQ[i]);
+ }
return rc;
}
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 0f46cf550907..4dc788e3bc96 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -133,15 +133,25 @@ struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags)
}
EXPORT_SYMBOL(fscrypt_get_ctx);
+void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
+ const struct fscrypt_info *ci)
+{
+ memset(iv, 0, ci->ci_mode->ivsize);
+ iv->lblk_num = cpu_to_le64(lblk_num);
+
+ if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY)
+ memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+
+ if (ci->ci_essiv_tfm != NULL)
+ crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw);
+}
+
int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
u64 lblk_num, struct page *src_page,
struct page *dest_page, unsigned int len,
unsigned int offs, gfp_t gfp_flags)
{
- struct {
- __le64 index;
- u8 padding[FS_IV_SIZE - sizeof(__le64)];
- } iv;
+ union fscrypt_iv iv;
struct skcipher_request *req = NULL;
DECLARE_CRYPTO_WAIT(wait);
struct scatterlist dst, src;
@@ -151,15 +161,7 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
BUG_ON(len == 0);
- BUILD_BUG_ON(sizeof(iv) != FS_IV_SIZE);
- BUILD_BUG_ON(AES_BLOCK_SIZE != FS_IV_SIZE);
- iv.index = cpu_to_le64(lblk_num);
- memset(iv.padding, 0, sizeof(iv.padding));
-
- if (ci->ci_essiv_tfm != NULL) {
- crypto_cipher_encrypt_one(ci->ci_essiv_tfm, (u8 *)&iv,
- (u8 *)&iv);
- }
+ fscrypt_generate_iv(&iv, lblk_num, ci);
req = skcipher_request_alloc(tfm, gfp_flags);
if (!req)
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index d7a0f682ca12..7ff40a73dbec 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -40,10 +40,11 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
{
struct skcipher_request *req = NULL;
DECLARE_CRYPTO_WAIT(wait);
- struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm;
- int res = 0;
- char iv[FS_CRYPTO_BLOCK_SIZE];
+ struct fscrypt_info *ci = inode->i_crypt_info;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
+ union fscrypt_iv iv;
struct scatterlist sg;
+ int res;
/*
* Copy the filename to the output buffer for encrypting in-place and
@@ -55,7 +56,7 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
memset(out + iname->len, 0, olen - iname->len);
/* Initialize the IV */
- memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
+ fscrypt_generate_iv(&iv, 0, ci);
/* Set up the encryption request */
req = skcipher_request_alloc(tfm, GFP_NOFS);
@@ -65,7 +66,7 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
crypto_req_done, &wait);
sg_init_one(&sg, out, olen);
- skcipher_request_set_crypt(req, &sg, &sg, olen, iv);
+ skcipher_request_set_crypt(req, &sg, &sg, olen, &iv);
/* Do the encryption */
res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
@@ -94,9 +95,10 @@ static int fname_decrypt(struct inode *inode,
struct skcipher_request *req = NULL;
DECLARE_CRYPTO_WAIT(wait);
struct scatterlist src_sg, dst_sg;
- struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm;
- int res = 0;
- char iv[FS_CRYPTO_BLOCK_SIZE];
+ struct fscrypt_info *ci = inode->i_crypt_info;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
+ union fscrypt_iv iv;
+ int res;
/* Allocate request */
req = skcipher_request_alloc(tfm, GFP_NOFS);
@@ -107,12 +109,12 @@ static int fname_decrypt(struct inode *inode,
crypto_req_done, &wait);
/* Initialize IV */
- memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
+ fscrypt_generate_iv(&iv, 0, ci);
/* Create decryption request */
sg_init_one(&src_sg, iname->name, iname->len);
sg_init_one(&dst_sg, oname->name, oname->len);
- skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, &iv);
res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
skcipher_request_free(req);
if (res < 0) {
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 79debfc9cef9..7424f851eb5c 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -17,7 +17,6 @@
#include <crypto/hash.h>
/* Encryption parameters */
-#define FS_IV_SIZE 16
#define FS_KEY_DERIVATION_NONCE_SIZE 16
/**
@@ -52,16 +51,42 @@ struct fscrypt_symlink_data {
} __packed;
/*
- * A pointer to this structure is stored in the file system's in-core
- * representation of an inode.
+ * fscrypt_info - the "encryption key" for an inode
+ *
+ * When an encrypted file's key is made available, an instance of this struct is
+ * allocated and stored in ->i_crypt_info. Once created, it remains until the
+ * inode is evicted.
*/
struct fscrypt_info {
+
+ /* The actual crypto transform used for encryption and decryption */
+ struct crypto_skcipher *ci_ctfm;
+
+ /*
+ * Cipher for ESSIV IV generation. Only set for CBC contents
+ * encryption, otherwise is NULL.
+ */
+ struct crypto_cipher *ci_essiv_tfm;
+
+ /*
+ * Encryption mode used for this inode. It corresponds to either
+ * ci_data_mode or ci_filename_mode, depending on the inode type.
+ */
+ struct fscrypt_mode *ci_mode;
+
+ /*
+ * If non-NULL, then this inode uses a master key directly rather than a
+ * derived key, and ci_ctfm will equal ci_master_key->mk_ctfm.
+ * Otherwise, this inode uses a derived key.
+ */
+ struct fscrypt_master_key *ci_master_key;
+
+ /* fields from the fscrypt_context */
u8 ci_data_mode;
u8 ci_filename_mode;
u8 ci_flags;
- struct crypto_skcipher *ci_ctfm;
- struct crypto_cipher *ci_essiv_tfm;
- u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
+ u8 ci_master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+ u8 ci_nonce[FS_KEY_DERIVATION_NONCE_SIZE];
};
typedef enum {
@@ -83,6 +108,10 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
return true;
+ if (contents_mode == FS_ENCRYPTION_MODE_ADIANTUM &&
+ filenames_mode == FS_ENCRYPTION_MODE_ADIANTUM)
+ return true;
+
return false;
}
@@ -107,6 +136,22 @@ fscrypt_msg(struct super_block *sb, const char *level, const char *fmt, ...);
#define fscrypt_err(sb, fmt, ...) \
fscrypt_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__)
+#define FSCRYPT_MAX_IV_SIZE 32
+
+union fscrypt_iv {
+ struct {
+ /* logical block number within the file */
+ __le64 lblk_num;
+
+ /* per-file nonce; only set in DIRECT_KEY mode */
+ u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+ };
+ u8 raw[FSCRYPT_MAX_IV_SIZE];
+};
+
+void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
+ const struct fscrypt_info *ci);
+
/* fname.c */
extern int fname_encrypt(struct inode *inode, const struct qstr *iname,
u8 *out, unsigned int olen);
@@ -115,6 +160,16 @@ extern bool fscrypt_fname_encrypted_size(const struct inode *inode,
u32 *encrypted_len_ret);
/* keyinfo.c */
+
+struct fscrypt_mode {
+ const char *friendly_name;
+ const char *cipher_str;
+ int keysize;
+ int ivsize;
+ bool logged_impl_name;
+ bool needs_essiv;
+};
+
extern void __exit fscrypt_essiv_cleanup(void);
#endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 7874c9bb2fc5..1e11a683f63d 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -10,15 +10,21 @@
*/
#include <keys/user-type.h>
+#include <linux/hashtable.h>
#include <linux/scatterlist.h>
#include <linux/ratelimit.h>
#include <crypto/aes.h>
+#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <crypto/skcipher.h>
#include "fscrypt_private.h"
static struct crypto_shash *essiv_hash_tfm;
+/* Table of keys referenced by FS_POLICY_FLAG_DIRECT_KEY policies */
+static DEFINE_HASHTABLE(fscrypt_master_keys, 6); /* 6 bits = 64 buckets */
+static DEFINE_SPINLOCK(fscrypt_master_keys_lock);
+
/*
* Key derivation function. This generates the derived key by encrypting the
* master key with AES-128-ECB using the inode's nonce as the AES key.
@@ -123,56 +129,37 @@ invalid:
return ERR_PTR(-ENOKEY);
}
-/* Find the master key, then derive the inode's actual encryption key */
-static int find_and_derive_key(const struct inode *inode,
- const struct fscrypt_context *ctx,
- u8 *derived_key, unsigned int derived_keysize)
-{
- struct key *key;
- const struct fscrypt_key *payload;
- int err;
-
- key = find_and_lock_process_key(FS_KEY_DESC_PREFIX,
- ctx->master_key_descriptor,
- derived_keysize, &payload);
- if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) {
- key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix,
- ctx->master_key_descriptor,
- derived_keysize, &payload);
- }
- if (IS_ERR(key))
- return PTR_ERR(key);
- err = derive_key_aes(payload->raw, ctx, derived_key, derived_keysize);
- up_read(&key->sem);
- key_put(key);
- return err;
-}
-
-static struct fscrypt_mode {
- const char *friendly_name;
- const char *cipher_str;
- int keysize;
- bool logged_impl_name;
-} available_modes[] = {
+static struct fscrypt_mode available_modes[] = {
[FS_ENCRYPTION_MODE_AES_256_XTS] = {
.friendly_name = "AES-256-XTS",
.cipher_str = "xts(aes)",
.keysize = 64,
+ .ivsize = 16,
},
[FS_ENCRYPTION_MODE_AES_256_CTS] = {
.friendly_name = "AES-256-CTS-CBC",
.cipher_str = "cts(cbc(aes))",
.keysize = 32,
+ .ivsize = 16,
},
[FS_ENCRYPTION_MODE_AES_128_CBC] = {
.friendly_name = "AES-128-CBC",
.cipher_str = "cbc(aes)",
.keysize = 16,
+ .ivsize = 16,
+ .needs_essiv = true,
},
[FS_ENCRYPTION_MODE_AES_128_CTS] = {
.friendly_name = "AES-128-CTS-CBC",
.cipher_str = "cts(cbc(aes))",
.keysize = 16,
+ .ivsize = 16,
+ },
+ [FS_ENCRYPTION_MODE_ADIANTUM] = {
+ .friendly_name = "Adiantum",
+ .cipher_str = "adiantum(xchacha12,aes)",
+ .keysize = 32,
+ .ivsize = 32,
},
};
@@ -198,14 +185,196 @@ select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode)
return ERR_PTR(-EINVAL);
}
-static void put_crypt_info(struct fscrypt_info *ci)
+/* Find the master key, then derive the inode's actual encryption key */
+static int find_and_derive_key(const struct inode *inode,
+ const struct fscrypt_context *ctx,
+ u8 *derived_key, const struct fscrypt_mode *mode)
{
- if (!ci)
+ struct key *key;
+ const struct fscrypt_key *payload;
+ int err;
+
+ key = find_and_lock_process_key(FS_KEY_DESC_PREFIX,
+ ctx->master_key_descriptor,
+ mode->keysize, &payload);
+ if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) {
+ key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix,
+ ctx->master_key_descriptor,
+ mode->keysize, &payload);
+ }
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+
+ if (ctx->flags & FS_POLICY_FLAG_DIRECT_KEY) {
+ if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) {
+ fscrypt_warn(inode->i_sb,
+ "direct key mode not allowed with %s",
+ mode->friendly_name);
+ err = -EINVAL;
+ } else if (ctx->contents_encryption_mode !=
+ ctx->filenames_encryption_mode) {
+ fscrypt_warn(inode->i_sb,
+ "direct key mode not allowed with different contents and filenames modes");
+ err = -EINVAL;
+ } else {
+ memcpy(derived_key, payload->raw, mode->keysize);
+ err = 0;
+ }
+ } else {
+ err = derive_key_aes(payload->raw, ctx, derived_key,
+ mode->keysize);
+ }
+ up_read(&key->sem);
+ key_put(key);
+ return err;
+}
+
+/* Allocate and key a symmetric cipher object for the given encryption mode */
+static struct crypto_skcipher *
+allocate_skcipher_for_mode(struct fscrypt_mode *mode, const u8 *raw_key,
+ const struct inode *inode)
+{
+ struct crypto_skcipher *tfm;
+ int err;
+
+ tfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
+ if (IS_ERR(tfm)) {
+ fscrypt_warn(inode->i_sb,
+ "error allocating '%s' transform for inode %lu: %ld",
+ mode->cipher_str, inode->i_ino, PTR_ERR(tfm));
+ return tfm;
+ }
+ if (unlikely(!mode->logged_impl_name)) {
+ /*
+ * fscrypt performance can vary greatly depending on which
+ * crypto algorithm implementation is used. Help people debug
+ * performance problems by logging the ->cra_driver_name the
+ * first time a mode is used. Note that multiple threads can
+ * race here, but it doesn't really matter.
+ */
+ mode->logged_impl_name = true;
+ pr_info("fscrypt: %s using implementation \"%s\"\n",
+ mode->friendly_name,
+ crypto_skcipher_alg(tfm)->base.cra_driver_name);
+ }
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize);
+ if (err)
+ goto err_free_tfm;
+
+ return tfm;
+
+err_free_tfm:
+ crypto_free_skcipher(tfm);
+ return ERR_PTR(err);
+}
+
+/* Master key referenced by FS_POLICY_FLAG_DIRECT_KEY policy */
+struct fscrypt_master_key {
+ struct hlist_node mk_node;
+ refcount_t mk_refcount;
+ const struct fscrypt_mode *mk_mode;
+ struct crypto_skcipher *mk_ctfm;
+ u8 mk_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+ u8 mk_raw[FS_MAX_KEY_SIZE];
+};
+
+static void free_master_key(struct fscrypt_master_key *mk)
+{
+ if (mk) {
+ crypto_free_skcipher(mk->mk_ctfm);
+ kzfree(mk);
+ }
+}
+
+static void put_master_key(struct fscrypt_master_key *mk)
+{
+ if (!refcount_dec_and_lock(&mk->mk_refcount, &fscrypt_master_keys_lock))
return;
+ hash_del(&mk->mk_node);
+ spin_unlock(&fscrypt_master_keys_lock);
- crypto_free_skcipher(ci->ci_ctfm);
- crypto_free_cipher(ci->ci_essiv_tfm);
- kmem_cache_free(fscrypt_info_cachep, ci);
+ free_master_key(mk);
+}
+
+/*
+ * Find/insert the given master key into the fscrypt_master_keys table. If
+ * found, it is returned with elevated refcount, and 'to_insert' is freed if
+ * non-NULL. If not found, 'to_insert' is inserted and returned if it's
+ * non-NULL; otherwise NULL is returned.
+ */
+static struct fscrypt_master_key *
+find_or_insert_master_key(struct fscrypt_master_key *to_insert,
+ const u8 *raw_key, const struct fscrypt_mode *mode,
+ const struct fscrypt_info *ci)
+{
+ unsigned long hash_key;
+ struct fscrypt_master_key *mk;
+
+ /*
+ * Careful: to avoid potentially leaking secret key bytes via timing
+ * information, we must key the hash table by descriptor rather than by
+ * raw key, and use crypto_memneq() when comparing raw keys.
+ */
+
+ BUILD_BUG_ON(sizeof(hash_key) > FS_KEY_DESCRIPTOR_SIZE);
+ memcpy(&hash_key, ci->ci_master_key_descriptor, sizeof(hash_key));
+
+ spin_lock(&fscrypt_master_keys_lock);
+ hash_for_each_possible(fscrypt_master_keys, mk, mk_node, hash_key) {
+ if (memcmp(ci->ci_master_key_descriptor, mk->mk_descriptor,
+ FS_KEY_DESCRIPTOR_SIZE) != 0)
+ continue;
+ if (mode != mk->mk_mode)
+ continue;
+ if (crypto_memneq(raw_key, mk->mk_raw, mode->keysize))
+ continue;
+ /* using existing tfm with same (descriptor, mode, raw_key) */
+ refcount_inc(&mk->mk_refcount);
+ spin_unlock(&fscrypt_master_keys_lock);
+ free_master_key(to_insert);
+ return mk;
+ }
+ if (to_insert)
+ hash_add(fscrypt_master_keys, &to_insert->mk_node, hash_key);
+ spin_unlock(&fscrypt_master_keys_lock);
+ return to_insert;
+}
+
+/* Prepare to encrypt directly using the master key in the given mode */
+static struct fscrypt_master_key *
+fscrypt_get_master_key(const struct fscrypt_info *ci, struct fscrypt_mode *mode,
+ const u8 *raw_key, const struct inode *inode)
+{
+ struct fscrypt_master_key *mk;
+ int err;
+
+ /* Is there already a tfm for this key? */
+ mk = find_or_insert_master_key(NULL, raw_key, mode, ci);
+ if (mk)
+ return mk;
+
+ /* Nope, allocate one. */
+ mk = kzalloc(sizeof(*mk), GFP_NOFS);
+ if (!mk)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&mk->mk_refcount, 1);
+ mk->mk_mode = mode;
+ mk->mk_ctfm = allocate_skcipher_for_mode(mode, raw_key, inode);
+ if (IS_ERR(mk->mk_ctfm)) {
+ err = PTR_ERR(mk->mk_ctfm);
+ mk->mk_ctfm = NULL;
+ goto err_free_mk;
+ }
+ memcpy(mk->mk_descriptor, ci->ci_master_key_descriptor,
+ FS_KEY_DESCRIPTOR_SIZE);
+ memcpy(mk->mk_raw, raw_key, mode->keysize);
+
+ return find_or_insert_master_key(mk, raw_key, mode, ci);
+
+err_free_mk:
+ free_master_key(mk);
+ return ERR_PTR(err);
}
static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
@@ -275,11 +444,67 @@ void __exit fscrypt_essiv_cleanup(void)
crypto_free_shash(essiv_hash_tfm);
}
+/*
+ * Given the encryption mode and key (normally the derived key, but for
+ * FS_POLICY_FLAG_DIRECT_KEY mode it's the master key), set up the inode's
+ * symmetric cipher transform object(s).
+ */
+static int setup_crypto_transform(struct fscrypt_info *ci,
+ struct fscrypt_mode *mode,
+ const u8 *raw_key, const struct inode *inode)
+{
+ struct fscrypt_master_key *mk;
+ struct crypto_skcipher *ctfm;
+ int err;
+
+ if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY) {
+ mk = fscrypt_get_master_key(ci, mode, raw_key, inode);
+ if (IS_ERR(mk))
+ return PTR_ERR(mk);
+ ctfm = mk->mk_ctfm;
+ } else {
+ mk = NULL;
+ ctfm = allocate_skcipher_for_mode(mode, raw_key, inode);
+ if (IS_ERR(ctfm))
+ return PTR_ERR(ctfm);
+ }
+ ci->ci_master_key = mk;
+ ci->ci_ctfm = ctfm;
+
+ if (mode->needs_essiv) {
+ /* ESSIV implies 16-byte IVs which implies !DIRECT_KEY */
+ WARN_ON(mode->ivsize != AES_BLOCK_SIZE);
+ WARN_ON(ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY);
+
+ err = init_essiv_generator(ci, raw_key, mode->keysize);
+ if (err) {
+ fscrypt_warn(inode->i_sb,
+ "error initializing ESSIV generator for inode %lu: %d",
+ inode->i_ino, err);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static void put_crypt_info(struct fscrypt_info *ci)
+{
+ if (!ci)
+ return;
+
+ if (ci->ci_master_key) {
+ put_master_key(ci->ci_master_key);
+ } else {
+ crypto_free_skcipher(ci->ci_ctfm);
+ crypto_free_cipher(ci->ci_essiv_tfm);
+ }
+ kmem_cache_free(fscrypt_info_cachep, ci);
+}
+
int fscrypt_get_encryption_info(struct inode *inode)
{
struct fscrypt_info *crypt_info;
struct fscrypt_context ctx;
- struct crypto_skcipher *ctfm;
struct fscrypt_mode *mode;
u8 *raw_key = NULL;
int res;
@@ -312,74 +537,42 @@ int fscrypt_get_encryption_info(struct inode *inode)
if (ctx.flags & ~FS_POLICY_FLAGS_VALID)
return -EINVAL;
- crypt_info = kmem_cache_alloc(fscrypt_info_cachep, GFP_NOFS);
+ crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_NOFS);
if (!crypt_info)
return -ENOMEM;
crypt_info->ci_flags = ctx.flags;
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
- crypt_info->ci_ctfm = NULL;
- crypt_info->ci_essiv_tfm = NULL;
- memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
- sizeof(crypt_info->ci_master_key));
+ memcpy(crypt_info->ci_master_key_descriptor, ctx.master_key_descriptor,
+ FS_KEY_DESCRIPTOR_SIZE);
+ memcpy(crypt_info->ci_nonce, ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
mode = select_encryption_mode(crypt_info, inode);
if (IS_ERR(mode)) {
res = PTR_ERR(mode);
goto out;
}
+ WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE);
+ crypt_info->ci_mode = mode;
/*
- * This cannot be a stack buffer because it is passed to the scatterlist
- * crypto API as part of key derivation.
+ * This cannot be a stack buffer because it may be passed to the
+ * scatterlist crypto API as part of key derivation.
*/
res = -ENOMEM;
raw_key = kmalloc(mode->keysize, GFP_NOFS);
if (!raw_key)
goto out;
- res = find_and_derive_key(inode, &ctx, raw_key, mode->keysize);
+ res = find_and_derive_key(inode, &ctx, raw_key, mode);
if (res)
goto out;
- ctfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
- if (IS_ERR(ctfm)) {
- res = PTR_ERR(ctfm);
- fscrypt_warn(inode->i_sb,
- "error allocating '%s' transform for inode %lu: %d",
- mode->cipher_str, inode->i_ino, res);
- goto out;
- }
- if (unlikely(!mode->logged_impl_name)) {
- /*
- * fscrypt performance can vary greatly depending on which
- * crypto algorithm implementation is used. Help people debug
- * performance problems by logging the ->cra_driver_name the
- * first time a mode is used. Note that multiple threads can
- * race here, but it doesn't really matter.
- */
- mode->logged_impl_name = true;
- pr_info("fscrypt: %s using implementation \"%s\"\n",
- mode->friendly_name,
- crypto_skcipher_alg(ctfm)->base.cra_driver_name);
- }
- crypt_info->ci_ctfm = ctfm;
- crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
- res = crypto_skcipher_setkey(ctfm, raw_key, mode->keysize);
+ res = setup_crypto_transform(crypt_info, mode, raw_key, inode);
if (res)
goto out;
- if (S_ISREG(inode->i_mode) &&
- crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) {
- res = init_essiv_generator(crypt_info, raw_key, mode->keysize);
- if (res) {
- fscrypt_warn(inode->i_sb,
- "error initializing ESSIV generator for inode %lu: %d",
- inode->i_ino, res);
- goto out;
- }
- }
if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL)
crypt_info = NULL;
out:
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index c6d431a5cce9..f490de921ce8 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -199,7 +199,8 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
child_ci = child->i_crypt_info;
if (parent_ci && child_ci) {
- return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key,
+ return memcmp(parent_ci->ci_master_key_descriptor,
+ child_ci->ci_master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
(parent_ci->ci_filename_mode ==
@@ -254,7 +255,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
ctx.contents_encryption_mode = ci->ci_data_mode;
ctx.filenames_encryption_mode = ci->ci_filename_mode;
ctx.flags = ci->ci_flags;
- memcpy(ctx.master_key_descriptor, ci->ci_master_key,
+ memcpy(ctx.master_key_descriptor, ci->ci_master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8a5a1010886b..a5d219d920e7 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -381,7 +381,8 @@ static void ep_nested_calls_init(struct nested_calls *ncalls)
*/
static inline int ep_events_available(struct eventpoll *ep)
{
- return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
+ return !list_empty_careful(&ep->rdllist) ||
+ READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;
}
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -471,7 +472,6 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
* no re-entered.
*
* @ncalls: Pointer to the nested_calls structure to be used for this call.
- * @max_nests: Maximum number of allowed nesting calls.
* @nproc: Nested call core function pointer.
* @priv: Opaque data to be passed to the @nproc callback.
* @cookie: Cookie to be used to identify this nested call.
@@ -480,7 +480,7 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
* Returns: Returns the code returned by the @nproc callback, or -1 if
* the maximum recursion limit has been exceeded.
*/
-static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
+static int ep_call_nested(struct nested_calls *ncalls,
int (*nproc)(void *, void *, int), void *priv,
void *cookie, void *ctx)
{
@@ -499,7 +499,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
*/
list_for_each_entry(tncur, lsthead, llink) {
if (tncur->ctx == ctx &&
- (tncur->cookie == cookie || ++call_nests > max_nests)) {
+ (tncur->cookie == cookie || ++call_nests > EP_MAX_NESTS)) {
/*
* Ops ... loop detected or maximum nest level reached.
* We abort this wake by breaking the cycle itself.
@@ -573,7 +573,7 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
{
int this_cpu = get_cpu();
- ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
+ ep_call_nested(&poll_safewake_ncalls,
ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
put_cpu();
@@ -699,7 +699,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
*/
spin_lock_irq(&ep->wq.lock);
list_splice_init(&ep->rdllist, &txlist);
- ep->ovflist = NULL;
+ WRITE_ONCE(ep->ovflist, NULL);
spin_unlock_irq(&ep->wq.lock);
/*
@@ -713,7 +713,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
* other events might have been queued by the poll callback.
* We re-insert them inside the main ready-list here.
*/
- for (nepi = ep->ovflist; (epi = nepi) != NULL;
+ for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL;
nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
/*
* We need to check if the item is already in the list.
@@ -731,7 +731,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
* releasing the lock, events will be queued in the normal way inside
* ep->rdllist.
*/
- ep->ovflist = EP_UNACTIVE_PTR;
+ WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR);
/*
* Quickly re-inject items left on "txlist".
@@ -1154,10 +1154,10 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* semantics). All the events that happen during that period of time are
* chained in ep->ovflist and requeued later on.
*/
- if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
+ if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
if (epi->next == EP_UNACTIVE_PTR) {
- epi->next = ep->ovflist;
- ep->ovflist = epi;
+ epi->next = READ_ONCE(ep->ovflist);
+ WRITE_ONCE(ep->ovflist, epi);
if (epi->ws) {
/*
* Activate ep->ws since epi->ws may get
@@ -1333,7 +1333,6 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
}
} else {
error = ep_call_nested(&poll_loop_ncalls,
- EP_MAX_NESTS,
reverse_path_check_proc,
child_file, child_file,
current);
@@ -1367,7 +1366,7 @@ static int reverse_path_check(void)
/* let's call this for all tfiles */
list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
path_count_init();
- error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ error = ep_call_nested(&poll_loop_ncalls,
reverse_path_check_proc, current_file,
current_file, current);
if (error)
@@ -1626,21 +1625,24 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head
{
struct ep_send_events_data *esed = priv;
__poll_t revents;
- struct epitem *epi;
- struct epoll_event __user *uevent;
+ struct epitem *epi, *tmp;
+ struct epoll_event __user *uevent = esed->events;
struct wakeup_source *ws;
poll_table pt;
init_poll_funcptr(&pt, NULL);
+ esed->res = 0;
/*
* We can loop without lock because we are passed a task private list.
* Items cannot vanish during the loop because ep_scan_ready_list() is
* holding "mtx" during this call.
*/
- for (esed->res = 0, uevent = esed->events;
- !list_empty(head) && esed->res < esed->maxevents;) {
- epi = list_first_entry(head, struct epitem, rdllink);
+ lockdep_assert_held(&ep->mtx);
+
+ list_for_each_entry_safe(epi, tmp, head, rdllink) {
+ if (esed->res >= esed->maxevents)
+ break;
/*
* Activate ep->ws before deactivating epi->ws to prevent
@@ -1660,42 +1662,42 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head
list_del_init(&epi->rdllink);
- revents = ep_item_poll(epi, &pt, 1);
-
/*
* If the event mask intersect the caller-requested one,
* deliver the event to userspace. Again, ep_scan_ready_list()
- * is holding "mtx", so no operations coming from userspace
+ * is holding ep->mtx, so no operations coming from userspace
* can change the item.
*/
- if (revents) {
- if (__put_user(revents, &uevent->events) ||
- __put_user(epi->event.data, &uevent->data)) {
- list_add(&epi->rdllink, head);
- ep_pm_stay_awake(epi);
- if (!esed->res)
- esed->res = -EFAULT;
- return 0;
- }
- esed->res++;
- uevent++;
- if (epi->event.events & EPOLLONESHOT)
- epi->event.events &= EP_PRIVATE_BITS;
- else if (!(epi->event.events & EPOLLET)) {
- /*
- * If this file has been added with Level
- * Trigger mode, we need to insert back inside
- * the ready list, so that the next call to
- * epoll_wait() will check again the events
- * availability. At this point, no one can insert
- * into ep->rdllist besides us. The epoll_ctl()
- * callers are locked out by
- * ep_scan_ready_list() holding "mtx" and the
- * poll callback will queue them in ep->ovflist.
- */
- list_add_tail(&epi->rdllink, &ep->rdllist);
- ep_pm_stay_awake(epi);
- }
+ revents = ep_item_poll(epi, &pt, 1);
+ if (!revents)
+ continue;
+
+ if (__put_user(revents, &uevent->events) ||
+ __put_user(epi->event.data, &uevent->data)) {
+ list_add(&epi->rdllink, head);
+ ep_pm_stay_awake(epi);
+ if (!esed->res)
+ esed->res = -EFAULT;
+ return 0;
+ }
+ esed->res++;
+ uevent++;
+ if (epi->event.events & EPOLLONESHOT)
+ epi->event.events &= EP_PRIVATE_BITS;
+ else if (!(epi->event.events & EPOLLET)) {
+ /*
+ * If this file has been added with Level
+ * Trigger mode, we need to insert back inside
+ * the ready list, so that the next call to
+ * epoll_wait() will check again the events
+ * availability. At this point, no one can insert
+ * into ep->rdllist besides us. The epoll_ctl()
+ * callers are locked out by
+ * ep_scan_ready_list() holding "mtx" and the
+ * poll callback will queue them in ep->ovflist.
+ */
+ list_add_tail(&epi->rdllink, &ep->rdllist);
+ ep_pm_stay_awake(epi);
}
}
@@ -1747,6 +1749,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
{
int res = 0, eavail, timed_out = 0;
u64 slack = 0;
+ bool waiter = false;
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
@@ -1761,11 +1764,18 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
} else if (timeout == 0) {
/*
* Avoid the unnecessary trip to the wait queue loop, if the
- * caller specified a non blocking operation.
+ * caller specified a non blocking operation. We still need
+ * lock because we could race and not see an epi being added
+ * to the ready list while in irq callback. Thus incorrectly
+ * returning 0 back to userspace.
*/
timed_out = 1;
+
spin_lock_irq(&ep->wq.lock);
- goto check_events;
+ eavail = ep_events_available(ep);
+ spin_unlock_irq(&ep->wq.lock);
+
+ goto send_events;
}
fetch_events:
@@ -1773,64 +1783,66 @@ fetch_events:
if (!ep_events_available(ep))
ep_busy_loop(ep, timed_out);
- spin_lock_irq(&ep->wq.lock);
+ eavail = ep_events_available(ep);
+ if (eavail)
+ goto send_events;
- if (!ep_events_available(ep)) {
- /*
- * Busy poll timed out. Drop NAPI ID for now, we can add
- * it back in when we have moved a socket with a valid NAPI
- * ID onto the ready list.
- */
- ep_reset_busy_poll_napi_id(ep);
+ /*
+ * Busy poll timed out. Drop NAPI ID for now, we can add
+ * it back in when we have moved a socket with a valid NAPI
+ * ID onto the ready list.
+ */
+ ep_reset_busy_poll_napi_id(ep);
- /*
- * We don't have any available event to return to the caller.
- * We need to sleep here, and we will be wake up by
- * ep_poll_callback() when events will become available.
- */
+ /*
+ * We don't have any available event to return to the caller. We need
+ * to sleep here, and we will be woken by ep_poll_callback() when events
+ * become available.
+ */
+ if (!waiter) {
+ waiter = true;
init_waitqueue_entry(&wait, current);
- __add_wait_queue_exclusive(&ep->wq, &wait);
- for (;;) {
- /*
- * We don't want to sleep if the ep_poll_callback() sends us
- * a wakeup in between. That's why we set the task state
- * to TASK_INTERRUPTIBLE before doing the checks.
- */
- set_current_state(TASK_INTERRUPTIBLE);
- /*
- * Always short-circuit for fatal signals to allow
- * threads to make a timely exit without the chance of
- * finding more events available and fetching
- * repeatedly.
- */
- if (fatal_signal_pending(current)) {
- res = -EINTR;
- break;
- }
- if (ep_events_available(ep) || timed_out)
- break;
- if (signal_pending(current)) {
- res = -EINTR;
- break;
- }
+ spin_lock_irq(&ep->wq.lock);
+ __add_wait_queue_exclusive(&ep->wq, &wait);
+ spin_unlock_irq(&ep->wq.lock);
+ }
- spin_unlock_irq(&ep->wq.lock);
- if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
- timed_out = 1;
+ for (;;) {
+ /*
+ * We don't want to sleep if the ep_poll_callback() sends us
+ * a wakeup in between. That's why we set the task state
+ * to TASK_INTERRUPTIBLE before doing the checks.
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+ /*
+ * Always short-circuit for fatal signals to allow
+ * threads to make a timely exit without the chance of
+ * finding more events available and fetching
+ * repeatedly.
+ */
+ if (fatal_signal_pending(current)) {
+ res = -EINTR;
+ break;
+ }
- spin_lock_irq(&ep->wq.lock);
+ eavail = ep_events_available(ep);
+ if (eavail)
+ break;
+ if (signal_pending(current)) {
+ res = -EINTR;
+ break;
}
- __remove_wait_queue(&ep->wq, &wait);
- __set_current_state(TASK_RUNNING);
+ if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) {
+ timed_out = 1;
+ break;
+ }
}
-check_events:
- /* Is it worth to try to dig for events ? */
- eavail = ep_events_available(ep);
- spin_unlock_irq(&ep->wq.lock);
+ __set_current_state(TASK_RUNNING);
+send_events:
/*
* Try to transfer events to user space. In case we get 0 events and
* there's still timeout left over, we go trying again in search of
@@ -1840,6 +1852,12 @@ check_events:
!(res = ep_send_events(ep, events, maxevents)) && !timed_out)
goto fetch_events;
+ if (waiter) {
+ spin_lock_irq(&ep->wq.lock);
+ __remove_wait_queue(&ep->wq, &wait);
+ spin_unlock_irq(&ep->wq.lock);
+ }
+
return res;
}
@@ -1876,7 +1894,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
ep_tovisit = epi->ffd.file->private_data;
if (ep_tovisit->visited)
continue;
- error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ error = ep_call_nested(&poll_loop_ncalls,
ep_loop_check_proc, epi->ffd.file,
ep_tovisit, current);
if (error != 0)
@@ -1916,7 +1934,7 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
int ret;
struct eventpoll *ep_cur, *ep_next;
- ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ ret = ep_call_nested(&poll_loop_ncalls,
ep_loop_check_proc, file, ep, current);
/* clear visited list */
list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
@@ -2172,7 +2190,7 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events,
return -EINVAL;
/* Verify that the area passed by the user is writeable */
- if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
+ if (!access_ok(events, maxevents * sizeof(struct epoll_event)))
return -EFAULT;
/* Get the "struct file *" for the eventpoll file */
diff --git a/fs/exec.c b/fs/exec.c
index fc281b738a98..fb72d36f7823 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -218,55 +218,10 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
if (ret <= 0)
return NULL;
- if (write) {
- unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
- unsigned long ptr_size, limit;
-
- /*
- * Since the stack will hold pointers to the strings, we
- * must account for them as well.
- *
- * The size calculation is the entire vma while each arg page is
- * built, so each time we get here it's calculating how far it
- * is currently (rather than each call being just the newly
- * added size from the arg page). As a result, we need to
- * always add the entire size of the pointers, so that on the
- * last call to get_arg_page() we'll actually have the entire
- * correct size.
- */
- ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
- if (ptr_size > ULONG_MAX - size)
- goto fail;
- size += ptr_size;
-
- acct_arg_size(bprm, size / PAGE_SIZE);
-
- /*
- * We've historically supported up to 32 pages (ARG_MAX)
- * of argument strings even with small stacks
- */
- if (size <= ARG_MAX)
- return page;
-
- /*
- * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
- * (whichever is smaller) for the argv+env strings.
- * This ensures that:
- * - the remaining binfmt code will not run out of stack space,
- * - the program will have a reasonable amount of stack left
- * to work from.
- */
- limit = _STK_LIM / 4 * 3;
- limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
- if (size > limit)
- goto fail;
- }
+ if (write)
+ acct_arg_size(bprm, vma_pages(bprm->vma));
return page;
-
-fail:
- put_page(page);
- return NULL;
}
static void put_arg_page(struct page *page)
@@ -492,6 +447,50 @@ static int count(struct user_arg_ptr argv, int max)
return i;
}
+static int prepare_arg_pages(struct linux_binprm *bprm,
+ struct user_arg_ptr argv, struct user_arg_ptr envp)
+{
+ unsigned long limit, ptr_size;
+
+ bprm->argc = count(argv, MAX_ARG_STRINGS);
+ if (bprm->argc < 0)
+ return bprm->argc;
+
+ bprm->envc = count(envp, MAX_ARG_STRINGS);
+ if (bprm->envc < 0)
+ return bprm->envc;
+
+ /*
+ * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
+ * (whichever is smaller) for the argv+env strings.
+ * This ensures that:
+ * - the remaining binfmt code will not run out of stack space,
+ * - the program will have a reasonable amount of stack left
+ * to work from.
+ */
+ limit = _STK_LIM / 4 * 3;
+ limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
+ /*
+ * We've historically supported up to 32 pages (ARG_MAX)
+ * of argument strings even with small stacks
+ */
+ limit = max_t(unsigned long, limit, ARG_MAX);
+ /*
+ * We must account for the size of all the argv and envp pointers to
+ * the argv and envp strings, since they will also take up space in
+ * the stack. They aren't stored until much later when we can't
+ * signal to the parent that the child has run out of stack space.
+ * Instead, calculate it here so it's possible to fail gracefully.
+ */
+ ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+ if (limit <= ptr_size)
+ return -E2BIG;
+ limit -= ptr_size;
+
+ bprm->argmin = bprm->p - limit;
+ return 0;
+}
+
/*
* 'copy_strings()' copies argument/environment strings from the old
* processes's memory to the new process's stack. The call to get_user_pages()
@@ -527,6 +526,10 @@ static int copy_strings(int argc, struct user_arg_ptr argv,
pos = bprm->p;
str += len;
bprm->p -= len;
+#ifdef CONFIG_MMU
+ if (bprm->p < bprm->argmin)
+ goto out;
+#endif
while (len > 0) {
int offset, bytes_to_copy;
@@ -1084,7 +1087,7 @@ static int de_thread(struct task_struct *tsk)
__set_current_state(TASK_KILLABLE);
spin_unlock_irq(lock);
schedule();
- if (unlikely(__fatal_signal_pending(tsk)))
+ if (__fatal_signal_pending(tsk))
goto killed;
spin_lock_irq(lock);
}
@@ -1112,7 +1115,7 @@ static int de_thread(struct task_struct *tsk)
write_unlock_irq(&tasklist_lock);
cgroup_threadgroup_change_end(tsk);
schedule();
- if (unlikely(__fatal_signal_pending(tsk)))
+ if (__fatal_signal_pending(tsk))
goto killed;
}
@@ -1399,7 +1402,7 @@ EXPORT_SYMBOL(finalize_exec);
* Or, if exec fails before, free_bprm() should release ->cred and
* and unlock.
*/
-int prepare_bprm_creds(struct linux_binprm *bprm)
+static int prepare_bprm_creds(struct linux_binprm *bprm)
{
if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
return -ERESTARTNOINTR;
@@ -1789,12 +1792,8 @@ static int __do_execve_file(int fd, struct filename *filename,
if (retval)
goto out_unmark;
- bprm->argc = count(argv, MAX_ARG_STRINGS);
- if ((retval = bprm->argc) < 0)
- goto out;
-
- bprm->envc = count(envp, MAX_ARG_STRINGS);
- if ((retval = bprm->envc) < 0)
+ retval = prepare_arg_pages(bprm, argv, envp);
+ if (retval < 0)
goto out;
retval = prepare_binprm(bprm);
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 906839a4da8f..fc80c7233fa5 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -705,21 +705,18 @@ out:
/*
* Read the superblock from the OSD and fill in the fields
*/
-static int exofs_fill_super(struct super_block *sb, void *data, int silent)
+static int exofs_fill_super(struct super_block *sb,
+ struct exofs_mountopt *opts,
+ struct exofs_sb_info *sbi,
+ int silent)
{
struct inode *root;
- struct exofs_mountopt *opts = data;
- struct exofs_sb_info *sbi; /*extended info */
struct osd_dev *od; /* Master device */
struct exofs_fscb fscb; /*on-disk superblock info */
struct ore_comp comp;
unsigned table_count;
int ret;
- sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
- if (!sbi)
- return -ENOMEM;
-
/* use mount options to fill superblock */
if (opts->is_osdname) {
struct osd_dev_info odi = {.systemid_len = 0};
@@ -863,7 +860,9 @@ static struct dentry *exofs_mount(struct file_system_type *type,
int flags, const char *dev_name,
void *data)
{
+ struct super_block *s;
struct exofs_mountopt opts;
+ struct exofs_sb_info *sbi;
int ret;
ret = parse_options(data, &opts);
@@ -872,9 +871,31 @@ static struct dentry *exofs_mount(struct file_system_type *type,
return ERR_PTR(ret);
}
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+ if (!sbi) {
+ kfree(opts.dev_name);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ s = sget(type, NULL, set_anon_super, flags, NULL);
+
+ if (IS_ERR(s)) {
+ kfree(opts.dev_name);
+ kfree(sbi);
+ return ERR_CAST(s);
+ }
+
if (!opts.dev_name)
opts.dev_name = dev_name;
- return mount_nodev(type, flags, &opts, exofs_fill_super);
+
+
+ ret = exofs_fill_super(s, &opts, sbi, flags & SB_SILENT ? 1 : 0);
+ if (ret) {
+ deactivate_locked_super(s);
+ return ERR_PTR(ret);
+ }
+ s->s_flags |= SB_ACTIVE;
+ return dget(s->s_root);
}
/*
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 26a7fe5c4fd3..712f00995390 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -116,8 +116,16 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
+ ret = file_write_and_wait_range(file, start, end);
+ if (ret)
+ return ret;
+
if (!journal) {
- ret = __generic_file_fsync(file, start, end, datasync);
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL
+ };
+
+ ret = ext4_write_inode(inode, &wbc);
if (!ret)
ret = ext4_sync_parent(inode);
if (test_opt(inode->i_sb, BARRIER))
@@ -125,9 +133,6 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
- ret = file_write_and_wait_range(file, start, end);
- if (ret)
- return ret;
/*
* data=writeback,ordered:
* The caller's filemap_fdatawrite()/wait will sync the data.
@@ -159,6 +164,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = err;
}
out:
+ err = file_check_and_advance_wb_err(file);
+ if (ret == 0)
+ ret = err;
trace_ext4_sync_file_exit(inode, ret);
return ret;
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 27373d88b5f0..56f6e1782d5f 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1890,12 +1890,12 @@ int ext4_inline_data_fiemap(struct inode *inode,
physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
physical += offsetof(struct ext4_inode, i_block);
- if (physical)
- error = fiemap_fill_next_extent(fieinfo, start, physical,
- inline_len, flags);
brelse(iloc.bh);
out:
up_read(&EXT4_I(inode)->xattr_sem);
+ if (physical)
+ error = fiemap_fill_next_extent(fieinfo, start, physical,
+ inline_len, flags);
return (error < 0 ? error : 0);
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9affabd07682..34d7e0703cc6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2778,7 +2778,8 @@ static int ext4_writepages(struct address_space *mapping,
* We may need to convert up to one extent per block in
* the page and we may dirty the inode.
*/
- rsv_blocks = 1 + (PAGE_SIZE >> inode->i_blkbits);
+ rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
+ PAGE_SIZE >> inode->i_blkbits);
}
/*
@@ -4833,7 +4834,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
gid_t i_gid;
projid_t i_projid;
- if (((flags & EXT4_IGET_NORMAL) &&
+ if ((!(flags & EXT4_IGET_SPECIAL) &&
(ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) ||
(ino < EXT4_ROOT_INO) ||
(ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index f461d75ac049..6aa282ee455a 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -128,7 +128,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
prefetchw(&page->flags);
if (pages) {
- page = list_entry(pages->prev, struct page, lru);
+ page = lru_to_page(pages);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping, page->index,
readahead_gfp_mask(mapping)))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d6c142d73d99..fb12d3c17c1b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4902,7 +4902,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
ext4_superblock_csum_set(sb);
if (sync)
lock_buffer(sbh);
- if (buffer_write_io_error(sbh)) {
+ if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
/*
* Oh, dear. A previous attempt to write the
* superblock failed. This could happen because the
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 78d501c1fb65..738e427e2d21 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -363,7 +363,7 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
*phys = 0;
*mapped_blocks = 0;
- if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) {
+ if (!is_fat32(sbi) && (inode->i_ino == MSDOS_ROOT_INO)) {
if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) {
*phys = sector + sbi->dir_start;
*mapped_blocks = 1;
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index c8366cb8eccd..9d01db37183f 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -57,7 +57,7 @@ static inline void fat_dir_readahead(struct inode *dir, sector_t iblock,
if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1)
return;
/* root dir of FAT12/FAT16 */
- if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
+ if (!is_fat32(sbi) && (dir->i_ino == MSDOS_ROOT_INO))
return;
bh = sb_find_get_block(sb, phys);
@@ -805,7 +805,7 @@ static long fat_dir_ioctl(struct file *filp, unsigned int cmd,
return fat_generic_ioctl(filp, cmd, arg);
}
- if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
+ if (!access_ok(d1, sizeof(struct __fat_dirent[2])))
return -EFAULT;
/*
* Yes, we don't need this put_user() absolutely. However old
@@ -845,7 +845,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd,
return fat_generic_ioctl(filp, cmd, (unsigned long)arg);
}
- if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2])))
+ if (!access_ok(d1, sizeof(struct compat_dirent[2])))
return -EFAULT;
/*
* Yes, we don't need this put_user() absolutely. However old
@@ -1313,7 +1313,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
}
}
if (dir->i_ino == MSDOS_ROOT_INO) {
- if (sbi->fat_bits != 32)
+ if (!is_fat32(sbi))
goto error;
} else if (MSDOS_I(dir)->i_start == 0) {
fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)",
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 4e1b2f6df5e6..922a0c6ba46c 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -142,6 +142,34 @@ static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb)
return sb->s_fs_info;
}
+/*
+ * Functions that determine the variant of the FAT file system (i.e.,
+ * whether this is FAT12, FAT16 or FAT32.
+ */
+static inline bool is_fat12(const struct msdos_sb_info *sbi)
+{
+ return sbi->fat_bits == 12;
+}
+
+static inline bool is_fat16(const struct msdos_sb_info *sbi)
+{
+ return sbi->fat_bits == 16;
+}
+
+static inline bool is_fat32(const struct msdos_sb_info *sbi)
+{
+ return sbi->fat_bits == 32;
+}
+
+/* Maximum number of clusters */
+static inline u32 max_fat(struct super_block *sb)
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+
+ return is_fat32(sbi) ? MAX_FAT32 :
+ is_fat16(sbi) ? MAX_FAT16 : MAX_FAT12;
+}
+
static inline struct msdos_inode_info *MSDOS_I(struct inode *inode)
{
return container_of(inode, struct msdos_inode_info, vfs_inode);
@@ -257,7 +285,7 @@ static inline int fat_get_start(const struct msdos_sb_info *sbi,
const struct msdos_dir_entry *de)
{
int cluster = le16_to_cpu(de->start);
- if (sbi->fat_bits == 32)
+ if (is_fat32(sbi))
cluster |= (le16_to_cpu(de->starthi) << 16);
return cluster;
}
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index f58c0cacc531..495edeafd60a 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -290,19 +290,17 @@ void fat_ent_access_init(struct super_block *sb)
mutex_init(&sbi->fat_lock);
- switch (sbi->fat_bits) {
- case 32:
+ if (is_fat32(sbi)) {
sbi->fatent_shift = 2;
sbi->fatent_ops = &fat32_ops;
- break;
- case 16:
+ } else if (is_fat16(sbi)) {
sbi->fatent_shift = 1;
sbi->fatent_ops = &fat16_ops;
- break;
- case 12:
+ } else if (is_fat12(sbi)) {
sbi->fatent_shift = -1;
sbi->fatent_ops = &fat12_ops;
- break;
+ } else {
+ fat_fs_error(sb, "invalid FAT variant, %u bits", sbi->fat_bits);
}
}
@@ -310,7 +308,7 @@ static void mark_fsinfo_dirty(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
- if (sb_rdonly(sb) || sbi->fat_bits != 32)
+ if (sb_rdonly(sb) || !is_fat32(sbi))
return;
__mark_inode_dirty(sbi->fsinfo_inode, I_DIRTY_SYNC);
@@ -327,7 +325,7 @@ static inline int fat_ent_update_ptr(struct super_block *sb,
/* Is this fatent's blocks including this entry? */
if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr)
return 0;
- if (sbi->fat_bits == 12) {
+ if (is_fat12(sbi)) {
if ((offset + 1) < sb->s_blocksize) {
/* This entry is on bhs[0]. */
if (fatent->nr_bhs == 2) {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index c0b5b5c3373b..79bb0e73a65f 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -686,7 +686,7 @@ static void fat_set_state(struct super_block *sb,
b = (struct fat_boot_sector *) bh->b_data;
- if (sbi->fat_bits == 32) {
+ if (is_fat32(sbi)) {
if (set)
b->fat32.state |= FAT_STATE_DIRTY;
else
@@ -1396,7 +1396,7 @@ static int fat_read_root(struct inode *inode)
inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO);
inode->i_op = sbi->dir_ops;
inode->i_fop = &fat_dir_operations;
- if (sbi->fat_bits == 32) {
+ if (is_fat32(sbi)) {
MSDOS_I(inode)->i_start = sbi->root_cluster;
error = fat_calc_dir_size(inode);
if (error < 0)
@@ -1423,7 +1423,7 @@ static unsigned long calc_fat_clusters(struct super_block *sb)
struct msdos_sb_info *sbi = MSDOS_SB(sb);
/* Divide first to avoid overflow */
- if (sbi->fat_bits != 12) {
+ if (!is_fat12(sbi)) {
unsigned long ent_per_sec = sb->s_blocksize * 8 / sbi->fat_bits;
return ent_per_sec * sbi->fat_length;
}
@@ -1743,7 +1743,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
}
/* interpret volume ID as a little endian 32 bit integer */
- if (sbi->fat_bits == 32)
+ if (is_fat32(sbi))
sbi->vol_id = bpb.fat32_vol_id;
else /* fat 16 or 12 */
sbi->vol_id = bpb.fat16_vol_id;
@@ -1769,11 +1769,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
total_clusters = (total_sectors - sbi->data_start) / sbi->sec_per_clus;
- if (sbi->fat_bits != 32)
+ if (!is_fat32(sbi))
sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12;
/* some OSes set FAT_STATE_DIRTY and clean it on unmount. */
- if (sbi->fat_bits == 32)
+ if (is_fat32(sbi))
sbi->dirty = bpb.fat32_state & FAT_STATE_DIRTY;
else /* fat 16 or 12 */
sbi->dirty = bpb.fat16_state & FAT_STATE_DIRTY;
@@ -1781,7 +1781,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
/* check that FAT table does not overflow */
fat_clusters = calc_fat_clusters(sb);
total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
- if (total_clusters > MAX_FAT(sb)) {
+ if (total_clusters > max_fat(sb)) {
if (!silent)
fat_msg(sb, KERN_ERR, "count of clusters too big (%u)",
total_clusters);
@@ -1803,11 +1803,15 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
fat_ent_access_init(sb);
/*
- * The low byte of FAT's first entry must have same value with
- * media-field. But in real world, too many devices is
- * writing wrong value. So, removed that validity check.
+ * The low byte of the first FAT entry must have the same value as
+ * the media field of the boot sector. But in real world, too many
+ * devices are writing wrong values. So, removed that validity check.
*
- * if (FAT_FIRST_ENT(sb, media) != first)
+ * The removed check compared the first FAT entry to a value dependent
+ * on the media field like this:
+ * == (0x0F00 | media), for FAT12
+ * == (0XFF00 | media), for FAT16
+ * == (0x0FFFFF | media), for FAT32
*/
error = -EINVAL;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index fce0a76f3f1e..4fc950bb6433 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -64,7 +64,7 @@ int fat_clusters_flush(struct super_block *sb)
struct buffer_head *bh;
struct fat_boot_fsinfo *fsinfo;
- if (sbi->fat_bits != 32)
+ if (!is_fat32(sbi))
return 0;
bh = sb_bread(sb, sbi->fsinfo_sector);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index f37662675c3a..29a9dcfbe81f 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -565,6 +565,7 @@ const struct inode_operations hfsplus_dir_inode_operations = {
.symlink = hfsplus_symlink,
.mknod = hfsplus_mknod,
.rename = hfsplus_rename,
+ .getattr = hfsplus_getattr,
.listxattr = hfsplus_listxattr,
};
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index dd7ad9f13e3a..b8471bf05def 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -488,6 +488,8 @@ void hfsplus_inode_write_fork(struct inode *inode,
struct hfsplus_fork_raw *fork);
int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd);
int hfsplus_cat_write_inode(struct inode *inode);
+int hfsplus_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags);
int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d7ab9d8c4b67..d131c8ea7eb6 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -270,6 +270,26 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
return 0;
}
+int hfsplus_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+ struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
+
+ if (inode->i_flags & S_APPEND)
+ stat->attributes |= STATX_ATTR_APPEND;
+ if (inode->i_flags & S_IMMUTABLE)
+ stat->attributes |= STATX_ATTR_IMMUTABLE;
+ if (hip->userflags & HFSPLUS_FLG_NODUMP)
+ stat->attributes |= STATX_ATTR_NODUMP;
+
+ stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE |
+ STATX_ATTR_NODUMP;
+
+ generic_fillattr(inode, stat);
+ return 0;
+}
+
int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
int datasync)
{
@@ -329,6 +349,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
static const struct inode_operations hfsplus_file_inode_operations = {
.setattr = hfsplus_setattr,
+ .getattr = hfsplus_getattr,
.listxattr = hfsplus_listxattr,
};
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a2fcea5f8225..32920a10100e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -383,16 +383,17 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
* truncation is indicated by end of range being LLONG_MAX
* In this case, we first scan the range and release found pages.
* After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
- * maps and global counts.
+ * maps and global counts. Page faults can not race with truncation
+ * in this routine. hugetlb_no_page() prevents page faults in the
+ * truncated range. It checks i_size before allocation, and again after
+ * with the page table lock for the page held. The same lock must be
+ * acquired to unmap a page.
* hole punch is indicated if end is not LLONG_MAX
* In the hole punch case we scan the range and release found pages.
* Only when releasing a page is the associated region/reserv map
* deleted. The region/reserv map for ranges without associated
- * pages are not modified.
- *
- * Callers of this routine must hold the i_mmap_rwsem in write mode to prevent
- * races with page faults.
- *
+ * pages are not modified. Page faults can race with hole punch.
+ * This is indicated if we find a mapped page.
* Note: If the passed end of range value is beyond the end of file, but
* not LLONG_MAX this routine still performs a hole punch operation.
*/
@@ -422,14 +423,32 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
for (i = 0; i < pagevec_count(&pvec); ++i) {
struct page *page = pvec.pages[i];
+ u32 hash;
index = page->index;
+ hash = hugetlb_fault_mutex_hash(h, current->mm,
+ &pseudo_vma,
+ mapping, index, 0);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
/*
- * A mapped page is impossible as callers should unmap
- * all references before calling. And, i_mmap_rwsem
- * prevents the creation of additional mappings.
+ * If page is mapped, it was faulted in after being
+ * unmapped in caller. Unmap (again) now after taking
+ * the fault mutex. The mutex will prevent faults
+ * until we finish removing the page.
+ *
+ * This race can only happen in the hole punch case.
+ * Getting here in a truncate operation is a bug.
*/
- VM_BUG_ON(page_mapped(page));
+ if (unlikely(page_mapped(page))) {
+ BUG_ON(truncate_op);
+
+ i_mmap_lock_write(mapping);
+ hugetlb_vmdelete_list(&mapping->i_mmap,
+ index * pages_per_huge_page(h),
+ (index + 1) * pages_per_huge_page(h));
+ i_mmap_unlock_write(mapping);
+ }
lock_page(page);
/*
@@ -451,6 +470,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
}
unlock_page(page);
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
}
huge_pagevec_release(&pvec);
cond_resched();
@@ -462,20 +482,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
static void hugetlbfs_evict_inode(struct inode *inode)
{
- struct address_space *mapping = inode->i_mapping;
struct resv_map *resv_map;
- /*
- * The vfs layer guarantees that there are no other users of this
- * inode. Therefore, it would be safe to call remove_inode_hugepages
- * without holding i_mmap_rwsem. We acquire and hold here to be
- * consistent with other callers. Since there will be no contention
- * on the semaphore, overhead is negligible.
- */
- i_mmap_lock_write(mapping);
remove_inode_hugepages(inode, 0, LLONG_MAX);
- i_mmap_unlock_write(mapping);
-
resv_map = (struct resv_map *)inode->i_mapping->private_data;
/* root inode doesn't have the resv_map, so we should check it */
if (resv_map)
@@ -496,8 +505,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
i_mmap_lock_write(mapping);
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
- remove_inode_hugepages(inode, offset, LLONG_MAX);
i_mmap_unlock_write(mapping);
+ remove_inode_hugepages(inode, offset, LLONG_MAX);
return 0;
}
@@ -531,8 +540,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
hugetlb_vmdelete_list(&mapping->i_mmap,
hole_start >> PAGE_SHIFT,
hole_end >> PAGE_SHIFT);
- remove_inode_hugepages(inode, hole_start, hole_end);
i_mmap_unlock_write(mapping);
+ remove_inode_hugepages(inode, hole_start, hole_end);
inode_unlock(inode);
}
@@ -615,11 +624,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
/* addr is the offset within the file (zero based) */
addr = index * hpage_size;
- /*
- * fault mutex taken here, protects against fault path
- * and hole punch. inode_lock previously taken protects
- * against truncation.
- */
+ /* mutex taken here, fault path and hole punch */
hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
index, addr);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index d64f622cac8b..fef3a6bf7c78 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -203,7 +203,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
fieinfo.fi_extents_start = ufiemap->fm_extents;
if (fiemap.fm_extent_count != 0 &&
- !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start,
+ !access_ok(fieinfo.fi_extents_start,
fieinfo.fi_extents_max * sizeof(struct fiemap_extent)))
return -EFAULT;
diff --git a/fs/locks.c b/fs/locks.c
index f0b24d98f36b..ff6af2c32601 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -453,7 +453,7 @@ static void locks_move_blocks(struct file_lock *new, struct file_lock *fl)
return;
spin_lock(&blocked_lock_lock);
list_splice_init(&fl->fl_blocked_requests, &new->fl_blocked_requests);
- list_for_each_entry(f, &fl->fl_blocked_requests, fl_blocked_member)
+ list_for_each_entry(f, &new->fl_blocked_requests, fl_blocked_member)
f->fl_blocker = new;
spin_unlock(&blocked_lock_lock);
}
diff --git a/fs/namespace.c b/fs/namespace.c
index a7f91265ea67..a677b59efd74 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -26,6 +26,7 @@
#include <linux/memblock.h>
#include <linux/task_work.h>
#include <linux/sched/task.h>
+#include <uapi/linux/mount.h>
#include "pnode.h"
#include "internal.h"
@@ -245,13 +246,9 @@ out_free_cache:
* mnt_want/drop_write() will _keep_ the filesystem
* r/w.
*/
-int __mnt_is_readonly(struct vfsmount *mnt)
+bool __mnt_is_readonly(struct vfsmount *mnt)
{
- if (mnt->mnt_flags & MNT_READONLY)
- return 1;
- if (sb_rdonly(mnt->mnt_sb))
- return 1;
- return 0;
+ return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);
@@ -507,11 +504,12 @@ static int mnt_make_readonly(struct mount *mnt)
return ret;
}
-static void __mnt_unmake_readonly(struct mount *mnt)
+static int __mnt_unmake_readonly(struct mount *mnt)
{
lock_mount_hash();
mnt->mnt.mnt_flags &= ~MNT_READONLY;
unlock_mount_hash();
+ return 0;
}
int sb_prepare_remount_readonly(struct super_block *sb)
@@ -1360,7 +1358,7 @@ static void namespace_unlock(void)
if (likely(hlist_empty(&head)))
return;
- synchronize_rcu();
+ synchronize_rcu_expedited();
group_pin_kill(&head);
}
@@ -2215,21 +2213,91 @@ out:
return err;
}
-static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
+/*
+ * Don't allow locked mount flags to be cleared.
+ *
+ * No locks need to be held here while testing the various MNT_LOCK
+ * flags because those flags can never be cleared once they are set.
+ */
+static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
{
- int error = 0;
- int readonly_request = 0;
+ unsigned int fl = mnt->mnt.mnt_flags;
+
+ if ((fl & MNT_LOCK_READONLY) &&
+ !(mnt_flags & MNT_READONLY))
+ return false;
- if (ms_flags & MS_RDONLY)
- readonly_request = 1;
- if (readonly_request == __mnt_is_readonly(mnt))
+ if ((fl & MNT_LOCK_NODEV) &&
+ !(mnt_flags & MNT_NODEV))
+ return false;
+
+ if ((fl & MNT_LOCK_NOSUID) &&
+ !(mnt_flags & MNT_NOSUID))
+ return false;
+
+ if ((fl & MNT_LOCK_NOEXEC) &&
+ !(mnt_flags & MNT_NOEXEC))
+ return false;
+
+ if ((fl & MNT_LOCK_ATIME) &&
+ ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
+ return false;
+
+ return true;
+}
+
+static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
+{
+ bool readonly_request = (mnt_flags & MNT_READONLY);
+
+ if (readonly_request == __mnt_is_readonly(&mnt->mnt))
return 0;
if (readonly_request)
- error = mnt_make_readonly(real_mount(mnt));
- else
- __mnt_unmake_readonly(real_mount(mnt));
- return error;
+ return mnt_make_readonly(mnt);
+
+ return __mnt_unmake_readonly(mnt);
+}
+
+/*
+ * Update the user-settable attributes on a mount. The caller must hold
+ * sb->s_umount for writing.
+ */
+static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
+{
+ lock_mount_hash();
+ mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
+ mnt->mnt.mnt_flags = mnt_flags;
+ touch_mnt_namespace(mnt->mnt_ns);
+ unlock_mount_hash();
+}
+
+/*
+ * Handle reconfiguration of the mountpoint only without alteration of the
+ * superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND
+ * to mount(2).
+ */
+static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
+{
+ struct super_block *sb = path->mnt->mnt_sb;
+ struct mount *mnt = real_mount(path->mnt);
+ int ret;
+
+ if (!check_mnt(mnt))
+ return -EINVAL;
+
+ if (path->dentry != mnt->mnt.mnt_root)
+ return -EINVAL;
+
+ if (!can_change_locked_flags(mnt, mnt_flags))
+ return -EPERM;
+
+ down_write(&sb->s_umount);
+ ret = change_mount_ro_state(mnt, mnt_flags);
+ if (ret == 0)
+ set_mount_attributes(mnt, mnt_flags);
+ up_write(&sb->s_umount);
+ return ret;
}
/*
@@ -2243,6 +2311,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
int err;
struct super_block *sb = path->mnt->mnt_sb;
struct mount *mnt = real_mount(path->mnt);
+ void *sec_opts = NULL;
if (!check_mnt(mnt))
return -EINVAL;
@@ -2250,50 +2319,25 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;
- /* Don't allow changing of locked mnt flags.
- *
- * No locks need to be held here while testing the various
- * MNT_LOCK flags because those flags can never be cleared
- * once they are set.
- */
- if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
- !(mnt_flags & MNT_READONLY)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
- !(mnt_flags & MNT_NODEV)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
- !(mnt_flags & MNT_NOSUID)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
- !(mnt_flags & MNT_NOEXEC)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
- ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
+ if (!can_change_locked_flags(mnt, mnt_flags))
return -EPERM;
- }
- err = security_sb_remount(sb, data);
+ if (data && !(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)) {
+ err = security_sb_eat_lsm_opts(data, &sec_opts);
+ if (err)
+ return err;
+ }
+ err = security_sb_remount(sb, sec_opts);
+ security_free_mnt_opts(&sec_opts);
if (err)
return err;
down_write(&sb->s_umount);
- if (ms_flags & MS_BIND)
- err = change_mount_flags(path->mnt, ms_flags);
- else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
- err = -EPERM;
- else
+ err = -EPERM;
+ if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
err = do_remount_sb(sb, sb_flags, data, 0);
- if (!err) {
- lock_mount_hash();
- mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
- mnt->mnt.mnt_flags = mnt_flags;
- touch_mnt_namespace(mnt->mnt_ns);
- unlock_mount_hash();
+ if (!err)
+ set_mount_attributes(mnt, mnt_flags);
}
up_write(&sb->s_umount);
return err;
@@ -2651,7 +2695,7 @@ static long exact_copy_from_user(void *to, const void __user * from,
const char __user *f = from;
char c;
- if (!access_ok(VERIFY_READ, from, n))
+ if (!access_ok(from, n))
return n;
current->kernel_uaccess_faults_ok++;
@@ -2788,7 +2832,9 @@ long do_mount(const char *dev_name, const char __user *dir_name,
SB_LAZYTIME |
SB_I_VERSION);
- if (flags & MS_REMOUNT)
+ if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
+ retval = do_reconfigure_mnt(&path, mnt_flags);
+ else if (flags & MS_REMOUNT)
retval = do_remount(&path, flags, sb_flags, mnt_flags,
data_page);
else if (flags & MS_BIND)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7f80f036ebd9..b1e577302518 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -123,7 +123,7 @@ struct nfs_parsed_mount_data {
unsigned short protocol;
} nfs_server;
- struct security_mnt_opts lsm_opts;
+ void *lsm_opts;
struct net *net;
};
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 46d691ba04bc..45b2322e092d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -133,15 +133,9 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t count, unsigned int flags)
{
- ssize_t ret;
-
if (file_inode(file_in) == file_inode(file_out))
return -EINVAL;
-retry:
- ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
- if (ret == -EAGAIN)
- goto retry;
- return ret;
+ return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
}
static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 7c942462d8c6..22ce3c8a2f46 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -929,7 +929,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
data->minorversion = 0;
data->need_mount = true;
data->net = current->nsproxy->net_ns;
- security_init_mnt_opts(&data->lsm_opts);
+ data->lsm_opts = NULL;
}
return data;
}
@@ -1206,7 +1206,7 @@ static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option,
static int nfs_parse_mount_options(char *raw,
struct nfs_parsed_mount_data *mnt)
{
- char *p, *string, *secdata;
+ char *p, *string;
int rc, sloppy = 0, invalid_option = 0;
unsigned short protofamily = AF_UNSPEC;
unsigned short mountfamily = AF_UNSPEC;
@@ -1217,20 +1217,10 @@ static int nfs_parse_mount_options(char *raw,
}
dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
- secdata = alloc_secdata();
- if (!secdata)
- goto out_nomem;
-
- rc = security_sb_copy_data(raw, secdata);
+ rc = security_sb_eat_lsm_opts(raw, &mnt->lsm_opts);
if (rc)
goto out_security_failure;
- rc = security_sb_parse_opts_str(secdata, &mnt->lsm_opts);
- if (rc)
- goto out_security_failure;
-
- free_secdata(secdata);
-
while ((p = strsep(&raw, ",")) != NULL) {
substring_t args[MAX_OPT_ARGS];
unsigned long option;
@@ -1682,7 +1672,6 @@ out_nomem:
printk(KERN_INFO "NFS: not enough memory to parse option\n");
return 0;
out_security_failure:
- free_secdata(secdata);
printk(KERN_INFO "NFS: security options invalid: %d\n", rc);
return 0;
}
@@ -2081,14 +2070,9 @@ static int nfs23_validate_mount_data(void *options,
if (data->context[0]){
#ifdef CONFIG_SECURITY_SELINUX
int rc;
- char *opts_str = kmalloc(sizeof(data->context) + 8, GFP_KERNEL);
- if (!opts_str)
- return -ENOMEM;
- strcpy(opts_str, "context=");
data->context[NFS_MAX_CONTEXT_LEN] = '\0';
- strcat(opts_str, &data->context[0]);
- rc = security_sb_parse_opts_str(opts_str, &args->lsm_opts);
- kfree(opts_str);
+ rc = security_add_mnt_opt("context", data->context,
+ strlen(data->context), &args->lsm_opts);
if (rc)
return rc;
#else
@@ -2271,7 +2255,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
options->version <= 6))))
return 0;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ data = nfs_alloc_parsed_mount_data();
if (data == NULL)
return -ENOMEM;
@@ -2310,8 +2294,10 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
/* compare new mount options with old ones */
error = nfs_compare_remount_data(nfss, data);
+ if (!error)
+ error = security_sb_remount(sb, data->lsm_opts);
out:
- kfree(data);
+ nfs_free_parsed_mount_data(data);
return error;
}
EXPORT_SYMBOL_GPL(nfs_remount);
@@ -2548,7 +2534,7 @@ int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
kflags |= SECURITY_LSM_NATIVE_LABELS;
- error = security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts,
+ error = security_sb_set_mnt_opts(s, mount_info->parsed->lsm_opts,
kflags, &kflags_out);
if (error)
goto err;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 105576daca4a..798f1253141a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -724,8 +724,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
return -EBADF;
/* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */
- if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE)))
- return -EINVAL;
+ if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) {
+ ret = -EINVAL;
+ goto fput_and_out;
+ }
/* verify that this is indeed an inotify instance */
if (unlikely(f.file->f_op != &inotify_fops)) {
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index eb1ce30412dc..832c1759a09a 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -30,6 +30,7 @@
#include <linux/quotaops.h>
#include <linux/blkdev.h>
#include <linux/uio.h>
+#include <linux/mm.h>
#include <cluster/masklog.h>
@@ -397,7 +398,7 @@ static int ocfs2_readpages(struct file *filp, struct address_space *mapping,
* Check whether a remote node truncated this file - we just
* drop out in that case as it's not worth handling here.
*/
- last = list_entry(pages->prev, struct page, lru);
+ last = lru_to_page(pages);
start = (loff_t)last->index << PAGE_SHIFT;
if (start >= i_size_read(inode))
goto out_unlock;
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b8fa1487cd85..8decbe95dcec 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -254,7 +254,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
if (!count)
return 0;
- if (!access_ok(VERIFY_WRITE, buf, count))
+ if (!access_ok(buf, count))
return -EFAULT;
/* don't read past the lvb */
@@ -302,7 +302,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
if (!count)
return 0;
- if (!access_ok(VERIFY_READ, buf, count))
+ if (!access_ok(buf, count))
return -EFAULT;
/* don't write past the lvb */
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index fe53381b26b1..f038235c64bd 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -77,7 +77,7 @@ static int orangefs_readpages(struct file *file,
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page;
- page = list_entry(pages->prev, struct page, lru);
+ page = lru_to_page(pages);
list_del(&page->lru);
if (!add_to_page_cache(page,
mapping,
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index c4e98c9c1621..443bcd8c3c19 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -105,7 +105,7 @@ static int wait_for_free(struct slot_map *m)
left = t;
else
left = t + (left - n);
- if (unlikely(signal_pending(current)))
+ if (signal_pending(current))
left = -EINTR;
} while (left > 0);
diff --git a/fs/pnode.c b/fs/pnode.c
index 53d411a371ce..1100e810d855 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -10,6 +10,7 @@
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
+#include <uapi/linux/mount.h>
#include "internal.h"
#include "pnode.h"
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d7fd1ca807d2..633a63462573 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -581,8 +581,10 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
/*
* print the file header
*/
- seq_printf(m, "%-25s %-20s %-20s %-10s\n",
- "Limit", "Soft Limit", "Hard Limit", "Units");
+ seq_puts(m, "Limit "
+ "Soft Limit "
+ "Hard Limit "
+ "Units \n");
for (i = 0; i < RLIM_NLIMITS; i++) {
if (rlim[i].rlim_cur == RLIM_INFINITY)
@@ -2356,10 +2358,13 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
return -ESRCH;
if (p != current) {
- if (!capable(CAP_SYS_NICE)) {
+ rcu_read_lock();
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+ rcu_read_unlock();
count = -EPERM;
goto out;
}
+ rcu_read_unlock();
err = security_task_setscheduler(p);
if (err) {
@@ -2392,11 +2397,14 @@ static int timerslack_ns_show(struct seq_file *m, void *v)
return -ESRCH;
if (p != current) {
-
- if (!capable(CAP_SYS_NICE)) {
+ rcu_read_lock();
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+ rcu_read_unlock();
err = -EPERM;
goto out;
}
+ rcu_read_unlock();
+
err = security_task_getscheduler(p);
if (err)
goto out;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 5792f9e39466..da649ccd6804 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -59,7 +59,6 @@ static struct kmem_cache *pde_opener_cache __ro_after_init;
static struct inode *proc_alloc_inode(struct super_block *sb)
{
struct proc_inode *ei;
- struct inode *inode;
ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
if (!ei)
@@ -71,8 +70,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
ei->sysctl = NULL;
ei->sysctl_entry = NULL;
ei->ns_ops = NULL;
- inode = &ei->vfs_inode;
- return inode;
+ return &ei->vfs_inode;
}
static void proc_i_callback(struct rcu_head *head)
diff --git a/fs/proc/util.c b/fs/proc/util.c
index b161cfa0f9fa..98f8adc17345 100644
--- a/fs/proc/util.c
+++ b/fs/proc/util.c
@@ -1,4 +1,5 @@
#include <linux/dcache.h>
+#include "internal.h"
unsigned name_to_int(const struct qstr *qstr)
{
diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c
index 24db02de1787..97fcef74e5af 100644
--- a/fs/pstore/pmsg.c
+++ b/fs/pstore/pmsg.c
@@ -33,7 +33,7 @@ static ssize_t write_pmsg(struct file *file, const char __user *buf,
record.size = count;
/* check outside lock, page in any data. write_user also checks */
- if (!access_ok(VERIFY_READ, buf, count))
+ if (!access_ok(buf, count))
return -EFAULT;
mutex_lock(&pmsg_lock);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 96f7d32cd184..898c8321b343 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -128,7 +128,6 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id,
struct pstore_record *record)
{
struct persistent_ram_zone *prz;
- bool update = (record->type == PSTORE_TYPE_DMESG);
/* Give up if we never existed or have hit the end. */
if (!przs)
@@ -139,7 +138,7 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id,
return NULL;
/* Update old/shadowed buffer. */
- if (update)
+ if (prz->type == PSTORE_TYPE_DMESG)
persistent_ram_save_old(prz);
if (!persistent_ram_old_size(prz))
@@ -711,18 +710,15 @@ static int ramoops_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct ramoops_platform_data *pdata = dev->platform_data;
+ struct ramoops_platform_data pdata_local;
struct ramoops_context *cxt = &oops_cxt;
size_t dump_mem_sz;
phys_addr_t paddr;
int err = -EINVAL;
if (dev_of_node(dev) && !pdata) {
- pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
- if (!pdata) {
- pr_err("cannot allocate platform data buffer\n");
- err = -ENOMEM;
- goto fail_out;
- }
+ pdata = &pdata_local;
+ memset(pdata, 0, sizeof(*pdata));
err = ramoops_parse_dt(pdev, pdata);
if (err < 0)
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index c11711c2cc83..f375c0735351 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -357,7 +357,7 @@ int notrace persistent_ram_write_user(struct persistent_ram_zone *prz,
int rem, ret = 0, c = count;
size_t start;
- if (unlikely(!access_ok(VERIFY_READ, s, count)))
+ if (unlikely(!access_ok(s, count)))
return -EFAULT;
if (unlikely(c > prz->buffer_size)) {
s += c - prz->buffer_size;
diff --git a/fs/read_write.c b/fs/read_write.c
index 58f30537c47a..ff3c5e6f87cf 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -442,7 +442,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
return -EBADF;
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
- if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
+ if (unlikely(!access_ok(buf, count)))
return -EFAULT;
ret = rw_verify_area(READ, file, pos, count);
@@ -538,7 +538,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
- if (unlikely(!access_ok(VERIFY_READ, buf, count)))
+ if (unlikely(!access_ok(buf, count)))
return -EFAULT;
ret = rw_verify_area(WRITE, file, pos, count);
@@ -718,9 +718,6 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
return ret;
}
-/* A write operation does a read from user space and vice versa */
-#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
-
/**
* rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
* into the kernel and check that it is valid.
@@ -810,7 +807,7 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
goto out;
}
if (type >= 0
- && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
+ && unlikely(!access_ok(buf, len))) {
ret = -EFAULT;
goto out;
}
@@ -856,7 +853,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
*ret_pointer = iov;
ret = -EFAULT;
- if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
+ if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
goto out;
/*
@@ -881,7 +878,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
if (len < 0) /* size_t not fitting in compat_ssize_t .. */
goto out;
if (type >= 0 &&
- !access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
+ !access_ok(compat_ptr(buf), len)) {
ret = -EFAULT;
goto out;
}
diff --git a/fs/readdir.c b/fs/readdir.c
index d97f548e6323..2f6a4534e0df 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -105,7 +105,7 @@ static int fillonedir(struct dir_context *ctx, const char *name, int namlen,
}
buf->result++;
dirent = buf->dirent;
- if (!access_ok(VERIFY_WRITE, dirent,
+ if (!access_ok(dirent,
(unsigned long)(dirent->d_name + namlen + 1) -
(unsigned long)dirent))
goto efault;
@@ -221,7 +221,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
};
int error;
- if (!access_ok(VERIFY_WRITE, dirent, count))
+ if (!access_ok(dirent, count))
return -EFAULT;
f = fdget_pos(fd);
@@ -304,7 +304,7 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
};
int error;
- if (!access_ok(VERIFY_WRITE, dirent, count))
+ if (!access_ok(dirent, count))
return -EFAULT;
f = fdget_pos(fd);
@@ -365,7 +365,7 @@ static int compat_fillonedir(struct dir_context *ctx, const char *name,
}
buf->result++;
dirent = buf->dirent;
- if (!access_ok(VERIFY_WRITE, dirent,
+ if (!access_ok(dirent,
(unsigned long)(dirent->d_name + namlen + 1) -
(unsigned long)dirent))
goto efault;
@@ -475,7 +475,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
};
int error;
- if (!access_ok(VERIFY_WRITE, dirent, count))
+ if (!access_ok(dirent, count))
return -EFAULT;
f = fdget_pos(fd);
diff --git a/fs/select.c b/fs/select.c
index 4c8652390c94..d0f35dbc0e8f 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -381,9 +381,6 @@ typedef struct {
#define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long))
/*
- * We do a VERIFY_WRITE here even though we are only reading this time:
- * we'll write to it eventually..
- *
* Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
*/
static inline
@@ -782,7 +779,7 @@ SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
sigset_t __user *up = NULL;
if (sig) {
- if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
+ if (!access_ok(sig, sizeof(void *)+sizeof(size_t))
|| __get_user(up, (sigset_t __user * __user *)sig)
|| __get_user(sigsetsize,
(size_t __user *)(sig+sizeof(void *))))
@@ -802,7 +799,7 @@ SYSCALL_DEFINE6(pselect6_time32, int, n, fd_set __user *, inp, fd_set __user *,
sigset_t __user *up = NULL;
if (sig) {
- if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t))
+ if (!access_ok(sig, sizeof(void *)+sizeof(size_t))
|| __get_user(up, (sigset_t __user * __user *)sig)
|| __get_user(sigsetsize,
(size_t __user *)(sig+sizeof(void *))))
@@ -1368,7 +1365,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp,
compat_uptr_t up = 0;
if (sig) {
- if (!access_ok(VERIFY_READ, sig,
+ if (!access_ok(sig,
sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
__get_user(up, (compat_uptr_t __user *)sig) ||
__get_user(sigsetsize,
@@ -1390,7 +1387,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
compat_uptr_t up = 0;
if (sig) {
- if (!access_ok(VERIFY_READ, sig,
+ if (!access_ok(sig,
sizeof(compat_uptr_t)+sizeof(compat_size_t)) ||
__get_user(up, (compat_uptr_t __user *)sig) ||
__get_user(sigsetsize,
diff --git a/fs/super.c b/fs/super.c
index ca53a08497ed..48e25eba8465 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -35,6 +35,7 @@
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
#include <linux/user_namespace.h>
+#include <uapi/linux/mount.h>
#include "internal.h"
static int thaw_super_locked(struct super_block *sb);
@@ -1245,17 +1246,13 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
{
struct dentry *root;
struct super_block *sb;
- char *secdata = NULL;
int error = -ENOMEM;
+ void *sec_opts = NULL;
if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
- secdata = alloc_secdata();
- if (!secdata)
- goto out;
-
- error = security_sb_copy_data(data, secdata);
+ error = security_sb_eat_lsm_opts(data, &sec_opts);
if (error)
- goto out_free_secdata;
+ return ERR_PTR(error);
}
root = type->mount(type, flags, name, data);
@@ -1276,10 +1273,16 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
smp_wmb();
sb->s_flags |= SB_BORN;
- error = security_sb_kern_mount(sb, flags, secdata);
+ error = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL);
if (error)
goto out_sb;
+ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT))) {
+ error = security_sb_kern_mount(sb);
+ if (error)
+ goto out_sb;
+ }
+
/*
* filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
* but s_maxbytes was an unsigned long long for many releases. Throw
@@ -1290,14 +1293,13 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
"negative value (%lld)\n", type->name, sb->s_maxbytes);
up_write(&sb->s_umount);
- free_secdata(secdata);
+ security_free_mnt_opts(&sec_opts);
return root;
out_sb:
dput(root);
deactivate_locked_super(sb);
out_free_secdata:
- free_secdata(secdata);
-out:
+ security_free_mnt_opts(&sec_opts);
return ERR_PTR(error);
}
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index feeae8081c22..aa85f2874a9f 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -43,7 +43,8 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
kuid_t uid;
kgid_t gid;
- BUG_ON(!kobj);
+ if (WARN_ON(!kobj))
+ return -EINVAL;
if (kobj->parent)
parent = kobj->parent->sd;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index bb71db63c99c..51398457fe00 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -325,7 +325,8 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
kuid_t uid;
kgid_t gid;
- BUG_ON(!kobj || !kobj->sd || !attr);
+ if (WARN_ON(!kobj || !kobj->sd || !attr))
+ return -EINVAL;
kobject_get_ownership(kobj, &uid, &gid);
return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode,
@@ -537,7 +538,8 @@ int sysfs_create_bin_file(struct kobject *kobj,
kuid_t uid;
kgid_t gid;
- BUG_ON(!kobj || !kobj->sd || !attr);
+ if (WARN_ON(!kobj || !kobj->sd || !attr))
+ return -EINVAL;
kobject_get_ownership(kobj, &uid, &gid);
return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true,
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 1eb2d6307663..57038604d4a8 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -112,7 +112,8 @@ static int internal_create_group(struct kobject *kobj, int update,
kgid_t gid;
int error;
- BUG_ON(!kobj || (!update && !kobj->sd));
+ if (WARN_ON(!kobj || (!update && !kobj->sd)))
+ return -EINVAL;
/* Updates may happen before the object has been instantiated */
if (unlikely(update && !kobj->sd))
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 215c225b2ca1..c4deecc80f67 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -23,7 +23,8 @@ static int sysfs_do_create_link_sd(struct kernfs_node *parent,
{
struct kernfs_node *kn, *target = NULL;
- BUG_ON(!name || !parent);
+ if (WARN_ON(!name || !parent))
+ return -EINVAL;
/*
* We don't own @target_kobj and it may be removed at any time.
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index b21ea2ba768d..eedc5e0156ff 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1992,7 +1992,6 @@ xfs_buf_delwri_submit_buffers(
struct list_head *wait_list)
{
struct xfs_buf *bp, *n;
- LIST_HEAD (submit_list);
int pinned = 0;
struct blk_plug plug;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index ec2e63a7963b..f3ef70c542e1 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -40,7 +40,6 @@ xfs_growfs_data_private(
xfs_rfsblock_t new;
xfs_agnumber_t oagcount;
xfs_trans_t *tp;
- LIST_HEAD (buffer_list);
struct aghdr_init_data id = {};
nb = in->newblocks;