summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c7
-rw-r--r--fs/9p/vfs_inode_dotl.c7
-rw-r--r--fs/adfs/inode.c2
-rw-r--r--fs/adfs/super.c1
-rw-r--r--fs/afs/dir.c45
-rw-r--r--fs/afs/dynroot.c25
-rw-r--r--fs/afs/rxrpc.c2
-rw-r--r--fs/aio.c237
-rw-r--r--fs/anon_inodes.c30
-rw-r--r--fs/attr.c5
-rw-r--r--fs/autofs/dev-ioctl.c22
-rw-r--r--fs/bad_inode.c2
-rw-r--r--fs/binfmt_elf.c7
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/block_dev.c9
-rw-r--r--fs/btrfs/acl.c13
-rw-r--r--fs/btrfs/backref.c6
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/check-integrity.c9
-rw-r--r--fs/btrfs/compression.c18
-rw-r--r--fs/btrfs/ctree.c53
-rw-r--r--fs/btrfs/ctree.h89
-rw-r--r--fs/btrfs/delayed-inode.c14
-rw-r--r--fs/btrfs/delayed-inode.h2
-rw-r--r--fs/btrfs/delayed-ref.c43
-rw-r--r--fs/btrfs/delayed-ref.h6
-rw-r--r--fs/btrfs/dev-replace.c29
-rw-r--r--fs/btrfs/dir-item.c4
-rw-r--r--fs/btrfs/disk-io.c113
-rw-r--r--fs/btrfs/disk-io.h5
-rw-r--r--fs/btrfs/extent-tree.c883
-rw-r--r--fs/btrfs/extent_io.c163
-rw-r--r--fs/btrfs/extent_io.h16
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/file.c128
-rw-r--r--fs/btrfs/free-space-cache.c19
-rw-r--r--fs/btrfs/free-space-tree.c2
-rw-r--r--fs/btrfs/inode-map.c12
-rw-r--r--fs/btrfs/inode.c267
-rw-r--r--fs/btrfs/ioctl.c71
-rw-r--r--fs/btrfs/ordered-data.c138
-rw-r--r--fs/btrfs/ordered-data.h23
-rw-r--r--fs/btrfs/print-tree.c39
-rw-r--r--fs/btrfs/qgroup.c270
-rw-r--r--fs/btrfs/qgroup.h46
-rw-r--r--fs/btrfs/raid56.c109
-rw-r--r--fs/btrfs/reada.c3
-rw-r--r--fs/btrfs/relocation.c216
-rw-r--r--fs/btrfs/root-tree.c22
-rw-r--r--fs/btrfs/scrub.c678
-rw-r--r--fs/btrfs/send.c172
-rw-r--r--fs/btrfs/struct-funcs.c1
-rw-r--r--fs/btrfs/super.c115
-rw-r--r--fs/btrfs/sysfs.c2
-rw-r--r--fs/btrfs/tests/qgroup-tests.c24
-rw-r--r--fs/btrfs/transaction.c11
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-checker.c115
-rw-r--r--fs/btrfs/tree-log.c270
-rw-r--r--fs/btrfs/volumes.c609
-rw-r--r--fs/btrfs/volumes.h31
-rw-r--r--fs/cachefiles/bind.c3
-rw-r--r--fs/cachefiles/namei.c3
-rw-r--r--fs/cachefiles/rdwr.c17
-rw-r--r--fs/ceph/file.c7
-rw-r--r--fs/ceph/super.h3
-rw-r--r--fs/cifs/Kconfig59
-rw-r--r--fs/cifs/cache.c6
-rw-r--r--fs/cifs/cifs_debug.c66
-rw-r--r--fs/cifs/cifsencrypt.c12
-rw-r--r--fs/cifs/cifsfs.c33
-rw-r--r--fs/cifs/cifsfs.h3
-rw-r--r--fs/cifs/cifsglob.h54
-rw-r--r--fs/cifs/cifsproto.h10
-rw-r--r--fs/cifs/cifssmb.c12
-rw-r--r--fs/cifs/connect.c111
-rw-r--r--fs/cifs/dir.c7
-rw-r--r--fs/cifs/fscache.c12
-rw-r--r--fs/cifs/fscache.h8
-rw-r--r--fs/cifs/inode.c38
-rw-r--r--fs/cifs/link.c4
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/cifs/netmisc.c19
-rw-r--r--fs/cifs/smb1ops.c4
-rw-r--r--fs/cifs/smb2inode.c6
-rw-r--r--fs/cifs/smb2misc.c13
-rw-r--r--fs/cifs/smb2ops.c540
-rw-r--r--fs/cifs/smb2pdu.c557
-rw-r--r--fs/cifs/smb2pdu.h24
-rw-r--r--fs/cifs/smb2proto.h24
-rw-r--r--fs/cifs/smb2transport.c9
-rw-r--r--fs/cifs/trace.h64
-rw-r--r--fs/cifs/transport.c211
-rw-r--r--fs/cifs/xattr.c28
-rw-r--r--fs/dax.c10
-rw-r--r--fs/dcache.c81
-rw-r--r--fs/efivarfs/inode.c4
-rw-r--r--fs/exec.c7
-rw-r--r--fs/ext2/ialloc.c3
-rw-r--r--fs/ext2/namei.c9
-rw-r--r--fs/ext4/balloc.c9
-rw-r--r--fs/ext4/ext4.h32
-rw-r--r--fs/ext4/extents.c17
-rw-r--r--fs/ext4/ialloc.c16
-rw-r--r--fs/ext4/inline.c19
-rw-r--r--fs/ext4/inode.c81
-rw-r--r--fs/ext4/mballoc.c7
-rw-r--r--fs/ext4/mmp.c13
-rw-r--r--fs/ext4/move_extent.c4
-rw-r--r--fs/ext4/namei.c1
-rw-r--r--fs/ext4/super.c85
-rw-r--r--fs/ext4/sysfs.c32
-rw-r--r--fs/ext4/truncate.h4
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/fat/inode.c20
-rw-r--r--fs/file_table.c85
-rw-r--r--fs/fscache/cache.c2
-rw-r--r--fs/fscache/cookie.c7
-rw-r--r--fs/fscache/object.c1
-rw-r--r--fs/fscache/operation.c6
-rw-r--r--fs/fuse/dir.c25
-rw-r--r--fs/gfs2/inode.c32
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hostfs/hostfs_kern.c28
-rw-r--r--fs/hpfs/dir.c23
-rw-r--r--fs/hugetlbfs/inode.c56
-rw-r--r--fs/inode.c53
-rw-r--r--fs/internal.h6
-rw-r--r--fs/iomap.c2
-rw-r--r--fs/jbd2/commit.c3
-rw-r--r--fs/jfs/jfs_dinode.h7
-rw-r--r--fs/jfs/jfs_imap.c8
-rw-r--r--fs/jfs/jfs_incore.h1
-rw-r--r--fs/jfs/jfs_inode.c10
-rw-r--r--fs/jfs/namei.c12
-rw-r--r--fs/jfs/super.c5
-rw-r--r--fs/locks.c20
-rw-r--r--fs/namei.c261
-rw-r--r--fs/namespace.c28
-rw-r--r--fs/nfs/dir.c23
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4proc.c28
-rw-r--r--fs/nfsd/vfs.c2
-rw-r--r--fs/open.c88
-rw-r--r--fs/pipe.c43
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/reiserfs/prints.c141
-rw-r--r--fs/squashfs/block.c2
-rw-r--r--fs/squashfs/cache.c3
-rw-r--r--fs/squashfs/file.c58
-rw-r--r--fs/squashfs/file_cache.c4
-rw-r--r--fs/squashfs/file_direct.c24
-rw-r--r--fs/squashfs/fragment.c17
-rw-r--r--fs/squashfs/squashfs.h3
-rw-r--r--fs/squashfs/squashfs_fs.h6
-rw-r--r--fs/squashfs/squashfs_fs_sb.h1
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/timerfd.c14
-rw-r--r--fs/udf/namei.c12
-rw-r--r--fs/ufs/ialloc.c3
-rw-r--r--fs/ufs/namei.c9
-rw-r--r--fs/userfaultfd.c4
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c6
-rw-r--r--fs/xfs/xfs_iops.c2
164 files changed, 4258 insertions, 4623 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 42e102e2e74a..85ff859d3af5 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -859,8 +859,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
static int
v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned flags, umode_t mode,
- int *opened)
+ struct file *file, unsigned flags, umode_t mode)
{
int err;
u32 perm;
@@ -917,7 +916,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
v9inode->writeback_fid = (void *) inode_fid;
}
mutex_unlock(&v9inode->v_mutex);
- err = finish_open(file, dentry, generic_file_open, opened);
+ err = finish_open(file, dentry, generic_file_open);
if (err)
goto error;
@@ -925,7 +924,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
v9fs_cache_inode_set_cookie(d_inode(dentry), file);
- *opened |= FILE_CREATED;
+ file->f_mode |= FMODE_CREATED;
out:
dput(res);
return err;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 7f6ae21a27b3..4823e1c46999 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -241,8 +241,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
static int
v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned flags, umode_t omode,
- int *opened)
+ struct file *file, unsigned flags, umode_t omode)
{
int err = 0;
kgid_t gid;
@@ -352,13 +351,13 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
}
mutex_unlock(&v9inode->v_mutex);
/* Since we are opening a file, assign the open fid to the file */
- err = finish_open(file, dentry, generic_file_open, opened);
+ err = finish_open(file, dentry, generic_file_open);
if (err)
goto err_clunk_old_fid;
file->private_data = ofid;
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
v9fs_cache_inode_set_cookie(inode, file);
- *opened |= FILE_CREATED;
+ file->f_mode |= FMODE_CREATED;
out:
v9fs_put_acl(dacl, pacl);
dput(res);
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index c836c425ca94..e91028d4340a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -287,7 +287,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
ADFS_I(inode)->mmu_private = inode->i_size;
}
- insert_inode_hash(inode);
+ inode_fake_hash(inode);
out:
return inode;
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 71fa525d63a0..7e099a7a4eb1 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -291,6 +291,7 @@ static void destroy_inodecache(void)
static const struct super_operations adfs_sops = {
.alloc_inode = adfs_alloc_inode,
.destroy_inode = adfs_destroy_inode,
+ .drop_inode = generic_delete_inode,
.write_inode = adfs_write_inode,
.put_super = adfs_put_super,
.statfs = adfs_statfs,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 7d623008157f..855bf2b79fed 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -822,6 +822,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
{
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct inode *inode;
+ struct dentry *d;
struct key *key;
int ret;
@@ -862,43 +863,17 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
afs_stat_v(dvnode, n_lookup);
inode = afs_do_lookup(dir, dentry, key);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
- if (ret == -ENOENT) {
- inode = afs_try_auto_mntpt(dentry, dir);
- if (!IS_ERR(inode)) {
- key_put(key);
- goto success;
- }
-
- ret = PTR_ERR(inode);
- }
-
- key_put(key);
- if (ret == -ENOENT) {
- d_add(dentry, NULL);
- _leave(" = NULL [negative]");
- return NULL;
- }
- _leave(" = %d [do]", ret);
- return ERR_PTR(ret);
- }
- dentry->d_fsdata = (void *)(unsigned long)dvnode->status.data_version;
-
- /* instantiate the dentry */
key_put(key);
- if (IS_ERR(inode)) {
- _leave(" = %ld", PTR_ERR(inode));
- return ERR_CAST(inode);
+ if (inode == ERR_PTR(-ENOENT)) {
+ inode = afs_try_auto_mntpt(dentry, dir);
+ } else {
+ dentry->d_fsdata =
+ (void *)(unsigned long)dvnode->status.data_version;
}
-
-success:
- d_add(dentry, inode);
- _leave(" = 0 { ino=%lu v=%u }",
- d_inode(dentry)->i_ino,
- d_inode(dentry)->i_generation);
-
- return NULL;
+ d = d_splice_alias(inode, dentry);
+ if (!IS_ERR_OR_NULL(d))
+ d->d_fsdata = dentry->d_fsdata;
+ return d;
}
/*
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 174e843f0633..1cde710a8013 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -83,7 +83,7 @@ struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir)
out:
_leave("= %d", ret);
- return ERR_PTR(ret);
+ return ret == -ENOENT ? NULL : ERR_PTR(ret);
}
/*
@@ -141,12 +141,6 @@ out_p:
static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
- struct afs_vnode *vnode;
- struct inode *inode;
- int ret;
-
- vnode = AFS_FS_I(dir);
-
_enter("%pd", dentry);
ASSERTCMP(d_inode(dentry), ==, NULL);
@@ -160,22 +154,7 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr
memcmp(dentry->d_name.name, "@cell", 5) == 0)
return afs_lookup_atcell(dentry);
- inode = afs_try_auto_mntpt(dentry, dir);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
- if (ret == -ENOENT) {
- d_add(dentry, NULL);
- _leave(" = NULL [negative]");
- return NULL;
- }
- _leave(" = %d [do]", ret);
- return ERR_PTR(ret);
- }
-
- d_add(dentry, inode);
- _leave(" = 0 { ino=%lu v=%u }",
- d_inode(dentry)->i_ino, d_inode(dentry)->i_generation);
- return NULL;
+ return d_splice_alias(afs_try_auto_mntpt(dentry, dir), dentry);
}
const struct inode_operations afs_dynroot_inode_operations = {
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a1b18082991b..183cc5418722 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -648,7 +648,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
trace_afs_notify_call(rxcall, call);
call->need_attention = true;
- u = __atomic_add_unless(&call->usage, 1, 0);
+ u = atomic_fetch_add_unless(&call->usage, 1, 0);
if (u != 0) {
trace_afs_call(call, afs_call_trace_wake, u,
atomic_read(&call->net->nr_outstanding_calls),
diff --git a/fs/aio.c b/fs/aio.c
index 210df9da1283..b9350f3360c6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -5,6 +5,7 @@
* Implements an efficient asynchronous io interface.
*
* Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright 2018 Christoph Hellwig.
*
* See ../COPYING for licensing terms.
*/
@@ -18,6 +19,7 @@
#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/backing-dev.h>
+#include <linux/refcount.h>
#include <linux/uio.h>
#include <linux/sched/signal.h>
@@ -164,10 +166,21 @@ struct fsync_iocb {
bool datasync;
};
+struct poll_iocb {
+ struct file *file;
+ struct wait_queue_head *head;
+ __poll_t events;
+ bool woken;
+ bool cancelled;
+ struct wait_queue_entry wait;
+ struct work_struct work;
+};
+
struct aio_kiocb {
union {
struct kiocb rw;
struct fsync_iocb fsync;
+ struct poll_iocb poll;
};
struct kioctx *ki_ctx;
@@ -178,6 +191,7 @@ struct aio_kiocb {
struct list_head ki_list; /* the aio core uses this
* for cancellation */
+ refcount_t ki_refcnt;
/*
* If the aio_resfd field of the userspace iocb is not zero,
@@ -202,9 +216,7 @@ static const struct address_space_operations aio_ctx_aops;
static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
{
- struct qstr this = QSTR_INIT("[aio]", 5);
struct file *file;
- struct path path;
struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
if (IS_ERR(inode))
return ERR_CAST(inode);
@@ -213,31 +225,17 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
inode->i_mapping->private_data = ctx;
inode->i_size = PAGE_SIZE * nr_pages;
- path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
- if (!path.dentry) {
+ file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
+ O_RDWR, &aio_ring_fops);
+ if (IS_ERR(file))
iput(inode);
- return ERR_PTR(-ENOMEM);
- }
- path.mnt = mntget(aio_mnt);
-
- d_instantiate(path.dentry, inode);
- file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
- if (IS_ERR(file)) {
- path_put(&path);
- return file;
- }
-
- file->f_flags = O_RDWR;
return file;
}
static struct dentry *aio_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- static const struct dentry_operations ops = {
- .d_dname = simple_dname,
- };
- struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops,
+ struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL,
AIO_RING_MAGIC);
if (!IS_ERR(root))
@@ -1015,6 +1013,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
percpu_ref_get(&ctx->reqs);
INIT_LIST_HEAD(&req->ki_list);
+ refcount_set(&req->ki_refcnt, 0);
req->ki_ctx = ctx;
return req;
out_put:
@@ -1049,6 +1048,15 @@ out:
return ret;
}
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+ if (refcount_read(&iocb->ki_refcnt) == 0 ||
+ refcount_dec_and_test(&iocb->ki_refcnt)) {
+ percpu_ref_put(&iocb->ki_ctx->reqs);
+ kmem_cache_free(kiocb_cachep, iocb);
+ }
+}
+
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
@@ -1118,8 +1126,6 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
eventfd_ctx_put(iocb->ki_eventfd);
}
- kmem_cache_free(kiocb_cachep, iocb);
-
/*
* We have to order our ring_info tail store above and test
* of the wait list below outside the wait lock. This is
@@ -1130,8 +1136,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
-
- percpu_ref_put(&ctx->reqs);
+ iocb_put(iocb);
}
/* aio_read_events_ring
@@ -1592,6 +1597,182 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
return 0;
}
+static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
+{
+ struct file *file = iocb->poll.file;
+
+ aio_complete(iocb, mangle_poll(mask), 0);
+ fput(file);
+}
+
+static void aio_poll_complete_work(struct work_struct *work)
+{
+ struct poll_iocb *req = container_of(work, struct poll_iocb, work);
+ struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+ struct poll_table_struct pt = { ._key = req->events };
+ struct kioctx *ctx = iocb->ki_ctx;
+ __poll_t mask = 0;
+
+ if (!READ_ONCE(req->cancelled))
+ mask = vfs_poll(req->file, &pt) & req->events;
+
+ /*
+ * Note that ->ki_cancel callers also delete iocb from active_reqs after
+ * calling ->ki_cancel. We need the ctx_lock roundtrip here to
+ * synchronize with them. In the cancellation case the list_del_init
+ * itself is not actually needed, but harmless so we keep it in to
+ * avoid further branches in the fast path.
+ */
+ spin_lock_irq(&ctx->ctx_lock);
+ if (!mask && !READ_ONCE(req->cancelled)) {
+ add_wait_queue(req->head, &req->wait);
+ spin_unlock_irq(&ctx->ctx_lock);
+ return;
+ }
+ list_del_init(&iocb->ki_list);
+ spin_unlock_irq(&ctx->ctx_lock);
+
+ aio_poll_complete(iocb, mask);
+}
+
+/* assumes we are called with irqs disabled */
+static int aio_poll_cancel(struct kiocb *iocb)
+{
+ struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+ struct poll_iocb *req = &aiocb->poll;
+
+ spin_lock(&req->head->lock);
+ WRITE_ONCE(req->cancelled, true);
+ if (!list_empty(&req->wait.entry)) {
+ list_del_init(&req->wait.entry);
+ schedule_work(&aiocb->poll.work);
+ }
+ spin_unlock(&req->head->lock);
+
+ return 0;
+}
+
+static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+ void *key)
+{
+ struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
+ struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+ __poll_t mask = key_to_poll(key);
+
+ req->woken = true;
+
+ /* for instances that support it check for an event match first: */
+ if (mask) {
+ if (!(mask & req->events))
+ return 0;
+
+ /* try to complete the iocb inline if we can: */
+ if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
+ list_del(&iocb->ki_list);
+ spin_unlock(&iocb->ki_ctx->ctx_lock);
+
+ list_del_init(&req->wait.entry);
+ aio_poll_complete(iocb, mask);
+ return 1;
+ }
+ }
+
+ list_del_init(&req->wait.entry);
+ schedule_work(&req->work);
+ return 1;
+}
+
+struct aio_poll_table {
+ struct poll_table_struct pt;
+ struct aio_kiocb *iocb;
+ int error;
+};
+
+static void
+aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
+ struct poll_table_struct *p)
+{
+ struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
+
+ /* multiple wait queues per file are not supported */
+ if (unlikely(pt->iocb->poll.head)) {
+ pt->error = -EINVAL;
+ return;
+ }
+
+ pt->error = 0;
+ pt->iocb->poll.head = head;
+ add_wait_queue(head, &pt->iocb->poll.wait);
+}
+
+static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+{
+ struct kioctx *ctx = aiocb->ki_ctx;
+ struct poll_iocb *req = &aiocb->poll;
+ struct aio_poll_table apt;
+ __poll_t mask;
+
+ /* reject any unknown events outside the normal event mask. */
+ if ((u16)iocb->aio_buf != iocb->aio_buf)
+ return -EINVAL;
+ /* reject fields that are not defined for poll */
+ if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
+ return -EINVAL;
+
+ INIT_WORK(&req->work, aio_poll_complete_work);
+ req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+ req->file = fget(iocb->aio_fildes);
+ if (unlikely(!req->file))
+ return -EBADF;
+
+ apt.pt._qproc = aio_poll_queue_proc;
+ apt.pt._key = req->events;
+ apt.iocb = aiocb;
+ apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
+
+ /* initialized the list so that we can do list_empty checks */
+ INIT_LIST_HEAD(&req->wait.entry);
+ init_waitqueue_func_entry(&req->wait, aio_poll_wake);
+
+ /* one for removal from waitqueue, one for this function */
+ refcount_set(&aiocb->ki_refcnt, 2);
+
+ mask = vfs_poll(req->file, &apt.pt) & req->events;
+ if (unlikely(!req->head)) {
+ /* we did not manage to set up a waitqueue, done */
+ goto out;
+ }
+
+ spin_lock_irq(&ctx->ctx_lock);
+ spin_lock(&req->head->lock);
+ if (req->woken) {
+ /* wake_up context handles the rest */
+ mask = 0;
+ apt.error = 0;
+ } else if (mask || apt.error) {
+ /* if we get an error or a mask we are done */
+ WARN_ON_ONCE(list_empty(&req->wait.entry));
+ list_del_init(&req->wait.entry);
+ } else {
+ /* actually waiting for an event */
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ aiocb->ki_cancel = aio_poll_cancel;
+ }
+ spin_unlock(&req->head->lock);
+ spin_unlock_irq(&ctx->ctx_lock);
+
+out:
+ if (unlikely(apt.error)) {
+ fput(req->file);
+ return apt.error;
+ }
+
+ if (mask)
+ aio_poll_complete(aiocb, mask);
+ iocb_put(aiocb);
+ return 0;
+}
+
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
bool compat)
{
@@ -1665,6 +1846,9 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
case IOCB_CMD_FDSYNC:
ret = aio_fsync(&req->fsync, &iocb, true);
break;
+ case IOCB_CMD_POLL:
+ ret = aio_poll(req, &iocb);
+ break;
default:
pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
ret = -EINVAL;
@@ -1896,6 +2080,11 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
return ret;
}
+struct __aio_sigset {
+ const sigset_t __user *sigmask;
+ size_t sigsetsize;
+};
+
SYSCALL_DEFINE6(io_pgetevents,
aio_context_t, ctx_id,
long, min_nr,
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 3168ee4e77f4..91262c34b797 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -71,8 +71,6 @@ struct file *anon_inode_getfile(const char *name,
const struct file_operations *fops,
void *priv, int flags)
{
- struct qstr this;
- struct path path;
struct file *file;
if (IS_ERR(anon_inode_inode))
@@ -82,39 +80,23 @@ struct file *anon_inode_getfile(const char *name,
return ERR_PTR(-ENOENT);
/*
- * Link the inode to a directory entry by creating a unique name
- * using the inode sequence number.
- */
- file = ERR_PTR(-ENOMEM);
- this.name = name;
- this.len = strlen(name);
- this.hash = 0;
- path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
- if (!path.dentry)
- goto err_module;
-
- path.mnt = mntget(anon_inode_mnt);
- /*
* We know the anon_inode inode count is always greater than zero,
* so ihold() is safe.
*/
ihold(anon_inode_inode);
-
- d_instantiate(path.dentry, anon_inode_inode);
-
- file = alloc_file(&path, OPEN_FMODE(flags), fops);
+ file = alloc_file_pseudo(anon_inode_inode, anon_inode_mnt, name,
+ flags & (O_ACCMODE | O_NONBLOCK), fops);
if (IS_ERR(file))
- goto err_dput;
+ goto err;
+
file->f_mapping = anon_inode_inode->i_mapping;
- file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
file->private_data = priv;
return file;
-err_dput:
- path_put(&path);
-err_module:
+err:
+ iput(anon_inode_inode);
module_put(fops->owner);
return file;
}
diff --git a/fs/attr.c b/fs/attr.c
index e3d53bf12240..d22e8187477f 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -120,7 +120,6 @@ EXPORT_SYMBOL(setattr_prepare);
* inode_newsize_ok - may this inode be truncated to a given size
* @inode: the inode to be truncated
* @offset: the new size to assign to the inode
- * @Returns: 0 on success, -ve errno on failure
*
* inode_newsize_ok must be called with i_mutex held.
*
@@ -130,6 +129,8 @@ EXPORT_SYMBOL(setattr_prepare);
* returned. @inode must be a file (not directory), with appropriate
* permissions to allow truncate (inode_newsize_ok does NOT check these
* conditions).
+ *
+ * Return: 0 on success, -ve errno on failure
*/
int inode_newsize_ok(const struct inode *inode, loff_t offset)
{
@@ -205,7 +206,7 @@ EXPORT_SYMBOL(setattr_copy);
/**
* notify_change - modify attributes of a filesytem object
* @dentry: object affected
- * @iattr: new attributes
+ * @attr: new attributes
* @delegated_inode: returns inode, if the inode is delegated
*
* The caller must hold the i_mutex on the affected object.
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index ea4ca1445ab7..86eafda4a652 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -135,6 +135,15 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
cmd);
goto out;
}
+ } else {
+ unsigned int inr = _IOC_NR(cmd);
+
+ if (inr == AUTOFS_DEV_IOCTL_OPENMOUNT_CMD ||
+ inr == AUTOFS_DEV_IOCTL_REQUESTER_CMD ||
+ inr == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) {
+ err = -EINVAL;
+ goto out;
+ }
}
err = 0;
@@ -271,7 +280,8 @@ static int autofs_dev_ioctl_openmount(struct file *fp,
dev_t devid;
int err, fd;
- /* param->path has already been checked */
+ /* param->path has been checked in validate_dev_ioctl() */
+
if (!param->openmount.devid)
return -EINVAL;
@@ -433,10 +443,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
dev_t devid;
int err = -ENOENT;
- if (param->size <= AUTOFS_DEV_IOCTL_SIZE) {
- err = -EINVAL;
- goto out;
- }
+ /* param->path has been checked in validate_dev_ioctl() */
devid = sbi->sb->s_dev;
@@ -521,10 +528,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
unsigned int devid, magic;
int err = -ENOENT;
- if (param->size <= AUTOFS_DEV_IOCTL_SIZE) {
- err = -EINVAL;
- goto out;
- }
+ /* param->path has been checked in validate_dev_ioctl() */
name = param->path;
type = param->ismountpoint.in.type;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 125e8bbd22a2..8035d2a44561 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -134,7 +134,7 @@ static int bad_inode_update_time(struct inode *inode, struct timespec64 *time,
static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry,
struct file *file, unsigned int open_flag,
- umode_t create_mode, int *opened)
+ umode_t create_mode)
{
return -EIO;
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0ac456b52bdd..efae2fb0930a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1259,9 +1259,8 @@ static int load_elf_library(struct file *file)
goto out_free_ph;
}
- len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
- ELF_MIN_ALIGN - 1);
- bss = eppnt->p_memsz + eppnt->p_vaddr;
+ len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
+ bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
if (bss > len) {
error = vm_brk(len, bss - len);
if (error)
@@ -1752,7 +1751,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
const struct user_regset *regset = &view->regsets[i];
do_thread_regset_writeback(t->task, regset);
if (regset->core_note_type && regset->get &&
- (!regset->active || regset->active(t->task, regset))) {
+ (!regset->active || regset->active(t->task, regset) > 0)) {
int ret;
size_t size = regset_size(t->task, regset);
void *data = kmalloc(size, GFP_KERNEL);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 4b5fff31ef27..aa4a7a23ff99 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -205,7 +205,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
goto error;
if (fmt->flags & MISC_FMT_OPEN_FILE) {
- interp_file = filp_clone_open(fmt->interp_file);
+ interp_file = file_clone_open(fmt->interp_file);
if (!IS_ERR(interp_file))
deny_write_access(interp_file);
} else {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0dd87aaeb39a..aba25414231a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -221,7 +221,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
ret = bio_iov_iter_get_pages(&bio, iter);
if (unlikely(ret))
- return ret;
+ goto out;
ret = bio.bi_iter.bi_size;
if (iov_iter_rw(iter) == READ) {
@@ -250,12 +250,13 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
put_page(bvec->bv_page);
}
- if (vecs != inline_vecs)
- kfree(vecs);
-
if (unlikely(bio.bi_status))
ret = blk_status_to_errno(bio.bi_status);
+out:
+ if (vecs != inline_vecs)
+ kfree(vecs);
+
bio_uninit(&bio);
return ret;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 15e1dfef56a5..3b66c957ea6f 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -30,23 +30,22 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
- BUG();
+ return ERR_PTR(-EINVAL);
}
- size = btrfs_getxattr(inode, name, "", 0);
+ size = btrfs_getxattr(inode, name, NULL, 0);
if (size > 0) {
value = kzalloc(size, GFP_KERNEL);
if (!value)
return ERR_PTR(-ENOMEM);
size = btrfs_getxattr(inode, name, value, size);
}
- if (size > 0) {
+ if (size > 0)
acl = posix_acl_from_xattr(&init_user_ns, value, size);
- } else if (size == -ERANGE || size == -ENODATA || size == 0) {
+ else if (size == -ENODATA || size == 0)
acl = NULL;
- } else {
- acl = ERR_PTR(-EIO);
- }
+ else
+ acl = ERR_PTR(size);
kfree(value);
return acl;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 0a8e2e29a66b..ae750b1574a2 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -925,7 +925,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
type = btrfs_get_extent_inline_ref_type(leaf, iref,
BTRFS_REF_TYPE_ANY);
if (type == BTRFS_REF_TYPE_INVALID)
- return -EINVAL;
+ return -EUCLEAN;
offset = btrfs_extent_inline_ref_offset(leaf, iref);
@@ -1793,7 +1793,7 @@ static int get_extent_inline_ref(unsigned long *ptr,
*out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref,
BTRFS_REF_TYPE_ANY);
if (*out_type == BTRFS_REF_TYPE_INVALID)
- return -EINVAL;
+ return -EUCLEAN;
*ptr += btrfs_extent_inline_ref_size(*out_type);
WARN_ON(*ptr > end);
@@ -2225,7 +2225,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
fspath = init_data_container(total_bytes);
if (IS_ERR(fspath))
- return (void *)fspath;
+ return ERR_CAST(fspath);
ifp = kmalloc(sizeof(*ifp), GFP_KERNEL);
if (!ifp) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 7e075343daa5..1343ac57b438 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -178,7 +178,7 @@ struct btrfs_inode {
struct btrfs_delayed_node *delayed_node;
/* File creation time. */
- struct timespec i_otime;
+ struct timespec64 i_otime;
/* Hook into fs_info->delayed_iputs */
struct list_head delayed_iput;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index a3fdb4fe967d..833cf3c35b4d 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1539,7 +1539,12 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
}
device = multi->stripes[0].dev;
- block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev->bd_dev);
+ if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) ||
+ !device->bdev || !device->name)
+ block_ctx_out->dev = NULL;
+ else
+ block_ctx_out->dev = btrfsic_dev_state_lookup(
+ device->bdev->bd_dev);
block_ctx_out->dev_bytenr = multi->stripes[0].physical;
block_ctx_out->start = bytenr;
block_ctx_out->len = len;
@@ -1624,7 +1629,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
bio = btrfs_io_bio_alloc(num_pages - i);
bio_set_dev(bio, block_ctx->dev->bdev);
bio->bi_iter.bi_sector = dev_bytenr >> 9;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
for (j = i; j < num_pages; j++) {
ret = bio_add_page(bio, block_ctx->pagev[j],
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d3e447b45bf7..9bfa66592aa7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -5,7 +5,6 @@
#include <linux/kernel.h>
#include <linux/bio.h>
-#include <linux/buffer_head.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -14,10 +13,7 @@
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
-#include <linux/mpage.h>
-#include <linux/swap.h>
#include <linux/writeback.h>
-#include <linux/bit_spinlock.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/log2.h>
@@ -303,7 +299,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
struct bio *bio = NULL;
struct compressed_bio *cb;
unsigned long bytes_left;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
int pg_index = 0;
struct page *page;
u64 first_byte = disk_start;
@@ -342,9 +337,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
if (bio->bi_iter.bi_size)
- submit = io_tree->ops->merge_bio_hook(page, 0,
- PAGE_SIZE,
- bio, 0);
+ submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE, bio, 0);
page->mapping = NULL;
if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
@@ -613,7 +606,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
cb->len = bio->bi_iter.bi_size;
comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
- bio_set_op_attrs (comp_bio, REQ_OP_READ, 0);
+ comp_bio->bi_opf = REQ_OP_READ;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
refcount_set(&cb->pending_bios, 1);
@@ -626,9 +619,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->index = em_start >> PAGE_SHIFT;
if (comp_bio->bi_iter.bi_size)
- submit = tree->ops->merge_bio_hook(page, 0,
- PAGE_SIZE,
- comp_bio, 0);
+ submit = btrfs_merge_bio_hook(page, 0, PAGE_SIZE,
+ comp_bio, 0);
page->mapping = NULL;
if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
@@ -660,7 +652,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
}
comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
- bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
+ comp_bio->bi_opf = REQ_OP_READ;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 4bc326df472e..d436fb4c002e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -888,11 +888,7 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
btrfs_root_last_snapshot(&root->root_item) ||
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
return 1;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
- btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
- return 1;
-#endif
+
return 0;
}
@@ -3128,8 +3124,7 @@ again:
* higher levels
*
*/
-static void fixup_low_keys(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
+static void fixup_low_keys(struct btrfs_path *path,
struct btrfs_disk_key *key, int level)
{
int i;
@@ -3181,7 +3176,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
btrfs_set_item_key(eb, &disk_key, slot);
btrfs_mark_buffer_dirty(eb);
if (slot == 0)
- fixup_low_keys(fs_info, path, &disk_key, 1);
+ fixup_low_keys(path, &disk_key, 1);
}
/*
@@ -3359,17 +3354,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
root_add_used(root, fs_info->nodesize);
- memzero_extent_buffer(c, 0, sizeof(struct btrfs_header));
btrfs_set_header_nritems(c, 1);
- btrfs_set_header_level(c, level);
- btrfs_set_header_bytenr(c, c->start);
- btrfs_set_header_generation(c, trans->transid);
- btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(c, root->root_key.objectid);
-
- write_extent_buffer_fsid(c, fs_info->fsid);
- write_extent_buffer_chunk_tree_uuid(c, fs_info->chunk_tree_uuid);
-
btrfs_set_node_key(c, &lower_key, 0);
btrfs_set_node_blockptr(c, 0, lower->start);
lower_gen = btrfs_header_generation(lower);
@@ -3498,15 +3483,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
return PTR_ERR(split);
root_add_used(root, fs_info->nodesize);
-
- memzero_extent_buffer(split, 0, sizeof(struct btrfs_header));
- btrfs_set_header_level(split, btrfs_header_level(c));
- btrfs_set_header_bytenr(split, split->start);
- btrfs_set_header_generation(split, trans->transid);
- btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(split, root->root_key.objectid);
- write_extent_buffer_fsid(split, fs_info->fsid);
- write_extent_buffer_chunk_tree_uuid(split, fs_info->chunk_tree_uuid);
+ ASSERT(btrfs_header_level(c) == level);
ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid);
if (ret) {
@@ -3945,7 +3922,7 @@ static noinline int __push_leaf_left(struct btrfs_fs_info *fs_info,
clean_tree_block(fs_info, right);
btrfs_item_key(right, &disk_key, 0);
- fixup_low_keys(fs_info, path, &disk_key, 1);
+ fixup_low_keys(path, &disk_key, 1);
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
@@ -4292,15 +4269,6 @@ again:
root_add_used(root, fs_info->nodesize);
- memzero_extent_buffer(right, 0, sizeof(struct btrfs_header));
- btrfs_set_header_bytenr(right, right->start);
- btrfs_set_header_generation(right, trans->transid);
- btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(right, root->root_key.objectid);
- btrfs_set_header_level(right, 0);
- write_extent_buffer_fsid(right, fs_info->fsid);
- write_extent_buffer_chunk_tree_uuid(right, fs_info->chunk_tree_uuid);
-
if (split == 0) {
if (mid <= slot) {
btrfs_set_header_nritems(right, 0);
@@ -4320,7 +4288,7 @@ again:
path->nodes[0] = right;
path->slots[0] = 0;
if (path->slots[1] == 0)
- fixup_low_keys(fs_info, path, &disk_key, 1);
+ fixup_low_keys(path, &disk_key, 1);
}
/*
* We create a new leaf 'right' for the required ins_len and
@@ -4642,7 +4610,7 @@ void btrfs_truncate_item(struct btrfs_fs_info *fs_info,
btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
btrfs_set_item_key(leaf, &disk_key, slot);
if (slot == 0)
- fixup_low_keys(fs_info, path, &disk_key, 1);
+ fixup_low_keys(path, &disk_key, 1);
}
item = btrfs_item_nr(slot);
@@ -4744,7 +4712,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
if (path->slots[0] == 0) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
- fixup_low_keys(fs_info, path, &disk_key, 1);
+ fixup_low_keys(path, &disk_key, 1);
}
btrfs_unlock_up_safe(path, 1);
@@ -4886,7 +4854,6 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
int level, int slot)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *parent = path->nodes[level];
u32 nritems;
int ret;
@@ -4919,7 +4886,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_disk_key disk_key;
btrfs_node_key(parent, &disk_key, 0);
- fixup_low_keys(fs_info, path, &disk_key, level + 1);
+ fixup_low_keys(path, &disk_key, level + 1);
}
btrfs_mark_buffer_dirty(parent);
}
@@ -5022,7 +4989,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_disk_key disk_key;
btrfs_item_key(leaf, &disk_key, 0);
- fixup_low_keys(fs_info, path, &disk_key, 1);
+ fixup_low_keys(path, &disk_key, 1);
}
/* delete the leaf if it is mostly empty */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 118346aceea9..318be7864072 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -55,8 +55,6 @@ struct btrfs_ordered_sum;
#define BTRFS_OLDEST_GENERATION 0ULL
-#define BTRFS_COMPAT_EXTENT_TREE_V0
-
/*
* the max metadata block size. This limit is somewhat artificial,
* but the memmove costs go through the roof for larger blocks.
@@ -86,6 +84,14 @@ static const int btrfs_csum_sizes[] = { 4 };
#define BTRFS_DIRTY_METADATA_THRESH SZ_32M
+/*
+ * Use large batch size to reduce overhead of metadata updates. On the reader
+ * side, we only read it when we are close to ENOSPC and the read overhead is
+ * mostly related to the number of CPUs, so it is OK to use arbitrary large
+ * value here.
+ */
+#define BTRFS_TOTAL_BYTES_PINNED_BATCH SZ_128M
+
#define BTRFS_MAX_EXTENT_SIZE SZ_128M
@@ -342,8 +348,8 @@ struct btrfs_path {
sizeof(struct btrfs_item))
struct btrfs_dev_replace {
u64 replace_state; /* see #define above */
- u64 time_started; /* seconds since 1-Jan-1970 */
- u64 time_stopped; /* seconds since 1-Jan-1970 */
+ time64_t time_started; /* seconds since 1-Jan-1970 */
+ time64_t time_stopped; /* seconds since 1-Jan-1970 */
atomic64_t num_write_errors;
atomic64_t num_uncorrectable_read_errors;
@@ -359,8 +365,6 @@ struct btrfs_dev_replace {
struct btrfs_device *srcdev;
struct btrfs_device *tgtdev;
- pid_t lock_owner;
- atomic_t nesting_level;
struct mutex lock_finishing_cancel_unmount;
rwlock_t lock;
atomic_t read_locks;
@@ -1213,7 +1217,6 @@ struct btrfs_root {
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
- char *name;
/* the dirty list is only used by non-reference counted roots */
struct list_head dirty_list;
@@ -2428,32 +2431,6 @@ static inline u32 btrfs_file_extent_inline_item_len(
return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
}
-/* this returns the number of file bytes represented by the inline item.
- * If an item is compressed, this is the uncompressed size
- */
-static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb,
- int slot,
- const struct btrfs_file_extent_item *fi)
-{
- struct btrfs_map_token token;
-
- btrfs_init_map_token(&token);
- /*
- * return the space used on disk if this item isn't
- * compressed or encoded
- */
- if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 &&
- btrfs_token_file_extent_encryption(eb, fi, &token) == 0 &&
- btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) {
- return btrfs_file_extent_inline_item_len(eb,
- btrfs_item_nr(slot));
- }
-
- /* otherwise use the ram bytes field */
- return btrfs_token_file_extent_ram_bytes(eb, fi, &token);
-}
-
-
/* btrfs_dev_stats_item */
static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
const struct btrfs_dev_stats_item *ptr,
@@ -2676,7 +2653,6 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 offset, u64 ram_bytes,
struct btrfs_key *ins);
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 root_objectid, u64 owner, u64 offset,
struct btrfs_key *ins);
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
@@ -2716,15 +2692,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytes_used,
- u64 type, u64 chunk_offset, u64 size);
+ u64 bytes_used, u64 type, u64 chunk_offset,
+ u64 size);
void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
struct btrfs_fs_info *fs_info,
const u64 chunk_offset);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 group_start,
- struct extent_map *em);
+ u64 group_start, struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
@@ -2786,7 +2761,6 @@ void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
unsigned short type);
void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
-void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
enum btrfs_reserve_flush_enum flush);
@@ -2803,8 +2777,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
-int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache);
+int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
@@ -2812,8 +2785,7 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
u64 start, u64 end);
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 *actual_bytes);
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 type);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
@@ -2822,10 +2794,10 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
int btrfs_start_write_no_snapshotting(struct btrfs_root *root);
void btrfs_end_write_no_snapshotting(struct btrfs_root *root);
void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
-void check_system_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, const u64 type);
+void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
u64 start, u64 end);
+void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
@@ -3011,16 +2983,14 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
/* root-item.c */
-int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
- const char *name, int name_len);
-int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
- const char *name, int name_len);
+int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+ u64 ref_id, u64 dirid, u64 sequence, const char *name,
+ int name_len);
+int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+ u64 ref_id, u64 dirid, u64 *sequence, const char *name,
+ int name_len);
int btrfs_del_root(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, const struct btrfs_key *key);
+ const struct btrfs_key *key);
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *key,
struct btrfs_root_item *item);
@@ -3196,7 +3166,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
-void btrfs_set_range_writeback(void *private_data, u64 start, u64 end);
+void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end);
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_evict_inode(struct inode *inode);
@@ -3452,7 +3422,7 @@ do { \
#ifdef CONFIG_BTRFS_ASSERT
__cold
-static inline void assfail(char *expr, char *file, int line)
+static inline void assfail(const char *expr, const char *file, int line)
{
pr_err("assertion failed: %s, file: %s, line: %d\n",
expr, file, line);
@@ -3465,6 +3435,13 @@ static inline void assfail(char *expr, char *file, int line)
#define ASSERT(expr) ((void)0)
#endif
+__cold
+static inline void btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
+{
+ btrfs_err(fs_info,
+"Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel");
+}
+
__printf(5, 6)
__cold
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index fe6caa7e698b..f51b509f2d9b 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1222,7 +1222,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_trans_handle *trans;
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_path *path;
@@ -1418,7 +1418,6 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
/* Will return 0 or -ENOMEM */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
const char *name, int name_len,
struct btrfs_inode *dir,
struct btrfs_disk_key *disk_key, u8 type,
@@ -1458,11 +1457,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
*/
BUG_ON(ret);
-
mutex_lock(&delayed_node->mutex);
ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
if (unlikely(ret)) {
- btrfs_err(fs_info,
+ btrfs_err(trans->fs_info,
"err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
name_len, name, delayed_node->root->objectid,
delayed_node->inode_id, ret);
@@ -1495,7 +1493,6 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
}
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_inode *dir, u64 index)
{
struct btrfs_delayed_node *node;
@@ -1511,7 +1508,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
item_key.type = BTRFS_DIR_INDEX_KEY;
item_key.offset = index;
- ret = btrfs_delete_delayed_insertion_item(fs_info, node, &item_key);
+ ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
+ &item_key);
if (!ret)
goto end;
@@ -1533,7 +1531,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
mutex_lock(&node->mutex);
ret = __btrfs_add_delayed_deletion_item(node, item);
if (unlikely(ret)) {
- btrfs_err(fs_info,
+ btrfs_err(trans->fs_info,
"err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
index, node->root->objectid, node->inode_id, ret);
BUG();
@@ -1837,7 +1835,7 @@ release_node:
int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_delayed_node *delayed_node;
/*
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index ca7a97f3ab6b..33536cd681d4 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -86,14 +86,12 @@ static inline void btrfs_init_delayed_root(
}
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
const char *name, int name_len,
struct btrfs_inode *dir,
struct btrfs_disk_key *disk_key, u8 type,
u64 index);
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_inode *dir, u64 index);
int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 03dec673d12a..62ff545ba1f7 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -709,13 +709,13 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
* to make sure the delayed ref is eventually processed before this
* transaction commits.
*/
-int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
int *old_ref_mod, int *new_ref_mod)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
@@ -730,27 +730,33 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
if (!ref)
return -ENOMEM;
+ head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
+ if (!head_ref) {
+ kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+ return -ENOMEM;
+ }
+
+ if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
+ is_fstree(ref_root)) {
+ record = kmalloc(sizeof(*record), GFP_NOFS);
+ if (!record) {
+ kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+ kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+ return -ENOMEM;
+ }
+ }
+
if (parent)
ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
else
ref_type = BTRFS_TREE_BLOCK_REF_KEY;
+
init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
ref_root, action, ref_type);
ref->root = ref_root;
ref->parent = parent;
ref->level = level;
- head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
- if (!head_ref)
- goto free_ref;
-
- if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
- is_fstree(ref_root)) {
- record = kmalloc(sizeof(*record), GFP_NOFS);
- if (!record)
- goto free_head_ref;
- }
-
init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
ref_root, 0, action, false, is_system);
head_ref->extent_op = extent_op;
@@ -779,25 +785,18 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
btrfs_qgroup_trace_extent_post(fs_info, record);
return 0;
-
-free_head_ref:
- kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
-free_ref:
- kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
-
- return -ENOMEM;
}
/*
* add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
*/
-int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, u64 reserved, int action,
int *old_ref_mod, int *new_ref_mod)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index ea1aecb6a50d..d9f2a4ebd5db 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -234,14 +234,12 @@ static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *hea
kmem_cache_free(btrfs_delayed_ref_head_cachep, head);
}
-int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
int *old_ref_mod, int *new_ref_mod);
-int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
+int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, u64 reserved, int action,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index e2ba0419297a..dec01970d8c5 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -6,14 +6,9 @@
#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
-#include <linux/buffer_head.h>
#include <linux/blkdev.h>
-#include <linux/random.h>
-#include <linux/iocontext.h>
-#include <linux/capability.h>
#include <linux/kthread.h>
#include <linux/math64.h>
-#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
@@ -465,7 +460,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
* go to the tgtdev as well (refer to btrfs_map_block()).
*/
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
- dev_replace->time_started = get_seconds();
+ dev_replace->time_started = ktime_get_real_seconds();
dev_replace->cursor_left = 0;
dev_replace->committed_cursor_left = 0;
dev_replace->cursor_left_last_write_of_item = 0;
@@ -511,7 +506,7 @@ leave:
dev_replace->srcdev = NULL;
dev_replace->tgtdev = NULL;
btrfs_dev_replace_write_unlock(dev_replace);
- btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+ btrfs_destroy_dev_replace_tgtdev(tgt_device);
return ret;
}
@@ -618,7 +613,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
: BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
dev_replace->tgtdev = NULL;
dev_replace->srcdev = NULL;
- dev_replace->time_stopped = get_seconds();
+ dev_replace->time_stopped = ktime_get_real_seconds();
dev_replace->item_needs_writeback = 1;
/* replace old device with new one in mapping tree */
@@ -637,7 +632,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
btrfs_rm_dev_replace_blocked(fs_info);
if (tgt_device)
- btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+ btrfs_destroy_dev_replace_tgtdev(tgt_device);
btrfs_rm_dev_replace_unblocked(fs_info);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
@@ -663,7 +658,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
tgt_device->commit_total_bytes = src_device->commit_total_bytes;
tgt_device->commit_bytes_used = src_device->bytes_used;
- btrfs_assign_next_active_device(fs_info, src_device, tgt_device);
+ btrfs_assign_next_active_device(src_device, tgt_device);
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
fs_info->fs_devices->rw_devices++;
@@ -672,11 +667,17 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_rm_dev_replace_blocked(fs_info);
- btrfs_rm_dev_replace_remove_srcdev(fs_info, src_device);
+ btrfs_rm_dev_replace_remove_srcdev(src_device);
btrfs_rm_dev_replace_unblocked(fs_info);
/*
+ * Increment dev_stats_ccnt so that btrfs_run_dev_stats() will
+ * update on-disk dev stats value during commit transaction
+ */
+ atomic_inc(&tgt_device->dev_stats_ccnt);
+
+ /*
* this is again a consistent state where no dev_replace procedure
* is running, the target device is part of the filesystem, the
* source device is not part of the filesystem anymore and its 1st
@@ -807,7 +808,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
break;
}
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
- dev_replace->time_stopped = get_seconds();
+ dev_replace->time_stopped = ktime_get_real_seconds();
dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_write_unlock(dev_replace);
btrfs_scrub_cancel(fs_info);
@@ -826,7 +827,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
btrfs_dev_name(tgt_device));
if (tgt_device)
- btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
+ btrfs_destroy_dev_replace_tgtdev(tgt_device);
leave:
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
@@ -848,7 +849,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
dev_replace->replace_state =
BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
- dev_replace->time_stopped = get_seconds();
+ dev_replace->time_stopped = ktime_get_real_seconds();
dev_replace->item_needs_writeback = 1;
btrfs_info(fs_info, "suspending dev_replace for unmount");
break;
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 39e9766d1cbd..a678b07fcf01 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -160,8 +160,8 @@ second_insert:
}
btrfs_release_path(path);
- ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name,
- name_len, dir, &disk_key, type, index);
+ ret2 = btrfs_insert_delayed_dir_index(trans, name, name_len, dir,
+ &disk_key, type, index);
out_free:
btrfs_free_path(path);
if (ret)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 205092dc9390..5124c15705ce 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -5,8 +5,6 @@
#include <linux/fs.h>
#include <linux/blkdev.h>
-#include <linux/scatterlist.h>
-#include <linux/swap.h>
#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>
@@ -54,7 +52,6 @@
static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
-static void free_fs_root(struct btrfs_root *root);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
@@ -108,12 +105,9 @@ void __cold btrfs_end_io_wq_exit(void)
*/
struct async_submit_bio {
void *private_data;
- struct btrfs_fs_info *fs_info;
struct bio *bio;
extent_submit_bio_start_t *submit_bio_start;
- extent_submit_bio_done_t *submit_bio_done;
int mirror_num;
- unsigned long bio_flags;
/*
* bio_offset is optional, can be used if the pages in the bio
* can't tell us where in the file the bio should go
@@ -212,7 +206,7 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset, u64 start, u64 len,
int create)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
int ret;
@@ -615,8 +609,8 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
- btrfs_err_rl(fs_info, "bad tree block start %llu %llu",
- found_start, eb->start);
+ btrfs_err_rl(fs_info, "bad tree block start, want %llu have %llu",
+ eb->start, found_start);
ret = -EIO;
goto err;
}
@@ -628,8 +622,8 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
}
found_level = btrfs_header_level(eb);
if (found_level >= BTRFS_MAX_LEVEL) {
- btrfs_err(fs_info, "bad tree block level %d",
- (int)btrfs_header_level(eb));
+ btrfs_err(fs_info, "bad tree block level %d on %llu",
+ (int)btrfs_header_level(eb), eb->start);
ret = -EIO;
goto err;
}
@@ -779,7 +773,7 @@ static void run_one_async_done(struct btrfs_work *work)
return;
}
- async->submit_bio_done(async->private_data, async->bio, async->mirror_num);
+ btrfs_submit_bio_done(async->private_data, async->bio, async->mirror_num);
}
static void run_one_async_free(struct btrfs_work *work)
@@ -793,8 +787,7 @@ static void run_one_async_free(struct btrfs_work *work)
blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset, void *private_data,
- extent_submit_bio_start_t *submit_bio_start,
- extent_submit_bio_done_t *submit_bio_done)
+ extent_submit_bio_start_t *submit_bio_start)
{
struct async_submit_bio *async;
@@ -803,16 +796,13 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
return BLK_STS_RESOURCE;
async->private_data = private_data;
- async->fs_info = fs_info;
async->bio = bio;
async->mirror_num = mirror_num;
async->submit_bio_start = submit_bio_start;
- async->submit_bio_done = submit_bio_done;
btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
run_one_async_done, run_one_async_free);
- async->bio_flags = bio_flags;
async->bio_offset = bio_offset;
async->status = 0;
@@ -851,24 +841,6 @@ static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio,
return btree_csum_one_bio(bio);
}
-static blk_status_t btree_submit_bio_done(void *private_data, struct bio *bio,
- int mirror_num)
-{
- struct inode *inode = private_data;
- blk_status_t ret;
-
- /*
- * when we're called for a write, we're already in the async
- * submission context. Just jump into btrfs_map_bio
- */
- ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
- if (ret) {
- bio->bi_status = ret;
- bio_endio(bio);
- }
- return ret;
-}
-
static int check_async_write(struct btrfs_inode *bi)
{
if (atomic_read(&bi->sync_writers))
@@ -911,8 +883,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
*/
ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
bio_offset, private_data,
- btree_submit_bio_start,
- btree_submit_bio_done);
+ btree_submit_bio_start);
}
if (ret)
@@ -961,8 +932,9 @@ static int btree_writepages(struct address_space *mapping,
fs_info = BTRFS_I(mapping->host)->root->fs_info;
/* this is a bit racy, but that's ok */
- ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
- BTRFS_DIRTY_METADATA_THRESH);
+ ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+ BTRFS_DIRTY_METADATA_THRESH,
+ fs_info->dirty_metadata_batch);
if (ret < 0)
return 0;
}
@@ -1181,7 +1153,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->highest_objectid = 0;
root->nr_delalloc_inodes = 0;
root->nr_ordered_extents = 0;
- root->name = NULL;
root->inode_tree = RB_ROOT;
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
root->block_rsv = NULL;
@@ -1292,15 +1263,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
goto fail;
}
- memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
- btrfs_set_header_bytenr(leaf, leaf->start);
- btrfs_set_header_generation(leaf, trans->transid);
- btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(leaf, objectid);
root->node = leaf;
-
- write_extent_buffer_fsid(leaf, fs_info->fsid);
- write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid);
btrfs_mark_buffer_dirty(leaf);
root->commit_root = btrfs_root_node(root);
@@ -1374,14 +1337,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
return ERR_CAST(leaf);
}
- memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
- btrfs_set_header_bytenr(leaf, leaf->start);
- btrfs_set_header_generation(leaf, trans->transid);
- btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
root->node = leaf;
- write_extent_buffer_fsid(root->node, fs_info->fsid);
btrfs_mark_buffer_dirty(root->node);
btrfs_tree_unlock(root->node);
return root;
@@ -1546,7 +1503,7 @@ int btrfs_init_fs_root(struct btrfs_root *root)
return 0;
fail:
- /* the caller is responsible to call free_fs_root */
+ /* The caller is responsible to call btrfs_free_fs_root */
return ret;
}
@@ -1651,14 +1608,14 @@ again:
ret = btrfs_insert_fs_root(fs_info, root);
if (ret) {
if (ret == -EEXIST) {
- free_fs_root(root);
+ btrfs_free_fs_root(root);
goto again;
}
goto fail;
}
return root;
fail:
- free_fs_root(root);
+ btrfs_free_fs_root(root);
return ERR_PTR(ret);
}
@@ -1803,7 +1760,7 @@ static int transaction_kthread(void *arg)
struct btrfs_trans_handle *trans;
struct btrfs_transaction *cur;
u64 transid;
- unsigned long now;
+ time64_t now;
unsigned long delay;
bool cannot_commit;
@@ -1819,7 +1776,7 @@ static int transaction_kthread(void *arg)
goto sleep;
}
- now = get_seconds();
+ now = ktime_get_seconds();
if (cur->state < TRANS_STATE_BLOCKED &&
!test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
(now < cur->start_time ||
@@ -2196,8 +2153,6 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
{
- fs_info->dev_replace.lock_owner = 0;
- atomic_set(&fs_info->dev_replace.nesting_level, 0);
mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
rwlock_init(&fs_info->dev_replace.lock);
atomic_set(&fs_info->dev_replace.read_locks, 0);
@@ -3075,6 +3030,13 @@ retry_root_backup:
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
+ ret = btrfs_verify_dev_extents(fs_info);
+ if (ret) {
+ btrfs_err(fs_info,
+ "failed to verify dev extents against chunks: %d",
+ ret);
+ goto fail_block_groups;
+ }
ret = btrfs_recover_balance(fs_info);
if (ret) {
btrfs_err(fs_info, "failed to recover balance: %d", ret);
@@ -3875,10 +3837,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
__btrfs_remove_free_space_cache(root->free_ino_pinned);
if (root->free_ino_ctl)
__btrfs_remove_free_space_cache(root->free_ino_ctl);
- free_fs_root(root);
+ btrfs_free_fs_root(root);
}
-static void free_fs_root(struct btrfs_root *root)
+void btrfs_free_fs_root(struct btrfs_root *root)
{
iput(root->ino_cache_inode);
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
@@ -3890,15 +3852,9 @@ static void free_fs_root(struct btrfs_root *root)
free_extent_buffer(root->commit_root);
kfree(root->free_ino_ctl);
kfree(root->free_ino_pinned);
- kfree(root->name);
btrfs_put_fs_root(root);
}
-void btrfs_free_fs_root(struct btrfs_root *root)
-{
- free_fs_root(root);
-}
-
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{
u64 root_objectid = 0;
@@ -4104,10 +4060,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
* This is a fast path so only do this check if we have sanity tests
- * enabled. Normal people shouldn't be marking dummy buffers as dirty
+ * enabled. Normal people shouldn't be using umapped buffers as dirty
* outside of the sanity tests.
*/
- if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags)))
+ if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags)))
return;
#endif
root = BTRFS_I(buf->pages[0]->mapping->host)->root;
@@ -4150,8 +4106,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info,
if (flush_delayed)
btrfs_balance_delayed_items(fs_info);
- ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
- BTRFS_DIRTY_METADATA_THRESH);
+ ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+ BTRFS_DIRTY_METADATA_THRESH,
+ fs_info->dirty_metadata_batch);
if (ret > 0) {
balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping);
}
@@ -4563,21 +4520,11 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
return 0;
}
-static struct btrfs_fs_info *btree_fs_info(void *private_data)
-{
- struct inode *inode = private_data;
- return btrfs_sb(inode->i_sb);
-}
-
static const struct extent_io_ops btree_extent_io_ops = {
/* mandatory callbacks */
.submit_bio_hook = btree_submit_bio_hook,
.readpage_end_io_hook = btree_readpage_end_io_hook,
- /* note we're sharing with inode.c for the merge bio hook */
- .merge_bio_hook = btrfs_merge_bio_hook,
.readpage_io_failed_hook = btree_io_failed_hook,
- .set_range_writeback = btrfs_set_range_writeback,
- .tree_fs_info = btree_fs_info,
/* optional callbacks */
};
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 1a3d277b027b..4cccba22640f 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -120,8 +120,9 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset, void *private_data,
- extent_submit_bio_start_t *submit_bio_start,
- extent_submit_bio_done_t *submit_bio_done);
+ extent_submit_bio_start_t *submit_bio_start);
+blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
+ int mirror_num);
int btrfs_write_tree_block(struct extent_buffer *buf);
void btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3d9fe58c0080..de6f75f5547b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -52,24 +52,21 @@ enum {
};
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_node *node, u64 parent,
- u64 root_objectid, u64 owner_objectid,
- u64 owner_offset, int refs_to_drop,
- struct btrfs_delayed_extent_op *extra_op);
+ struct btrfs_delayed_ref_node *node, u64 parent,
+ u64 root_objectid, u64 owner_objectid,
+ u64 owner_offset, int refs_to_drop,
+ struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
struct extent_buffer *leaf,
struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 parent, u64 root_objectid,
u64 flags, u64 owner, u64 offset,
struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
-static int do_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 flags,
+static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
int force);
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
@@ -220,9 +217,9 @@ static int add_excluded_extent(struct btrfs_fs_info *fs_info,
return 0;
}
-static void free_excluded_extents(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache)
+static void free_excluded_extents(struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
u64 start, end;
start = cache->key.objectid;
@@ -234,9 +231,9 @@ static void free_excluded_extents(struct btrfs_fs_info *fs_info,
start, end, EXTENT_UPTODATE);
}
-static int exclude_super_stripes(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache)
+static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
u64 bytenr;
u64 *logical;
int stripe_len;
@@ -558,7 +555,7 @@ static noinline void caching_thread(struct btrfs_work *work)
caching_ctl->progress = (u64)-1;
up_read(&fs_info->commit_root_sem);
- free_excluded_extents(fs_info, block_group);
+ free_excluded_extents(block_group);
mutex_unlock(&caching_ctl->mutex);
wake_up(&caching_ctl->wait);
@@ -666,7 +663,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
wake_up(&caching_ctl->wait);
if (ret == 1) {
put_caching_control(caching_ctl);
- free_excluded_extents(fs_info, cache);
+ free_excluded_extents(cache);
return 0;
}
} else {
@@ -758,7 +755,8 @@ static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
space_info = __find_space_info(fs_info, flags);
ASSERT(space_info);
- percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
+ percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
}
/*
@@ -870,18 +868,16 @@ search_again:
num_refs = btrfs_extent_refs(leaf, ei);
extent_flags = btrfs_extent_flags(leaf, ei);
} else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- struct btrfs_extent_item_v0 *ei0;
- BUG_ON(item_size != sizeof(*ei0));
- ei0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item_v0);
- num_refs = btrfs_extent_refs_v0(leaf, ei0);
- /* FIXME: this isn't correct for data */
- extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
-#else
- BUG();
-#endif
+ ret = -EINVAL;
+ btrfs_print_v0_err(fs_info);
+ if (trans)
+ btrfs_abort_transaction(trans, ret);
+ else
+ btrfs_handle_fs_error(fs_info, ret, NULL);
+
+ goto out_free;
}
+
BUG_ON(num_refs == 0);
} else {
num_refs = 0;
@@ -1039,89 +1035,6 @@ out_free:
* tree block info structure.
*/
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
- u64 owner, u32 extra_size)
-{
- struct btrfs_root *root = fs_info->extent_root;
- struct btrfs_extent_item *item;
- struct btrfs_extent_item_v0 *ei0;
- struct btrfs_extent_ref_v0 *ref0;
- struct btrfs_tree_block_info *bi;
- struct extent_buffer *leaf;
- struct btrfs_key key;
- struct btrfs_key found_key;
- u32 new_size = sizeof(*item);
- u64 refs;
- int ret;
-
- leaf = path->nodes[0];
- BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- ei0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item_v0);
- refs = btrfs_extent_refs_v0(leaf, ei0);
-
- if (owner == (u64)-1) {
- while (1) {
- if (path->slots[0] >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- return ret;
- BUG_ON(ret > 0); /* Corruption */
- leaf = path->nodes[0];
- }
- btrfs_item_key_to_cpu(leaf, &found_key,
- path->slots[0]);
- BUG_ON(key.objectid != found_key.objectid);
- if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
- path->slots[0]++;
- continue;
- }
- ref0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref_v0);
- owner = btrfs_ref_objectid_v0(leaf, ref0);
- break;
- }
- }
- btrfs_release_path(path);
-
- if (owner < BTRFS_FIRST_FREE_OBJECTID)
- new_size += sizeof(*bi);
-
- new_size -= sizeof(*ei0);
- ret = btrfs_search_slot(trans, root, &key, path,
- new_size + extra_size, 1);
- if (ret < 0)
- return ret;
- BUG_ON(ret); /* Corruption */
-
- btrfs_extend_item(fs_info, path, new_size);
-
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
- btrfs_set_extent_refs(leaf, item, refs);
- /* FIXME: get real generation */
- btrfs_set_extent_generation(leaf, item, 0);
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- btrfs_set_extent_flags(leaf, item,
- BTRFS_EXTENT_FLAG_TREE_BLOCK |
- BTRFS_BLOCK_FLAG_FULL_BACKREF);
- bi = (struct btrfs_tree_block_info *)(item + 1);
- /* FIXME: get first key of the block */
- memzero_extent_buffer(leaf, (unsigned long)bi, sizeof(*bi));
- btrfs_set_tree_block_level(leaf, bi, (int)owner);
- } else {
- btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
- }
- btrfs_mark_buffer_dirty(leaf);
- return 0;
-}
-#endif
-
/*
* is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
* is_data == BTRFS_REF_TYPE_DATA, data type is requried,
@@ -1216,13 +1129,12 @@ static int match_extent_data_ref(struct extent_buffer *leaf,
}
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid,
u64 owner, u64 offset)
{
- struct btrfs_root *root = fs_info->extent_root;
+ struct btrfs_root *root = trans->fs_info->extent_root;
struct btrfs_key key;
struct btrfs_extent_data_ref *ref;
struct extent_buffer *leaf;
@@ -1251,17 +1163,6 @@ again:
if (parent) {
if (!ret)
return 0;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- key.type = BTRFS_EXTENT_REF_V0_KEY;
- btrfs_release_path(path);
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0) {
- err = ret;
- goto fail;
- }
- if (!ret)
- return 0;
-#endif
goto fail;
}
@@ -1304,13 +1205,12 @@ fail:
}
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid, u64 owner,
u64 offset, int refs_to_add)
{
- struct btrfs_root *root = fs_info->extent_root;
+ struct btrfs_root *root = trans->fs_info->extent_root;
struct btrfs_key key;
struct extent_buffer *leaf;
u32 size;
@@ -1384,7 +1284,6 @@ fail:
}
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
int refs_to_drop, int *last_ref)
{
@@ -1406,13 +1305,10 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
ref2 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_shared_data_ref);
num_refs = btrfs_shared_data_ref_count(leaf, ref2);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
- struct btrfs_extent_ref_v0 *ref0;
- ref0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref_v0);
- num_refs = btrfs_ref_count_v0(leaf, ref0);
-#endif
+ } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
+ btrfs_print_v0_err(trans->fs_info);
+ btrfs_abort_transaction(trans, -EINVAL);
+ return -EINVAL;
} else {
BUG();
}
@@ -1421,21 +1317,13 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
num_refs -= refs_to_drop;
if (num_refs == 0) {
- ret = btrfs_del_item(trans, fs_info->extent_root, path);
+ ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
*last_ref = 1;
} else {
if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- else {
- struct btrfs_extent_ref_v0 *ref0;
- ref0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref_v0);
- btrfs_set_ref_count_v0(leaf, ref0, num_refs);
- }
-#endif
btrfs_mark_buffer_dirty(leaf);
}
return ret;
@@ -1453,6 +1341,8 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+ BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
if (iref) {
/*
* If type is invalid, we should have bailed out earlier than
@@ -1475,13 +1365,6 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
ref2 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_shared_data_ref);
num_refs = btrfs_shared_data_ref_count(leaf, ref2);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
- struct btrfs_extent_ref_v0 *ref0;
- ref0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref_v0);
- num_refs = btrfs_ref_count_v0(leaf, ref0);
-#endif
} else {
WARN_ON(1);
}
@@ -1489,12 +1372,11 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
}
static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid)
{
- struct btrfs_root *root = fs_info->extent_root;
+ struct btrfs_root *root = trans->fs_info->extent_root;
struct btrfs_key key;
int ret;
@@ -1510,20 +1392,10 @@ static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0)
ret = -ENOENT;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (ret == -ENOENT && parent) {
- btrfs_release_path(path);
- key.type = BTRFS_EXTENT_REF_V0_KEY;
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret > 0)
- ret = -ENOENT;
- }
-#endif
return ret;
}
static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 root_objectid)
@@ -1540,7 +1412,7 @@ static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
key.offset = root_objectid;
}
- ret = btrfs_insert_empty_item(trans, fs_info->extent_root,
+ ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
path, &key, 0);
btrfs_release_path(path);
return ret;
@@ -1599,13 +1471,13 @@ static int find_next_key(struct btrfs_path *path, int level,
*/
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref **ref_ret,
u64 bytenr, u64 num_bytes,
u64 parent, u64 root_objectid,
u64 owner, u64 offset, int insert)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_key key;
struct extent_buffer *leaf;
@@ -1635,8 +1507,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
extra_size = -1;
/*
- * Owner is our parent level, so we can just add one to get the level
- * for the block we are interested in.
+ * Owner is our level, so we can just add one to get the level for the
+ * block we are interested in.
*/
if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
key.type = BTRFS_METADATA_ITEM_KEY;
@@ -1684,23 +1556,12 @@ again:
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- if (!insert) {
- err = -ENOENT;
- goto out;
- }
- ret = convert_extent_item_v0(trans, fs_info, path, owner,
- extra_size);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ if (unlikely(item_size < sizeof(*ei))) {
+ err = -EINVAL;
+ btrfs_print_v0_err(fs_info);
+ btrfs_abort_transaction(trans, err);
+ goto out;
}
-#endif
- BUG_ON(item_size < sizeof(*ei));
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
flags = btrfs_extent_flags(leaf, ei);
@@ -1727,7 +1588,7 @@ again:
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
if (type == BTRFS_REF_TYPE_INVALID) {
- err = -EINVAL;
+ err = -EUCLEAN;
goto out;
}
@@ -1863,7 +1724,6 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
}
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref **ref_ret,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -1871,9 +1731,9 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
{
int ret;
- ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret,
- bytenr, num_bytes, parent,
- root_objectid, owner, offset, 0);
+ ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
+ num_bytes, parent, root_objectid,
+ owner, offset, 0);
if (ret != -ENOENT)
return ret;
@@ -1881,12 +1741,11 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
*ref_ret = NULL;
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = lookup_tree_block_ref(trans, fs_info, path, bytenr,
- parent, root_objectid);
+ ret = lookup_tree_block_ref(trans, path, bytenr, parent,
+ root_objectid);
} else {
- ret = lookup_extent_data_ref(trans, fs_info, path, bytenr,
- parent, root_objectid, owner,
- offset);
+ ret = lookup_extent_data_ref(trans, path, bytenr, parent,
+ root_objectid, owner, offset);
}
return ret;
}
@@ -1895,14 +1754,14 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
* helper to update/remove inline back ref
*/
static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
+void update_inline_extent_backref(struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_mod,
struct btrfs_delayed_extent_op *extent_op,
int *last_ref)
{
- struct extent_buffer *leaf;
+ struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_extent_item *ei;
struct btrfs_extent_data_ref *dref = NULL;
struct btrfs_shared_data_ref *sref = NULL;
@@ -1913,7 +1772,6 @@ void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
int type;
u64 refs;
- leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, ei);
WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
@@ -1965,7 +1823,6 @@ void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner,
@@ -1975,15 +1832,15 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_extent_inline_ref *iref;
int ret;
- ret = lookup_inline_extent_backref(trans, fs_info, path, &iref,
- bytenr, num_bytes, parent,
- root_objectid, owner, offset, 1);
+ ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
+ num_bytes, parent, root_objectid,
+ owner, offset, 1);
if (ret == 0) {
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
- update_inline_extent_backref(fs_info, path, iref,
- refs_to_add, extent_op, NULL);
+ update_inline_extent_backref(path, iref, refs_to_add,
+ extent_op, NULL);
} else if (ret == -ENOENT) {
- setup_inline_extent_backref(fs_info, path, iref, parent,
+ setup_inline_extent_backref(trans->fs_info, path, iref, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
ret = 0;
@@ -1992,7 +1849,6 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
}
static int insert_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
u64 bytenr, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int refs_to_add)
@@ -2000,18 +1856,17 @@ static int insert_extent_backref(struct btrfs_trans_handle *trans,
int ret;
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
BUG_ON(refs_to_add != 1);
- ret = insert_tree_block_ref(trans, fs_info, path, bytenr,
- parent, root_objectid);
+ ret = insert_tree_block_ref(trans, path, bytenr, parent,
+ root_objectid);
} else {
- ret = insert_extent_data_ref(trans, fs_info, path, bytenr,
- parent, root_objectid,
- owner, offset, refs_to_add);
+ ret = insert_extent_data_ref(trans, path, bytenr, parent,
+ root_objectid, owner, offset,
+ refs_to_add);
}
return ret;
}
static int remove_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_drop, int is_data, int *last_ref)
@@ -2020,14 +1875,14 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
BUG_ON(!is_data && refs_to_drop != 1);
if (iref) {
- update_inline_extent_backref(fs_info, path, iref,
- -refs_to_drop, NULL, last_ref);
+ update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
+ last_ref);
} else if (is_data) {
- ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop,
+ ret = remove_extent_data_ref(trans, path, refs_to_drop,
last_ref);
} else {
*last_ref = 1;
- ret = btrfs_del_item(trans, fs_info->extent_root, path);
+ ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
}
return ret;
}
@@ -2185,13 +2040,13 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
owner, offset, BTRFS_ADD_DELAYED_REF);
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+ ret = btrfs_add_delayed_tree_ref(trans, bytenr,
num_bytes, parent,
root_objectid, (int)owner,
BTRFS_ADD_DELAYED_REF, NULL,
&old_ref_mod, &new_ref_mod);
} else {
- ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+ ret = btrfs_add_delayed_data_ref(trans, bytenr,
num_bytes, parent,
root_objectid, owner, offset,
0, BTRFS_ADD_DELAYED_REF,
@@ -2207,8 +2062,41 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
return ret;
}
+/*
+ * __btrfs_inc_extent_ref - insert backreference for a given extent
+ *
+ * @trans: Handle of transaction
+ *
+ * @node: The delayed ref node used to get the bytenr/length for
+ * extent whose references are incremented.
+ *
+ * @parent: If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
+ * BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
+ * bytenr of the parent block. Since new extents are always
+ * created with indirect references, this will only be the case
+ * when relocating a shared extent. In that case, root_objectid
+ * will be BTRFS_TREE_RELOC_OBJECTID. Otheriwse, parent must
+ * be 0
+ *
+ * @root_objectid: The id of the root where this modification has originated,
+ * this can be either one of the well-known metadata trees or
+ * the subvolume id which references this extent.
+ *
+ * @owner: For data extents it is the inode number of the owning file.
+ * For metadata extents this parameter holds the level in the
+ * tree of the extent.
+ *
+ * @offset: For metadata extents the offset is ignored and is currently
+ * always passed as 0. For data extents it is the fileoffset
+ * this extent belongs to.
+ *
+ * @refs_to_add Number of references to add
+ *
+ * @extent_op Pointer to a structure, holding information necessary when
+ * updating a tree block's flags
+ *
+ */
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
u64 parent, u64 root_objectid,
u64 owner, u64 offset, int refs_to_add,
@@ -2230,10 +2118,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path->reada = READA_FORWARD;
path->leave_spinning = 1;
/* this will setup the path even if it fails to insert the back ref */
- ret = insert_inline_extent_backref(trans, fs_info, path, bytenr,
- num_bytes, parent, root_objectid,
- owner, offset,
- refs_to_add, extent_op);
+ ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
+ parent, root_objectid, owner,
+ offset, refs_to_add, extent_op);
if ((ret < 0 && ret != -EAGAIN) || !ret)
goto out;
@@ -2256,8 +2143,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path->reada = READA_FORWARD;
path->leave_spinning = 1;
/* now insert the actual backref */
- ret = insert_extent_backref(trans, fs_info, path, bytenr, parent,
- root_objectid, owner, offset, refs_to_add);
+ ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
+ owner, offset, refs_to_add);
if (ret)
btrfs_abort_transaction(trans, ret);
out:
@@ -2266,7 +2153,6 @@ out:
}
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
int insert_reserved)
@@ -2283,7 +2169,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
ins.type = BTRFS_EXTENT_ITEM_KEY;
ref = btrfs_delayed_node_to_data_ref(node);
- trace_run_delayed_data_ref(fs_info, node, ref, node->action);
+ trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
if (node->type == BTRFS_SHARED_DATA_REF_KEY)
parent = ref->parent;
@@ -2292,17 +2178,16 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
if (extent_op)
flags |= extent_op->flags_to_set;
- ret = alloc_reserved_file_extent(trans, fs_info,
- parent, ref_root, flags,
- ref->objectid, ref->offset,
- &ins, node->ref_mod);
+ ret = alloc_reserved_file_extent(trans, parent, ref_root,
+ flags, ref->objectid,
+ ref->offset, &ins,
+ node->ref_mod);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
- ret = __btrfs_inc_extent_ref(trans, fs_info, node, parent,
- ref_root, ref->objectid,
- ref->offset, node->ref_mod,
- extent_op);
+ ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
+ ref->objectid, ref->offset,
+ node->ref_mod, extent_op);
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
- ret = __btrfs_free_extent(trans, fs_info, node, parent,
+ ret = __btrfs_free_extent(trans, node, parent,
ref_root, ref->objectid,
ref->offset, node->ref_mod,
extent_op);
@@ -2331,10 +2216,10 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
}
static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_extent_op *extent_op)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_key key;
struct btrfs_path *path;
struct btrfs_extent_item *ei;
@@ -2400,18 +2285,14 @@ again:
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+
+ if (unlikely(item_size < sizeof(*ei))) {
+ err = -EINVAL;
+ btrfs_print_v0_err(fs_info);
+ btrfs_abort_transaction(trans, err);
+ goto out;
}
-#endif
- BUG_ON(item_size < sizeof(*ei));
+
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
__run_delayed_extent_op(extent_op, leaf, ei);
@@ -2422,7 +2303,6 @@ out:
}
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
int insert_reserved)
@@ -2433,14 +2313,14 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
u64 ref_root = 0;
ref = btrfs_delayed_node_to_tree_ref(node);
- trace_run_delayed_tree_ref(fs_info, node, ref, node->action);
+ trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
parent = ref->parent;
ref_root = ref->root;
if (node->ref_mod != 1) {
- btrfs_err(fs_info,
+ btrfs_err(trans->fs_info,
"btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
node->bytenr, node->ref_mod, node->action, ref_root,
parent);
@@ -2450,13 +2330,10 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
BUG_ON(!extent_op || !extent_op->update_flags);
ret = alloc_reserved_tree_block(trans, node, extent_op);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
- ret = __btrfs_inc_extent_ref(trans, fs_info, node,
- parent, ref_root,
- ref->level, 0, 1,
- extent_op);
+ ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
+ ref->level, 0, 1, extent_op);
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
- ret = __btrfs_free_extent(trans, fs_info, node,
- parent, ref_root,
+ ret = __btrfs_free_extent(trans, node, parent, ref_root,
ref->level, 0, 1, extent_op);
} else {
BUG();
@@ -2466,7 +2343,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
/* helper function to actually process a single delayed ref entry */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
int insert_reserved)
@@ -2475,18 +2351,18 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
if (trans->aborted) {
if (insert_reserved)
- btrfs_pin_extent(fs_info, node->bytenr,
+ btrfs_pin_extent(trans->fs_info, node->bytenr,
node->num_bytes, 1);
return 0;
}
if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
node->type == BTRFS_SHARED_BLOCK_REF_KEY)
- ret = run_delayed_tree_ref(trans, fs_info, node, extent_op,
+ ret = run_delayed_tree_ref(trans, node, extent_op,
insert_reserved);
else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
node->type == BTRFS_SHARED_DATA_REF_KEY)
- ret = run_delayed_data_ref(trans, fs_info, node, extent_op,
+ ret = run_delayed_data_ref(trans, node, extent_op,
insert_reserved);
else
BUG();
@@ -2528,7 +2404,6 @@ static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_ref
}
static int cleanup_extent_op(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head)
{
struct btrfs_delayed_extent_op *extent_op = head->extent_op;
@@ -2542,21 +2417,22 @@ static int cleanup_extent_op(struct btrfs_trans_handle *trans,
return 0;
}
spin_unlock(&head->lock);
- ret = run_delayed_extent_op(trans, fs_info, head, extent_op);
+ ret = run_delayed_extent_op(trans, head, extent_op);
btrfs_free_delayed_extent_op(extent_op);
return ret ? ret : 1;
}
static int cleanup_ref_head(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head)
{
+
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_ref_root *delayed_refs;
int ret;
delayed_refs = &trans->transaction->delayed_refs;
- ret = cleanup_extent_op(trans, fs_info, head);
+ ret = cleanup_extent_op(trans, head);
if (ret < 0) {
unselect_delayed_ref_head(delayed_refs, head);
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
@@ -2598,8 +2474,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
flags = BTRFS_BLOCK_GROUP_METADATA;
space_info = __find_space_info(fs_info, flags);
ASSERT(space_info);
- percpu_counter_add(&space_info->total_bytes_pinned,
- -head->num_bytes);
+ percpu_counter_add_batch(&space_info->total_bytes_pinned,
+ -head->num_bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
if (head->is_data) {
spin_lock(&delayed_refs->lock);
@@ -2705,7 +2582,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
* up and move on to the next ref_head.
*/
if (!ref) {
- ret = cleanup_ref_head(trans, fs_info, locked_ref);
+ ret = cleanup_ref_head(trans, locked_ref);
if (ret > 0 ) {
/* We dropped our lock, we need to loop. */
ret = 0;
@@ -2752,7 +2629,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
locked_ref->extent_op = NULL;
spin_unlock(&locked_ref->lock);
- ret = run_one_delayed_ref(trans, fs_info, ref, extent_op,
+ ret = run_one_delayed_ref(trans, ref, extent_op,
must_insert_reserved);
btrfs_free_delayed_extent_op(extent_op);
@@ -3227,12 +3104,6 @@ static noinline int check_committed_ref(struct btrfs_root *root,
ret = 1;
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
- goto out;
- }
-#endif
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
if (item_size != sizeof(*ei) +
@@ -4060,11 +3931,7 @@ static void update_space_info(struct btrfs_fs_info *info, u64 flags,
struct btrfs_space_info *found;
int factor;
- if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- factor = 2;
- else
- factor = 1;
+ factor = btrfs_bg_type_to_factor(flags);
found = __find_space_info(info, flags);
ASSERT(found);
@@ -4289,7 +4156,7 @@ again:
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = do_chunk_alloc(trans, fs_info, alloc_target,
+ ret = do_chunk_alloc(trans, alloc_target,
CHUNK_ALLOC_NO_FORCE);
btrfs_end_transaction(trans);
if (ret < 0) {
@@ -4309,9 +4176,10 @@ again:
* allocation, and no removed chunk in current transaction,
* don't bother committing the transaction.
*/
- have_pinned_space = percpu_counter_compare(
+ have_pinned_space = __percpu_counter_compare(
&data_sinfo->total_bytes_pinned,
- used + bytes - data_sinfo->total_bytes);
+ used + bytes - data_sinfo->total_bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
spin_unlock(&data_sinfo->lock);
/* commit the current transaction and try again */
@@ -4358,7 +4226,7 @@ commit_trans:
data_sinfo->flags, bytes, 1);
spin_unlock(&data_sinfo->lock);
- return ret;
+ return 0;
}
int btrfs_check_data_free_space(struct inode *inode,
@@ -4511,9 +4379,9 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
* for allocating a chunk, otherwise if it's false, reserve space necessary for
* removing a chunk.
*/
-void check_system_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 type)
+void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *info;
u64 left;
u64 thresh;
@@ -4552,7 +4420,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
* the paths we visit in the chunk tree (they were already COWed
* or created in the current transaction for example).
*/
- ret = btrfs_alloc_chunk(trans, fs_info, flags);
+ ret = btrfs_alloc_chunk(trans, flags);
}
if (!ret) {
@@ -4573,11 +4441,13 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
* - return 1 if it successfully allocates a chunk,
* - return errors including -ENOSPC otherwise.
*/
-static int do_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 flags, int force)
+static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+ int force)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *space_info;
- int wait_for_alloc = 0;
+ bool wait_for_alloc = false;
+ bool should_alloc = false;
int ret = 0;
/* Don't re-enter if we're already allocating a chunk */
@@ -4587,45 +4457,44 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
space_info = __find_space_info(fs_info, flags);
ASSERT(space_info);
-again:
- spin_lock(&space_info->lock);
- if (force < space_info->force_alloc)
- force = space_info->force_alloc;
- if (space_info->full) {
- if (should_alloc_chunk(fs_info, space_info, force))
- ret = -ENOSPC;
- else
- ret = 0;
- spin_unlock(&space_info->lock);
- return ret;
- }
-
- if (!should_alloc_chunk(fs_info, space_info, force)) {
- spin_unlock(&space_info->lock);
- return 0;
- } else if (space_info->chunk_alloc) {
- wait_for_alloc = 1;
- } else {
- space_info->chunk_alloc = 1;
- }
-
- spin_unlock(&space_info->lock);
-
- mutex_lock(&fs_info->chunk_mutex);
+ do {
+ spin_lock(&space_info->lock);
+ if (force < space_info->force_alloc)
+ force = space_info->force_alloc;
+ should_alloc = should_alloc_chunk(fs_info, space_info, force);
+ if (space_info->full) {
+ /* No more free physical space */
+ if (should_alloc)
+ ret = -ENOSPC;
+ else
+ ret = 0;
+ spin_unlock(&space_info->lock);
+ return ret;
+ } else if (!should_alloc) {
+ spin_unlock(&space_info->lock);
+ return 0;
+ } else if (space_info->chunk_alloc) {
+ /*
+ * Someone is already allocating, so we need to block
+ * until this someone is finished and then loop to
+ * recheck if we should continue with our allocation
+ * attempt.
+ */
+ wait_for_alloc = true;
+ spin_unlock(&space_info->lock);
+ mutex_lock(&fs_info->chunk_mutex);
+ mutex_unlock(&fs_info->chunk_mutex);
+ } else {
+ /* Proceed with allocation */
+ space_info->chunk_alloc = 1;
+ wait_for_alloc = false;
+ spin_unlock(&space_info->lock);
+ }
- /*
- * The chunk_mutex is held throughout the entirety of a chunk
- * allocation, so once we've acquired the chunk_mutex we know that the
- * other guy is done and we need to recheck and see if we should
- * allocate.
- */
- if (wait_for_alloc) {
- mutex_unlock(&fs_info->chunk_mutex);
- wait_for_alloc = 0;
cond_resched();
- goto again;
- }
+ } while (wait_for_alloc);
+ mutex_lock(&fs_info->chunk_mutex);
trans->allocating_chunk = true;
/*
@@ -4651,9 +4520,9 @@ again:
* Check if we have enough space in SYSTEM chunk because we may need
* to update devices.
*/
- check_system_chunk(trans, fs_info, flags);
+ check_system_chunk(trans, flags);
- ret = btrfs_alloc_chunk(trans, fs_info, flags);
+ ret = btrfs_alloc_chunk(trans, flags);
trans->allocating_chunk = false;
spin_lock(&space_info->lock);
@@ -4703,6 +4572,7 @@ static int can_overcommit(struct btrfs_fs_info *fs_info,
u64 space_size;
u64 avail;
u64 used;
+ int factor;
/* Don't overcommit when in mixed mode. */
if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
@@ -4737,10 +4607,8 @@ static int can_overcommit(struct btrfs_fs_info *fs_info,
* doesn't include the parity drive, so we don't have to
* change the math
*/
- if (profile & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- avail >>= 1;
+ factor = btrfs_bg_type_to_factor(profile);
+ avail = div_u64(avail, factor);
/*
* If we aren't flushing all things, let us overcommit up to
@@ -4912,8 +4780,9 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
return 0;
/* See if there is enough pinned space to make this reservation */
- if (percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes) >= 0)
+ if (__percpu_counter_compare(&space_info->total_bytes_pinned,
+ bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
goto commit;
/*
@@ -4930,8 +4799,9 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
bytes -= delayed_rsv->size;
spin_unlock(&delayed_rsv->lock);
- if (percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes) < 0) {
+ if (__percpu_counter_compare(&space_info->total_bytes_pinned,
+ bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) {
return -ENOSPC;
}
@@ -4984,7 +4854,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
ret = PTR_ERR(trans);
break;
}
- ret = do_chunk_alloc(trans, fs_info,
+ ret = do_chunk_alloc(trans,
btrfs_metadata_alloc_profile(fs_info),
CHUNK_ALLOC_NO_FORCE);
btrfs_end_transaction(trans);
@@ -5659,11 +5529,6 @@ void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
kfree(rsv);
}
-void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
-{
- kfree(rsv);
-}
-
int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
enum btrfs_reserve_flush_enum flush)
@@ -6019,7 +5884,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned nr_extents;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
@@ -6092,7 +5957,7 @@ out_fail:
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
bool qgroup_free)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
spin_lock(&inode->lock);
@@ -6121,7 +5986,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
bool qgroup_free)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned num_extents;
spin_lock(&inode->lock);
@@ -6219,12 +6084,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
cache = btrfs_lookup_block_group(info, bytenr);
if (!cache)
return -ENOENT;
- if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- factor = 2;
- else
- factor = 1;
+ factor = btrfs_bg_type_to_factor(cache->flags);
+
/*
* If this block group has free space cache written out, we
* need to make sure to load it if we are removing space. This
@@ -6268,8 +6129,9 @@ static int update_block_group(struct btrfs_trans_handle *trans,
trace_btrfs_space_reservation(info, "pinned",
cache->space_info->flags,
num_bytes, 1);
- percpu_counter_add(&cache->space_info->total_bytes_pinned,
- num_bytes);
+ percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
+ num_bytes,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
set_extent_dirty(info->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
@@ -6279,7 +6141,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
if (list_empty(&cache->dirty_list)) {
list_add_tail(&cache->dirty_list,
&trans->transaction->dirty_bgs);
- trans->transaction->num_dirty_bgs++;
+ trans->transaction->num_dirty_bgs++;
btrfs_get_block_group(cache);
}
spin_unlock(&trans->transaction->dirty_bgs_lock);
@@ -6290,16 +6152,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
* dirty list to avoid races between cleaner kthread and space
* cache writeout.
*/
- if (!alloc && old_val == 0) {
- spin_lock(&info->unused_bgs_lock);
- if (list_empty(&cache->bg_list)) {
- btrfs_get_block_group(cache);
- trace_btrfs_add_unused_block_group(cache);
- list_add_tail(&cache->bg_list,
- &info->unused_bgs);
- }
- spin_unlock(&info->unused_bgs_lock);
- }
+ if (!alloc && old_val == 0)
+ btrfs_mark_bg_unused(cache);
btrfs_put_block_group(cache);
total -= num_bytes;
@@ -6347,7 +6201,8 @@ static int pin_down_extent(struct btrfs_fs_info *fs_info,
trace_btrfs_space_reservation(fs_info, "pinned",
cache->space_info->flags, num_bytes, 1);
- percpu_counter_add(&cache->space_info->total_bytes_pinned, num_bytes);
+ percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
+ num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
set_extent_dirty(fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
return 0;
@@ -6711,7 +6566,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
trace_btrfs_space_reservation(fs_info, "pinned",
space_info->flags, len, 0);
space_info->max_extent_size = 0;
- percpu_counter_add(&space_info->total_bytes_pinned, -len);
+ percpu_counter_add_batch(&space_info->total_bytes_pinned,
+ -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
if (cache->ro) {
space_info->bytes_readonly += len;
readonly = true;
@@ -6815,12 +6671,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
}
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *info,
- struct btrfs_delayed_ref_node *node, u64 parent,
- u64 root_objectid, u64 owner_objectid,
- u64 owner_offset, int refs_to_drop,
- struct btrfs_delayed_extent_op *extent_op)
+ struct btrfs_delayed_ref_node *node, u64 parent,
+ u64 root_objectid, u64 owner_objectid,
+ u64 owner_offset, int refs_to_drop,
+ struct btrfs_delayed_extent_op *extent_op)
{
+ struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_key key;
struct btrfs_path *path;
struct btrfs_root *extent_root = info->extent_root;
@@ -6852,9 +6708,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (is_data)
skinny_metadata = false;
- ret = lookup_extent_backref(trans, info, path, &iref,
- bytenr, num_bytes, parent,
- root_objectid, owner_objectid,
+ ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
+ parent, root_objectid, owner_objectid,
owner_offset);
if (ret == 0) {
extent_slot = path->slots[0];
@@ -6877,14 +6732,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
break;
extent_slot--;
}
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
- if (found_extent && item_size < sizeof(*ei))
- found_extent = 0;
-#endif
+
if (!found_extent) {
BUG_ON(iref);
- ret = remove_extent_backref(trans, info, path, NULL,
+ ret = remove_extent_backref(trans, path, NULL,
refs_to_drop,
is_data, &last_ref);
if (ret) {
@@ -6957,42 +6808,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, extent_slot);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- BUG_ON(found_extent || extent_slot != path->slots[0]);
- ret = convert_extent_item_v0(trans, info, path, owner_objectid,
- 0);
- if (ret < 0) {
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
-
- btrfs_release_path(path);
- path->leave_spinning = 1;
-
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = num_bytes;
-
- ret = btrfs_search_slot(trans, extent_root, &key, path,
- -1, 1);
- if (ret) {
- btrfs_err(info,
- "umm, got %d back from search, was looking for %llu",
- ret, bytenr);
- btrfs_print_leaf(path->nodes[0]);
- }
- if (ret < 0) {
- btrfs_abort_transaction(trans, ret);
- goto out;
- }
-
- extent_slot = path->slots[0];
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, extent_slot);
+ if (unlikely(item_size < sizeof(*ei))) {
+ ret = -EINVAL;
+ btrfs_print_v0_err(info);
+ btrfs_abort_transaction(trans, ret);
+ goto out;
}
-#endif
- BUG_ON(item_size < sizeof(*ei));
ei = btrfs_item_ptr(leaf, extent_slot,
struct btrfs_extent_item);
if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
@@ -7028,9 +6849,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
}
if (found_extent) {
- ret = remove_extent_backref(trans, info, path,
- iref, refs_to_drop,
- is_data, &last_ref);
+ ret = remove_extent_backref(trans, path, iref,
+ refs_to_drop, is_data,
+ &last_ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -7172,7 +6993,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
root->root_key.objectid,
btrfs_header_level(buf), 0,
BTRFS_DROP_DELAYED_REF);
- ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start,
+ ret = btrfs_add_delayed_tree_ref(trans, buf->start,
buf->len, parent,
root->root_key.objectid,
btrfs_header_level(buf),
@@ -7251,13 +7072,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
old_ref_mod = new_ref_mod = 0;
ret = 0;
} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+ ret = btrfs_add_delayed_tree_ref(trans, bytenr,
num_bytes, parent,
root_objectid, (int)owner,
BTRFS_DROP_DELAYED_REF, NULL,
&old_ref_mod, &new_ref_mod);
} else {
- ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+ ret = btrfs_add_delayed_data_ref(trans, bytenr,
num_bytes, parent,
root_objectid, owner, offset,
0, BTRFS_DROP_DELAYED_REF,
@@ -7534,7 +7355,7 @@ search:
* for the proper type.
*/
if (!block_group_bits(block_group, flags)) {
- u64 extra = BTRFS_BLOCK_GROUP_DUP |
+ u64 extra = BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6 |
@@ -7738,7 +7559,7 @@ unclustered_alloc:
goto loop;
}
checks:
- search_start = ALIGN(offset, fs_info->stripesize);
+ search_start = round_up(offset, fs_info->stripesize);
/* move on to the next group */
if (search_start + num_bytes >
@@ -7750,7 +7571,6 @@ checks:
if (offset < search_start)
btrfs_add_free_space(block_group, offset,
search_start - offset);
- BUG_ON(offset > search_start);
ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
num_bytes, delalloc);
@@ -7826,8 +7646,7 @@ loop:
goto out;
}
- ret = do_chunk_alloc(trans, fs_info, flags,
- CHUNK_ALLOC_FORCE);
+ ret = do_chunk_alloc(trans, flags, CHUNK_ALLOC_FORCE);
/*
* If we can't allocate a new chunk we've already looped
@@ -8053,11 +7872,11 @@ int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
}
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 parent, u64 root_objectid,
u64 flags, u64 owner, u64 offset,
struct btrfs_key *ins, int ref_mod)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_extent_item *extent_item;
struct btrfs_extent_inline_ref *iref;
@@ -8231,7 +8050,6 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 offset, u64 ram_bytes,
struct btrfs_key *ins)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
@@ -8240,7 +8058,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
root->root_key.objectid, owner, offset,
BTRFS_ADD_DELAYED_EXTENT);
- ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid,
+ ret = btrfs_add_delayed_data_ref(trans, ins->objectid,
ins->offset, 0,
root->root_key.objectid, owner,
offset, ram_bytes,
@@ -8254,10 +8072,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
* space cache bits as well
*/
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
u64 root_objectid, u64 owner, u64 offset,
struct btrfs_key *ins)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_block_group_cache *block_group;
struct btrfs_space_info *space_info;
@@ -8285,15 +8103,15 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
- ret = alloc_reserved_file_extent(trans, fs_info, 0, root_objectid,
- 0, owner, offset, ins, 1);
+ ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
+ offset, ins, 1);
btrfs_put_block_group(block_group);
return ret;
}
static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- u64 bytenr, int level)
+ u64 bytenr, int level, u64 owner)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *buf;
@@ -8302,7 +8120,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (IS_ERR(buf))
return buf;
- btrfs_set_header_generation(buf, trans->transid);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
clean_tree_block(fs_info, buf);
@@ -8311,6 +8128,14 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_lock_blocking(buf);
set_extent_buffer_uptodate(buf);
+ memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
+ btrfs_set_header_level(buf, level);
+ btrfs_set_header_bytenr(buf, buf->start);
+ btrfs_set_header_generation(buf, trans->transid);
+ btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
+ btrfs_set_header_owner(buf, owner);
+ write_extent_buffer_fsid(buf, fs_info->fsid);
+ write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
buf->log_index = root->log_transid % 2;
/*
@@ -8419,7 +8244,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (btrfs_is_testing(fs_info)) {
buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
- level);
+ level, root_objectid);
if (!IS_ERR(buf))
root->alloc_bytenr += blocksize;
return buf;
@@ -8435,7 +8260,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
if (ret)
goto out_unuse;
- buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
+ buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
+ root_objectid);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
goto out_free_reserved;
@@ -8467,7 +8293,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
root_objectid, level, 0,
BTRFS_ADD_DELAYED_EXTENT);
- ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid,
+ ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
ins.offset, parent,
root_objectid, level,
BTRFS_ADD_DELAYED_EXTENT,
@@ -8499,7 +8325,6 @@ struct walk_control {
int keep_locks;
int reada_slot;
int reada_count;
- int for_reloc;
};
#define DROP_REFERENCE 1
@@ -8819,7 +8644,7 @@ skip:
}
if (need_account) {
- ret = btrfs_qgroup_trace_subtree(trans, root, next,
+ ret = btrfs_qgroup_trace_subtree(trans, next,
generation, level - 1);
if (ret) {
btrfs_err_rl(fs_info,
@@ -8919,7 +8744,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
else
ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, eb);
+ ret = btrfs_qgroup_trace_leaf_items(trans, eb);
if (ret) {
btrfs_err_rl(fs_info,
"error %d accounting leaf items. Quota is out of sync, rescan required.",
@@ -9136,7 +8961,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
wc->stage = DROP_REFERENCE;
wc->update_ref = update_ref;
wc->keep_locks = 0;
- wc->for_reloc = for_reloc;
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
while (1) {
@@ -9199,7 +9023,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
if (err)
goto out_end_trans;
- ret = btrfs_del_root(trans, fs_info, &root->root_key);
+ ret = btrfs_del_root(trans, &root->root_key);
if (ret) {
btrfs_abort_transaction(trans, ret);
err = ret;
@@ -9302,7 +9126,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
wc->stage = DROP_REFERENCE;
wc->update_ref = 0;
wc->keep_locks = 1;
- wc->for_reloc = 1;
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
while (1) {
@@ -9417,10 +9240,10 @@ out:
return ret;
}
-int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache)
+int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_trans_handle *trans;
u64 alloc_flags;
int ret;
@@ -9454,7 +9277,7 @@ again:
*/
alloc_flags = update_block_group_flags(fs_info, cache->flags);
if (alloc_flags != cache->flags) {
- ret = do_chunk_alloc(trans, fs_info, alloc_flags,
+ ret = do_chunk_alloc(trans, alloc_flags,
CHUNK_ALLOC_FORCE);
/*
* ENOSPC is allowed here, we may have enough space
@@ -9471,8 +9294,7 @@ again:
if (!ret)
goto out;
alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
- ret = do_chunk_alloc(trans, fs_info, alloc_flags,
- CHUNK_ALLOC_FORCE);
+ ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
ret = inc_block_group_ro(cache, 0);
@@ -9480,7 +9302,7 @@ out:
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
alloc_flags = update_block_group_flags(fs_info, cache->flags);
mutex_lock(&fs_info->chunk_mutex);
- check_system_chunk(trans, fs_info, alloc_flags);
+ check_system_chunk(trans, alloc_flags);
mutex_unlock(&fs_info->chunk_mutex);
}
mutex_unlock(&fs_info->ro_block_group_mutex);
@@ -9489,12 +9311,11 @@ out:
return ret;
}
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 type)
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
{
- u64 alloc_flags = get_alloc_profile(fs_info, type);
+ u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
- return do_chunk_alloc(trans, fs_info, alloc_flags, CHUNK_ALLOC_FORCE);
+ return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
}
/*
@@ -9520,13 +9341,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
continue;
}
- if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_DUP))
- factor = 2;
- else
- factor = 1;
-
+ factor = btrfs_bg_type_to_factor(block_group->flags);
free_bytes += (block_group->key.offset -
btrfs_block_group_used(&block_group->item)) *
factor;
@@ -9717,6 +9532,8 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
int ret = 0;
struct btrfs_key found_key;
struct extent_buffer *leaf;
+ struct btrfs_block_group_item bg;
+ u64 flags;
int slot;
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
@@ -9751,8 +9568,32 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
"logical %llu len %llu found bg but no related chunk",
found_key.objectid, found_key.offset);
ret = -ENOENT;
+ } else if (em->start != found_key.objectid ||
+ em->len != found_key.offset) {
+ btrfs_err(fs_info,
+ "block group %llu len %llu mismatch with chunk %llu len %llu",
+ found_key.objectid, found_key.offset,
+ em->start, em->len);
+ ret = -EUCLEAN;
} else {
- ret = 0;
+ read_extent_buffer(leaf, &bg,
+ btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bg));
+ flags = btrfs_block_group_flags(&bg) &
+ BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+ if (flags != (em->map_lookup->type &
+ BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(fs_info,
+"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
+ found_key.objectid,
+ found_key.offset, flags,
+ (BTRFS_BLOCK_GROUP_TYPE_MASK &
+ em->map_lookup->type));
+ ret = -EUCLEAN;
+ } else {
+ ret = 0;
+ }
}
free_extent_map(em);
goto out;
@@ -9847,7 +9688,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
*/
if (block_group->cached == BTRFS_CACHE_NO ||
block_group->cached == BTRFS_CACHE_ERROR)
- free_excluded_extents(info, block_group);
+ free_excluded_extents(block_group);
btrfs_remove_free_space_cache(block_group);
ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
@@ -10003,6 +9844,62 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
return cache;
}
+
+/*
+ * Iterate all chunks and verify that each of them has the corresponding block
+ * group
+ */
+static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct extent_map *em;
+ struct btrfs_block_group_cache *bg;
+ u64 start = 0;
+ int ret = 0;
+
+ while (1) {
+ read_lock(&map_tree->map_tree.lock);
+ /*
+ * lookup_extent_mapping will return the first extent map
+ * intersecting the range, so setting @len to 1 is enough to
+ * get the first chunk.
+ */
+ em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
+ read_unlock(&map_tree->map_tree.lock);
+ if (!em)
+ break;
+
+ bg = btrfs_lookup_block_group(fs_info, em->start);
+ if (!bg) {
+ btrfs_err(fs_info,
+ "chunk start=%llu len=%llu doesn't have corresponding block group",
+ em->start, em->len);
+ ret = -EUCLEAN;
+ free_extent_map(em);
+ break;
+ }
+ if (bg->key.objectid != em->start ||
+ bg->key.offset != em->len ||
+ (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
+ (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(fs_info,
+"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
+ em->start, em->len,
+ em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
+ bg->key.objectid, bg->key.offset,
+ bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+ ret = -EUCLEAN;
+ free_extent_map(em);
+ btrfs_put_block_group(bg);
+ break;
+ }
+ start = em->start + em->len;
+ free_extent_map(em);
+ btrfs_put_block_group(bg);
+ }
+ return ret;
+}
+
int btrfs_read_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_path *path;
@@ -10089,13 +9986,13 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
* info has super bytes accounted for, otherwise we'll think
* we have more space than we actually do.
*/
- ret = exclude_super_stripes(info, cache);
+ ret = exclude_super_stripes(cache);
if (ret) {
/*
* We may have excluded something, so call this just in
* case.
*/
- free_excluded_extents(info, cache);
+ free_excluded_extents(cache);
btrfs_put_block_group(cache);
goto error;
}
@@ -10110,14 +10007,14 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
if (found_key.offset == btrfs_block_group_used(&cache->item)) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
- free_excluded_extents(info, cache);
+ free_excluded_extents(cache);
} else if (btrfs_block_group_used(&cache->item) == 0) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
add_new_free_space(cache, found_key.objectid,
found_key.objectid +
found_key.offset);
- free_excluded_extents(info, cache);
+ free_excluded_extents(cache);
}
ret = btrfs_add_block_group_cache(info, cache);
@@ -10140,15 +10037,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
if (btrfs_chunk_readonly(info, cache->key.objectid)) {
inc_block_group_ro(cache, 1);
} else if (btrfs_block_group_used(&cache->item) == 0) {
- spin_lock(&info->unused_bgs_lock);
- /* Should always be true but just in case. */
- if (list_empty(&cache->bg_list)) {
- btrfs_get_block_group(cache);
- trace_btrfs_add_unused_block_group(cache);
- list_add_tail(&cache->bg_list,
- &info->unused_bgs);
- }
- spin_unlock(&info->unused_bgs_lock);
+ ASSERT(list_empty(&cache->bg_list));
+ btrfs_mark_bg_unused(cache);
}
}
@@ -10176,7 +10066,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
btrfs_add_raid_kobjects(info);
init_global_block_rsv(info);
- ret = 0;
+ ret = check_chunk_block_group_mappings(info);
error:
btrfs_free_path(path);
return ret;
@@ -10206,8 +10096,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
sizeof(item));
if (ret)
btrfs_abort_transaction(trans, ret);
- ret = btrfs_finish_chunk_alloc(trans, fs_info, key.objectid,
- key.offset);
+ ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset);
if (ret)
btrfs_abort_transaction(trans, ret);
add_block_group_free_space(trans, block_group);
@@ -10218,10 +10107,10 @@ next:
trans->can_flush_pending_bgs = can_flush_pending_bgs;
}
-int btrfs_make_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytes_used,
+int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
u64 type, u64 chunk_offset, u64 size)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group_cache *cache;
int ret;
@@ -10240,20 +10129,20 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
cache->needs_free_space = 1;
- ret = exclude_super_stripes(fs_info, cache);
+ ret = exclude_super_stripes(cache);
if (ret) {
/*
* We may have excluded something, so call this just in
* case.
*/
- free_excluded_extents(fs_info, cache);
+ free_excluded_extents(cache);
btrfs_put_block_group(cache);
return ret;
}
add_new_free_space(cache, chunk_offset, chunk_offset + size);
- free_excluded_extents(fs_info, cache);
+ free_excluded_extents(cache);
#ifdef CONFIG_BTRFS_DEBUG
if (btrfs_should_fragment_free_space(cache)) {
@@ -10311,9 +10200,9 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
}
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 group_start,
- struct extent_map *em)
+ u64 group_start, struct extent_map *em)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_path *path;
struct btrfs_block_group_cache *block_group;
@@ -10337,18 +10226,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* Free the reserved super bytes from this block group before
* remove it.
*/
- free_excluded_extents(fs_info, block_group);
+ free_excluded_extents(block_group);
btrfs_free_ref_tree_range(fs_info, block_group->key.objectid,
block_group->key.offset);
memcpy(&key, &block_group->key, sizeof(key));
index = btrfs_bg_flags_to_raid_index(block_group->flags);
- if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- factor = 2;
- else
- factor = 1;
+ factor = btrfs_bg_type_to_factor(block_group->flags);
/* make sure this block group isn't part of an allocation cluster */
cluster = &fs_info->data_alloc_cluster;
@@ -10687,7 +10571,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
/* Don't want to race with allocators so take the groups_sem */
down_write(&space_info->groups_sem);
spin_lock(&block_group->lock);
- if (block_group->reserved ||
+ if (block_group->reserved || block_group->pinned ||
btrfs_block_group_used(&block_group->item) ||
block_group->ro ||
list_is_singular(&block_group->list)) {
@@ -10764,8 +10648,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
space_info->bytes_pinned -= block_group->pinned;
space_info->bytes_readonly += block_group->pinned;
- percpu_counter_add(&space_info->total_bytes_pinned,
- -block_group->pinned);
+ percpu_counter_add_batch(&space_info->total_bytes_pinned,
+ -block_group->pinned,
+ BTRFS_TOTAL_BYTES_PINNED_BATCH);
block_group->pinned = 0;
spin_unlock(&block_group->lock);
@@ -10782,8 +10667,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* Btrfs_remove_chunk will abort the transaction if things go
* horribly wrong.
*/
- ret = btrfs_remove_chunk(trans, fs_info,
- block_group->key.objectid);
+ ret = btrfs_remove_chunk(trans, block_group->key.objectid);
if (ret) {
if (trimming)
@@ -11066,3 +10950,16 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
!atomic_read(&root->will_be_snapshotted));
}
}
+
+void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg)
+{
+ struct btrfs_fs_info *fs_info = bg->fs_info;
+
+ spin_lock(&fs_info->unused_bgs_lock);
+ if (list_empty(&bg->bg_list)) {
+ btrfs_get_block_group(bg);
+ trace_btrfs_add_unused_block_group(bg);
+ list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
+ }
+ spin_unlock(&fs_info->unused_bgs_lock);
+}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e55843f536bc..628f1aef34b0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -140,14 +140,6 @@ static int add_extent_changeset(struct extent_state *state, unsigned bits,
static void flush_write_bio(struct extent_page_data *epd);
-static inline struct btrfs_fs_info *
-tree_fs_info(struct extent_io_tree *tree)
-{
- if (tree->ops)
- return tree->ops->tree_fs_info(tree->private_data);
- return NULL;
-}
-
int __init extent_io_init(void)
{
extent_state_cache = kmem_cache_create("btrfs_extent_state",
@@ -564,8 +556,10 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
- btrfs_panic(tree_fs_info(tree), err,
- "Locking error: Extent tree was modified by another thread while locked.");
+ struct inode *inode = tree->private_data;
+
+ btrfs_panic(btrfs_sb(inode->i_sb), err,
+ "locking error: extent tree was modified by another thread while locked");
}
/*
@@ -1386,14 +1380,6 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
}
}
-/*
- * helper function to set both pages and extents in the tree writeback
- */
-static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
-{
- tree->ops->set_range_writeback(tree->private_data, start, end);
-}
-
/* find the first state struct with 'bits' set after 'start', and
* return it. tree->lock must be held. NULL will returned if
* nothing was found after 'start'
@@ -2059,7 +2045,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int mirror_num)
{
u64 start = eb->start;
- unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
+ int i, num_pages = num_extent_pages(eb);
int ret = 0;
if (sb_rdonly(fs_info->sb))
@@ -2398,7 +2384,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
start - page_offset(page),
(int)phy_offset, failed_bio->bi_end_io,
NULL);
- bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
+ bio->bi_opf = REQ_OP_READ | read_mode;
btrfs_debug(btrfs_sb(inode->i_sb),
"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
@@ -2790,8 +2776,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
else
contig = bio_end_sector(bio) == sector;
- if (tree->ops && tree->ops->merge_bio_hook(page, offset,
- page_size, bio, bio_flags))
+ if (tree->ops && btrfs_merge_bio_hook(page, offset, page_size,
+ bio, bio_flags))
can_merge = false;
if (prev_bio_flags != bio_flags || !contig || !can_merge ||
@@ -3422,7 +3408,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
continue;
}
- set_range_writeback(tree, cur, cur + iosize - 1);
+ btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
btrfs_err(BTRFS_I(inode)->root->fs_info,
"page %lu not writeback, cur %llu end %llu",
@@ -3538,7 +3524,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
struct btrfs_fs_info *fs_info,
struct extent_page_data *epd)
{
- unsigned long i, num_pages;
+ int i, num_pages;
int flush = 0;
int ret = 0;
@@ -3588,7 +3574,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
if (!ret)
return ret;
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
@@ -3712,13 +3698,13 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
u64 offset = eb->start;
u32 nritems;
- unsigned long i, num_pages;
+ int i, num_pages;
unsigned long start, end;
unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
int ret = 0;
clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
atomic_set(&eb->io_pages, num_pages);
/* set btree blocks beyond nritems with 0 to avoid stale content. */
@@ -4238,8 +4224,9 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
struct extent_map *em;
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
- struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree;
- struct extent_map_tree *map = &BTRFS_I(page->mapping->host)->extent_tree;
+ struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
+ struct extent_io_tree *tree = &btrfs_inode->io_tree;
+ struct extent_map_tree *map = &btrfs_inode->extent_tree;
if (gfpflags_allow_blocking(mask) &&
page->mapping->host->i_size > SZ_16M) {
@@ -4262,6 +4249,8 @@ int try_release_extent_mapping(struct page *page, gfp_t mask)
extent_map_end(em) - 1,
EXTENT_LOCKED | EXTENT_WRITEBACK,
0, NULL)) {
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &btrfs_inode->runtime_flags);
remove_extent_mapping(map, em);
/* once for the rb tree */
free_extent_map(em);
@@ -4640,23 +4629,20 @@ int extent_buffer_under_io(struct extent_buffer *eb)
}
/*
- * Helper for releasing extent buffer page.
+ * Release all pages attached to the extent buffer.
*/
-static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
+static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
{
- unsigned long index;
- struct page *page;
- int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
+ int i;
+ int num_pages;
+ int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
BUG_ON(extent_buffer_under_io(eb));
- index = num_extent_pages(eb->start, eb->len);
- if (index == 0)
- return;
+ num_pages = num_extent_pages(eb);
+ for (i = 0; i < num_pages; i++) {
+ struct page *page = eb->pages[i];
- do {
- index--;
- page = eb->pages[index];
if (!page)
continue;
if (mapped)
@@ -4688,7 +4674,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
/* One for when we allocated the page */
put_page(page);
- } while (index != 0);
+ }
}
/*
@@ -4696,7 +4682,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
*/
static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
{
- btrfs_release_extent_buffer_page(eb);
+ btrfs_release_extent_buffer_pages(eb);
__free_extent_buffer(eb);
}
@@ -4740,10 +4726,10 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
{
- unsigned long i;
+ int i;
struct page *p;
struct extent_buffer *new;
- unsigned long num_pages = num_extent_pages(src->start, src->len);
+ int num_pages = num_extent_pages(src);
new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
if (new == NULL)
@@ -4763,7 +4749,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
}
set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
- set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
+ set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
return new;
}
@@ -4772,15 +4758,14 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len)
{
struct extent_buffer *eb;
- unsigned long num_pages;
- unsigned long i;
-
- num_pages = num_extent_pages(start, len);
+ int num_pages;
+ int i;
eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb)
return NULL;
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
eb->pages[i] = alloc_page(GFP_NOFS);
if (!eb->pages[i])
@@ -4788,7 +4773,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
}
set_extent_buffer_uptodate(eb);
btrfs_set_header_nritems(eb, 0);
- set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
+ set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
return eb;
err:
@@ -4840,11 +4825,11 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
static void mark_extent_buffer_accessed(struct extent_buffer *eb,
struct page *accessed)
{
- unsigned long num_pages, i;
+ int num_pages, i;
check_buffer_tree_ref(eb);
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
@@ -4941,8 +4926,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start)
{
unsigned long len = fs_info->nodesize;
- unsigned long num_pages = num_extent_pages(start, len);
- unsigned long i;
+ int num_pages;
+ int i;
unsigned long index = start >> PAGE_SHIFT;
struct extent_buffer *eb;
struct extent_buffer *exists = NULL;
@@ -4964,6 +4949,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
if (!eb)
return ERR_PTR(-ENOMEM);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++, index++) {
p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
if (!p) {
@@ -5006,8 +4992,11 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
uptodate = 0;
/*
- * see below about how we avoid a nasty race with release page
- * and why we unlock later
+ * We can't unlock the pages just yet since the extent buffer
+ * hasn't been properly inserted in the radix tree, this
+ * opens a race with btree_releasepage which can free a page
+ * while we are still filling in all pages for the buffer and
+ * we could crash.
*/
}
if (uptodate)
@@ -5036,21 +5025,12 @@ again:
set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
/*
- * there is a race where release page may have
- * tried to find this extent buffer in the radix
- * but failed. It will tell the VM it is safe to
- * reclaim the, and it will clear the page private bit.
- * We must make sure to set the page private bit properly
- * after the extent buffer is in the radix tree so
- * it doesn't get lost
+ * Now it's safe to unlock the pages because any calls to
+ * btree_releasepage will correctly detect that a page belongs to a
+ * live buffer and won't free them prematurely.
*/
- SetPageChecked(eb->pages[0]);
- for (i = 1; i < num_pages; i++) {
- p = eb->pages[i];
- ClearPageChecked(p);
- unlock_page(p);
- }
- unlock_page(eb->pages[0]);
+ for (i = 0; i < num_pages; i++)
+ unlock_page(eb->pages[i]);
return eb;
free_eb:
@@ -5072,9 +5052,10 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
__free_extent_buffer(eb);
}
-/* Expects to have eb->eb_lock already held */
static int release_extent_buffer(struct extent_buffer *eb)
{
+ lockdep_assert_held(&eb->refs_lock);
+
WARN_ON(atomic_read(&eb->refs) == 0);
if (atomic_dec_and_test(&eb->refs)) {
if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
@@ -5091,9 +5072,9 @@ static int release_extent_buffer(struct extent_buffer *eb)
}
/* Should be safe to release our pages at this point */
- btrfs_release_extent_buffer_page(eb);
+ btrfs_release_extent_buffer_pages(eb);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
- if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
+ if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
__free_extent_buffer(eb);
return 1;
}
@@ -5124,7 +5105,7 @@ void free_extent_buffer(struct extent_buffer *eb)
spin_lock(&eb->refs_lock);
if (atomic_read(&eb->refs) == 2 &&
- test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
+ test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))
atomic_dec(&eb->refs);
if (atomic_read(&eb->refs) == 2 &&
@@ -5156,11 +5137,11 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
void clear_extent_buffer_dirty(struct extent_buffer *eb)
{
- unsigned long i;
- unsigned long num_pages;
+ int i;
+ int num_pages;
struct page *page;
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
@@ -5186,15 +5167,15 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
int set_extent_buffer_dirty(struct extent_buffer *eb)
{
- unsigned long i;
- unsigned long num_pages;
+ int i;
+ int num_pages;
int was_dirty = 0;
check_buffer_tree_ref(eb);
was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
WARN_ON(atomic_read(&eb->refs) == 0);
WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
@@ -5205,12 +5186,12 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
void clear_extent_buffer_uptodate(struct extent_buffer *eb)
{
- unsigned long i;
+ int i;
struct page *page;
- unsigned long num_pages;
+ int num_pages;
clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
if (page)
@@ -5220,12 +5201,12 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb)
void set_extent_buffer_uptodate(struct extent_buffer *eb)
{
- unsigned long i;
+ int i;
struct page *page;
- unsigned long num_pages;
+ int num_pages;
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
SetPageUptodate(page);
@@ -5235,13 +5216,13 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, int wait, int mirror_num)
{
- unsigned long i;
+ int i;
struct page *page;
int err;
int ret = 0;
int locked_pages = 0;
int all_uptodate = 1;
- unsigned long num_pages;
+ int num_pages;
unsigned long num_reads = 0;
struct bio *bio = NULL;
unsigned long bio_flags = 0;
@@ -5249,7 +5230,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
- num_pages = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
if (wait == WAIT_NONE) {
@@ -5573,11 +5554,11 @@ void copy_extent_buffer_full(struct extent_buffer *dst,
struct extent_buffer *src)
{
int i;
- unsigned num_pages;
+ int num_pages;
ASSERT(dst->len == src->len);
- num_pages = num_extent_pages(dst->start, dst->len);
+ num_pages = num_extent_pages(dst);
for (i = 0; i < num_pages; i++)
copy_page(page_address(dst->pages[i]),
page_address(src->pages[i]));
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 0bfd4aeb822d..b4d03e677e1d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -46,7 +46,7 @@
#define EXTENT_BUFFER_STALE 6
#define EXTENT_BUFFER_WRITEBACK 7
#define EXTENT_BUFFER_READ_ERR 8 /* read IO error */
-#define EXTENT_BUFFER_DUMMY 9
+#define EXTENT_BUFFER_UNMAPPED 9
#define EXTENT_BUFFER_IN_TREE 10
#define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */
@@ -92,9 +92,6 @@ typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
struct bio *bio, u64 bio_offset);
-typedef blk_status_t (extent_submit_bio_done_t)(void *private_data,
- struct bio *bio, int mirror_num);
-
struct extent_io_ops {
/*
* The following callbacks must be allways defined, the function
@@ -104,12 +101,7 @@ struct extent_io_ops {
int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
struct page *page, u64 start, u64 end,
int mirror);
- int (*merge_bio_hook)(struct page *page, unsigned long offset,
- size_t size, struct bio *bio,
- unsigned long bio_flags);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
- struct btrfs_fs_info *(*tree_fs_info)(void *private_data);
- void (*set_range_writeback)(void *private_data, u64 start, u64 end);
/*
* Optional hooks, called if the pointer is not NULL
@@ -440,10 +432,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
int mirror_num);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
-static inline unsigned long num_extent_pages(u64 start, u64 len)
+static inline int num_extent_pages(const struct extent_buffer *eb)
{
- return ((start + len + PAGE_SIZE - 1) >> PAGE_SHIFT) -
- (start >> PAGE_SHIFT);
+ return (round_up(eb->start + eb->len, PAGE_SIZE) >> PAGE_SHIFT) -
+ (eb->start >> PAGE_SHIFT);
}
static inline void extent_buffer_get(struct extent_buffer *eb)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f9dd6d1836a3..ba74827beb32 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -922,7 +922,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
const bool new_inline,
struct extent_map *em)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root;
struct extent_buffer *leaf = path->nodes[0];
const int slot = path->slots[0];
@@ -942,7 +942,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
btrfs_file_extent_num_bytes(leaf, fi);
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
size_t size;
- size = btrfs_file_extent_inline_len(leaf, slot, fi);
+ size = btrfs_file_extent_ram_bytes(leaf, fi);
extent_end = ALIGN(extent_start + size,
fs_info->sectorsize);
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 51e77d72068a..2be00e873e92 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -5,14 +5,11 @@
#include <linux/fs.h>
#include <linux/pagemap.h>
-#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
-#include <linux/mpage.h>
#include <linux/falloc.h>
-#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/slab.h>
@@ -83,7 +80,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1,
static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
struct inode_defrag *defrag)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct inode_defrag *entry;
struct rb_node **p;
struct rb_node *parent = NULL;
@@ -135,8 +132,8 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct inode_defrag *defrag;
u64 transid;
int ret;
@@ -185,7 +182,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
struct inode_defrag *defrag)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret;
if (!__need_auto_defrag(fs_info))
@@ -833,8 +830,7 @@ next_slot:
btrfs_file_extent_num_bytes(leaf, fi);
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = key.offset +
- btrfs_file_extent_inline_len(leaf,
- path->slots[0], fi);
+ btrfs_file_extent_ram_bytes(leaf, fi);
} else {
/* can't happen */
BUG();
@@ -1133,7 +1129,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u64 start, u64 end)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = inode->root;
struct extent_buffer *leaf;
struct btrfs_path *path;
@@ -1470,7 +1466,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
u64 *lockstart, u64 *lockend,
struct extent_state **cached_state)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 start_pos;
u64 last_pos;
int i;
@@ -1526,7 +1522,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root;
struct btrfs_ordered_extent *ordered;
u64 lockstart, lockend;
@@ -1569,10 +1565,11 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
return ret;
}
-static noinline ssize_t __btrfs_buffered_write(struct file *file,
- struct iov_iter *i,
- loff_t pos)
+static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
+ struct iov_iter *i)
{
+ struct file *file = iocb->ki_filp;
+ loff_t pos = iocb->ki_pos;
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -1804,7 +1801,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- loff_t pos = iocb->ki_pos;
+ loff_t pos;
ssize_t written;
ssize_t written_buffered;
loff_t endbyte;
@@ -1815,8 +1812,8 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
if (written < 0 || !iov_iter_count(from))
return written;
- pos += written;
- written_buffered = __btrfs_buffered_write(file, from, pos);
+ pos = iocb->ki_pos;
+ written_buffered = btrfs_buffered_write(iocb, from);
if (written_buffered < 0) {
err = written_buffered;
goto out;
@@ -1953,7 +1950,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
if (iocb->ki_flags & IOCB_DIRECT) {
num_written = __btrfs_direct_write(iocb, from);
} else {
- num_written = __btrfs_buffered_write(file, from, pos);
+ num_written = btrfs_buffered_write(iocb, from);
if (num_written > 0)
iocb->ki_pos = pos + num_written;
if (clean_page)
@@ -2042,7 +2039,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
int ret = 0, err;
- bool full_sync = false;
u64 len;
/*
@@ -2066,96 +2062,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
inode_lock(inode);
atomic_inc(&root->log_batch);
- full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags);
+
/*
- * We might have have had more pages made dirty after calling
- * start_ordered_ops and before acquiring the inode's i_mutex.
+ * We have to do this here to avoid the priority inversion of waiting on
+ * IO of a lower priority task while holding a transaciton open.
*/
- if (full_sync) {
- /*
- * For a full sync, we need to make sure any ordered operations
- * start and finish before we start logging the inode, so that
- * all extents are persisted and the respective file extent
- * items are in the fs/subvol btree.
- */
- ret = btrfs_wait_ordered_range(inode, start, len);
- } else {
- /*
- * Start any new ordered operations before starting to log the
- * inode. We will wait for them to finish in btrfs_sync_log().
- *
- * Right before acquiring the inode's mutex, we might have new
- * writes dirtying pages, which won't immediately start the
- * respective ordered operations - that is done through the
- * fill_delalloc callbacks invoked from the writepage and
- * writepages address space operations. So make sure we start
- * all ordered operations before starting to log our inode. Not
- * doing this means that while logging the inode, writeback
- * could start and invoke writepage/writepages, which would call
- * the fill_delalloc callbacks (cow_file_range,
- * submit_compressed_extents). These callbacks add first an
- * extent map to the modified list of extents and then create
- * the respective ordered operation, which means in
- * tree-log.c:btrfs_log_inode() we might capture all existing
- * ordered operations (with btrfs_get_logged_extents()) before
- * the fill_delalloc callback adds its ordered operation, and by
- * the time we visit the modified list of extent maps (with
- * btrfs_log_changed_extents()), we see and process the extent
- * map they created. We then use the extent map to construct a
- * file extent item for logging without waiting for the
- * respective ordered operation to finish - this file extent
- * item points to a disk location that might not have yet been
- * written to, containing random data - so after a crash a log
- * replay will make our inode have file extent items that point
- * to disk locations containing invalid data, as we returned
- * success to userspace without waiting for the respective
- * ordered operation to finish, because it wasn't captured by
- * btrfs_get_logged_extents().
- */
- ret = start_ordered_ops(inode, start, end);
- }
+ ret = btrfs_wait_ordered_range(inode, start, len);
if (ret) {
inode_unlock(inode);
goto out;
}
atomic_inc(&root->log_batch);
- /*
- * If the last transaction that changed this file was before the current
- * transaction and we have the full sync flag set in our inode, we can
- * bail out now without any syncing.
- *
- * Note that we can't bail out if the full sync flag isn't set. This is
- * because when the full sync flag is set we start all ordered extents
- * and wait for them to fully complete - when they complete they update
- * the inode's last_trans field through:
- *
- * btrfs_finish_ordered_io() ->
- * btrfs_update_inode_fallback() ->
- * btrfs_update_inode() ->
- * btrfs_set_inode_last_trans()
- *
- * So we are sure that last_trans is up to date and can do this check to
- * bail out safely. For the fast path, when the full sync flag is not
- * set in our inode, we can not do it because we start only our ordered
- * extents and don't wait for them to complete (that is when
- * btrfs_finish_ordered_io runs), so here at this point their last_trans
- * value might be less than or equals to fs_info->last_trans_committed,
- * and setting a speculative last_trans for an inode when a buffered
- * write is made (such as fs_info->generation + 1 for example) would not
- * be reliable since after setting the value and before fsync is called
- * any number of transactions can start and commit (transaction kthread
- * commits the current transaction periodically), and a transaction
- * commit does not start nor waits for ordered extents to complete.
- */
smp_mb();
if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
- (full_sync && BTRFS_I(inode)->last_trans <=
- fs_info->last_trans_committed) ||
- (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
- BTRFS_I(inode)->last_trans
- <= fs_info->last_trans_committed)) {
+ BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed) {
/*
* We've had everything committed since the last time we were
* modified so clear this flag in case it was set for whatever
@@ -2239,13 +2160,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
}
- if (!full_sync) {
- ret = btrfs_wait_ordered_range(inode, start, len);
- if (ret) {
- btrfs_end_transaction(trans);
- goto out;
- }
- }
ret = btrfs_commit_transaction(trans);
} else {
ret = btrfs_end_transaction(trans);
@@ -2310,7 +2224,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *path, u64 offset, u64 end)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = inode->root;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d5f80cb300be..0adf38b00fa0 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -71,10 +71,6 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
inode = btrfs_iget(fs_info->sb, &location, root, NULL);
if (IS_ERR(inode))
return inode;
- if (is_bad_inode(inode)) {
- iput(inode);
- return ERR_PTR(-ENOENT);
- }
mapping_set_gfp_mask(inode->i_mapping,
mapping_gfp_constraint(inode->i_mapping,
@@ -300,9 +296,9 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
check_crcs = 1;
- /* Make sure we can fit our crcs into the first page */
+ /* Make sure we can fit our crcs and generation into the first page */
if (write && check_crcs &&
- (num_pages * sizeof(u32)) >= PAGE_SIZE)
+ (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
return -ENOSPC;
memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
@@ -547,7 +543,7 @@ static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
io_ctl_map_page(io_ctl, 0);
}
- memcpy(io_ctl->cur, bitmap, PAGE_SIZE);
+ copy_page(io_ctl->cur, bitmap);
io_ctl_set_crc(io_ctl, io_ctl->index - 1);
if (io_ctl->index < io_ctl->num_pages)
io_ctl_map_page(io_ctl, 0);
@@ -607,7 +603,7 @@ static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
if (ret)
return ret;
- memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE);
+ copy_page(entry->bitmap, io_ctl->cur);
io_ctl_unmap_page(io_ctl);
return 0;
@@ -655,7 +651,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
struct btrfs_path *path, u64 offset)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_free_space_header *header;
struct extent_buffer *leaf;
struct btrfs_io_ctl io_ctl;
@@ -1123,13 +1119,10 @@ static int __btrfs_wait_cache_io(struct btrfs_root *root,
{
int ret;
struct inode *inode = io_ctl->inode;
- struct btrfs_fs_info *fs_info;
if (!inode)
return 0;
- fs_info = btrfs_sb(inode->i_sb);
-
/* Flush the dirty pages in the cache file. */
ret = flush_dirty_cache(inode);
if (ret)
@@ -1145,7 +1138,7 @@ out:
BTRFS_I(inode)->generation = 0;
if (block_group) {
#ifdef DEBUG
- btrfs_err(fs_info,
+ btrfs_err(root->fs_info,
"failed to write free space cache for block group %llu",
block_group->key.objectid);
#endif
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index b5950aacd697..d6736595ec57 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1236,7 +1236,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
if (ret)
goto abort;
- ret = btrfs_del_root(trans, fs_info, &free_space_root->root_key);
+ ret = btrfs_del_root(trans, &free_space_root->root_key);
if (ret)
goto abort;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 12fcd8897c33..ffca2abf13d0 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -3,7 +3,6 @@
* Copyright (C) 2007 Oracle. All rights reserved.
*/
-#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>
@@ -244,8 +243,6 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
return;
while (1) {
- bool add_to_ctl = true;
-
spin_lock(rbroot_lock);
n = rb_first(rbroot);
if (!n) {
@@ -257,15 +254,14 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
BUG_ON(info->bitmap); /* Logic error */
if (info->offset > root->ino_cache_progress)
- add_to_ctl = false;
- else if (info->offset + info->bytes > root->ino_cache_progress)
- count = root->ino_cache_progress - info->offset + 1;
+ count = 0;
else
- count = info->bytes;
+ count = min(root->ino_cache_progress - info->offset + 1,
+ info->bytes);
rb_erase(&info->offset_index, rbroot);
spin_unlock(rbroot_lock);
- if (add_to_ctl)
+ if (count)
__btrfs_add_free_space(root->fs_info, ctl,
info->offset, count);
kmem_cache_free(btrfs_free_space_cachep, info);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index eba61bcb9bb3..9357a19d2bff 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -14,17 +14,13 @@
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
-#include <linux/mpage.h>
-#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
-#include <linux/bit_spinlock.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
-#include <linux/mount.h>
#include <linux/btrfs.h>
#include <linux/blkdev.h>
#include <linux/posix_acl_xattr.h>
@@ -1443,8 +1439,7 @@ next_slot:
nocow = 1;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = found_key.offset +
- btrfs_file_extent_inline_len(leaf,
- path->slots[0], fi);
+ btrfs_file_extent_ram_bytes(leaf, fi);
extent_end = ALIGN(extent_end,
fs_info->sectorsize);
} else {
@@ -1752,7 +1747,7 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
void __btrfs_del_delalloc_inode(struct btrfs_root *root,
struct btrfs_inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = root->fs_info;
if (!list_empty(&inode->delalloc_inodes)) {
list_del_init(&inode->delalloc_inodes);
@@ -1903,8 +1898,8 @@ static void btrfs_clear_bit_hook(void *private_data,
}
/*
- * extent_io.c merge_bio_hook, this must check the chunk tree to make sure
- * we don't create bios that span stripes or chunks
+ * Merge bio hook, this must check the chunk tree to make sure we don't create
+ * bios that span stripes or chunks
*
* return 1 if page cannot be merged to bio
* return 0 if page can be merged to bio
@@ -1962,7 +1957,7 @@ static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
+blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
int mirror_num)
{
struct inode *inode = private_data;
@@ -2035,8 +2030,7 @@ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
/* we're doing a write, do the async checksumming */
ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
bio_offset, inode,
- btrfs_submit_bio_start,
- btrfs_submit_bio_done);
+ btrfs_submit_bio_start);
goto out;
} else if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, 0, 0);
@@ -3610,18 +3604,15 @@ static int btrfs_read_locked_inode(struct inode *inode)
filled = true;
path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto make_bad;
- }
+ if (!path)
+ return -ENOMEM;
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
if (ret) {
- if (ret > 0)
- ret = -ENOENT;
- goto make_bad;
+ btrfs_free_path(path);
+ return ret;
}
leaf = path->nodes[0];
@@ -3774,11 +3765,6 @@ cache_acl:
btrfs_sync_inode_flags_to_i_flags(inode);
return 0;
-
-make_bad:
- btrfs_free_path(path);
- make_bad_inode(inode);
- return ret;
}
/*
@@ -3984,7 +3970,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
goto err;
}
skip_backref:
- ret = btrfs_delete_delayed_dir_index(trans, fs_info, dir, index);
+ ret = btrfs_delete_delayed_dir_index(trans, dir, index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto err;
@@ -4087,11 +4073,10 @@ out:
}
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *dir, u64 objectid,
- const char *name, int name_len)
+ struct inode *dir, u64 objectid,
+ const char *name, int name_len)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
@@ -4124,9 +4109,8 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- ret = btrfs_del_root_ref(trans, fs_info, objectid,
- root->root_key.objectid, dir_ino,
- &index, name, name_len);
+ ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid,
+ dir_ino, &index, name, name_len);
if (ret < 0) {
if (ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
@@ -4145,12 +4129,11 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- btrfs_release_path(path);
index = key.offset;
}
btrfs_release_path(path);
- ret = btrfs_delete_delayed_dir_index(trans, fs_info, BTRFS_I(dir), index);
+ ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -4243,9 +4226,9 @@ again:
prev = node;
entry = rb_entry(node, struct btrfs_inode, rb_node);
- if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+ if (objectid < btrfs_ino(entry))
node = node->rb_left;
- else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+ else if (objectid > btrfs_ino(entry))
node = node->rb_right;
else
break;
@@ -4253,7 +4236,7 @@ again:
if (!node) {
while (prev) {
entry = rb_entry(prev, struct btrfs_inode, rb_node);
- if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) {
+ if (objectid <= btrfs_ino(entry)) {
node = prev;
break;
}
@@ -4262,7 +4245,7 @@ again:
}
while (node) {
entry = rb_entry(node, struct btrfs_inode, rb_node);
- objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1;
+ objectid = btrfs_ino(entry) + 1;
inode = igrab(&entry->vfs_inode);
if (inode) {
spin_unlock(&root->inode_lock);
@@ -4343,10 +4326,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
- ret = btrfs_unlink_subvol(trans, root, dir,
- dest->root_key.objectid,
- dentry->d_name.name,
- dentry->d_name.len);
+ ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid,
+ dentry->d_name.name, dentry->d_name.len);
if (ret) {
err = ret;
btrfs_abort_transaction(trans, ret);
@@ -4441,7 +4422,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
return PTR_ERR(trans);
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
- err = btrfs_unlink_subvol(trans, root, dir,
+ err = btrfs_unlink_subvol(trans, dir,
BTRFS_I(inode)->location.objectid,
dentry->d_name.name,
dentry->d_name.len);
@@ -4643,8 +4624,8 @@ search_again:
BTRFS_I(inode), leaf, fi,
found_key.offset);
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
- item_end += btrfs_file_extent_inline_len(leaf,
- path->slots[0], fi);
+ item_end += btrfs_file_extent_ram_bytes(leaf,
+ fi);
trace_btrfs_truncate_show_fi_inline(
BTRFS_I(inode), leaf, fi, path->slots[0],
@@ -5615,9 +5596,9 @@ static void inode_tree_add(struct inode *inode)
parent = *p;
entry = rb_entry(parent, struct btrfs_inode, rb_node);
- if (ino < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+ if (ino < btrfs_ino(entry))
p = &parent->rb_left;
- else if (ino > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+ else if (ino > btrfs_ino(entry))
p = &parent->rb_right;
else {
WARN_ON(!(entry->vfs_inode.i_state &
@@ -5708,16 +5689,21 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
int ret;
ret = btrfs_read_locked_inode(inode);
- if (!is_bad_inode(inode)) {
+ if (!ret) {
inode_tree_add(inode);
unlock_new_inode(inode);
if (new)
*new = 1;
} else {
- unlock_new_inode(inode);
- iput(inode);
- ASSERT(ret < 0);
- inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
+ iget_failed(inode);
+ /*
+ * ret > 0 can come from btrfs_search_slot called by
+ * btrfs_read_locked_inode, this means the inode item
+ * was not found.
+ */
+ if (ret > 0)
+ ret = -ENOENT;
+ inode = ERR_PTR(ret);
}
}
@@ -5745,7 +5731,7 @@ static struct inode *new_simple_dir(struct super_block *s,
inode->i_mtime = current_time(inode);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
- BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime);
+ BTRFS_I(inode)->i_otime = inode->i_mtime;
return inode;
}
@@ -6027,32 +6013,6 @@ err:
return ret;
}
-int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- int ret = 0;
- bool nolock = false;
-
- if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
- return 0;
-
- if (btrfs_fs_closing(root->fs_info) &&
- btrfs_is_free_space_inode(BTRFS_I(inode)))
- nolock = true;
-
- if (wbc->sync_mode == WB_SYNC_ALL) {
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans);
- }
- return ret;
-}
-
/*
* This is somewhat expensive, updating the tree every time the
* inode changes. But, it is most likely to find the inode in cache.
@@ -6335,8 +6295,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
location->type = BTRFS_INODE_ITEM_KEY;
ret = btrfs_insert_inode_locked(inode);
- if (ret < 0)
+ if (ret < 0) {
+ iput(inode);
goto fail;
+ }
path->leave_spinning = 1;
ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
@@ -6349,7 +6311,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode->i_mtime = current_time(inode);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
- BTRFS_I(inode)->i_otime = timespec64_to_timespec(inode->i_mtime);
+ BTRFS_I(inode)->i_otime = inode->i_mtime;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
@@ -6395,12 +6357,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
return inode;
fail_unlock:
- unlock_new_inode(inode);
+ discard_new_inode(inode);
fail:
if (dir && name)
BTRFS_I(dir)->index_cnt--;
btrfs_free_path(path);
- iput(inode);
return ERR_PTR(ret);
}
@@ -6419,7 +6380,6 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
const char *name, int name_len, int add_backref, u64 index)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
int ret = 0;
struct btrfs_key key;
struct btrfs_root *root = parent_inode->root;
@@ -6435,7 +6395,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
}
if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
- ret = btrfs_add_root_ref(trans, fs_info, key.objectid,
+ ret = btrfs_add_root_ref(trans, key.objectid,
root->root_key.objectid, parent_ino,
index, name, name_len);
} else if (add_backref) {
@@ -6471,7 +6431,7 @@ fail_dir_item:
if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
u64 local_index;
int err;
- err = btrfs_del_root_ref(trans, fs_info, key.objectid,
+ err = btrfs_del_root_ref(trans, key.objectid,
root->root_key.objectid, parent_ino,
&local_index, name, name_len);
@@ -6505,7 +6465,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = NULL;
int err;
- int drop_inode = 0;
u64 objectid;
u64 index = 0;
@@ -6527,6 +6486,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
+ inode = NULL;
goto out_unlock;
}
@@ -6541,31 +6501,24 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
- goto out_unlock_inode;
+ goto out_unlock;
err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
0, index);
- if (err) {
- goto out_unlock_inode;
- } else {
- btrfs_update_inode(trans, root, inode);
- d_instantiate_new(dentry, inode);
- }
+ if (err)
+ goto out_unlock;
+
+ btrfs_update_inode(trans, root, inode);
+ d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
- if (drop_inode) {
+ if (err && inode) {
inode_dec_link_count(inode);
- iput(inode);
+ discard_new_inode(inode);
}
return err;
-
-out_unlock_inode:
- drop_inode = 1;
- unlock_new_inode(inode);
- goto out_unlock;
-
}
static int btrfs_create(struct inode *dir, struct dentry *dentry,
@@ -6575,7 +6528,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = NULL;
- int drop_inode_on_err = 0;
int err;
u64 objectid;
u64 index = 0;
@@ -6598,9 +6550,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
+ inode = NULL;
goto out_unlock;
}
- drop_inode_on_err = 1;
/*
* If the active LSM wants to access the inode during
* d_instantiate it needs these. Smack checks to see
@@ -6613,33 +6565,28 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
- goto out_unlock_inode;
+ goto out_unlock;
err = btrfs_update_inode(trans, root, inode);
if (err)
- goto out_unlock_inode;
+ goto out_unlock;
err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
0, index);
if (err)
- goto out_unlock_inode;
+ goto out_unlock;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
- if (err && drop_inode_on_err) {
+ if (err && inode) {
inode_dec_link_count(inode);
- iput(inode);
+ discard_new_inode(inode);
}
btrfs_btree_balance_dirty(fs_info);
return err;
-
-out_unlock_inode:
- unlock_new_inode(inode);
- goto out_unlock;
-
}
static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
@@ -6748,6 +6695,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
S_IFDIR | mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
+ inode = NULL;
goto out_fail;
}
@@ -6758,34 +6706,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
- goto out_fail_inode;
+ goto out_fail;
btrfs_i_size_write(BTRFS_I(inode), 0);
err = btrfs_update_inode(trans, root, inode);
if (err)
- goto out_fail_inode;
+ goto out_fail;
err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
dentry->d_name.name,
dentry->d_name.len, 0, index);
if (err)
- goto out_fail_inode;
+ goto out_fail;
d_instantiate_new(dentry, inode);
drop_on_err = 0;
out_fail:
btrfs_end_transaction(trans);
- if (drop_on_err) {
+ if (err && inode) {
inode_dec_link_count(inode);
- iput(inode);
+ discard_new_inode(inode);
}
btrfs_btree_balance_dirty(fs_info);
return err;
-
-out_fail_inode:
- unlock_new_inode(inode);
- goto out_fail;
}
static noinline int uncompress_inline(struct btrfs_path *path,
@@ -6847,7 +6791,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
size_t pg_offset, u64 start, u64 len,
int create)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret;
int err = 0;
u64 extent_start = 0;
@@ -6943,7 +6887,8 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
extent_start);
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
size_t size;
- size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
+
+ size = btrfs_file_extent_ram_bytes(leaf, item);
extent_end = ALIGN(extent_start + size,
fs_info->sectorsize);
@@ -6994,7 +6939,7 @@ next:
if (new_inline)
goto out;
- size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
+ size = btrfs_file_extent_ram_bytes(leaf, item);
extent_offset = page_offset(page) + pg_offset - extent_start;
copy_size = min_t(u64, PAGE_SIZE - pg_offset,
size - extent_offset);
@@ -7865,7 +7810,7 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
isector >>= inode->i_sb->s_blocksize_bits;
bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
pgoff, isector, repair_endio, repair_arg);
- bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
+ bio->bi_opf = REQ_OP_READ | read_mode;
btrfs_debug(BTRFS_I(inode)->root->fs_info,
"repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d",
@@ -8299,8 +8244,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
if (write && async_submit) {
ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
file_offset, inode,
- btrfs_submit_bio_start_direct_io,
- btrfs_submit_bio_done);
+ btrfs_submit_bio_start_direct_io);
goto err;
} else if (write) {
/*
@@ -9540,8 +9484,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, root, old_dir,
- root_objectid,
+ ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
old_dentry->d_name.name,
old_dentry->d_name.len);
} else { /* src is an inode */
@@ -9560,8 +9503,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* dest is a subvolume */
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, dest, new_dir,
- root_objectid,
+ ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
new_dentry->d_name.name,
new_dentry->d_name.len);
} else { /* dest is an inode */
@@ -9821,7 +9763,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
+ ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
old_dentry->d_name.name,
old_dentry->d_name.len);
} else {
@@ -9843,8 +9785,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
root_objectid = BTRFS_I(new_inode)->location.objectid;
- ret = btrfs_unlink_subvol(trans, dest, new_dir,
- root_objectid,
+ ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
new_dentry->d_name.name,
new_dentry->d_name.len);
BUG_ON(new_inode->i_nlink == 0);
@@ -10115,7 +10056,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
struct btrfs_key key;
struct inode *inode = NULL;
int err;
- int drop_inode = 0;
u64 objectid;
u64 index = 0;
int name_len;
@@ -10148,6 +10088,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
objectid, S_IFLNK|S_IRWXUGO, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
+ inode = NULL;
goto out_unlock;
}
@@ -10164,12 +10105,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
- goto out_unlock_inode;
+ goto out_unlock;
path = btrfs_alloc_path();
if (!path) {
err = -ENOMEM;
- goto out_unlock_inode;
+ goto out_unlock;
}
key.objectid = btrfs_ino(BTRFS_I(inode));
key.offset = 0;
@@ -10179,7 +10120,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
datasize);
if (err) {
btrfs_free_path(path);
- goto out_unlock_inode;
+ goto out_unlock;
}
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -10211,26 +10152,19 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
if (!err)
err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
BTRFS_I(inode), 0, index);
- if (err) {
- drop_inode = 1;
- goto out_unlock_inode;
- }
+ if (err)
+ goto out_unlock;
d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
- if (drop_inode) {
+ if (err && inode) {
inode_dec_link_count(inode);
- iput(inode);
+ discard_new_inode(inode);
}
btrfs_btree_balance_dirty(fs_info);
return err;
-
-out_unlock_inode:
- drop_inode = 1;
- unlock_new_inode(inode);
- goto out_unlock;
}
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -10439,14 +10373,14 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
ret = btrfs_init_inode_security(trans, inode, dir, NULL);
if (ret)
- goto out_inode;
+ goto out;
ret = btrfs_update_inode(trans, root, inode);
if (ret)
- goto out_inode;
+ goto out;
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret)
- goto out_inode;
+ goto out;
/*
* We set number of links to 0 in btrfs_new_inode(), and here we set
@@ -10456,21 +10390,15 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
* d_tmpfile() -> inode_dec_link_count() -> drop_nlink()
*/
set_nlink(inode, 1);
- unlock_new_inode(inode);
d_tmpfile(dentry, inode);
+ unlock_new_inode(inode);
mark_inode_dirty(inode);
-
out:
btrfs_end_transaction(trans);
- if (ret)
- iput(inode);
+ if (ret && inode)
+ discard_new_inode(inode);
btrfs_btree_balance_dirty(fs_info);
return ret;
-
-out_inode:
- unlock_new_inode(inode);
- goto out;
-
}
__attribute__((const))
@@ -10479,12 +10407,6 @@ static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror)
return -EAGAIN;
}
-static struct btrfs_fs_info *iotree_fs_info(void *private_data)
-{
- struct inode *inode = private_data;
- return btrfs_sb(inode->i_sb);
-}
-
static void btrfs_check_extent_io_range(void *private_data, const char *caller,
u64 start, u64 end)
{
@@ -10499,9 +10421,9 @@ static void btrfs_check_extent_io_range(void *private_data, const char *caller,
}
}
-void btrfs_set_range_writeback(void *private_data, u64 start, u64 end)
+void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
- struct inode *inode = private_data;
+ struct inode *inode = tree->private_data;
unsigned long index = start >> PAGE_SHIFT;
unsigned long end_index = end >> PAGE_SHIFT;
struct page *page;
@@ -10557,10 +10479,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = {
/* mandatory callbacks */
.submit_bio_hook = btrfs_submit_bio_hook,
.readpage_end_io_hook = btrfs_readpage_end_io_hook,
- .merge_bio_hook = btrfs_merge_bio_hook,
.readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
- .tree_fs_info = iotree_fs_info,
- .set_range_writeback = btrfs_set_range_writeback,
/* optional callbacks */
.fill_delalloc = run_delalloc_range,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 43ecbe620dea..d3a5d2a41e5f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5,23 +5,18 @@
#include <linux/kernel.h>
#include <linux/bio.h>
-#include <linux/buffer_head.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
-#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
-#include <linux/mpage.h>
#include <linux/namei.h>
-#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
-#include <linux/bit_spinlock.h>
#include <linux/security.h>
#include <linux/xattr.h>
#include <linux/mm.h>
@@ -606,7 +601,7 @@ static noinline int create_subvol(struct inode *dir,
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
- ret = btrfs_qgroup_inherit(trans, fs_info, 0, objectid, inherit);
+ ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
if (ret)
goto fail;
@@ -616,14 +611,6 @@ static noinline int create_subvol(struct inode *dir,
goto fail;
}
- memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
- btrfs_set_header_bytenr(leaf, leaf->start);
- btrfs_set_header_generation(leaf, trans->transid);
- btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(leaf, objectid);
-
- write_extent_buffer_fsid(leaf, fs_info->fsid);
- write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid);
btrfs_mark_buffer_dirty(leaf);
inode_item = &root_item->inode;
@@ -711,8 +698,7 @@ static noinline int create_subvol(struct inode *dir,
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
- ret = btrfs_add_root_ref(trans, fs_info,
- objectid, root->root_key.objectid,
+ ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
btrfs_ino(BTRFS_I(dir)), index, name, namelen);
BUG_ON(ret);
@@ -2507,8 +2493,8 @@ out:
static noinline int btrfs_ioctl_ino_lookup(struct file *file,
void __user *argp)
{
- struct btrfs_ioctl_ino_lookup_args *args;
- struct inode *inode;
+ struct btrfs_ioctl_ino_lookup_args *args;
+ struct inode *inode;
int ret = 0;
args = memdup_user(argp, sizeof(*args));
@@ -2941,8 +2927,14 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
ret = btrfs_defrag_root(root);
break;
case S_IFREG:
- if (!(file->f_mode & FMODE_WRITE)) {
- ret = -EINVAL;
+ /*
+ * Note that this does not check the file descriptor for write
+ * access. This prevents defragmenting executables that are
+ * running and allows defrag on files open in read-only mode.
+ */
+ if (!capable(CAP_SYS_ADMIN) &&
+ inode_permission(inode, MAY_WRITE)) {
+ ret = -EPERM;
goto out;
}
@@ -3165,10 +3157,8 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
di_args->total_bytes = btrfs_device_get_total_bytes(dev);
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
if (dev->name) {
- struct rcu_string *name;
-
- name = rcu_dereference(dev->name);
- strncpy(di_args->path, name->str, sizeof(di_args->path) - 1);
+ strncpy(di_args->path, rcu_str_deref(dev->name),
+ sizeof(di_args->path) - 1);
di_args->path[sizeof(di_args->path) - 1] = 0;
} else {
di_args->path[0] = '\0';
@@ -3327,11 +3317,13 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
if (pg) {
unlock_page(pg);
put_page(pg);
+ cmp->src_pages[i] = NULL;
}
pg = cmp->dst_pages[i];
if (pg) {
unlock_page(pg);
put_page(pg);
+ cmp->dst_pages[i] = NULL;
}
}
}
@@ -5116,9 +5108,7 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_quota_ctl_args *sa;
- struct btrfs_trans_handle *trans = NULL;
int ret;
- int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -5134,28 +5124,19 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
}
down_write(&fs_info->subvol_sem);
- trans = btrfs_start_transaction(fs_info->tree_root, 2);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out;
- }
switch (sa->cmd) {
case BTRFS_QUOTA_CTL_ENABLE:
- ret = btrfs_quota_enable(trans, fs_info);
+ ret = btrfs_quota_enable(fs_info);
break;
case BTRFS_QUOTA_CTL_DISABLE:
- ret = btrfs_quota_disable(trans, fs_info);
+ ret = btrfs_quota_disable(fs_info);
break;
default:
ret = -EINVAL;
break;
}
- err = btrfs_commit_transaction(trans);
- if (err && !ret)
- ret = err;
-out:
kfree(sa);
up_write(&fs_info->subvol_sem);
drop_write:
@@ -5193,15 +5174,13 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
}
if (sa->assign) {
- ret = btrfs_add_qgroup_relation(trans, fs_info,
- sa->src, sa->dst);
+ ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst);
} else {
- ret = btrfs_del_qgroup_relation(trans, fs_info,
- sa->src, sa->dst);
+ ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst);
}
/* update qgroup status and info */
- err = btrfs_run_qgroups(trans, fs_info);
+ err = btrfs_run_qgroups(trans);
if (err < 0)
btrfs_handle_fs_error(fs_info, err,
"failed to update qgroup status and info");
@@ -5219,7 +5198,6 @@ drop_write:
static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_qgroup_create_args *sa;
struct btrfs_trans_handle *trans;
@@ -5251,9 +5229,9 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
}
if (sa->create) {
- ret = btrfs_create_qgroup(trans, fs_info, sa->qgroupid);
+ ret = btrfs_create_qgroup(trans, sa->qgroupid);
} else {
- ret = btrfs_remove_qgroup(trans, fs_info, sa->qgroupid);
+ ret = btrfs_remove_qgroup(trans, sa->qgroupid);
}
err = btrfs_end_transaction(trans);
@@ -5270,7 +5248,6 @@ drop_write:
static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_qgroup_limit_args *sa;
struct btrfs_trans_handle *trans;
@@ -5303,7 +5280,7 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
qgroupid = root->root_key.objectid;
}
- ret = btrfs_limit_qgroup(trans, fs_info, qgroupid, &sa->lim);
+ ret = btrfs_limit_qgroup(trans, qgroupid, &sa->lim);
err = btrfs_end_transaction(trans);
if (err && !ret)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2e1a1694a33d..0c4ef208b8b9 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -6,7 +6,6 @@
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
-#include <linux/pagevec.h>
#include "ctree.h"
#include "transaction.h"
#include "btrfs_inode.h"
@@ -421,129 +420,6 @@ out:
return ret == 0;
}
-/* Needs to either be called under a log transaction or the log_mutex */
-void btrfs_get_logged_extents(struct btrfs_inode *inode,
- struct list_head *logged_list,
- const loff_t start,
- const loff_t end)
-{
- struct btrfs_ordered_inode_tree *tree;
- struct btrfs_ordered_extent *ordered;
- struct rb_node *n;
- struct rb_node *prev;
-
- tree = &inode->ordered_tree;
- spin_lock_irq(&tree->lock);
- n = __tree_search(&tree->tree, end, &prev);
- if (!n)
- n = prev;
- for (; n; n = rb_prev(n)) {
- ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
- if (ordered->file_offset > end)
- continue;
- if (entry_end(ordered) <= start)
- break;
- if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
- continue;
- list_add(&ordered->log_list, logged_list);
- refcount_inc(&ordered->refs);
- }
- spin_unlock_irq(&tree->lock);
-}
-
-void btrfs_put_logged_extents(struct list_head *logged_list)
-{
- struct btrfs_ordered_extent *ordered;
-
- while (!list_empty(logged_list)) {
- ordered = list_first_entry(logged_list,
- struct btrfs_ordered_extent,
- log_list);
- list_del_init(&ordered->log_list);
- btrfs_put_ordered_extent(ordered);
- }
-}
-
-void btrfs_submit_logged_extents(struct list_head *logged_list,
- struct btrfs_root *log)
-{
- int index = log->log_transid % 2;
-
- spin_lock_irq(&log->log_extents_lock[index]);
- list_splice_tail(logged_list, &log->logged_list[index]);
- spin_unlock_irq(&log->log_extents_lock[index]);
-}
-
-void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
- struct btrfs_root *log, u64 transid)
-{
- struct btrfs_ordered_extent *ordered;
- int index = transid % 2;
-
- spin_lock_irq(&log->log_extents_lock[index]);
- while (!list_empty(&log->logged_list[index])) {
- struct inode *inode;
- ordered = list_first_entry(&log->logged_list[index],
- struct btrfs_ordered_extent,
- log_list);
- list_del_init(&ordered->log_list);
- inode = ordered->inode;
- spin_unlock_irq(&log->log_extents_lock[index]);
-
- if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
- !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
- u64 start = ordered->file_offset;
- u64 end = ordered->file_offset + ordered->len - 1;
-
- WARN_ON(!inode);
- filemap_fdatawrite_range(inode->i_mapping, start, end);
- }
- wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
- &ordered->flags));
-
- /*
- * In order to keep us from losing our ordered extent
- * information when committing the transaction we have to make
- * sure that any logged extents are completed when we go to
- * commit the transaction. To do this we simply increase the
- * current transactions pending_ordered counter and decrement it
- * when the ordered extent completes.
- */
- if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
- struct btrfs_ordered_inode_tree *tree;
-
- tree = &BTRFS_I(inode)->ordered_tree;
- spin_lock_irq(&tree->lock);
- if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
- set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
- atomic_inc(&trans->transaction->pending_ordered);
- }
- spin_unlock_irq(&tree->lock);
- }
- btrfs_put_ordered_extent(ordered);
- spin_lock_irq(&log->log_extents_lock[index]);
- }
- spin_unlock_irq(&log->log_extents_lock[index]);
-}
-
-void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
-{
- struct btrfs_ordered_extent *ordered;
- int index = transid % 2;
-
- spin_lock_irq(&log->log_extents_lock[index]);
- while (!list_empty(&log->logged_list[index])) {
- ordered = list_first_entry(&log->logged_list[index],
- struct btrfs_ordered_extent,
- log_list);
- list_del_init(&ordered->log_list);
- spin_unlock_irq(&log->log_extents_lock[index]);
- btrfs_put_ordered_extent(ordered);
- spin_lock_irq(&log->log_extents_lock[index]);
- }
- spin_unlock_irq(&log->log_extents_lock[index]);
-}
-
/*
* used to drop a reference on an ordered extent. This will free
* the extent if the last reference is dropped
@@ -913,20 +789,6 @@ out:
return entry;
}
-bool btrfs_have_ordered_extents_in_range(struct inode *inode,
- u64 file_offset,
- u64 len)
-{
- struct btrfs_ordered_extent *oe;
-
- oe = btrfs_lookup_ordered_range(BTRFS_I(inode), file_offset, len);
- if (oe) {
- btrfs_put_ordered_extent(oe);
- return true;
- }
- return false;
-}
-
/*
* lookup and return any extent before 'file_offset'. NULL is returned
* if none is found
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 3be443fb3001..02d813aaa261 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -54,15 +54,11 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
* has done its due diligence in updating
* the isize. */
-#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered
- ordered extent */
-#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */
+#define BTRFS_ORDERED_TRUNCATED 8 /* Set when we have to truncate an extent */
-#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent
- * in the logging code. */
-#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
+#define BTRFS_ORDERED_PENDING 9 /* We are waiting for this ordered extent to
* complete in the current transaction. */
-#define BTRFS_ORDERED_REGULAR 12 /* Regular IO for COW */
+#define BTRFS_ORDERED_REGULAR 10 /* Regular IO for COW */
struct btrfs_ordered_extent {
/* logical offset in the file */
@@ -182,9 +178,6 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
struct btrfs_inode *inode,
u64 file_offset,
u64 len);
-bool btrfs_have_ordered_extents_in_range(struct inode *inode,
- u64 file_offset,
- u64 len);
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
@@ -193,16 +186,6 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
const u64 range_start, const u64 range_len);
u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
const u64 range_start, const u64 range_len);
-void btrfs_get_logged_extents(struct btrfs_inode *inode,
- struct list_head *logged_list,
- const loff_t start,
- const loff_t end);
-void btrfs_put_logged_extents(struct list_head *logged_list);
-void btrfs_submit_logged_extents(struct list_head *logged_list,
- struct btrfs_root *log);
-void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
- struct btrfs_root *log, u64 transid);
-void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
int __init ordered_data_init(void);
void __cold ordered_data_exit(void);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index a4e11cf04671..df49931ffe92 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -52,17 +52,9 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
u64 offset;
int ref_index = 0;
- if (item_size < sizeof(*ei)) {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- struct btrfs_extent_item_v0 *ei0;
- BUG_ON(item_size != sizeof(*ei0));
- ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
- pr_info("\t\textent refs %u\n",
- btrfs_extent_refs_v0(eb, ei0));
- return;
-#else
- BUG();
-#endif
+ if (unlikely(item_size < sizeof(*ei))) {
+ btrfs_print_v0_err(eb->fs_info);
+ btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
}
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
@@ -133,20 +125,6 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
WARN_ON(ptr > end);
}
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
-{
- struct btrfs_extent_ref_v0 *ref0;
-
- ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
- printk("\t\textent back ref root %llu gen %llu owner %llu num_refs %lu\n",
- btrfs_ref_root_v0(eb, ref0),
- btrfs_ref_generation_v0(eb, ref0),
- btrfs_ref_objectid_v0(eb, ref0),
- (unsigned long)btrfs_ref_count_v0(eb, ref0));
-}
-#endif
-
static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
u32 item_size)
{
@@ -267,8 +245,8 @@ void btrfs_print_leaf(struct extent_buffer *l)
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(l, fi) ==
BTRFS_FILE_EXTENT_INLINE) {
- pr_info("\t\tinline extent data size %u\n",
- btrfs_file_extent_inline_len(l, i, fi));
+ pr_info("\t\tinline extent data size %llu\n",
+ btrfs_file_extent_ram_bytes(l, fi));
break;
}
pr_info("\t\textent data disk bytenr %llu nr %llu\n",
@@ -280,11 +258,8 @@ void btrfs_print_leaf(struct extent_buffer *l)
btrfs_file_extent_ram_bytes(l, fi));
break;
case BTRFS_EXTENT_REF_V0_KEY:
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- print_extent_ref_v0(l, i);
-#else
- BUG();
-#endif
+ btrfs_print_v0_err(fs_info);
+ btrfs_handle_fs_error(fs_info, -EINVAL, NULL);
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
bi = btrfs_item_ptr(l, i,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c25dc47210a3..4353bb69bb86 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -530,11 +530,11 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
fs_info->qgroup_ulist = NULL;
}
-static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *quota_root,
- u64 src, u64 dst)
+static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst)
{
int ret;
+ struct btrfs_root *quota_root = trans->fs_info->quota_root;
struct btrfs_path *path;
struct btrfs_key key;
@@ -554,11 +554,11 @@ static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
return ret;
}
-static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *quota_root,
- u64 src, u64 dst)
+static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst)
{
int ret;
+ struct btrfs_root *quota_root = trans->fs_info->quota_root;
struct btrfs_path *path;
struct btrfs_key key;
@@ -653,10 +653,10 @@ out:
return ret;
}
-static int del_qgroup_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *quota_root, u64 qgroupid)
+static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid)
{
int ret;
+ struct btrfs_root *quota_root = trans->fs_info->quota_root;
struct btrfs_path *path;
struct btrfs_key key;
@@ -700,9 +700,9 @@ out:
}
static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct btrfs_qgroup *qgroup)
{
+ struct btrfs_root *quota_root = trans->fs_info->quota_root;
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *l;
@@ -718,7 +718,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
@@ -742,9 +742,10 @@ out:
}
static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct btrfs_qgroup *qgroup)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_root *quota_root = fs_info->quota_root;
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *l;
@@ -752,7 +753,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
int ret;
int slot;
- if (btrfs_is_testing(root->fs_info))
+ if (btrfs_is_testing(fs_info))
return 0;
key.objectid = 0;
@@ -763,7 +764,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
@@ -786,10 +787,10 @@ out:
return ret;
}
-static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_root *root)
+static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_root *quota_root = fs_info->quota_root;
struct btrfs_path *path;
struct btrfs_key key;
struct extent_buffer *l;
@@ -805,7 +806,7 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
if (ret > 0)
ret = -ENOENT;
@@ -875,8 +876,7 @@ out:
return ret;
}
-int btrfs_quota_enable(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *quota_root;
struct btrfs_root *tree_root = fs_info->tree_root;
@@ -886,6 +886,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_qgroup *qgroup = NULL;
+ struct btrfs_trans_handle *trans = NULL;
int ret = 0;
int slot;
@@ -893,9 +894,25 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
if (fs_info->quota_root)
goto out;
+ /*
+ * 1 for quota root item
+ * 1 for BTRFS_QGROUP_STATUS item
+ *
+ * Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items
+ * per subvolume. However those are not currently reserved since it
+ * would be a lot of overkill.
+ */
+ trans = btrfs_start_transaction(tree_root, 2);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ trans = NULL;
+ goto out;
+ }
+
fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
if (!fs_info->qgroup_ulist) {
ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -906,12 +923,14 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
BTRFS_QUOTA_TREE_OBJECTID);
if (IS_ERR(quota_root)) {
ret = PTR_ERR(quota_root);
+ btrfs_abort_transaction(trans, ret);
goto out;
}
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
+ btrfs_abort_transaction(trans, ret);
goto out_free_root;
}
@@ -921,8 +940,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
sizeof(*ptr));
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
+ }
leaf = path->nodes[0];
ptr = btrfs_item_ptr(leaf, path->slots[0],
@@ -944,9 +965,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
if (ret > 0)
goto out_add_root;
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
-
+ }
while (1) {
slot = path->slots[0];
@@ -956,18 +978,23 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
if (found_key.type == BTRFS_ROOT_REF_KEY) {
ret = add_qgroup_item(trans, quota_root,
found_key.offset);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
+ }
qgroup = add_qgroup_rb(fs_info, found_key.offset);
if (IS_ERR(qgroup)) {
ret = PTR_ERR(qgroup);
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
}
ret = btrfs_next_item(tree_root, path);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
+ }
if (ret)
break;
}
@@ -975,18 +1002,28 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
out_add_root:
btrfs_release_path(path);
ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
+ }
qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
if (IS_ERR(qgroup)) {
ret = PTR_ERR(qgroup);
+ btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
spin_lock(&fs_info->qgroup_lock);
fs_info->quota_root = quota_root;
set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
spin_unlock(&fs_info->qgroup_lock);
+
+ ret = btrfs_commit_transaction(trans);
+ if (ret) {
+ trans = NULL;
+ goto out_free_path;
+ }
+
ret = qgroup_rescan_init(fs_info, 0, 1);
if (!ret) {
qgroup_rescan_zero_tracking(fs_info);
@@ -1006,20 +1043,35 @@ out:
if (ret) {
ulist_free(fs_info->qgroup_ulist);
fs_info->qgroup_ulist = NULL;
+ if (trans)
+ btrfs_end_transaction(trans);
}
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
-int btrfs_quota_disable(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *quota_root;
+ struct btrfs_trans_handle *trans = NULL;
int ret = 0;
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root)
goto out;
+
+ /*
+ * 1 For the root item
+ *
+ * We should also reserve enough items for the quota tree deletion in
+ * btrfs_clean_quota_tree but this is not done.
+ */
+ trans = btrfs_start_transaction(fs_info->tree_root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
btrfs_qgroup_wait_for_completion(fs_info, false);
spin_lock(&fs_info->qgroup_lock);
@@ -1031,12 +1083,16 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
btrfs_free_qgroup_config(fs_info);
ret = btrfs_clean_quota_tree(trans, quota_root);
- if (ret)
- goto out;
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto end_trans;
+ }
- ret = btrfs_del_root(trans, fs_info, &quota_root->root_key);
- if (ret)
- goto out;
+ ret = btrfs_del_root(trans, &quota_root->root_key);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto end_trans;
+ }
list_del(&quota_root->dirty_list);
@@ -1048,6 +1104,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
free_extent_buffer(quota_root->node);
free_extent_buffer(quota_root->commit_root);
kfree(quota_root);
+
+end_trans:
+ ret = btrfs_end_transaction(trans);
out:
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
@@ -1177,9 +1236,10 @@ out:
return ret;
}
-int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root;
struct btrfs_qgroup *parent;
struct btrfs_qgroup *member;
@@ -1216,13 +1276,13 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
}
}
- ret = add_qgroup_relation_item(trans, quota_root, src, dst);
+ ret = add_qgroup_relation_item(trans, src, dst);
if (ret)
goto out;
- ret = add_qgroup_relation_item(trans, quota_root, dst, src);
+ ret = add_qgroup_relation_item(trans, dst, src);
if (ret) {
- del_qgroup_relation_item(trans, quota_root, src, dst);
+ del_qgroup_relation_item(trans, src, dst);
goto out;
}
@@ -1240,9 +1300,10 @@ out:
return ret;
}
-static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root;
struct btrfs_qgroup *parent;
struct btrfs_qgroup *member;
@@ -1276,8 +1337,8 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans,
ret = -ENOENT;
goto out;
exist:
- ret = del_qgroup_relation_item(trans, quota_root, src, dst);
- err = del_qgroup_relation_item(trans, quota_root, dst, src);
+ ret = del_qgroup_relation_item(trans, src, dst);
+ err = del_qgroup_relation_item(trans, dst, src);
if (err && !ret)
ret = err;
@@ -1290,21 +1351,22 @@ out:
return ret;
}
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret = 0;
mutex_lock(&fs_info->qgroup_ioctl_lock);
- ret = __del_qgroup_relation(trans, fs_info, src, dst);
+ ret = __del_qgroup_relation(trans, src, dst);
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
-int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid)
+int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0;
@@ -1336,9 +1398,9 @@ out:
return ret;
}
-int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid)
+int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
struct btrfs_qgroup_list *list;
@@ -1362,16 +1424,15 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
goto out;
}
}
- ret = del_qgroup_item(trans, quota_root, qgroupid);
+ ret = del_qgroup_item(trans, qgroupid);
if (ret && ret != -ENOENT)
goto out;
while (!list_empty(&qgroup->groups)) {
list = list_first_entry(&qgroup->groups,
struct btrfs_qgroup_list, next_group);
- ret = __del_qgroup_relation(trans, fs_info,
- qgroupid,
- list->group->qgroupid);
+ ret = __del_qgroup_relation(trans, qgroupid,
+ list->group->qgroupid);
if (ret)
goto out;
}
@@ -1384,10 +1445,10 @@ out:
return ret;
}
-int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid,
+int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
struct btrfs_qgroup_limit *limit)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0;
@@ -1451,7 +1512,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
spin_unlock(&fs_info->qgroup_lock);
- ret = update_qgroup_limit_item(trans, quota_root, qgroup);
+ ret = update_qgroup_limit_item(trans, qgroup);
if (ret) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
btrfs_info(fs_info, "unable to update quota limit for %llu",
@@ -1519,10 +1580,10 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
return 0;
}
-int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
- gfp_t gfp_flag)
+int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+ u64 num_bytes, gfp_t gfp_flag)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_qgroup_extent_record *record;
struct btrfs_delayed_ref_root *delayed_refs;
int ret;
@@ -1530,8 +1591,6 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
|| bytenr == 0 || num_bytes == 0)
return 0;
- if (WARN_ON(trans == NULL))
- return -EINVAL;
record = kmalloc(sizeof(*record), gfp_flag);
if (!record)
return -ENOMEM;
@@ -1552,9 +1611,9 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
}
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct extent_buffer *eb)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int nr = btrfs_header_nritems(eb);
int i, extent_type, ret;
struct btrfs_key key;
@@ -1584,8 +1643,8 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
- ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr,
- num_bytes, GFP_NOFS);
+ ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes,
+ GFP_NOFS);
if (ret)
return ret;
}
@@ -1655,11 +1714,10 @@ static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
}
int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct extent_buffer *root_eb,
u64 root_gen, int root_level)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret = 0;
int level;
struct extent_buffer *eb = root_eb;
@@ -1678,7 +1736,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
}
if (root_level == 0) {
- ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb);
+ ret = btrfs_qgroup_trace_leaf_items(trans, root_eb);
goto out;
}
@@ -1736,8 +1794,7 @@ walk_down:
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
- ret = btrfs_qgroup_trace_extent(trans, fs_info,
- child_bytenr,
+ ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
fs_info->nodesize,
GFP_NOFS);
if (ret)
@@ -1745,8 +1802,8 @@ walk_down:
}
if (level == 0) {
- ret = btrfs_qgroup_trace_leaf_items(trans,fs_info,
- path->nodes[level]);
+ ret = btrfs_qgroup_trace_leaf_items(trans,
+ path->nodes[level]);
if (ret)
goto out;
@@ -1981,12 +2038,11 @@ static int maybe_fs_roots(struct ulist *roots)
return is_fstree(unode->val);
}
-int
-btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 bytenr, u64 num_bytes,
- struct ulist *old_roots, struct ulist *new_roots)
+int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+ u64 num_bytes, struct ulist *old_roots,
+ struct ulist *new_roots)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct ulist *qgroups = NULL;
struct ulist *tmp = NULL;
u64 seq;
@@ -2116,9 +2172,10 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
ulist_del(record->old_roots, qgroup_to_skip,
0);
}
- ret = btrfs_qgroup_account_extent(trans, fs_info,
- record->bytenr, record->num_bytes,
- record->old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(trans, record->bytenr,
+ record->num_bytes,
+ record->old_roots,
+ new_roots);
record->old_roots = NULL;
new_roots = NULL;
}
@@ -2136,9 +2193,9 @@ cleanup:
/*
* called from commit_transaction. Writes all changed qgroups to disk.
*/
-int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
+int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root = fs_info->quota_root;
int ret = 0;
@@ -2152,11 +2209,11 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_qgroup, dirty);
list_del_init(&qgroup->dirty);
spin_unlock(&fs_info->qgroup_lock);
- ret = update_qgroup_info_item(trans, quota_root, qgroup);
+ ret = update_qgroup_info_item(trans, qgroup);
if (ret)
fs_info->qgroup_flags |=
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
- ret = update_qgroup_limit_item(trans, quota_root, qgroup);
+ ret = update_qgroup_limit_item(trans, qgroup);
if (ret)
fs_info->qgroup_flags |=
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -2168,7 +2225,7 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
spin_unlock(&fs_info->qgroup_lock);
- ret = update_qgroup_status_item(trans, fs_info, quota_root);
+ ret = update_qgroup_status_item(trans);
if (ret)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -2181,13 +2238,13 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
* cause a transaction abort so we take extra care here to only error
* when a readonly fs is a reasonable outcome.
*/
-int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
- struct btrfs_qgroup_inherit *inherit)
+int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
+ u64 objectid, struct btrfs_qgroup_inherit *inherit)
{
int ret = 0;
int i;
u64 *i_qgroups;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *quota_root = fs_info->quota_root;
struct btrfs_qgroup *srcgroup;
struct btrfs_qgroup *dstgroup;
@@ -2229,22 +2286,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- if (srcid) {
- struct btrfs_root *srcroot;
- struct btrfs_key srckey;
-
- srckey.objectid = srcid;
- srckey.type = BTRFS_ROOT_ITEM_KEY;
- srckey.offset = (u64)-1;
- srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
- if (IS_ERR(srcroot)) {
- ret = PTR_ERR(srcroot);
- goto out;
- }
-
- level_size = fs_info->nodesize;
- }
-
/*
* add qgroup to all inherited groups
*/
@@ -2253,12 +2294,12 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
if (*i_qgroups == 0)
continue;
- ret = add_qgroup_relation_item(trans, quota_root,
- objectid, *i_qgroups);
+ ret = add_qgroup_relation_item(trans, objectid,
+ *i_qgroups);
if (ret && ret != -EEXIST)
goto out;
- ret = add_qgroup_relation_item(trans, quota_root,
- *i_qgroups, objectid);
+ ret = add_qgroup_relation_item(trans, *i_qgroups,
+ objectid);
if (ret && ret != -EEXIST)
goto out;
}
@@ -2281,7 +2322,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
dstgroup->rsv_excl = inherit->lim.rsv_excl;
- ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
+ ret = update_qgroup_limit_item(trans, dstgroup);
if (ret) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
btrfs_info(fs_info,
@@ -2301,6 +2342,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
* our counts don't go crazy, so at this point the only
* difference between the two roots should be the root node.
*/
+ level_size = fs_info->nodesize;
dstgroup->rfer = srcgroup->rfer;
dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
dstgroup->excl = level_size;
@@ -2598,10 +2640,10 @@ static bool is_last_leaf(struct btrfs_path *path)
* returns < 0 on error, 0 when more leafs are to be scanned.
* returns 1 when done.
*/
-static int
-qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
- struct btrfs_trans_handle *trans)
+static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_key found;
struct extent_buffer *scratch_leaf = NULL;
struct ulist *roots = NULL;
@@ -2669,8 +2711,8 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
if (ret < 0)
goto out;
/* For rescan, just pass old_roots as NULL */
- ret = btrfs_qgroup_account_extent(trans, fs_info,
- found.objectid, num_bytes, NULL, roots);
+ ret = btrfs_qgroup_account_extent(trans, found.objectid,
+ num_bytes, NULL, roots);
if (ret < 0)
goto out;
}
@@ -2716,7 +2758,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
err = -EINTR;
} else {
- err = qgroup_rescan_leaf(fs_info, path, trans);
+ err = qgroup_rescan_leaf(trans, path);
}
if (err > 0)
btrfs_commit_transaction(trans);
@@ -2751,7 +2793,7 @@ out:
err);
goto done;
}
- ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
+ ret = update_qgroup_status_item(trans);
if (ret < 0) {
err = ret;
btrfs_err(fs_info, "fail to update qgroup status: %d", err);
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index d60dd06445ce..54b8bb282c0e 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -141,24 +141,19 @@ struct btrfs_qgroup {
#define QGROUP_RELEASE (1<<1)
#define QGROUP_FREE (1<<2)
-int btrfs_quota_enable(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
-int btrfs_quota_disable(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
+int btrfs_quota_enable(struct btrfs_fs_info *fs_info);
+int btrfs_quota_disable(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
bool interruptible);
-int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid);
-int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid);
-int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid,
+int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst);
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
+ u64 dst);
+int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
+int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
+int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
@@ -217,9 +212,8 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
* Return <0 for error, like memory allocation failure or invalid parameter
* (NULL trans)
*/
-int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
- gfp_t gfp_flag);
+int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+ u64 num_bytes, gfp_t gfp_flag);
/*
* Inform qgroup to trace all leaf items of data
@@ -228,7 +222,6 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
* Return <0 for error(ENOMEM)
*/
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
/*
* Inform qgroup to trace a whole subtree, including all its child tree
@@ -241,20 +234,15 @@ int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
* Return <0 for error(ENOMEM or tree search error)
*/
int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct extent_buffer *root_eb,
u64 root_gen, int root_level);
-int
-btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 bytenr, u64 num_bytes,
- struct ulist *old_roots, struct ulist *new_roots);
+int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+ u64 num_bytes, struct ulist *old_roots,
+ struct ulist *new_roots);
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
-int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
- struct btrfs_qgroup_inherit *inherit);
+int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
+int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
+ u64 objectid, struct btrfs_qgroup_inherit *inherit);
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes,
enum btrfs_qgroup_rsv_type type);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 5e4ad134b9ad..df41d7049936 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -5,32 +5,19 @@
*/
#include <linux/sched.h>
-#include <linux/wait.h>
#include <linux/bio.h>
#include <linux/slab.h>
-#include <linux/buffer_head.h>
#include <linux/blkdev.h>
-#include <linux/random.h>
-#include <linux/iocontext.h>
-#include <linux/capability.h>
-#include <linux/ratelimit.h>
-#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/hash.h>
#include <linux/list_sort.h>
#include <linux/raid/xor.h>
#include <linux/mm.h>
-#include <asm/div64.h>
#include "ctree.h"
-#include "extent_map.h"
#include "disk-io.h"
-#include "transaction.h"
-#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"
-#include "check-integrity.h"
-#include "rcu-string.h"
/* set when additional merges to this rbio are not allowed */
#define RBIO_RMW_LOCKED_BIT 1
@@ -175,8 +162,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
static void rmw_work(struct btrfs_work *work);
static void read_rebuild_work(struct btrfs_work *work);
-static void async_rmw_stripe(struct btrfs_raid_bio *rbio);
-static void async_read_rebuild(struct btrfs_raid_bio *rbio);
static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
static void __free_raid_bio(struct btrfs_raid_bio *rbio);
@@ -185,7 +170,13 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
int need_check);
-static void async_scrub_parity(struct btrfs_raid_bio *rbio);
+static void scrub_parity_work(struct btrfs_work *work);
+
+static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
+{
+ btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL);
+ btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
+}
/*
* the stripe hash table is used for locking, and to collect
@@ -260,7 +251,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
s = kmap(rbio->bio_pages[i]);
d = kmap(rbio->stripe_pages[i]);
- memcpy(d, s, PAGE_SIZE);
+ copy_page(d, s);
kunmap(rbio->bio_pages[i]);
kunmap(rbio->stripe_pages[i]);
@@ -516,32 +507,21 @@ static void run_xor(void **pages, int src_cnt, ssize_t len)
}
/*
- * returns true if the bio list inside this rbio
- * covers an entire stripe (no rmw required).
- * Must be called with the bio list lock held, or
- * at a time when you know it is impossible to add
- * new bios into the list
+ * Returns true if the bio list inside this rbio covers an entire stripe (no
+ * rmw required).
*/
-static int __rbio_is_full(struct btrfs_raid_bio *rbio)
+static int rbio_is_full(struct btrfs_raid_bio *rbio)
{
+ unsigned long flags;
unsigned long size = rbio->bio_list_bytes;
int ret = 1;
+ spin_lock_irqsave(&rbio->bio_list_lock, flags);
if (size != rbio->nr_data * rbio->stripe_len)
ret = 0;
-
BUG_ON(size > rbio->nr_data * rbio->stripe_len);
- return ret;
-}
-
-static int rbio_is_full(struct btrfs_raid_bio *rbio)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&rbio->bio_list_lock, flags);
- ret = __rbio_is_full(rbio);
spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
+
return ret;
}
@@ -812,16 +792,16 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
spin_unlock_irqrestore(&h->lock, flags);
if (next->operation == BTRFS_RBIO_READ_REBUILD)
- async_read_rebuild(next);
+ start_async_work(next, read_rebuild_work);
else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
steal_rbio(rbio, next);
- async_read_rebuild(next);
+ start_async_work(next, read_rebuild_work);
} else if (next->operation == BTRFS_RBIO_WRITE) {
steal_rbio(rbio, next);
- async_rmw_stripe(next);
+ start_async_work(next, rmw_work);
} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
steal_rbio(rbio, next);
- async_scrub_parity(next);
+ start_async_work(next, scrub_parity_work);
}
goto done_nolock;
@@ -1275,7 +1255,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
pointers);
} else {
/* raid5 */
- memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
+ copy_page(pointers[nr_data], pointers[0]);
run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
}
@@ -1343,7 +1323,7 @@ write_data:
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
submit_bio(bio);
}
@@ -1508,20 +1488,6 @@ cleanup:
rbio_orig_end_io(rbio, BLK_STS_IOERR);
}
-static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
-{
- btrfs_init_work(&rbio->work, btrfs_rmw_helper, rmw_work, NULL, NULL);
- btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
-static void async_read_rebuild(struct btrfs_raid_bio *rbio)
-{
- btrfs_init_work(&rbio->work, btrfs_rmw_helper,
- read_rebuild_work, NULL, NULL);
-
- btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
/*
* the stripe must be locked by the caller. It will
* unlock after all the writes are done
@@ -1599,7 +1565,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid_rmw_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
@@ -1652,7 +1618,7 @@ static int partial_stripe_write(struct btrfs_raid_bio *rbio)
ret = lock_stripe_add(rbio);
if (ret == 0)
- async_rmw_stripe(rbio);
+ start_async_work(rbio, rmw_work);
return 0;
}
@@ -1720,8 +1686,11 @@ static void run_plug(struct btrfs_plug_cb *plug)
list_del_init(&cur->plug_list);
if (rbio_is_full(cur)) {
+ int ret;
+
/* we have a full stripe, send it down */
- full_stripe_write(cur);
+ ret = full_stripe_write(cur);
+ BUG_ON(ret);
continue;
}
if (last) {
@@ -1941,9 +1910,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
BUG_ON(failb != -1);
pstripe:
/* Copy parity block into failed block to start with */
- memcpy(pointers[faila],
- pointers[rbio->nr_data],
- PAGE_SIZE);
+ copy_page(pointers[faila], pointers[rbio->nr_data]);
/* rearrange the pointer array */
p = pointers[faila];
@@ -2145,7 +2112,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid_recover_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
@@ -2448,7 +2415,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
pointers);
} else {
/* raid5 */
- memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
+ copy_page(pointers[nr_data], pointers[0]);
run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
}
@@ -2456,7 +2423,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
parity = kmap(p);
if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE))
- memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE);
+ copy_page(parity, pointers[rbio->scrubp]);
else
/* Parity is right, needn't writeback */
bitmap_clear(rbio->dbitmap, pagenr, 1);
@@ -2517,7 +2484,7 @@ submit_write:
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
submit_bio(bio);
}
@@ -2699,7 +2666,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid56_parity_scrub_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
@@ -2728,18 +2695,10 @@ static void scrub_parity_work(struct btrfs_work *work)
raid56_parity_scrub_stripe(rbio);
}
-static void async_scrub_parity(struct btrfs_raid_bio *rbio)
-{
- btrfs_init_work(&rbio->work, btrfs_rmw_helper,
- scrub_parity_work, NULL, NULL);
-
- btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
{
if (!lock_stripe_add(rbio))
- async_scrub_parity(rbio);
+ start_async_work(rbio, scrub_parity_work);
}
/* The following code is used for dev replace of a missing RAID 5/6 device. */
@@ -2781,5 +2740,5 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
{
if (!lock_stripe_add(rbio))
- async_read_rebuild(rbio);
+ start_async_work(rbio, read_rebuild_work);
}
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 40f1bcef394d..dec14b739b10 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -7,7 +7,6 @@
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
-#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include "ctree.h"
@@ -355,7 +354,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
dev = bbio->stripes[nzones].dev;
/* cannot read ahead on missing device. */
- if (!dev->bdev)
+ if (!dev->bdev)
continue;
zone = reada_find_zone(dev, logical, bbio);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 879b76fa881a..8783a1776540 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -586,29 +586,6 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
return btrfs_get_fs_root(fs_info, &key, false);
}
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static noinline_for_stack
-struct btrfs_root *find_tree_root(struct reloc_control *rc,
- struct extent_buffer *leaf,
- struct btrfs_extent_ref_v0 *ref0)
-{
- struct btrfs_root *root;
- u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
- u64 generation = btrfs_ref_generation_v0(leaf, ref0);
-
- BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
-
- root = read_fs_root(rc->extent_root->fs_info, root_objectid);
- BUG_ON(IS_ERR(root));
-
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
- generation != btrfs_root_generation(&root->root_item))
- return NULL;
-
- return root;
-}
-#endif
-
static noinline_for_stack
int find_inline_backref(struct extent_buffer *leaf, int slot,
unsigned long *ptr, unsigned long *end)
@@ -621,12 +598,11 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
btrfs_item_key_to_cpu(leaf, &key, slot);
item_size = btrfs_item_size_nr(leaf, slot);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
if (item_size < sizeof(*ei)) {
- WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
+ btrfs_print_v0_err(leaf->fs_info);
+ btrfs_handle_fs_error(leaf->fs_info, -EINVAL, NULL);
return 1;
}
-#endif
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
WARN_ON(!(btrfs_extent_flags(leaf, ei) &
BTRFS_EXTENT_FLAG_TREE_BLOCK));
@@ -792,7 +768,7 @@ again:
type = btrfs_get_extent_inline_ref_type(eb, iref,
BTRFS_REF_TYPE_BLOCK);
if (type == BTRFS_REF_TYPE_INVALID) {
- err = -EINVAL;
+ err = -EUCLEAN;
goto out;
}
key.type = type;
@@ -811,29 +787,7 @@ again:
goto next;
}
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
- key.type == BTRFS_EXTENT_REF_V0_KEY) {
- if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
- struct btrfs_extent_ref_v0 *ref0;
- ref0 = btrfs_item_ptr(eb, path1->slots[0],
- struct btrfs_extent_ref_v0);
- if (key.objectid == key.offset) {
- root = find_tree_root(rc, eb, ref0);
- if (root && !should_ignore_root(root))
- cur->root = root;
- else
- list_add(&cur->list, &useless);
- break;
- }
- if (is_cowonly_root(btrfs_ref_root_v0(eb,
- ref0)))
- cur->cowonly = 1;
- }
-#else
- ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY);
if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
-#endif
if (key.objectid == key.offset) {
/*
* only root blocks of reloc trees use
@@ -876,6 +830,12 @@ again:
edge->node[UPPER] = upper;
goto next;
+ } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
+ err = -EINVAL;
+ btrfs_print_v0_err(rc->extent_root->fs_info);
+ btrfs_handle_fs_error(rc->extent_root->fs_info, err,
+ NULL);
+ goto out;
} else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
goto next;
}
@@ -1321,18 +1281,19 @@ static void __del_reloc_root(struct btrfs_root *root)
struct mapping_node *node = NULL;
struct reloc_control *rc = fs_info->reloc_ctl;
- spin_lock(&rc->reloc_root_tree.lock);
- rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->node->start);
- if (rb_node) {
- node = rb_entry(rb_node, struct mapping_node, rb_node);
- rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+ if (rc) {
+ spin_lock(&rc->reloc_root_tree.lock);
+ rb_node = tree_search(&rc->reloc_root_tree.rb_root,
+ root->node->start);
+ if (rb_node) {
+ node = rb_entry(rb_node, struct mapping_node, rb_node);
+ rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+ }
+ spin_unlock(&rc->reloc_root_tree.lock);
+ if (!node)
+ return;
+ BUG_ON((struct btrfs_root *)node->data != root);
}
- spin_unlock(&rc->reloc_root_tree.lock);
-
- if (!node)
- return;
- BUG_ON((struct btrfs_root *)node->data != root);
spin_lock(&fs_info->trans_lock);
list_del_init(&root->root_list);
@@ -1918,13 +1879,12 @@ again:
* and tree block numbers, if current trans doesn't free
* data reloc tree inode.
*/
- ret = btrfs_qgroup_trace_subtree(trans, src, parent,
+ ret = btrfs_qgroup_trace_subtree(trans, parent,
btrfs_header_generation(parent),
btrfs_header_level(parent));
if (ret < 0)
break;
- ret = btrfs_qgroup_trace_subtree(trans, dest,
- path->nodes[level],
+ ret = btrfs_qgroup_trace_subtree(trans, path->nodes[level],
btrfs_header_generation(path->nodes[level]),
btrfs_header_level(path->nodes[level]));
if (ret < 0)
@@ -3333,48 +3293,6 @@ int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
return 0;
}
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static int get_ref_objectid_v0(struct reloc_control *rc,
- struct btrfs_path *path,
- struct btrfs_key *extent_key,
- u64 *ref_objectid, int *path_change)
-{
- struct btrfs_key key;
- struct extent_buffer *leaf;
- struct btrfs_extent_ref_v0 *ref0;
- int ret;
- int slot;
-
- leaf = path->nodes[0];
- slot = path->slots[0];
- while (1) {
- if (slot >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(rc->extent_root, path);
- if (ret < 0)
- return ret;
- BUG_ON(ret > 0);
- leaf = path->nodes[0];
- slot = path->slots[0];
- if (path_change)
- *path_change = 1;
- }
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid != extent_key->objectid)
- return -ENOENT;
-
- if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
- slot++;
- continue;
- }
- ref0 = btrfs_item_ptr(leaf, slot,
- struct btrfs_extent_ref_v0);
- *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
- break;
- }
- return 0;
-}
-#endif
-
/*
* helper to add a tree block to the list.
* the major work is getting the generation and level of the block
@@ -3407,23 +3325,12 @@ static int add_tree_block(struct reloc_control *rc,
level = (int)extent_key->offset;
}
generation = btrfs_extent_generation(eb, ei);
+ } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
+ btrfs_print_v0_err(eb->fs_info);
+ btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
+ return -EINVAL;
} else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- u64 ref_owner;
- int ret;
-
- BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
- ret = get_ref_objectid_v0(rc, path, extent_key,
- &ref_owner, NULL);
- if (ret < 0)
- return ret;
- BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
- level = (int)ref_owner;
- /* FIXME: get real generation */
- generation = 0;
-#else
BUG();
-#endif
}
btrfs_release_path(path);
@@ -3563,11 +3470,8 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, root, NULL);
- if (IS_ERR(inode) || is_bad_inode(inode)) {
- if (!IS_ERR(inode))
- iput(inode);
+ if (IS_ERR(inode))
return -ENOENT;
- }
truncate:
ret = btrfs_check_trunc_cache_free_space(fs_info,
@@ -3781,12 +3685,7 @@ int add_data_references(struct reloc_control *rc,
eb = path->nodes[0];
ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
- ptr = end;
- else
-#endif
- ptr += sizeof(struct btrfs_extent_item);
+ ptr += sizeof(struct btrfs_extent_item);
while (ptr < end) {
iref = (struct btrfs_extent_inline_ref *)ptr;
@@ -3801,7 +3700,7 @@ int add_data_references(struct reloc_control *rc,
ret = find_data_references(rc, extent_key,
eb, dref, blocks);
} else {
- ret = -EINVAL;
+ ret = -EUCLEAN;
btrfs_err(rc->extent_root->fs_info,
"extent %llu slot %d has an invalid inline ref type",
eb->start, path->slots[0]);
@@ -3832,13 +3731,7 @@ int add_data_references(struct reloc_control *rc,
if (key.objectid != extent_key->objectid)
break;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
- key.type == BTRFS_EXTENT_REF_V0_KEY) {
-#else
- BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
-#endif
ret = __add_tree_block(rc, key.offset, blocksize,
blocks);
} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
@@ -3846,6 +3739,10 @@ int add_data_references(struct reloc_control *rc,
struct btrfs_extent_data_ref);
ret = find_data_references(rc, extent_key,
eb, dref, blocks);
+ } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
+ btrfs_print_v0_err(eb->fs_info);
+ btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
+ ret = -EINVAL;
} else {
ret = 0;
}
@@ -4084,41 +3981,13 @@ restart:
flags = btrfs_extent_flags(path->nodes[0], ei);
ret = check_extent_flags(flags);
BUG_ON(ret);
-
+ } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
+ err = -EINVAL;
+ btrfs_print_v0_err(trans->fs_info);
+ btrfs_abort_transaction(trans, err);
+ break;
} else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- u64 ref_owner;
- int path_change = 0;
-
- BUG_ON(item_size !=
- sizeof(struct btrfs_extent_item_v0));
- ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
- &path_change);
- if (ret < 0) {
- err = ret;
- break;
- }
- if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
- flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
- else
- flags = BTRFS_EXTENT_FLAG_DATA;
-
- if (path_change) {
- btrfs_release_path(path);
-
- path->search_commit_root = 1;
- path->skip_locking = 1;
- ret = btrfs_search_slot(NULL, rc->extent_root,
- &key, path, 0, 0);
- if (ret < 0) {
- err = ret;
- break;
- }
- BUG_ON(ret > 0);
- }
-#else
BUG();
-#endif
}
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
@@ -4169,8 +4038,7 @@ restart:
}
}
if (trans && progress && err == -ENOSPC) {
- ret = btrfs_force_chunk_alloc(trans, fs_info,
- rc->block_group->flags);
+ ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
if (ret == 1) {
err = 0;
progress = 0;
@@ -4284,7 +4152,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, root, NULL);
- BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
+ BUG_ON(IS_ERR(inode));
BTRFS_I(inode)->index_cnt = group->key.objectid;
err = btrfs_orphan_add(trans, BTRFS_I(inode));
@@ -4375,7 +4243,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
BUG_ON(!rc->block_group);
- ret = btrfs_inc_block_group_ro(fs_info, rc->block_group);
+ ret = btrfs_inc_block_group_ro(rc->block_group);
if (ret) {
err = ret;
goto out;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index c451285976ac..65bda0682928 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -320,9 +320,9 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
/* drop the root item for 'key' from the tree root */
int btrfs_del_root(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, const struct btrfs_key *key)
+ const struct btrfs_key *key)
{
- struct btrfs_root *root = fs_info->tree_root;
+ struct btrfs_root *root = trans->fs_info->tree_root;
struct btrfs_path *path;
int ret;
@@ -341,13 +341,12 @@ out:
return ret;
}
-int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
- const char *name, int name_len)
+int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+ u64 ref_id, u64 dirid, u64 *sequence, const char *name,
+ int name_len)
{
- struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_root *tree_root = trans->fs_info->tree_root;
struct btrfs_path *path;
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
@@ -413,12 +412,11 @@ out:
*
* Will return 0, -ENOMEM, or anything from the CoW path
*/
-int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
- const char *name, int name_len)
+int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
+ u64 ref_id, u64 dirid, u64 sequence, const char *name,
+ int name_len)
{
- struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_root *tree_root = trans->fs_info->tree_root;
struct btrfs_key key;
int ret;
struct btrfs_path *path;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 572306036477..3be1456b5116 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -188,32 +188,6 @@ struct scrub_ctx {
refcount_t refs;
};
-struct scrub_fixup_nodatasum {
- struct scrub_ctx *sctx;
- struct btrfs_device *dev;
- u64 logical;
- struct btrfs_root *root;
- struct btrfs_work work;
- int mirror_num;
-};
-
-struct scrub_nocow_inode {
- u64 inum;
- u64 offset;
- u64 root;
- struct list_head list;
-};
-
-struct scrub_copy_nocow_ctx {
- struct scrub_ctx *sctx;
- u64 logical;
- u64 len;
- int mirror_num;
- u64 physical_for_dev_replace;
- struct list_head inodes;
- struct btrfs_work work;
-};
-
struct scrub_warning {
struct btrfs_path *path;
u64 extent_item_size;
@@ -232,8 +206,6 @@ struct full_stripe_lock {
static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
-static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
-static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
struct scrub_block *sblocks_for_recheck);
@@ -277,13 +249,6 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
-static int write_page_nocow(struct scrub_ctx *sctx,
- u64 physical_for_dev_replace, struct page *page);
-static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
- struct scrub_copy_nocow_ctx *ctx);
-static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
- int mirror_num, u64 physical_for_dev_replace);
-static void copy_nocow_pages_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);
@@ -555,60 +520,6 @@ out:
return ret;
}
-/*
- * used for workers that require transaction commits (i.e., for the
- * NOCOW case)
- */
-static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
-{
- struct btrfs_fs_info *fs_info = sctx->fs_info;
-
- refcount_inc(&sctx->refs);
- /*
- * increment scrubs_running to prevent cancel requests from
- * completing as long as a worker is running. we must also
- * increment scrubs_paused to prevent deadlocking on pause
- * requests used for transactions commits (as the worker uses a
- * transaction context). it is safe to regard the worker
- * as paused for all matters practical. effectively, we only
- * avoid cancellation requests from completing.
- */
- mutex_lock(&fs_info->scrub_lock);
- atomic_inc(&fs_info->scrubs_running);
- atomic_inc(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
-
- /*
- * check if @scrubs_running=@scrubs_paused condition
- * inside wait_event() is not an atomic operation.
- * which means we may inc/dec @scrub_running/paused
- * at any time. Let's wake up @scrub_pause_wait as
- * much as we can to let commit transaction blocked less.
- */
- wake_up(&fs_info->scrub_pause_wait);
-
- atomic_inc(&sctx->workers_pending);
-}
-
-/* used for workers that require transaction commits */
-static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
-{
- struct btrfs_fs_info *fs_info = sctx->fs_info;
-
- /*
- * see scrub_pending_trans_workers_inc() why we're pretending
- * to be paused in the scrub counters
- */
- mutex_lock(&fs_info->scrub_lock);
- atomic_dec(&fs_info->scrubs_running);
- atomic_dec(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
- atomic_dec(&sctx->workers_pending);
- wake_up(&fs_info->scrub_pause_wait);
- wake_up(&sctx->list_wait);
- scrub_put_ctx(sctx);
-}
-
static void scrub_free_csums(struct scrub_ctx *sctx)
{
while (!list_empty(&sctx->csum_list)) {
@@ -882,194 +793,6 @@ out:
btrfs_free_path(path);
}
-static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
-{
- struct page *page = NULL;
- unsigned long index;
- struct scrub_fixup_nodatasum *fixup = fixup_ctx;
- int ret;
- int corrected = 0;
- struct btrfs_key key;
- struct inode *inode = NULL;
- struct btrfs_fs_info *fs_info;
- u64 end = offset + PAGE_SIZE - 1;
- struct btrfs_root *local_root;
- int srcu_index;
-
- key.objectid = root;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- fs_info = fixup->root->fs_info;
- srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
-
- local_root = btrfs_read_fs_root_no_name(fs_info, &key);
- if (IS_ERR(local_root)) {
- srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
- return PTR_ERR(local_root);
- }
-
- key.type = BTRFS_INODE_ITEM_KEY;
- key.objectid = inum;
- key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
- srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- index = offset >> PAGE_SHIFT;
-
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
- if (!page) {
- ret = -ENOMEM;
- goto out;
- }
-
- if (PageUptodate(page)) {
- if (PageDirty(page)) {
- /*
- * we need to write the data to the defect sector. the
- * data that was in that sector is not in memory,
- * because the page was modified. we must not write the
- * modified page to that sector.
- *
- * TODO: what could be done here: wait for the delalloc
- * runner to write out that page (might involve
- * COW) and see whether the sector is still
- * referenced afterwards.
- *
- * For the meantime, we'll treat this error
- * incorrectable, although there is a chance that a
- * later scrub will find the bad sector again and that
- * there's no dirty page in memory, then.
- */
- ret = -EIO;
- goto out;
- }
- ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE,
- fixup->logical, page,
- offset - page_offset(page),
- fixup->mirror_num);
- unlock_page(page);
- corrected = !ret;
- } else {
- /*
- * we need to get good data first. the general readpage path
- * will call repair_io_failure for us, we just have to make
- * sure we read the bad mirror.
- */
- ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
- EXTENT_DAMAGED);
- if (ret) {
- /* set_extent_bits should give proper error */
- WARN_ON(ret > 0);
- if (ret > 0)
- ret = -EFAULT;
- goto out;
- }
-
- ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
- btrfs_get_extent,
- fixup->mirror_num);
- wait_on_page_locked(page);
-
- corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
- end, EXTENT_DAMAGED, 0, NULL);
- if (!corrected)
- clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
- EXTENT_DAMAGED);
- }
-
-out:
- if (page)
- put_page(page);
-
- iput(inode);
-
- if (ret < 0)
- return ret;
-
- if (ret == 0 && corrected) {
- /*
- * we only need to call readpage for one of the inodes belonging
- * to this extent. so make iterate_extent_inodes stop
- */
- return 1;
- }
-
- return -EIO;
-}
-
-static void scrub_fixup_nodatasum(struct btrfs_work *work)
-{
- struct btrfs_fs_info *fs_info;
- int ret;
- struct scrub_fixup_nodatasum *fixup;
- struct scrub_ctx *sctx;
- struct btrfs_trans_handle *trans = NULL;
- struct btrfs_path *path;
- int uncorrectable = 0;
-
- fixup = container_of(work, struct scrub_fixup_nodatasum, work);
- sctx = fixup->sctx;
- fs_info = fixup->root->fs_info;
-
- path = btrfs_alloc_path();
- if (!path) {
- spin_lock(&sctx->stat_lock);
- ++sctx->stat.malloc_errors;
- spin_unlock(&sctx->stat_lock);
- uncorrectable = 1;
- goto out;
- }
-
- trans = btrfs_join_transaction(fixup->root);
- if (IS_ERR(trans)) {
- uncorrectable = 1;
- goto out;
- }
-
- /*
- * the idea is to trigger a regular read through the standard path. we
- * read a page from the (failed) logical address by specifying the
- * corresponding copynum of the failed sector. thus, that readpage is
- * expected to fail.
- * that is the point where on-the-fly error correction will kick in
- * (once it's finished) and rewrite the failed sector if a good copy
- * can be found.
- */
- ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
- scrub_fixup_readpage, fixup, false);
- if (ret < 0) {
- uncorrectable = 1;
- goto out;
- }
- WARN_ON(ret != 1);
-
- spin_lock(&sctx->stat_lock);
- ++sctx->stat.corrected_errors;
- spin_unlock(&sctx->stat_lock);
-
-out:
- if (trans && !IS_ERR(trans))
- btrfs_end_transaction(trans);
- if (uncorrectable) {
- spin_lock(&sctx->stat_lock);
- ++sctx->stat.uncorrectable_errors;
- spin_unlock(&sctx->stat_lock);
- btrfs_dev_replace_stats_inc(
- &fs_info->dev_replace.num_uncorrectable_read_errors);
- btrfs_err_rl_in_rcu(fs_info,
- "unable to fixup (nodatasum) error at logical %llu on dev %s",
- fixup->logical, rcu_str_deref(fixup->dev->name));
- }
-
- btrfs_free_path(path);
- kfree(fixup);
-
- scrub_pending_trans_workers_dec(sctx);
-}
-
static inline void scrub_get_recover(struct scrub_recover *recover)
{
refcount_inc(&recover->refs);
@@ -1151,11 +874,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
return ret;
}
- if (sctx->is_dev_replace && !is_metadata && !have_csum) {
- sblocks_for_recheck = NULL;
- goto nodatasum_case;
- }
-
/*
* read all mirrors one after the other. This includes to
* re-read the extent or metadata block that failed (that was
@@ -1268,36 +986,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
goto out;
}
- if (!is_metadata && !have_csum) {
- struct scrub_fixup_nodatasum *fixup_nodatasum;
-
- WARN_ON(sctx->is_dev_replace);
-
-nodatasum_case:
-
- /*
- * !is_metadata and !have_csum, this means that the data
- * might not be COWed, that it might be modified
- * concurrently. The general strategy to work on the
- * commit root does not help in the case when COW is not
- * used.
- */
- fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
- if (!fixup_nodatasum)
- goto did_not_correct_error;
- fixup_nodatasum->sctx = sctx;
- fixup_nodatasum->dev = dev;
- fixup_nodatasum->logical = logical;
- fixup_nodatasum->root = fs_info->extent_root;
- fixup_nodatasum->mirror_num = failed_mirror_index + 1;
- scrub_pending_trans_workers_inc(sctx);
- btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
- scrub_fixup_nodatasum, NULL, NULL);
- btrfs_queue_work(fs_info->scrub_workers,
- &fixup_nodatasum->work);
- goto out;
- }
-
/*
* now build and submit the bios for the other mirrors, check
* checksums.
@@ -1865,7 +1553,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
bio = btrfs_io_bio_alloc(1);
bio_set_dev(bio, page_bad->dev->bdev);
bio->bi_iter.bi_sector = page_bad->physical >> 9;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
if (PAGE_SIZE != ret) {
@@ -1960,7 +1648,7 @@ again:
bio->bi_end_io = scrub_wr_bio_end_io;
bio_set_dev(bio, sbio->dev->bdev);
bio->bi_iter.bi_sector = sbio->physical >> 9;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
sbio->status = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical_for_dev_replace ||
@@ -2360,7 +2048,7 @@ again:
bio->bi_end_io = scrub_bio_end_io;
bio_set_dev(bio, sbio->dev->bdev);
bio->bi_iter.bi_sector = sbio->physical >> 9;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
sbio->status = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical ||
@@ -2799,17 +2487,10 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
have_csum = scrub_find_csum(sctx, logical, csum);
if (have_csum == 0)
++sctx->stat.no_csum;
- if (0 && sctx->is_dev_replace && !have_csum) {
- ret = copy_nocow_pages(sctx, logical, l,
- mirror_num,
- physical_for_dev_replace);
- goto behind_scrub_pages;
- }
}
ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
mirror_num, have_csum ? csum : NULL, 0,
physical_for_dev_replace);
-behind_scrub_pages:
if (ret)
return ret;
len -= l;
@@ -3862,7 +3543,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
* -> btrfs_scrub_pause()
*/
scrub_pause_on(fs_info);
- ret = btrfs_inc_block_group_ro(fs_info, cache);
+ ret = btrfs_inc_block_group_ro(cache);
if (!ret && is_dev_replace) {
/*
* If we are doing a device replace wait for any tasks
@@ -3981,14 +3662,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (!cache->removed && !cache->ro && cache->reserved == 0 &&
btrfs_block_group_used(&cache->item) == 0) {
spin_unlock(&cache->lock);
- spin_lock(&fs_info->unused_bgs_lock);
- if (list_empty(&cache->bg_list)) {
- btrfs_get_block_group(cache);
- trace_btrfs_add_unused_block_group(cache);
- list_add_tail(&cache->bg_list,
- &fs_info->unused_bgs);
- }
- spin_unlock(&fs_info->unused_bgs_lock);
+ btrfs_mark_bg_unused(cache);
} else {
spin_unlock(&cache->lock);
}
@@ -4071,10 +3745,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
if (!fs_info->scrub_wr_completion_workers)
goto fail_scrub_wr_completion_workers;
- fs_info->scrub_nocow_workers =
- btrfs_alloc_workqueue(fs_info, "scrubnc", flags, 1, 0);
- if (!fs_info->scrub_nocow_workers)
- goto fail_scrub_nocow_workers;
fs_info->scrub_parity_workers =
btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
max_active, 2);
@@ -4085,8 +3755,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
return 0;
fail_scrub_parity_workers:
- btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
-fail_scrub_nocow_workers:
btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
fail_scrub_wr_completion_workers:
btrfs_destroy_workqueue(fs_info->scrub_workers);
@@ -4099,7 +3767,6 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
if (--fs_info->scrub_workers_refcnt == 0) {
btrfs_destroy_workqueue(fs_info->scrub_workers);
btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
- btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
}
WARN_ON(fs_info->scrub_workers_refcnt < 0);
@@ -4112,7 +3779,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
struct scrub_ctx *sctx;
int ret;
struct btrfs_device *dev;
- struct rcu_string *name;
if (btrfs_fs_closing(fs_info))
return -EINVAL;
@@ -4166,11 +3832,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
if (!is_dev_replace && !readonly &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- rcu_read_lock();
- name = rcu_dereference(dev->name);
- btrfs_err(fs_info, "scrub: device %s is not writable",
- name->str);
- rcu_read_unlock();
+ btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
+ rcu_str_deref(dev->name));
return -EROFS;
}
@@ -4358,330 +4021,3 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
*extent_dev = bbio->stripes[0].dev;
btrfs_put_bbio(bbio);
}
-
-static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
- int mirror_num, u64 physical_for_dev_replace)
-{
- struct scrub_copy_nocow_ctx *nocow_ctx;
- struct btrfs_fs_info *fs_info = sctx->fs_info;
-
- nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
- if (!nocow_ctx) {
- spin_lock(&sctx->stat_lock);
- sctx->stat.malloc_errors++;
- spin_unlock(&sctx->stat_lock);
- return -ENOMEM;
- }
-
- scrub_pending_trans_workers_inc(sctx);
-
- nocow_ctx->sctx = sctx;
- nocow_ctx->logical = logical;
- nocow_ctx->len = len;
- nocow_ctx->mirror_num = mirror_num;
- nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
- btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
- copy_nocow_pages_worker, NULL, NULL);
- INIT_LIST_HEAD(&nocow_ctx->inodes);
- btrfs_queue_work(fs_info->scrub_nocow_workers,
- &nocow_ctx->work);
-
- return 0;
-}
-
-static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
-{
- struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
- struct scrub_nocow_inode *nocow_inode;
-
- nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
- if (!nocow_inode)
- return -ENOMEM;
- nocow_inode->inum = inum;
- nocow_inode->offset = offset;
- nocow_inode->root = root;
- list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
- return 0;
-}
-
-#define COPY_COMPLETE 1
-
-static void copy_nocow_pages_worker(struct btrfs_work *work)
-{
- struct scrub_copy_nocow_ctx *nocow_ctx =
- container_of(work, struct scrub_copy_nocow_ctx, work);
- struct scrub_ctx *sctx = nocow_ctx->sctx;
- struct btrfs_fs_info *fs_info = sctx->fs_info;
- struct btrfs_root *root = fs_info->extent_root;
- u64 logical = nocow_ctx->logical;
- u64 len = nocow_ctx->len;
- int mirror_num = nocow_ctx->mirror_num;
- u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
- int ret;
- struct btrfs_trans_handle *trans = NULL;
- struct btrfs_path *path;
- int not_written = 0;
-
- path = btrfs_alloc_path();
- if (!path) {
- spin_lock(&sctx->stat_lock);
- sctx->stat.malloc_errors++;
- spin_unlock(&sctx->stat_lock);
- not_written = 1;
- goto out;
- }
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- not_written = 1;
- goto out;
- }
-
- ret = iterate_inodes_from_logical(logical, fs_info, path,
- record_inode_for_nocow, nocow_ctx, false);
- if (ret != 0 && ret != -ENOENT) {
- btrfs_warn(fs_info,
- "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
- logical, physical_for_dev_replace, len, mirror_num,
- ret);
- not_written = 1;
- goto out;
- }
-
- btrfs_end_transaction(trans);
- trans = NULL;
- while (!list_empty(&nocow_ctx->inodes)) {
- struct scrub_nocow_inode *entry;
- entry = list_first_entry(&nocow_ctx->inodes,
- struct scrub_nocow_inode,
- list);
- list_del_init(&entry->list);
- ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
- entry->root, nocow_ctx);
- kfree(entry);
- if (ret == COPY_COMPLETE) {
- ret = 0;
- break;
- } else if (ret) {
- break;
- }
- }
-out:
- while (!list_empty(&nocow_ctx->inodes)) {
- struct scrub_nocow_inode *entry;
- entry = list_first_entry(&nocow_ctx->inodes,
- struct scrub_nocow_inode,
- list);
- list_del_init(&entry->list);
- kfree(entry);
- }
- if (trans && !IS_ERR(trans))
- btrfs_end_transaction(trans);
- if (not_written)
- btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
- num_uncorrectable_read_errors);
-
- btrfs_free_path(path);
- kfree(nocow_ctx);
-
- scrub_pending_trans_workers_dec(sctx);
-}
-
-static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
- u64 logical)
-{
- struct extent_state *cached_state = NULL;
- struct btrfs_ordered_extent *ordered;
- struct extent_io_tree *io_tree;
- struct extent_map *em;
- u64 lockstart = start, lockend = start + len - 1;
- int ret = 0;
-
- io_tree = &inode->io_tree;
-
- lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
- ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
- if (ordered) {
- btrfs_put_ordered_extent(ordered);
- ret = 1;
- goto out_unlock;
- }
-
- em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out_unlock;
- }
-
- /*
- * This extent does not actually cover the logical extent anymore,
- * move on to the next inode.
- */
- if (em->block_start > logical ||
- em->block_start + em->block_len < logical + len ||
- test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
- free_extent_map(em);
- ret = 1;
- goto out_unlock;
- }
- free_extent_map(em);
-
-out_unlock:
- unlock_extent_cached(io_tree, lockstart, lockend, &cached_state);
- return ret;
-}
-
-static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
- struct scrub_copy_nocow_ctx *nocow_ctx)
-{
- struct btrfs_fs_info *fs_info = nocow_ctx->sctx->fs_info;
- struct btrfs_key key;
- struct inode *inode;
- struct page *page;
- struct btrfs_root *local_root;
- struct extent_io_tree *io_tree;
- u64 physical_for_dev_replace;
- u64 nocow_ctx_logical;
- u64 len = nocow_ctx->len;
- unsigned long index;
- int srcu_index;
- int ret = 0;
- int err = 0;
-
- key.objectid = root;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
-
- local_root = btrfs_read_fs_root_no_name(fs_info, &key);
- if (IS_ERR(local_root)) {
- srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
- return PTR_ERR(local_root);
- }
-
- key.type = BTRFS_INODE_ITEM_KEY;
- key.objectid = inum;
- key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
- srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- /* Avoid truncate/dio/punch hole.. */
- inode_lock(inode);
- inode_dio_wait(inode);
-
- physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
- io_tree = &BTRFS_I(inode)->io_tree;
- nocow_ctx_logical = nocow_ctx->logical;
-
- ret = check_extent_to_block(BTRFS_I(inode), offset, len,
- nocow_ctx_logical);
- if (ret) {
- ret = ret > 0 ? 0 : ret;
- goto out;
- }
-
- while (len >= PAGE_SIZE) {
- index = offset >> PAGE_SHIFT;
-again:
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
- if (!page) {
- btrfs_err(fs_info, "find_or_create_page() failed");
- ret = -ENOMEM;
- goto out;
- }
-
- if (PageUptodate(page)) {
- if (PageDirty(page))
- goto next_page;
- } else {
- ClearPageError(page);
- err = extent_read_full_page(io_tree, page,
- btrfs_get_extent,
- nocow_ctx->mirror_num);
- if (err) {
- ret = err;
- goto next_page;
- }
-
- lock_page(page);
- /*
- * If the page has been remove from the page cache,
- * the data on it is meaningless, because it may be
- * old one, the new data may be written into the new
- * page in the page cache.
- */
- if (page->mapping != inode->i_mapping) {
- unlock_page(page);
- put_page(page);
- goto again;
- }
- if (!PageUptodate(page)) {
- ret = -EIO;
- goto next_page;
- }
- }
-
- ret = check_extent_to_block(BTRFS_I(inode), offset, len,
- nocow_ctx_logical);
- if (ret) {
- ret = ret > 0 ? 0 : ret;
- goto next_page;
- }
-
- err = write_page_nocow(nocow_ctx->sctx,
- physical_for_dev_replace, page);
- if (err)
- ret = err;
-next_page:
- unlock_page(page);
- put_page(page);
-
- if (ret)
- break;
-
- offset += PAGE_SIZE;
- physical_for_dev_replace += PAGE_SIZE;
- nocow_ctx_logical += PAGE_SIZE;
- len -= PAGE_SIZE;
- }
- ret = COPY_COMPLETE;
-out:
- inode_unlock(inode);
- iput(inode);
- return ret;
-}
-
-static int write_page_nocow(struct scrub_ctx *sctx,
- u64 physical_for_dev_replace, struct page *page)
-{
- struct bio *bio;
- struct btrfs_device *dev;
-
- dev = sctx->wr_tgtdev;
- if (!dev)
- return -EIO;
- if (!dev->bdev) {
- btrfs_warn_rl(dev->fs_info,
- "scrub write_page_nocow(bdev == NULL) is unexpected");
- return -EIO;
- }
- bio = btrfs_io_bio_alloc(1);
- bio->bi_iter.bi_size = 0;
- bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
- bio_set_dev(bio, dev->bdev);
- bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
- /* bio_add_page won't fail on a freshly allocated bio */
- bio_add_page(bio, page, PAGE_SIZE, 0);
-
- if (btrfsic_submit_bio_wait(bio)) {
- bio_put(bio);
- btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
- return -EIO;
- }
-
- bio_put(bio);
- return 0;
-}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c47f62b19226..ba8950bfd9c7 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -100,6 +100,7 @@ struct send_ctx {
u64 cur_inode_rdev;
u64 cur_inode_last_extent;
u64 cur_inode_next_write_offset;
+ bool ignore_cur_inode;
u64 send_progress;
@@ -1500,7 +1501,7 @@ static int read_symlink(struct btrfs_root *root,
BUG_ON(compression);
off = btrfs_file_extent_inline_start(ei);
- len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
+ len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
@@ -5006,6 +5007,15 @@ static int send_hole(struct send_ctx *sctx, u64 end)
u64 len;
int ret = 0;
+ /*
+ * A hole that starts at EOF or beyond it. Since we do not yet support
+ * fallocate (for extent preallocation and hole punching), sending a
+ * write of zeroes starting at EOF or beyond would later require issuing
+ * a truncate operation which would undo the write and achieve nothing.
+ */
+ if (offset >= sctx->cur_inode_size)
+ return 0;
+
if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
return send_update_extent(sctx, offset, end - offset);
@@ -5160,7 +5170,7 @@ static int clone_range(struct send_ctx *sctx,
ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
type = btrfs_file_extent_type(leaf, ei);
if (type == BTRFS_FILE_EXTENT_INLINE) {
- ext_len = btrfs_file_extent_inline_len(leaf, slot, ei);
+ ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
ext_len = PAGE_ALIGN(ext_len);
} else {
ext_len = btrfs_file_extent_num_bytes(leaf, ei);
@@ -5236,8 +5246,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
struct btrfs_file_extent_item);
type = btrfs_file_extent_type(path->nodes[0], ei);
if (type == BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_inline_len(path->nodes[0],
- path->slots[0], ei);
+ len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
/*
* it is possible the inline item won't cover the whole page,
* but there may be items after this page. Make
@@ -5375,7 +5384,7 @@ static int is_extent_unchanged(struct send_ctx *sctx,
}
if (right_type == BTRFS_FILE_EXTENT_INLINE) {
- right_len = btrfs_file_extent_inline_len(eb, slot, ei);
+ right_len = btrfs_file_extent_ram_bytes(eb, ei);
right_len = PAGE_ALIGN(right_len);
} else {
right_len = btrfs_file_extent_num_bytes(eb, ei);
@@ -5496,8 +5505,7 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset)
struct btrfs_file_extent_item);
type = btrfs_file_extent_type(path->nodes[0], fi);
if (type == BTRFS_FILE_EXTENT_INLINE) {
- u64 size = btrfs_file_extent_inline_len(path->nodes[0],
- path->slots[0], fi);
+ u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
extent_end = ALIGN(key.offset + size,
sctx->send_root->fs_info->sectorsize);
} else {
@@ -5560,7 +5568,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, fi) ==
BTRFS_FILE_EXTENT_INLINE) {
- u64 size = btrfs_file_extent_inline_len(leaf, slot, fi);
+ u64 size = btrfs_file_extent_ram_bytes(leaf, fi);
extent_end = ALIGN(key.offset + size,
root->fs_info->sectorsize);
@@ -5606,8 +5614,7 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
struct btrfs_file_extent_item);
type = btrfs_file_extent_type(path->nodes[0], fi);
if (type == BTRFS_FILE_EXTENT_INLINE) {
- u64 size = btrfs_file_extent_inline_len(path->nodes[0],
- path->slots[0], fi);
+ u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
extent_end = ALIGN(key->offset + size,
sctx->send_root->fs_info->sectorsize);
} else {
@@ -5799,6 +5806,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
int pending_move = 0;
int refs_processed = 0;
+ if (sctx->ignore_cur_inode)
+ return 0;
+
ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
&refs_processed);
if (ret < 0)
@@ -5917,6 +5927,93 @@ out:
return ret;
}
+struct parent_paths_ctx {
+ struct list_head *refs;
+ struct send_ctx *sctx;
+};
+
+static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
+ void *ctx)
+{
+ struct parent_paths_ctx *ppctx = ctx;
+
+ return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx,
+ ppctx->refs);
+}
+
+/*
+ * Issue unlink operations for all paths of the current inode found in the
+ * parent snapshot.
+ */
+static int btrfs_unlink_all_paths(struct send_ctx *sctx)
+{
+ LIST_HEAD(deleted_refs);
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct parent_paths_ctx ctx;
+ int ret;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = sctx->cur_ino;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = 0;
+ ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ ctx.refs = &deleted_refs;
+ ctx.sctx = sctx;
+
+ while (true) {
+ struct extent_buffer *eb = path->nodes[0];
+ int slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(eb)) {
+ ret = btrfs_next_leaf(sctx->parent_root, path);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ if (key.objectid != sctx->cur_ino)
+ break;
+ if (key.type != BTRFS_INODE_REF_KEY &&
+ key.type != BTRFS_INODE_EXTREF_KEY)
+ break;
+
+ ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
+ record_parent_ref, &ctx);
+ if (ret < 0)
+ goto out;
+
+ path->slots[0]++;
+ }
+
+ while (!list_empty(&deleted_refs)) {
+ struct recorded_ref *ref;
+
+ ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
+ ret = send_unlink(sctx, ref->full_path);
+ if (ret < 0)
+ goto out;
+ fs_path_free(ref->full_path);
+ list_del(&ref->list);
+ kfree(ref);
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ if (ret)
+ __free_recorded_refs(&deleted_refs);
+ return ret;
+}
+
static int changed_inode(struct send_ctx *sctx,
enum btrfs_compare_tree_result result)
{
@@ -5931,6 +6028,7 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_new_gen = 0;
sctx->cur_inode_last_extent = (u64)-1;
sctx->cur_inode_next_write_offset = 0;
+ sctx->ignore_cur_inode = false;
/*
* Set send_progress to current inode. This will tell all get_cur_xxx
@@ -5971,6 +6069,33 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_new_gen = 1;
}
+ /*
+ * Normally we do not find inodes with a link count of zero (orphans)
+ * because the most common case is to create a snapshot and use it
+ * for a send operation. However other less common use cases involve
+ * using a subvolume and send it after turning it to RO mode just
+ * after deleting all hard links of a file while holding an open
+ * file descriptor against it or turning a RO snapshot into RW mode,
+ * keep an open file descriptor against a file, delete it and then
+ * turn the snapshot back to RO mode before using it for a send
+ * operation. So if we find such cases, ignore the inode and all its
+ * items completely if it's a new inode, or if it's a changed inode
+ * make sure all its previous paths (from the parent snapshot) are all
+ * unlinked and all other the inode items are ignored.
+ */
+ if (result == BTRFS_COMPARE_TREE_NEW ||
+ result == BTRFS_COMPARE_TREE_CHANGED) {
+ u32 nlinks;
+
+ nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
+ if (nlinks == 0) {
+ sctx->ignore_cur_inode = true;
+ if (result == BTRFS_COMPARE_TREE_CHANGED)
+ ret = btrfs_unlink_all_paths(sctx);
+ goto out;
+ }
+ }
+
if (result == BTRFS_COMPARE_TREE_NEW) {
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = 1;
@@ -6309,15 +6434,17 @@ static int changed_cb(struct btrfs_path *left_path,
key->objectid == BTRFS_FREE_SPACE_OBJECTID)
goto out;
- if (key->type == BTRFS_INODE_ITEM_KEY)
+ if (key->type == BTRFS_INODE_ITEM_KEY) {
ret = changed_inode(sctx, result);
- else if (key->type == BTRFS_INODE_REF_KEY ||
- key->type == BTRFS_INODE_EXTREF_KEY)
- ret = changed_ref(sctx, result);
- else if (key->type == BTRFS_XATTR_ITEM_KEY)
- ret = changed_xattr(sctx, result);
- else if (key->type == BTRFS_EXTENT_DATA_KEY)
- ret = changed_extent(sctx, result);
+ } else if (!sctx->ignore_cur_inode) {
+ if (key->type == BTRFS_INODE_REF_KEY ||
+ key->type == BTRFS_INODE_EXTREF_KEY)
+ ret = changed_ref(sctx, result);
+ else if (key->type == BTRFS_XATTR_ITEM_KEY)
+ ret = changed_xattr(sctx, result);
+ else if (key->type == BTRFS_EXTENT_DATA_KEY)
+ ret = changed_extent(sctx, result);
+ }
out:
return ret;
@@ -6328,7 +6455,6 @@ static int full_send_tree(struct send_ctx *sctx)
int ret;
struct btrfs_root *send_root = sctx->send_root;
struct btrfs_key key;
- struct btrfs_key found_key;
struct btrfs_path *path;
struct extent_buffer *eb;
int slot;
@@ -6350,17 +6476,13 @@ static int full_send_tree(struct send_ctx *sctx)
while (1) {
eb = path->nodes[0];
slot = path->slots[0];
- btrfs_item_key_to_cpu(eb, &found_key, slot);
+ btrfs_item_key_to_cpu(eb, &key, slot);
- ret = changed_cb(path, NULL, &found_key,
+ ret = changed_cb(path, NULL, &key,
BTRFS_COMPARE_TREE_NEW, sctx);
if (ret < 0)
goto out;
- key.objectid = found_key.objectid;
- key.type = found_key.type;
- key.offset = found_key.offset + 1;
-
ret = btrfs_next_item(send_root, path);
if (ret < 0)
goto out;
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index b7b4acb12833..4c13b737f568 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -3,7 +3,6 @@
* Copyright (C) 2007 Oracle. All rights reserved.
*/
-#include <linux/highmem.h>
#include <asm/unaligned.h>
#include "ctree.h"
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 81107ad49f3a..6601c9aa5e35 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -5,7 +5,6 @@
#include <linux/blkdev.h>
#include <linux/module.h>
-#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
@@ -15,8 +14,6 @@
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
-#include <linux/mpage.h>
-#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
@@ -468,9 +465,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_subvolrootid:
case Opt_device:
/*
- * These are parsed by btrfs_parse_subvol_options
- * and btrfs_parse_early_options
- * and can be happily ignored here.
+ * These are parsed by btrfs_parse_subvol_options or
+ * btrfs_parse_device_options and can be ignored here.
*/
break;
case Opt_nodatasum:
@@ -760,6 +756,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_recovery:
btrfs_warn(info,
"'recovery' is deprecated, use 'usebackuproot' instead");
+ /* fall through */
case Opt_usebackuproot:
btrfs_info(info,
"trying to use backup root at mount time");
@@ -885,13 +882,16 @@ out:
* All other options will be parsed on much later in the mount process and
* only when we need to allocate a new super block.
*/
-static int btrfs_parse_early_options(const char *options, fmode_t flags,
- void *holder, struct btrfs_fs_devices **fs_devices)
+static int btrfs_parse_device_options(const char *options, fmode_t flags,
+ void *holder)
{
substring_t args[MAX_OPT_ARGS];
char *device_name, *opts, *orig, *p;
+ struct btrfs_device *device = NULL;
int error = 0;
+ lockdep_assert_held(&uuid_mutex);
+
if (!options)
return 0;
@@ -917,11 +917,13 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
error = -ENOMEM;
goto out;
}
- error = btrfs_scan_one_device(device_name,
- flags, holder, fs_devices);
+ device = btrfs_scan_one_device(device_name, flags,
+ holder);
kfree(device_name);
- if (error)
+ if (IS_ERR(device)) {
+ error = PTR_ERR(device);
goto out;
+ }
}
}
@@ -935,8 +937,8 @@ out:
*
* The value is later passed to mount_subvol()
*/
-static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
- char **subvol_name, u64 *subvol_objectid)
+static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
+ u64 *subvol_objectid)
{
substring_t args[MAX_OPT_ARGS];
char *opts, *orig, *p;
@@ -948,7 +950,7 @@ static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
/*
* strsep changes the string, duplicate it because
- * btrfs_parse_early_options gets called later
+ * btrfs_parse_device_options gets called later
*/
opts = kstrdup(options, GFP_KERNEL);
if (!opts)
@@ -1517,6 +1519,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
{
struct block_device *bdev = NULL;
struct super_block *s;
+ struct btrfs_device *device = NULL;
struct btrfs_fs_devices *fs_devices = NULL;
struct btrfs_fs_info *fs_info = NULL;
struct security_mnt_opts new_sec_opts;
@@ -1526,12 +1529,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
- error = btrfs_parse_early_options(data, mode, fs_type,
- &fs_devices);
- if (error) {
- return ERR_PTR(error);
- }
-
security_init_mnt_opts(&new_sec_opts);
if (data) {
error = parse_security_options(data, &new_sec_opts);
@@ -1539,10 +1536,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
return ERR_PTR(error);
}
- error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
- if (error)
- goto error_sec_opts;
-
/*
* Setup a dummy root and fs_info for test/set super. This is because
* we don't actually fill this stuff out until open_ctree, but we need
@@ -1555,8 +1548,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
goto error_sec_opts;
}
- fs_info->fs_devices = fs_devices;
-
fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
security_init_mnt_opts(&fs_info->security_opts);
@@ -1565,7 +1556,25 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
goto error_fs_info;
}
+ mutex_lock(&uuid_mutex);
+ error = btrfs_parse_device_options(data, mode, fs_type);
+ if (error) {
+ mutex_unlock(&uuid_mutex);
+ goto error_fs_info;
+ }
+
+ device = btrfs_scan_one_device(device_name, mode, fs_type);
+ if (IS_ERR(device)) {
+ mutex_unlock(&uuid_mutex);
+ error = PTR_ERR(device);
+ goto error_fs_info;
+ }
+
+ fs_devices = device->fs_devices;
+ fs_info->fs_devices = fs_devices;
+
error = btrfs_open_devices(fs_devices, mode, fs_type);
+ mutex_unlock(&uuid_mutex);
if (error)
goto error_fs_info;
@@ -1650,8 +1659,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
- error = btrfs_parse_subvol_options(data, mode,
- &subvol_name, &subvol_objectid);
+ error = btrfs_parse_subvol_options(data, &subvol_name,
+ &subvol_objectid);
if (error) {
kfree(subvol_name);
return ERR_PTR(error);
@@ -2098,14 +2107,9 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
btrfs_account_ro_block_groups_free_space(found);
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
- if (!list_empty(&found->block_groups[i])) {
- switch (i) {
- case BTRFS_RAID_DUP:
- case BTRFS_RAID_RAID1:
- case BTRFS_RAID_RAID10:
- factor = 2;
- }
- }
+ if (!list_empty(&found->block_groups[i]))
+ factor = btrfs_bg_type_to_factor(
+ btrfs_raid_array[i].bg_flag);
}
}
@@ -2222,7 +2226,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct btrfs_ioctl_vol_args *vol;
- struct btrfs_fs_devices *fs_devices;
+ struct btrfs_device *device = NULL;
int ret = -ENOTTY;
if (!capable(CAP_SYS_ADMIN))
@@ -2234,15 +2238,24 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case BTRFS_IOC_SCAN_DEV:
- ret = btrfs_scan_one_device(vol->name, FMODE_READ,
- &btrfs_root_fs_type, &fs_devices);
+ mutex_lock(&uuid_mutex);
+ device = btrfs_scan_one_device(vol->name, FMODE_READ,
+ &btrfs_root_fs_type);
+ ret = PTR_ERR_OR_ZERO(device);
+ mutex_unlock(&uuid_mutex);
break;
case BTRFS_IOC_DEVICES_READY:
- ret = btrfs_scan_one_device(vol->name, FMODE_READ,
- &btrfs_root_fs_type, &fs_devices);
- if (ret)
+ mutex_lock(&uuid_mutex);
+ device = btrfs_scan_one_device(vol->name, FMODE_READ,
+ &btrfs_root_fs_type);
+ if (IS_ERR(device)) {
+ mutex_unlock(&uuid_mutex);
+ ret = PTR_ERR(device);
break;
- ret = !(fs_devices->num_devices == fs_devices->total_devices);
+ }
+ ret = !(device->fs_devices->num_devices ==
+ device->fs_devices->total_devices);
+ mutex_unlock(&uuid_mutex);
break;
case BTRFS_IOC_GET_SUPPORTED_FEATURES:
ret = btrfs_ioctl_get_supported_features((void __user*)arg);
@@ -2290,7 +2303,6 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
struct btrfs_fs_devices *cur_devices;
struct btrfs_device *dev, *first_dev = NULL;
struct list_head *head;
- struct rcu_string *name;
/*
* Lightweight locking of the devices. We should not need
@@ -2314,12 +2326,10 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
cur_devices = cur_devices->seed;
}
- if (first_dev) {
- name = rcu_dereference(first_dev->name);
- seq_escape(m, name->str, " \t\n\\");
- } else {
+ if (first_dev)
+ seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
+ else
WARN_ON(1);
- }
rcu_read_unlock();
return 0;
}
@@ -2331,7 +2341,6 @@ static const struct super_operations btrfs_super_ops = {
.sync_fs = btrfs_sync_fs,
.show_options = btrfs_show_options,
.show_devname = btrfs_show_devname,
- .write_inode = btrfs_write_inode,
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
.statfs = btrfs_statfs,
@@ -2369,7 +2378,7 @@ static __cold void btrfs_interface_exit(void)
static void __init btrfs_print_mod_info(void)
{
- pr_info("Btrfs loaded, crc32c=%s"
+ static const char options[] = ""
#ifdef CONFIG_BTRFS_DEBUG
", debug=on"
#endif
@@ -2382,8 +2391,8 @@ static void __init btrfs_print_mod_info(void)
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
", ref-verify=on"
#endif
- "\n",
- crc32c_impl());
+ ;
+ pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
}
static int __init init_btrfs_fs(void)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4a4e960c7c66..3717c864ba23 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -7,10 +7,8 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
-#include <linux/buffer_head.h>
#include <linux/kobject.h>
#include <linux/bug.h>
-#include <linux/genhd.h>
#include <linux/debugfs.h>
#include "ctree.h"
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index ace94db09d29..412b910b04cc 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -216,7 +216,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
btrfs_init_dummy_trans(&trans, fs_info);
test_msg("qgroup basic add");
- ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID);
+ ret = btrfs_create_qgroup(&trans, BTRFS_FS_TREE_OBJECTID);
if (ret) {
test_err("couldn't create a qgroup %d", ret);
return ret;
@@ -249,8 +249,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
- nodesize, old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+ new_roots);
if (ret) {
test_err("couldn't account space for a qgroup %d", ret);
return ret;
@@ -285,8 +285,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
- nodesize, old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+ new_roots);
if (ret) {
test_err("couldn't account space for a qgroup %d", ret);
return -EINVAL;
@@ -322,7 +322,7 @@ static int test_multiple_refs(struct btrfs_root *root,
* We have BTRFS_FS_TREE_OBJECTID created already from the
* previous test.
*/
- ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID);
+ ret = btrfs_create_qgroup(&trans, BTRFS_FIRST_FREE_OBJECTID);
if (ret) {
test_err("couldn't create a qgroup %d", ret);
return ret;
@@ -350,8 +350,8 @@ static int test_multiple_refs(struct btrfs_root *root,
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
- nodesize, old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+ new_roots);
if (ret) {
test_err("couldn't account space for a qgroup %d", ret);
return ret;
@@ -385,8 +385,8 @@ static int test_multiple_refs(struct btrfs_root *root,
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
- nodesize, old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+ new_roots);
if (ret) {
test_err("couldn't account space for a qgroup %d", ret);
return ret;
@@ -426,8 +426,8 @@ static int test_multiple_refs(struct btrfs_root *root,
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
- nodesize, old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots,
+ new_roots);
if (ret) {
test_err("couldn't account space for a qgroup %d", ret);
return ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ff5f6c719976..3b84f5015029 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -241,7 +241,7 @@ loop:
refcount_set(&cur_trans->use_count, 2);
atomic_set(&cur_trans->pending_ordered, 0);
cur_trans->flags = 0;
- cur_trans->start_time = get_seconds();
+ cur_trans->start_time = ktime_get_seconds();
memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
@@ -680,7 +680,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
trans = start_transaction(root, 0, TRANS_ATTACH,
BTRFS_RESERVE_NO_FLUSH, true);
- if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
+ if (trans == ERR_PTR(-ENOENT))
btrfs_wait_for_commit(root->fs_info, 0);
return trans;
@@ -1152,7 +1152,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
ret = btrfs_run_dev_replace(trans, fs_info);
if (ret)
return ret;
- ret = btrfs_run_qgroups(trans, fs_info);
+ ret = btrfs_run_qgroups(trans);
if (ret)
return ret;
@@ -1355,8 +1355,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
goto out;
/* Now qgroup are all updated, we can inherit it to new qgroups */
- ret = btrfs_qgroup_inherit(trans, fs_info,
- src->root_key.objectid, dst_objectid,
+ ret = btrfs_qgroup_inherit(trans, src->root_key.objectid, dst_objectid,
inherit);
if (ret < 0)
goto out;
@@ -1574,7 +1573,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
/*
* insert root back/forward references
*/
- ret = btrfs_add_root_ref(trans, fs_info, objectid,
+ ret = btrfs_add_root_ref(trans, objectid,
parent_root->root_key.objectid,
btrfs_ino(BTRFS_I(parent_inode)), index,
dentry->d_name.name, dentry->d_name.len);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 94439482a0ec..4cbb1b55387d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -48,7 +48,7 @@ struct btrfs_transaction {
int aborted;
struct list_head list;
struct extent_io_tree dirty_pages;
- unsigned long start_time;
+ time64_t start_time;
wait_queue_head_t writer_wait;
wait_queue_head_t commit_wait;
wait_queue_head_t pending_wait;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 8d40e7dd8c30..db835635372f 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -19,6 +19,7 @@
#include "tree-checker.h"
#include "disk-io.h"
#include "compression.h"
+#include "volumes.h"
/*
* Error message should follow the following format:
@@ -353,6 +354,102 @@ static int check_dir_item(struct btrfs_fs_info *fs_info,
return 0;
}
+__printf(4, 5)
+__cold
+static void block_group_err(const struct btrfs_fs_info *fs_info,
+ const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct btrfs_key key;
+ struct va_format vaf;
+ va_list args;
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node",
+ btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
+ key.objectid, key.offset, &vaf);
+ va_end(args);
+}
+
+static int check_block_group_item(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_block_group_item bgi;
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
+ u64 flags;
+ u64 type;
+
+ /*
+ * Here we don't really care about alignment since extent allocator can
+ * handle it. We care more about the size, as if one block group is
+ * larger than maximum size, it's must be some obvious corruption.
+ */
+ if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group size, have %llu expect (0, %llu]",
+ key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
+ return -EUCLEAN;
+ }
+
+ if (item_size != sizeof(bgi)) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid item size, have %u expect %zu",
+ item_size, sizeof(bgi));
+ return -EUCLEAN;
+ }
+
+ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bgi));
+ if (btrfs_block_group_chunk_objectid(&bgi) !=
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group chunk objectid, have %llu expect %llu",
+ btrfs_block_group_chunk_objectid(&bgi),
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ return -EUCLEAN;
+ }
+
+ if (btrfs_block_group_used(&bgi) > key->offset) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group used, have %llu expect [0, %llu)",
+ btrfs_block_group_used(&bgi), key->offset);
+ return -EUCLEAN;
+ }
+
+ flags = btrfs_block_group_flags(&bgi);
+ if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
+ block_group_err(fs_info, leaf, slot,
+"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
+ flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
+ hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
+ return -EUCLEAN;
+ }
+
+ type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
+ if (type != BTRFS_BLOCK_GROUP_DATA &&
+ type != BTRFS_BLOCK_GROUP_METADATA &&
+ type != BTRFS_BLOCK_GROUP_SYSTEM &&
+ type != (BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_DATA)) {
+ block_group_err(fs_info, leaf, slot,
+"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx",
+ type, hweight64(type),
+ BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
+ BTRFS_BLOCK_GROUP_SYSTEM,
+ BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
+ return -EUCLEAN;
+ }
+ return 0;
+}
+
/*
* Common point to switch the item-specific validation.
*/
@@ -374,6 +471,9 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info,
case BTRFS_XATTR_ITEM_KEY:
ret = check_dir_item(fs_info, leaf, key, slot);
break;
+ case BTRFS_BLOCK_GROUP_ITEM_KEY:
+ ret = check_block_group_item(fs_info, leaf, key, slot);
+ break;
}
return ret;
}
@@ -396,9 +496,22 @@ static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
* skip this check for relocation trees.
*/
if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
+ u64 owner = btrfs_header_owner(leaf);
struct btrfs_root *check_root;
- key.objectid = btrfs_header_owner(leaf);
+ /* These trees must never be empty */
+ if (owner == BTRFS_ROOT_TREE_OBJECTID ||
+ owner == BTRFS_CHUNK_TREE_OBJECTID ||
+ owner == BTRFS_EXTENT_TREE_OBJECTID ||
+ owner == BTRFS_DEV_TREE_OBJECTID ||
+ owner == BTRFS_FS_TREE_OBJECTID ||
+ owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
+ generic_err(fs_info, leaf, 0,
+ "invalid root, root %llu must never be empty",
+ owner);
+ return -EUCLEAN;
+ }
+ key.objectid = owner;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f8220ec02036..1650dc44a5e3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -545,12 +545,8 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
- if (IS_ERR(inode)) {
+ if (IS_ERR(inode))
inode = NULL;
- } else if (is_bad_inode(inode)) {
- iput(inode);
- inode = NULL;
- }
return inode;
}
@@ -597,7 +593,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
nbytes = 0;
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
- size = btrfs_file_extent_inline_len(eb, slot, item);
+ size = btrfs_file_extent_ram_bytes(eb, item);
nbytes = btrfs_file_extent_ram_bytes(eb, item);
extent_end = ALIGN(start + size,
fs_info->sectorsize);
@@ -685,7 +681,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
* as the owner of the file extent changed from log tree
* (doesn't affect qgroup) to fs/file tree(affects qgroup)
*/
- ret = btrfs_qgroup_trace_extent(trans, fs_info,
+ ret = btrfs_qgroup_trace_extent(trans,
btrfs_file_extent_disk_bytenr(eb, item),
btrfs_file_extent_disk_num_bytes(eb, item),
GFP_NOFS);
@@ -715,7 +711,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
* allocation tree
*/
ret = btrfs_alloc_logged_file_extent(trans,
- fs_info,
root->root_key.objectid,
key->objectid, offset, &ins);
if (ret)
@@ -1291,6 +1286,46 @@ again:
return ret;
}
+static int btrfs_inode_ref_exists(struct inode *inode, struct inode *dir,
+ const u8 ref_type, const char *name,
+ const int namelen)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ const u64 parent_id = btrfs_ino(BTRFS_I(dir));
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = btrfs_ino(BTRFS_I(inode));
+ key.type = ref_type;
+ if (key.type == BTRFS_INODE_REF_KEY)
+ key.offset = parent_id;
+ else
+ key.offset = btrfs_extref_hash(parent_id, name, namelen);
+
+ ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ if (key.type == BTRFS_INODE_EXTREF_KEY)
+ ret = btrfs_find_name_in_ext_backref(path->nodes[0],
+ path->slots[0], parent_id,
+ name, namelen, NULL);
+ else
+ ret = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+ name, namelen, NULL);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
/*
* replay one inode back reference item found in the log tree.
* eb, slot and key refer to the buffer and key found in the log tree.
@@ -1400,6 +1435,32 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
}
}
+ /*
+ * If a reference item already exists for this inode
+ * with the same parent and name, but different index,
+ * drop it and the corresponding directory index entries
+ * from the parent before adding the new reference item
+ * and dir index entries, otherwise we would fail with
+ * -EEXIST returned from btrfs_add_link() below.
+ */
+ ret = btrfs_inode_ref_exists(inode, dir, key->type,
+ name, namelen);
+ if (ret > 0) {
+ ret = btrfs_unlink_inode(trans, root,
+ BTRFS_I(dir),
+ BTRFS_I(inode),
+ name, namelen);
+ /*
+ * If we dropped the link count to 0, bump it so
+ * that later the iput() on the inode will not
+ * free it. We will fixup the link count later.
+ */
+ if (!ret && inode->i_nlink == 0)
+ inc_nlink(inode);
+ }
+ if (ret < 0)
+ goto out;
+
/* insert our name */
ret = btrfs_add_link(trans, BTRFS_I(dir),
BTRFS_I(inode),
@@ -2120,7 +2181,7 @@ again:
dir_key->offset,
name, name_len, 0);
}
- if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) {
+ if (!log_di || log_di == ERR_PTR(-ENOENT)) {
btrfs_dir_item_key_to_cpu(eb, di, &location);
btrfs_release_path(path);
btrfs_release_path(log_path);
@@ -2933,7 +2994,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
/* bail out if we need to do a full commit */
if (btrfs_need_log_full_commit(fs_info, trans)) {
ret = -EAGAIN;
- btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&root->log_mutex);
goto out;
}
@@ -2951,7 +3011,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (ret) {
blk_finish_plug(&plug);
btrfs_abort_transaction(trans, ret);
- btrfs_free_logged_extents(log, log_transid);
btrfs_set_log_full_commit(fs_info, trans);
mutex_unlock(&root->log_mutex);
goto out;
@@ -3002,7 +3061,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out;
}
btrfs_wait_tree_log_extents(log, mark);
- btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out;
@@ -3020,7 +3078,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (atomic_read(&log_root_tree->log_commit[index2])) {
blk_finish_plug(&plug);
ret = btrfs_wait_tree_log_extents(log, mark);
- btrfs_wait_logged_extents(trans, log, log_transid);
wait_log_commit(log_root_tree,
root_log_ctx.log_transid);
mutex_unlock(&log_root_tree->log_mutex);
@@ -3045,7 +3102,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (btrfs_need_log_full_commit(fs_info, trans)) {
blk_finish_plug(&plug);
btrfs_wait_tree_log_extents(log, mark);
- btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out_wake_log_root;
@@ -3058,7 +3114,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (ret) {
btrfs_set_log_full_commit(fs_info, trans);
btrfs_abort_transaction(trans, ret);
- btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
}
@@ -3068,11 +3123,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
EXTENT_NEW | EXTENT_DIRTY);
if (ret) {
btrfs_set_log_full_commit(fs_info, trans);
- btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
}
- btrfs_wait_logged_extents(trans, log, log_transid);
btrfs_set_super_log_root(fs_info->super_for_commit,
log_root_tree->node->start);
@@ -3159,14 +3212,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
}
- /*
- * We may have short-circuited the log tree with the full commit logic
- * and left ordered extents on our list, so clear these out to keep us
- * from leaking inodes and memory.
- */
- btrfs_free_logged_extents(log, 0);
- btrfs_free_logged_extents(log, 1);
-
free_extent_buffer(log->node);
kfree(log);
}
@@ -3756,7 +3801,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
int start_slot, int nr, int inode_only,
u64 logged_isize)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = trans->fs_info;
unsigned long src_offset;
unsigned long dst_offset;
struct btrfs_root *log = inode->root->log_root;
@@ -3937,9 +3982,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(src, extent) ==
BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_inline_len(src,
- src_path->slots[0],
- extent);
+ len = btrfs_file_extent_ram_bytes(src, extent);
*last_extent = ALIGN(key.offset + len,
fs_info->sectorsize);
} else {
@@ -4004,7 +4047,7 @@ fill_holes:
extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(src, extent) ==
BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_inline_len(src, i, extent);
+ len = btrfs_file_extent_ram_bytes(src, extent);
extent_end = ALIGN(key.offset + len,
fs_info->sectorsize);
} else {
@@ -4078,131 +4121,32 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
return 0;
}
-static int wait_ordered_extents(struct btrfs_trans_handle *trans,
- struct inode *inode,
- struct btrfs_root *root,
- const struct extent_map *em,
- const struct list_head *logged_list,
- bool *ordered_io_error)
+static int log_extent_csums(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct btrfs_root *log_root,
+ const struct extent_map *em)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_ordered_extent *ordered;
- struct btrfs_root *log = root->log_root;
- u64 mod_start = em->mod_start;
- u64 mod_len = em->mod_len;
- const bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
u64 csum_offset;
u64 csum_len;
LIST_HEAD(ordered_sums);
int ret = 0;
- *ordered_io_error = false;
-
- if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
+ if (inode->flags & BTRFS_INODE_NODATASUM ||
+ test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
em->block_start == EXTENT_MAP_HOLE)
return 0;
- /*
- * Wait far any ordered extent that covers our extent map. If it
- * finishes without an error, first check and see if our csums are on
- * our outstanding ordered extents.
- */
- list_for_each_entry(ordered, logged_list, log_list) {
- struct btrfs_ordered_sum *sum;
-
- if (!mod_len)
- break;
-
- if (ordered->file_offset + ordered->len <= mod_start ||
- mod_start + mod_len <= ordered->file_offset)
- continue;
-
- if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
- !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) &&
- !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
- const u64 start = ordered->file_offset;
- const u64 end = ordered->file_offset + ordered->len - 1;
-
- WARN_ON(ordered->inode != inode);
- filemap_fdatawrite_range(inode->i_mapping, start, end);
- }
-
- wait_event(ordered->wait,
- (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) ||
- test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)));
-
- if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) {
- /*
- * Clear the AS_EIO/AS_ENOSPC flags from the inode's
- * i_mapping flags, so that the next fsync won't get
- * an outdated io error too.
- */
- filemap_check_errors(inode->i_mapping);
- *ordered_io_error = true;
- break;
- }
- /*
- * We are going to copy all the csums on this ordered extent, so
- * go ahead and adjust mod_start and mod_len in case this
- * ordered extent has already been logged.
- */
- if (ordered->file_offset > mod_start) {
- if (ordered->file_offset + ordered->len >=
- mod_start + mod_len)
- mod_len = ordered->file_offset - mod_start;
- /*
- * If we have this case
- *
- * |--------- logged extent ---------|
- * |----- ordered extent ----|
- *
- * Just don't mess with mod_start and mod_len, we'll
- * just end up logging more csums than we need and it
- * will be ok.
- */
- } else {
- if (ordered->file_offset + ordered->len <
- mod_start + mod_len) {
- mod_len = (mod_start + mod_len) -
- (ordered->file_offset + ordered->len);
- mod_start = ordered->file_offset +
- ordered->len;
- } else {
- mod_len = 0;
- }
- }
-
- if (skip_csum)
- continue;
-
- /*
- * To keep us from looping for the above case of an ordered
- * extent that falls inside of the logged extent.
- */
- if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
- &ordered->flags))
- continue;
-
- list_for_each_entry(sum, &ordered->list, list) {
- ret = btrfs_csum_file_blocks(trans, log, sum);
- if (ret)
- break;
- }
- }
-
- if (*ordered_io_error || !mod_len || ret || skip_csum)
- return ret;
-
+ /* If we're compressed we have to save the entire range of csums. */
if (em->compress_type) {
csum_offset = 0;
csum_len = max(em->block_len, em->orig_block_len);
} else {
- csum_offset = mod_start - em->start;
- csum_len = mod_len;
+ csum_offset = em->mod_start - em->start;
+ csum_len = em->mod_len;
}
/* block start is already adjusted for the file extent offset. */
- ret = btrfs_lookup_csums_range(fs_info->csum_root,
+ ret = btrfs_lookup_csums_range(trans->fs_info->csum_root,
em->block_start + csum_offset,
em->block_start + csum_offset +
csum_len - 1, &ordered_sums, 0);
@@ -4214,7 +4158,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
- ret = btrfs_csum_file_blocks(trans, log, sums);
+ ret = btrfs_csum_file_blocks(trans, log_root, sums);
list_del(&sums->list);
kfree(sums);
}
@@ -4226,7 +4170,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, struct btrfs_root *root,
const struct extent_map *em,
struct btrfs_path *path,
- const struct list_head *logged_list,
struct btrfs_log_ctx *ctx)
{
struct btrfs_root *log = root->log_root;
@@ -4238,18 +4181,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
u64 block_len;
int ret;
int extent_inserted = 0;
- bool ordered_io_err = false;
- ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em,
- logged_list, &ordered_io_err);
+ ret = log_extent_csums(trans, inode, log, em);
if (ret)
return ret;
- if (ordered_io_err) {
- ctx->io_err = -EIO;
- return ctx->io_err;
- }
-
btrfs_init_map_token(&token);
ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path, em->start,
@@ -4424,7 +4360,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *inode,
struct btrfs_path *path,
- struct list_head *logged_list,
struct btrfs_log_ctx *ctx,
const u64 start,
const u64 end)
@@ -4480,20 +4415,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
}
list_sort(NULL, &extents, extent_cmp);
- btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
- /*
- * Some ordered extents started by fsync might have completed
- * before we could collect them into the list logged_list, which
- * means they're gone, not in our logged_list nor in the inode's
- * ordered tree. We want the application/user space to know an
- * error happened while attempting to persist file data so that
- * it can take proper action. If such error happened, we leave
- * without writing to the log tree and the fsync must report the
- * file data write error and not commit the current transaction.
- */
- ret = filemap_check_errors(inode->vfs_inode.i_mapping);
- if (ret)
- ctx->io_err = ret;
process:
while (!list_empty(&extents)) {
em = list_entry(extents.next, struct extent_map, list);
@@ -4512,8 +4433,7 @@ process:
write_unlock(&tree->lock);
- ret = log_one_extent(trans, inode, root, em, path, logged_list,
- ctx);
+ ret = log_one_extent(trans, inode, root, em, path, ctx);
write_lock(&tree->lock);
clear_em_logging(tree, em);
free_extent_map(em);
@@ -4712,9 +4632,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
if (btrfs_file_extent_type(leaf, extent) ==
BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_inline_len(leaf,
- path->slots[0],
- extent);
+ len = btrfs_file_extent_ram_bytes(leaf, extent);
ASSERT(len == i_size ||
(len == fs_info->sectorsize &&
btrfs_file_extent_compression(leaf, extent) !=
@@ -4898,7 +4816,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_key min_key;
struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
- LIST_HEAD(logged_list);
u64 last_extent = 0;
int err = 0;
int ret;
@@ -5094,8 +5011,7 @@ again:
* we don't need to do more work nor fallback to
* a transaction commit.
*/
- if (IS_ERR(other_inode) &&
- PTR_ERR(other_inode) == -ENOENT) {
+ if (other_inode == ERR_PTR(-ENOENT)) {
goto next_key;
} else if (IS_ERR(other_inode)) {
err = PTR_ERR(other_inode);
@@ -5235,7 +5151,7 @@ log_extents:
}
if (fast_search) {
ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
- &logged_list, ctx, start, end);
+ ctx, start, end);
if (ret) {
err = ret;
goto out_unlock;
@@ -5286,10 +5202,6 @@ log_extents:
inode->last_log_commit = inode->last_sub_trans;
spin_unlock(&inode->lock);
out_unlock:
- if (unlikely(err))
- btrfs_put_logged_extents(&logged_list);
- else
- btrfs_submit_logged_extents(&logged_list, log);
mutex_unlock(&inode->log_mutex);
btrfs_free_path(path);
@@ -5585,7 +5497,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_log_ctx *ctx)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_path *path;
struct btrfs_key key;
@@ -6120,7 +6032,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, struct btrfs_inode *old_dir,
struct dentry *parent)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+ struct btrfs_fs_info *fs_info = trans->fs_info;
/*
* this will force the logging code to walk the dentry chain
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e034ad9e23b4..da86706123ff 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -8,15 +8,12 @@
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
-#include <linux/iocontext.h>
-#include <linux/capability.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/semaphore.h>
#include <linux/uuid.h>
#include <linux/list_sort.h>
-#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
@@ -634,44 +631,48 @@ static void pending_bios_fn(struct btrfs_work *work)
* devices.
*/
static void btrfs_free_stale_devices(const char *path,
- struct btrfs_device *skip_dev)
+ struct btrfs_device *skip_device)
{
- struct btrfs_fs_devices *fs_devs, *tmp_fs_devs;
- struct btrfs_device *dev, *tmp_dev;
+ struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
+ struct btrfs_device *device, *tmp_device;
- list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, fs_list) {
-
- if (fs_devs->opened)
+ list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
+ mutex_lock(&fs_devices->device_list_mutex);
+ if (fs_devices->opened) {
+ mutex_unlock(&fs_devices->device_list_mutex);
continue;
+ }
- list_for_each_entry_safe(dev, tmp_dev,
- &fs_devs->devices, dev_list) {
+ list_for_each_entry_safe(device, tmp_device,
+ &fs_devices->devices, dev_list) {
int not_found = 0;
- if (skip_dev && skip_dev == dev)
+ if (skip_device && skip_device == device)
continue;
- if (path && !dev->name)
+ if (path && !device->name)
continue;
rcu_read_lock();
if (path)
- not_found = strcmp(rcu_str_deref(dev->name),
+ not_found = strcmp(rcu_str_deref(device->name),
path);
rcu_read_unlock();
if (not_found)
continue;
/* delete the stale device */
- if (fs_devs->num_devices == 1) {
- btrfs_sysfs_remove_fsid(fs_devs);
- list_del(&fs_devs->fs_list);
- free_fs_devices(fs_devs);
+ fs_devices->num_devices--;
+ list_del(&device->dev_list);
+ btrfs_free_device(device);
+
+ if (fs_devices->num_devices == 0)
break;
- } else {
- fs_devs->num_devices--;
- list_del(&dev->dev_list);
- btrfs_free_device(dev);
- }
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
+ if (fs_devices->num_devices == 0) {
+ btrfs_sysfs_remove_fsid(fs_devices);
+ list_del(&fs_devices->fs_list);
+ free_fs_devices(fs_devices);
}
}
}
@@ -750,7 +751,8 @@ error_brelse:
* error pointer when failed
*/
static noinline struct btrfs_device *device_list_add(const char *path,
- struct btrfs_super_block *disk_super)
+ struct btrfs_super_block *disk_super,
+ bool *new_device_added)
{
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices;
@@ -764,21 +766,26 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (IS_ERR(fs_devices))
return ERR_CAST(fs_devices);
+ mutex_lock(&fs_devices->device_list_mutex);
list_add(&fs_devices->fs_list, &fs_uuids);
device = NULL;
} else {
+ mutex_lock(&fs_devices->device_list_mutex);
device = find_device(fs_devices, devid,
disk_super->dev_item.uuid);
}
if (!device) {
- if (fs_devices->opened)
+ if (fs_devices->opened) {
+ mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-EBUSY);
+ }
device = btrfs_alloc_device(NULL, &devid,
disk_super->dev_item.uuid);
if (IS_ERR(device)) {
+ mutex_unlock(&fs_devices->device_list_mutex);
/* we can safely leave the fs_devices entry around */
return device;
}
@@ -786,17 +793,16 @@ static noinline struct btrfs_device *device_list_add(const char *path,
name = rcu_string_strdup(path, GFP_NOFS);
if (!name) {
btrfs_free_device(device);
+ mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-ENOMEM);
}
rcu_assign_pointer(device->name, name);
- mutex_lock(&fs_devices->device_list_mutex);
list_add_rcu(&device->dev_list, &fs_devices->devices);
fs_devices->num_devices++;
- mutex_unlock(&fs_devices->device_list_mutex);
device->fs_devices = fs_devices;
- btrfs_free_stale_devices(path, device);
+ *new_device_added = true;
if (disk_super->label[0])
pr_info("BTRFS: device label %s devid %llu transid %llu %s\n",
@@ -840,12 +846,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
* with larger generation number or the last-in if
* generation are equal.
*/
+ mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-EEXIST);
}
name = rcu_string_strdup(path, GFP_NOFS);
- if (!name)
+ if (!name) {
+ mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-ENOMEM);
+ }
rcu_string_free(device->name);
rcu_assign_pointer(device->name, name);
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
@@ -865,6 +874,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
fs_devices->total_devices = btrfs_super_num_devices(disk_super);
+ mutex_unlock(&fs_devices->device_list_mutex);
return device;
}
@@ -1004,7 +1014,7 @@ static void btrfs_close_bdev(struct btrfs_device *device)
blkdev_put(device->bdev, device->mode);
}
-static void btrfs_prepare_close_one_device(struct btrfs_device *device)
+static void btrfs_close_one_device(struct btrfs_device *device)
{
struct btrfs_fs_devices *fs_devices = device->fs_devices;
struct btrfs_device *new_device;
@@ -1022,6 +1032,8 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device)
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
fs_devices->missing_devices--;
+ btrfs_close_bdev(device);
+
new_device = btrfs_alloc_device(NULL, &device->devid,
device->uuid);
BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
@@ -1035,39 +1047,23 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device)
list_replace_rcu(&device->dev_list, &new_device->dev_list);
new_device->fs_devices = device->fs_devices;
+
+ call_rcu(&device->rcu, free_device_rcu);
}
static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *device, *tmp;
- struct list_head pending_put;
-
- INIT_LIST_HEAD(&pending_put);
if (--fs_devices->opened > 0)
return 0;
mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
- btrfs_prepare_close_one_device(device);
- list_add(&device->dev_list, &pending_put);
+ btrfs_close_one_device(device);
}
mutex_unlock(&fs_devices->device_list_mutex);
- /*
- * btrfs_show_devname() is using the device_list_mutex,
- * sometimes call to blkdev_put() leads vfs calling
- * into this func. So do put outside of device_list_mutex,
- * as of now.
- */
- while (!list_empty(&pending_put)) {
- device = list_first_entry(&pending_put,
- struct btrfs_device, dev_list);
- list_del(&device->dev_list);
- btrfs_close_bdev(device);
- call_rcu(&device->rcu, free_device_rcu);
- }
-
WARN_ON(fs_devices->open_devices);
WARN_ON(fs_devices->rw_devices);
fs_devices->opened = 0;
@@ -1146,6 +1142,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
{
int ret;
+ lockdep_assert_held(&uuid_mutex);
+
mutex_lock(&fs_devices->device_list_mutex);
if (fs_devices->opened) {
fs_devices->opened++;
@@ -1215,16 +1213,18 @@ static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
* and we are not allowed to call set_blocksize during the scan. The superblock
* is read via pagecache
*/
-int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
- struct btrfs_fs_devices **fs_devices_ret)
+struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
+ void *holder)
{
struct btrfs_super_block *disk_super;
- struct btrfs_device *device;
+ bool new_device_added = false;
+ struct btrfs_device *device = NULL;
struct block_device *bdev;
struct page *page;
- int ret = 0;
u64 bytenr;
+ lockdep_assert_held(&uuid_mutex);
+
/*
* we would like to check all the supers, but that would make
* a btrfs mount succeed after a mkfs from a different FS.
@@ -1236,112 +1236,25 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
bdev = blkdev_get_by_path(path, flags, holder);
if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ return ERR_CAST(bdev);
if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
- ret = -EINVAL;
+ device = ERR_PTR(-EINVAL);
goto error_bdev_put;
}
- mutex_lock(&uuid_mutex);
- device = device_list_add(path, disk_super);
- if (IS_ERR(device))
- ret = PTR_ERR(device);
- else
- *fs_devices_ret = device->fs_devices;
- mutex_unlock(&uuid_mutex);
+ device = device_list_add(path, disk_super, &new_device_added);
+ if (!IS_ERR(device)) {
+ if (new_device_added)
+ btrfs_free_stale_devices(path, device);
+ }
btrfs_release_disk_super(page);
error_bdev_put:
blkdev_put(bdev, flags);
- return ret;
-}
-
-/* helper to account the used device space in the range */
-int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
- u64 end, u64 *length)
-{
- struct btrfs_key key;
- struct btrfs_root *root = device->fs_info->dev_root;
- struct btrfs_dev_extent *dev_extent;
- struct btrfs_path *path;
- u64 extent_end;
- int ret;
- int slot;
- struct extent_buffer *l;
-
- *length = 0;
-
- if (start >= device->total_bytes ||
- test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
- return 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = READA_FORWARD;
-
- key.objectid = device->devid;
- key.offset = start;
- key.type = BTRFS_DEV_EXTENT_KEY;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- ret = btrfs_previous_item(root, path, key.objectid, key.type);
- if (ret < 0)
- goto out;
- }
-
- while (1) {
- l = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(l)) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 0)
- continue;
- if (ret < 0)
- goto out;
-
- break;
- }
- btrfs_item_key_to_cpu(l, &key, slot);
-
- if (key.objectid < device->devid)
- goto next;
-
- if (key.objectid > device->devid)
- break;
-
- if (key.type != BTRFS_DEV_EXTENT_KEY)
- goto next;
-
- dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
- extent_end = key.offset + btrfs_dev_extent_length(l,
- dev_extent);
- if (key.offset <= start && extent_end > end) {
- *length = end - start + 1;
- break;
- } else if (key.offset <= start && extent_end > start)
- *length += extent_end - start;
- else if (key.offset > start && extent_end <= end)
- *length += extent_end - key.offset;
- else if (key.offset > start && key.offset <= end) {
- *length += end - key.offset + 1;
- break;
- } else if (key.offset > end)
- break;
-
-next:
- path->slots[0]++;
- }
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
+ return device;
}
static int contains_pending_extent(struct btrfs_transaction *transaction,
@@ -1753,10 +1666,8 @@ error:
* the btrfs_device struct should be fully filled in
*/
static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_device *device)
{
- struct btrfs_root *root = fs_info->chunk_root;
int ret;
struct btrfs_path *path;
struct btrfs_dev_item *dev_item;
@@ -1772,8 +1683,8 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid;
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- sizeof(*dev_item));
+ ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
+ &key, sizeof(*dev_item));
if (ret)
goto out;
@@ -1798,7 +1709,7 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
ptr = btrfs_device_uuid(dev_item);
write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
ptr = btrfs_device_fsid(dev_item);
- write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_FSID_SIZE);
+ write_extent_buffer(leaf, trans->fs_info->fsid, ptr, BTRFS_FSID_SIZE);
btrfs_mark_buffer_dirty(leaf);
ret = 0;
@@ -1922,9 +1833,10 @@ static struct btrfs_device * btrfs_find_next_active_device(
* where this function called, there should be always be another device (or
* this_dev) which is active.
*/
-void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
- struct btrfs_device *device, struct btrfs_device *this_dev)
+void btrfs_assign_next_active_device(struct btrfs_device *device,
+ struct btrfs_device *this_dev)
{
+ struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_device *next_device;
if (this_dev)
@@ -2027,11 +1939,14 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
cur_devices->num_devices--;
cur_devices->total_devices--;
+ /* Update total_devices of the parent fs_devices if it's seed */
+ if (cur_devices != fs_devices)
+ fs_devices->total_devices--;
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
cur_devices->missing_devices--;
- btrfs_assign_next_active_device(fs_info, device, NULL);
+ btrfs_assign_next_active_device(device, NULL);
if (device->bdev) {
cur_devices->open_devices--;
@@ -2082,12 +1997,11 @@ error_undo:
goto out;
}
-void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *srcdev)
+void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
{
struct btrfs_fs_devices *fs_devices;
- lockdep_assert_held(&fs_info->fs_devices->device_list_mutex);
+ lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex);
/*
* in case of fs with no seed, srcdev->fs_devices will point
@@ -2149,10 +2063,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
}
}
-void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *tgtdev)
+void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
{
- struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;
WARN_ON(!tgtdev);
mutex_lock(&fs_devices->device_list_mutex);
@@ -2164,7 +2077,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
fs_devices->num_devices--;
- btrfs_assign_next_active_device(fs_info, tgtdev, NULL);
+ btrfs_assign_next_active_device(tgtdev, NULL);
list_del_rcu(&tgtdev->dev_list);
@@ -2295,7 +2208,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&seed_devices->alloc_list);
mutex_init(&seed_devices->device_list_mutex);
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
synchronize_rcu);
list_for_each_entry(device, &seed_devices->devices, dev_list)
@@ -2315,7 +2228,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
generate_random_uuid(fs_devices->fsid);
memcpy(fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
super_flags = btrfs_super_flags(disk_super) &
~BTRFS_SUPER_FLAG_SEEDING;
@@ -2405,15 +2318,16 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
struct btrfs_trans_handle *trans;
struct btrfs_device *device;
struct block_device *bdev;
- struct list_head *devices;
struct super_block *sb = fs_info->sb;
struct rcu_string *name;
- u64 tmp;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ u64 orig_super_total_bytes;
+ u64 orig_super_num_devices;
int seeding_dev = 0;
int ret = 0;
bool unlocked = false;
- if (sb_rdonly(sb) && !fs_info->fs_devices->seeding)
+ if (sb_rdonly(sb) && !fs_devices->seeding)
return -EROFS;
bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
@@ -2421,7 +2335,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
if (IS_ERR(bdev))
return PTR_ERR(bdev);
- if (fs_info->fs_devices->seeding) {
+ if (fs_devices->seeding) {
seeding_dev = 1;
down_write(&sb->s_umount);
mutex_lock(&uuid_mutex);
@@ -2429,18 +2343,16 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
filemap_write_and_wait(bdev->bd_inode->i_mapping);
- devices = &fs_info->fs_devices->devices;
-
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- list_for_each_entry(device, devices, dev_list) {
+ mutex_lock(&fs_devices->device_list_mutex);
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (device->bdev == bdev) {
ret = -EEXIST;
mutex_unlock(
- &fs_info->fs_devices->device_list_mutex);
+ &fs_devices->device_list_mutex);
goto error;
}
}
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
device = btrfs_alloc_device(fs_info, NULL, NULL);
if (IS_ERR(device)) {
@@ -2489,33 +2401,34 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
}
}
- device->fs_devices = fs_info->fs_devices;
+ device->fs_devices = fs_devices;
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_devices->device_list_mutex);
mutex_lock(&fs_info->chunk_mutex);
- list_add_rcu(&device->dev_list, &fs_info->fs_devices->devices);
- list_add(&device->dev_alloc_list,
- &fs_info->fs_devices->alloc_list);
- fs_info->fs_devices->num_devices++;
- fs_info->fs_devices->open_devices++;
- fs_info->fs_devices->rw_devices++;
- fs_info->fs_devices->total_devices++;
- fs_info->fs_devices->total_rw_bytes += device->total_bytes;
+ list_add_rcu(&device->dev_list, &fs_devices->devices);
+ list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
+ fs_devices->num_devices++;
+ fs_devices->open_devices++;
+ fs_devices->rw_devices++;
+ fs_devices->total_devices++;
+ fs_devices->total_rw_bytes += device->total_bytes;
atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
if (!blk_queue_nonrot(q))
- fs_info->fs_devices->rotating = 1;
+ fs_devices->rotating = 1;
- tmp = btrfs_super_total_bytes(fs_info->super_copy);
+ orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
btrfs_set_super_total_bytes(fs_info->super_copy,
- round_down(tmp + device->total_bytes, fs_info->sectorsize));
+ round_down(orig_super_total_bytes + device->total_bytes,
+ fs_info->sectorsize));
- tmp = btrfs_super_num_devices(fs_info->super_copy);
- btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1);
+ orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy);
+ btrfs_set_super_num_devices(fs_info->super_copy,
+ orig_super_num_devices + 1);
/* add sysfs device entry */
- btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
+ btrfs_sysfs_add_device_link(fs_devices, device);
/*
* we've got more storage, clear any full flags on the space
@@ -2524,7 +2437,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
btrfs_clear_space_info_full(fs_info);
mutex_unlock(&fs_info->chunk_mutex);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ mutex_unlock(&fs_devices->device_list_mutex);
if (seeding_dev) {
mutex_lock(&fs_info->chunk_mutex);
@@ -2536,7 +2449,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
}
}
- ret = btrfs_add_dev_item(trans, fs_info, device);
+ ret = btrfs_add_dev_item(trans, device);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto error_sysfs;
@@ -2556,7 +2469,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
*/
snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
fs_info->fsid);
- if (kobject_rename(&fs_info->fs_devices->fsid_kobj, fsid_buf))
+ if (kobject_rename(&fs_devices->fsid_kobj, fsid_buf))
btrfs_warn(fs_info,
"sysfs: failed to create fsid for sprout");
}
@@ -2591,7 +2504,23 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
return ret;
error_sysfs:
- btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
+ btrfs_sysfs_rm_device_link(fs_devices, device);
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ mutex_lock(&fs_info->chunk_mutex);
+ list_del_rcu(&device->dev_list);
+ list_del(&device->dev_alloc_list);
+ fs_info->fs_devices->num_devices--;
+ fs_info->fs_devices->open_devices--;
+ fs_info->fs_devices->rw_devices--;
+ fs_info->fs_devices->total_devices--;
+ fs_info->fs_devices->total_rw_bytes -= device->total_bytes;
+ atomic64_sub(device->total_bytes, &fs_info->free_chunk_space);
+ btrfs_set_super_total_bytes(fs_info->super_copy,
+ orig_super_total_bytes);
+ btrfs_set_super_num_devices(fs_info->super_copy,
+ orig_super_num_devices);
+ mutex_unlock(&fs_info->chunk_mutex);
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
error_trans:
if (seeding_dev)
sb->s_flags |= SB_RDONLY;
@@ -2695,9 +2624,9 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
return btrfs_update_device(trans, device);
}
-static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 chunk_offset)
+static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->chunk_root;
int ret;
struct btrfs_path *path;
@@ -2806,9 +2735,9 @@ static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
return em;
}
-int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 chunk_offset)
+int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct extent_map *em;
struct map_lookup *map;
u64 dev_extent_len = 0;
@@ -2827,7 +2756,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
}
map = em->map_lookup;
mutex_lock(&fs_info->chunk_mutex);
- check_system_chunk(trans, fs_info, map->type);
+ check_system_chunk(trans, map->type);
mutex_unlock(&fs_info->chunk_mutex);
/*
@@ -2867,7 +2796,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
}
mutex_unlock(&fs_devices->device_list_mutex);
- ret = btrfs_free_chunk(trans, fs_info, chunk_offset);
+ ret = btrfs_free_chunk(trans, chunk_offset);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -2883,7 +2812,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
}
}
- ret = btrfs_remove_block_group(trans, fs_info, chunk_offset, em);
+ ret = btrfs_remove_block_group(trans, chunk_offset, em);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -2948,7 +2877,7 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
* step two, delete the device extents and the
* chunk tree entries
*/
- ret = btrfs_remove_chunk(trans, fs_info, chunk_offset);
+ ret = btrfs_remove_chunk(trans, chunk_offset);
btrfs_end_transaction(trans);
return ret;
}
@@ -3057,7 +2986,7 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_force_chunk_alloc(trans, fs_info,
+ ret = btrfs_force_chunk_alloc(trans,
BTRFS_BLOCK_GROUP_DATA);
btrfs_end_transaction(trans);
if (ret < 0)
@@ -4690,7 +4619,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (type & BTRFS_BLOCK_GROUP_DATA) {
max_stripe_size = SZ_1G;
- max_chunk_size = 10 * max_stripe_size;
+ max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info);
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
@@ -4898,7 +4827,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
refcount_inc(&em->refs);
write_unlock(&em_tree->lock);
- ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes);
+ ret = btrfs_make_block_group(trans, 0, type, start, num_bytes);
if (ret)
goto error_del_extent;
@@ -4932,9 +4861,9 @@ error:
}
int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 chunk_offset, u64 chunk_size)
+ u64 chunk_offset, u64 chunk_size)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *extent_root = fs_info->extent_root;
struct btrfs_root *chunk_root = fs_info->chunk_root;
struct btrfs_key key;
@@ -5036,13 +4965,12 @@ out:
* require modifying the chunk tree. This division is important for the
* bootstrap process of adding storage to a seed btrfs.
*/
-int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 type)
+int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type)
{
u64 chunk_offset;
- lockdep_assert_held(&fs_info->chunk_mutex);
- chunk_offset = find_next_chunk(fs_info);
+ lockdep_assert_held(&trans->fs_info->chunk_mutex);
+ chunk_offset = find_next_chunk(trans->fs_info);
return __btrfs_alloc_chunk(trans, chunk_offset, type);
}
@@ -5173,7 +5101,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
/*
* There could be two corrupted data stripes, we need
* to loop retry in order to rebuild the correct data.
- *
+ *
* Fail a stripe at a time on every retry except the
* stripe under reconstruction.
*/
@@ -6185,21 +6113,11 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
btrfs_io_bio(bio)->stripe_index = dev_nr;
bio->bi_end_io = btrfs_end_bio;
bio->bi_iter.bi_sector = physical >> 9;
-#ifdef DEBUG
- {
- struct rcu_string *name;
-
- rcu_read_lock();
- name = rcu_dereference(dev->name);
- btrfs_debug(fs_info,
- "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
- bio_op(bio), bio->bi_opf,
- (u64)bio->bi_iter.bi_sector,
- (u_long)dev->bdev->bd_dev, name->str, dev->devid,
- bio->bi_iter.bi_size);
- rcu_read_unlock();
- }
-#endif
+ btrfs_debug_in_rcu(fs_info,
+ "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
+ bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
+ (u_long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid,
+ bio->bi_iter.bi_size);
bio_set_dev(bio, dev->bdev);
btrfs_bio_counter_inc_noblocked(fs_info);
@@ -6401,6 +6319,8 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
u16 num_stripes;
u16 sub_stripes;
u64 type;
+ u64 features;
+ bool mixed = false;
length = btrfs_chunk_length(leaf, chunk);
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
@@ -6439,6 +6359,32 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
btrfs_chunk_type(leaf, chunk));
return -EIO;
}
+
+ if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
+ btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
+ return -EIO;
+ }
+
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+ (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
+ btrfs_err(fs_info,
+ "system chunk with data or metadata type: 0x%llx", type);
+ return -EIO;
+ }
+
+ features = btrfs_super_incompat_flags(fs_info->super_copy);
+ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+ mixed = true;
+
+ if (!mixed) {
+ if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
+ (type & BTRFS_BLOCK_GROUP_DATA)) {
+ btrfs_err(fs_info,
+ "mixed chunk type in non-mixed mode: 0x%llx", type);
+ return -EIO;
+ }
+ }
+
if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
(type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
(type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
@@ -6525,6 +6471,7 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
map->type = btrfs_chunk_type(leaf, chunk);
map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+ map->verified_stripes = 0;
for (i = 0; i < num_stripes; i++) {
map->stripes[i].physical =
btrfs_stripe_offset_nr(leaf, chunk, i);
@@ -6561,10 +6508,14 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
write_lock(&map_tree->map_tree.lock);
ret = add_extent_mapping(&map_tree->map_tree, em, 0);
write_unlock(&map_tree->map_tree.lock);
- BUG_ON(ret); /* Tree corruption */
+ if (ret < 0) {
+ btrfs_err(fs_info,
+ "failed to add chunk map, start=%llu len=%llu: %d",
+ em->start, em->len, ret);
+ }
free_extent_map(em);
- return 0;
+ return ret;
}
static void fill_device_from_item(struct extent_buffer *leaf,
@@ -7106,9 +7057,9 @@ out:
}
static int update_dev_stat_item(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
struct btrfs_device *device)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *dev_root = fs_info->dev_root;
struct btrfs_path *path;
struct btrfs_key key;
@@ -7201,7 +7152,7 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
*/
smp_rmb();
- ret = update_dev_stat_item(trans, fs_info, device);
+ ret = update_dev_stat_item(trans, device);
if (!ret)
atomic_sub(stats_cnt, &device->dev_stats_ccnt);
}
@@ -7380,3 +7331,197 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
fs_devices = fs_devices->seed;
}
}
+
+/*
+ * Multiplicity factor for simple profiles: DUP, RAID1-like and RAID10.
+ */
+int btrfs_bg_type_to_factor(u64 flags)
+{
+ if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10))
+ return 2;
+ return 1;
+}
+
+
+static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
+{
+ int index = btrfs_bg_flags_to_raid_index(type);
+ int ncopies = btrfs_raid_array[index].ncopies;
+ int data_stripes;
+
+ switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ case BTRFS_BLOCK_GROUP_RAID5:
+ data_stripes = num_stripes - 1;
+ break;
+ case BTRFS_BLOCK_GROUP_RAID6:
+ data_stripes = num_stripes - 2;
+ break;
+ default:
+ data_stripes = num_stripes / ncopies;
+ break;
+ }
+ return div_u64(chunk_len, data_stripes);
+}
+
+static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
+ u64 chunk_offset, u64 devid,
+ u64 physical_offset, u64 physical_len)
+{
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+ struct extent_map *em;
+ struct map_lookup *map;
+ u64 stripe_len;
+ bool found = false;
+ int ret = 0;
+ int i;
+
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, chunk_offset, 1);
+ read_unlock(&em_tree->lock);
+
+ if (!em) {
+ btrfs_err(fs_info,
+"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
+ physical_offset, devid);
+ ret = -EUCLEAN;
+ goto out;
+ }
+
+ map = em->map_lookup;
+ stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes);
+ if (physical_len != stripe_len) {
+ btrfs_err(fs_info,
+"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
+ physical_offset, devid, em->start, physical_len,
+ stripe_len);
+ ret = -EUCLEAN;
+ goto out;
+ }
+
+ for (i = 0; i < map->num_stripes; i++) {
+ if (map->stripes[i].dev->devid == devid &&
+ map->stripes[i].physical == physical_offset) {
+ found = true;
+ if (map->verified_stripes >= map->num_stripes) {
+ btrfs_err(fs_info,
+ "too many dev extents for chunk %llu found",
+ em->start);
+ ret = -EUCLEAN;
+ goto out;
+ }
+ map->verified_stripes++;
+ break;
+ }
+ }
+ if (!found) {
+ btrfs_err(fs_info,
+ "dev extent physical offset %llu devid %llu has no corresponding chunk",
+ physical_offset, devid);
+ ret = -EUCLEAN;
+ }
+out:
+ free_extent_map(em);
+ return ret;
+}
+
+static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
+{
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+ struct extent_map *em;
+ struct rb_node *node;
+ int ret = 0;
+
+ read_lock(&em_tree->lock);
+ for (node = rb_first(&em_tree->map); node; node = rb_next(node)) {
+ em = rb_entry(node, struct extent_map, rb_node);
+ if (em->map_lookup->num_stripes !=
+ em->map_lookup->verified_stripes) {
+ btrfs_err(fs_info,
+ "chunk %llu has missing dev extent, have %d expect %d",
+ em->start, em->map_lookup->verified_stripes,
+ em->map_lookup->num_stripes);
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+out:
+ read_unlock(&em_tree->lock);
+ return ret;
+}
+
+/*
+ * Ensure that all dev extents are mapped to correct chunk, otherwise
+ * later chunk allocation/free would cause unexpected behavior.
+ *
+ * NOTE: This will iterate through the whole device tree, which should be of
+ * the same size level as the chunk tree. This slightly increases mount time.
+ */
+int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_path *path;
+ struct btrfs_root *root = fs_info->dev_root;
+ struct btrfs_key key;
+ int ret = 0;
+
+ key.objectid = 1;
+ key.type = BTRFS_DEV_EXTENT_KEY;
+ key.offset = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ path->reada = READA_FORWARD;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_item(root, path);
+ if (ret < 0)
+ goto out;
+ /* No dev extents at all? Not good */
+ if (ret > 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+ while (1) {
+ struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_dev_extent *dext;
+ int slot = path->slots[0];
+ u64 chunk_offset;
+ u64 physical_offset;
+ u64 physical_len;
+ u64 devid;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.type != BTRFS_DEV_EXTENT_KEY)
+ break;
+ devid = key.objectid;
+ physical_offset = key.offset;
+
+ dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
+ chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext);
+ physical_len = btrfs_dev_extent_length(leaf, dext);
+
+ ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
+ physical_offset, physical_len);
+ if (ret < 0)
+ goto out;
+ ret = btrfs_next_item(root, path);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ }
+
+ /* Ensure all chunks have corresponding dev extents */
+ ret = verify_chunk_dev_extent_mapping(fs_info);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5139ec8daf4c..23e9285d88de 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -11,6 +11,8 @@
#include <linux/btrfs.h>
#include "async-thread.h"
+#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
+
extern struct mutex uuid_mutex;
#define BTRFS_STRIPE_LEN SZ_64K
@@ -343,6 +345,7 @@ struct map_lookup {
u64 stripe_len;
int num_stripes;
int sub_stripes;
+ int verified_stripes; /* For mount time dev extent verification */
struct btrfs_bio_stripe stripes[];
};
@@ -382,8 +385,6 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio)
}
}
-int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
- u64 end, u64 *length);
void btrfs_get_bbio(struct btrfs_bio *bbio);
void btrfs_put_bbio(struct btrfs_bio *bbio);
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
@@ -396,20 +397,19 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
u64 physical, u64 **logical, int *naddrs, int *stripe_len);
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
-int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 type);
+int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num, int async_submit);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder);
-int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
- struct btrfs_fs_devices **fs_devices_ret);
+struct btrfs_device *btrfs_scan_one_device(const char *path,
+ fmode_t flags, void *holder);
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
-void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
- struct btrfs_device *device, struct btrfs_device *this_dev);
+void btrfs_assign_next_active_device(struct btrfs_device *device,
+ struct btrfs_device *this_dev);
int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
const char *device_path,
struct btrfs_device **device);
@@ -453,22 +453,18 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
-void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *srcdev);
+void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev);
-void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *tgtdev);
+void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);
void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
u64 logical, u64 len);
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
u64 logical);
int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- u64 chunk_offset, u64 chunk_size);
-int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 chunk_offset);
+ u64 chunk_offset, u64 chunk_size);
+int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset);
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
int index)
@@ -560,4 +556,7 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev);
+int btrfs_bg_type_to_factor(u64 flags);
+int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
+
#endif
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index d9f001078e08..4a717d400807 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -218,7 +218,8 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
"%s",
fsdef->dentry->d_sb->s_id);
- fscache_object_init(&fsdef->fscache, NULL, &cache->cache);
+ fscache_object_init(&fsdef->fscache, &fscache_fsdef_index,
+ &cache->cache);
ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag);
if (ret < 0)
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index ab0bbe93b398..af2b17b21b94 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -186,12 +186,12 @@ try_again:
* need to wait for it to be destroyed */
wait_for_old_object:
trace_cachefiles_wait_active(object, dentry, xobject);
+ clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
if (fscache_object_is_live(&xobject->fscache)) {
pr_err("\n");
pr_err("Error: Unexpected object collision\n");
cachefiles_printk_object(object, xobject);
- BUG();
}
atomic_inc(&xobject->usage);
write_unlock(&cache->active_lock);
@@ -248,7 +248,6 @@ wait_for_old_object:
goto try_again;
requeue:
- clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_timeo);
_leave(" = -ETIMEDOUT");
return -ETIMEDOUT;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 5082c8a49686..40f7595aad10 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -27,6 +27,7 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
struct cachefiles_one_read *monitor =
container_of(wait, struct cachefiles_one_read, monitor);
struct cachefiles_object *object;
+ struct fscache_retrieval *op = monitor->op;
struct wait_bit_key *key = _key;
struct page *page = wait->private;
@@ -51,16 +52,22 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
list_del(&wait->entry);
/* move onto the action list and queue for FS-Cache thread pool */
- ASSERT(monitor->op);
+ ASSERT(op);
- object = container_of(monitor->op->op.object,
- struct cachefiles_object, fscache);
+ /* We need to temporarily bump the usage count as we don't own a ref
+ * here otherwise cachefiles_read_copier() may free the op between the
+ * monitor being enqueued on the op->to_do list and the op getting
+ * enqueued on the work queue.
+ */
+ fscache_get_retrieval(op);
+ object = container_of(op->op.object, struct cachefiles_object, fscache);
spin_lock(&object->work_lock);
- list_add_tail(&monitor->op_link, &monitor->op->to_do);
+ list_add_tail(&monitor->op_link, &op->to_do);
spin_unlock(&object->work_lock);
- fscache_enqueue_retrieval(monitor->op);
+ fscache_enqueue_retrieval(op);
+ fscache_put_retrieval(op);
return 0;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ad0bed99b1d5..e2679e8a2535 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -429,8 +429,7 @@ out:
* file or symlink, return 1 so the VFS can retry.
*/
int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned flags, umode_t mode,
- int *opened)
+ struct file *file, unsigned flags, umode_t mode)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -507,9 +506,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
dout("atomic_open finish_open on dn %p\n", dn);
if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
ceph_init_inode_acls(d_inode(dentry), &acls);
- *opened |= FILE_CREATED;
+ file->f_mode |= FMODE_CREATED;
}
- err = finish_open(file, dentry, ceph_open, opened);
+ err = finish_open(file, dentry, ceph_open);
}
out_req:
if (!req->r_err && req->r_target_inode)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a7077a0c989f..971328b99ede 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1025,8 +1025,7 @@ extern const struct file_operations ceph_file_fops;
extern int ceph_renew_caps(struct inode *inode);
extern int ceph_open(struct inode *inode, struct file *file);
extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned flags, umode_t mode,
- int *opened);
+ struct file *file, unsigned flags, umode_t mode);
extern int ceph_release(struct inode *inode, struct file *filp);
extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
char *data, size_t len);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 5f132d59dfc2..35c83fe7dba0 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -16,24 +16,28 @@ config CIFS
select CRYPTO_DES
help
This is the client VFS module for the SMB3 family of NAS protocols,
- as well as for earlier dialects such as SMB2.1, SMB2 and the
+ (including support for the most recent, most secure dialect SMB3.1.1)
+ as well as for earlier dialects such as SMB2.1, SMB2 and the older
Common Internet File System (CIFS) protocol. CIFS was the successor
to the original dialect, the Server Message Block (SMB) protocol, the
native file sharing mechanism for most early PC operating systems.
- The SMB3 protocol is supported by most modern operating systems and
- NAS appliances (e.g. Samba, Windows 8, Windows 2012, MacOS).
+ The SMB3 protocol is supported by most modern operating systems
+ and NAS appliances (e.g. Samba, Windows 10, Windows Server 2016,
+ MacOS) and even in the cloud (e.g. Microsoft Azure).
The older CIFS protocol was included in Windows NT4, 2000 and XP (and
later) as well by Samba (which provides excellent CIFS and SMB3
- server support for Linux and many other operating systems). Limited
- support for OS/2 and Windows ME and similar very old servers is
- provided as well.
+ server support for Linux and many other operating systems). Use of
+ dialects older than SMB2.1 is often discouraged on public networks.
+ This module also provides limited support for OS/2 and Windows ME
+ and similar very old servers.
- The cifs module provides an advanced network file system client
+ This module provides an advanced network file system client
for mounting to SMB3 (and CIFS) compliant servers. It includes
support for DFS (hierarchical name space), secure per-user
- session establishment via Kerberos or NTLM or NTLMv2,
- safe distributed caching (oplock), optional packet
+ session establishment via Kerberos or NTLM or NTLMv2, RDMA
+ (smbdirect), advanced security features, per-share encryption,
+ directory leases, safe distributed caching (oplock), optional packet
signing, Unicode and other internationalization improvements.
In general, the default dialects, SMB3 and later, enable better
@@ -43,18 +47,11 @@ config CIFS
than SMB3 mounts. SMB2/SMB3 mount options are also
slightly simpler (compared to CIFS) due to protocol improvements.
- If you need to mount to Samba, Macs or Windows from this machine, say Y.
-
-config CIFS_STATS
- bool "CIFS statistics"
- depends on CIFS
- help
- Enabling this option will cause statistics for each server share
- mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
+ If you need to mount to Samba, Azure, Macs or Windows from this machine, say Y.
config CIFS_STATS2
bool "Extended statistics"
- depends on CIFS_STATS
+ depends on CIFS
help
Enabling this option will allow more detailed statistics on SMB
request timing to be displayed in /proc/fs/cifs/DebugData and also
@@ -66,9 +63,24 @@ config CIFS_STATS2
Unless you are a developer or are doing network performance analysis
or tuning, say N.
+config CIFS_ALLOW_INSECURE_LEGACY
+ bool "Support legacy servers which use less secure dialects"
+ depends on CIFS
+ default y
+ help
+ Modern dialects, SMB2.1 and later (including SMB3 and 3.1.1), have
+ additional security features, including protection against
+ man-in-the-middle attacks and stronger crypto hashes, so the use
+ of legacy dialects (SMB1/CIFS and SMB2.0) is discouraged.
+
+ Disabling this option prevents users from using vers=1.0 or vers=2.0
+ on mounts with cifs.ko
+
+ If unsure, say Y.
+
config CIFS_WEAK_PW_HASH
bool "Support legacy servers which use weaker LANMAN security"
- depends on CIFS
+ depends on CIFS && CIFS_ALLOW_INSECURE_LEGACY
help
Modern CIFS servers including Samba and most Windows versions
(since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
@@ -186,15 +198,6 @@ config CIFS_NFSD_EXPORT
help
Allows NFS server to export a CIFS mounted share (nfsd over cifs)
-config CIFS_SMB311
- bool "SMB3.1.1 network file system support"
- depends on CIFS
- select CRYPTO_SHA512
-
- help
- This enables support for the newest, and most secure dialect, SMB3.11.
- If unsure, say Y
-
config CIFS_SMB_DIRECT
bool "SMB Direct support (Experimental)"
depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index e1553d1e0e50..b7420e605b28 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -128,8 +128,10 @@ fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
memset(&auxdata, 0, sizeof(auxdata));
auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time = timespec64_to_timespec(cifsi->vfs_inode.i_mtime);
- auxdata.last_change_time = timespec64_to_timespec(cifsi->vfs_inode.i_ctime);
+ auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
+ auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
+ auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
+ auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
if (memcmp(data, &auxdata, datalen) != 0)
return FSCACHE_CHECKAUX_OBSOLETE;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index bfe999505815..f1fbea947fef 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -160,25 +160,41 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
seq_printf(m, "Features:");
#ifdef CONFIG_CIFS_DFS_UPCALL
- seq_printf(m, " dfs");
+ seq_printf(m, " DFS");
#endif
#ifdef CONFIG_CIFS_FSCACHE
- seq_printf(m, " fscache");
+ seq_printf(m, ",FSCACHE");
+#endif
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ seq_printf(m, ",SMB_DIRECT");
+#endif
+#ifdef CONFIG_CIFS_STATS2
+ seq_printf(m, ",STATS2");
+#else
+ seq_printf(m, ",STATS");
+#endif
+#ifdef CONFIG_CIFS_DEBUG2
+ seq_printf(m, ",DEBUG2");
+#elif defined(CONFIG_CIFS_DEBUG)
+ seq_printf(m, ",DEBUG");
+#endif
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+ seq_printf(m, ",ALLOW_INSECURE_LEGACY");
#endif
#ifdef CONFIG_CIFS_WEAK_PW_HASH
- seq_printf(m, " lanman");
+ seq_printf(m, ",WEAK_PW_HASH");
#endif
#ifdef CONFIG_CIFS_POSIX
- seq_printf(m, " posix");
+ seq_printf(m, ",CIFS_POSIX");
#endif
#ifdef CONFIG_CIFS_UPCALL
- seq_printf(m, " spnego");
+ seq_printf(m, ",UPCALL(SPNEGO)");
#endif
#ifdef CONFIG_CIFS_XATTR
- seq_printf(m, " xattr");
+ seq_printf(m, ",XATTR");
#endif
#ifdef CONFIG_CIFS_ACL
- seq_printf(m, " acl");
+ seq_printf(m, ",ACL");
#endif
seq_putc(m, '\n');
seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
@@ -259,10 +275,9 @@ skip_rdma:
server->credits, server->dialect);
if (server->sign)
seq_printf(m, " signed");
-#ifdef CONFIG_CIFS_SMB311
if (server->posix_ext_supported)
seq_printf(m, " posix");
-#endif /* 3.1.1 */
+
i++;
list_for_each(tmp2, &server->smb_ses_list) {
ses = list_entry(tmp2, struct cifs_ses,
@@ -350,7 +365,6 @@ skip_rdma:
return 0;
}
-#ifdef CONFIG_CIFS_STATS
static ssize_t cifs_stats_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
@@ -364,13 +378,23 @@ static ssize_t cifs_stats_proc_write(struct file *file,
rc = kstrtobool_from_user(buffer, count, &bv);
if (rc == 0) {
#ifdef CONFIG_CIFS_STATS2
+ int i;
+
atomic_set(&totBufAllocCount, 0);
atomic_set(&totSmBufAllocCount, 0);
#endif /* CONFIG_CIFS_STATS2 */
+ spin_lock(&GlobalMid_Lock);
+ GlobalMaxActiveXid = 0;
+ GlobalCurrentXid = 0;
+ spin_unlock(&GlobalMid_Lock);
spin_lock(&cifs_tcp_ses_lock);
list_for_each(tmp1, &cifs_tcp_ses_list) {
server = list_entry(tmp1, struct TCP_Server_Info,
tcp_ses_list);
+#ifdef CONFIG_CIFS_STATS2
+ for (i = 0; i < NUMBER_OF_SMB2_COMMANDS; i++)
+ atomic_set(&server->smb2slowcmd[i], 0);
+#endif /* CONFIG_CIFS_STATS2 */
list_for_each(tmp2, &server->smb_ses_list) {
ses = list_entry(tmp2, struct cifs_ses,
smb_ses_list);
@@ -379,6 +403,10 @@ static ssize_t cifs_stats_proc_write(struct file *file,
struct cifs_tcon,
tcon_list);
atomic_set(&tcon->num_smbs_sent, 0);
+ spin_lock(&tcon->stat_lock);
+ tcon->bytes_read = 0;
+ tcon->bytes_written = 0;
+ spin_unlock(&tcon->stat_lock);
if (server->ops->clear_stats)
server->ops->clear_stats(tcon);
}
@@ -395,13 +423,15 @@ static ssize_t cifs_stats_proc_write(struct file *file,
static int cifs_stats_proc_show(struct seq_file *m, void *v)
{
int i;
+#ifdef CONFIG_CIFS_STATS2
+ int j;
+#endif /* STATS2 */
struct list_head *tmp1, *tmp2, *tmp3;
struct TCP_Server_Info *server;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
- seq_printf(m,
- "Resources in use\nCIFS Session: %d\n",
+ seq_printf(m, "Resources in use\nCIFS Session: %d\n",
sesInfoAllocCount.counter);
seq_printf(m, "Share (unique mount targets): %d\n",
tconInfoAllocCount.counter);
@@ -430,6 +460,13 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
list_for_each(tmp1, &cifs_tcp_ses_list) {
server = list_entry(tmp1, struct TCP_Server_Info,
tcp_ses_list);
+#ifdef CONFIG_CIFS_STATS2
+ for (j = 0; j < NUMBER_OF_SMB2_COMMANDS; j++)
+ if (atomic_read(&server->smb2slowcmd[j]))
+ seq_printf(m, "%d slow responses from %s for command %d\n",
+ atomic_read(&server->smb2slowcmd[j]),
+ server->hostname, j);
+#endif /* STATS2 */
list_for_each(tmp2, &server->smb_ses_list) {
ses = list_entry(tmp2, struct cifs_ses,
smb_ses_list);
@@ -466,7 +503,6 @@ static const struct file_operations cifs_stats_proc_fops = {
.release = single_release,
.write = cifs_stats_proc_write,
};
-#endif /* STATS */
#ifdef CONFIG_CIFS_SMB_DIRECT
#define PROC_FILE_DEFINE(name) \
@@ -524,9 +560,7 @@ cifs_proc_init(void)
proc_create_single("DebugData", 0, proc_fs_cifs,
cifs_debug_data_proc_show);
-#ifdef CONFIG_CIFS_STATS
proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_fops);
-#endif /* STATS */
proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_fops);
proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_fops);
proc_create("LinuxExtensionsEnabled", 0644, proc_fs_cifs,
@@ -564,9 +598,7 @@ cifs_proc_clean(void)
remove_proc_entry("DebugData", proc_fs_cifs);
remove_proc_entry("cifsFYI", proc_fs_cifs);
remove_proc_entry("traceSMB", proc_fs_cifs);
-#ifdef CONFIG_CIFS_STATS
remove_proc_entry("Stats", proc_fs_cifs);
-#endif
remove_proc_entry("SecurityFlags", proc_fs_cifs);
remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index ee2a8ec70056..85b31cfa2f3c 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -83,7 +83,13 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
kaddr = (char *) kmap(rqst->rq_pages[i]) + offset;
- crypto_shash_update(shash, kaddr, len);
+ rc = crypto_shash_update(shash, kaddr, len);
+ if (rc) {
+ cifs_dbg(VFS, "%s: Could not update with payload\n",
+ __func__);
+ kunmap(rqst->rq_pages[i]);
+ return rc;
+ }
kunmap(rqst->rq_pages[i]);
}
@@ -452,7 +458,7 @@ find_timestamp(struct cifs_ses *ses)
unsigned char *blobptr;
unsigned char *blobend;
struct ntlmssp2_name *attrptr;
- struct timespec ts;
+ struct timespec64 ts;
if (!ses->auth_key.len || !ses->auth_key.response)
return 0;
@@ -477,7 +483,7 @@ find_timestamp(struct cifs_ses *ses)
blobptr += attrsize; /* advance attr value */
}
- ktime_get_real_ts(&ts);
+ ktime_get_real_ts64(&ts);
return cpu_to_le64(cifs_UnixTimeToNT(ts));
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d5aa7ae917bf..7065426b3280 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -139,6 +139,9 @@ cifs_read_super(struct super_block *sb)
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL)
sb->s_flags |= SB_POSIXACL;
+ if (tcon->snapshot_time)
+ sb->s_flags |= SB_RDONLY;
+
if (tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files)
sb->s_maxbytes = MAX_LFS_FILESIZE;
else
@@ -209,14 +212,16 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
xid = get_xid();
- /*
- * PATH_MAX may be too long - it would presumably be total path,
- * but note that some servers (includinng Samba 3) have a shorter
- * maximum path.
- *
- * Instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO.
- */
- buf->f_namelen = PATH_MAX;
+ if (le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength) > 0)
+ buf->f_namelen =
+ le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength);
+ else
+ buf->f_namelen = PATH_MAX;
+
+ buf->f_fsid.val[0] = tcon->vol_serial_number;
+ /* are using part of create time for more randomness, see man statfs */
+ buf->f_fsid.val[1] = (int)le64_to_cpu(tcon->vol_create_time);
+
buf->f_files = 0; /* undefined */
buf->f_ffree = 0; /* unlimited */
@@ -427,7 +432,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
else if (tcon->ses->user_name)
seq_show_option(s, "username", tcon->ses->user_name);
- if (tcon->ses->domainName)
+ if (tcon->ses->domainName && tcon->ses->domainName[0] != 0)
seq_show_option(s, "domain", tcon->ses->domainName);
if (srcaddr->sa_family != AF_UNSPEC) {
@@ -481,20 +486,12 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",persistenthandles");
else if (tcon->use_resilient)
seq_puts(s, ",resilienthandles");
-
-#ifdef CONFIG_CIFS_SMB311
if (tcon->posix_extensions)
seq_puts(s, ",posix");
else if (tcon->unix_ext)
seq_puts(s, ",unix");
else
seq_puts(s, ",nounix");
-#else
- if (tcon->unix_ext)
- seq_puts(s, ",unix");
- else
- seq_puts(s, ",nounix");
-#endif /* SMB311 */
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
seq_puts(s, ",posixpaths");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
@@ -546,6 +543,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_printf(s, ",wsize=%u", cifs_sb->wsize);
seq_printf(s, ",echo_interval=%lu",
tcon->ses->server->echo_interval / HZ);
+ if (tcon->snapshot_time)
+ seq_printf(s, ",snapshot=%llu", tcon->snapshot_time);
/* convert actimeo and display it in seconds */
seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 5f0231803431..f3a78efc3109 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -65,8 +65,7 @@ extern struct inode *cifs_root_iget(struct super_block *);
extern int cifs_create(struct inode *, struct dentry *, umode_t,
bool excl);
extern int cifs_atomic_open(struct inode *, struct dentry *,
- struct file *, unsigned, umode_t,
- int *);
+ struct file *, unsigned, umode_t);
extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
unsigned int);
extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c923c7854027..0c9ab62c3df4 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -76,6 +76,9 @@
#define SMB_ECHO_INTERVAL_MAX 600
#define SMB_ECHO_INTERVAL_DEFAULT 60
+/* maximum number of PDUs in one compound */
+#define MAX_COMPOUND 5
+
/*
* Default number of credits to keep available for SMB3.
* This value is chosen somewhat arbitrarily. The Windows client
@@ -191,9 +194,7 @@ enum smb_version {
Smb_21,
Smb_30,
Smb_302,
-#ifdef CONFIG_CIFS_SMB311
Smb_311,
-#endif /* SMB311 */
Smb_3any,
Smb_default,
Smb_version_err
@@ -456,13 +457,11 @@ struct smb_version_operations {
long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t,
loff_t);
/* init transform request - used for encryption for now */
- int (*init_transform_rq)(struct TCP_Server_Info *, struct smb_rqst *,
- struct smb_rqst *);
- /* free transform request */
- void (*free_transform_rq)(struct smb_rqst *);
+ int (*init_transform_rq)(struct TCP_Server_Info *, int num_rqst,
+ struct smb_rqst *, struct smb_rqst *);
int (*is_transform_hdr)(void *buf);
int (*receive_transform)(struct TCP_Server_Info *,
- struct mid_q_entry **);
+ struct mid_q_entry **, char **, int *);
enum securityEnum (*select_sectype)(struct TCP_Server_Info *,
enum securityEnum);
int (*next_header)(char *);
@@ -684,15 +683,14 @@ struct TCP_Server_Info {
#ifdef CONFIG_CIFS_STATS2
atomic_t in_send; /* requests trying to send */
atomic_t num_waiters; /* blocked waiting to get in sendrecv */
-#endif
+ atomic_t smb2slowcmd[NUMBER_OF_SMB2_COMMANDS]; /* count resps > 1 sec */
+#endif /* STATS2 */
unsigned int max_read;
unsigned int max_write;
-#ifdef CONFIG_CIFS_SMB311
__le16 cipher_type;
/* save initital negprot hash */
__u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
bool posix_ext_supported;
-#endif /* 3.1.1 */
struct delayed_work reconnect; /* reconnect workqueue job */
struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
unsigned long echo_interval;
@@ -886,9 +884,7 @@ struct cifs_ses {
__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
__u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
__u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
-#ifdef CONFIG_CIFS_SMB311
__u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
-#endif /* 3.1.1 */
/*
* Network interfaces available on the server this session is
@@ -913,6 +909,7 @@ cap_unix(struct cifs_ses *ses)
struct cached_fid {
bool is_valid:1; /* Do we have a useable root fid */
+ struct kref refcount;
struct cifs_fid *fid;
struct mutex fid_mutex;
struct cifs_tcon *tcon;
@@ -936,7 +933,6 @@ struct cifs_tcon {
__u32 tid; /* The 4 byte tree id */
__u16 Flags; /* optional support bits */
enum statusEnum tidStatus;
-#ifdef CONFIG_CIFS_STATS
atomic_t num_smbs_sent;
union {
struct {
@@ -967,24 +963,9 @@ struct cifs_tcon {
atomic_t smb2_com_failed[NUMBER_OF_SMB2_COMMANDS];
} smb2_stats;
} stats;
-#ifdef CONFIG_CIFS_STATS2
- unsigned long long time_writes;
- unsigned long long time_reads;
- unsigned long long time_opens;
- unsigned long long time_deletes;
- unsigned long long time_closes;
- unsigned long long time_mkdirs;
- unsigned long long time_rmdirs;
- unsigned long long time_renames;
- unsigned long long time_t2renames;
- unsigned long long time_ffirst;
- unsigned long long time_fnext;
- unsigned long long time_fclose;
-#endif /* CONFIG_CIFS_STATS2 */
__u64 bytes_read;
__u64 bytes_written;
spinlock_t stat_lock; /* protects the two fields above */
-#endif /* CONFIG_CIFS_STATS */
FILE_SYSTEM_DEVICE_INFO fsDevInfo;
FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
FILE_SYSTEM_UNIX_INFO fsUnixInfo;
@@ -997,9 +978,7 @@ struct cifs_tcon {
bool seal:1; /* transport encryption for this mounted share */
bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol
for this mount even if server would support */
-#ifdef CONFIG_CIFS_SMB311
bool posix_extensions; /* if true SMB3.11 posix extensions enabled */
-#endif /* CIFS_311 */
bool local_lease:1; /* check leases (only) on local system not remote */
bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
bool broken_sparse_sup; /* if server or share does not support sparse */
@@ -1046,6 +1025,7 @@ struct tcon_link {
};
extern struct tcon_link *cifs_sb_tlink(struct cifs_sb_info *cifs_sb);
+extern void smb3_free_compound_rqst(int num_rqst, struct smb_rqst *rqst);
static inline struct cifs_tcon *
tlink_tcon(struct tcon_link *tlink)
@@ -1352,7 +1332,6 @@ convert_delimiter(char *path, char delim)
*pos = delim;
}
-#ifdef CONFIG_CIFS_STATS
#define cifs_stats_inc atomic_inc
static inline void cifs_stats_bytes_written(struct cifs_tcon *tcon,
@@ -1372,13 +1351,6 @@ static inline void cifs_stats_bytes_read(struct cifs_tcon *tcon,
tcon->bytes_read += bytes;
spin_unlock(&tcon->stat_lock);
}
-#else
-
-#define cifs_stats_inc(field) do {} while (0)
-#define cifs_stats_bytes_written(tcon, bytes) do {} while (0)
-#define cifs_stats_bytes_read(tcon, bytes) do {} while (0)
-
-#endif
/*
@@ -1544,9 +1516,9 @@ struct cifs_fattr {
dev_t cf_rdev;
unsigned int cf_nlink;
unsigned int cf_dtype;
- struct timespec cf_atime;
- struct timespec cf_mtime;
- struct timespec cf_ctime;
+ struct timespec64 cf_atime;
+ struct timespec64 cf_mtime;
+ struct timespec64 cf_ctime;
};
static inline void free_dfs_info_param(struct dfs_info3_param *param)
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1890f534c88b..20adda4de83b 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -94,6 +94,10 @@ extern int cifs_call_async(struct TCP_Server_Info *server,
extern int cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
struct smb_rqst *rqst, int *resp_buf_type,
const int flags, struct kvec *resp_iov);
+extern int compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
+ const int flags, const int num_rqst,
+ struct smb_rqst *rqst, int *resp_buf_type,
+ struct kvec *resp_iov);
extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *,
struct smb_hdr * /* input */ ,
struct smb_hdr * /* out */ ,
@@ -143,9 +147,9 @@ extern enum securityEnum select_sectype(struct TCP_Server_Info *server,
enum securityEnum requested);
extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
const struct nls_table *nls_cp);
-extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
-extern u64 cifs_UnixTimeToNT(struct timespec);
-extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
+extern struct timespec64 cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
+extern u64 cifs_UnixTimeToNT(struct timespec64);
+extern struct timespec64 cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
int offset);
extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
extern int cifs_get_writer(struct cifsInodeInfo *cinode);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 93408eab92e7..dc2f4cf08fe9 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -508,13 +508,13 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr)
* this requirement.
*/
int val, seconds, remain, result;
- struct timespec ts;
- unsigned long utc = ktime_get_real_seconds();
+ struct timespec64 ts;
+ time64_t utc = ktime_get_real_seconds();
ts = cnvrtDosUnixTm(rsp->SrvTime.Date,
rsp->SrvTime.Time, 0);
- cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n",
- (int)ts.tv_sec, (int)utc,
- (int)(utc - ts.tv_sec));
+ cifs_dbg(FYI, "SrvTime %lld sec since 1970 (utc: %lld) diff: %lld\n",
+ ts.tv_sec, utc,
+ utc - ts.tv_sec);
val = (int)(utc - ts.tv_sec);
seconds = abs(val);
result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
@@ -4082,7 +4082,7 @@ QInfRetry:
if (rc) {
cifs_dbg(FYI, "Send error in QueryInfo = %d\n", rc);
} else if (data) {
- struct timespec ts;
+ struct timespec64 ts;
__u32 time = le32_to_cpu(pSMBr->last_write_time);
/* decode response */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5df2c0698cda..c832a8a1970a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -303,10 +303,8 @@ static const match_table_t cifs_smb_version_tokens = {
{ Smb_21, SMB21_VERSION_STRING },
{ Smb_30, SMB30_VERSION_STRING },
{ Smb_302, SMB302_VERSION_STRING },
-#ifdef CONFIG_CIFS_SMB311
{ Smb_311, SMB311_VERSION_STRING },
{ Smb_311, ALT_SMB311_VERSION_STRING },
-#endif /* SMB311 */
{ Smb_3any, SMB3ANY_VERSION_STRING },
{ Smb_default, SMBDEFAULT_VERSION_STRING },
{ Smb_version_err, NULL }
@@ -350,6 +348,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
server->max_read = 0;
cifs_dbg(FYI, "Reconnecting tcp session\n");
+ trace_smb3_reconnect(server->CurrentMid, server->hostname);
/* before reconnecting the tcp session, mark the smb session (uid)
and the tid bad so they are not used until reconnected */
@@ -851,13 +850,14 @@ cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
static int
cifs_demultiplex_thread(void *p)
{
- int length;
+ int i, num_mids, length;
struct TCP_Server_Info *server = p;
unsigned int pdu_length;
unsigned int next_offset;
char *buf = NULL;
struct task_struct *task_to_wake = NULL;
- struct mid_q_entry *mid_entry;
+ struct mid_q_entry *mids[MAX_COMPOUND];
+ char *bufs[MAX_COMPOUND];
current->flags |= PF_MEMALLOC;
cifs_dbg(FYI, "Demultiplex PID: %d\n", task_pid_nr(current));
@@ -924,58 +924,75 @@ next_pdu:
server->pdu_size = next_offset;
}
- mid_entry = NULL;
+ memset(mids, 0, sizeof(mids));
+ memset(bufs, 0, sizeof(bufs));
+ num_mids = 0;
+
if (server->ops->is_transform_hdr &&
server->ops->receive_transform &&
server->ops->is_transform_hdr(buf)) {
length = server->ops->receive_transform(server,
- &mid_entry);
+ mids,
+ bufs,
+ &num_mids);
} else {
- mid_entry = server->ops->find_mid(server, buf);
+ mids[0] = server->ops->find_mid(server, buf);
+ bufs[0] = buf;
+ if (mids[0])
+ num_mids = 1;
- if (!mid_entry || !mid_entry->receive)
- length = standard_receive3(server, mid_entry);
+ if (!mids[0] || !mids[0]->receive)
+ length = standard_receive3(server, mids[0]);
else
- length = mid_entry->receive(server, mid_entry);
+ length = mids[0]->receive(server, mids[0]);
}
if (length < 0) {
- if (mid_entry)
- cifs_mid_q_entry_release(mid_entry);
+ for (i = 0; i < num_mids; i++)
+ if (mids[i])
+ cifs_mid_q_entry_release(mids[i]);
continue;
}
if (server->large_buf)
buf = server->bigbuf;
+
server->lstrp = jiffies;
- if (mid_entry != NULL) {
- mid_entry->resp_buf_size = server->pdu_size;
- if ((mid_entry->mid_flags & MID_WAIT_CANCELLED) &&
- mid_entry->mid_state == MID_RESPONSE_RECEIVED &&
- server->ops->handle_cancelled_mid)
- server->ops->handle_cancelled_mid(
- mid_entry->resp_buf,
- server);
- if (!mid_entry->multiRsp || mid_entry->multiEnd)
- mid_entry->callback(mid_entry);
+ for (i = 0; i < num_mids; i++) {
+ if (mids[i] != NULL) {
+ mids[i]->resp_buf_size = server->pdu_size;
+ if ((mids[i]->mid_flags & MID_WAIT_CANCELLED) &&
+ mids[i]->mid_state == MID_RESPONSE_RECEIVED &&
+ server->ops->handle_cancelled_mid)
+ server->ops->handle_cancelled_mid(
+ mids[i]->resp_buf,
+ server);
- cifs_mid_q_entry_release(mid_entry);
- } else if (server->ops->is_oplock_break &&
- server->ops->is_oplock_break(buf, server)) {
- cifs_dbg(FYI, "Received oplock break\n");
- } else {
- cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n",
- atomic_read(&midCount));
- cifs_dump_mem("Received Data is: ", buf,
- HEADER_SIZE(server));
+ if (!mids[i]->multiRsp || mids[i]->multiEnd)
+ mids[i]->callback(mids[i]);
+
+ cifs_mid_q_entry_release(mids[i]);
+ } else if (server->ops->is_oplock_break &&
+ server->ops->is_oplock_break(bufs[i],
+ server)) {
+ cifs_dbg(FYI, "Received oplock break\n");
+ } else {
+ cifs_dbg(VFS, "No task to wake, unknown frame "
+ "received! NumMids %d\n",
+ atomic_read(&midCount));
+ cifs_dump_mem("Received Data is: ", bufs[i],
+ HEADER_SIZE(server));
#ifdef CONFIG_CIFS_DEBUG2
- if (server->ops->dump_detail)
- server->ops->dump_detail(buf, server);
- cifs_dump_mids(server);
+ if (server->ops->dump_detail)
+ server->ops->dump_detail(bufs[i],
+ server);
+ cifs_dump_mids(server);
#endif /* CIFS_DEBUG2 */
+ }
}
+
if (pdu_length > server->pdu_size) {
if (!allocate_buffers(server))
continue;
@@ -1174,6 +1191,7 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3)
substring_t args[MAX_OPT_ARGS];
switch (match_token(value, cifs_smb_version_tokens, args)) {
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
case Smb_1:
if (disable_legacy_dialects) {
cifs_dbg(VFS, "mount with legacy dialect disabled\n");
@@ -1198,6 +1216,14 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3)
vol->ops = &smb20_operations;
vol->vals = &smb20_values;
break;
+#else
+ case Smb_1:
+ cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n");
+ return 1;
+ case Smb_20:
+ cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n");
+ return 1;
+#endif /* CIFS_ALLOW_INSECURE_LEGACY */
case Smb_21:
vol->ops = &smb21_operations;
vol->vals = &smb21_values;
@@ -1210,12 +1236,10 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3)
vol->ops = &smb30_operations; /* currently identical with 3.0 */
vol->vals = &smb302_values;
break;
-#ifdef CONFIG_CIFS_SMB311
case Smb_311:
vol->ops = &smb311_operations;
vol->vals = &smb311_values;
break;
-#endif /* SMB311 */
case Smb_3any:
vol->ops = &smb30_operations; /* currently identical with 3.0 */
vol->vals = &smb3any_values;
@@ -3030,15 +3054,17 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
}
}
-#ifdef CONFIG_CIFS_SMB311
- if ((volume_info->linux_ext) && (ses->server->posix_ext_supported)) {
- if (ses->server->vals->protocol_id == SMB311_PROT_ID) {
+ if (volume_info->linux_ext) {
+ if (ses->server->posix_ext_supported) {
tcon->posix_extensions = true;
printk_once(KERN_WARNING
"SMB3.11 POSIX Extensions are experimental\n");
+ } else {
+ cifs_dbg(VFS, "Server does not support mounting with posix SMB3.11 extensions.\n");
+ rc = -EOPNOTSUPP;
+ goto out_fail;
}
}
-#endif /* 311 */
/*
* BB Do we need to wrap session_mutex around this TCon call and Unix
@@ -3992,11 +4018,9 @@ try_mount_again:
goto remote_path_check;
}
-#ifdef CONFIG_CIFS_SMB311
/* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
if (tcon->posix_extensions)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
-#endif /* SMB3.11 */
/* tell server which Unix caps we support */
if (cap_unix(tcon->ses)) {
@@ -4459,11 +4483,10 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
goto out;
}
-#ifdef CONFIG_CIFS_SMB311
/* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
if (tcon->posix_extensions)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
-#endif /* SMB3.11 */
+
if (cap_unix(ses))
reset_cifs_unix_caps(0, tcon, NULL, vol_info);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ddae52bd1993..3713d22b95a7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -465,8 +465,7 @@ out_err:
int
cifs_atomic_open(struct inode *inode, struct dentry *direntry,
- struct file *file, unsigned oflags, umode_t mode,
- int *opened)
+ struct file *file, unsigned oflags, umode_t mode)
{
int rc;
unsigned int xid;
@@ -539,9 +538,9 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
}
if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
- *opened |= FILE_CREATED;
+ file->f_mode |= FMODE_CREATED;
- rc = finish_open(file, direntry, generic_file_open, opened);
+ rc = finish_open(file, direntry, generic_file_open);
if (rc) {
if (server->ops->close)
server->ops->close(xid, tcon, &fid);
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 85145a763021..ea6ace9c2417 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -129,8 +129,10 @@ static void cifs_fscache_acquire_inode_cookie(struct cifsInodeInfo *cifsi,
memset(&auxdata, 0, sizeof(auxdata));
auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time = timespec64_to_timespec(cifsi->vfs_inode.i_mtime);
- auxdata.last_change_time = timespec64_to_timespec(cifsi->vfs_inode.i_ctime);
+ auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
+ auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
+ auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
+ auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
cifsi->fscache =
fscache_acquire_cookie(tcon->fscache,
@@ -166,8 +168,10 @@ void cifs_fscache_release_inode_cookie(struct inode *inode)
if (cifsi->fscache) {
memset(&auxdata, 0, sizeof(auxdata));
auxdata.eof = cifsi->server_eof;
- auxdata.last_write_time = timespec64_to_timespec(cifsi->vfs_inode.i_mtime);
- auxdata.last_change_time = timespec64_to_timespec(cifsi->vfs_inode.i_ctime);
+ auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec;
+ auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec;
+ auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec;
+ auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec;
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
fscache_relinquish_cookie(cifsi->fscache, &auxdata, false);
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index c7e3ac251e16..8c0862e41306 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -31,9 +31,11 @@
* Auxiliary data attached to CIFS inode within the cache
*/
struct cifs_fscache_inode_auxdata {
- struct timespec last_write_time;
- struct timespec last_change_time;
- u64 eof;
+ u64 last_write_time_sec;
+ u64 last_change_time_sec;
+ u32 last_write_time_nsec;
+ u32 last_change_time_nsec;
+ u64 eof;
};
/*
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a2cfb33e85c1..d32eaa4b2437 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -95,7 +95,6 @@ static void
cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
{
struct cifsInodeInfo *cifs_i = CIFS_I(inode);
- struct timespec ts;
cifs_dbg(FYI, "%s: revalidating inode %llu\n",
__func__, cifs_i->uniqueid);
@@ -114,8 +113,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
}
/* revalidate if mtime or size have changed */
- ts = timespec64_to_timespec(inode->i_mtime);
- if (timespec_equal(&ts, &fattr->cf_mtime) &&
+ if (timespec64_equal(&inode->i_mtime, &fattr->cf_mtime) &&
cifs_i->server_eof == fattr->cf_eof) {
cifs_dbg(FYI, "%s: inode %llu is unchanged\n",
__func__, cifs_i->uniqueid);
@@ -164,9 +162,9 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
cifs_revalidate_cache(inode, fattr);
spin_lock(&inode->i_lock);
- inode->i_atime = timespec_to_timespec64(fattr->cf_atime);
- inode->i_mtime = timespec_to_timespec64(fattr->cf_mtime);
- inode->i_ctime = timespec_to_timespec64(fattr->cf_ctime);
+ inode->i_atime = fattr->cf_atime;
+ inode->i_mtime = fattr->cf_mtime;
+ inode->i_ctime = fattr->cf_ctime;
inode->i_rdev = fattr->cf_rdev;
cifs_nlink_fattr_to_inode(inode, fattr);
inode->i_uid = fattr->cf_uid;
@@ -327,8 +325,8 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU;
fattr->cf_uid = cifs_sb->mnt_uid;
fattr->cf_gid = cifs_sb->mnt_gid;
- ktime_get_real_ts(&fattr->cf_mtime);
- fattr->cf_mtime = timespec_trunc(fattr->cf_mtime, sb->s_time_gran);
+ ktime_get_real_ts64(&fattr->cf_mtime);
+ fattr->cf_mtime = timespec64_trunc(fattr->cf_mtime, sb->s_time_gran);
fattr->cf_atime = fattr->cf_ctime = fattr->cf_mtime;
fattr->cf_nlink = 2;
fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL;
@@ -604,8 +602,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
if (info->LastAccessTime)
fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
else {
- ktime_get_real_ts(&fattr->cf_atime);
- fattr->cf_atime = timespec_trunc(fattr->cf_atime, sb->s_time_gran);
+ ktime_get_real_ts64(&fattr->cf_atime);
+ fattr->cf_atime = timespec64_trunc(fattr->cf_atime, sb->s_time_gran);
}
fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
@@ -1122,17 +1120,19 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
if (!server->ops->set_file_info)
return -ENOSYS;
+ info_buf.Pad = 0;
+
if (attrs->ia_valid & ATTR_ATIME) {
set_time = true;
info_buf.LastAccessTime =
- cpu_to_le64(cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_atime)));
+ cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
} else
info_buf.LastAccessTime = 0;
if (attrs->ia_valid & ATTR_MTIME) {
set_time = true;
info_buf.LastWriteTime =
- cpu_to_le64(cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_mtime)));
+ cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
} else
info_buf.LastWriteTime = 0;
@@ -1145,7 +1145,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
cifs_dbg(FYI, "CIFS - CTIME changed\n");
info_buf.ChangeTime =
- cpu_to_le64(cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_ctime)));
+ cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
} else
info_buf.ChangeTime = 0;
@@ -1577,14 +1577,12 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
server = tcon->ses->server;
-#ifdef CONFIG_CIFS_SMB311
if ((server->ops->posix_mkdir) && (tcon->posix_extensions)) {
rc = server->ops->posix_mkdir(xid, inode, mode, tcon, full_path,
cifs_sb);
d_drop(direntry); /* for time being always refresh inode info */
goto mkdir_out;
}
-#endif /* SMB311 */
if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
@@ -2071,8 +2069,8 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
/* old CIFS Unix Extensions doesn't return create time */
if (CIFS_I(inode)->createtime) {
stat->result_mask |= STATX_BTIME;
- stat->btime = timespec_to_timespec64(
- cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime)));
+ stat->btime =
+ cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime));
}
stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED);
@@ -2278,17 +2276,17 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
args->gid = INVALID_GID; /* no change */
if (attrs->ia_valid & ATTR_ATIME)
- args->atime = cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_atime));
+ args->atime = cifs_UnixTimeToNT(attrs->ia_atime);
else
args->atime = NO_CHANGE_64;
if (attrs->ia_valid & ATTR_MTIME)
- args->mtime = cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_mtime));
+ args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime);
else
args->mtime = NO_CHANGE_64;
if (attrs->ia_valid & ATTR_CTIME)
- args->ctime = cifs_UnixTimeToNT(timespec64_to_timespec(attrs->ia_ctime));
+ args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime);
else
args->ctime = NO_CHANGE_64;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index de41f96aba49..2148b0f60e5e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -396,7 +396,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_io_parms io_parms;
int buf_type = CIFS_NO_BUFFER;
__le16 *utf16_path;
- __u8 oplock = SMB2_OPLOCK_LEVEL_II;
+ __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
struct smb2_file_all_info *pfile_info = NULL;
oparms.tcon = tcon;
@@ -459,7 +459,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_io_parms io_parms;
int create_options = CREATE_NOT_DIR;
__le16 *utf16_path;
- __u8 oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+ __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
struct kvec iov[2];
if (backup_cred(cifs_sb))
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 53e8362cbc4a..dacb2c05674c 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -122,9 +122,7 @@ tconInfoAlloc(void)
mutex_init(&ret_buf->crfid.fid_mutex);
ret_buf->crfid.fid = kzalloc(sizeof(struct cifs_fid),
GFP_KERNEL);
-#ifdef CONFIG_CIFS_STATS
spin_lock_init(&ret_buf->stat_lock);
-#endif
}
return ret_buf;
}
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index d7ad0dfe4e68..fdd908e4a26b 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -918,10 +918,10 @@ smbCalcSize(void *buf, struct TCP_Server_Info *server)
* Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
* into Unix UTC (based 1970-01-01, in seconds).
*/
-struct timespec
+struct timespec64
cifs_NTtimeToUnix(__le64 ntutc)
{
- struct timespec ts;
+ struct timespec64 ts;
/* BB what about the timezone? BB */
/* Subtract the NTFS time offset, then convert to 1s intervals. */
@@ -935,12 +935,12 @@ cifs_NTtimeToUnix(__le64 ntutc)
*/
if (t < 0) {
abs_t = -t;
- ts.tv_nsec = (long)(do_div(abs_t, 10000000) * 100);
+ ts.tv_nsec = (time64_t)(do_div(abs_t, 10000000) * 100);
ts.tv_nsec = -ts.tv_nsec;
ts.tv_sec = -abs_t;
} else {
abs_t = t;
- ts.tv_nsec = (long)do_div(abs_t, 10000000) * 100;
+ ts.tv_nsec = (time64_t)do_div(abs_t, 10000000) * 100;
ts.tv_sec = abs_t;
}
@@ -949,7 +949,7 @@ cifs_NTtimeToUnix(__le64 ntutc)
/* Convert the Unix UTC into NT UTC. */
u64
-cifs_UnixTimeToNT(struct timespec t)
+cifs_UnixTimeToNT(struct timespec64 t)
{
/* Convert to 100ns intervals and then add the NTFS time offset. */
return (u64) t.tv_sec * 10000000 + t.tv_nsec/100 + NTFS_TIME_OFFSET;
@@ -959,10 +959,11 @@ static const int total_days_of_prev_months[] = {
0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
};
-struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
+struct timespec64 cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
{
- struct timespec ts;
- int sec, min, days, month, year;
+ struct timespec64 ts;
+ time64_t sec;
+ int min, days, month, year;
u16 date = le16_to_cpu(le_date);
u16 time = le16_to_cpu(le_time);
SMB_TIME *st = (SMB_TIME *)&time;
@@ -973,7 +974,7 @@ struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
sec = 2 * st->TwoSeconds;
min = st->Minutes;
if ((sec > 59) || (min > 59))
- cifs_dbg(VFS, "illegal time min %d sec %d\n", min, sec);
+ cifs_dbg(VFS, "illegal time min %d sec %lld\n", min, sec);
sec += (min * 60);
sec += 60 * 60 * st->Hours;
if (st->Hours > 24)
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 646dcd149de1..378151e09e91 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -624,7 +624,6 @@ cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
static void
cifs_clear_stats(struct cifs_tcon *tcon)
{
-#ifdef CONFIG_CIFS_STATS
atomic_set(&tcon->stats.cifs_stats.num_writes, 0);
atomic_set(&tcon->stats.cifs_stats.num_reads, 0);
atomic_set(&tcon->stats.cifs_stats.num_flushes, 0);
@@ -646,13 +645,11 @@ cifs_clear_stats(struct cifs_tcon *tcon)
atomic_set(&tcon->stats.cifs_stats.num_locks, 0);
atomic_set(&tcon->stats.cifs_stats.num_acl_get, 0);
atomic_set(&tcon->stats.cifs_stats.num_acl_set, 0);
-#endif
}
static void
cifs_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
{
-#ifdef CONFIG_CIFS_STATS
seq_printf(m, " Oplocks breaks: %d",
atomic_read(&tcon->stats.cifs_stats.num_oplock_brks));
seq_printf(m, "\nReads: %d Bytes: %llu",
@@ -684,7 +681,6 @@ cifs_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
atomic_read(&tcon->stats.cifs_stats.num_ffirst),
atomic_read(&tcon->stats.cifs_stats.num_fnext),
atomic_read(&tcon->stats.cifs_stats.num_fclose));
-#endif
}
static void
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index d01ad706d7fc..1eef1791d0c4 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -120,7 +120,9 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
break;
}
- if (use_cached_root_handle == false)
+ if (use_cached_root_handle)
+ close_shroot(&tcon->crfid);
+ else
rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
if (tmprc)
rc = tmprc;
@@ -281,7 +283,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
int rc;
if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) &&
- (buf->LastWriteTime == 0) && (buf->ChangeTime) &&
+ (buf->LastWriteTime == 0) && (buf->ChangeTime == 0) &&
(buf->Attributes == 0))
return 0; /* would be a no op, no sense sending this */
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 3ff7cec2da81..303d4592ebe7 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -93,7 +93,6 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
/* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
};
-#ifdef CONFIG_CIFS_SMB311
static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
__u32 non_ctxlen)
{
@@ -127,7 +126,6 @@ static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
/* length of negcontexts including pad from end of sec blob to them */
return (len - nc_offset) + size_of_pad_before_neg_ctxts;
}
-#endif /* CIFS_SMB311 */
int
smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
@@ -222,10 +220,9 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
clc_len = smb2_calc_size(buf, srvr);
-#ifdef CONFIG_CIFS_SMB311
if (shdr->Command == SMB2_NEGOTIATE)
clc_len += get_neg_ctxt_len(shdr, len, clc_len);
-#endif /* SMB311 */
+
if (len != clc_len) {
cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n",
clc_len, len, mid);
@@ -451,15 +448,13 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb)
/* Windows doesn't allow paths beginning with \ */
if (from[0] == '\\')
start_of_path = from + 1;
-#ifdef CONFIG_CIFS_SMB311
+
/* SMB311 POSIX extensions paths do not include leading slash */
else if (cifs_sb_master_tlink(cifs_sb) &&
cifs_sb_master_tcon(cifs_sb)->posix_extensions &&
(from[0] == '/')) {
start_of_path = from + 1;
- }
-#endif /* 311 */
- else
+ } else
start_of_path = from;
to = cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len,
@@ -759,7 +754,6 @@ smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
return 0;
}
-#ifdef CONFIG_CIFS_SMB311
/**
* smb311_update_preauth_hash - update @ses hash with the packet data in @iov
*
@@ -821,4 +815,3 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec)
return 0;
}
-#endif
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index ea92a38b2f08..541258447c4c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -444,7 +444,11 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */,
NULL /* no data input */, 0 /* no data input */,
(char **)&out_buf, &ret_data_len);
- if (rc != 0) {
+ if (rc == -EOPNOTSUPP) {
+ cifs_dbg(FYI,
+ "server does not support query network interfaces\n");
+ goto out;
+ } else if (rc != 0) {
cifs_dbg(VFS, "error %d on ioctl to get interface list\n", rc);
goto out;
}
@@ -466,21 +470,36 @@ out:
return rc;
}
-void
-smb2_cached_lease_break(struct work_struct *work)
+static void
+smb2_close_cached_fid(struct kref *ref)
{
- struct cached_fid *cfid = container_of(work,
- struct cached_fid, lease_break);
- mutex_lock(&cfid->fid_mutex);
+ struct cached_fid *cfid = container_of(ref, struct cached_fid,
+ refcount);
+
if (cfid->is_valid) {
cifs_dbg(FYI, "clear cached root file handle\n");
SMB2_close(0, cfid->tcon, cfid->fid->persistent_fid,
cfid->fid->volatile_fid);
cfid->is_valid = false;
}
+}
+
+void close_shroot(struct cached_fid *cfid)
+{
+ mutex_lock(&cfid->fid_mutex);
+ kref_put(&cfid->refcount, smb2_close_cached_fid);
mutex_unlock(&cfid->fid_mutex);
}
+void
+smb2_cached_lease_break(struct work_struct *work)
+{
+ struct cached_fid *cfid = container_of(work,
+ struct cached_fid, lease_break);
+
+ close_shroot(cfid);
+}
+
/*
* Open the directory at the root of a share
*/
@@ -495,6 +514,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
if (tcon->crfid.is_valid) {
cifs_dbg(FYI, "found a cached root file handle\n");
memcpy(pfid, tcon->crfid.fid, sizeof(struct cifs_fid));
+ kref_get(&tcon->crfid.refcount);
mutex_unlock(&tcon->crfid.fid_mutex);
return 0;
}
@@ -511,6 +531,8 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
memcpy(tcon->crfid.fid, pfid, sizeof(struct cifs_fid));
tcon->crfid.tcon = tcon;
tcon->crfid.is_valid = true;
+ kref_init(&tcon->crfid.refcount);
+ kref_get(&tcon->crfid.refcount);
}
mutex_unlock(&tcon->crfid.fid_mutex);
return rc;
@@ -549,9 +571,14 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
FS_DEVICE_INFORMATION);
SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
+ FS_VOLUME_INFORMATION);
+ SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */
if (no_cached_open)
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ else
+ close_shroot(&tcon->crfid);
+
return;
}
@@ -873,13 +900,11 @@ smb2_can_echo(struct TCP_Server_Info *server)
static void
smb2_clear_stats(struct cifs_tcon *tcon)
{
-#ifdef CONFIG_CIFS_STATS
int i;
for (i = 0; i < NUMBER_OF_SMB2_COMMANDS; i++) {
atomic_set(&tcon->stats.smb2_stats.smb2_com_sent[i], 0);
atomic_set(&tcon->stats.smb2_stats.smb2_com_failed[i], 0);
}
-#endif
}
static void
@@ -918,67 +943,58 @@ smb2_dump_share_caps(struct seq_file *m, struct cifs_tcon *tcon)
static void
smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
{
-#ifdef CONFIG_CIFS_STATS
atomic_t *sent = tcon->stats.smb2_stats.smb2_com_sent;
atomic_t *failed = tcon->stats.smb2_stats.smb2_com_failed;
- seq_printf(m, "\nNegotiates: %d sent %d failed",
- atomic_read(&sent[SMB2_NEGOTIATE_HE]),
- atomic_read(&failed[SMB2_NEGOTIATE_HE]));
- seq_printf(m, "\nSessionSetups: %d sent %d failed",
- atomic_read(&sent[SMB2_SESSION_SETUP_HE]),
- atomic_read(&failed[SMB2_SESSION_SETUP_HE]));
- seq_printf(m, "\nLogoffs: %d sent %d failed",
- atomic_read(&sent[SMB2_LOGOFF_HE]),
- atomic_read(&failed[SMB2_LOGOFF_HE]));
- seq_printf(m, "\nTreeConnects: %d sent %d failed",
+
+ /*
+ * Can't display SMB2_NEGOTIATE, SESSION_SETUP, LOGOFF, CANCEL and ECHO
+ * totals (requests sent) since those SMBs are per-session not per tcon
+ */
+ seq_printf(m, "\nBytes read: %llu Bytes written: %llu",
+ (long long)(tcon->bytes_read),
+ (long long)(tcon->bytes_written));
+ seq_printf(m, "\nTreeConnects: %d total %d failed",
atomic_read(&sent[SMB2_TREE_CONNECT_HE]),
atomic_read(&failed[SMB2_TREE_CONNECT_HE]));
- seq_printf(m, "\nTreeDisconnects: %d sent %d failed",
+ seq_printf(m, "\nTreeDisconnects: %d total %d failed",
atomic_read(&sent[SMB2_TREE_DISCONNECT_HE]),
atomic_read(&failed[SMB2_TREE_DISCONNECT_HE]));
- seq_printf(m, "\nCreates: %d sent %d failed",
+ seq_printf(m, "\nCreates: %d total %d failed",
atomic_read(&sent[SMB2_CREATE_HE]),
atomic_read(&failed[SMB2_CREATE_HE]));
- seq_printf(m, "\nCloses: %d sent %d failed",
+ seq_printf(m, "\nCloses: %d total %d failed",
atomic_read(&sent[SMB2_CLOSE_HE]),
atomic_read(&failed[SMB2_CLOSE_HE]));
- seq_printf(m, "\nFlushes: %d sent %d failed",
+ seq_printf(m, "\nFlushes: %d total %d failed",
atomic_read(&sent[SMB2_FLUSH_HE]),
atomic_read(&failed[SMB2_FLUSH_HE]));
- seq_printf(m, "\nReads: %d sent %d failed",
+ seq_printf(m, "\nReads: %d total %d failed",
atomic_read(&sent[SMB2_READ_HE]),
atomic_read(&failed[SMB2_READ_HE]));
- seq_printf(m, "\nWrites: %d sent %d failed",
+ seq_printf(m, "\nWrites: %d total %d failed",
atomic_read(&sent[SMB2_WRITE_HE]),
atomic_read(&failed[SMB2_WRITE_HE]));
- seq_printf(m, "\nLocks: %d sent %d failed",
+ seq_printf(m, "\nLocks: %d total %d failed",
atomic_read(&sent[SMB2_LOCK_HE]),
atomic_read(&failed[SMB2_LOCK_HE]));
- seq_printf(m, "\nIOCTLs: %d sent %d failed",
+ seq_printf(m, "\nIOCTLs: %d total %d failed",
atomic_read(&sent[SMB2_IOCTL_HE]),
atomic_read(&failed[SMB2_IOCTL_HE]));
- seq_printf(m, "\nCancels: %d sent %d failed",
- atomic_read(&sent[SMB2_CANCEL_HE]),
- atomic_read(&failed[SMB2_CANCEL_HE]));
- seq_printf(m, "\nEchos: %d sent %d failed",
- atomic_read(&sent[SMB2_ECHO_HE]),
- atomic_read(&failed[SMB2_ECHO_HE]));
- seq_printf(m, "\nQueryDirectories: %d sent %d failed",
+ seq_printf(m, "\nQueryDirectories: %d total %d failed",
atomic_read(&sent[SMB2_QUERY_DIRECTORY_HE]),
atomic_read(&failed[SMB2_QUERY_DIRECTORY_HE]));
- seq_printf(m, "\nChangeNotifies: %d sent %d failed",
+ seq_printf(m, "\nChangeNotifies: %d total %d failed",
atomic_read(&sent[SMB2_CHANGE_NOTIFY_HE]),
atomic_read(&failed[SMB2_CHANGE_NOTIFY_HE]));
- seq_printf(m, "\nQueryInfos: %d sent %d failed",
+ seq_printf(m, "\nQueryInfos: %d total %d failed",
atomic_read(&sent[SMB2_QUERY_INFO_HE]),
atomic_read(&failed[SMB2_QUERY_INFO_HE]));
- seq_printf(m, "\nSetInfos: %d sent %d failed",
+ seq_printf(m, "\nSetInfos: %d total %d failed",
atomic_read(&sent[SMB2_SET_INFO_HE]),
atomic_read(&failed[SMB2_SET_INFO_HE]));
seq_printf(m, "\nOplockBreaks: %d sent %d failed",
atomic_read(&sent[SMB2_OPLOCK_BREAK_HE]),
atomic_read(&failed[SMB2_OPLOCK_BREAK_HE]));
-#endif
}
static void
@@ -1353,6 +1369,13 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
}
+/* GMT Token is @GMT-YYYY.MM.DD-HH.MM.SS Unicode which is 48 bytes + null */
+#define GMT_TOKEN_SIZE 50
+
+/*
+ * Input buffer contains (empty) struct smb_snapshot array with size filled in
+ * For output see struct SRV_SNAPSHOT_ARRAY in MS-SMB2 section 2.2.32.2
+ */
static int
smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *cfile, void __user *ioc_buf)
@@ -1382,14 +1405,27 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
kfree(retbuf);
return rc;
}
- if (snapshot_in.snapshot_array_size < sizeof(struct smb_snapshot_array)) {
- rc = -ERANGE;
- kfree(retbuf);
- return rc;
- }
- if (ret_data_len > snapshot_in.snapshot_array_size)
- ret_data_len = snapshot_in.snapshot_array_size;
+ /*
+ * Check for min size, ie not large enough to fit even one GMT
+ * token (snapshot). On the first ioctl some users may pass in
+ * smaller size (or zero) to simply get the size of the array
+ * so the user space caller can allocate sufficient memory
+ * and retry the ioctl again with larger array size sufficient
+ * to hold all of the snapshot GMT tokens on the second try.
+ */
+ if (snapshot_in.snapshot_array_size < GMT_TOKEN_SIZE)
+ ret_data_len = sizeof(struct smb_snapshot_array);
+
+ /*
+ * We return struct SRV_SNAPSHOT_ARRAY, followed by
+ * the snapshot array (of 50 byte GMT tokens) each
+ * representing an available previous version of the data
+ */
+ if (ret_data_len > (snapshot_in.snapshot_array_size +
+ sizeof(struct smb_snapshot_array)))
+ ret_data_len = snapshot_in.snapshot_array_size +
+ sizeof(struct smb_snapshot_array);
if (copy_to_user(ioc_buf, retbuf, ret_data_len))
rc = -EFAULT;
@@ -1487,7 +1523,11 @@ smb2_is_session_expired(char *buf)
shdr->Status != STATUS_USER_SESSION_DELETED)
return false;
+ trace_smb3_ses_expired(shdr->TreeId, shdr->SessionId,
+ le16_to_cpu(shdr->Command),
+ le64_to_cpu(shdr->MessageId));
cifs_dbg(FYI, "Session expired or deleted\n");
+
return true;
}
@@ -1504,16 +1544,140 @@ smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
CIFS_CACHE_READ(cinode) ? 1 : 0);
}
+static void
+smb2_set_related(struct smb_rqst *rqst)
+{
+ struct smb2_sync_hdr *shdr;
+
+ shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base);
+ shdr->Flags |= SMB2_FLAGS_RELATED_OPERATIONS;
+}
+
+char smb2_padding[7] = {0, 0, 0, 0, 0, 0, 0};
+
+static void
+smb2_set_next_command(struct TCP_Server_Info *server, struct smb_rqst *rqst)
+{
+ struct smb2_sync_hdr *shdr;
+ unsigned long len = smb_rqst_len(server, rqst);
+
+ /* SMB headers in a compound are 8 byte aligned. */
+ if (len & 7) {
+ rqst->rq_iov[rqst->rq_nvec].iov_base = smb2_padding;
+ rqst->rq_iov[rqst->rq_nvec].iov_len = 8 - (len & 7);
+ rqst->rq_nvec++;
+ len = smb_rqst_len(server, rqst);
+ }
+
+ shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base);
+ shdr->NextCommand = cpu_to_le32(len);
+}
+
static int
smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
struct kstatfs *buf)
{
+ struct smb2_query_info_rsp *rsp;
+ struct smb2_fs_full_size_info *info = NULL;
+ struct smb_rqst rqst[3];
+ int resp_buftype[3];
+ struct kvec rsp_iov[3];
+ struct kvec open_iov[5]; /* 4 + potential padding. */
+ struct kvec qi_iov[1];
+ struct kvec close_iov[1];
+ struct cifs_ses *ses = tcon->ses;
+ struct TCP_Server_Info *server = ses->server;
+ __le16 srch_path = 0; /* Null - open root of share */
+ u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+ struct cifs_open_parms oparms;
+ struct cifs_fid fid;
+ int flags = 0;
+ int rc;
+
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ memset(rqst, 0, sizeof(rqst));
+ memset(resp_buftype, 0, sizeof(resp_buftype));
+ memset(rsp_iov, 0, sizeof(rsp_iov));
+
+ memset(&open_iov, 0, sizeof(open_iov));
+ rqst[0].rq_iov = open_iov;
+ rqst[0].rq_nvec = 4;
+
+ oparms.tcon = tcon;
+ oparms.desired_access = FILE_READ_ATTRIBUTES;
+ oparms.disposition = FILE_OPEN;
+ oparms.create_options = 0;
+ oparms.fid = &fid;
+ oparms.reconnect = false;
+
+ rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, &srch_path);
+ if (rc)
+ goto qfs_exit;
+ smb2_set_next_command(server, &rqst[0]);
+
+ memset(&qi_iov, 0, sizeof(qi_iov));
+ rqst[1].rq_iov = qi_iov;
+ rqst[1].rq_nvec = 1;
+
+ rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID,
+ FS_FULL_SIZE_INFORMATION,
+ SMB2_O_INFO_FILESYSTEM, 0,
+ sizeof(struct smb2_fs_full_size_info));
+ if (rc)
+ goto qfs_exit;
+ smb2_set_next_command(server, &rqst[1]);
+ smb2_set_related(&rqst[1]);
+
+ memset(&close_iov, 0, sizeof(close_iov));
+ rqst[2].rq_iov = close_iov;
+ rqst[2].rq_nvec = 1;
+
+ rc = SMB2_close_init(tcon, &rqst[2], COMPOUND_FID, COMPOUND_FID);
+ if (rc)
+ goto qfs_exit;
+ smb2_set_related(&rqst[2]);
+
+ rc = compound_send_recv(xid, ses, flags, 3, rqst,
+ resp_buftype, rsp_iov);
+ if (rc)
+ goto qfs_exit;
+
+ rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
+ buf->f_type = SMB2_MAGIC_NUMBER;
+ info = (struct smb2_fs_full_size_info *)(
+ le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
+ rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
+ le32_to_cpu(rsp->OutputBufferLength),
+ &rsp_iov[1],
+ sizeof(struct smb2_fs_full_size_info));
+ if (!rc)
+ smb2_copy_fs_info_to_kstatfs(info, buf);
+
+qfs_exit:
+ SMB2_open_free(&rqst[0]);
+ SMB2_query_info_free(&rqst[1]);
+ SMB2_close_free(&rqst[2]);
+ free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+ free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+ free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
+ return rc;
+}
+
+static int
+smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
+ struct kstatfs *buf)
+{
int rc;
__le16 srch_path = 0; /* Null - open root of share */
u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
struct cifs_open_parms oparms;
struct cifs_fid fid;
+ if (!tcon->posix_extensions)
+ return smb2_queryfs(xid, tcon, buf);
+
oparms.tcon = tcon;
oparms.desired_access = FILE_READ_ATTRIBUTES;
oparms.disposition = FILE_OPEN;
@@ -1524,9 +1688,10 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL, NULL);
if (rc)
return rc;
+
+ rc = SMB311_posix_qfs_info(xid, tcon, fid.persistent_fid,
+ fid.volatile_fid, buf);
buf->f_type = SMB2_MAGIC_NUMBER;
- rc = SMB2_QFS_info(xid, tcon, fid.persistent_fid, fid.volatile_fid,
- buf);
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
return rc;
}
@@ -1700,7 +1865,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
&resp_buftype);
if (!rc || !err_iov.iov_base) {
rc = -ENOENT;
- goto querty_exit;
+ goto free_path;
}
err_buf = err_iov.iov_base;
@@ -1741,6 +1906,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
querty_exit:
free_rsp_buf(resp_buftype, err_buf);
+ free_path:
kfree(utf16_path);
return rc;
}
@@ -2326,35 +2492,51 @@ static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf,
sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
}
-/* Assumes:
- * rqst->rq_iov[0] is transform header
- * rqst->rq_iov[1+] data to be encrypted/decrypted
+/* Assumes the first rqst has a transform header as the first iov.
+ * I.e.
+ * rqst[0].rq_iov[0] is transform header
+ * rqst[0].rq_iov[1+] data to be encrypted/decrypted
+ * rqst[1+].rq_iov[0+] data to be encrypted/decrypted
*/
static struct scatterlist *
-init_sg(struct smb_rqst *rqst, u8 *sign)
+init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign)
{
- unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages + 1;
- unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
+ unsigned int sg_len;
struct scatterlist *sg;
unsigned int i;
unsigned int j;
+ unsigned int idx = 0;
+ int skip;
+
+ sg_len = 1;
+ for (i = 0; i < num_rqst; i++)
+ sg_len += rqst[i].rq_nvec + rqst[i].rq_npages;
sg = kmalloc_array(sg_len, sizeof(struct scatterlist), GFP_KERNEL);
if (!sg)
return NULL;
sg_init_table(sg, sg_len);
- smb2_sg_set_buf(&sg[0], rqst->rq_iov[0].iov_base + 20, assoc_data_len);
- for (i = 1; i < rqst->rq_nvec; i++)
- smb2_sg_set_buf(&sg[i], rqst->rq_iov[i].iov_base,
- rqst->rq_iov[i].iov_len);
- for (j = 0; i < sg_len - 1; i++, j++) {
- unsigned int len, offset;
+ for (i = 0; i < num_rqst; i++) {
+ for (j = 0; j < rqst[i].rq_nvec; j++) {
+ /*
+ * The first rqst has a transform header where the
+ * first 20 bytes are not part of the encrypted blob
+ */
+ skip = (i == 0) && (j == 0) ? 20 : 0;
+ smb2_sg_set_buf(&sg[idx++],
+ rqst[i].rq_iov[j].iov_base + skip,
+ rqst[i].rq_iov[j].iov_len - skip);
+ }
- rqst_page_get_length(rqst, j, &len, &offset);
- sg_set_page(&sg[i], rqst->rq_pages[j], len, offset);
+ for (j = 0; j < rqst[i].rq_npages; j++) {
+ unsigned int len, offset;
+
+ rqst_page_get_length(&rqst[i], j, &len, &offset);
+ sg_set_page(&sg[idx++], rqst[i].rq_pages[j], len, offset);
+ }
}
- smb2_sg_set_buf(&sg[sg_len - 1], sign, SMB2_SIGNATURE_SIZE);
+ smb2_sg_set_buf(&sg[idx], sign, SMB2_SIGNATURE_SIZE);
return sg;
}
@@ -2386,10 +2568,11 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
* untouched.
*/
static int
-crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
+crypt_message(struct TCP_Server_Info *server, int num_rqst,
+ struct smb_rqst *rqst, int enc)
{
struct smb2_transform_hdr *tr_hdr =
- (struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base;
+ (struct smb2_transform_hdr *)rqst[0].rq_iov[0].iov_base;
unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
int rc = 0;
struct scatterlist *sg;
@@ -2440,7 +2623,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
crypt_len += SMB2_SIGNATURE_SIZE;
}
- sg = init_sg(rqst, sign);
+ sg = init_sg(num_rqst, rqst, sign);
if (!sg) {
cifs_dbg(VFS, "%s: Failed to init sg", __func__);
rc = -ENOMEM;
@@ -2477,103 +2660,98 @@ free_req:
return rc;
}
+void
+smb3_free_compound_rqst(int num_rqst, struct smb_rqst *rqst)
+{
+ int i, j;
+
+ for (i = 0; i < num_rqst; i++) {
+ if (rqst[i].rq_pages) {
+ for (j = rqst[i].rq_npages - 1; j >= 0; j--)
+ put_page(rqst[i].rq_pages[j]);
+ kfree(rqst[i].rq_pages);
+ }
+ }
+}
+
+/*
+ * This function will initialize new_rq and encrypt the content.
+ * The first entry, new_rq[0], only contains a single iov which contains
+ * a smb2_transform_hdr and is pre-allocated by the caller.
+ * This function then populates new_rq[1+] with the content from olq_rq[0+].
+ *
+ * The end result is an array of smb_rqst structures where the first structure
+ * only contains a single iov for the transform header which we then can pass
+ * to crypt_message().
+ *
+ * new_rq[0].rq_iov[0] : smb2_transform_hdr pre-allocated by the caller
+ * new_rq[1+].rq_iov[*] == old_rq[0+].rq_iov[*] : SMB2/3 requests
+ */
static int
-smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
- struct smb_rqst *old_rq)
+smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst,
+ struct smb_rqst *new_rq, struct smb_rqst *old_rq)
{
- struct kvec *iov;
struct page **pages;
- struct smb2_transform_hdr *tr_hdr;
- unsigned int npages = old_rq->rq_npages;
- unsigned int orig_len;
- int i;
+ struct smb2_transform_hdr *tr_hdr = new_rq[0].rq_iov[0].iov_base;
+ unsigned int npages;
+ unsigned int orig_len = 0;
+ int i, j;
int rc = -ENOMEM;
- pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
- if (!pages)
- return rc;
-
- new_rq->rq_pages = pages;
- new_rq->rq_offset = old_rq->rq_offset;
- new_rq->rq_npages = old_rq->rq_npages;
- new_rq->rq_pagesz = old_rq->rq_pagesz;
- new_rq->rq_tailsz = old_rq->rq_tailsz;
-
- for (i = 0; i < npages; i++) {
- pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
- if (!pages[i])
- goto err_free_pages;
- }
-
- iov = kmalloc_array(old_rq->rq_nvec + 1, sizeof(struct kvec),
- GFP_KERNEL);
- if (!iov)
- goto err_free_pages;
+ for (i = 1; i < num_rqst; i++) {
+ npages = old_rq[i - 1].rq_npages;
+ pages = kmalloc_array(npages, sizeof(struct page *),
+ GFP_KERNEL);
+ if (!pages)
+ goto err_free;
+
+ new_rq[i].rq_pages = pages;
+ new_rq[i].rq_npages = npages;
+ new_rq[i].rq_offset = old_rq[i - 1].rq_offset;
+ new_rq[i].rq_pagesz = old_rq[i - 1].rq_pagesz;
+ new_rq[i].rq_tailsz = old_rq[i - 1].rq_tailsz;
+ new_rq[i].rq_iov = old_rq[i - 1].rq_iov;
+ new_rq[i].rq_nvec = old_rq[i - 1].rq_nvec;
+
+ orig_len += smb_rqst_len(server, &old_rq[i - 1]);
+
+ for (j = 0; j < npages; j++) {
+ pages[j] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
+ if (!pages[j])
+ goto err_free;
+ }
- /* copy all iovs from the old */
- memcpy(&iov[1], &old_rq->rq_iov[0],
- sizeof(struct kvec) * old_rq->rq_nvec);
+ /* copy pages form the old */
+ for (j = 0; j < npages; j++) {
+ char *dst, *src;
+ unsigned int offset, len;
- new_rq->rq_iov = iov;
- new_rq->rq_nvec = old_rq->rq_nvec + 1;
+ rqst_page_get_length(&new_rq[i], j, &len, &offset);
- tr_hdr = kmalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL);
- if (!tr_hdr)
- goto err_free_iov;
+ dst = (char *) kmap(new_rq[i].rq_pages[j]) + offset;
+ src = (char *) kmap(old_rq[i - 1].rq_pages[j]) + offset;
- orig_len = smb_rqst_len(server, old_rq);
+ memcpy(dst, src, len);
+ kunmap(new_rq[i].rq_pages[j]);
+ kunmap(old_rq[i - 1].rq_pages[j]);
+ }
+ }
- /* fill the 2nd iov with a transform header */
+ /* fill the 1st iov with a transform header */
fill_transform_hdr(tr_hdr, orig_len, old_rq);
- new_rq->rq_iov[0].iov_base = tr_hdr;
- new_rq->rq_iov[0].iov_len = sizeof(struct smb2_transform_hdr);
-
- /* copy pages form the old */
- for (i = 0; i < npages; i++) {
- char *dst, *src;
- unsigned int offset, len;
-
- rqst_page_get_length(new_rq, i, &len, &offset);
-
- dst = (char *) kmap(new_rq->rq_pages[i]) + offset;
- src = (char *) kmap(old_rq->rq_pages[i]) + offset;
- memcpy(dst, src, len);
- kunmap(new_rq->rq_pages[i]);
- kunmap(old_rq->rq_pages[i]);
- }
-
- rc = crypt_message(server, new_rq, 1);
+ rc = crypt_message(server, num_rqst, new_rq, 1);
cifs_dbg(FYI, "encrypt message returned %d", rc);
if (rc)
- goto err_free_tr_hdr;
+ goto err_free;
return rc;
-err_free_tr_hdr:
- kfree(tr_hdr);
-err_free_iov:
- kfree(iov);
-err_free_pages:
- for (i = i - 1; i >= 0; i--)
- put_page(pages[i]);
- kfree(pages);
+err_free:
+ smb3_free_compound_rqst(num_rqst - 1, &new_rq[1]);
return rc;
}
-static void
-smb3_free_transform_rq(struct smb_rqst *rqst)
-{
- int i = rqst->rq_npages - 1;
-
- for (; i >= 0; i--)
- put_page(rqst->rq_pages[i]);
- kfree(rqst->rq_pages);
- /* free transform header */
- kfree(rqst->rq_iov[0].iov_base);
- kfree(rqst->rq_iov);
-}
-
static int
smb3_is_transform_hdr(void *buf)
{
@@ -2603,7 +2781,7 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
rqst.rq_pagesz = PAGE_SIZE;
rqst.rq_tailsz = (page_data_size % PAGE_SIZE) ? : PAGE_SIZE;
- rc = crypt_message(server, &rqst, 0);
+ rc = crypt_message(server, 1, &rqst, 0);
cifs_dbg(FYI, "decrypt message returned %d\n", rc);
if (rc)
@@ -2878,13 +3056,20 @@ discard_data:
static int
receive_encrypted_standard(struct TCP_Server_Info *server,
- struct mid_q_entry **mid)
+ struct mid_q_entry **mids, char **bufs,
+ int *num_mids)
{
- int length;
+ int ret, length;
char *buf = server->smallbuf;
+ char *tmpbuf;
+ struct smb2_sync_hdr *shdr;
unsigned int pdu_length = server->pdu_size;
unsigned int buf_size;
struct mid_q_entry *mid_entry;
+ int next_is_large;
+ char *next_buffer = NULL;
+
+ *num_mids = 0;
/* switch to large buffer if too big for a small one */
if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE) {
@@ -2905,24 +3090,61 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
if (length)
return length;
+ next_is_large = server->large_buf;
+ one_more:
+ shdr = (struct smb2_sync_hdr *)buf;
+ if (shdr->NextCommand) {
+ if (next_is_large) {
+ tmpbuf = server->bigbuf;
+ next_buffer = (char *)cifs_buf_get();
+ } else {
+ tmpbuf = server->smallbuf;
+ next_buffer = (char *)cifs_small_buf_get();
+ }
+ memcpy(next_buffer,
+ tmpbuf + le32_to_cpu(shdr->NextCommand),
+ pdu_length - le32_to_cpu(shdr->NextCommand));
+ }
+
mid_entry = smb2_find_mid(server, buf);
if (mid_entry == NULL)
cifs_dbg(FYI, "mid not found\n");
else {
cifs_dbg(FYI, "mid found\n");
mid_entry->decrypted = true;
+ mid_entry->resp_buf_size = server->pdu_size;
}
- *mid = mid_entry;
+ if (*num_mids >= MAX_COMPOUND) {
+ cifs_dbg(VFS, "too many PDUs in compound\n");
+ return -1;
+ }
+ bufs[*num_mids] = buf;
+ mids[(*num_mids)++] = mid_entry;
if (mid_entry && mid_entry->handle)
- return mid_entry->handle(server, mid_entry);
+ ret = mid_entry->handle(server, mid_entry);
+ else
+ ret = cifs_handle_standard(server, mid_entry);
- return cifs_handle_standard(server, mid_entry);
+ if (ret == 0 && shdr->NextCommand) {
+ pdu_length -= le32_to_cpu(shdr->NextCommand);
+ server->large_buf = next_is_large;
+ if (next_is_large)
+ server->bigbuf = next_buffer;
+ else
+ server->smallbuf = next_buffer;
+
+ buf += le32_to_cpu(shdr->NextCommand);
+ goto one_more;
+ }
+
+ return ret;
}
static int
-smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid)
+smb3_receive_transform(struct TCP_Server_Info *server,
+ struct mid_q_entry **mids, char **bufs, int *num_mids)
{
char *buf = server->smallbuf;
unsigned int pdu_length = server->pdu_size;
@@ -2945,10 +3167,11 @@ smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid)
return -ECONNABORTED;
}
+ /* TODO: add support for compounds containing READ. */
if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
- return receive_encrypted_read(server, mid);
+ return receive_encrypted_read(server, &mids[0]);
- return receive_encrypted_standard(server, mid);
+ return receive_encrypted_standard(server, mids, bufs, num_mids);
}
int
@@ -3250,7 +3473,6 @@ struct smb_version_operations smb30_operations = {
.fallocate = smb3_fallocate,
.enum_snapshots = smb3_enum_snapshots,
.init_transform_rq = smb3_init_transform_rq,
- .free_transform_rq = smb3_free_transform_rq,
.is_transform_hdr = smb3_is_transform_hdr,
.receive_transform = smb3_receive_transform,
.get_dfs_refer = smb2_get_dfs_refer,
@@ -3267,7 +3489,6 @@ struct smb_version_operations smb30_operations = {
.next_header = smb2_next_header,
};
-#ifdef CONFIG_CIFS_SMB311
struct smb_version_operations smb311_operations = {
.compare_fids = smb2_compare_fids,
.setup_request = smb2_setup_request,
@@ -3335,7 +3556,7 @@ struct smb_version_operations smb311_operations = {
.is_status_pending = smb2_is_status_pending,
.is_session_expired = smb2_is_session_expired,
.oplock_response = smb2_oplock_response,
- .queryfs = smb2_queryfs,
+ .queryfs = smb311_queryfs,
.mand_lock = smb2_mand_lock,
.mand_unlock_range = smb2_unlock_range,
.push_mand_locks = smb2_push_mandatory_locks,
@@ -3357,7 +3578,6 @@ struct smb_version_operations smb311_operations = {
.fallocate = smb3_fallocate,
.enum_snapshots = smb3_enum_snapshots,
.init_transform_rq = smb3_init_transform_rq,
- .free_transform_rq = smb3_free_transform_rq,
.is_transform_hdr = smb3_is_transform_hdr,
.receive_transform = smb3_receive_transform,
.get_dfs_refer = smb2_get_dfs_refer,
@@ -3366,9 +3586,13 @@ struct smb_version_operations smb311_operations = {
.query_all_EAs = smb2_query_eas,
.set_EA = smb2_set_ea,
#endif /* CIFS_XATTR */
+#ifdef CONFIG_CIFS_ACL
+ .get_acl = get_smb2_acl,
+ .get_acl_by_fid = get_smb2_acl_by_fid,
+ .set_acl = set_smb2_acl,
+#endif /* CIFS_ACL */
.next_header = smb2_next_header,
};
-#endif /* CIFS_SMB311 */
struct smb_version_values smb20_values = {
.version_string = SMB20_VERSION_STRING,
@@ -3496,7 +3720,6 @@ struct smb_version_values smb302_values = {
.create_lease_size = sizeof(struct create_lease_v2),
};
-#ifdef CONFIG_CIFS_SMB311
struct smb_version_values smb311_values = {
.version_string = SMB311_VERSION_STRING,
.protocol_id = SMB311_PROT_ID,
@@ -3517,4 +3740,3 @@ struct smb_version_values smb311_values = {
.signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
.create_lease_size = sizeof(struct create_lease_v2),
};
-#endif /* SMB311 */
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 3c92678cb45b..2f1938011395 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -80,7 +80,7 @@ static const int smb2_req_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
/* SMB2_OPLOCK_BREAK */ 24 /* BB this is 36 for LEASE_BREAK variant */
};
-static int smb3_encryption_required(const struct cifs_tcon *tcon)
+int smb3_encryption_required(const struct cifs_tcon *tcon)
{
if (!tcon)
return 0;
@@ -360,17 +360,15 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
total_len);
if (tcon != NULL) {
-#ifdef CONFIG_CIFS_STATS2
uint16_t com_code = le16_to_cpu(smb2_command);
cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]);
-#endif
cifs_stats_inc(&tcon->num_smbs_sent);
}
return rc;
}
-#ifdef CONFIG_CIFS_SMB311
+
/* offset is sizeof smb2_negotiate_req but rounded up to 8 bytes */
#define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) */
@@ -585,13 +583,6 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode)
return 0;
}
-#else
-static void assemble_neg_contexts(struct smb2_negotiate_req *req,
- unsigned int *total_len)
-{
- return;
-}
-#endif /* SMB311 */
/*
*
@@ -636,10 +627,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
return rc;
req->sync_hdr.SessionId = 0;
-#ifdef CONFIG_CIFS_SMB311
+
memset(server->preauth_sha_hash, 0, SMB2_PREAUTH_HASH_SIZE);
memset(ses->preauth_sha_hash, 0, SMB2_PREAUTH_HASH_SIZE);
-#endif
if (strcmp(ses->server->vals->version_string,
SMB3ANY_VERSION_STRING) == 0) {
@@ -741,10 +731,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
cifs_dbg(FYI, "negotiated smb3.0 dialect\n");
else if (rsp->DialectRevision == cpu_to_le16(SMB302_PROT_ID))
cifs_dbg(FYI, "negotiated smb3.02 dialect\n");
-#ifdef CONFIG_CIFS_SMB311
else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID))
cifs_dbg(FYI, "negotiated smb3.1.1 dialect\n");
-#endif /* SMB311 */
else {
cifs_dbg(VFS, "Illegal dialect returned by server 0x%x\n",
le16_to_cpu(rsp->DialectRevision));
@@ -753,9 +741,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
}
server->dialect = le16_to_cpu(rsp->DialectRevision);
- /* BB: add check that dialect was valid given dialect(s) we asked for */
-
-#ifdef CONFIG_CIFS_SMB311
/*
* Keep a copy of the hash after negprot. This hash will be
* the starting hash value for all sessions made from this
@@ -763,7 +748,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
*/
memcpy(server->preauth_sha_hash, ses->preauth_sha_hash,
SMB2_PREAUTH_HASH_SIZE);
-#endif
+
/* SMB2 only has an extended negflavor */
server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
/* set it to the maximum buffer size value we can send with 1 credit */
@@ -804,7 +789,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
rc = -EIO;
}
-#ifdef CONFIG_CIFS_SMB311
if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
if (rsp->NegotiateContextCount)
rc = smb311_decode_neg_context(rsp, server,
@@ -812,7 +796,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
else
cifs_dbg(VFS, "Missing expected negotiate contexts\n");
}
-#endif /* CONFIG_CIFS_SMB311 */
neg_exit:
free_rsp_buf(resp_buftype, rsp);
return rc;
@@ -1373,13 +1356,11 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
sess_data->nls_cp = (struct nls_table *) nls_cp;
sess_data->previous_session = ses->Suid;
-#ifdef CONFIG_CIFS_SMB311
/*
* Initialize the session hash with the server one.
*/
memcpy(ses->preauth_sha_hash, ses->server->preauth_sha_hash,
SMB2_PREAUTH_HASH_SIZE);
-#endif
while (sess_data->func)
sess_data->func(sess_data);
@@ -1875,6 +1856,51 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec,
return 0;
}
+/* See MS-SMB2 2.2.13.2.7 */
+static struct crt_twarp_ctxt *
+create_twarp_buf(__u64 timewarp)
+{
+ struct crt_twarp_ctxt *buf;
+
+ buf = kzalloc(sizeof(struct crt_twarp_ctxt), GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ buf->ccontext.DataOffset = cpu_to_le16(offsetof
+ (struct crt_twarp_ctxt, Timestamp));
+ buf->ccontext.DataLength = cpu_to_le32(8);
+ buf->ccontext.NameOffset = cpu_to_le16(offsetof
+ (struct crt_twarp_ctxt, Name));
+ buf->ccontext.NameLength = cpu_to_le16(4);
+ /* SMB2_CREATE_TIMEWARP_TOKEN is "TWrp" */
+ buf->Name[0] = 'T';
+ buf->Name[1] = 'W';
+ buf->Name[2] = 'r';
+ buf->Name[3] = 'p';
+ buf->Timestamp = cpu_to_le64(timewarp);
+ return buf;
+}
+
+/* See MS-SMB2 2.2.13.2.7 */
+static int
+add_twarp_context(struct kvec *iov, unsigned int *num_iovec, __u64 timewarp)
+{
+ struct smb2_create_req *req = iov[0].iov_base;
+ unsigned int num = *num_iovec;
+
+ iov[num].iov_base = create_twarp_buf(timewarp);
+ if (iov[num].iov_base == NULL)
+ return -ENOMEM;
+ iov[num].iov_len = sizeof(struct crt_twarp_ctxt);
+ if (!req->CreateContextsOffset)
+ req->CreateContextsOffset = cpu_to_le32(
+ sizeof(struct smb2_create_req) +
+ iov[num - 1].iov_len);
+ le32_add_cpu(&req->CreateContextsLength, sizeof(struct crt_twarp_ctxt));
+ *num_iovec = num + 1;
+ return 0;
+}
+
static int
alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
const char *treename, const __le16 *path)
@@ -1920,7 +1946,6 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
return 0;
}
-#ifdef CONFIG_CIFS_SMB311
int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
umode_t mode, struct cifs_tcon *tcon,
const char *full_path,
@@ -1928,7 +1953,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
{
struct smb_rqst rqst;
struct smb2_create_req *req;
- struct smb2_create_rsp *rsp;
+ struct smb2_create_rsp *rsp = NULL;
struct TCP_Server_Info *server;
struct cifs_ses *ses = tcon->ses;
struct kvec iov[3]; /* make sure at least one for each open context */
@@ -1943,27 +1968,31 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
char *pc_buf = NULL;
int flags = 0;
unsigned int total_len;
- __le16 *path = cifs_convert_path_to_utf16(full_path, cifs_sb);
-
- if (!path)
- return -ENOMEM;
+ __le16 *utf16_path = NULL;
cifs_dbg(FYI, "mkdir\n");
+ /* resource #1: path allocation */
+ utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
+ if (!utf16_path)
+ return -ENOMEM;
+
if (ses && (ses->server))
server = ses->server;
- else
- return -EIO;
+ else {
+ rc = -EIO;
+ goto err_free_path;
+ }
+ /* resource #2: request */
rc = smb2_plain_req_init(SMB2_CREATE, tcon, (void **) &req, &total_len);
-
if (rc)
- return rc;
+ goto err_free_path;
+
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
-
req->ImpersonationLevel = IL_IMPERSONATION;
req->DesiredAccess = cpu_to_le32(FILE_WRITE_ATTRIBUTES);
/* File attributes ignored on open (used in create though) */
@@ -1992,50 +2021,44 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
req->sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS;
rc = alloc_path_with_tree_prefix(&copy_path, &copy_size,
&name_len,
- tcon->treeName, path);
- if (rc) {
- cifs_small_buf_release(req);
- return rc;
- }
+ tcon->treeName, utf16_path);
+ if (rc)
+ goto err_free_req;
+
req->NameLength = cpu_to_le16(name_len * 2);
uni_path_len = copy_size;
- path = copy_path;
+ /* free before overwriting resource */
+ kfree(utf16_path);
+ utf16_path = copy_path;
} else {
- uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2;
+ uni_path_len = (2 * UniStrnlen((wchar_t *)utf16_path, PATH_MAX)) + 2;
/* MUST set path len (NameLength) to 0 opening root of share */
req->NameLength = cpu_to_le16(uni_path_len - 2);
if (uni_path_len % 8 != 0) {
copy_size = roundup(uni_path_len, 8);
copy_path = kzalloc(copy_size, GFP_KERNEL);
if (!copy_path) {
- cifs_small_buf_release(req);
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto err_free_req;
}
- memcpy((char *)copy_path, (const char *)path,
+ memcpy((char *)copy_path, (const char *)utf16_path,
uni_path_len);
uni_path_len = copy_size;
- path = copy_path;
+ /* free before overwriting resource */
+ kfree(utf16_path);
+ utf16_path = copy_path;
}
}
iov[1].iov_len = uni_path_len;
- iov[1].iov_base = path;
+ iov[1].iov_base = utf16_path;
req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE;
if (tcon->posix_extensions) {
- if (n_iov > 2) {
- struct create_context *ccontext =
- (struct create_context *)iov[n_iov-1].iov_base;
- ccontext->Next =
- cpu_to_le32(iov[n_iov-1].iov_len);
- }
-
+ /* resource #3: posix buf */
rc = add_posix_context(iov, &n_iov, mode);
- if (rc) {
- cifs_small_buf_release(req);
- kfree(copy_path);
- return rc;
- }
+ if (rc)
+ goto err_free_req;
pc_buf = iov[n_iov-1].iov_base;
}
@@ -2044,73 +2067,57 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
rqst.rq_iov = iov;
rqst.rq_nvec = n_iov;
- rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
- &rsp_iov);
-
- cifs_small_buf_release(req);
- rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
-
- if (rc != 0) {
+ /* resource #4: response buffer */
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
+ if (rc) {
cifs_stats_fail_inc(tcon, SMB2_CREATE_HE);
trace_smb3_posix_mkdir_err(xid, tcon->tid, ses->Suid,
- CREATE_NOT_FILE, FILE_WRITE_ATTRIBUTES, rc);
- goto smb311_mkdir_exit;
- } else
- trace_smb3_posix_mkdir_done(xid, rsp->PersistentFileId, tcon->tid,
- ses->Suid, CREATE_NOT_FILE,
- FILE_WRITE_ATTRIBUTES);
+ CREATE_NOT_FILE,
+ FILE_WRITE_ATTRIBUTES, rc);
+ goto err_free_rsp_buf;
+ }
+
+ rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
+ trace_smb3_posix_mkdir_done(xid, rsp->PersistentFileId, tcon->tid,
+ ses->Suid, CREATE_NOT_FILE,
+ FILE_WRITE_ATTRIBUTES);
SMB2_close(xid, tcon, rsp->PersistentFileId, rsp->VolatileFileId);
/* Eventually save off posix specific response info and timestaps */
-smb311_mkdir_exit:
- kfree(copy_path);
- kfree(pc_buf);
+err_free_rsp_buf:
free_rsp_buf(resp_buftype, rsp);
+ kfree(pc_buf);
+err_free_req:
+ cifs_small_buf_release(req);
+err_free_path:
+ kfree(utf16_path);
return rc;
-
}
-#endif /* SMB311 */
int
-SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
- __u8 *oplock, struct smb2_file_all_info *buf,
- struct kvec *err_iov, int *buftype)
+SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock,
+ struct cifs_open_parms *oparms, __le16 *path)
{
- struct smb_rqst rqst;
+ struct TCP_Server_Info *server = tcon->ses->server;
struct smb2_create_req *req;
- struct smb2_create_rsp *rsp;
- struct TCP_Server_Info *server;
- struct cifs_tcon *tcon = oparms->tcon;
- struct cifs_ses *ses = tcon->ses;
- struct kvec iov[5]; /* make sure at least one for each open context */
- struct kvec rsp_iov = {NULL, 0};
- int resp_buftype;
- int uni_path_len;
- __le16 *copy_path = NULL;
- int copy_size;
- int rc = 0;
unsigned int n_iov = 2;
__u32 file_attributes = 0;
- char *dhc_buf = NULL, *lc_buf = NULL, *pc_buf = NULL;
- int flags = 0;
+ int copy_size;
+ int uni_path_len;
unsigned int total_len;
-
- cifs_dbg(FYI, "create/open\n");
-
- if (ses && (ses->server))
- server = ses->server;
- else
- return -EIO;
+ struct kvec *iov = rqst->rq_iov;
+ __le16 *copy_path;
+ int rc;
rc = smb2_plain_req_init(SMB2_CREATE, tcon, (void **) &req, &total_len);
-
if (rc)
return rc;
- if (smb3_encryption_required(tcon))
- flags |= CIFS_TRANSFORM_REQ;
+ iov[0].iov_base = (char *)req;
+ /* -1 since last byte is buf[0] which is sent below (path) */
+ iov[0].iov_len = total_len - 1;
if (oparms->create_options & CREATE_OPTION_READONLY)
file_attributes |= ATTR_READONLY;
@@ -2124,11 +2131,6 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
req->ShareAccess = FILE_SHARE_ALL_LE;
req->CreateDisposition = cpu_to_le32(oparms->disposition);
req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK);
-
- iov[0].iov_base = (char *)req;
- /* -1 since last byte is buf[0] which is sent below (path) */
- iov[0].iov_len = total_len - 1;
-
req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req));
/* [MS-SMB2] 2.2.13 NameOffset:
@@ -2146,10 +2148,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
rc = alloc_path_with_tree_prefix(&copy_path, &copy_size,
&name_len,
tcon->treeName, path);
- if (rc) {
- cifs_small_buf_release(req);
+ if (rc)
return rc;
- }
req->NameLength = cpu_to_le16(name_len * 2);
uni_path_len = copy_size;
path = copy_path;
@@ -2157,18 +2157,16 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2;
/* MUST set path len (NameLength) to 0 opening root of share */
req->NameLength = cpu_to_le16(uni_path_len - 2);
- if (uni_path_len % 8 != 0) {
- copy_size = roundup(uni_path_len, 8);
- copy_path = kzalloc(copy_size, GFP_KERNEL);
- if (!copy_path) {
- cifs_small_buf_release(req);
- return -ENOMEM;
- }
- memcpy((char *)copy_path, (const char *)path,
- uni_path_len);
- uni_path_len = copy_size;
- path = copy_path;
- }
+ copy_size = uni_path_len;
+ if (copy_size % 8 != 0)
+ copy_size = roundup(copy_size, 8);
+ copy_path = kzalloc(copy_size, GFP_KERNEL);
+ if (!copy_path)
+ return -ENOMEM;
+ memcpy((char *)copy_path, (const char *)path,
+ uni_path_len);
+ uni_path_len = copy_size;
+ path = copy_path;
}
iov[1].iov_len = uni_path_len;
@@ -2183,12 +2181,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
else {
rc = add_lease_context(server, iov, &n_iov,
oparms->fid->lease_key, oplock);
- if (rc) {
- cifs_small_buf_release(req);
- kfree(copy_path);
+ if (rc)
return rc;
- }
- lc_buf = iov[n_iov-1].iov_base;
}
if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) {
@@ -2202,16 +2196,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
rc = add_durable_context(iov, &n_iov, oparms,
tcon->use_persistent);
- if (rc) {
- cifs_small_buf_release(req);
- kfree(copy_path);
- kfree(lc_buf);
+ if (rc)
return rc;
- }
- dhc_buf = iov[n_iov-1].iov_base;
}
-#ifdef CONFIG_CIFS_SMB311
if (tcon->posix_extensions) {
if (n_iov > 2) {
struct create_context *ccontext =
@@ -2221,24 +2209,79 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
}
rc = add_posix_context(iov, &n_iov, oparms->mode);
- if (rc) {
- cifs_small_buf_release(req);
- kfree(copy_path);
- kfree(lc_buf);
- kfree(dhc_buf);
+ if (rc)
return rc;
+ }
+
+ if (tcon->snapshot_time) {
+ cifs_dbg(FYI, "adding snapshot context\n");
+ if (n_iov > 2) {
+ struct create_context *ccontext =
+ (struct create_context *)iov[n_iov-1].iov_base;
+ ccontext->Next =
+ cpu_to_le32(iov[n_iov-1].iov_len);
}
- pc_buf = iov[n_iov-1].iov_base;
+
+ rc = add_twarp_context(iov, &n_iov, tcon->snapshot_time);
+ if (rc)
+ return rc;
}
-#endif /* SMB311 */
+
+
+ rqst->rq_nvec = n_iov;
+ return 0;
+}
+
+/* rq_iov[0] is the request and is released by cifs_small_buf_release().
+ * All other vectors are freed by kfree().
+ */
+void
+SMB2_open_free(struct smb_rqst *rqst)
+{
+ int i;
+
+ cifs_small_buf_release(rqst->rq_iov[0].iov_base);
+ for (i = 1; i < rqst->rq_nvec; i++)
+ if (rqst->rq_iov[i].iov_base != smb2_padding)
+ kfree(rqst->rq_iov[i].iov_base);
+}
+
+int
+SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
+ __u8 *oplock, struct smb2_file_all_info *buf,
+ struct kvec *err_iov, int *buftype)
+{
+ struct smb_rqst rqst;
+ struct smb2_create_rsp *rsp = NULL;
+ struct TCP_Server_Info *server;
+ struct cifs_tcon *tcon = oparms->tcon;
+ struct cifs_ses *ses = tcon->ses;
+ struct kvec iov[5]; /* make sure at least one for each open context */
+ struct kvec rsp_iov = {NULL, 0};
+ int resp_buftype;
+ int rc = 0;
+ int flags = 0;
+
+ cifs_dbg(FYI, "create/open\n");
+ if (ses && (ses->server))
+ server = ses->server;
+ else
+ return -EIO;
+
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
memset(&rqst, 0, sizeof(struct smb_rqst));
+ memset(&iov, 0, sizeof(iov));
rqst.rq_iov = iov;
- rqst.rq_nvec = n_iov;
+ rqst.rq_nvec = 5;
+
+ rc = SMB2_open_init(tcon, &rqst, oplock, oparms, path);
+ if (rc)
+ goto creat_exit;
rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
&rsp_iov);
- cifs_small_buf_release(req);
rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
if (rc != 0) {
@@ -2275,10 +2318,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
else
*oplock = rsp->OplockLevel;
creat_exit:
- kfree(copy_path);
- kfree(lc_buf);
- kfree(dhc_buf);
- kfree(pc_buf);
+ SMB2_open_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
return rc;
}
@@ -2469,43 +2509,62 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
}
int
+SMB2_close_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+ u64 persistent_fid, u64 volatile_fid)
+{
+ struct smb2_close_req *req;
+ struct kvec *iov = rqst->rq_iov;
+ unsigned int total_len;
+ int rc;
+
+ rc = smb2_plain_req_init(SMB2_CLOSE, tcon, (void **) &req, &total_len);
+ if (rc)
+ return rc;
+
+ req->PersistentFileId = persistent_fid;
+ req->VolatileFileId = volatile_fid;
+ iov[0].iov_base = (char *)req;
+ iov[0].iov_len = total_len;
+
+ return 0;
+}
+
+void
+SMB2_close_free(struct smb_rqst *rqst)
+{
+ cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
+}
+
+int
SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, int flags)
{
struct smb_rqst rqst;
- struct smb2_close_req *req;
- struct smb2_close_rsp *rsp;
+ struct smb2_close_rsp *rsp = NULL;
struct cifs_ses *ses = tcon->ses;
struct kvec iov[1];
struct kvec rsp_iov;
int resp_buftype;
int rc = 0;
- unsigned int total_len;
cifs_dbg(FYI, "Close\n");
if (!ses || !(ses->server))
return -EIO;
- rc = smb2_plain_req_init(SMB2_CLOSE, tcon, (void **) &req, &total_len);
- if (rc)
- return rc;
-
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- req->PersistentFileId = persistent_fid;
- req->VolatileFileId = volatile_fid;
-
- iov[0].iov_base = (char *)req;
- iov[0].iov_len = total_len;
-
memset(&rqst, 0, sizeof(struct smb_rqst));
+ memset(&iov, 0, sizeof(iov));
rqst.rq_iov = iov;
rqst.rq_nvec = 1;
+ rc = SMB2_close_init(tcon, &rqst, persistent_fid, volatile_fid);
+ if (rc)
+ goto close_exit;
+
rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(req);
rsp = (struct smb2_close_rsp *)rsp_iov.iov_base;
if (rc != 0) {
@@ -2518,6 +2577,7 @@ SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
/* BB FIXME - decode close response, update inode for caching */
close_exit:
+ SMB2_close_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
return rc;
}
@@ -2529,9 +2589,9 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
return SMB2_close_flags(xid, tcon, persistent_fid, volatile_fid, 0);
}
-static int
-validate_iov(unsigned int offset, unsigned int buffer_length,
- struct kvec *iov, unsigned int min_buf_size)
+int
+smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
+ struct kvec *iov, unsigned int min_buf_size)
{
unsigned int smb_len = iov->iov_len;
char *end_of_smb = smb_len + (char *)iov->iov_base;
@@ -2575,7 +2635,7 @@ validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
if (!data)
return -EINVAL;
- rc = validate_iov(offset, buffer_length, iov, minbufsize);
+ rc = smb2_validate_iov(offset, buffer_length, iov, minbufsize);
if (rc)
return rc;
@@ -2584,36 +2644,22 @@ validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
return 0;
}
-static int
-query_info(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid, u8 info_class, u8 info_type,
- u32 additional_info, size_t output_len, size_t min_len, void **data,
- u32 *dlen)
+int
+SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+ u64 persistent_fid, u64 volatile_fid,
+ u8 info_class, u8 info_type, u32 additional_info,
+ size_t output_len)
{
- struct smb_rqst rqst;
struct smb2_query_info_req *req;
- struct smb2_query_info_rsp *rsp = NULL;
- struct kvec iov[2];
- struct kvec rsp_iov;
- int rc = 0;
- int resp_buftype;
- struct cifs_ses *ses = tcon->ses;
- int flags = 0;
+ struct kvec *iov = rqst->rq_iov;
unsigned int total_len;
-
- cifs_dbg(FYI, "Query Info\n");
-
- if (!ses || !(ses->server))
- return -EIO;
+ int rc;
rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, (void **) &req,
&total_len);
if (rc)
return rc;
- if (smb3_encryption_required(tcon))
- flags |= CIFS_TRANSFORM_REQ;
-
req->InfoType = info_type;
req->FileInfoClass = info_class;
req->PersistentFileId = persistent_fid;
@@ -2630,13 +2676,50 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
iov[0].iov_base = (char *)req;
/* 1 for Buffer */
iov[0].iov_len = total_len - 1;
+ return 0;
+}
+
+void
+SMB2_query_info_free(struct smb_rqst *rqst)
+{
+ cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
+}
+
+static int
+query_info(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, u8 info_class, u8 info_type,
+ u32 additional_info, size_t output_len, size_t min_len, void **data,
+ u32 *dlen)
+{
+ struct smb_rqst rqst;
+ struct smb2_query_info_rsp *rsp = NULL;
+ struct kvec iov[1];
+ struct kvec rsp_iov;
+ int rc = 0;
+ int resp_buftype;
+ struct cifs_ses *ses = tcon->ses;
+ int flags = 0;
+
+ cifs_dbg(FYI, "Query Info\n");
+
+ if (!ses || !(ses->server))
+ return -EIO;
+
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
memset(&rqst, 0, sizeof(struct smb_rqst));
+ memset(&iov, 0, sizeof(iov));
rqst.rq_iov = iov;
rqst.rq_nvec = 1;
+ rc = SMB2_query_info_init(tcon, &rqst, persistent_fid, volatile_fid,
+ info_class, info_type, additional_info,
+ output_len);
+ if (rc)
+ goto qinf_exit;
+
rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(req);
rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
if (rc) {
@@ -2665,6 +2748,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
&rsp_iov, min_len, *data);
qinf_exit:
+ SMB2_query_info_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
return rc;
}
@@ -3623,9 +3707,9 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
goto qdir_exit;
}
- rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
- le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
- info_buf_size);
+ rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
+ le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
+ info_buf_size);
if (rc)
goto qdir_exit;
@@ -3927,9 +4011,9 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
-static void
-copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf,
- struct kstatfs *kst)
+void
+smb2_copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf,
+ struct kstatfs *kst)
{
kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) *
le32_to_cpu(pfs_inf->SectorsPerAllocationUnit);
@@ -3939,6 +4023,25 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf,
return;
}
+static void
+copy_posix_fs_info_to_kstatfs(FILE_SYSTEM_POSIX_INFO *response_data,
+ struct kstatfs *kst)
+{
+ kst->f_bsize = le32_to_cpu(response_data->BlockSize);
+ kst->f_blocks = le64_to_cpu(response_data->TotalBlocks);
+ kst->f_bfree = le64_to_cpu(response_data->BlocksAvail);
+ if (response_data->UserBlocksAvail == cpu_to_le64(-1))
+ kst->f_bavail = kst->f_bfree;
+ else
+ kst->f_bavail = le64_to_cpu(response_data->UserBlocksAvail);
+ if (response_data->TotalFileNodes != cpu_to_le64(-1))
+ kst->f_files = le64_to_cpu(response_data->TotalFileNodes);
+ if (response_data->FreeFileNodes != cpu_to_le64(-1))
+ kst->f_ffree = le64_to_cpu(response_data->FreeFileNodes);
+
+ return;
+}
+
static int
build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
int outbuf_len, u64 persistent_fid, u64 volatile_fid)
@@ -3976,6 +4079,54 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
}
int
+SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata)
+{
+ struct smb_rqst rqst;
+ struct smb2_query_info_rsp *rsp = NULL;
+ struct kvec iov;
+ struct kvec rsp_iov;
+ int rc = 0;
+ int resp_buftype;
+ struct cifs_ses *ses = tcon->ses;
+ FILE_SYSTEM_POSIX_INFO *info = NULL;
+ int flags = 0;
+
+ rc = build_qfs_info_req(&iov, tcon, FS_POSIX_INFORMATION,
+ sizeof(FILE_SYSTEM_POSIX_INFO),
+ persistent_fid, volatile_fid);
+ if (rc)
+ return rc;
+
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ memset(&rqst, 0, sizeof(struct smb_rqst));
+ rqst.rq_iov = &iov;
+ rqst.rq_nvec = 1;
+
+ rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
+ cifs_small_buf_release(iov.iov_base);
+ if (rc) {
+ cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
+ goto posix_qfsinf_exit;
+ }
+ rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
+
+ info = (FILE_SYSTEM_POSIX_INFO *)(
+ le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
+ rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
+ le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
+ sizeof(FILE_SYSTEM_POSIX_INFO));
+ if (!rc)
+ copy_posix_fs_info_to_kstatfs(info, fsdata);
+
+posix_qfsinf_exit:
+ free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+ return rc;
+}
+
+int
SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata)
{
@@ -4012,11 +4163,11 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
info = (struct smb2_fs_full_size_info *)(
le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
- rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
- le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
- sizeof(struct smb2_fs_full_size_info));
+ rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
+ le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
+ sizeof(struct smb2_fs_full_size_info));
if (!rc)
- copy_fs_info_to_kstatfs(info, fsdata);
+ smb2_copy_fs_info_to_kstatfs(info, fsdata);
qfsinf_exit:
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
@@ -4046,6 +4197,9 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
} else if (level == FS_SECTOR_SIZE_INFORMATION) {
max_len = sizeof(struct smb3_fs_ss_info);
min_len = sizeof(struct smb3_fs_ss_info);
+ } else if (level == FS_VOLUME_INFORMATION) {
+ max_len = sizeof(struct smb3_fs_vol_info) + MAX_VOL_LABEL_LEN;
+ min_len = sizeof(struct smb3_fs_vol_info);
} else {
cifs_dbg(FYI, "Invalid qfsinfo level %d\n", level);
return -EINVAL;
@@ -4073,7 +4227,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
rsp_len = le32_to_cpu(rsp->OutputBufferLength);
offset = le16_to_cpu(rsp->OutputBufferOffset);
- rc = validate_iov(offset, rsp_len, &rsp_iov, min_len);
+ rc = smb2_validate_iov(offset, rsp_len, &rsp_iov, min_len);
if (rc)
goto qfsattr_exit;
@@ -4090,6 +4244,11 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
tcon->ss_flags = le32_to_cpu(ss_info->Flags);
tcon->perf_sector_size =
le32_to_cpu(ss_info->PhysicalBytesPerSectorForPerf);
+ } else if (level == FS_VOLUME_INFORMATION) {
+ struct smb3_fs_vol_info *vol_info = (struct smb3_fs_vol_info *)
+ (offset + (char *)rsp);
+ tcon->vol_serial_number = vol_info->VolumeSerialNumber;
+ tcon->vol_create_time = vol_info->VolumeCreationTime;
}
qfsattr_exit:
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index a671adcc44a6..a2eeae9e0432 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -153,6 +153,8 @@ struct smb2_transform_hdr {
*
*/
+#define COMPOUND_FID 0xFFFFFFFFFFFFFFFFULL
+
#define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
struct smb2_err_rsp {
@@ -765,6 +767,14 @@ struct create_durable_handle_reconnect_v2 {
struct durable_reconnect_context_v2 dcontext;
} __packed;
+/* See MS-SMB2 2.2.13.2.5 */
+struct crt_twarp_ctxt {
+ struct create_context ccontext;
+ __u8 Name[8];
+ __le64 Timestamp;
+
+} __packed;
+
#define COPY_CHUNK_RES_KEY_SIZE 24
struct resume_key_req {
char ResumeKey[COPY_CHUNK_RES_KEY_SIZE];
@@ -1223,6 +1233,7 @@ struct smb2_lease_ack {
#define FS_DRIVER_PATH_INFORMATION 9 /* Local only */
#define FS_VOLUME_FLAGS_INFORMATION 10 /* Local only */
#define FS_SECTOR_SIZE_INFORMATION 11 /* SMB3 or later. Query */
+#define FS_POSIX_INFORMATION 100 /* SMB3.1.1 POSIX. Query */
struct smb2_fs_full_size_info {
__le64 TotalAllocationUnits;
@@ -1248,6 +1259,17 @@ struct smb3_fs_ss_info {
__le32 ByteOffsetForPartitionAlignment;
} __packed;
+/* volume info struct - see MS-FSCC 2.5.9 */
+#define MAX_VOL_LABEL_LEN 32
+struct smb3_fs_vol_info {
+ __le64 VolumeCreationTime;
+ __u32 VolumeSerialNumber;
+ __le32 VolumeLabelLength; /* includes trailing null */
+ __u8 SupportsObjects; /* True if eg like NTFS, supports objects */
+ __u8 Reserved;
+ __u8 VolumeLabel[0]; /* variable len */
+} __packed;
+
/* partial list of QUERY INFO levels */
#define FILE_DIRECTORY_INFORMATION 1
#define FILE_FULL_DIRECTORY_INFORMATION 2
@@ -1361,4 +1383,6 @@ struct smb2_file_eof_info { /* encoding of request for level 10 */
__le64 EndOfFile; /* new end of file value */
} __packed; /* level 20 Set */
+extern char smb2_padding[7];
+
#endif /* _SMB2PDU_H */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 6e6a4f2ec890..b4076577eeb7 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -68,6 +68,7 @@ extern int smb3_handle_read_data(struct TCP_Server_Info *server,
extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *pfid);
+extern void close_shroot(struct cached_fid *cfid);
extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
struct smb2_file_all_info *src);
extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
@@ -132,6 +133,10 @@ extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms,
__le16 *path, __u8 *oplock,
struct smb2_file_all_info *buf,
struct kvec *err_iov, int *resp_buftype);
+extern int SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+ __u8 *oplock, struct cifs_open_parms *oparms,
+ __le16 *path);
+extern void SMB2_open_free(struct smb_rqst *rqst);
extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, u32 opcode,
bool is_fsctl, char *in_data, u32 indatalen,
@@ -140,6 +145,9 @@ extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
extern int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, int flags);
+extern int SMB2_close_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+ u64 persistent_file_id, u64 volatile_file_id);
+extern void SMB2_close_free(struct smb_rqst *rqst);
extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
@@ -149,6 +157,11 @@ extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
struct smb2_file_all_info *data);
+extern int SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+ u64 persistent_fid, u64 volatile_fid,
+ u8 info_class, u8 info_type,
+ u32 additional_info, size_t output_len);
+extern void SMB2_query_info_free(struct smb_rqst *rqst);
extern int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
void **data, unsigned int *plen);
@@ -197,6 +210,9 @@ void smb2_cancelled_close_fid(struct work_struct *work);
extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
struct kstatfs *FSData);
+extern int SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_file_id, u64 volatile_file_id,
+ struct kstatfs *FSData);
extern int SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id, int lvl);
extern int SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon,
@@ -213,9 +229,13 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
enum securityEnum);
-#ifdef CONFIG_CIFS_SMB311
+extern int smb3_encryption_required(const struct cifs_tcon *tcon);
+extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
+ struct kvec *iov, unsigned int min_buf_size);
+extern void smb2_copy_fs_info_to_kstatfs(
+ struct smb2_fs_full_size_info *pfs_inf,
+ struct kstatfs *kst);
extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server);
extern int smb311_update_preauth_hash(struct cifs_ses *ses,
struct kvec *iov, int nvec);
-#endif
#endif /* _SMB2PROTO_H */
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 719d55e63d88..7b351c65ee46 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -70,7 +70,6 @@ err:
return rc;
}
-#ifdef CONFIG_CIFS_SMB311
int
smb311_crypto_shash_allocate(struct TCP_Server_Info *server)
{
@@ -98,7 +97,6 @@ err:
cifs_free_hash(&p->hmacsha256, &p->sdeschmacsha256);
return rc;
}
-#endif
static struct cifs_ses *
smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id)
@@ -173,7 +171,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
struct kvec *iov = rqst->rq_iov;
struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base;
struct cifs_ses *ses;
- struct shash_desc *shash = &server->secmech.sdeschmacsha256->shash;
+ struct shash_desc *shash;
struct smb_rqst drqst;
ses = smb2_find_smb_ses(server, shdr->SessionId);
@@ -187,7 +185,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
rc = smb2_crypto_shash_allocate(server);
if (rc) {
- cifs_dbg(VFS, "%s: shah256 alloc failed\n", __func__);
+ cifs_dbg(VFS, "%s: sha256 alloc failed\n", __func__);
return rc;
}
@@ -198,6 +196,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
return rc;
}
+ shash = &server->secmech.sdeschmacsha256->shash;
rc = crypto_shash_init(shash);
if (rc) {
cifs_dbg(VFS, "%s: Could not init sha256", __func__);
@@ -395,7 +394,6 @@ generate_smb30signingkey(struct cifs_ses *ses)
return generate_smb3signingkey(ses, &triplet);
}
-#ifdef CONFIG_CIFS_SMB311
int
generate_smb311signingkey(struct cifs_ses *ses)
@@ -423,7 +421,6 @@ generate_smb311signingkey(struct cifs_ses *ses)
return generate_smb3signingkey(ses, &triplet);
}
-#endif /* 311 */
int
smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index 67e413f6ee4d..d4aed5217a56 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -281,6 +281,44 @@ DEFINE_EVENT(smb3_cmd_done_class, smb3_##name, \
TP_ARGS(tid, sesid, cmd, mid))
DEFINE_SMB3_CMD_DONE_EVENT(cmd_done);
+DEFINE_SMB3_CMD_DONE_EVENT(ses_expired);
+
+DECLARE_EVENT_CLASS(smb3_mid_class,
+ TP_PROTO(__u16 cmd,
+ __u64 mid,
+ __u32 pid,
+ unsigned long when_sent,
+ unsigned long when_received),
+ TP_ARGS(cmd, mid, pid, when_sent, when_received),
+ TP_STRUCT__entry(
+ __field(__u16, cmd)
+ __field(__u64, mid)
+ __field(__u32, pid)
+ __field(unsigned long, when_sent)
+ __field(unsigned long, when_received)
+ ),
+ TP_fast_assign(
+ __entry->cmd = cmd;
+ __entry->mid = mid;
+ __entry->pid = pid;
+ __entry->when_sent = when_sent;
+ __entry->when_received = when_received;
+ ),
+ TP_printk("\tcmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu",
+ __entry->cmd, __entry->mid, __entry->pid, __entry->when_sent,
+ __entry->when_received)
+)
+
+#define DEFINE_SMB3_MID_EVENT(name) \
+DEFINE_EVENT(smb3_mid_class, smb3_##name, \
+ TP_PROTO(__u16 cmd, \
+ __u64 mid, \
+ __u32 pid, \
+ unsigned long when_sent, \
+ unsigned long when_received), \
+ TP_ARGS(cmd, mid, pid, when_sent, when_received))
+
+DEFINE_SMB3_MID_EVENT(slow_rsp);
DECLARE_EVENT_CLASS(smb3_exit_err_class,
TP_PROTO(unsigned int xid,
@@ -422,6 +460,32 @@ DEFINE_EVENT(smb3_open_done_class, smb3_##name, \
DEFINE_SMB3_OPEN_DONE_EVENT(open_done);
DEFINE_SMB3_OPEN_DONE_EVENT(posix_mkdir_done);
+DECLARE_EVENT_CLASS(smb3_reconnect_class,
+ TP_PROTO(__u64 currmid,
+ char *hostname),
+ TP_ARGS(currmid, hostname),
+ TP_STRUCT__entry(
+ __field(__u64, currmid)
+ __field(char *, hostname)
+ ),
+ TP_fast_assign(
+ __entry->currmid = currmid;
+ __entry->hostname = hostname;
+ ),
+ TP_printk("server=%s current_mid=0x%llx",
+ __entry->hostname,
+ __entry->currmid)
+)
+
+#define DEFINE_SMB3_RECONNECT_EVENT(name) \
+DEFINE_EVENT(smb3_reconnect_class, smb3_##name, \
+ TP_PROTO(__u64 currmid, \
+ char *hostname), \
+ TP_ARGS(currmid, hostname))
+
+DEFINE_SMB3_RECONNECT_EVENT(reconnect);
+DEFINE_SMB3_RECONNECT_EVENT(partial_send_reconnect);
+
#endif /* _CIFS_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index a341ec839c83..78f96fa3d7d9 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -115,8 +115,17 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
now = jiffies;
/* commands taking longer than one second are indications that
something is wrong, unless it is quite a slow link or server */
- if (time_after(now, midEntry->when_alloc + HZ)) {
- if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) {
+ if (time_after(now, midEntry->when_alloc + HZ) &&
+ (midEntry->command != command)) {
+ /* smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command */
+ if ((le16_to_cpu(midEntry->command) < NUMBER_OF_SMB2_COMMANDS) &&
+ (le16_to_cpu(midEntry->command) >= 0))
+ cifs_stats_inc(&midEntry->server->smb2slowcmd[le16_to_cpu(midEntry->command)]);
+
+ trace_smb3_slow_rsp(le16_to_cpu(midEntry->command),
+ midEntry->mid, midEntry->pid,
+ midEntry->when_sent, midEntry->when_received);
+ if (cifsFYI & CIFS_TIMER) {
pr_debug(" CIFS slow rsp: cmd %d mid %llu",
midEntry->command, midEntry->mid);
pr_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
@@ -361,6 +370,8 @@ uncork:
* socket so the server throws away the partial SMB
*/
server->tcpStatus = CifsNeedReconnect;
+ trace_smb3_partial_send_reconnect(server->CurrentMid,
+ server->hostname);
}
smbd_done:
if (rc < 0 && rc != -EINTR)
@@ -373,26 +384,42 @@ smbd_done:
}
static int
-smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst, int flags)
+smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
+ struct smb_rqst *rqst, int flags)
{
- struct smb_rqst cur_rqst;
+ struct kvec iov;
+ struct smb2_transform_hdr tr_hdr;
+ struct smb_rqst cur_rqst[MAX_COMPOUND];
int rc;
if (!(flags & CIFS_TRANSFORM_REQ))
- return __smb_send_rqst(server, 1, rqst);
+ return __smb_send_rqst(server, num_rqst, rqst);
- if (!server->ops->init_transform_rq ||
- !server->ops->free_transform_rq) {
- cifs_dbg(VFS, "Encryption requested but transform callbacks are missed\n");
+ if (num_rqst > MAX_COMPOUND - 1)
+ return -ENOMEM;
+
+ memset(&cur_rqst[0], 0, sizeof(cur_rqst));
+ memset(&iov, 0, sizeof(iov));
+ memset(&tr_hdr, 0, sizeof(tr_hdr));
+
+ iov.iov_base = &tr_hdr;
+ iov.iov_len = sizeof(tr_hdr);
+ cur_rqst[0].rq_iov = &iov;
+ cur_rqst[0].rq_nvec = 1;
+
+ if (!server->ops->init_transform_rq) {
+ cifs_dbg(VFS, "Encryption requested but transform callback "
+ "is missing\n");
return -EIO;
}
- rc = server->ops->init_transform_rq(server, &cur_rqst, rqst);
+ rc = server->ops->init_transform_rq(server, num_rqst + 1,
+ &cur_rqst[0], rqst);
if (rc)
return rc;
- rc = __smb_send_rqst(server, 1, &cur_rqst);
- server->ops->free_transform_rq(&cur_rqst);
+ rc = __smb_send_rqst(server, num_rqst + 1, &cur_rqst[0]);
+ smb3_free_compound_rqst(num_rqst, &cur_rqst[1]);
return rc;
}
@@ -606,7 +633,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
*/
cifs_save_when_sent(mid);
cifs_in_send_inc(server);
- rc = smb_send_rqst(server, rqst, flags);
+ rc = smb_send_rqst(server, 1, rqst, flags);
cifs_in_send_dec(server);
if (rc < 0) {
@@ -746,20 +773,21 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
}
int
-cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
- struct smb_rqst *rqst, int *resp_buf_type, const int flags,
- struct kvec *resp_iov)
+compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
+ const int flags, const int num_rqst, struct smb_rqst *rqst,
+ int *resp_buf_type, struct kvec *resp_iov)
{
- int rc = 0;
+ int i, j, rc = 0;
int timeout, optype;
- struct mid_q_entry *midQ;
+ struct mid_q_entry *midQ[MAX_COMPOUND];
unsigned int credits = 1;
char *buf;
timeout = flags & CIFS_TIMEOUT_MASK;
optype = flags & CIFS_OP_MASK;
- *resp_buf_type = CIFS_NO_BUFFER; /* no response buf yet */
+ for (i = 0; i < num_rqst; i++)
+ resp_buf_type[i] = CIFS_NO_BUFFER; /* no response buf yet */
if ((ses == NULL) || (ses->server == NULL)) {
cifs_dbg(VFS, "Null session\n");
@@ -786,98 +814,117 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
mutex_lock(&ses->server->srv_mutex);
- midQ = ses->server->ops->setup_request(ses, rqst);
- if (IS_ERR(midQ)) {
- mutex_unlock(&ses->server->srv_mutex);
- /* Update # of requests on wire to server */
- add_credits(ses->server, 1, optype);
- return PTR_ERR(midQ);
+ for (i = 0; i < num_rqst; i++) {
+ midQ[i] = ses->server->ops->setup_request(ses, &rqst[i]);
+ if (IS_ERR(midQ[i])) {
+ for (j = 0; j < i; j++)
+ cifs_delete_mid(midQ[j]);
+ mutex_unlock(&ses->server->srv_mutex);
+ /* Update # of requests on wire to server */
+ add_credits(ses->server, 1, optype);
+ return PTR_ERR(midQ[i]);
+ }
+
+ midQ[i]->mid_state = MID_REQUEST_SUBMITTED;
}
- midQ->mid_state = MID_REQUEST_SUBMITTED;
cifs_in_send_inc(ses->server);
- rc = smb_send_rqst(ses->server, rqst, flags);
+ rc = smb_send_rqst(ses->server, num_rqst, rqst, flags);
cifs_in_send_dec(ses->server);
- cifs_save_when_sent(midQ);
+
+ for (i = 0; i < num_rqst; i++)
+ cifs_save_when_sent(midQ[i]);
if (rc < 0)
ses->server->sequence_number -= 2;
+
mutex_unlock(&ses->server->srv_mutex);
- if (rc < 0)
- goto out;
+ for (i = 0; i < num_rqst; i++) {
+ if (rc < 0)
+ goto out;
-#ifdef CONFIG_CIFS_SMB311
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
- smb311_update_preauth_hash(ses, rqst->rq_iov,
- rqst->rq_nvec);
-#endif
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
+ smb311_update_preauth_hash(ses, rqst[i].rq_iov,
+ rqst[i].rq_nvec);
- if (timeout == CIFS_ASYNC_OP)
- goto out;
+ if (timeout == CIFS_ASYNC_OP)
+ goto out;
- rc = wait_for_response(ses->server, midQ);
- if (rc != 0) {
- cifs_dbg(FYI, "Cancelling wait for mid %llu\n", midQ->mid);
- send_cancel(ses->server, rqst, midQ);
- spin_lock(&GlobalMid_Lock);
- if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
- midQ->mid_flags |= MID_WAIT_CANCELLED;
- midQ->callback = DeleteMidQEntry;
+ rc = wait_for_response(ses->server, midQ[i]);
+ if (rc != 0) {
+ cifs_dbg(FYI, "Cancelling wait for mid %llu\n",
+ midQ[i]->mid);
+ send_cancel(ses->server, &rqst[i], midQ[i]);
+ spin_lock(&GlobalMid_Lock);
+ if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) {
+ midQ[i]->mid_flags |= MID_WAIT_CANCELLED;
+ midQ[i]->callback = DeleteMidQEntry;
+ spin_unlock(&GlobalMid_Lock);
+ add_credits(ses->server, 1, optype);
+ return rc;
+ }
spin_unlock(&GlobalMid_Lock);
+ }
+
+ rc = cifs_sync_mid_result(midQ[i], ses->server);
+ if (rc != 0) {
add_credits(ses->server, 1, optype);
return rc;
}
- spin_unlock(&GlobalMid_Lock);
- }
-
- rc = cifs_sync_mid_result(midQ, ses->server);
- if (rc != 0) {
- add_credits(ses->server, 1, optype);
- return rc;
- }
-
- if (!midQ->resp_buf || midQ->mid_state != MID_RESPONSE_RECEIVED) {
- rc = -EIO;
- cifs_dbg(FYI, "Bad MID state?\n");
- goto out;
- }
- buf = (char *)midQ->resp_buf;
- resp_iov->iov_base = buf;
- resp_iov->iov_len = midQ->resp_buf_size +
- ses->server->vals->header_preamble_size;
- if (midQ->large_buf)
- *resp_buf_type = CIFS_LARGE_BUFFER;
- else
- *resp_buf_type = CIFS_SMALL_BUFFER;
+ if (!midQ[i]->resp_buf ||
+ midQ[i]->mid_state != MID_RESPONSE_RECEIVED) {
+ rc = -EIO;
+ cifs_dbg(FYI, "Bad MID state?\n");
+ goto out;
+ }
-#ifdef CONFIG_CIFS_SMB311
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
- struct kvec iov = {
- .iov_base = resp_iov->iov_base,
- .iov_len = resp_iov->iov_len
- };
- smb311_update_preauth_hash(ses, &iov, 1);
- }
-#endif
+ buf = (char *)midQ[i]->resp_buf;
+ resp_iov[i].iov_base = buf;
+ resp_iov[i].iov_len = midQ[i]->resp_buf_size +
+ ses->server->vals->header_preamble_size;
+
+ if (midQ[i]->large_buf)
+ resp_buf_type[i] = CIFS_LARGE_BUFFER;
+ else
+ resp_buf_type[i] = CIFS_SMALL_BUFFER;
+
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
+ struct kvec iov = {
+ .iov_base = resp_iov[i].iov_base,
+ .iov_len = resp_iov[i].iov_len
+ };
+ smb311_update_preauth_hash(ses, &iov, 1);
+ }
- credits = ses->server->ops->get_credits(midQ);
+ credits = ses->server->ops->get_credits(midQ[i]);
- rc = ses->server->ops->check_receive(midQ, ses->server,
- flags & CIFS_LOG_ERROR);
+ rc = ses->server->ops->check_receive(midQ[i], ses->server,
+ flags & CIFS_LOG_ERROR);
- /* mark it so buf will not be freed by cifs_delete_mid */
- if ((flags & CIFS_NO_RESP) == 0)
- midQ->resp_buf = NULL;
+ /* mark it so buf will not be freed by cifs_delete_mid */
+ if ((flags & CIFS_NO_RESP) == 0)
+ midQ[i]->resp_buf = NULL;
+ }
out:
- cifs_delete_mid(midQ);
+ for (i = 0; i < num_rqst; i++)
+ cifs_delete_mid(midQ[i]);
add_credits(ses->server, credits, optype);
return rc;
}
int
+cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
+ struct smb_rqst *rqst, int *resp_buf_type, const int flags,
+ struct kvec *resp_iov)
+{
+ return compound_send_recv(xid, ses, flags, 1, rqst, resp_buf_type,
+ resp_iov);
+}
+
+int
SendReceive2(const unsigned int xid, struct cifs_ses *ses,
struct kvec *iov, int n_vec, int *resp_buf_type /* ret */,
const int flags, struct kvec *resp_iov)
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 316af84674f1..50ddb795aaeb 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -35,6 +35,14 @@
#define CIFS_XATTR_CIFS_ACL "system.cifs_acl"
#define CIFS_XATTR_ATTRIB "cifs.dosattrib" /* full name: user.cifs.dosattrib */
#define CIFS_XATTR_CREATETIME "cifs.creationtime" /* user.cifs.creationtime */
+/*
+ * Although these three are just aliases for the above, need to move away from
+ * confusing users and using the 20+ year old term 'cifs' when it is no longer
+ * secure, replaced by SMB2 (then even more highly secure SMB3) many years ago
+ */
+#define SMB3_XATTR_CIFS_ACL "system.smb3_acl"
+#define SMB3_XATTR_ATTRIB "smb3.dosattrib" /* full name: user.smb3.dosattrib */
+#define SMB3_XATTR_CREATETIME "smb3.creationtime" /* user.smb3.creationtime */
/* BB need to add server (Samba e.g) support for security and trusted prefix */
enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT };
@@ -220,10 +228,12 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
switch (handler->flags) {
case XATTR_USER:
cifs_dbg(FYI, "%s:querying user xattr %s\n", __func__, name);
- if (strcmp(name, CIFS_XATTR_ATTRIB) == 0) {
+ if ((strcmp(name, CIFS_XATTR_ATTRIB) == 0) ||
+ (strcmp(name, SMB3_XATTR_ATTRIB) == 0)) {
rc = cifs_attrib_get(dentry, inode, value, size);
break;
- } else if (strcmp(name, CIFS_XATTR_CREATETIME) == 0) {
+ } else if ((strcmp(name, CIFS_XATTR_CREATETIME) == 0) ||
+ (strcmp(name, SMB3_XATTR_CREATETIME) == 0)) {
rc = cifs_creation_time_get(dentry, inode, value, size);
break;
}
@@ -363,6 +373,19 @@ static const struct xattr_handler cifs_cifs_acl_xattr_handler = {
.set = cifs_xattr_set,
};
+/*
+ * Although this is just an alias for the above, need to move away from
+ * confusing users and using the 20 year old term 'cifs' when it is no
+ * longer secure and was replaced by SMB2/SMB3 a long time ago, and
+ * SMB3 and later are highly secure.
+ */
+static const struct xattr_handler smb3_acl_xattr_handler = {
+ .name = SMB3_XATTR_CIFS_ACL,
+ .flags = XATTR_CIFS_ACL,
+ .get = cifs_xattr_get,
+ .set = cifs_xattr_set,
+};
+
static const struct xattr_handler cifs_posix_acl_access_xattr_handler = {
.name = XATTR_NAME_POSIX_ACL_ACCESS,
.flags = XATTR_ACL_ACCESS,
@@ -381,6 +404,7 @@ const struct xattr_handler *cifs_xattr_handlers[] = {
&cifs_user_xattr_handler,
&cifs_os2_xattr_handler,
&cifs_cifs_acl_xattr_handler,
+ &smb3_acl_xattr_handler, /* alias for above since avoiding "cifs" */
&cifs_posix_acl_access_xattr_handler,
&cifs_posix_acl_default_xattr_handler,
NULL
diff --git a/fs/dax.c b/fs/dax.c
index 641192808bb6..897b51e41d8f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -566,7 +566,8 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
if (index >= end)
break;
- if (!radix_tree_exceptional_entry(pvec_ent))
+ if (WARN_ON_ONCE(
+ !radix_tree_exceptional_entry(pvec_ent)))
continue;
xa_lock_irq(&mapping->i_pages);
@@ -578,6 +579,13 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
if (page)
break;
}
+
+ /*
+ * We don't expect normal struct page entries to exist in our
+ * tree, but we keep these pagevec calls so that this code is
+ * consistent with the common pattern for handling pagevecs
+ * throughout the kernel.
+ */
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
index++;
diff --git a/fs/dcache.c b/fs/dcache.c
index 0e8e5de3c48a..8d2ec4898c2b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -358,14 +358,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
__releases(dentry->d_inode->i_lock)
{
struct inode *inode = dentry->d_inode;
- bool hashed = !d_unhashed(dentry);
- if (hashed)
- raw_write_seqcount_begin(&dentry->d_seq);
+ raw_write_seqcount_begin(&dentry->d_seq);
__d_clear_type_and_inode(dentry);
hlist_del_init(&dentry->d_u.d_alias);
- if (hashed)
- raw_write_seqcount_end(&dentry->d_seq);
+ raw_write_seqcount_end(&dentry->d_seq);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
if (!inode->i_nlink)
@@ -732,16 +729,16 @@ static inline bool fast_dput(struct dentry *dentry)
if (dentry->d_lockref.count > 1) {
dentry->d_lockref.count--;
spin_unlock(&dentry->d_lock);
- return 1;
+ return true;
}
- return 0;
+ return false;
}
/*
* If we weren't the last ref, we're done.
*/
if (ret)
- return 1;
+ return true;
/*
* Careful, careful. The reference count went down
@@ -770,7 +767,7 @@ static inline bool fast_dput(struct dentry *dentry)
/* Nothing to do? Dropping the reference was all we needed? */
if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
- return 1;
+ return true;
/*
* Not the fast normal case? Get the lock. We've already decremented
@@ -787,7 +784,7 @@ static inline bool fast_dput(struct dentry *dentry)
*/
if (dentry->d_lockref.count) {
spin_unlock(&dentry->d_lock);
- return 1;
+ return true;
}
/*
@@ -796,7 +793,7 @@ static inline bool fast_dput(struct dentry *dentry)
* set it to 1.
*/
dentry->d_lockref.count = 1;
- return 0;
+ return false;
}
@@ -1892,50 +1889,25 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
spin_lock(&inode->i_lock);
__d_instantiate(entry, inode);
WARN_ON(!(inode->i_state & I_NEW));
- inode->i_state &= ~I_NEW;
+ inode->i_state &= ~I_NEW & ~I_CREATING;
smp_mb();
wake_up_bit(&inode->i_state, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_instantiate_new);
-/**
- * d_instantiate_no_diralias - instantiate a non-aliased dentry
- * @entry: dentry to complete
- * @inode: inode to attach to this dentry
- *
- * Fill in inode information in the entry. If a directory alias is found, then
- * return an error (and drop inode). Together with d_materialise_unique() this
- * guarantees that a directory inode may never have more than one alias.
- */
-int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
-{
- BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
-
- security_d_instantiate(entry, inode);
- spin_lock(&inode->i_lock);
- if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
- spin_unlock(&inode->i_lock);
- iput(inode);
- return -EBUSY;
- }
- __d_instantiate(entry, inode);
- spin_unlock(&inode->i_lock);
-
- return 0;
-}
-EXPORT_SYMBOL(d_instantiate_no_diralias);
-
struct dentry *d_make_root(struct inode *root_inode)
{
struct dentry *res = NULL;
if (root_inode) {
res = d_alloc_anon(root_inode->i_sb);
- if (res)
+ if (res) {
+ res->d_flags |= DCACHE_RCUACCESS;
d_instantiate(res, root_inode);
- else
+ } else {
iput(root_inode);
+ }
}
return res;
}
@@ -2676,33 +2648,6 @@ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
}
EXPORT_SYMBOL(d_exact_alias);
-/**
- * dentry_update_name_case - update case insensitive dentry with a new name
- * @dentry: dentry to be updated
- * @name: new name
- *
- * Update a case insensitive dentry with new case of name.
- *
- * dentry must have been returned by d_lookup with name @name. Old and new
- * name lengths must match (ie. no d_compare which allows mismatched name
- * lengths).
- *
- * Parent inode i_mutex must be held over d_lookup and into this call (to
- * keep renames and concurrent inserts, and readdir(2) away).
- */
-void dentry_update_name_case(struct dentry *dentry, const struct qstr *name)
-{
- BUG_ON(!inode_is_locked(dentry->d_parent->d_inode));
- BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
-
- spin_lock(&dentry->d_lock);
- write_seqcount_begin(&dentry->d_seq);
- memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
- write_seqcount_end(&dentry->d_seq);
- spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL(dentry_update_name_case);
-
static void swap_names(struct dentry *dentry, struct dentry *target)
{
if (unlikely(dname_external(target))) {
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 71fccccf317e..8c6ab6c95727 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -86,7 +86,9 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
/* length of the variable name itself: remove GUID and separator */
namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1;
- uuid_le_to_bin(dentry->d_name.name + namelen + 1, &var->var.VendorGuid);
+ err = guid_parse(dentry->d_name.name + namelen + 1, &var->var.VendorGuid);
+ if (err)
+ goto out;
if (efivar_variable_is_removable(var->var.VendorGuid,
dentry->d_name.name, namelen))
diff --git a/fs/exec.c b/fs/exec.c
index 2d4e0075bd24..bdd0eacefdf5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -290,15 +290,15 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
struct vm_area_struct *vma = NULL;
struct mm_struct *mm = bprm->mm;
- bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ bprm->vma = vma = vm_area_alloc(mm);
if (!vma)
return -ENOMEM;
+ vma_set_anonymous(vma);
if (down_write_killable(&mm->mmap_sem)) {
err = -EINTR;
goto err_free;
}
- vma->vm_mm = mm;
/*
* Place the stack at the largest stack address the architecture
@@ -311,7 +311,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
vma->vm_start = vma->vm_end - PAGE_SIZE;
vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
- INIT_LIST_HEAD(&vma->anon_vma_chain);
err = insert_vm_struct(mm, vma);
if (err)
@@ -326,7 +325,7 @@ err:
up_write(&mm->mmap_sem);
err_free:
bprm->vma = NULL;
- kmem_cache_free(vm_area_cachep, vma);
+ vm_area_free(vma);
return err;
}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 6484199b35d1..5c3d7b7e4975 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -611,8 +611,7 @@ fail_drop:
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
clear_nlink(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
return ERR_PTR(err);
fail:
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 152453a91877..0c26dcc5d850 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -45,8 +45,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
return 0;
}
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
return err;
}
@@ -192,8 +191,7 @@ out:
out_fail:
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput (inode);
+ discard_new_inode(inode);
goto out;
}
@@ -261,8 +259,7 @@ out:
out_fail:
inode_dec_link_count(inode);
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
out_dir:
inode_dec_link_count(dir);
goto out;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e68cefe08261..e5d6ee61ff48 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -368,6 +368,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
return -EFSCORRUPTED;
ext4_lock_group(sb, block_group);
+ if (buffer_verified(bh))
+ goto verified;
if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
desc, bh))) {
ext4_unlock_group(sb, block_group);
@@ -386,6 +388,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
return -EFSCORRUPTED;
}
set_buffer_verified(bh);
+verified:
ext4_unlock_group(sb, block_group);
return 0;
}
@@ -423,9 +426,9 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
}
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
- ext4_error(sb, "Cannot get buffer for block bitmap - "
- "block_group = %u, block_bitmap = %llu",
- block_group, bitmap_blk);
+ ext4_warning(sb, "Cannot get buffer for block bitmap - "
+ "block_group = %u, block_bitmap = %llu",
+ block_group, bitmap_blk);
return ERR_PTR(-ENOMEM);
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7c7123f265c2..1fc013f3d944 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -789,17 +789,16 @@ struct move_extent {
* affected filesystem before 2242.
*/
-static inline __le32 ext4_encode_extra_time(struct timespec *time)
+static inline __le32 ext4_encode_extra_time(struct timespec64 *time)
{
- u32 extra = sizeof(time->tv_sec) > 4 ?
- ((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0;
+ u32 extra =((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK;
return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS));
}
-static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
+static inline void ext4_decode_extra_time(struct timespec64 *time,
+ __le32 extra)
{
- if (unlikely(sizeof(time->tv_sec) > 4 &&
- (extra & cpu_to_le32(EXT4_EPOCH_MASK)))) {
+ if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK))) {
#if 1
/* Handle legacy encoding of pre-1970 dates with epoch
@@ -821,9 +820,8 @@ static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
do { \
(raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {\
- struct timespec ts = timespec64_to_timespec((inode)->xtime); \
(raw_inode)->xtime ## _extra = \
- ext4_encode_extra_time(&ts); \
+ ext4_encode_extra_time(&(inode)->xtime); \
} \
} while (0)
@@ -840,10 +838,8 @@ do { \
do { \
(inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) { \
- struct timespec ts = timespec64_to_timespec((inode)->xtime); \
- ext4_decode_extra_time(&ts, \
+ ext4_decode_extra_time(&(inode)->xtime, \
raw_inode->xtime ## _extra); \
- (inode)->xtime = timespec_to_timespec64(ts); \
} \
else \
(inode)->xtime.tv_nsec = 0; \
@@ -993,9 +989,9 @@ struct ext4_inode_info {
/*
* File creation time. Its function is same as that of
- * struct timespec i_{a,c,m}time in the generic inode.
+ * struct timespec64 i_{a,c,m}time in the generic inode.
*/
- struct timespec i_crtime;
+ struct timespec64 i_crtime;
/* mballoc */
struct list_head i_prealloc_list;
@@ -1299,7 +1295,14 @@ struct ext4_super_block {
__le32 s_lpf_ino; /* Location of the lost+found inode */
__le32 s_prj_quota_inum; /* inode for tracking project quota */
__le32 s_checksum_seed; /* crc32c(uuid) if csum_seed set */
- __le32 s_reserved[98]; /* Padding to the end of the block */
+ __u8 s_wtime_hi;
+ __u8 s_mtime_hi;
+ __u8 s_mkfs_time_hi;
+ __u8 s_lastcheck_hi;
+ __u8 s_first_error_time_hi;
+ __u8 s_last_error_time_hi;
+ __u8 s_pad[2];
+ __le32 s_reserved[96]; /* Padding to the end of the block */
__le32 s_checksum; /* crc32c(superblock) */
};
@@ -2456,6 +2459,7 @@ extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern int ext4_inode_attach_jinode(struct inode *inode);
extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
+extern int ext4_break_layouts(struct inode *);
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 8ce6fd5b10dd..72a361d5ef74 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4826,6 +4826,13 @@ static long ext4_zero_range(struct file *file, loff_t offset,
* released from page cache.
*/
down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ ret = ext4_break_layouts(inode);
+ if (ret) {
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ goto out_mutex;
+ }
+
ret = ext4_update_disksize_before_punch(inode, offset, len);
if (ret) {
up_write(&EXT4_I(inode)->i_mmap_sem);
@@ -5499,6 +5506,11 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
* page cache.
*/
down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ ret = ext4_break_layouts(inode);
+ if (ret)
+ goto out_mmap;
+
/*
* Need to round down offset to be aligned with page size boundary
* for page size > block size.
@@ -5647,6 +5659,11 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
* page cache.
*/
down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ ret = ext4_break_layouts(inode);
+ if (ret)
+ goto out_mmap;
+
/*
* Need to round down to align start offset to page size boundary
* for page size > block size.
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index fb83750c1a14..2addcb8730e1 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -90,6 +90,8 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
return -EFSCORRUPTED;
ext4_lock_group(sb, block_group);
+ if (buffer_verified(bh))
+ goto verified;
blk = ext4_inode_bitmap(sb, desc);
if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh,
EXT4_INODES_PER_GROUP(sb) / 8)) {
@@ -101,6 +103,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
return -EFSBADCRC;
}
set_buffer_verified(bh);
+verified:
ext4_unlock_group(sb, block_group);
return 0;
}
@@ -135,9 +138,9 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
}
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
- ext4_error(sb, "Cannot read inode bitmap - "
- "block_group = %u, inode_bitmap = %llu",
- block_group, bitmap_blk);
+ ext4_warning(sb, "Cannot read inode bitmap - "
+ "block_group = %u, inode_bitmap = %llu",
+ block_group, bitmap_blk);
return ERR_PTR(-ENOMEM);
}
if (bitmap_uptodate(bh))
@@ -1083,7 +1086,7 @@ got:
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
- ei->i_crtime = timespec64_to_timespec(inode->i_mtime);
+ ei->i_crtime = inode->i_mtime;
memset(ei->i_data, 0, sizeof(ei->i_data));
ei->i_dir_start_lookup = 0;
@@ -1385,7 +1388,10 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
ext4_itable_unused_count(sb, gdp)),
sbi->s_inodes_per_block);
- if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
+ if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) ||
+ ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) -
+ ext4_itable_unused_count(sb, gdp)) <
+ EXT4_FIRST_INO(sb)))) {
ext4_error(sb, "Something is wrong with group %u: "
"used itable blocks: %d; "
"itable unused count: %u",
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index e55a8bc870bd..3543fe80a3c4 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -682,6 +682,10 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
goto convert;
}
+ ret = ext4_journal_get_write_access(handle, iloc.bh);
+ if (ret)
+ goto out;
+
flags |= AOP_FLAG_NOFS;
page = grab_cache_page_write_begin(mapping, 0, flags);
@@ -710,7 +714,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
out_up_read:
up_read(&EXT4_I(inode)->xattr_sem);
out:
- if (handle)
+ if (handle && (ret != 1))
ext4_journal_stop(handle);
brelse(iloc.bh);
return ret;
@@ -752,6 +756,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
ext4_write_unlock_xattr(inode, &no_expand);
brelse(iloc.bh);
+ mark_inode_dirty(inode);
out:
return copied;
}
@@ -898,7 +903,6 @@ retry_journal:
goto out;
}
-
page = grab_cache_page_write_begin(mapping, 0, flags);
if (!page) {
ret = -ENOMEM;
@@ -916,6 +920,9 @@ retry_journal:
if (ret < 0)
goto out_release_page;
}
+ ret = ext4_journal_get_write_access(handle, iloc.bh);
+ if (ret)
+ goto out_release_page;
up_read(&EXT4_I(inode)->xattr_sem);
*pagep = page;
@@ -936,7 +943,6 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
unsigned len, unsigned copied,
struct page *page)
{
- int i_size_changed = 0;
int ret;
ret = ext4_write_inline_data_end(inode, pos, len, copied, page);
@@ -954,10 +960,8 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
* But it's important to update i_size while still holding page lock:
* page writeout could otherwise come in and zero beyond i_size.
*/
- if (pos+copied > inode->i_size) {
+ if (pos+copied > inode->i_size)
i_size_write(inode, pos+copied);
- i_size_changed = 1;
- }
unlock_page(page);
put_page(page);
@@ -967,8 +971,7 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
* ordering of page lock and transaction start for journaling
* filesystems.
*/
- if (i_size_changed)
- mark_inode_dirty(inode);
+ mark_inode_dirty(inode);
return copied;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7d6c10017bdf..8f6ad7667974 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -317,7 +317,7 @@ stop_handle:
* (Well, we could do this if we need to, but heck - it works)
*/
ext4_orphan_del(handle, inode);
- EXT4_I(inode)->i_dtime = get_seconds();
+ EXT4_I(inode)->i_dtime = (__u32)ktime_get_real_seconds();
/*
* One subtle ordering requirement: if anything has gone wrong
@@ -1389,9 +1389,10 @@ static int ext4_write_end(struct file *file,
loff_t old_size = inode->i_size;
int ret = 0, ret2;
int i_size_changed = 0;
+ int inline_data = ext4_has_inline_data(inode);
trace_ext4_write_end(inode, pos, len, copied);
- if (ext4_has_inline_data(inode)) {
+ if (inline_data) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
if (ret < 0) {
@@ -1419,7 +1420,7 @@ static int ext4_write_end(struct file *file,
* ordering of page lock and transaction start for journaling
* filesystems.
*/
- if (i_size_changed)
+ if (i_size_changed || inline_data)
ext4_mark_inode_dirty(handle, inode);
if (pos + len > inode->i_size && ext4_can_truncate(inode))
@@ -1493,6 +1494,7 @@ static int ext4_journalled_write_end(struct file *file,
int partial = 0;
unsigned from, to;
int size_changed = 0;
+ int inline_data = ext4_has_inline_data(inode);
trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_SIZE - 1);
@@ -1500,7 +1502,7 @@ static int ext4_journalled_write_end(struct file *file,
BUG_ON(!ext4_handle_valid(handle));
- if (ext4_has_inline_data(inode)) {
+ if (inline_data) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
if (ret < 0) {
@@ -1531,7 +1533,7 @@ static int ext4_journalled_write_end(struct file *file,
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
- if (size_changed) {
+ if (size_changed || inline_data) {
ret2 = ext4_mark_inode_dirty(handle, inode);
if (!ret)
ret = ret2;
@@ -2028,11 +2030,7 @@ static int __ext4_journalled_writepage(struct page *page,
}
if (inline_data) {
- BUFFER_TRACE(inode_bh, "get write access");
- ret = ext4_journal_get_write_access(handle, inode_bh);
-
- err = ext4_handle_dirty_metadata(handle, inode, inode_bh);
-
+ ret = ext4_mark_inode_dirty(handle, inode);
} else {
ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
do_journal_get_write_access);
@@ -4193,6 +4191,39 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
return 0;
}
+static void ext4_wait_dax_page(struct ext4_inode_info *ei, bool *did_unlock)
+{
+ *did_unlock = true;
+ up_write(&ei->i_mmap_sem);
+ schedule();
+ down_write(&ei->i_mmap_sem);
+}
+
+int ext4_break_layouts(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct page *page;
+ bool retry;
+ int error;
+
+ if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem)))
+ return -EINVAL;
+
+ do {
+ retry = false;
+ page = dax_layout_busy_page(inode->i_mapping);
+ if (!page)
+ return 0;
+
+ error = ___wait_var_event(&page->_refcount,
+ atomic_read(&page->_refcount) == 1,
+ TASK_INTERRUPTIBLE, 0, 0,
+ ext4_wait_dax_page(ei, &retry));
+ } while (error == 0 && retry);
+
+ return error;
+}
+
/*
* ext4_punch_hole: punches a hole in a file by releasing the blocks
* associated with the given offset and length
@@ -4266,6 +4297,11 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
* page cache.
*/
down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ ret = ext4_break_layouts(inode);
+ if (ret)
+ goto out_dio;
+
first_block_offset = round_up(offset, sb->s_blocksize);
last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
@@ -4946,17 +4982,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ret = -EFSCORRUPTED;
goto bad_inode;
} else if (!ext4_has_inline_data(inode)) {
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
- if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- (S_ISLNK(inode->i_mode) &&
- !ext4_inode_is_fast_symlink(inode))))
- /* Validate extent which is part of inode */
+ /* validate the block references in the inode */
+ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ (S_ISLNK(inode->i_mode) &&
+ !ext4_inode_is_fast_symlink(inode))) {
+ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
ret = ext4_ext_check_inode(inode);
- } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- (S_ISLNK(inode->i_mode) &&
- !ext4_inode_is_fast_symlink(inode))) {
- /* Validate block references which are part of inode */
- ret = ext4_ind_check_inode(inode);
+ else
+ ret = ext4_ind_check_inode(inode);
}
}
if (ret)
@@ -5555,6 +5588,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
ext4_wait_for_tail_page_commit(inode);
}
down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ rc = ext4_break_layouts(inode);
+ if (rc) {
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ error = rc;
+ goto err_out;
+ }
+
/*
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f7ab34088162..e29fce2fbf25 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -14,6 +14,7 @@
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/nospec.h>
#include <linux/backing-dev.h>
#include <trace/events/ext4.h>
@@ -2140,7 +2141,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
* This should tell if fe_len is exactly power of 2
*/
if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
- ac->ac_2order = i - 1;
+ ac->ac_2order = array_index_nospec(i - 1,
+ sb->s_blocksize_bits + 2);
}
/* if stream allocation is enabled, use global goal */
@@ -3799,7 +3801,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
ext4_group_t group;
ext4_grpblk_t bit;
unsigned long long grp_blk_start;
- int err = 0;
int free = 0;
BUG_ON(pa->pa_deleted == 0);
@@ -3840,7 +3841,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
}
atomic_add(free, &sbi->s_mb_discarded);
- return err;
+ return 0;
}
static noinline_for_stack int
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 27b9a76a0dfa..39b07c2d3384 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -147,7 +147,7 @@ static int kmmpd(void *data)
mmp_block = le64_to_cpu(es->s_mmp_block);
mmp = (struct mmp_struct *)(bh->b_data);
- mmp->mmp_time = cpu_to_le64(get_seconds());
+ mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
/*
* Start with the higher mmp_check_interval and reduce it if
* the MMP block is being updated on time.
@@ -165,7 +165,7 @@ static int kmmpd(void *data)
seq = 1;
mmp->mmp_seq = cpu_to_le32(seq);
- mmp->mmp_time = cpu_to_le64(get_seconds());
+ mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
last_update_time = jiffies;
retval = write_mmp_block(sb, bh);
@@ -186,11 +186,8 @@ static int kmmpd(void *data)
goto exit_thread;
}
- if (sb_rdonly(sb)) {
- ext4_warning(sb, "kmmpd being stopped since filesystem "
- "has been remounted as readonly.");
- goto exit_thread;
- }
+ if (sb_rdonly(sb))
+ break;
diff = jiffies - last_update_time;
if (diff < mmp_update_interval * HZ)
@@ -244,7 +241,7 @@ static int kmmpd(void *data)
* Unmount seems to be clean.
*/
mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
- mmp->mmp_time = cpu_to_le64(get_seconds());
+ mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
retval = write_mmp_block(sb, bh);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 8e17efdcbf11..a409ff70d67b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -134,9 +134,7 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
mapping[0] = inode1->i_mapping;
mapping[1] = inode2->i_mapping;
} else {
- pgoff_t tmp = index1;
- index1 = index2;
- index2 = tmp;
+ swap(index1, index2);
mapping[0] = inode2->i_mapping;
mapping[1] = inode1->i_mapping;
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2a4c25c4681d..116ff68c5bd4 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1398,6 +1398,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
goto cleanup_and_exit;
dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
"falling back\n"));
+ ret = NULL;
}
nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
if (!nblocks) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ba2396a7bd04..f7750bc5b85a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -312,6 +312,24 @@ void ext4_itable_unused_set(struct super_block *sb,
bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}
+static void __ext4_update_tstamp(__le32 *lo, __u8 *hi)
+{
+ time64_t now = ktime_get_real_seconds();
+
+ now = clamp_val(now, 0, (1ull << 40) - 1);
+
+ *lo = cpu_to_le32(lower_32_bits(now));
+ *hi = upper_32_bits(now);
+}
+
+static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
+{
+ return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
+}
+#define ext4_update_tstamp(es, tstamp) \
+ __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
+#define ext4_get_tstamp(es, tstamp) \
+ __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
static void __save_error_info(struct super_block *sb, const char *func,
unsigned int line)
@@ -322,11 +340,12 @@ static void __save_error_info(struct super_block *sb, const char *func,
if (bdev_read_only(sb->s_bdev))
return;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- es->s_last_error_time = cpu_to_le32(get_seconds());
+ ext4_update_tstamp(es, s_last_error_time);
strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
es->s_last_error_line = cpu_to_le32(line);
if (!es->s_first_error_time) {
es->s_first_error_time = es->s_last_error_time;
+ es->s_first_error_time_hi = es->s_last_error_time_hi;
strncpy(es->s_first_error_func, func,
sizeof(es->s_first_error_func));
es->s_first_error_line = cpu_to_le32(line);
@@ -776,26 +795,26 @@ void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+ int ret;
- if ((flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) &&
- !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) {
- percpu_counter_sub(&sbi->s_freeclusters_counter,
- grp->bb_free);
- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
- &grp->bb_state);
+ if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
+ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
+ &grp->bb_state);
+ if (!ret)
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
}
- if ((flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) &&
- !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
- if (gdp) {
+ if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
+ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
+ &grp->bb_state);
+ if (!ret && gdp) {
int count;
count = ext4_free_inodes_count(sb, gdp);
percpu_counter_sub(&sbi->s_freeinodes_counter,
count);
}
- set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
- &grp->bb_state);
}
}
@@ -2174,8 +2193,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
"warning: maximal mount count reached, "
"running e2fsck is recommended");
else if (le32_to_cpu(es->s_checkinterval) &&
- (le32_to_cpu(es->s_lastcheck) +
- le32_to_cpu(es->s_checkinterval) <= get_seconds()))
+ (ext4_get_tstamp(es, s_lastcheck) +
+ le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
ext4_msg(sb, KERN_WARNING,
"warning: checktime reached, "
"running e2fsck is recommended");
@@ -2184,7 +2203,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
le16_add_cpu(&es->s_mnt_count, 1);
- es->s_mtime = cpu_to_le32(get_seconds());
+ ext4_update_tstamp(es, s_mtime);
ext4_update_dynamic_rev(sb);
if (sbi->s_journal)
ext4_set_feature_journal_needs_recovery(sb);
@@ -2342,7 +2361,7 @@ static int ext4_check_descriptors(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
ext4_fsblk_t last_block;
- ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1;
+ ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
ext4_fsblk_t block_bitmap;
ext4_fsblk_t inode_bitmap;
ext4_fsblk_t inode_table;
@@ -2875,8 +2894,9 @@ static void print_daily_error_info(struct timer_list *t)
ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
le32_to_cpu(es->s_error_count));
if (es->s_first_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
- sb->s_id, le32_to_cpu(es->s_first_error_time),
+ printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
+ sb->s_id,
+ ext4_get_tstamp(es, s_first_error_time),
(int) sizeof(es->s_first_error_func),
es->s_first_error_func,
le32_to_cpu(es->s_first_error_line));
@@ -2889,8 +2909,9 @@ static void print_daily_error_info(struct timer_list *t)
printk(KERN_CONT "\n");
}
if (es->s_last_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
- sb->s_id, le32_to_cpu(es->s_last_error_time),
+ printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
+ sb->s_id,
+ ext4_get_tstamp(es, s_last_error_time),
(int) sizeof(es->s_last_error_func),
es->s_last_error_func,
le32_to_cpu(es->s_last_error_line));
@@ -3141,14 +3162,8 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
if (!gdp)
continue;
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
- continue;
- if (group != 0)
+ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
break;
- ext4_error(sb, "Inode table for bg 0 marked as "
- "needing zeroing");
- if (sb_rdonly(sb))
- return ngroups;
}
return group;
@@ -4085,14 +4100,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
}
+ sbi->s_gdb_count = db_count;
if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
ret = -EFSCORRUPTED;
goto failed_mount2;
}
- sbi->s_gdb_count = db_count;
-
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
/* Register extent status tree shrinker */
@@ -4820,7 +4834,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
* to complain and force a full file system check.
*/
if (!(sb->s_flags & SB_RDONLY))
- es->s_wtime = cpu_to_le32(get_seconds());
+ ext4_update_tstamp(es, s_wtime);
if (sb->s_bdev->bd_part)
es->s_kbytes_written =
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
@@ -5087,6 +5101,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
#endif
char *orig_data = kstrdup(data, GFP_KERNEL);
+ if (data && !orig_data)
+ return -ENOMEM;
+
/* Store the original options */
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
@@ -5213,6 +5230,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (sbi->s_journal)
ext4_mark_recovery_complete(sb, es);
+ if (sbi->s_mmp_tsk)
+ kthread_stop(sbi->s_mmp_tsk);
} else {
/* Make sure we can mount this feature set readwrite */
if (ext4_has_feature_readonly(sb) ||
@@ -5670,13 +5689,13 @@ static int ext4_enable_quotas(struct super_block *sb)
DQUOT_USAGE_ENABLED |
(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
- for (type--; type >= 0; type--)
- dquot_quota_off(sb, type);
-
ext4_warning(sb,
"Failed to enable quota tracking "
"(type=%d, err=%d). Please run "
"e2fsck to fix.", type, err);
+ for (type--; type >= 0; type--)
+ dquot_quota_off(sb, type);
+
return err;
}
}
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index f34da0bb8f17..e60cc5e89023 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -25,6 +25,8 @@ typedef enum {
attr_reserved_clusters,
attr_inode_readahead,
attr_trigger_test_error,
+ attr_first_error_time,
+ attr_last_error_time,
attr_feature,
attr_pointer_ui,
attr_pointer_atomic,
@@ -182,8 +184,8 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
-EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time);
-EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time);
+EXT4_ATTR(first_error_time, 0444, first_error_time);
+EXT4_ATTR(last_error_time, 0444, last_error_time);
static unsigned int old_bump_val = 128;
EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
@@ -249,6 +251,15 @@ static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi)
return NULL;
}
+static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi)
+{
+ return snprintf(buf, PAGE_SIZE, "%lld",
+ ((time64_t)hi << 32) + le32_to_cpu(lo));
+}
+
+#define print_tstamp(buf, es, tstamp) \
+ __print_tstamp(buf, (es)->tstamp, (es)->tstamp ## _hi)
+
static ssize_t ext4_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -274,8 +285,12 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
case attr_pointer_ui:
if (!ptr)
return 0;
- return snprintf(buf, PAGE_SIZE, "%u\n",
- *((unsigned int *) ptr));
+ if (a->attr_ptr == ptr_ext4_super_block_offset)
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ le32_to_cpup(ptr));
+ else
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ *((unsigned int *) ptr));
case attr_pointer_atomic:
if (!ptr)
return 0;
@@ -283,6 +298,10 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
atomic_read((atomic_t *) ptr));
case attr_feature:
return snprintf(buf, PAGE_SIZE, "supported\n");
+ case attr_first_error_time:
+ return print_tstamp(buf, sbi->s_es, s_first_error_time);
+ case attr_last_error_time:
+ return print_tstamp(buf, sbi->s_es, s_last_error_time);
}
return 0;
@@ -308,7 +327,10 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
ret = kstrtoul(skip_spaces(buf), 0, &t);
if (ret)
return ret;
- *((unsigned int *) ptr) = t;
+ if (a->attr_ptr == ptr_ext4_super_block_offset)
+ *((__le32 *) ptr) = cpu_to_le32(t);
+ else
+ *((unsigned int *) ptr) = t;
return len;
case attr_inode_readahead:
return inode_readahead_blks_store(sbi, buf, len);
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index 0cb13badf473..bcbe3668c1d4 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -11,6 +11,10 @@
*/
static inline void ext4_truncate_failed_write(struct inode *inode)
{
+ /*
+ * We don't need to call ext4_break_layouts() because the blocks we
+ * are truncating were never visible to userspace.
+ */
down_write(&EXT4_I(inode)->i_mmap_sem);
truncate_inode_pages(inode->i_mapping, inode->i_size);
ext4_truncate(inode);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 723df14f4084..f36fc5d5b257 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -190,6 +190,8 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
if ((void *)next >= end)
return -EFSCORRUPTED;
+ if (strnlen(e->e_name, e->e_name_len) != e->e_name_len)
+ return -EFSCORRUPTED;
e = next;
}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 065dc919a0ce..bfd589ea74c0 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -707,13 +707,21 @@ static void fat_set_state(struct super_block *sb,
brelse(bh);
}
+static void fat_reset_iocharset(struct fat_mount_options *opts)
+{
+ if (opts->iocharset != fat_default_iocharset) {
+ /* Note: opts->iocharset can be NULL here */
+ kfree(opts->iocharset);
+ opts->iocharset = fat_default_iocharset;
+ }
+}
+
static void delayed_free(struct rcu_head *p)
{
struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu);
unload_nls(sbi->nls_disk);
unload_nls(sbi->nls_io);
- if (sbi->options.iocharset != fat_default_iocharset)
- kfree(sbi->options.iocharset);
+ fat_reset_iocharset(&sbi->options);
kfree(sbi);
}
@@ -1132,7 +1140,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
opts->fs_fmask = opts->fs_dmask = current_umask();
opts->allow_utime = -1;
opts->codepage = fat_default_codepage;
- opts->iocharset = fat_default_iocharset;
+ fat_reset_iocharset(opts);
if (is_vfat) {
opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
opts->rodir = 0;
@@ -1289,8 +1297,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
/* vfat specific */
case Opt_charset:
- if (opts->iocharset != fat_default_iocharset)
- kfree(opts->iocharset);
+ fat_reset_iocharset(opts);
iocharset = match_strdup(&args[0]);
if (!iocharset)
return -ENOMEM;
@@ -1881,8 +1888,7 @@ out_fail:
iput(fat_inode);
unload_nls(sbi->nls_io);
unload_nls(sbi->nls_disk);
- if (sbi->options.iocharset != fat_default_iocharset)
- kfree(sbi->options.iocharset);
+ fat_reset_iocharset(&sbi->options);
sb->s_fs_info = NULL;
kfree(sbi);
return error;
diff --git a/fs/file_table.c b/fs/file_table.c
index 7ec0b3e5f05d..d6eccd04d703 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -51,6 +51,7 @@ static void file_free_rcu(struct rcu_head *head)
static inline void file_free(struct file *f)
{
+ security_file_free(f);
percpu_counter_dec(&nr_files);
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
}
@@ -100,9 +101,8 @@ int proc_nr_files(struct ctl_table *table, int write,
* done, you will imbalance int the mount's writer count
* and a warning at __fput() time.
*/
-struct file *get_empty_filp(void)
+struct file *alloc_empty_file(int flags, const struct cred *cred)
{
- const struct cred *cred = current_cred();
static long old_max;
struct file *f;
int error;
@@ -123,11 +123,10 @@ struct file *get_empty_filp(void)
if (unlikely(!f))
return ERR_PTR(-ENOMEM);
- percpu_counter_inc(&nr_files);
f->f_cred = get_cred(cred);
error = security_file_alloc(f);
if (unlikely(error)) {
- file_free(f);
+ file_free_rcu(&f->f_u.fu_rcuhead);
return ERR_PTR(error);
}
@@ -136,7 +135,10 @@ struct file *get_empty_filp(void)
spin_lock_init(&f->f_lock);
mutex_init(&f->f_pos_lock);
eventpoll_init_file(f);
+ f->f_flags = flags;
+ f->f_mode = OPEN_FMODE(flags);
/* f->f_version: 0 */
+ percpu_counter_inc(&nr_files);
return f;
over:
@@ -152,15 +154,15 @@ over:
* alloc_file - allocate and initialize a 'struct file'
*
* @path: the (dentry, vfsmount) pair for the new file
- * @mode: the mode with which the new file will be opened
+ * @flags: O_... flags with which the new file will be opened
* @fop: the 'struct file_operations' for the new file
*/
-struct file *alloc_file(const struct path *path, fmode_t mode,
+static struct file *alloc_file(const struct path *path, int flags,
const struct file_operations *fop)
{
struct file *file;
- file = get_empty_filp();
+ file = alloc_empty_file(flags, current_cred());
if (IS_ERR(file))
return file;
@@ -168,19 +170,56 @@ struct file *alloc_file(const struct path *path, fmode_t mode,
file->f_inode = path->dentry->d_inode;
file->f_mapping = path->dentry->d_inode->i_mapping;
file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
- if ((mode & FMODE_READ) &&
+ if ((file->f_mode & FMODE_READ) &&
likely(fop->read || fop->read_iter))
- mode |= FMODE_CAN_READ;
- if ((mode & FMODE_WRITE) &&
+ file->f_mode |= FMODE_CAN_READ;
+ if ((file->f_mode & FMODE_WRITE) &&
likely(fop->write || fop->write_iter))
- mode |= FMODE_CAN_WRITE;
- file->f_mode = mode;
+ file->f_mode |= FMODE_CAN_WRITE;
+ file->f_mode |= FMODE_OPENED;
file->f_op = fop;
- if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+ if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_inc(path->dentry->d_inode);
return file;
}
-EXPORT_SYMBOL(alloc_file);
+
+struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
+ const char *name, int flags,
+ const struct file_operations *fops)
+{
+ static const struct dentry_operations anon_ops = {
+ .d_dname = simple_dname
+ };
+ struct qstr this = QSTR_INIT(name, strlen(name));
+ struct path path;
+ struct file *file;
+
+ path.dentry = d_alloc_pseudo(mnt->mnt_sb, &this);
+ if (!path.dentry)
+ return ERR_PTR(-ENOMEM);
+ if (!mnt->mnt_sb->s_d_op)
+ d_set_d_op(path.dentry, &anon_ops);
+ path.mnt = mntget(mnt);
+ d_instantiate(path.dentry, inode);
+ file = alloc_file(&path, flags, fops);
+ if (IS_ERR(file)) {
+ ihold(inode);
+ path_put(&path);
+ }
+ return file;
+}
+EXPORT_SYMBOL(alloc_file_pseudo);
+
+struct file *alloc_file_clone(struct file *base, int flags,
+ const struct file_operations *fops)
+{
+ struct file *f = alloc_file(&base->f_path, flags, fops);
+ if (!IS_ERR(f)) {
+ path_get(&f->f_path);
+ f->f_mapping = base->f_mapping;
+ }
+ return f;
+}
/* the real guts of fput() - releasing the last reference to file
*/
@@ -190,6 +229,9 @@ static void __fput(struct file *file)
struct vfsmount *mnt = file->f_path.mnt;
struct inode *inode = file->f_inode;
+ if (unlikely(!(file->f_mode & FMODE_OPENED)))
+ goto out;
+
might_sleep();
fsnotify_close(file);
@@ -207,7 +249,6 @@ static void __fput(struct file *file)
}
if (file->f_op->release)
file->f_op->release(inode, file);
- security_file_free(file);
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
!(file->f_mode & FMODE_PATH))) {
cdev_put(inode->i_cdev);
@@ -220,12 +261,10 @@ static void __fput(struct file *file)
put_write_access(inode);
__mnt_drop_write(mnt);
}
- file->f_path.dentry = NULL;
- file->f_path.mnt = NULL;
- file->f_inode = NULL;
- file_free(file);
dput(dentry);
mntput(mnt);
+out:
+ file_free(file);
}
static LLIST_HEAD(delayed_fput_list);
@@ -300,14 +339,6 @@ void __fput_sync(struct file *file)
EXPORT_SYMBOL(fput);
-void put_filp(struct file *file)
-{
- if (atomic_long_dec_and_test(&file->f_count)) {
- security_file_free(file);
- file_free(file);
- }
-}
-
void __init files_init(void)
{
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
index c184c5a356ff..cdcb376ef8df 100644
--- a/fs/fscache/cache.c
+++ b/fs/fscache/cache.c
@@ -220,6 +220,7 @@ int fscache_add_cache(struct fscache_cache *cache,
{
struct fscache_cache_tag *tag;
+ ASSERTCMP(ifsdef->cookie, ==, &fscache_fsdef_index);
BUG_ON(!cache->ops);
BUG_ON(!ifsdef);
@@ -248,7 +249,6 @@ int fscache_add_cache(struct fscache_cache *cache,
if (!cache->kobj)
goto error;
- ifsdef->cookie = &fscache_fsdef_index;
ifsdef->cache = cache;
cache->fsdef = ifsdef;
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 97137d7ec5ee..83bfe04456b6 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -516,6 +516,7 @@ static int fscache_alloc_object(struct fscache_cache *cache,
goto error;
}
+ ASSERTCMP(object->cookie, ==, cookie);
fscache_stat(&fscache_n_object_alloc);
object->debug_id = atomic_inc_return(&fscache_object_debug_id);
@@ -571,6 +572,8 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
_enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id);
+ ASSERTCMP(object->cookie, ==, cookie);
+
spin_lock(&cookie->lock);
/* there may be multiple initial creations of this object, but we only
@@ -610,9 +613,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
spin_unlock(&cache->object_list_lock);
}
- /* attach to the cookie */
- object->cookie = cookie;
- fscache_cookie_get(cookie, fscache_cookie_get_attach_object);
+ /* Attach to the cookie. The object already has a ref on it. */
hlist_add_head(&object->cookie_link, &cookie->backing_objects);
fscache_objlist_add(object);
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 20e0d0a4dc8c..9edc920f651f 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -327,6 +327,7 @@ void fscache_object_init(struct fscache_object *object,
object->store_limit_l = 0;
object->cache = cache;
object->cookie = cookie;
+ fscache_cookie_get(cookie, fscache_cookie_get_attach_object);
object->parent = NULL;
#ifdef CONFIG_FSCACHE_OBJECT_LIST
RB_CLEAR_NODE(&object->objlist_link);
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index e30c5975ea58..8d265790374c 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -70,7 +70,8 @@ void fscache_enqueue_operation(struct fscache_operation *op)
ASSERT(op->processor != NULL);
ASSERT(fscache_object_is_available(op->object));
ASSERTCMP(atomic_read(&op->usage), >, 0);
- ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS);
+ ASSERTIFCMP(op->state != FSCACHE_OP_ST_IN_PROGRESS,
+ op->state, ==, FSCACHE_OP_ST_CANCELLED);
fscache_stat(&fscache_n_op_enqueue);
switch (op->flags & FSCACHE_OP_TYPE) {
@@ -499,7 +500,8 @@ void fscache_put_operation(struct fscache_operation *op)
struct fscache_cache *cache;
_enter("{OBJ%x OP%x,%d}",
- op->object->debug_id, op->debug_id, atomic_read(&op->usage));
+ op->object ? op->object->debug_id : 0,
+ op->debug_id, atomic_read(&op->usage));
ASSERTCMP(atomic_read(&op->usage), >, 0);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 56231b31f806..d80aab0d5982 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -399,7 +399,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
*/
static int fuse_create_open(struct inode *dir, struct dentry *entry,
struct file *file, unsigned flags,
- umode_t mode, int *opened)
+ umode_t mode)
{
int err;
struct inode *inode;
@@ -469,7 +469,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
d_instantiate(entry, inode);
fuse_change_entry_timeout(entry, &outentry);
fuse_invalidate_attr(dir);
- err = finish_open(file, entry, generic_file_open, opened);
+ err = finish_open(file, entry, generic_file_open);
if (err) {
fuse_sync_release(ff, flags);
} else {
@@ -489,7 +489,7 @@ out_err:
static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
struct file *file, unsigned flags,
- umode_t mode, int *opened)
+ umode_t mode)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
@@ -508,12 +508,12 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
goto no_open;
/* Only creates */
- *opened |= FILE_CREATED;
+ file->f_mode |= FMODE_CREATED;
if (fc->no_create)
goto mknod;
- err = fuse_create_open(dir, entry, file, flags, mode, opened);
+ err = fuse_create_open(dir, entry, file, flags, mode);
if (err == -ENOSYS) {
fc->no_create = 1;
goto mknod;
@@ -539,6 +539,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
{
struct fuse_entry_out outarg;
struct inode *inode;
+ struct dentry *d;
int err;
struct fuse_forget_link *forget;
@@ -570,11 +571,17 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
}
kfree(forget);
- err = d_instantiate_no_diralias(entry, inode);
- if (err)
- return err;
+ d_drop(entry);
+ d = d_splice_alias(inode, entry);
+ if (IS_ERR(d))
+ return PTR_ERR(d);
- fuse_change_entry_timeout(entry, &outarg);
+ if (d) {
+ fuse_change_entry_timeout(d, &outarg);
+ dput(d);
+ } else {
+ fuse_change_entry_timeout(entry, &outarg);
+ }
fuse_invalidate_attr(dir);
return 0;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index feda55f67050..648f0ca1ad57 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -580,7 +580,7 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
struct file *file,
umode_t mode, dev_t dev, const char *symname,
- unsigned int size, int excl, int *opened)
+ unsigned int size, int excl)
{
const struct qstr *name = &dentry->d_name;
struct posix_acl *default_acl, *acl;
@@ -626,7 +626,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = 0;
if (file) {
if (S_ISREG(inode->i_mode))
- error = finish_open(file, dentry, gfs2_open_common, opened);
+ error = finish_open(file, dentry, gfs2_open_common);
else
error = finish_no_open(file, NULL);
}
@@ -767,8 +767,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
mark_inode_dirty(inode);
d_instantiate(dentry, inode);
if (file) {
- *opened |= FILE_CREATED;
- error = finish_open(file, dentry, gfs2_open_common, opened);
+ file->f_mode |= FMODE_CREATED;
+ error = finish_open(file, dentry, gfs2_open_common);
}
gfs2_glock_dq_uninit(ghs);
gfs2_glock_dq_uninit(ghs + 1);
@@ -822,7 +822,7 @@ fail:
static int gfs2_create(struct inode *dir, struct dentry *dentry,
umode_t mode, bool excl)
{
- return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl, NULL);
+ return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl);
}
/**
@@ -830,14 +830,13 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
* @dir: The directory inode
* @dentry: The dentry of the new inode
* @file: File to be opened
- * @opened: atomic_open flags
*
*
* Returns: errno
*/
static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
- struct file *file, int *opened)
+ struct file *file)
{
struct inode *inode;
struct dentry *d;
@@ -866,7 +865,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
return d;
}
if (file && S_ISREG(inode->i_mode))
- error = finish_open(file, dentry, gfs2_open_common, opened);
+ error = finish_open(file, dentry, gfs2_open_common);
gfs2_glock_dq_uninit(&gh);
if (error) {
@@ -879,7 +878,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
unsigned flags)
{
- return __gfs2_lookup(dir, dentry, NULL, NULL);
+ return __gfs2_lookup(dir, dentry, NULL);
}
/**
@@ -1189,7 +1188,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
if (size >= gfs2_max_stuffed_size(GFS2_I(dir)))
return -ENAMETOOLONG;
- return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0, NULL);
+ return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0);
}
/**
@@ -1204,7 +1203,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir));
- return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0, NULL);
+ return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0);
}
/**
@@ -1219,7 +1218,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
dev_t dev)
{
- return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0, NULL);
+ return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0);
}
/**
@@ -1229,14 +1228,13 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
* @file: The proposed new struct file
* @flags: open flags
* @mode: File mode
- * @opened: Flag to say whether the file has been opened or not
*
* Returns: error code or 0 for success
*/
static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned flags,
- umode_t mode, int *opened)
+ umode_t mode)
{
struct dentry *d;
bool excl = !!(flags & O_EXCL);
@@ -1244,13 +1242,13 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
if (!d_in_lookup(dentry))
goto skip_lookup;
- d = __gfs2_lookup(dir, dentry, file, opened);
+ d = __gfs2_lookup(dir, dentry, file);
if (IS_ERR(d))
return PTR_ERR(d);
if (d != NULL)
dentry = d;
if (d_really_is_positive(dentry)) {
- if (!(*opened & FILE_OPENED))
+ if (!(file->f_mode & FMODE_OPENED))
return finish_no_open(file, d);
dput(d);
return 0;
@@ -1262,7 +1260,7 @@ skip_lookup:
if (!(flags & O_CREAT))
return -ENOENT;
- return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl, opened);
+ return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl);
}
/*
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2a16111d312f..a2dfa1b2a89c 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -541,7 +541,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
HFS_I(inode)->rsrc_inode = dir;
HFS_I(dir)->rsrc_inode = inode;
igrab(dir);
- hlist_add_fake(&inode->i_hash);
+ inode_fake_hash(inode);
mark_inode_dirty(inode);
dont_mount(dentry);
out:
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2597b290c2a5..444c7b170359 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -610,33 +610,21 @@ static struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
int err;
inode = hostfs_iget(ino->i_sb);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
+ if (IS_ERR(inode))
goto out;
- }
err = -ENOMEM;
name = dentry_name(dentry);
- if (name == NULL)
- goto out_put;
-
- err = read_name(inode, name);
-
- __putname(name);
- if (err == -ENOENT) {
+ if (name) {
+ err = read_name(inode, name);
+ __putname(name);
+ }
+ if (err) {
iput(inode);
- inode = NULL;
+ inode = (err == -ENOENT) ? NULL : ERR_PTR(err);
}
- else if (err)
- goto out_put;
-
- d_add(dentry, inode);
- return NULL;
-
- out_put:
- iput(inode);
out:
- return ERR_PTR(err);
+ return d_splice_alias(inode, dentry);
}
static int hostfs_link(struct dentry *to, struct inode *ino,
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index c83ece7facc5..d85230c84ef2 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -244,6 +244,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in
result = iget_locked(dir->i_sb, ino);
if (!result) {
hpfs_error(dir->i_sb, "hpfs_lookup: can't get inode");
+ result = ERR_PTR(-ENOMEM);
goto bail1;
}
if (result->i_state & I_NEW) {
@@ -266,6 +267,8 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in
if (de->has_acl || de->has_xtd_perm) if (!sb_rdonly(dir->i_sb)) {
hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures");
+ iput(result);
+ result = ERR_PTR(-EINVAL);
goto bail1;
}
@@ -301,29 +304,17 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in
}
}
+bail1:
hpfs_brelse4(&qbh);
/*
* Made it.
*/
- end:
- end_add:
+end:
+end_add:
hpfs_unlock(dir->i_sb);
- d_add(dentry, result);
- return NULL;
-
- /*
- * Didn't.
- */
- bail1:
-
- hpfs_brelse4(&qbh);
-
- /*bail:*/
-
- hpfs_unlock(dir->i_sb);
- return ERR_PTR(-ENOENT);
+ return d_splice_alias(result, dentry);
}
const struct file_operations hpfs_dir_ops =
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d508c7844681..346a146c7617 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -411,6 +411,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
bool truncate_op = (lend == LLONG_MAX);
memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
+ vma_init(&pseudo_vma, current->mm);
pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pagevec_init(&pvec);
next = start;
@@ -595,6 +596,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
* as input to create an allocation policy.
*/
memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
+ vma_init(&pseudo_vma, mm);
pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pseudo_vma.vm_file = file;
@@ -1308,10 +1310,6 @@ static int get_hstate_idx(int page_size_log)
return h - hstates;
}
-static const struct dentry_operations anon_ops = {
- .d_dname = simple_dname
-};
-
/*
* Note that size should be aligned to proper hugepage size in caller side,
* otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
@@ -1320,19 +1318,18 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
vm_flags_t acctflag, struct user_struct **user,
int creat_flags, int page_size_log)
{
- struct file *file = ERR_PTR(-ENOMEM);
struct inode *inode;
- struct path path;
- struct super_block *sb;
- struct qstr quick_string;
+ struct vfsmount *mnt;
int hstate_idx;
+ struct file *file;
hstate_idx = get_hstate_idx(page_size_log);
if (hstate_idx < 0)
return ERR_PTR(-ENODEV);
*user = NULL;
- if (!hugetlbfs_vfsmount[hstate_idx])
+ mnt = hugetlbfs_vfsmount[hstate_idx];
+ if (!mnt)
return ERR_PTR(-ENOENT);
if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
@@ -1348,45 +1345,28 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
}
}
- sb = hugetlbfs_vfsmount[hstate_idx]->mnt_sb;
- quick_string.name = name;
- quick_string.len = strlen(quick_string.name);
- quick_string.hash = 0;
- path.dentry = d_alloc_pseudo(sb, &quick_string);
- if (!path.dentry)
- goto out_shm_unlock;
-
- d_set_d_op(path.dentry, &anon_ops);
- path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]);
file = ERR_PTR(-ENOSPC);
- inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0);
+ inode = hugetlbfs_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0);
if (!inode)
- goto out_dentry;
+ goto out;
if (creat_flags == HUGETLB_SHMFS_INODE)
inode->i_flags |= S_PRIVATE;
- file = ERR_PTR(-ENOMEM);
- if (hugetlb_reserve_pages(inode, 0,
- size >> huge_page_shift(hstate_inode(inode)), NULL,
- acctflag))
- goto out_inode;
-
- d_instantiate(path.dentry, inode);
inode->i_size = size;
clear_nlink(inode);
- file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
- &hugetlbfs_file_operations);
- if (IS_ERR(file))
- goto out_dentry; /* inode is already attached */
-
- return file;
+ if (hugetlb_reserve_pages(inode, 0,
+ size >> huge_page_shift(hstate_inode(inode)), NULL,
+ acctflag))
+ file = ERR_PTR(-ENOMEM);
+ else
+ file = alloc_file_pseudo(inode, mnt, name, O_RDWR,
+ &hugetlbfs_file_operations);
+ if (!IS_ERR(file))
+ return file;
-out_inode:
iput(inode);
-out_dentry:
- path_put(&path);
-out_shm_unlock:
+out:
if (*user) {
user_shm_unlock(size, *user);
*user = NULL;
diff --git a/fs/inode.c b/fs/inode.c
index 8c86c809ca17..a06de4454232 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -804,6 +804,10 @@ repeat:
__wait_on_freeing_inode(inode);
goto repeat;
}
+ if (unlikely(inode->i_state & I_CREATING)) {
+ spin_unlock(&inode->i_lock);
+ return ERR_PTR(-ESTALE);
+ }
__iget(inode);
spin_unlock(&inode->i_lock);
return inode;
@@ -831,6 +835,10 @@ repeat:
__wait_on_freeing_inode(inode);
goto repeat;
}
+ if (unlikely(inode->i_state & I_CREATING)) {
+ spin_unlock(&inode->i_lock);
+ return ERR_PTR(-ESTALE);
+ }
__iget(inode);
spin_unlock(&inode->i_lock);
return inode;
@@ -961,13 +969,26 @@ void unlock_new_inode(struct inode *inode)
lockdep_annotate_inode_mutex_key(inode);
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
- inode->i_state &= ~I_NEW;
+ inode->i_state &= ~I_NEW & ~I_CREATING;
smp_mb();
wake_up_bit(&inode->i_state, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
+void discard_new_inode(struct inode *inode)
+{
+ lockdep_annotate_inode_mutex_key(inode);
+ spin_lock(&inode->i_lock);
+ WARN_ON(!(inode->i_state & I_NEW));
+ inode->i_state &= ~I_NEW;
+ smp_mb();
+ wake_up_bit(&inode->i_state, __I_NEW);
+ spin_unlock(&inode->i_lock);
+ iput(inode);
+}
+EXPORT_SYMBOL(discard_new_inode);
+
/**
* lock_two_nondirectories - take two i_mutexes on non-directory objects
*
@@ -1029,6 +1050,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
{
struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
struct inode *old;
+ bool creating = inode->i_state & I_CREATING;
again:
spin_lock(&inode_hash_lock);
@@ -1039,6 +1061,8 @@ again:
* Use the old inode instead of the preallocated one.
*/
spin_unlock(&inode_hash_lock);
+ if (IS_ERR(old))
+ return NULL;
wait_on_inode(old);
if (unlikely(inode_unhashed(old))) {
iput(old);
@@ -1060,6 +1084,8 @@ again:
inode->i_state |= I_NEW;
hlist_add_head(&inode->i_hash, head);
spin_unlock(&inode->i_lock);
+ if (!creating)
+ inode_sb_list_add(inode);
unlock:
spin_unlock(&inode_hash_lock);
@@ -1094,12 +1120,13 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
struct inode *inode = ilookup5(sb, hashval, test, data);
if (!inode) {
- struct inode *new = new_inode(sb);
+ struct inode *new = alloc_inode(sb);
if (new) {
+ new->i_state = 0;
inode = inode_insert5(new, hashval, test, set, data);
if (unlikely(inode != new))
- iput(new);
+ destroy_inode(new);
}
}
return inode;
@@ -1128,6 +1155,8 @@ again:
inode = find_inode_fast(sb, head, ino);
spin_unlock(&inode_hash_lock);
if (inode) {
+ if (IS_ERR(inode))
+ return NULL;
wait_on_inode(inode);
if (unlikely(inode_unhashed(inode))) {
iput(inode);
@@ -1165,6 +1194,8 @@ again:
*/
spin_unlock(&inode_hash_lock);
destroy_inode(inode);
+ if (IS_ERR(old))
+ return NULL;
inode = old;
wait_on_inode(inode);
if (unlikely(inode_unhashed(inode))) {
@@ -1282,7 +1313,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
inode = find_inode(sb, head, test, data);
spin_unlock(&inode_hash_lock);
- return inode;
+ return IS_ERR(inode) ? NULL : inode;
}
EXPORT_SYMBOL(ilookup5_nowait);
@@ -1338,6 +1369,8 @@ again:
spin_unlock(&inode_hash_lock);
if (inode) {
+ if (IS_ERR(inode))
+ return NULL;
wait_on_inode(inode);
if (unlikely(inode_unhashed(inode))) {
iput(inode);
@@ -1421,12 +1454,17 @@ int insert_inode_locked(struct inode *inode)
}
if (likely(!old)) {
spin_lock(&inode->i_lock);
- inode->i_state |= I_NEW;
+ inode->i_state |= I_NEW | I_CREATING;
hlist_add_head(&inode->i_hash, head);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock);
return 0;
}
+ if (unlikely(old->i_state & I_CREATING)) {
+ spin_unlock(&old->i_lock);
+ spin_unlock(&inode_hash_lock);
+ return -EBUSY;
+ }
__iget(old);
spin_unlock(&old->i_lock);
spin_unlock(&inode_hash_lock);
@@ -1443,7 +1481,10 @@ EXPORT_SYMBOL(insert_inode_locked);
int insert_inode_locked4(struct inode *inode, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
{
- struct inode *old = inode_insert5(inode, hashval, test, NULL, data);
+ struct inode *old;
+
+ inode->i_state |= I_CREATING;
+ old = inode_insert5(inode, hashval, test, NULL, data);
if (old != inode) {
iput(old);
diff --git a/fs/internal.h b/fs/internal.h
index 4a18bdbd2214..50a28fc71300 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -95,7 +95,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
/*
* file_table.c
*/
-extern struct file *get_empty_filp(void);
+extern struct file *alloc_empty_file(int, const struct cred *);
/*
* super.c
@@ -127,9 +127,7 @@ int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
-extern int open_check_o_direct(struct file *f);
-extern int vfs_open(const struct path *, struct file *, const struct cred *);
-extern struct file *filp_clone_open(struct file *);
+extern int vfs_open(const struct path *, struct file *);
/*
* inode.c
diff --git a/fs/iomap.c b/fs/iomap.c
index 8bd54c08deee..009071e73bc0 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -2097,7 +2097,7 @@ iomap_bmap(struct address_space *mapping, sector_t bno,
const struct iomap_ops *ops)
{
struct inode *inode = mapping->host;
- loff_t pos = bno >> inode->i_blkbits;
+ loff_t pos = bno << inode->i_blkbits;
unsigned blocksize = i_blocksize(inode);
if (filemap_write_and_wait(mapping))
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 8de0e7723316..150cc030b4d7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -121,7 +121,7 @@ static int journal_submit_commit_record(journal_t *journal,
struct commit_header *tmp;
struct buffer_head *bh;
int ret;
- struct timespec64 now = current_kernel_time64();
+ struct timespec64 now;
*cbh = NULL;
@@ -134,6 +134,7 @@ static int journal_submit_commit_record(journal_t *journal,
return 1;
tmp = (struct commit_header *)bh->b_data;
+ ktime_get_coarse_real_ts64(&now);
tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h
index 395c4c0d0f06..1682a87c00b2 100644
--- a/fs/jfs/jfs_dinode.h
+++ b/fs/jfs/jfs_dinode.h
@@ -115,6 +115,13 @@ struct dinode {
dxd_t _dxd; /* 16: */
union {
__le32 _rdev; /* 4: */
+ /*
+ * The fast symlink area
+ * is expected to overflow
+ * into _inlineea when
+ * needed (which will clear
+ * INLINEEA).
+ */
u8 _fastsymlink[128];
} _u;
u8 _inlineea[128];
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index f36ef68905a7..93e8c590ff5c 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -491,13 +491,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
/* release the page */
release_metapage(mp);
- /*
- * __mark_inode_dirty expects inodes to be hashed. Since we don't
- * want special inodes in the fileset inode space, we make them
- * appear hashed, but do not put on any lists. hlist_del()
- * will work fine and require no locking.
- */
- hlist_add_fake(&ip->i_hash);
+ inode_fake_hash(ip);
return (ip);
}
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 1f26d1910409..9940a1e04cbf 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -87,6 +87,7 @@ struct jfs_inode_info {
struct {
unchar _unused[16]; /* 16: */
dxd_t _dxd; /* 16: */
+ /* _inline may overflow into _inline_ea when needed */
unchar _inline[128]; /* 128: inline symlink */
/* _inline_ea may overlay the last part of
* file._xtroot if maxentry = XTROOTINITSLOT
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 5e9b7bb3aabf..4572b7cf183d 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -61,8 +61,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
inode = new_inode(sb);
if (!inode) {
jfs_warn("ialloc: new_inode returned NULL!");
- rc = -ENOMEM;
- goto fail;
+ return ERR_PTR(-ENOMEM);
}
jfs_inode = JFS_IP(inode);
@@ -70,8 +69,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
rc = diAlloc(parent, S_ISDIR(mode), inode);
if (rc) {
jfs_warn("ialloc: diAlloc returned %d!", rc);
- if (rc == -EIO)
- make_bad_inode(inode);
goto fail_put;
}
@@ -141,9 +138,10 @@ fail_drop:
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
clear_nlink(inode);
- unlock_new_inode(inode);
+ discard_new_inode(inode);
+ return ERR_PTR(rc);
+
fail_put:
iput(inode);
-fail:
return ERR_PTR(rc);
}
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 56c3fcbfe80e..14528c0ffe63 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -175,8 +175,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
if (rc) {
free_ea_wmap(ip);
clear_nlink(ip);
- unlock_new_inode(ip);
- iput(ip);
+ discard_new_inode(ip);
} else {
d_instantiate_new(dentry, ip);
}
@@ -309,8 +308,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
if (rc) {
free_ea_wmap(ip);
clear_nlink(ip);
- unlock_new_inode(ip);
- iput(ip);
+ discard_new_inode(ip);
} else {
d_instantiate_new(dentry, ip);
}
@@ -1054,8 +1052,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
if (rc) {
free_ea_wmap(ip);
clear_nlink(ip);
- unlock_new_inode(ip);
- iput(ip);
+ discard_new_inode(ip);
} else {
d_instantiate_new(dentry, ip);
}
@@ -1441,8 +1438,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
if (rc) {
free_ea_wmap(ip);
clear_nlink(ip);
- unlock_new_inode(ip);
- iput(ip);
+ discard_new_inode(ip);
} else {
d_instantiate_new(dentry, ip);
}
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1b9264fd54b6..09da5cf14e27 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -581,7 +581,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
inode->i_ino = 0;
inode->i_size = i_size_read(sb->s_bdev->bd_inode);
inode->i_mapping->a_ops = &jfs_metapage_aops;
- hlist_add_fake(&inode->i_hash);
+ inode_fake_hash(inode);
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
sbi->direct_inode = inode;
@@ -967,8 +967,7 @@ static int __init init_jfs_fs(void)
jfs_inode_cachep =
kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info),
0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
- offsetof(struct jfs_inode_info, i_inline),
- sizeof_field(struct jfs_inode_info, i_inline),
+ offsetof(struct jfs_inode_info, i_inline), IDATASIZE,
init_once);
if (jfs_inode_cachep == NULL)
return -ENOMEM;
diff --git a/fs/locks.c b/fs/locks.c
index db7b6917d9c5..bc047a7edc47 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -202,10 +202,6 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
* we often hold the flc_lock as well. In certain cases, when reading the fields
* protected by this lock, we can skip acquiring it iff we already hold the
* flc_lock.
- *
- * In particular, adding an entry to the fl_block list requires that you hold
- * both the flc_lock and the blocked_lock_lock (acquired in that order).
- * Deleting an entry from the list however only requires the file_lock_lock.
*/
static DEFINE_SPINLOCK(blocked_lock_lock);
@@ -990,6 +986,7 @@ out:
if (new_fl)
locks_free_lock(new_fl);
locks_dispose_list(&dispose);
+ trace_flock_lock_inode(inode, request, error);
return error;
}
@@ -2072,6 +2069,13 @@ static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
return -1;
if (IS_REMOTELCK(fl))
return fl->fl_pid;
+ /*
+ * If the flock owner process is dead and its pid has been already
+ * freed, the translation below won't work, but we still want to show
+ * flock owner pid number in init pidns.
+ */
+ if (ns == &init_pid_ns)
+ return (pid_t)fl->fl_pid;
rcu_read_lock();
pid = find_pid_ns(fl->fl_pid, &init_pid_ns);
@@ -2626,12 +2630,10 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
fl_pid = locks_translate_pid(fl, proc_pidns);
/*
- * If there isn't a fl_pid don't display who is waiting on
- * the lock if we are called from locks_show, or if we are
- * called from __show_fd_info - skip lock entirely
+ * If lock owner is dead (and pid is freed) or not visible in current
+ * pidns, zero is shown as a pid value. Check lock info from
+ * init_pid_ns to get saved lock pid value.
*/
- if (fl_pid == 0)
- return;
if (fl->fl_file != NULL)
inode = locks_inode(fl->fl_file);
diff --git a/fs/namei.c b/fs/namei.c
index 734cef54fdf8..3cd396277cd3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2028,6 +2028,8 @@ static int link_path_walk(const char *name, struct nameidata *nd)
{
int err;
+ if (IS_ERR(name))
+ return PTR_ERR(name);
while (*name=='/')
name++;
if (!*name)
@@ -2125,12 +2127,15 @@ OK:
}
}
+/* must be paired with terminate_walk() */
static const char *path_init(struct nameidata *nd, unsigned flags)
{
const char *s = nd->name->name;
if (!*s)
flags &= ~LOOKUP_RCU;
+ if (flags & LOOKUP_RCU)
+ rcu_read_lock();
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
@@ -2143,7 +2148,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
nd->path = nd->root;
nd->inode = inode;
if (flags & LOOKUP_RCU) {
- rcu_read_lock();
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
nd->root_seq = nd->seq;
nd->m_seq = read_seqbegin(&mount_lock);
@@ -2159,21 +2163,15 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
nd->m_seq = read_seqbegin(&mount_lock);
if (*s == '/') {
- if (flags & LOOKUP_RCU)
- rcu_read_lock();
set_root(nd);
if (likely(!nd_jump_root(nd)))
return s;
- nd->root.mnt = NULL;
- rcu_read_unlock();
return ERR_PTR(-ECHILD);
} else if (nd->dfd == AT_FDCWD) {
if (flags & LOOKUP_RCU) {
struct fs_struct *fs = current->fs;
unsigned seq;
- rcu_read_lock();
-
do {
seq = read_seqcount_begin(&fs->seq);
nd->path = fs->pwd;
@@ -2195,16 +2193,13 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
dentry = f.file->f_path.dentry;
- if (*s) {
- if (!d_can_lookup(dentry)) {
- fdput(f);
- return ERR_PTR(-ENOTDIR);
- }
+ if (*s && unlikely(!d_can_lookup(dentry))) {
+ fdput(f);
+ return ERR_PTR(-ENOTDIR);
}
nd->path = f.file->f_path;
if (flags & LOOKUP_RCU) {
- rcu_read_lock();
nd->inode = nd->path.dentry->d_inode;
nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
} else {
@@ -2272,24 +2267,15 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
const char *s = path_init(nd, flags);
int err;
- if (IS_ERR(s))
- return PTR_ERR(s);
-
- if (unlikely(flags & LOOKUP_DOWN)) {
+ if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) {
err = handle_lookup_down(nd);
- if (unlikely(err < 0)) {
- terminate_walk(nd);
- return err;
- }
+ if (unlikely(err < 0))
+ s = ERR_PTR(err);
}
while (!(err = link_path_walk(s, nd))
&& ((err = lookup_last(nd)) > 0)) {
s = trailing_symlink(nd);
- if (IS_ERR(s)) {
- err = PTR_ERR(s);
- break;
- }
}
if (!err)
err = complete_walk(nd);
@@ -2336,10 +2322,7 @@ static int path_parentat(struct nameidata *nd, unsigned flags,
struct path *parent)
{
const char *s = path_init(nd, flags);
- int err;
- if (IS_ERR(s))
- return PTR_ERR(s);
- err = link_path_walk(s, nd);
+ int err = link_path_walk(s, nd);
if (!err)
err = complete_walk(nd);
if (!err) {
@@ -2666,15 +2649,10 @@ path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path)
{
const char *s = path_init(nd, flags);
int err;
- if (IS_ERR(s))
- return PTR_ERR(s);
+
while (!(err = link_path_walk(s, nd)) &&
(err = mountpoint_last(nd)) > 0) {
s = trailing_symlink(nd);
- if (IS_ERR(s)) {
- err = PTR_ERR(s);
- break;
- }
}
if (!err) {
*path = nd->path;
@@ -3027,17 +3005,16 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m
* Returns 0 if successful. The file will have been created and attached to
* @file by the filesystem calling finish_open().
*
- * Returns 1 if the file was looked up only or didn't need creating. The
- * caller will need to perform the open themselves. @path will have been
- * updated to point to the new dentry. This may be negative.
+ * If the file was looked up only or didn't need creating, FMODE_OPENED won't
+ * be set. The caller will need to perform the open themselves. @path will
+ * have been updated to point to the new dentry. This may be negative.
*
* Returns an error code otherwise.
*/
static int atomic_open(struct nameidata *nd, struct dentry *dentry,
struct path *path, struct file *file,
const struct open_flags *op,
- int open_flag, umode_t mode,
- int *opened)
+ int open_flag, umode_t mode)
{
struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
struct inode *dir = nd->path.dentry->d_inode;
@@ -3052,39 +3029,38 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
file->f_path.dentry = DENTRY_NOT_SET;
file->f_path.mnt = nd->path.mnt;
error = dir->i_op->atomic_open(dir, dentry, file,
- open_to_namei_flags(open_flag),
- mode, opened);
+ open_to_namei_flags(open_flag), mode);
d_lookup_done(dentry);
if (!error) {
- /*
- * We didn't have the inode before the open, so check open
- * permission here.
- */
- int acc_mode = op->acc_mode;
- if (*opened & FILE_CREATED) {
- WARN_ON(!(open_flag & O_CREAT));
- fsnotify_create(dir, dentry);
- acc_mode = 0;
- }
- error = may_open(&file->f_path, acc_mode, open_flag);
- if (WARN_ON(error > 0))
- error = -EINVAL;
- } else if (error > 0) {
- if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
+ if (file->f_mode & FMODE_OPENED) {
+ /*
+ * We didn't have the inode before the open, so check open
+ * permission here.
+ */
+ int acc_mode = op->acc_mode;
+ if (file->f_mode & FMODE_CREATED) {
+ WARN_ON(!(open_flag & O_CREAT));
+ fsnotify_create(dir, dentry);
+ acc_mode = 0;
+ }
+ error = may_open(&file->f_path, acc_mode, open_flag);
+ if (WARN_ON(error > 0))
+ error = -EINVAL;
+ } else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
error = -EIO;
} else {
if (file->f_path.dentry) {
dput(dentry);
dentry = file->f_path.dentry;
}
- if (*opened & FILE_CREATED)
+ if (file->f_mode & FMODE_CREATED)
fsnotify_create(dir, dentry);
if (unlikely(d_is_negative(dentry))) {
error = -ENOENT;
} else {
path->dentry = dentry;
path->mnt = nd->path.mnt;
- return 1;
+ return 0;
}
}
}
@@ -3095,25 +3071,22 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
/*
* Look up and maybe create and open the last component.
*
- * Must be called with i_mutex held on parent.
- *
- * Returns 0 if the file was successfully atomically created (if necessary) and
- * opened. In this case the file will be returned attached to @file.
- *
- * Returns 1 if the file was not completely opened at this time, though lookups
- * and creations will have been performed and the dentry returned in @path will
- * be positive upon return if O_CREAT was specified. If O_CREAT wasn't
- * specified then a negative dentry may be returned.
+ * Must be called with parent locked (exclusive in O_CREAT case).
*
- * An error code is returned otherwise.
+ * Returns 0 on success, that is, if
+ * the file was successfully atomically created (if necessary) and opened, or
+ * the file was not completely opened at this time, though lookups and
+ * creations were performed.
+ * These case are distinguished by presence of FMODE_OPENED on file->f_mode.
+ * In the latter case dentry returned in @path might be negative if O_CREAT
+ * hadn't been specified.
*
- * FILE_CREATE will be set in @*opened if the dentry was created and will be
- * cleared otherwise prior to returning.
+ * An error code is returned on failure.
*/
static int lookup_open(struct nameidata *nd, struct path *path,
struct file *file,
const struct open_flags *op,
- bool got_write, int *opened)
+ bool got_write)
{
struct dentry *dir = nd->path.dentry;
struct inode *dir_inode = dir->d_inode;
@@ -3126,7 +3099,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
if (unlikely(IS_DEADDIR(dir_inode)))
return -ENOENT;
- *opened &= ~FILE_CREATED;
+ file->f_mode &= ~FMODE_CREATED;
dentry = d_lookup(dir, &nd->last);
for (;;) {
if (!dentry) {
@@ -3188,7 +3161,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
if (dir_inode->i_op->atomic_open) {
error = atomic_open(nd, dentry, path, file, op, open_flag,
- mode, opened);
+ mode);
if (unlikely(error == -ENOENT) && create_error)
error = create_error;
return error;
@@ -3211,7 +3184,7 @@ no_open:
/* Negative dentry, just create the file */
if (!dentry->d_inode && (open_flag & O_CREAT)) {
- *opened |= FILE_CREATED;
+ file->f_mode |= FMODE_CREATED;
audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
if (!dir_inode->i_op->create) {
error = -EACCES;
@@ -3230,7 +3203,7 @@ no_open:
out_no_open:
path->dentry = dentry;
path->mnt = nd->path.mnt;
- return 1;
+ return 0;
out_dput:
dput(dentry);
@@ -3241,8 +3214,7 @@ out_dput:
* Handle the last step of open()
*/
static int do_last(struct nameidata *nd,
- struct file *file, const struct open_flags *op,
- int *opened)
+ struct file *file, const struct open_flags *op)
{
struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
@@ -3308,17 +3280,17 @@ static int do_last(struct nameidata *nd,
inode_lock(dir->d_inode);
else
inode_lock_shared(dir->d_inode);
- error = lookup_open(nd, &path, file, op, got_write, opened);
+ error = lookup_open(nd, &path, file, op, got_write);
if (open_flag & O_CREAT)
inode_unlock(dir->d_inode);
else
inode_unlock_shared(dir->d_inode);
- if (error <= 0) {
- if (error)
- goto out;
+ if (error)
+ goto out;
- if ((*opened & FILE_CREATED) ||
+ if (file->f_mode & FMODE_OPENED) {
+ if ((file->f_mode & FMODE_CREATED) ||
!S_ISREG(file_inode(file)->i_mode))
will_truncate = false;
@@ -3326,7 +3298,7 @@ static int do_last(struct nameidata *nd,
goto opened;
}
- if (*opened & FILE_CREATED) {
+ if (file->f_mode & FMODE_CREATED) {
/* Don't check for write permission, don't truncate */
open_flag &= ~O_TRUNC;
will_truncate = false;
@@ -3395,20 +3367,15 @@ finish_open_created:
error = may_open(&nd->path, acc_mode, open_flag);
if (error)
goto out;
- BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
- error = vfs_open(&nd->path, file, current_cred());
+ BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
+ error = vfs_open(&nd->path, file);
if (error)
goto out;
- *opened |= FILE_OPENED;
opened:
- error = open_check_o_direct(file);
- if (!error)
- error = ima_file_check(file, op->acc_mode, *opened);
+ error = ima_file_check(file, op->acc_mode);
if (!error && will_truncate)
error = handle_truncate(file);
out:
- if (unlikely(error) && (*opened & FILE_OPENED))
- fput(file);
if (unlikely(error > 0)) {
WARN_ON(1);
error = -EINVAL;
@@ -3458,7 +3425,7 @@ EXPORT_SYMBOL(vfs_tmpfile);
static int do_tmpfile(struct nameidata *nd, unsigned flags,
const struct open_flags *op,
- struct file *file, int *opened)
+ struct file *file)
{
struct dentry *child;
struct path path;
@@ -3480,12 +3447,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
if (error)
goto out2;
file->f_path.mnt = path.mnt;
- error = finish_open(file, child, NULL, opened);
- if (error)
- goto out2;
- error = open_check_o_direct(file);
- if (error)
- fput(file);
+ error = finish_open(file, child, NULL);
out2:
mnt_drop_write(path.mnt);
out:
@@ -3499,7 +3461,7 @@ static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
int error = path_lookupat(nd, flags, &path);
if (!error) {
audit_inode(nd->name, path.dentry, 0);
- error = vfs_open(&path, file, current_cred());
+ error = vfs_open(&path, file);
path_put(&path);
}
return error;
@@ -3508,59 +3470,40 @@ static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
static struct file *path_openat(struct nameidata *nd,
const struct open_flags *op, unsigned flags)
{
- const char *s;
struct file *file;
- int opened = 0;
int error;
- file = get_empty_filp();
+ file = alloc_empty_file(op->open_flag, current_cred());
if (IS_ERR(file))
return file;
- file->f_flags = op->open_flag;
-
if (unlikely(file->f_flags & __O_TMPFILE)) {
- error = do_tmpfile(nd, flags, op, file, &opened);
- goto out2;
- }
-
- if (unlikely(file->f_flags & O_PATH)) {
+ error = do_tmpfile(nd, flags, op, file);
+ } else if (unlikely(file->f_flags & O_PATH)) {
error = do_o_path(nd, flags, file);
- if (!error)
- opened |= FILE_OPENED;
- goto out2;
- }
-
- s = path_init(nd, flags);
- if (IS_ERR(s)) {
- put_filp(file);
- return ERR_CAST(s);
- }
- while (!(error = link_path_walk(s, nd)) &&
- (error = do_last(nd, file, op, &opened)) > 0) {
- nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
- s = trailing_symlink(nd);
- if (IS_ERR(s)) {
- error = PTR_ERR(s);
- break;
+ } else {
+ const char *s = path_init(nd, flags);
+ while (!(error = link_path_walk(s, nd)) &&
+ (error = do_last(nd, file, op)) > 0) {
+ nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
+ s = trailing_symlink(nd);
}
+ terminate_walk(nd);
}
- terminate_walk(nd);
-out2:
- if (!(opened & FILE_OPENED)) {
- BUG_ON(!error);
- put_filp(file);
+ if (likely(!error)) {
+ if (likely(file->f_mode & FMODE_OPENED))
+ return file;
+ WARN_ON(1);
+ error = -EINVAL;
}
- if (unlikely(error)) {
- if (error == -EOPENSTALE) {
- if (flags & LOOKUP_RCU)
- error = -ECHILD;
- else
- error = -ESTALE;
- }
- file = ERR_PTR(error);
+ fput(file);
+ if (error == -EOPENSTALE) {
+ if (flags & LOOKUP_RCU)
+ error = -ECHILD;
+ else
+ error = -ESTALE;
}
- return file;
+ return ERR_PTR(error);
}
struct file *do_filp_open(int dfd, struct filename *pathname,
@@ -4712,29 +4655,6 @@ out:
return len;
}
-/*
- * A helper for ->readlink(). This should be used *ONLY* for symlinks that
- * have ->get_link() not calling nd_jump_link(). Using (or not using) it
- * for any given inode is up to filesystem.
- */
-static int generic_readlink(struct dentry *dentry, char __user *buffer,
- int buflen)
-{
- DEFINE_DELAYED_CALL(done);
- struct inode *inode = d_inode(dentry);
- const char *link = inode->i_link;
- int res;
-
- if (!link) {
- link = inode->i_op->get_link(dentry, inode, &done);
- if (IS_ERR(link))
- return PTR_ERR(link);
- }
- res = readlink_copy(buffer, buflen, link);
- do_delayed_call(&done);
- return res;
-}
-
/**
* vfs_readlink - copy symlink body into userspace buffer
* @dentry: dentry on which to get symbolic link
@@ -4748,6 +4668,9 @@ static int generic_readlink(struct dentry *dentry, char __user *buffer,
int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
struct inode *inode = d_inode(dentry);
+ DEFINE_DELAYED_CALL(done);
+ const char *link;
+ int res;
if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
if (unlikely(inode->i_op->readlink))
@@ -4761,7 +4684,15 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
spin_unlock(&inode->i_lock);
}
- return generic_readlink(dentry, buffer, buflen);
+ link = inode->i_link;
+ if (!link) {
+ link = inode->i_op->get_link(dentry, inode, &done);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+ }
+ res = readlink_copy(buffer, buflen, link);
+ do_delayed_call(&done);
+ return res;
}
EXPORT_SYMBOL(vfs_readlink);
diff --git a/fs/namespace.c b/fs/namespace.c
index 8ddd14806799..bd2f4c68506a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -659,12 +659,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
return 0;
mnt = real_mount(bastard);
mnt_add_count(mnt, 1);
+ smp_mb(); // see mntput_no_expire()
if (likely(!read_seqretry(&mount_lock, seq)))
return 0;
if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
mnt_add_count(mnt, -1);
return 1;
}
+ lock_mount_hash();
+ if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
+ mnt_add_count(mnt, -1);
+ unlock_mount_hash();
+ return 1;
+ }
+ unlock_mount_hash();
+ /* caller will mntput() */
return -1;
}
@@ -1195,12 +1204,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
static void mntput_no_expire(struct mount *mnt)
{
rcu_read_lock();
- mnt_add_count(mnt, -1);
- if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+ if (likely(READ_ONCE(mnt->mnt_ns))) {
+ /*
+ * Since we don't do lock_mount_hash() here,
+ * ->mnt_ns can change under us. However, if it's
+ * non-NULL, then there's a reference that won't
+ * be dropped until after an RCU delay done after
+ * turning ->mnt_ns NULL. So if we observe it
+ * non-NULL under rcu_read_lock(), the reference
+ * we are dropping is not the final one.
+ */
+ mnt_add_count(mnt, -1);
rcu_read_unlock();
return;
}
lock_mount_hash();
+ /*
+ * make sure that if __legitimize_mnt() has not seen us grab
+ * mount_lock, we'll see their refcount increment here.
+ */
+ smp_mb();
+ mnt_add_count(mnt, -1);
if (mnt_get_count(mnt)) {
rcu_read_unlock();
unlock_mount_hash();
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7a9c14426855..d7f158c3efc8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1434,12 +1434,11 @@ static int do_open(struct inode *inode, struct file *filp)
static int nfs_finish_open(struct nfs_open_context *ctx,
struct dentry *dentry,
- struct file *file, unsigned open_flags,
- int *opened)
+ struct file *file, unsigned open_flags)
{
int err;
- err = finish_open(file, dentry, do_open, opened);
+ err = finish_open(file, dentry, do_open);
if (err)
goto out;
if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
@@ -1452,7 +1451,7 @@ out:
int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned open_flags,
- umode_t mode, int *opened)
+ umode_t mode)
{
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
struct nfs_open_context *ctx;
@@ -1461,6 +1460,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct inode *inode;
unsigned int lookup_flags = 0;
bool switched = false;
+ int created = 0;
int err;
/* Expect a negative dentry */
@@ -1521,7 +1521,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
goto out;
trace_nfs_atomic_open_enter(dir, ctx, open_flags);
- inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened);
+ inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
+ if (created)
+ file->f_mode |= FMODE_CREATED;
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
@@ -1546,7 +1548,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
goto out;
}
- err = nfs_finish_open(ctx, ctx->dentry, file, open_flags, opened);
+ err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
put_nfs_open_context(ctx);
out:
@@ -1641,6 +1643,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
struct dentry *parent = dget_parent(dentry);
struct inode *dir = d_inode(parent);
struct inode *inode;
+ struct dentry *d;
int error = -EACCES;
d_drop(dentry);
@@ -1662,10 +1665,12 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
goto out_error;
}
inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
- error = PTR_ERR(inode);
- if (IS_ERR(inode))
+ d = d_splice_alias(inode, dentry);
+ if (IS_ERR(d)) {
+ error = PTR_ERR(d);
goto out_error;
- d_add(dentry, inode);
+ }
+ dput(d);
out:
dput(parent);
return 0;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 137e18abb7e7..51beb6e38c90 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -258,7 +258,7 @@ extern const struct dentry_operations nfs4_dentry_operations;
/* dir.c */
int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
- unsigned, umode_t, int *);
+ unsigned, umode_t);
/* super.c */
extern struct file_system_type nfs4_fs_type;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6dd146885da9..b790976d3913 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2951,7 +2951,7 @@ static int _nfs4_do_open(struct inode *dir,
}
}
if (opened && opendata->file_created)
- *opened |= FILE_CREATED;
+ *opened = 1;
if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) {
*ctx_th = opendata->f_attr.mdsthreshold;
@@ -6466,34 +6466,34 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
if (data->arg.new_lock && !data->cancelled) {
data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
- break;
+ goto out_restart;
}
-
if (data->arg.new_lock_owner != 0) {
nfs_confirm_seqid(&lsp->ls_seqid, 0);
nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
- goto out_done;
- } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid))
- goto out_done;
-
+ } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
+ goto out_restart;
break;
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
if (data->arg.new_lock_owner != 0) {
- if (nfs4_stateid_match(&data->arg.open_stateid,
+ if (!nfs4_stateid_match(&data->arg.open_stateid,
&lsp->ls_state->open_stateid))
- goto out_done;
- } else if (nfs4_stateid_match(&data->arg.lock_stateid,
+ goto out_restart;
+ } else if (!nfs4_stateid_match(&data->arg.lock_stateid,
&lsp->ls_stateid))
- goto out_done;
+ goto out_restart;
}
- if (!data->cancelled)
- rpc_restart_call_prepare(task);
out_done:
dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
+ return;
+out_restart:
+ if (!data->cancelled)
+ rpc_restart_call_prepare(task);
+ goto out_done;
}
static void nfs4_lock_release(void *calldata)
@@ -6502,7 +6502,7 @@ static void nfs4_lock_release(void *calldata)
dprintk("%s: begin!\n", __func__);
nfs_free_seqid(data->arg.open_seqid);
- if (data->cancelled) {
+ if (data->cancelled && data->rpc_status == 0) {
struct rpc_task *task;
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
data->arg.lock_seqid);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b0555d7d8200..55a099e47ba2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -763,7 +763,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
goto out_nfserr;
}
- host_err = ima_file_check(file, may_flags, 0);
+ host_err = ima_file_check(file, may_flags);
if (host_err) {
fput(file);
goto out_nfserr;
diff --git a/fs/open.c b/fs/open.c
index d0e955b558ad..d98e19239bb7 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -724,27 +724,13 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
return ksys_fchown(fd, user, group);
}
-int open_check_o_direct(struct file *f)
-{
- /* NB: we're sure to have correct a_ops only after f_op->open */
- if (f->f_flags & O_DIRECT) {
- if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
- return -EINVAL;
- }
- return 0;
-}
-
static int do_dentry_open(struct file *f,
struct inode *inode,
- int (*open)(struct inode *, struct file *),
- const struct cred *cred)
+ int (*open)(struct inode *, struct file *))
{
static const struct file_operations empty_fops = {};
int error;
- f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
- FMODE_PREAD | FMODE_PWRITE;
-
path_get(&f->f_path);
f->f_inode = inode;
f->f_mapping = inode->i_mapping;
@@ -753,7 +739,7 @@ static int do_dentry_open(struct file *f,
f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
if (unlikely(f->f_flags & O_PATH)) {
- f->f_mode = FMODE_PATH;
+ f->f_mode = FMODE_PATH | FMODE_OPENED;
f->f_op = &empty_fops;
return 0;
}
@@ -780,7 +766,7 @@ static int do_dentry_open(struct file *f,
goto cleanup_all;
}
- error = security_file_open(f, cred);
+ error = security_file_open(f);
if (error)
goto cleanup_all;
@@ -788,6 +774,8 @@ static int do_dentry_open(struct file *f,
if (error)
goto cleanup_all;
+ /* normally all 3 are set; ->open() can clear them if needed */
+ f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
if (!open)
open = f->f_op->open;
if (open) {
@@ -795,6 +783,7 @@ static int do_dentry_open(struct file *f,
if (error)
goto cleanup_all;
}
+ f->f_mode |= FMODE_OPENED;
if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_inc(inode);
if ((f->f_mode & FMODE_READ) &&
@@ -809,9 +798,16 @@ static int do_dentry_open(struct file *f,
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
+ /* NB: we're sure to have correct a_ops only after f_op->open */
+ if (f->f_flags & O_DIRECT) {
+ if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
+ return -EINVAL;
+ }
return 0;
cleanup_all:
+ if (WARN_ON_ONCE(error > 0))
+ error = -EINVAL;
fops_put(f->f_op);
if (f->f_mode & FMODE_WRITER) {
put_write_access(inode);
@@ -847,19 +843,12 @@ cleanup_file:
* Returns zero on success or -errno if the open failed.
*/
int finish_open(struct file *file, struct dentry *dentry,
- int (*open)(struct inode *, struct file *),
- int *opened)
+ int (*open)(struct inode *, struct file *))
{
- int error;
- BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
+ BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
file->f_path.dentry = dentry;
- error = do_dentry_open(file, d_backing_inode(dentry), open,
- current_cred());
- if (!error)
- *opened |= FILE_OPENED;
-
- return error;
+ return do_dentry_open(file, d_backing_inode(dentry), open);
}
EXPORT_SYMBOL(finish_open);
@@ -874,13 +863,13 @@ EXPORT_SYMBOL(finish_open);
* NB: unlike finish_open() this function does consume the dentry reference and
* the caller need not dput() it.
*
- * Returns "1" which must be the return value of ->atomic_open() after having
+ * Returns "0" which must be the return value of ->atomic_open() after having
* called this function.
*/
int finish_no_open(struct file *file, struct dentry *dentry)
{
file->f_path.dentry = dentry;
- return 1;
+ return 0;
}
EXPORT_SYMBOL(finish_no_open);
@@ -896,8 +885,7 @@ EXPORT_SYMBOL(file_path);
* @file: newly allocated file with f_flag initialized
* @cred: credentials to use
*/
-int vfs_open(const struct path *path, struct file *file,
- const struct cred *cred)
+int vfs_open(const struct path *path, struct file *file)
{
struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0);
@@ -905,7 +893,7 @@ int vfs_open(const struct path *path, struct file *file,
return PTR_ERR(dentry);
file->f_path = *path;
- return do_dentry_open(file, d_backing_inode(dentry), NULL, cred);
+ return do_dentry_open(file, d_backing_inode(dentry), NULL);
}
struct file *dentry_open(const struct path *path, int flags,
@@ -919,19 +907,11 @@ struct file *dentry_open(const struct path *path, int flags,
/* We must always pass in a valid mount pointer. */
BUG_ON(!path->mnt);
- f = get_empty_filp();
+ f = alloc_empty_file(flags, cred);
if (!IS_ERR(f)) {
- f->f_flags = flags;
- error = vfs_open(path, f, cred);
- if (!error) {
- /* from now on we need fput() to dispose of f */
- error = open_check_o_direct(f);
- if (error) {
- fput(f);
- f = ERR_PTR(error);
- }
- } else {
- put_filp(f);
+ error = vfs_open(path, f);
+ if (error) {
+ fput(f);
f = ERR_PTR(error);
}
}
@@ -1063,26 +1043,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
}
EXPORT_SYMBOL(file_open_root);
-struct file *filp_clone_open(struct file *oldfile)
-{
- struct file *file;
- int retval;
-
- file = get_empty_filp();
- if (IS_ERR(file))
- return file;
-
- file->f_flags = oldfile->f_flags;
- retval = vfs_open(&oldfile->f_path, file, oldfile->f_cred);
- if (retval) {
- put_filp(file);
- return ERR_PTR(retval);
- }
-
- return file;
-}
-EXPORT_SYMBOL(filp_clone_open);
-
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
struct open_flags op;
diff --git a/fs/pipe.c b/fs/pipe.c
index 39d6f431da83..bdc5d3c0977d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -741,54 +741,33 @@ fail_inode:
int create_pipe_files(struct file **res, int flags)
{
- int err;
struct inode *inode = get_pipe_inode();
struct file *f;
- struct path path;
if (!inode)
return -ENFILE;
- err = -ENOMEM;
- path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &empty_name);
- if (!path.dentry)
- goto err_inode;
- path.mnt = mntget(pipe_mnt);
-
- d_instantiate(path.dentry, inode);
-
- f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
+ f = alloc_file_pseudo(inode, pipe_mnt, "",
+ O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
+ &pipefifo_fops);
if (IS_ERR(f)) {
- err = PTR_ERR(f);
- goto err_dentry;
+ free_pipe_info(inode->i_pipe);
+ iput(inode);
+ return PTR_ERR(f);
}
- f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
f->private_data = inode->i_pipe;
- res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
+ res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
+ &pipefifo_fops);
if (IS_ERR(res[0])) {
- err = PTR_ERR(res[0]);
- goto err_file;
+ put_pipe_info(inode, inode->i_pipe);
+ fput(f);
+ return PTR_ERR(res[0]);
}
-
- path_get(&path);
res[0]->private_data = inode->i_pipe;
- res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
res[1] = f;
return 0;
-
-err_file:
- put_filp(f);
-err_dentry:
- free_pipe_info(inode->i_pipe);
- path_put(&path);
- return err;
-
-err_inode:
- free_pipe_info(inode->i_pipe);
- iput(inode);
- return err;
}
static int __do_pipe_flags(int *fd, struct file **files, int flags)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e9679016271f..dfd73a4616ce 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -831,7 +831,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
SEQ_PUT_DEC(" kB\nSwap: ", mss->swap);
SEQ_PUT_DEC(" kB\nSwapPss: ",
mss->swap_pss >> PSS_SHIFT);
- SEQ_PUT_DEC(" kB\nLocked: ", mss->pss >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nLocked: ",
+ mss->pss_locked >> PSS_SHIFT);
seq_puts(m, " kB\n");
}
if (!rollup_mode) {
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 7e288d97adcb..9fed1c05f1f4 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -76,83 +76,99 @@ static char *le_type(struct reiserfs_key *key)
}
/* %k */
-static void sprintf_le_key(char *buf, struct reiserfs_key *key)
+static int scnprintf_le_key(char *buf, size_t size, struct reiserfs_key *key)
{
if (key)
- sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id),
- le32_to_cpu(key->k_objectid), le_offset(key),
- le_type(key));
+ return scnprintf(buf, size, "[%d %d %s %s]",
+ le32_to_cpu(key->k_dir_id),
+ le32_to_cpu(key->k_objectid), le_offset(key),
+ le_type(key));
else
- sprintf(buf, "[NULL]");
+ return scnprintf(buf, size, "[NULL]");
}
/* %K */
-static void sprintf_cpu_key(char *buf, struct cpu_key *key)
+static int scnprintf_cpu_key(char *buf, size_t size, struct cpu_key *key)
{
if (key)
- sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id,
- key->on_disk_key.k_objectid, reiserfs_cpu_offset(key),
- cpu_type(key));
+ return scnprintf(buf, size, "[%d %d %s %s]",
+ key->on_disk_key.k_dir_id,
+ key->on_disk_key.k_objectid,
+ reiserfs_cpu_offset(key), cpu_type(key));
else
- sprintf(buf, "[NULL]");
+ return scnprintf(buf, size, "[NULL]");
}
-static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh)
+static int scnprintf_de_head(char *buf, size_t size,
+ struct reiserfs_de_head *deh)
{
if (deh)
- sprintf(buf,
- "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]",
- deh_offset(deh), deh_dir_id(deh), deh_objectid(deh),
- deh_location(deh), deh_state(deh));
+ return scnprintf(buf, size,
+ "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]",
+ deh_offset(deh), deh_dir_id(deh),
+ deh_objectid(deh), deh_location(deh),
+ deh_state(deh));
else
- sprintf(buf, "[NULL]");
+ return scnprintf(buf, size, "[NULL]");
}
-static void sprintf_item_head(char *buf, struct item_head *ih)
+static int scnprintf_item_head(char *buf, size_t size, struct item_head *ih)
{
if (ih) {
- strcpy(buf,
- (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*");
- sprintf_le_key(buf + strlen(buf), &(ih->ih_key));
- sprintf(buf + strlen(buf), ", item_len %d, item_location %d, "
- "free_space(entry_count) %d",
- ih_item_len(ih), ih_location(ih), ih_free_space(ih));
+ char *p = buf;
+ char * const end = buf + size;
+
+ p += scnprintf(p, end - p, "%s",
+ (ih_version(ih) == KEY_FORMAT_3_6) ?
+ "*3.6* " : "*3.5*");
+
+ p += scnprintf_le_key(p, end - p, &ih->ih_key);
+
+ p += scnprintf(p, end - p,
+ ", item_len %d, item_location %d, free_space(entry_count) %d",
+ ih_item_len(ih), ih_location(ih),
+ ih_free_space(ih));
+ return p - buf;
} else
- sprintf(buf, "[NULL]");
+ return scnprintf(buf, size, "[NULL]");
}
-static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de)
+static int scnprintf_direntry(char *buf, size_t size,
+ struct reiserfs_dir_entry *de)
{
char name[20];
memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen);
name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0;
- sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid);
+ return scnprintf(buf, size, "\"%s\"==>[%d %d]",
+ name, de->de_dir_id, de->de_objectid);
}
-static void sprintf_block_head(char *buf, struct buffer_head *bh)
+static int scnprintf_block_head(char *buf, size_t size, struct buffer_head *bh)
{
- sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ",
- B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh));
+ return scnprintf(buf, size,
+ "level=%d, nr_items=%d, free_space=%d rdkey ",
+ B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh));
}
-static void sprintf_buffer_head(char *buf, struct buffer_head *bh)
+static int scnprintf_buffer_head(char *buf, size_t size, struct buffer_head *bh)
{
- sprintf(buf,
- "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
- bh->b_bdev, bh->b_size,
- (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)),
- bh->b_state, bh->b_page,
- buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE",
- buffer_dirty(bh) ? "DIRTY" : "CLEAN",
- buffer_locked(bh) ? "LOCKED" : "UNLOCKED");
+ return scnprintf(buf, size,
+ "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
+ bh->b_bdev, bh->b_size,
+ (unsigned long long)bh->b_blocknr,
+ atomic_read(&(bh->b_count)),
+ bh->b_state, bh->b_page,
+ buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE",
+ buffer_dirty(bh) ? "DIRTY" : "CLEAN",
+ buffer_locked(bh) ? "LOCKED" : "UNLOCKED");
}
-static void sprintf_disk_child(char *buf, struct disk_child *dc)
+static int scnprintf_disk_child(char *buf, size_t size, struct disk_child *dc)
{
- sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc),
- dc_size(dc));
+ return scnprintf(buf, size, "[dc_number=%d, dc_size=%u]",
+ dc_block_number(dc), dc_size(dc));
}
static char *is_there_reiserfs_struct(char *fmt, int *what)
@@ -189,55 +205,60 @@ static void prepare_error_buf(const char *fmt, va_list args)
char *fmt1 = fmt_buf;
char *k;
char *p = error_buf;
+ char * const end = &error_buf[sizeof(error_buf)];
int what;
spin_lock(&error_lock);
- strcpy(fmt1, fmt);
+ if (WARN_ON(strscpy(fmt_buf, fmt, sizeof(fmt_buf)) < 0)) {
+ strscpy(error_buf, "format string too long", end - error_buf);
+ goto out_unlock;
+ }
while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) {
*k = 0;
- p += vsprintf(p, fmt1, args);
+ p += vscnprintf(p, end - p, fmt1, args);
switch (what) {
case 'k':
- sprintf_le_key(p, va_arg(args, struct reiserfs_key *));
+ p += scnprintf_le_key(p, end - p,
+ va_arg(args, struct reiserfs_key *));
break;
case 'K':
- sprintf_cpu_key(p, va_arg(args, struct cpu_key *));
+ p += scnprintf_cpu_key(p, end - p,
+ va_arg(args, struct cpu_key *));
break;
case 'h':
- sprintf_item_head(p, va_arg(args, struct item_head *));
+ p += scnprintf_item_head(p, end - p,
+ va_arg(args, struct item_head *));
break;
case 't':
- sprintf_direntry(p,
- va_arg(args,
- struct reiserfs_dir_entry *));
+ p += scnprintf_direntry(p, end - p,
+ va_arg(args, struct reiserfs_dir_entry *));
break;
case 'y':
- sprintf_disk_child(p,
- va_arg(args, struct disk_child *));
+ p += scnprintf_disk_child(p, end - p,
+ va_arg(args, struct disk_child *));
break;
case 'z':
- sprintf_block_head(p,
- va_arg(args, struct buffer_head *));
+ p += scnprintf_block_head(p, end - p,
+ va_arg(args, struct buffer_head *));
break;
case 'b':
- sprintf_buffer_head(p,
- va_arg(args, struct buffer_head *));
+ p += scnprintf_buffer_head(p, end - p,
+ va_arg(args, struct buffer_head *));
break;
case 'a':
- sprintf_de_head(p,
- va_arg(args,
- struct reiserfs_de_head *));
+ p += scnprintf_de_head(p, end - p,
+ va_arg(args, struct reiserfs_de_head *));
break;
}
- p += strlen(p);
fmt1 = k + 2;
}
- vsprintf(p, fmt1, args);
+ p += vscnprintf(p, end - p, fmt1, args);
+out_unlock:
spin_unlock(&error_lock);
}
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2751476e6b6e..f098b9f1c396 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -167,6 +167,8 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
}
if (compressed) {
+ if (!msblk->stream)
+ goto read_failure;
length = squashfs_decompress(msblk, bh, b, offset, length,
output);
if (length < 0)
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 23813c078cc9..0839efa720b3 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -350,6 +350,9 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer,
TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset);
+ if (unlikely(length < 0))
+ return -EIO;
+
while (length) {
entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0);
if (entry->error) {
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 13d80947bf9e..f1c1430ae721 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -194,7 +194,11 @@ static long long read_indexes(struct super_block *sb, int n,
}
for (i = 0; i < blocks; i++) {
- int size = le32_to_cpu(blist[i]);
+ int size = squashfs_block_size(blist[i]);
+ if (size < 0) {
+ err = size;
+ goto failure;
+ }
block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size);
}
n -= blocks;
@@ -367,7 +371,24 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
sizeof(size));
if (res < 0)
return res;
- return le32_to_cpu(size);
+ return squashfs_block_size(size);
+}
+
+void squashfs_fill_page(struct page *page, struct squashfs_cache_entry *buffer, int offset, int avail)
+{
+ int copied;
+ void *pageaddr;
+
+ pageaddr = kmap_atomic(page);
+ copied = squashfs_copy_data(pageaddr, buffer, offset, avail);
+ memset(pageaddr + copied, 0, PAGE_SIZE - copied);
+ kunmap_atomic(pageaddr);
+
+ flush_dcache_page(page);
+ if (copied == avail)
+ SetPageUptodate(page);
+ else
+ SetPageError(page);
}
/* Copy data into page cache */
@@ -376,7 +397,6 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
{
struct inode *inode = page->mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- void *pageaddr;
int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
int start_index = page->index & ~mask, end_index = start_index | mask;
@@ -402,12 +422,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
if (PageUptodate(push_page))
goto skip_page;
- pageaddr = kmap_atomic(push_page);
- squashfs_copy_data(pageaddr, buffer, offset, avail);
- memset(pageaddr + avail, 0, PAGE_SIZE - avail);
- kunmap_atomic(pageaddr);
- flush_dcache_page(push_page);
- SetPageUptodate(push_page);
+ squashfs_fill_page(push_page, buffer, offset, avail);
skip_page:
unlock_page(push_page);
if (i != page->index)
@@ -416,10 +431,9 @@ skip_page:
}
/* Read datablock stored packed inside a fragment (tail-end packed block) */
-static int squashfs_readpage_fragment(struct page *page)
+static int squashfs_readpage_fragment(struct page *page, int expected)
{
struct inode *inode = page->mapping->host;
- struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
squashfs_i(inode)->fragment_block,
squashfs_i(inode)->fragment_size);
@@ -430,23 +444,16 @@ static int squashfs_readpage_fragment(struct page *page)
squashfs_i(inode)->fragment_block,
squashfs_i(inode)->fragment_size);
else
- squashfs_copy_cache(page, buffer, i_size_read(inode) &
- (msblk->block_size - 1),
+ squashfs_copy_cache(page, buffer, expected,
squashfs_i(inode)->fragment_offset);
squashfs_cache_put(buffer);
return res;
}
-static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
+static int squashfs_readpage_sparse(struct page *page, int expected)
{
- struct inode *inode = page->mapping->host;
- struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int bytes = index == file_end ?
- (i_size_read(inode) & (msblk->block_size - 1)) :
- msblk->block_size;
-
- squashfs_copy_cache(page, NULL, bytes, 0);
+ squashfs_copy_cache(page, NULL, expected, 0);
return 0;
}
@@ -456,6 +463,9 @@ static int squashfs_readpage(struct file *file, struct page *page)
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
int index = page->index >> (msblk->block_log - PAGE_SHIFT);
int file_end = i_size_read(inode) >> msblk->block_log;
+ int expected = index == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
int res;
void *pageaddr;
@@ -474,11 +484,11 @@ static int squashfs_readpage(struct file *file, struct page *page)
goto error_out;
if (bsize == 0)
- res = squashfs_readpage_sparse(page, index, file_end);
+ res = squashfs_readpage_sparse(page, expected);
else
- res = squashfs_readpage_block(page, block, bsize);
+ res = squashfs_readpage_block(page, block, bsize, expected);
} else
- res = squashfs_readpage_fragment(page);
+ res = squashfs_readpage_fragment(page, expected);
if (!res)
return 0;
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
index f2310d2a2019..a9ba8d96776a 100644
--- a/fs/squashfs/file_cache.c
+++ b/fs/squashfs/file_cache.c
@@ -20,7 +20,7 @@
#include "squashfs.h"
/* Read separately compressed datablock and memcopy into page cache */
-int squashfs_readpage_block(struct page *page, u64 block, int bsize)
+int squashfs_readpage_block(struct page *page, u64 block, int bsize, int expected)
{
struct inode *i = page->mapping->host;
struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
@@ -31,7 +31,7 @@ int squashfs_readpage_block(struct page *page, u64 block, int bsize)
ERROR("Unable to read page, block %llx, size %x\n", block,
bsize);
else
- squashfs_copy_cache(page, buffer, buffer->length, 0);
+ squashfs_copy_cache(page, buffer, expected, 0);
squashfs_cache_put(buffer);
return res;
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index cb485d8e0e91..80db1b86a27c 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -21,10 +21,11 @@
#include "page_actor.h"
static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
- int pages, struct page **page);
+ int pages, struct page **page, int bytes);
/* Read separately compressed datablock directly into page cache */
-int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
+int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
+ int expected)
{
struct inode *inode = target_page->mapping->host;
@@ -83,7 +84,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
* using an intermediate buffer.
*/
res = squashfs_read_cache(target_page, block, bsize, pages,
- page);
+ page, expected);
if (res < 0)
goto mark_errored;
@@ -95,6 +96,11 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
if (res < 0)
goto mark_errored;
+ if (res != expected) {
+ res = -EIO;
+ goto mark_errored;
+ }
+
/* Last page may have trailing bytes not filled */
bytes = res % PAGE_SIZE;
if (bytes) {
@@ -138,13 +144,12 @@ out:
static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
- int pages, struct page **page)
+ int pages, struct page **page, int bytes)
{
struct inode *i = target_page->mapping->host;
struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
block, bsize);
- int bytes = buffer->length, res = buffer->error, n, offset = 0;
- void *pageaddr;
+ int res = buffer->error, n, offset = 0;
if (res) {
ERROR("Unable to read page, block %llx, size %x\n", block,
@@ -159,12 +164,7 @@ static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
if (page[n] == NULL)
continue;
- pageaddr = kmap_atomic(page[n]);
- squashfs_copy_data(pageaddr, buffer, offset, avail);
- memset(pageaddr + avail, 0, PAGE_SIZE - avail);
- kunmap_atomic(pageaddr);
- flush_dcache_page(page[n]);
- SetPageUptodate(page[n]);
+ squashfs_fill_page(page[n], buffer, offset, avail);
unlock_page(page[n]);
if (page[n] != target_page)
put_page(page[n]);
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index 0ed6edbc5c71..0681feab4a84 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -49,11 +49,16 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment,
u64 *fragment_block)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
- int block = SQUASHFS_FRAGMENT_INDEX(fragment);
- int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
- u64 start_block = le64_to_cpu(msblk->fragment_index[block]);
+ int block, offset, size;
struct squashfs_fragment_entry fragment_entry;
- int size;
+ u64 start_block;
+
+ if (fragment >= msblk->fragments)
+ return -EIO;
+ block = SQUASHFS_FRAGMENT_INDEX(fragment);
+ offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
+
+ start_block = le64_to_cpu(msblk->fragment_index[block]);
size = squashfs_read_metadata(sb, &fragment_entry, &start_block,
&offset, sizeof(fragment_entry));
@@ -61,9 +66,7 @@ int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment,
return size;
*fragment_block = le64_to_cpu(fragment_entry.start_block);
- size = le32_to_cpu(fragment_entry.size);
-
- return size;
+ return squashfs_block_size(fragment_entry.size);
}
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 887d6d270080..f89f8a74c6ce 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -67,11 +67,12 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
u64, u64, unsigned int);
/* file.c */
+void squashfs_fill_page(struct page *, struct squashfs_cache_entry *, int, int);
void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
int);
/* file_xxx.c */
-extern int squashfs_readpage_block(struct page *, u64, int);
+extern int squashfs_readpage_block(struct page *, u64, int, int);
/* id.c */
extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 24d12fd14177..4e6853f084d0 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -129,6 +129,12 @@
#define SQUASHFS_COMPRESSED_BLOCK(B) (!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK))
+static inline int squashfs_block_size(__le32 raw)
+{
+ u32 size = le32_to_cpu(raw);
+ return (size >> 25) ? -EIO : size;
+}
+
/*
* Inode number ops. Inodes consist of a compressed block number, and an
* uncompressed offset within that block
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 1da565cb50c3..ef69c31947bf 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -75,6 +75,7 @@ struct squashfs_sb_info {
unsigned short block_log;
long long bytes_used;
unsigned int inodes;
+ unsigned int fragments;
int xattr_ids;
};
#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 8a73b97217c8..40e657386fa5 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -175,6 +175,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->inode_table = le64_to_cpu(sblk->inode_table_start);
msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
msblk->inodes = le32_to_cpu(sblk->inodes);
+ msblk->fragments = le32_to_cpu(sblk->fragments);
flags = le16_to_cpu(sblk->flags);
TRACE("Found valid superblock on %pg\n", sb->s_bdev);
@@ -185,7 +186,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
TRACE("Filesystem size %lld bytes\n", msblk->bytes_used);
TRACE("Block size %d\n", msblk->block_size);
TRACE("Number of inodes %d\n", msblk->inodes);
- TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments));
+ TRACE("Number of fragments %d\n", msblk->fragments);
TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids));
TRACE("sblk->inode_table_start %llx\n", msblk->inode_table);
TRACE("sblk->directory_table_start %llx\n", msblk->directory_table);
@@ -272,7 +273,7 @@ allocate_id_index_table:
sb->s_export_op = &squashfs_export_ops;
handle_fragments:
- fragments = le32_to_cpu(sblk->fragments);
+ fragments = msblk->fragments;
if (fragments == 0)
goto check_directory_table;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index cdad49da3ff7..d69ad801eb80 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -66,7 +66,7 @@ static void timerfd_triggered(struct timerfd_ctx *ctx)
spin_lock_irqsave(&ctx->wqh.lock, flags);
ctx->expired = 1;
ctx->ticks++;
- wake_up_locked(&ctx->wqh);
+ wake_up_locked_poll(&ctx->wqh, EPOLLIN);
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
}
@@ -107,7 +107,7 @@ void timerfd_clock_was_set(void)
if (ctx->moffs != moffs) {
ctx->moffs = KTIME_MAX;
ctx->ticks++;
- wake_up_locked(&ctx->wqh);
+ wake_up_locked_poll(&ctx->wqh, EPOLLIN);
}
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
}
@@ -345,7 +345,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
spin_lock_irq(&ctx->wqh.lock);
if (!timerfd_canceled(ctx)) {
ctx->ticks = ticks;
- wake_up_locked(&ctx->wqh);
+ wake_up_locked_poll(&ctx->wqh, EPOLLIN);
} else
ret = -ECANCELED;
spin_unlock_irq(&ctx->wqh.lock);
@@ -533,8 +533,8 @@ static int do_timerfd_gettime(int ufd, struct itimerspec64 *t)
}
SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
- const struct itimerspec __user *, utmr,
- struct itimerspec __user *, otmr)
+ const struct __kernel_itimerspec __user *, utmr,
+ struct __kernel_itimerspec __user *, otmr)
{
struct itimerspec64 new, old;
int ret;
@@ -550,7 +550,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
return ret;
}
-SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
+SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr)
{
struct itimerspec64 kotmr;
int ret = do_timerfd_gettime(ufd, &kotmr);
@@ -559,7 +559,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
const struct compat_itimerspec __user *, utmr,
struct compat_itimerspec __user *, otmr)
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 06f37ddd2997..58cc2414992b 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -602,8 +602,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
if (unlikely(!fi)) {
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
return err;
}
cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -694,8 +693,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err);
if (!fi) {
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
goto out;
}
set_nlink(inode, 2);
@@ -713,8 +711,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (!fi) {
clear_nlink(inode);
mark_inode_dirty(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
goto out;
}
cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
@@ -1041,8 +1038,7 @@ out:
out_no_entry:
up_write(&iinfo->i_data_sem);
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
goto out;
}
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index e1ef0f0a1353..02c0a4be4212 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -343,8 +343,7 @@ cg_found:
fail_remove_inode:
mutex_unlock(&sbi->s_lock);
clear_nlink(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
UFSD("EXIT (FAILED): err %d\n", err);
return ERR_PTR(err);
failed:
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index d5f43ba76c59..9ef40f100415 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -43,8 +43,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
return 0;
}
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
return err;
}
@@ -142,8 +141,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
out_fail:
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput(inode);
+ discard_new_inode(inode);
return err;
}
@@ -198,8 +196,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
out_fail:
inode_dec_link_count(inode);
inode_dec_link_count(inode);
- unlock_new_inode(inode);
- iput (inode);
+ discard_new_inode(inode);
out_dir:
inode_dec_link_count(dir);
return err;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 594d192b2331..bad9cea37f12 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -633,8 +633,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
/* the various vma->vm_userfaultfd_ctx still points to it */
down_write(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next)
- if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx)
+ if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+ vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING);
+ }
up_write(&mm->mmap_sem);
userfaultfd_ctx_put(release_new_ctx);
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 33dc34655ac3..30d1d60f1d46 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -731,7 +731,8 @@ xfs_inode_validate_extsize(
if ((hint_flag || inherit_flag) && extsize == 0)
return __this_address;
- if (!(hint_flag || inherit_flag) && extsize != 0)
+ /* free inodes get flags set to zero but extsize remains */
+ if (mode && !(hint_flag || inherit_flag) && extsize != 0)
return __this_address;
if (extsize_bytes % blocksize_bytes)
@@ -777,7 +778,8 @@ xfs_inode_validate_cowextsize(
if (hint_flag && cowextsize == 0)
return __this_address;
- if (!hint_flag && cowextsize != 0)
+ /* free inodes get flags set to zero but cowextsize remains */
+ if (mode && !hint_flag && cowextsize != 0)
return __this_address;
if (hint_flag && rt_flag)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 0ef5ad7fb851..c3e74f9128e8 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1254,7 +1254,7 @@ xfs_setup_inode(
inode_sb_list_add(inode);
/* make the inode look hashed for the writeback code */
- hlist_add_fake(&inode->i_hash);
+ inode_fake_hash(inode);
inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid);
inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid);