summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c4
-rw-r--r--fs/9p/vfs_dir.c2
-rw-r--r--fs/9p/xattr.c4
-rw-r--r--fs/afs/cmservice.c2
-rw-r--r--fs/afs/dir.c2
-rw-r--r--fs/afs/file.c4
-rw-r--r--fs/afs/fs_probe.c4
-rw-r--r--fs/afs/internal.h4
-rw-r--r--fs/afs/rxrpc.c10
-rw-r--r--fs/afs/server.c2
-rw-r--r--fs/afs/write.c4
-rw-r--r--fs/aio.c4
-rw-r--r--fs/binfmt_elf.c302
-rw-r--r--fs/binfmt_elf_fdpic.c7
-rw-r--r--fs/binfmt_misc.c8
-rw-r--r--fs/btrfs/backref.c138
-rw-r--r--fs/btrfs/backref.h1
-rw-r--r--fs/btrfs/block-group.c2
-rw-r--r--fs/btrfs/compression.c14
-rw-r--r--fs/btrfs/ctree.c52
-rw-r--r--fs/btrfs/ctree.h16
-rw-r--r--fs/btrfs/disk-io.c14
-rw-r--r--fs/btrfs/disk-io.h2
-rw-r--r--fs/btrfs/export.c2
-rw-r--r--fs/btrfs/export.h2
-rw-r--r--fs/btrfs/extent-io-tree.c15
-rw-r--r--fs/btrfs/extent-tree.c25
-rw-r--r--fs/btrfs/file.c29
-rw-r--r--fs/btrfs/inode.c16
-rw-r--r--fs/btrfs/ioctl.c27
-rw-r--r--fs/btrfs/ordered-data.c6
-rw-r--r--fs/btrfs/qgroup.c9
-rw-r--r--fs/btrfs/raid56.c18
-rw-r--r--fs/btrfs/scrub.c9
-rw-r--r--fs/btrfs/send.c53
-rw-r--r--fs/btrfs/send.h5
-rw-r--r--fs/btrfs/super.c16
-rw-r--r--fs/btrfs/sysfs.c7
-rw-r--r--fs/btrfs/tests/btrfs-tests.c2
-rw-r--r--fs/btrfs/tests/qgroup-tests.c36
-rw-r--r--fs/btrfs/tree-log.c59
-rw-r--r--fs/btrfs/volumes.c51
-rw-r--r--fs/btrfs/volumes.h4
-rw-r--r--fs/btrfs/zoned.c49
-rw-r--r--fs/btrfs/zoned.h11
-rw-r--r--fs/ceph/addr.c4
-rw-r--r--fs/ceph/caps.c48
-rw-r--r--fs/ceph/file.c4
-rw-r--r--fs/ceph/inode.c4
-rw-r--r--fs/ceph/locks.c4
-rw-r--r--fs/ceph/mdsmap.c2
-rw-r--r--fs/ceph/snap.c3
-rw-r--r--fs/cifs/cached_dir.c39
-rw-r--r--fs/cifs/cached_dir.h4
-rw-r--r--fs/cifs/cifsfs.c37
-rw-r--r--fs/cifs/cifsfs.h4
-rw-r--r--fs/cifs/connect.c21
-rw-r--r--fs/cifs/dir.c6
-rw-r--r--fs/cifs/file.c30
-rw-r--r--fs/cifs/fscache.c4
-rw-r--r--fs/cifs/inode.c11
-rw-r--r--fs/cifs/ioctl.c4
-rw-r--r--fs/cifs/misc.c6
-rw-r--r--fs/cifs/sess.c5
-rw-r--r--fs/cifs/smb2inode.c2
-rw-r--r--fs/cifs/smb2misc.c81
-rw-r--r--fs/cifs/smb2ops.c41
-rw-r--r--fs/cifs/smb2pdu.c17
-rw-r--r--fs/cifs/smb2transport.c19
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/coredump.c8
-rw-r--r--fs/crypto/keyring.c17
-rw-r--r--fs/debugfs/file.c28
-rw-r--r--fs/efivarfs/vars.c16
-rw-r--r--fs/erofs/fscache.c44
-rw-r--r--fs/erofs/internal.h6
-rw-r--r--fs/erofs/super.c39
-rw-r--r--fs/erofs/sysfs.c8
-rw-r--r--fs/erofs/zdata.c38
-rw-r--r--fs/erofs/zdata.h6
-rw-r--r--fs/erofs/zmap.c22
-rw-r--r--fs/exec.c40
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext4/extents.c18
-rw-r--r--fs/ext4/fast_commit.c5
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/ioctl.c3
-rw-r--r--fs/ext4/migrate.c3
-rw-r--r--fs/ext4/mmp.c8
-rw-r--r--fs/ext4/namei.c10
-rw-r--r--fs/ext4/resize.c5
-rw-r--r--fs/ext4/super.c11
-rw-r--r--fs/f2fs/gc.c2
-rw-r--r--fs/f2fs/segment.c8
-rw-r--r--fs/fat/nfs.c4
-rw-r--r--fs/file.c11
-rw-r--r--fs/fs-writeback.c30
-rw-r--r--fs/fscache/cookie.c8
-rw-r--r--fs/fscache/io.c2
-rw-r--r--fs/fscache/volume.c7
-rw-r--r--fs/fuse/file.c41
-rw-r--r--fs/fuse/ioctl.c4
-rw-r--r--fs/fuse/readdir.c10
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfs/trans.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h2
-rw-r--r--fs/hfsplus/inode.c4
-rw-r--r--fs/hfsplus/options.c4
-rw-r--r--fs/hugetlbfs/inode.c13
-rw-r--r--fs/inode.c3
-rw-r--r--fs/kernfs/dir.c14
-rw-r--r--fs/ksmbd/vfs.c8
-rw-r--r--fs/libfs.c22
-rw-r--r--fs/lockd/svcsubs.c4
-rw-r--r--fs/locks.c50
-rw-r--r--fs/namei.c3
-rw-r--r--fs/namespace.c3
-rw-r--r--fs/netfs/buffered_read.c20
-rw-r--r--fs/netfs/io.c9
-rw-r--r--fs/nfs/client.c4
-rw-r--r--fs/nfs/delegation.c38
-rw-r--r--fs/nfs/dir.c5
-rw-r--r--fs/nfs/dns_resolve.c7
-rw-r--r--fs/nfs/dns_resolve.h2
-rw-r--r--fs/nfs/fs_context.c14
-rw-r--r--fs/nfs/fscache.c4
-rw-r--r--fs/nfs/internal.h14
-rw-r--r--fs/nfs/mount_clnt.c4
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs3client.c4
-rw-r--r--fs/nfs/nfs42proc.c3
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4client.c19
-rw-r--r--fs/nfs/nfs4namespace.c16
-rw-r--r--fs/nfs/nfs4proc.c10
-rw-r--r--fs/nfs/nfs4state.c4
-rw-r--r--fs/nfs/pagelist.c2
-rw-r--r--fs/nfs/pnfs_nfs.c6
-rw-r--r--fs/nfs/super.c5
-rw-r--r--fs/nfs/write.c4
-rw-r--r--fs/nfsd/filecache.c6
-rw-r--r--fs/nfsd/nfs4state.c7
-rw-r--r--fs/nfsd/nfsctl.c4
-rw-r--r--fs/nfsd/nfsfh.c2
-rw-r--r--fs/nfsd/trace.h5
-rw-r--r--fs/nfsd/vfs.c15
-rw-r--r--fs/nilfs2/dat.c7
-rw-r--r--fs/nilfs2/segment.c15
-rw-r--r--fs/nilfs2/sufile.c8
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/nilfs2/the_nilfs.c75
-rw-r--r--fs/ocfs2/cluster/heartbeat.c38
-rw-r--r--fs/ocfs2/cluster/heartbeat.h2
-rw-r--r--fs/ocfs2/cluster/netdebug.c2
-rw-r--r--fs/ocfs2/cluster/nodemanager.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c8
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c19
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c30
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/journal.c2
-rw-r--r--fs/ocfs2/journal.h1
-rw-r--r--fs/ocfs2/namei.c23
-rw-r--r--fs/ocfs2/ocfs2.h3
-rw-r--r--fs/ocfs2/stack_o2cb.c6
-rw-r--r--fs/ocfs2/stackglue.c8
-rw-r--r--fs/ocfs2/super.c5
-rw-r--r--fs/orangefs/inode.c8
-rw-r--r--fs/proc/cmdline.c6
-rw-r--r--fs/proc/consoles.c21
-rw-r--r--fs/proc/fd.c45
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/proc/vmcore.c7
-rw-r--r--fs/pstore/platform.c25
-rw-r--r--fs/pstore/ram.c44
-rw-r--r--fs/pstore/ram_core.c20
-rw-r--r--fs/pstore/ram_internal.h98
-rw-r--r--fs/pstore/zone.c2
-rw-r--r--fs/read_write.c31
-rw-r--r--fs/seq_file.c2
-rw-r--r--fs/splice.c10
-rw-r--r--fs/squashfs/Kconfig51
-rw-r--r--fs/squashfs/block.c2
-rw-r--r--fs/squashfs/decompressor.c2
-rw-r--r--fs/squashfs/decompressor_multi.c20
-rw-r--r--fs/squashfs/decompressor_multi_percpu.c23
-rw-r--r--fs/squashfs/decompressor_single.c15
-rw-r--r--fs/squashfs/file.c23
-rw-r--r--fs/squashfs/page_actor.c3
-rw-r--r--fs/squashfs/page_actor.h6
-rw-r--r--fs/squashfs/squashfs.h23
-rw-r--r--fs/squashfs/squashfs_fs_sb.h4
-rw-r--r--fs/squashfs/super.c100
-rw-r--r--fs/super.c63
-rw-r--r--fs/sysv/itree.c2
-rw-r--r--fs/ubifs/debug.c8
-rw-r--r--fs/ubifs/lpt_commit.c14
-rw-r--r--fs/ubifs/tnc_commit.c2
-rw-r--r--fs/udf/namei.c2
-rw-r--r--fs/userfaultfd.c3
-rw-r--r--fs/xfs/libxfs/xfs_ag.h15
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c8
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c9
-rw-r--r--fs/xfs/libxfs/xfs_format.h22
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c2
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h60
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c286
-rw-r--r--fs/xfs/libxfs/xfs_refcount.h40
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c15
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c9
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c4
-rw-r--r--fs/xfs/libxfs/xfs_types.h30
-rw-r--r--fs/xfs/scrub/alloc.c4
-rw-r--r--fs/xfs/scrub/ialloc.c5
-rw-r--r--fs/xfs/scrub/refcount.c72
-rw-r--r--fs/xfs/xfs_attr_item.c67
-rw-r--r--fs/xfs/xfs_bmap_item.c54
-rw-r--r--fs/xfs/xfs_error.c11
-rw-r--r--fs/xfs/xfs_extfree_item.c94
-rw-r--r--fs/xfs/xfs_extfree_item.h16
-rw-r--r--fs/xfs/xfs_file.c7
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_log_recover.c10
-rw-r--r--fs/xfs/xfs_ondisk.h23
-rw-r--r--fs/xfs/xfs_refcount_item.c57
-rw-r--r--fs/xfs/xfs_rmap_item.c70
-rw-r--r--fs/xfs/xfs_super.c12
-rw-r--r--fs/xfs/xfs_sysfs.h7
-rw-r--r--fs/xfs/xfs_trace.h48
-rw-r--r--fs/xfs/xfs_trans_ail.c3
-rw-r--r--fs/zonefs/super.c60
-rw-r--r--fs/zonefs/sysfs.c5
-rw-r--r--fs/zonefs/zonefs.h6
234 files changed, 2751 insertions, 1627 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 47b9a1122f34..a19891015f19 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -40,7 +40,7 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
size_t len = subreq->len - subreq->transferred;
int total, err;
- iov_iter_xarray(&to, READ, &rreq->mapping->i_pages, pos, len);
+ iov_iter_xarray(&to, ITER_DEST, &rreq->mapping->i_pages, pos, len);
total = p9_client_read(fid, pos, &to, &err);
@@ -172,7 +172,7 @@ static int v9fs_vfs_write_folio_locked(struct folio *folio)
len = min_t(loff_t, i_size - start, len);
- iov_iter_xarray(&from, WRITE, &folio_mapping(folio)->i_pages, start, len);
+ iov_iter_xarray(&from, ITER_SOURCE, &folio_mapping(folio)->i_pages, start, len);
/* We should have writeback_fid always set */
BUG_ON(!v9inode->writeback_fid);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 000fbaae9b18..3bb95adc9619 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -109,7 +109,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
struct iov_iter to;
int n;
- iov_iter_kvec(&to, READ, &kvec, 1, buflen);
+ iov_iter_kvec(&to, ITER_DEST, &kvec, 1, buflen);
n = p9_client_read(file->private_data, ctx->pos, &to,
&err);
if (err)
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index ae6a93871338..b6984311e00a 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -25,7 +25,7 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
struct iov_iter to;
int err;
- iov_iter_kvec(&to, READ, &kvec, 1, buffer_size);
+ iov_iter_kvec(&to, ITER_DEST, &kvec, 1, buffer_size);
attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
if (IS_ERR(attr_fid)) {
@@ -110,7 +110,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
struct iov_iter from;
int retval, err;
- iov_iter_kvec(&from, WRITE, &kvec, 1, value_len);
+ iov_iter_kvec(&from, ITER_SOURCE, &kvec, 1, value_len);
p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
name, value_len, flags);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 0a090d614e76..7dcd59693a0c 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -298,7 +298,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
if (call->count2 != call->count && call->count2 != 0)
return afs_protocol_error(call, afs_eproto_cb_count);
call->iter = &call->def_iter;
- iov_iter_discard(&call->def_iter, READ, call->count2 * 3 * 4);
+ iov_iter_discard(&call->def_iter, ITER_DEST, call->count2 * 3 * 4);
call->unmarshall++;
fallthrough;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 230c2d19116d..104df2964225 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -305,7 +305,7 @@ expand:
req->actual_len = i_size; /* May change */
req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
req->data_version = dvnode->status.data_version; /* May change */
- iov_iter_xarray(&req->def_iter, READ, &dvnode->netfs.inode.i_mapping->i_pages,
+ iov_iter_xarray(&req->def_iter, ITER_DEST, &dvnode->netfs.inode.i_mapping->i_pages,
0, i_size);
req->iter = &req->def_iter;
diff --git a/fs/afs/file.c b/fs/afs/file.c
index d1cfb235c4b9..2eeab57df133 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -324,7 +324,7 @@ static void afs_issue_read(struct netfs_io_subrequest *subreq)
fsreq->vnode = vnode;
fsreq->iter = &fsreq->def_iter;
- iov_iter_xarray(&fsreq->def_iter, READ,
+ iov_iter_xarray(&fsreq->def_iter, ITER_DEST,
&fsreq->vnode->netfs.inode.i_mapping->i_pages,
fsreq->pos, fsreq->len);
@@ -346,7 +346,7 @@ static int afs_symlink_read_folio(struct file *file, struct folio *folio)
fsreq->len = folio_size(folio);
fsreq->vnode = vnode;
fsreq->iter = &fsreq->def_iter;
- iov_iter_xarray(&fsreq->def_iter, READ, &folio->mapping->i_pages,
+ iov_iter_xarray(&fsreq->def_iter, ITER_DEST, &folio->mapping->i_pages,
fsreq->pos, fsreq->len);
ret = afs_fetch_data(fsreq->vnode, fsreq);
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index c0031a3ab42f..3ac5fcf98d0d 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -167,8 +167,8 @@ responded:
clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
}
- if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
- rtt_us < server->probe.rtt) {
+ rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
+ if (rtt_us < server->probe.rtt) {
server->probe.rtt = rtt_us;
server->rtt = rtt_us;
alist->preferred = index;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 723d162078a3..9ba7b68375c9 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1301,7 +1301,7 @@ static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t si
call->iov_len = size;
call->kvec[0].iov_base = buf;
call->kvec[0].iov_len = size;
- iov_iter_kvec(&call->def_iter, READ, call->kvec, 1, size);
+ iov_iter_kvec(&call->def_iter, ITER_DEST, call->kvec, 1, size);
}
static inline void afs_extract_to_tmp(struct afs_call *call)
@@ -1319,7 +1319,7 @@ static inline void afs_extract_to_tmp64(struct afs_call *call)
static inline void afs_extract_discard(struct afs_call *call, size_t size)
{
call->iov_len = size;
- iov_iter_discard(&call->def_iter, READ, size);
+ iov_iter_discard(&call->def_iter, ITER_DEST, size);
}
static inline void afs_extract_to_buf(struct afs_call *call, size_t size)
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index eccc3cd0cb70..c62939e5ea1f 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -359,7 +359,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
msg.msg_name = NULL;
msg.msg_namelen = 0;
- iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size);
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, iov, 1, call->request_size);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = MSG_WAITALL | (call->write_iter ? MSG_MORE : 0);
@@ -400,7 +400,7 @@ error_do_abort:
RX_USER_ABORT, ret, "KSD");
} else {
len = 0;
- iov_iter_kvec(&msg.msg_iter, READ, NULL, 0, 0);
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0);
rxrpc_kernel_recv_data(call->net->socket, rxcall,
&msg.msg_iter, &len, false,
&call->abort_code, &call->service_id);
@@ -485,7 +485,7 @@ static void afs_deliver_to_call(struct afs_call *call)
) {
if (state == AFS_CALL_SV_AWAIT_ACK) {
len = 0;
- iov_iter_kvec(&call->def_iter, READ, NULL, 0, 0);
+ iov_iter_kvec(&call->def_iter, ITER_DEST, NULL, 0, 0);
ret = rxrpc_kernel_recv_data(call->net->socket,
call->rxcall, &call->def_iter,
&len, false, &remote_abort,
@@ -822,7 +822,7 @@ void afs_send_empty_reply(struct afs_call *call)
msg.msg_name = NULL;
msg.msg_namelen = 0;
- iov_iter_kvec(&msg.msg_iter, WRITE, NULL, 0, 0);
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, NULL, 0, 0);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -862,7 +862,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
iov[0].iov_len = len;
msg.msg_name = NULL;
msg.msg_namelen = 0;
- iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, iov, 1, len);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 4981baf97835..b5237206eac3 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -406,7 +406,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server,
if (!server)
return;
- a = atomic_inc_return(&server->active);
+ a = atomic_read(&server->active);
zero = __refcount_dec_and_test(&server->ref, &r);
trace_afs_server(debug_id, r - 1, a, reason);
if (unlikely(zero))
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9ebdd36eaf2f..08fd456dde67 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -609,7 +609,7 @@ static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
*/
afs_write_to_cache(vnode, start, len, i_size, caching);
- iov_iter_xarray(&iter, WRITE, &mapping->i_pages, start, len);
+ iov_iter_xarray(&iter, ITER_SOURCE, &mapping->i_pages, start, len);
ret = afs_store_data(vnode, &iter, start, false);
} else {
_debug("write discard %x @%llx [%llx]", len, start, i_size);
@@ -1000,7 +1000,7 @@ int afs_launder_folio(struct folio *folio)
bv[0].bv_page = &folio->page;
bv[0].bv_offset = f;
bv[0].bv_len = t - f;
- iov_iter_bvec(&iter, WRITE, bv, 1, bv[0].bv_len);
+ iov_iter_bvec(&iter, ITER_SOURCE, bv, 1, bv[0].bv_len);
trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio);
ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true);
diff --git a/fs/aio.c b/fs/aio.c
index 5b2ff20ad322..562916d85cba 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1552,7 +1552,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb,
if (unlikely(!file->f_op->read_iter))
return -EINVAL;
- ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
+ ret = aio_setup_rw(ITER_DEST, iocb, &iovec, vectored, compat, &iter);
if (ret < 0)
return ret;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
@@ -1580,7 +1580,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
if (unlikely(!file->f_op->write_iter))
return -EINVAL;
- ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
+ ret = aio_setup_rw(ITER_SOURCE, iocb, &iovec, vectored, compat, &iter);
if (ret < 0)
return ret;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 63c7ebb0da89..de63572a9404 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -248,7 +248,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
} while (0)
#ifdef ARCH_DLINFO
- /*
+ /*
* ARCH_DLINFO must come first so PPC can do its special alignment of
* AUXV.
* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
@@ -456,13 +456,13 @@ static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
*
* Loads ELF program headers from the binary file elf_file, which has the ELF
* header pointed to by elf_ex, into a newly allocated array. The caller is
- * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
+ * responsible for freeing the allocated data. Returns NULL upon failure.
*/
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
struct file *elf_file)
{
struct elf_phdr *elf_phdata = NULL;
- int retval, err = -1;
+ int retval = -1;
unsigned int size;
/*
@@ -484,15 +484,9 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
/* Read in the program headers */
retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
- if (retval < 0) {
- err = retval;
- goto out;
- }
- /* Success! */
- err = 0;
out:
- if (err) {
+ if (retval) {
kfree(elf_phdata);
elf_phdata = NULL;
}
@@ -911,7 +905,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
if (!interp_elf_ex) {
retval = -ENOMEM;
- goto out_free_ph;
+ goto out_free_file;
}
/* Get the exec headers */
@@ -1020,7 +1014,7 @@ out_free_interp:
executable_stack);
if (retval < 0)
goto out_free_dentry;
-
+
elf_bss = 0;
elf_brk = 0;
@@ -1043,7 +1037,7 @@ out_free_interp:
if (unlikely (elf_brk > elf_bss)) {
unsigned long nbyte;
-
+
/* There was a PT_LOAD segment with p_memsz > p_filesz
before this one. Map anonymous pages, if needed,
and clear the area. */
@@ -1166,7 +1160,7 @@ out_free_interp:
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
- retval = IS_ERR((void *)error) ?
+ retval = IS_ERR_VALUE(error) ?
PTR_ERR((void*)error) : -EINVAL;
goto out_free_dentry;
}
@@ -1251,7 +1245,7 @@ out_free_interp:
interpreter,
load_bias, interp_elf_phdata,
&arch_state);
- if (!IS_ERR((void *)elf_entry)) {
+ if (!IS_ERR_VALUE(elf_entry)) {
/*
* load_elf_interp() returns relocation
* adjustment
@@ -1260,7 +1254,7 @@ out_free_interp:
elf_entry += interp_elf_ex->e_entry;
}
if (BAD_ADDR(elf_entry)) {
- retval = IS_ERR((void *)elf_entry) ?
+ retval = IS_ERR_VALUE(elf_entry) ?
(int)elf_entry : -EINVAL;
goto out_free_dentry;
}
@@ -1354,6 +1348,7 @@ out:
out_free_dentry:
kfree(interp_elf_ex);
kfree(interp_elf_phdata);
+out_free_file:
allow_write_access(interpreter);
if (interpreter)
fput(interpreter);
@@ -1520,7 +1515,7 @@ static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
phdr->p_align = 0;
}
-static void fill_note(struct memelfnote *note, const char *name, int type,
+static void fill_note(struct memelfnote *note, const char *name, int type,
unsigned int sz, void *data)
{
note->name = name;
@@ -1723,7 +1718,6 @@ static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm
return 0;
}
-#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>
struct elf_thread_core_info {
@@ -1744,6 +1738,7 @@ struct elf_note_info {
int thread_notes;
};
+#ifdef CORE_DUMP_USE_REGSET
/*
* When a regset has a writeback hook, we call it on each thread before
* dumping user memory. On register window machines, this makes sure the
@@ -1823,34 +1818,58 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
return 1;
}
+#else
+static int fill_thread_core_info(struct elf_thread_core_info *t,
+ const struct user_regset_view *view,
+ long signr, struct elf_note_info *info)
+{
+ struct task_struct *p = t->task;
+ elf_fpregset_t *fpu;
+
+ fill_prstatus(&t->prstatus.common, p, signr);
+ elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
+
+ fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
+ &(t->prstatus));
+ info->size += notesize(&t->notes[0]);
+
+ fpu = kzalloc(sizeof(elf_fpregset_t), GFP_KERNEL);
+ if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) {
+ kfree(fpu);
+ return 1;
+ }
+
+ t->prstatus.pr_fpvalid = 1;
+ fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
+ info->size += notesize(&t->notes[1]);
+
+ return 1;
+}
+#endif
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
struct coredump_params *cprm)
{
struct task_struct *dump_task = current;
- const struct user_regset_view *view = task_user_regset_view(dump_task);
+ const struct user_regset_view *view;
struct elf_thread_core_info *t;
struct elf_prpsinfo *psinfo;
struct core_thread *ct;
- unsigned int i;
-
- info->size = 0;
- info->thread = NULL;
psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
- if (psinfo == NULL) {
- info->psinfo.data = NULL; /* So we don't free this wrongly */
+ if (!psinfo)
return 0;
- }
-
fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
+#ifdef CORE_DUMP_USE_REGSET
+ view = task_user_regset_view(dump_task);
+
/*
* Figure out how many notes we're going to need for each thread.
*/
info->thread_notes = 0;
- for (i = 0; i < view->n; ++i)
+ for (int i = 0; i < view->n; ++i)
if (view->regsets[i].core_note_type != 0)
++info->thread_notes;
@@ -1869,11 +1888,23 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
*/
fill_elf_header(elf, phdrs,
view->e_machine, view->e_flags);
+#else
+ view = NULL;
+ info->thread_notes = 2;
+ fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
+#endif
/*
* Allocate a structure for each thread.
*/
- for (ct = &dump_task->signal->core_state->dumper; ct; ct = ct->next) {
+ info->thread = kzalloc(offsetof(struct elf_thread_core_info,
+ notes[info->thread_notes]),
+ GFP_KERNEL);
+ if (unlikely(!info->thread))
+ return 0;
+
+ info->thread->task = dump_task;
+ for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) {
t = kzalloc(offsetof(struct elf_thread_core_info,
notes[info->thread_notes]),
GFP_KERNEL);
@@ -1881,17 +1912,8 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
return 0;
t->task = ct->task;
- if (ct->task == dump_task || !info->thread) {
- t->next = info->thread;
- info->thread = t;
- } else {
- /*
- * Make sure to keep the original task at
- * the head of the list.
- */
- t->next = info->thread->next;
- info->thread->next = t;
- }
+ t->next = info->thread->next;
+ info->thread->next = t;
}
/*
@@ -1919,11 +1941,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
return 1;
}
-static size_t get_note_info_size(struct elf_note_info *info)
-{
- return info->size;
-}
-
/*
* Write all the notes for each thread. When writing the first thread, the
* process-wide notes are interleaved after the first thread-specific note.
@@ -1978,197 +1995,6 @@ static void free_note_info(struct elf_note_info *info)
kvfree(info->files.data);
}
-#else
-
-/* Here is the structure in which status of each thread is captured. */
-struct elf_thread_status
-{
- struct list_head list;
- struct elf_prstatus prstatus; /* NT_PRSTATUS */
- elf_fpregset_t fpu; /* NT_PRFPREG */
- struct task_struct *thread;
- struct memelfnote notes[3];
- int num_notes;
-};
-
-/*
- * In order to add the specific thread information for the elf file format,
- * we need to keep a linked list of every threads pr_status and then create
- * a single section for them in the final core file.
- */
-static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
-{
- int sz = 0;
- struct task_struct *p = t->thread;
- t->num_notes = 0;
-
- fill_prstatus(&t->prstatus.common, p, signr);
- elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
-
- fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
- &(t->prstatus));
- t->num_notes++;
- sz += notesize(&t->notes[0]);
-
- if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
- &t->fpu))) {
- fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
- &(t->fpu));
- t->num_notes++;
- sz += notesize(&t->notes[1]);
- }
- return sz;
-}
-
-struct elf_note_info {
- struct memelfnote *notes;
- struct memelfnote *notes_files;
- struct elf_prstatus *prstatus; /* NT_PRSTATUS */
- struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
- struct list_head thread_list;
- elf_fpregset_t *fpu;
- user_siginfo_t csigdata;
- int thread_status_size;
- int numnote;
-};
-
-static int elf_note_info_init(struct elf_note_info *info)
-{
- memset(info, 0, sizeof(*info));
- INIT_LIST_HEAD(&info->thread_list);
-
- /* Allocate space for ELF notes */
- info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
- if (!info->notes)
- return 0;
- info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
- if (!info->psinfo)
- return 0;
- info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
- if (!info->prstatus)
- return 0;
- info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
- if (!info->fpu)
- return 0;
- return 1;
-}
-
-static int fill_note_info(struct elfhdr *elf, int phdrs,
- struct elf_note_info *info,
- struct coredump_params *cprm)
-{
- struct core_thread *ct;
- struct elf_thread_status *ets;
-
- if (!elf_note_info_init(info))
- return 0;
-
- for (ct = current->signal->core_state->dumper.next;
- ct; ct = ct->next) {
- ets = kzalloc(sizeof(*ets), GFP_KERNEL);
- if (!ets)
- return 0;
-
- ets->thread = ct->task;
- list_add(&ets->list, &info->thread_list);
- }
-
- list_for_each_entry(ets, &info->thread_list, list) {
- int sz;
-
- sz = elf_dump_thread_status(cprm->siginfo->si_signo, ets);
- info->thread_status_size += sz;
- }
- /* now collect the dump for the current */
- memset(info->prstatus, 0, sizeof(*info->prstatus));
- fill_prstatus(&info->prstatus->common, current, cprm->siginfo->si_signo);
- elf_core_copy_regs(&info->prstatus->pr_reg, cprm->regs);
-
- /* Set up header */
- fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
-
- /*
- * Set up the notes in similar form to SVR4 core dumps made
- * with info from their /proc.
- */
-
- fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
- sizeof(*info->prstatus), info->prstatus);
- fill_psinfo(info->psinfo, current->group_leader, current->mm);
- fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
- sizeof(*info->psinfo), info->psinfo);
-
- fill_siginfo_note(info->notes + 2, &info->csigdata, cprm->siginfo);
- fill_auxv_note(info->notes + 3, current->mm);
- info->numnote = 4;
-
- if (fill_files_note(info->notes + info->numnote, cprm) == 0) {
- info->notes_files = info->notes + info->numnote;
- info->numnote++;
- }
-
- /* Try to dump the FPU. */
- info->prstatus->pr_fpvalid =
- elf_core_copy_task_fpregs(current, cprm->regs, info->fpu);
- if (info->prstatus->pr_fpvalid)
- fill_note(info->notes + info->numnote++,
- "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
- return 1;
-}
-
-static size_t get_note_info_size(struct elf_note_info *info)
-{
- int sz = 0;
- int i;
-
- for (i = 0; i < info->numnote; i++)
- sz += notesize(info->notes + i);
-
- sz += info->thread_status_size;
-
- return sz;
-}
-
-static int write_note_info(struct elf_note_info *info,
- struct coredump_params *cprm)
-{
- struct elf_thread_status *ets;
- int i;
-
- for (i = 0; i < info->numnote; i++)
- if (!writenote(info->notes + i, cprm))
- return 0;
-
- /* write out the thread status notes section */
- list_for_each_entry(ets, &info->thread_list, list) {
- for (i = 0; i < ets->num_notes; i++)
- if (!writenote(&ets->notes[i], cprm))
- return 0;
- }
-
- return 1;
-}
-
-static void free_note_info(struct elf_note_info *info)
-{
- while (!list_empty(&info->thread_list)) {
- struct list_head *tmp = info->thread_list.next;
- list_del(tmp);
- kfree(list_entry(tmp, struct elf_thread_status, list));
- }
-
- /* Free data possibly allocated by fill_files_note(): */
- if (info->notes_files)
- kvfree(info->notes_files->data);
-
- kfree(info->prstatus);
- kfree(info->psinfo);
- kfree(info->notes);
- kfree(info->fpu);
-}
-
-#endif
-
static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
elf_addr_t e_shoff, int segs)
{
@@ -2232,7 +2058,7 @@ static int elf_core_dump(struct coredump_params *cprm)
/* Write notes phdr entry */
{
- size_t sz = get_note_info_size(&info);
+ size_t sz = info.size;
/* For cell spufs */
sz += elf_coredump_extra_notes_size();
@@ -2294,7 +2120,7 @@ static int elf_core_dump(struct coredump_params *cprm)
if (!elf_core_write_extra_phdrs(cprm, offset))
goto end_coredump;
- /* write out the notes section */
+ /* write out the notes section */
if (!write_note_info(&info, cprm))
goto end_coredump;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 08d0c8797828..096e3520a0b1 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -434,8 +434,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
current->mm->start_stack = current->mm->start_brk + stack_size;
#endif
- if (create_elf_fdpic_tables(bprm, current->mm,
- &exec_params, &interp_params) < 0)
+ retval = create_elf_fdpic_tables(bprm, current->mm, &exec_params,
+ &interp_params);
+ if (retval < 0)
goto error;
kdebug("- start_code %lx", current->mm->start_code);
@@ -1603,7 +1604,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
if (!elf_core_write_extra_phdrs(cprm, offset))
goto end_coredump;
- /* write out the notes section */
+ /* write out the notes section */
if (!writenote(thread_list->notes, cprm))
goto end_coredump;
if (!writenote(&psinfo_note, cprm))
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index e1eae7ea823a..bb202ad369d5 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -44,10 +44,10 @@ static LIST_HEAD(entries);
static int enabled = 1;
enum {Enabled, Magic};
-#define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
-#define MISC_FMT_OPEN_BINARY (1 << 30)
-#define MISC_FMT_CREDENTIALS (1 << 29)
-#define MISC_FMT_OPEN_FILE (1 << 28)
+#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31)
+#define MISC_FMT_OPEN_BINARY (1UL << 30)
+#define MISC_FMT_CREDENTIALS (1UL << 29)
+#define MISC_FMT_OPEN_FILE (1UL << 28)
typedef struct {
struct list_head list;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index dce3a16996b9..18374a6d05bd 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -138,6 +138,7 @@ struct share_check {
u64 root_objectid;
u64 inum;
int share_count;
+ bool have_delayed_delete_refs;
};
static inline int extent_is_shared(struct share_check *sc)
@@ -288,8 +289,10 @@ static void prelim_release(struct preftree *preftree)
struct prelim_ref *ref, *next_ref;
rbtree_postorder_for_each_entry_safe(ref, next_ref,
- &preftree->root.rb_root, rbnode)
+ &preftree->root.rb_root, rbnode) {
+ free_inode_elem_list(ref->inode_list);
free_pref(ref);
+ }
preftree->root = RB_ROOT_CACHED;
preftree->count = 0;
@@ -647,6 +650,18 @@ unode_aux_to_inode_list(struct ulist_node *node)
return (struct extent_inode_elem *)(uintptr_t)node->aux;
}
+static void free_leaf_list(struct ulist *ulist)
+{
+ struct ulist_node *node;
+ struct ulist_iterator uiter;
+
+ ULIST_ITER_INIT(&uiter);
+ while ((node = ulist_next(ulist, &uiter)))
+ free_inode_elem_list(unode_aux_to_inode_list(node));
+
+ ulist_free(ulist);
+}
+
/*
* We maintain three separate rbtrees: one for direct refs, one for
* indirect refs which have a key, and one for indirect refs which do not
@@ -761,7 +776,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
cond_resched();
}
out:
- ulist_free(parents);
+ /*
+ * We may have inode lists attached to refs in the parents ulist, so we
+ * must free them before freeing the ulist and its refs.
+ */
+ free_leaf_list(parents);
return ret;
}
@@ -820,16 +839,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
struct preftrees *preftrees, struct share_check *sc)
{
struct btrfs_delayed_ref_node *node;
- struct btrfs_delayed_extent_op *extent_op = head->extent_op;
struct btrfs_key key;
- struct btrfs_key tmp_op_key;
struct rb_node *n;
int count;
int ret = 0;
- if (extent_op && extent_op->update_key)
- btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
-
spin_lock(&head->lock);
for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
node = rb_entry(n, struct btrfs_delayed_ref_node,
@@ -855,10 +869,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
case BTRFS_TREE_BLOCK_REF_KEY: {
/* NORMAL INDIRECT METADATA backref */
struct btrfs_delayed_tree_ref *ref;
+ struct btrfs_key *key_ptr = NULL;
+
+ if (head->extent_op && head->extent_op->update_key) {
+ btrfs_disk_key_to_cpu(&key, &head->extent_op->key);
+ key_ptr = &key;
+ }
ref = btrfs_delayed_node_to_tree_ref(node);
ret = add_indirect_ref(fs_info, preftrees, ref->root,
- &tmp_op_key, ref->level + 1,
+ key_ptr, ref->level + 1,
node->bytenr, count, sc,
GFP_ATOMIC);
break;
@@ -884,13 +904,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
key.offset = ref->offset;
/*
- * Found a inum that doesn't match our known inum, we
- * know it's shared.
+ * If we have a share check context and a reference for
+ * another inode, we can't exit immediately. This is
+ * because even if this is a BTRFS_ADD_DELAYED_REF
+ * reference we may find next a BTRFS_DROP_DELAYED_REF
+ * which cancels out this ADD reference.
+ *
+ * If this is a DROP reference and there was no previous
+ * ADD reference, then we need to signal that when we
+ * process references from the extent tree (through
+ * add_inline_refs() and add_keyed_refs()), we should
+ * not exit early if we find a reference for another
+ * inode, because one of the delayed DROP references
+ * may cancel that reference in the extent tree.
*/
- if (sc && sc->inum && ref->objectid != sc->inum) {
- ret = BACKREF_FOUND_SHARED;
- goto out;
- }
+ if (sc && count < 0)
+ sc->have_delayed_delete_refs = true;
ret = add_indirect_ref(fs_info, preftrees, ref->root,
&key, 0, node->bytenr, count, sc,
@@ -920,7 +949,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
}
if (!ret)
ret = extent_is_shared(sc);
-out:
+
spin_unlock(&head->lock);
return ret;
}
@@ -1023,7 +1052,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
- if (sc && sc->inum && key.objectid != sc->inum) {
+ if (sc && sc->inum && key.objectid != sc->inum &&
+ !sc->have_delayed_delete_refs) {
ret = BACKREF_FOUND_SHARED;
break;
}
@@ -1033,6 +1063,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
ret = add_indirect_ref(fs_info, preftrees, root,
&key, 0, bytenr, count,
sc, GFP_NOFS);
+
break;
}
default:
@@ -1122,7 +1153,8 @@ static int add_keyed_refs(struct btrfs_root *extent_root,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
- if (sc && sc->inum && key.objectid != sc->inum) {
+ if (sc && sc->inum && key.objectid != sc->inum &&
+ !sc->have_delayed_delete_refs) {
ret = BACKREF_FOUND_SHARED;
break;
}
@@ -1354,6 +1386,12 @@ again:
if (ret < 0)
goto out;
ref->inode_list = eie;
+ /*
+ * We transferred the list ownership to the ref,
+ * so set to NULL to avoid a double free in case
+ * an error happens after this.
+ */
+ eie = NULL;
}
ret = ulist_add_merge_ptr(refs, ref->parent,
ref->inode_list,
@@ -1379,6 +1417,14 @@ again:
eie->next = ref->inode_list;
}
eie = NULL;
+ /*
+ * We have transferred the inode list ownership from
+ * this ref to the ref we added to the 'refs' ulist.
+ * So set this ref's inode list to NULL to avoid
+ * use-after-free when our caller uses it or double
+ * frees in case an error happens before we return.
+ */
+ ref->inode_list = NULL;
}
cond_resched();
}
@@ -1395,24 +1441,6 @@ out:
return ret;
}
-static void free_leaf_list(struct ulist *blocks)
-{
- struct ulist_node *node = NULL;
- struct extent_inode_elem *eie;
- struct ulist_iterator uiter;
-
- ULIST_ITER_INIT(&uiter);
- while ((node = ulist_next(blocks, &uiter))) {
- if (!node->aux)
- continue;
- eie = unode_aux_to_inode_list(node);
- free_inode_elem_list(eie);
- node->aux = 0;
- }
-
- ulist_free(blocks);
-}
-
/*
* Finds all leafs with a reference to the specified combination of bytenr and
* offset. key_list_head will point to a list of corresponding keys (caller must
@@ -1522,6 +1550,9 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache
{
struct btrfs_backref_shared_cache_entry *entry;
+ if (!cache->use_cache)
+ return false;
+
if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
return false;
@@ -1557,6 +1588,19 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache
return false;
*is_shared = entry->is_shared;
+ /*
+ * If the node at this level is shared, than all nodes below are also
+ * shared. Currently some of the nodes below may be marked as not shared
+ * because we have just switched from one leaf to another, and switched
+ * also other nodes above the leaf and below the current level, so mark
+ * them as shared.
+ */
+ if (*is_shared) {
+ for (int i = 0; i < level; i++) {
+ cache->entries[i].is_shared = true;
+ cache->entries[i].gen = entry->gen;
+ }
+ }
return true;
}
@@ -1573,6 +1617,9 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
struct btrfs_backref_shared_cache_entry *entry;
u64 gen;
+ if (!cache->use_cache)
+ return;
+
if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
return;
@@ -1648,6 +1695,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
.root_objectid = root->root_key.objectid,
.inum = inum,
.share_count = 0,
+ .have_delayed_delete_refs = false,
};
int level;
@@ -1669,6 +1717,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
/* -1 means we are in the bytenr of the data extent. */
level = -1;
ULIST_ITER_INIT(&uiter);
+ cache->use_cache = true;
while (1) {
bool is_shared;
bool cached;
@@ -1698,6 +1747,24 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
extent_gen > btrfs_root_last_snapshot(&root->root_item))
break;
+ /*
+ * If our data extent was not directly shared (without multiple
+ * reference items), than it might have a single reference item
+ * with a count > 1 for the same offset, which means there are 2
+ * (or more) file extent items that point to the data extent -
+ * this happens when a file extent item needs to be split and
+ * then one item gets moved to another leaf due to a b+tree leaf
+ * split when inserting some item. In this case the file extent
+ * items may be located in different leaves and therefore some
+ * of the leaves may be referenced through shared subtrees while
+ * others are not. Since our extent buffer cache only works for
+ * a single path (by far the most common case and simpler to
+ * deal with), we can not use it if we have multiple leaves
+ * (which implies multiple paths).
+ */
+ if (level == -1 && tmp->nnodes > 1)
+ cache->use_cache = false;
+
if (level >= 0)
store_backref_shared_cache(cache, root, bytenr,
level, false);
@@ -1713,6 +1780,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
break;
}
shared.share_count = 0;
+ shared.have_delayed_delete_refs = false;
cond_resched();
}
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 52ae6957b414..8e69584d538d 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -29,6 +29,7 @@ struct btrfs_backref_shared_cache {
* a given data extent should never exceed the maximum b+tree height.
*/
struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL];
+ bool use_cache;
};
typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 32c415cfbdfe..deebc8ddbd93 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -774,10 +774,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
out:
- /* REVIEW */
if (wait && caching_ctl)
ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
- /* wait_event(caching_ctl->wait, space_cache_v1_done(cache)); */
if (caching_ctl)
btrfs_put_caching_control(caching_ctl);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f1f051ad3147..e6635fe70067 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -512,7 +512,7 @@ static u64 bio_end_offset(struct bio *bio)
static noinline int add_ra_bio_pages(struct inode *inode,
u64 compressed_end,
struct compressed_bio *cb,
- unsigned long *pflags)
+ int *memstall, unsigned long *pflags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
unsigned long end_index;
@@ -581,8 +581,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
continue;
}
- if (PageWorkingset(page))
+ if (!*memstall && PageWorkingset(page)) {
psi_memstall_enter(pflags);
+ *memstall = 1;
+ }
ret = set_page_extent_mapped(page);
if (ret < 0) {
@@ -670,8 +672,8 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
u64 em_len;
u64 em_start;
struct extent_map *em;
- /* Initialize to 1 to make skip psi_memstall_leave unless needed */
- unsigned long pflags = 1;
+ unsigned long pflags;
+ int memstall = 0;
blk_status_t ret;
int ret2;
int i;
@@ -727,7 +729,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
goto fail;
}
- add_ra_bio_pages(inode, em_start + em_len, cb, &pflags);
+ add_ra_bio_pages(inode, em_start + em_len, cb, &memstall, &pflags);
/* include any pages we added in add_ra-bio_pages */
cb->len = bio->bi_iter.bi_size;
@@ -807,7 +809,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
}
}
- if (!pflags)
+ if (memstall)
psi_memstall_leave(&pflags);
if (refcount_dec_and_test(&cb->pending_ios))
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b39b339fbf96..dcb510f38dda 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -114,6 +114,22 @@ noinline void btrfs_release_path(struct btrfs_path *p)
}
/*
+ * We want the transaction abort to print stack trace only for errors where the
+ * cause could be a bug, eg. due to ENOSPC, and not for common errors that are
+ * caused by external factors.
+ */
+bool __cold abort_should_print_stack(int errno)
+{
+ switch (errno) {
+ case -EIO:
+ case -EROFS:
+ case -ENOMEM:
+ return false;
+ }
+ return true;
+}
+
+/*
* safely gets a reference on the root node of a tree. A lock
* is not taken, so a concurrent writer may put a different node
* at the root of the tree. See btrfs_lock_root_node for the
@@ -4647,7 +4663,12 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
int ret;
int i;
- ASSERT(!path->nowait);
+ /*
+ * The nowait semantics are used only for write paths, where we don't
+ * use the tree mod log and sequence numbers.
+ */
+ if (time_seq)
+ ASSERT(!path->nowait);
nritems = btrfs_header_nritems(path->nodes[0]);
if (nritems == 0)
@@ -4667,7 +4688,14 @@ again:
if (path->need_commit_sem) {
path->need_commit_sem = 0;
need_commit_sem = true;
- down_read(&fs_info->commit_root_sem);
+ if (path->nowait) {
+ if (!down_read_trylock(&fs_info->commit_root_sem)) {
+ ret = -EAGAIN;
+ goto done;
+ }
+ } else {
+ down_read(&fs_info->commit_root_sem);
+ }
}
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
}
@@ -4743,7 +4771,7 @@ again:
next = c;
ret = read_block_for_search(root, path, &next, level,
slot, &key);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN && !path->nowait)
goto again;
if (ret < 0) {
@@ -4753,6 +4781,10 @@ again:
if (!path->skip_locking) {
ret = btrfs_try_tree_read_lock(next);
+ if (!ret && path->nowait) {
+ ret = -EAGAIN;
+ goto done;
+ }
if (!ret && time_seq) {
/*
* If we don't get the lock, we may be racing
@@ -4783,7 +4815,7 @@ again:
ret = read_block_for_search(root, path, &next, level,
0, &key);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN && !path->nowait)
goto again;
if (ret < 0) {
@@ -4791,8 +4823,16 @@ again:
goto done;
}
- if (!path->skip_locking)
- btrfs_tree_read_lock(next);
+ if (!path->skip_locking) {
+ if (path->nowait) {
+ if (!btrfs_try_tree_read_lock(next)) {
+ ret = -EAGAIN;
+ goto done;
+ }
+ } else {
+ btrfs_tree_read_lock(next);
+ }
+ }
}
ret = 0;
done:
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d93a4d027706..919670d35919 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3462,7 +3462,10 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
const struct btrfs_ioctl_encoded_io_args *encoded);
-ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before);
+ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter,
+ size_t done_before);
+struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
+ size_t done_before);
extern const struct dentry_operations btrfs_dentry_operations;
@@ -3793,9 +3796,11 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
const char *function,
unsigned int line, int errno, bool first_hit);
+bool __cold abort_should_print_stack(int errno);
+
/*
* Call btrfs_abort_transaction as early as possible when an error condition is
- * detected, that way the exact line number is reported.
+ * detected, that way the exact stack trace is reported for some errors.
*/
#define btrfs_abort_transaction(trans, errno) \
do { \
@@ -3804,10 +3809,11 @@ do { \
if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
&((trans)->fs_info->fs_state))) { \
first = true; \
- if ((errno) != -EIO && (errno) != -EROFS) { \
- WARN(1, KERN_DEBUG \
+ if (WARN(abort_should_print_stack(errno), \
+ KERN_DEBUG \
"BTRFS: Transaction aborted (error %d)\n", \
- (errno)); \
+ (errno))) { \
+ /* Stack trace printed. */ \
} else { \
btrfs_debug((trans)->fs_info, \
"Transaction aborted (error %d)", \
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a2da9313c694..d99bf7c64611 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -166,11 +166,9 @@ static bool btrfs_supported_super_csum(u16 csum_type)
* Return 0 if the superblock checksum type matches the checksum value of that
* algorithm. Pass the raw disk superblock data.
*/
-static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
- char *raw_disk_sb)
+int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
+ const struct btrfs_super_block *disk_sb)
{
- struct btrfs_super_block *disk_sb =
- (struct btrfs_super_block *)raw_disk_sb;
char result[BTRFS_CSUM_SIZE];
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
@@ -181,7 +179,7 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
* BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
* filled with zeros and is included in the checksum.
*/
- crypto_shash_digest(shash, raw_disk_sb + BTRFS_CSUM_SIZE,
+ crypto_shash_digest(shash, (const u8 *)disk_sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);
if (memcmp(disk_sb->csum, result, fs_info->csum_size))
@@ -2553,7 +2551,9 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
fs_info->dev_root = root;
}
/* Initialize fs_info for all devices in any case */
- btrfs_init_devices_late(fs_info);
+ ret = btrfs_init_devices_late(fs_info);
+ if (ret)
+ goto out;
/*
* This tree can share blocks with some other fs tree during relocation
@@ -3479,7 +3479,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
* We want to check superblock checksum, the type is stored inside.
* Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
*/
- if (btrfs_check_super_csum(fs_info, (u8 *)disk_super)) {
+ if (btrfs_check_super_csum(fs_info, disk_super)) {
btrfs_err(fs_info, "superblock checksum mismatch");
err = -EINVAL;
btrfs_release_disk_super(disk_super);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index c67c15d4d20b..9fa923e005a3 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -42,6 +42,8 @@ struct extent_buffer *btrfs_find_create_tree_block(
void btrfs_clean_tree_block(struct extent_buffer *buf);
void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info);
int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info);
+int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
+ const struct btrfs_super_block *disk_sb);
int __cold open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 1d4c2397d0d6..fab7eb76e53b 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -58,7 +58,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
}
struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
- u64 root_objectid, u32 generation,
+ u64 root_objectid, u64 generation,
int check_generation)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
index f32f4113c976..5afb7ca42828 100644
--- a/fs/btrfs/export.h
+++ b/fs/btrfs/export.h
@@ -19,7 +19,7 @@ struct btrfs_fid {
} __attribute__ ((packed));
struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
- u64 root_objectid, u32 generation,
+ u64 root_objectid, u64 generation,
int check_generation);
struct dentry *btrfs_get_parent(struct dentry *child);
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index 618275af19c4..83cb0378096f 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -1641,16 +1641,17 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
int err;
u64 failed_start;
- while (1) {
+ err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
+ cached_state, NULL, GFP_NOFS);
+ while (err == -EEXIST) {
+ if (failed_start != start)
+ clear_extent_bit(tree, start, failed_start - 1,
+ EXTENT_LOCKED, cached_state);
+
+ wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
&failed_start, cached_state, NULL,
GFP_NOFS);
- if (err == -EEXIST) {
- wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
- start = failed_start;
- } else
- break;
- WARN_ON(start > end);
}
return err;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cd2d36580f1a..2801c991814f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3295,21 +3295,22 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
}
/*
- * If this is a leaf and there are tree mod log users, we may
- * have recorded mod log operations that point to this leaf.
- * So we must make sure no one reuses this leaf's extent before
- * mod log operations are applied to a node, otherwise after
- * rewinding a node using the mod log operations we get an
- * inconsistent btree, as the leaf's extent may now be used as
- * a node or leaf for another different btree.
+ * If there are tree mod log users we may have recorded mod log
+ * operations for this node. If we re-allocate this node we
+ * could replay operations on this node that happened when it
+ * existed in a completely different root. For example if it
+ * was part of root A, then was reallocated to root B, and we
+ * are doing a btrfs_old_search_slot(root b), we could replay
+ * operations that happened when the block was part of root A,
+ * giving us an inconsistent view of the btree.
+ *
* We are safe from races here because at this point no other
* node or root points to this extent buffer, so if after this
- * check a new tree mod log user joins, it will not be able to
- * find a node pointing to this leaf and record operations that
- * point to this leaf.
+ * check a new tree mod log user joins we will not have an
+ * existing log of operations on this node that we have to
+ * contend with.
*/
- if (btrfs_header_level(buf) == 0 &&
- test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
+ if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
must_pin = true;
if (must_pin || btrfs_is_zoned(fs_info)) {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 176b432035ae..d01631d47806 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1598,14 +1598,19 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
write_bytes);
else
btrfs_check_nocow_unlock(BTRFS_I(inode));
+
+ if (nowait && ret == -ENOSPC)
+ ret = -EAGAIN;
break;
}
release_bytes = reserve_bytes;
again:
ret = balance_dirty_pages_ratelimited_flags(inode->i_mapping, bdp_flags);
- if (ret)
+ if (ret) {
+ btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
break;
+ }
/*
* This is going to setup the pages array with the number of
@@ -1765,6 +1770,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
loff_t endbyte;
ssize_t err;
unsigned int ilock_flags = 0;
+ struct iomap_dio *dio;
if (iocb->ki_flags & IOCB_NOWAIT)
ilock_flags |= BTRFS_ILOCK_TRY;
@@ -1825,11 +1831,22 @@ relock:
* So here we disable page faults in the iov_iter and then retry if we
* got -EFAULT, faulting in the pages before the retry.
*/
-again:
from->nofault = true;
- err = btrfs_dio_rw(iocb, from, written);
+ dio = btrfs_dio_write(iocb, from, written);
from->nofault = false;
+ /*
+ * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync
+ * iocb, and that needs to lock the inode. So unlock it before calling
+ * iomap_dio_complete() to avoid a deadlock.
+ */
+ btrfs_inode_unlock(inode, ilock_flags);
+
+ if (IS_ERR_OR_NULL(dio))
+ err = PTR_ERR_OR_ZERO(dio);
+ else
+ err = iomap_dio_complete(dio);
+
/* No increment (+=) because iomap returns a cumulative value. */
if (err > 0)
written = err;
@@ -1855,12 +1872,10 @@ again:
} else {
fault_in_iov_iter_readable(from, left);
prev_left = left;
- goto again;
+ goto relock;
}
}
- btrfs_inode_unlock(inode, ilock_flags);
-
/*
* If 'err' is -ENOTBLK or we have not written all data, then it means
* we must fallback to buffered IO.
@@ -4035,7 +4050,7 @@ again:
*/
pagefault_disable();
to->nofault = true;
- ret = btrfs_dio_rw(iocb, to, read);
+ ret = btrfs_dio_read(iocb, to, read);
to->nofault = false;
pagefault_enable();
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7a3076ccbab4..5a54bb93c413 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7980,7 +7980,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
*/
status = BLK_STS_RESOURCE;
dip->csums = kcalloc(nr_sectors, fs_info->csum_size, GFP_NOFS);
- if (!dip)
+ if (!dip->csums)
goto out_err;
status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums);
@@ -8078,13 +8078,21 @@ static const struct iomap_dio_ops btrfs_dio_ops = {
.bio_set = &btrfs_dio_bioset,
};
-ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before)
+ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, size_t done_before)
{
struct btrfs_dio_data data;
return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
- IOMAP_DIO_PARTIAL | IOMAP_DIO_NOSYNC,
- &data, done_before);
+ IOMAP_DIO_PARTIAL, &data, done_before);
+}
+
+struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
+ size_t done_before)
+{
+ struct btrfs_dio_data data;
+
+ return __iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
+ IOMAP_DIO_PARTIAL, &data, done_before);
}
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d5dd8bed1488..f897be9ec1e9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3105,6 +3105,8 @@ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
}
}
+ btrfs_free_path(path);
+ path = NULL;
if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
ret = -EFAULT;
@@ -3194,6 +3196,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
}
out:
+ btrfs_free_path(path);
+
if (!ret || ret == -EOVERFLOW) {
rootrefs->num_items = found;
/* update min_treeid for next search */
@@ -3205,7 +3209,6 @@ out:
}
kfree(rootrefs);
- btrfs_free_path(path);
return ret;
}
@@ -4231,6 +4234,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
ipath->fspath->val[i] = rel_ptr;
}
+ btrfs_free_path(path);
+ path = NULL;
ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
ipath->fspath, size);
if (ret) {
@@ -4281,21 +4286,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
size = min_t(u32, loi->size, SZ_16M);
}
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
inodes = init_data_container(size);
if (IS_ERR(inodes)) {
ret = PTR_ERR(inodes);
- inodes = NULL;
- goto out;
+ goto out_loi;
}
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
inodes, ignore_offset);
+ btrfs_free_path(path);
if (ret == -EINVAL)
ret = -ENOENT;
if (ret < 0)
@@ -4307,7 +4311,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
ret = -EFAULT;
out:
- btrfs_free_path(path);
kvfree(inodes);
out_loi:
kfree(loi);
@@ -5283,7 +5286,7 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
goto out_acct;
}
- ret = import_iovec(READ, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
+ ret = import_iovec(ITER_DEST, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
&iov, &iter);
if (ret < 0)
goto out_acct;
@@ -5382,7 +5385,7 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
if (args.len > args.unencoded_len - args.unencoded_offset)
goto out_acct;
- ret = import_iovec(WRITE, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
+ ret = import_iovec(ITER_SOURCE, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
&iov, &iter);
if (ret < 0)
goto out_acct;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e54f8280031f..100d9f4836b1 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -761,11 +761,11 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
struct btrfs_ordered_extent *ordered;
if (start + len < start) {
- orig_end = INT_LIMIT(loff_t);
+ orig_end = OFFSET_MAX;
} else {
orig_end = start + len - 1;
- if (orig_end > INT_LIMIT(loff_t))
- orig_end = INT_LIMIT(loff_t);
+ if (orig_end > OFFSET_MAX)
+ orig_end = OFFSET_MAX;
}
/* start IO across the range first to instantiate any delalloc
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9334c3157c22..b74105a10f16 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2951,14 +2951,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
dstgroup->rsv_excl = inherit->lim.rsv_excl;
- ret = update_qgroup_limit_item(trans, dstgroup);
- if (ret) {
- qgroup_mark_inconsistent(fs_info);
- btrfs_info(fs_info,
- "unable to update quota limit for %llu",
- dstgroup->qgroupid);
- goto unlock;
- }
+ qgroup_dirty(fs_info, dstgroup);
}
if (srcid) {
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index f6395e8288d6..82c8e991300e 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1632,10 +1632,8 @@ static int full_stripe_write(struct btrfs_raid_bio *rbio)
int ret;
ret = alloc_rbio_parity_pages(rbio);
- if (ret) {
- __free_raid_bio(rbio);
+ if (ret)
return ret;
- }
ret = lock_stripe_add(rbio);
if (ret == 0)
@@ -1823,8 +1821,10 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
*/
if (rbio_is_full(rbio)) {
ret = full_stripe_write(rbio);
- if (ret)
+ if (ret) {
+ __free_raid_bio(rbio);
goto fail;
+ }
return;
}
@@ -1838,8 +1838,10 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
list_add_tail(&rbio->plug_list, &plug->rbio_list);
} else {
ret = __raid56_parity_write(rbio);
- if (ret)
+ if (ret) {
+ __free_raid_bio(rbio);
goto fail;
+ }
}
return;
@@ -2742,8 +2744,10 @@ raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc)
rbio->faila = find_logical_bio_stripe(rbio, bio);
if (rbio->faila == -1) {
- BUG();
- kfree(rbio);
+ btrfs_warn_rl(fs_info,
+ "can not determine the failed stripe number for full stripe %llu",
+ bioc->raid_map[0]);
+ __free_raid_bio(rbio);
return NULL;
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f260c53829e5..196c4c6ed1ed 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2672,17 +2672,11 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
u8 csum[BTRFS_CSUM_SIZE];
u32 blocksize;
- /*
- * Block size determines how many scrub_block will be allocated. Here
- * we use BTRFS_STRIPE_LEN (64KiB) as default limit, so we won't
- * allocate too many scrub_block, while still won't cause too large
- * bios for large extents.
- */
if (flags & BTRFS_EXTENT_FLAG_DATA) {
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
blocksize = map->stripe_len;
else
- blocksize = BTRFS_STRIPE_LEN;
+ blocksize = sctx->fs_info->sectorsize;
spin_lock(&sctx->stat_lock);
sctx->stat.data_extents_scrubbed++;
sctx->stat.data_bytes_scrubbed += len;
@@ -3917,7 +3911,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
if (!test_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags)) {
- spin_unlock(&cache->lock);
btrfs_put_block_group(cache);
goto skip;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 4ef4167072b8..1c4b693ee4a3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -348,6 +348,7 @@ static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd)
switch (sctx->proto) {
case 1: return cmd <= BTRFS_SEND_C_MAX_V1;
case 2: return cmd <= BTRFS_SEND_C_MAX_V2;
+ case 3: return cmd <= BTRFS_SEND_C_MAX_V3;
default: return false;
}
}
@@ -5701,6 +5702,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
u64 ext_len;
u64 clone_len;
u64 clone_data_offset;
+ bool crossed_src_i_size = false;
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(clone_root->root, path);
@@ -5758,8 +5760,10 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
if (key.offset >= clone_src_i_size)
break;
- if (key.offset + ext_len > clone_src_i_size)
+ if (key.offset + ext_len > clone_src_i_size) {
ext_len = clone_src_i_size - key.offset;
+ crossed_src_i_size = true;
+ }
clone_data_offset = btrfs_file_extent_offset(leaf, ei);
if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
@@ -5820,6 +5824,25 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
ret = send_clone(sctx, offset, clone_len,
clone_root);
}
+ } else if (crossed_src_i_size && clone_len < len) {
+ /*
+ * If we are at i_size of the clone source inode and we
+ * can not clone from it, terminate the loop. This is
+ * to avoid sending two write operations, one with a
+ * length matching clone_len and the final one after
+ * this loop with a length of len - clone_len.
+ *
+ * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED
+ * was passed to the send ioctl), this helps avoid
+ * sending an encoded write for an offset that is not
+ * sector size aligned, in case the i_size of the source
+ * inode is not sector size aligned. That will make the
+ * receiver fallback to decompression of the data and
+ * writing it using regular buffered IO, therefore while
+ * not incorrect, it's not optimal due decompression and
+ * possible re-compression at the receiver.
+ */
+ break;
} else {
ret = send_extent_data(sctx, dst_path, offset,
clone_len);
@@ -6469,7 +6492,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
if (ret < 0)
goto out;
}
- if (sctx->cur_inode_needs_verity) {
+
+ if (proto_cmd_ok(sctx, BTRFS_SEND_C_ENABLE_VERITY)
+ && sctx->cur_inode_needs_verity) {
ret = process_verity(sctx);
if (ret < 0)
goto out;
@@ -6665,17 +6690,19 @@ static int changed_inode(struct send_ctx *sctx,
/*
* First, process the inode as if it was deleted.
*/
- sctx->cur_inode_gen = right_gen;
- sctx->cur_inode_new = false;
- sctx->cur_inode_deleted = true;
- sctx->cur_inode_size = btrfs_inode_size(
- sctx->right_path->nodes[0], right_ii);
- sctx->cur_inode_mode = btrfs_inode_mode(
- sctx->right_path->nodes[0], right_ii);
- ret = process_all_refs(sctx,
- BTRFS_COMPARE_TREE_DELETED);
- if (ret < 0)
- goto out;
+ if (old_nlinks > 0) {
+ sctx->cur_inode_gen = right_gen;
+ sctx->cur_inode_new = false;
+ sctx->cur_inode_deleted = true;
+ sctx->cur_inode_size = btrfs_inode_size(
+ sctx->right_path->nodes[0], right_ii);
+ sctx->cur_inode_mode = btrfs_inode_mode(
+ sctx->right_path->nodes[0], right_ii);
+ ret = process_all_refs(sctx,
+ BTRFS_COMPARE_TREE_DELETED);
+ if (ret < 0)
+ goto out;
+ }
/*
* Now process the inode as if it was new.
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 0a4537775e0c..f7585cfa7e52 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -10,7 +10,12 @@
#include <linux/types.h>
#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
+/* Conditional support for the upcoming protocol version. */
+#ifdef CONFIG_BTRFS_DEBUG
+#define BTRFS_SEND_STREAM_VERSION 3
+#else
#define BTRFS_SEND_STREAM_VERSION 2
+#endif
/*
* In send stream v1, no command is larger than 64K. In send stream v2, no limit
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9be4fd2db0f4..5942b9384088 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2555,6 +2555,7 @@ static int check_dev_super(struct btrfs_device *dev)
{
struct btrfs_fs_info *fs_info = dev->fs_info;
struct btrfs_super_block *sb;
+ u16 csum_type;
int ret = 0;
/* This should be called with fs still frozen. */
@@ -2569,6 +2570,21 @@ static int check_dev_super(struct btrfs_device *dev)
if (IS_ERR(sb))
return PTR_ERR(sb);
+ /* Verify the checksum. */
+ csum_type = btrfs_super_csum_type(sb);
+ if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) {
+ btrfs_err(fs_info, "csum type changed, has %u expect %u",
+ csum_type, btrfs_super_csum_type(fs_info->super_copy));
+ ret = -EUCLEAN;
+ goto out;
+ }
+
+ if (btrfs_check_super_csum(fs_info, sb)) {
+ btrfs_err(fs_info, "csum for on-disk super block no longer matches");
+ ret = -EUCLEAN;
+ goto out;
+ }
+
/* Btrfs_validate_super() includes fsid check against super->fsid. */
ret = btrfs_validate_super(fs_info, sb, 0);
if (ret < 0)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 699b54b3acaa..74fef1f49c35 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -2321,8 +2321,11 @@ int __init btrfs_init_sysfs(void)
#ifdef CONFIG_BTRFS_DEBUG
ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group);
- if (ret)
- goto out2;
+ if (ret) {
+ sysfs_unmerge_group(&btrfs_kset->kobj,
+ &btrfs_static_feature_attr_group);
+ goto out_remove_group;
+ }
#endif
return 0;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 9c478fa256f6..d43cb5242fec 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -200,7 +200,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
void btrfs_free_dummy_root(struct btrfs_root *root)
{
- if (!root)
+ if (IS_ERR_OR_NULL(root))
return;
/* Will be freed by btrfs_free_fs_roots */
if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state)))
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index eee1e4459541..63676ea19f29 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -225,20 +225,20 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
*/
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
- ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
BTRFS_FS_TREE_OBJECTID);
- if (ret)
+ if (ret) {
+ ulist_free(old_roots);
return ret;
+ }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
- ulist_free(new_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -250,29 +250,31 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
return ret;
}
+ /* btrfs_qgroup_account_extent() always frees the ulists passed to it. */
+ old_roots = NULL;
+ new_roots = NULL;
+
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
nodesize, nodesize)) {
test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
- old_roots = NULL;
- new_roots = NULL;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
- ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = remove_extent_item(root, nodesize, nodesize);
- if (ret)
+ if (ret) {
+ ulist_free(old_roots);
return -EINVAL;
+ }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
- ulist_free(new_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -322,20 +324,20 @@ static int test_multiple_refs(struct btrfs_root *root,
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
- ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
BTRFS_FS_TREE_OBJECTID);
- if (ret)
+ if (ret) {
+ ulist_free(old_roots);
return ret;
+ }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
- ulist_free(new_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -355,20 +357,20 @@ static int test_multiple_refs(struct btrfs_root *root,
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
- ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = add_tree_ref(root, nodesize, nodesize, 0,
BTRFS_FIRST_FREE_OBJECTID);
- if (ret)
+ if (ret) {
+ ulist_free(old_roots);
return ret;
+ }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
- ulist_free(new_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -394,20 +396,20 @@ static int test_multiple_refs(struct btrfs_root *root,
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
if (ret) {
- ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = remove_extent_ref(root, nodesize, nodesize, 0,
BTRFS_FIRST_FREE_OBJECTID);
- if (ret)
+ if (ret) {
+ ulist_free(old_roots);
return ret;
+ }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
if (ret) {
ulist_free(old_roots);
- ulist_free(new_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 813986e38258..c3cf3dabe0b1 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3694,15 +3694,29 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
u64 *last_old_dentry_offset)
{
struct btrfs_root *log = inode->root->log_root;
- struct extent_buffer *src = path->nodes[0];
- const int nritems = btrfs_header_nritems(src);
+ struct extent_buffer *src;
+ const int nritems = btrfs_header_nritems(path->nodes[0]);
const u64 ino = btrfs_ino(inode);
bool last_found = false;
int batch_start = 0;
int batch_size = 0;
int i;
- for (i = path->slots[0]; i < nritems; i++) {
+ /*
+ * We need to clone the leaf, release the read lock on it, and use the
+ * clone before modifying the log tree. See the comment at copy_items()
+ * about why we need to do this.
+ */
+ src = btrfs_clone_extent_buffer(path->nodes[0]);
+ if (!src)
+ return -ENOMEM;
+
+ i = path->slots[0];
+ btrfs_release_path(path);
+ path->nodes[0] = src;
+ path->slots[0] = i;
+
+ for (; i < nritems; i++) {
struct btrfs_dir_item *di;
struct btrfs_key key;
int ret;
@@ -4303,7 +4317,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
{
struct btrfs_root *log = inode->root->log_root;
struct btrfs_file_extent_item *extent;
- struct extent_buffer *src = src_path->nodes[0];
+ struct extent_buffer *src;
int ret = 0;
struct btrfs_key *ins_keys;
u32 *ins_sizes;
@@ -4314,6 +4328,43 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM);
const u64 i_size = i_size_read(&inode->vfs_inode);
+ /*
+ * To keep lockdep happy and avoid deadlocks, clone the source leaf and
+ * use the clone. This is because otherwise we would be changing the log
+ * tree, to insert items from the subvolume tree or insert csum items,
+ * while holding a read lock on a leaf from the subvolume tree, which
+ * creates a nasty lock dependency when COWing log tree nodes/leaves:
+ *
+ * 1) Modifying the log tree triggers an extent buffer allocation while
+ * holding a write lock on a parent extent buffer from the log tree.
+ * Allocating the pages for an extent buffer, or the extent buffer
+ * struct, can trigger inode eviction and finally the inode eviction
+ * will trigger a release/remove of a delayed node, which requires
+ * taking the delayed node's mutex;
+ *
+ * 2) Allocating a metadata extent for a log tree can trigger the async
+ * reclaim thread and make us wait for it to release enough space and
+ * unblock our reservation ticket. The reclaim thread can start
+ * flushing delayed items, and that in turn results in the need to
+ * lock delayed node mutexes and in the need to write lock extent
+ * buffers of a subvolume tree - all this while holding a write lock
+ * on the parent extent buffer in the log tree.
+ *
+ * So one task in scenario 1) running in parallel with another task in
+ * scenario 2) could lead to a deadlock, one wanting to lock a delayed
+ * node mutex while having a read lock on a leaf from the subvolume,
+ * while the other is holding the delayed node's mutex and wants to
+ * write lock the same subvolume leaf for flushing delayed items.
+ */
+ src = btrfs_clone_extent_buffer(src_path->nodes[0]);
+ if (!src)
+ return -ENOMEM;
+
+ i = src_path->slots[0];
+ btrfs_release_path(src_path);
+ src_path->nodes[0] = src;
+ src_path->slots[0] = i;
+
ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
nr * sizeof(u32), GFP_NOFS);
if (!ins_data)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 94ba46d57920..635f45f1a2ef 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1011,6 +1011,18 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
rcu_assign_pointer(device->name, name);
}
+ if (orig_dev->zone_info) {
+ struct btrfs_zoned_device_info *zone_info;
+
+ zone_info = btrfs_clone_dev_zone_info(orig_dev);
+ if (!zone_info) {
+ btrfs_free_device(device);
+ ret = -ENOMEM;
+ goto error;
+ }
+ device->zone_info = zone_info;
+ }
+
list_add(&device->dev_list, &fs_devices->devices);
device->fs_devices = fs_devices;
fs_devices->num_devices++;
@@ -6918,18 +6930,18 @@ static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
static bool dev_args_match_device(const struct btrfs_dev_lookup_args *args,
const struct btrfs_device *device)
{
- ASSERT((args->devid != (u64)-1) || args->missing);
+ if (args->missing) {
+ if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) &&
+ !device->bdev)
+ return true;
+ return false;
+ }
- if ((args->devid != (u64)-1) && device->devid != args->devid)
+ if (device->devid != args->devid)
return false;
if (args->uuid && memcmp(device->uuid, args->uuid, BTRFS_UUID_SIZE) != 0)
return false;
- if (!args->missing)
- return true;
- if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) &&
- !device->bdev)
- return true;
- return false;
+ return true;
}
/*
@@ -7142,6 +7154,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
u64 devid;
u64 type;
u8 uuid[BTRFS_UUID_SIZE];
+ int index;
int num_stripes;
int ret;
int i;
@@ -7149,6 +7162,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
type = btrfs_chunk_type(leaf, chunk);
+ index = btrfs_bg_flags_to_raid_index(type);
num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
#if BITS_PER_LONG == 32
@@ -7202,7 +7216,15 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
map->io_align = btrfs_chunk_io_align(leaf, chunk);
map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
map->type = type;
- map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+ /*
+ * We can't use the sub_stripes value, as for profiles other than
+ * RAID10, they may have 0 as sub_stripes for filesystems created by
+ * older mkfs (<v5.4).
+ * In that case, it can cause divide-by-zero errors later.
+ * Since currently sub_stripes is fixed for each profile, let's
+ * use the trusted value instead.
+ */
+ map->sub_stripes = btrfs_raid_array[index].sub_stripes;
map->verified_stripes = 0;
em->orig_block_len = btrfs_calc_stripe_length(em);
for (i = 0; i < num_stripes; i++) {
@@ -7734,10 +7756,11 @@ error:
return ret;
}
-void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
+int btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
struct btrfs_device *device;
+ int ret = 0;
fs_devices->fs_info = fs_info;
@@ -7746,12 +7769,18 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
device->fs_info = fs_info;
list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
- list_for_each_entry(device, &seed_devs->devices, dev_list)
+ list_for_each_entry(device, &seed_devs->devices, dev_list) {
device->fs_info = fs_info;
+ ret = btrfs_get_dev_zone_info(device, false);
+ if (ret)
+ break;
+ }
seed_devs->fs_info = fs_info;
}
mutex_unlock(&fs_devices->device_list_mutex);
+
+ return ret;
}
static u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 599b9d5af349..099def5613b8 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -395,6 +395,7 @@ typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
*/
struct btrfs_bio {
unsigned int mirror_num;
+ struct bvec_iter iter;
/* for direct I/O */
u64 file_offset;
@@ -403,7 +404,6 @@ struct btrfs_bio {
struct btrfs_device *device;
u8 *csum;
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
- struct bvec_iter iter;
/* End I/O information supplied to btrfs_bio_alloc */
btrfs_bio_end_io_t end_io;
@@ -671,7 +671,7 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_get_dev_stats *stats);
-void btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
+int btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans);
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index e2d073b08a7d..c9e2b0c85309 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -134,7 +134,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
super[i] = page_address(page[i]);
}
- if (super[0]->generation > super[1]->generation)
+ if (btrfs_super_generation(super[0]) >
+ btrfs_super_generation(super[1]))
sector = zones[1].start;
else
sector = zones[0].start;
@@ -466,7 +467,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
goto out;
}
- zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
+ zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
if (!zones) {
ret = -ENOMEM;
goto out;
@@ -585,7 +586,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
}
- kfree(zones);
+ kvfree(zones);
switch (bdev_zoned_model(bdev)) {
case BLK_ZONED_HM:
@@ -617,7 +618,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
return 0;
out:
- kfree(zones);
+ kvfree(zones);
out_free_zone_info:
btrfs_destroy_dev_zone_info(device);
@@ -639,6 +640,46 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
device->zone_info = NULL;
}
+struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev)
+{
+ struct btrfs_zoned_device_info *zone_info;
+
+ zone_info = kmemdup(orig_dev->zone_info, sizeof(*zone_info), GFP_KERNEL);
+ if (!zone_info)
+ return NULL;
+
+ zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+ if (!zone_info->seq_zones)
+ goto out;
+
+ bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones,
+ zone_info->nr_zones);
+
+ zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+ if (!zone_info->empty_zones)
+ goto out;
+
+ bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones,
+ zone_info->nr_zones);
+
+ zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+ if (!zone_info->active_zones)
+ goto out;
+
+ bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones,
+ zone_info->nr_zones);
+ zone_info->zone_cache = NULL;
+
+ return zone_info;
+
+out:
+ bitmap_free(zone_info->seq_zones);
+ bitmap_free(zone_info->empty_zones);
+ bitmap_free(zone_info->active_zones);
+ kfree(zone_info);
+ return NULL;
+}
+
int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone)
{
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index e17462db3a84..8bd16d40b7c6 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -36,6 +36,7 @@ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info);
int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache);
void btrfs_destroy_dev_zone_info(struct btrfs_device *device);
+struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev);
int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info);
int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info);
int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
@@ -103,6 +104,16 @@ static inline int btrfs_get_dev_zone_info(struct btrfs_device *device,
static inline void btrfs_destroy_dev_zone_info(struct btrfs_device *device) { }
+/*
+ * In case the kernel is compiled without CONFIG_BLK_DEV_ZONED we'll never call
+ * into btrfs_clone_dev_zone_info() so it's safe to return NULL here.
+ */
+static inline struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(
+ struct btrfs_device *orig_dev)
+{
+ return NULL;
+}
+
static inline int btrfs_check_zoned_mode(const struct btrfs_fs_info *fs_info)
{
if (!btrfs_is_zoned(fs_info))
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index dcf701b05cc1..61f47debec5a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -288,7 +288,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
}
len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len);
- iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len);
+ iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter);
if (err == 0)
err = -EFAULT;
@@ -327,7 +327,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
}
dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len);
- iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len);
+ iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
if (err < 0) {
dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index fb023f9fafcb..e54814d0c2f7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2248,7 +2248,6 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req1 = NULL, *req2 = NULL;
- unsigned int max_sessions;
int ret, err = 0;
spin_lock(&ci->i_unsafe_lock);
@@ -2267,27 +2266,23 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
spin_unlock(&ci->i_unsafe_lock);
/*
- * The mdsc->max_sessions is unlikely to be changed
- * mostly, here we will retry it by reallocating the
- * sessions array memory to get rid of the mdsc->mutex
- * lock.
- */
-retry:
- max_sessions = mdsc->max_sessions;
-
- /*
* Trigger to flush the journal logs in all the relevant MDSes
* manually, or in the worst case we must wait at most 5 seconds
* to wait the journal logs to be flushed by the MDSes periodically.
*/
- if ((req1 || req2) && likely(max_sessions)) {
- struct ceph_mds_session **sessions = NULL;
- struct ceph_mds_session *s;
+ if (req1 || req2) {
struct ceph_mds_request *req;
+ struct ceph_mds_session **sessions;
+ struct ceph_mds_session *s;
+ unsigned int max_sessions;
int i;
+ mutex_lock(&mdsc->mutex);
+ max_sessions = mdsc->max_sessions;
+
sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
if (!sessions) {
+ mutex_unlock(&mdsc->mutex);
err = -ENOMEM;
goto out;
}
@@ -2299,16 +2294,6 @@ retry:
s = req->r_session;
if (!s)
continue;
- if (unlikely(s->s_mds >= max_sessions)) {
- spin_unlock(&ci->i_unsafe_lock);
- for (i = 0; i < max_sessions; i++) {
- s = sessions[i];
- if (s)
- ceph_put_mds_session(s);
- }
- kfree(sessions);
- goto retry;
- }
if (!sessions[s->s_mds]) {
s = ceph_get_mds_session(s);
sessions[s->s_mds] = s;
@@ -2321,16 +2306,6 @@ retry:
s = req->r_session;
if (!s)
continue;
- if (unlikely(s->s_mds >= max_sessions)) {
- spin_unlock(&ci->i_unsafe_lock);
- for (i = 0; i < max_sessions; i++) {
- s = sessions[i];
- if (s)
- ceph_put_mds_session(s);
- }
- kfree(sessions);
- goto retry;
- }
if (!sessions[s->s_mds]) {
s = ceph_get_mds_session(s);
sessions[s->s_mds] = s;
@@ -2342,11 +2317,12 @@ retry:
/* the auth MDS */
spin_lock(&ci->i_ceph_lock);
if (ci->i_auth_cap) {
- s = ci->i_auth_cap->session;
- if (!sessions[s->s_mds])
- sessions[s->s_mds] = ceph_get_mds_session(s);
+ s = ci->i_auth_cap->session;
+ if (!sessions[s->s_mds])
+ sessions[s->s_mds] = ceph_get_mds_session(s);
}
spin_unlock(&ci->i_ceph_lock);
+ mutex_unlock(&mdsc->mutex);
/* send flush mdlog request to MDSes */
for (i = 0; i < max_sessions; i++) {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 04fd34557de8..6f9580defb2b 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1161,7 +1161,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
aio_req->total_len = rc + zlen;
}
- iov_iter_bvec(&i, READ, osd_data->bvec_pos.bvecs,
+ iov_iter_bvec(&i, ITER_DEST, osd_data->bvec_pos.bvecs,
osd_data->num_bvecs, len);
iov_iter_advance(&i, rc);
iov_iter_zero(zlen, &i);
@@ -1400,7 +1400,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
int zlen = min_t(size_t, len - ret,
size - pos - ret);
- iov_iter_bvec(&i, READ, bvecs, num_pages, len);
+ iov_iter_bvec(&i, ITER_DEST, bvecs, num_pages, len);
iov_iter_advance(&i, ret);
iov_iter_zero(zlen, &i);
ret += zlen;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 31cd27843eca..f23c5a6edc6f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -362,7 +362,7 @@ static int ceph_fill_fragtree(struct inode *inode,
if (nsplits != ci->i_fragtree_nsplits) {
update = true;
} else if (nsplits) {
- i = prandom_u32_max(nsplits);
+ i = get_random_u32_below(nsplits);
id = le32_to_cpu(fragtree->splits[i].frag);
if (!__ceph_find_frag(ci, id))
update = true;
@@ -2492,7 +2492,7 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct inode *parent;
parent = ceph_lookup_inode(sb, ceph_ino(inode));
- if (!parent)
+ if (IS_ERR(parent))
return PTR_ERR(parent);
pci = ceph_inode(parent);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 3e2843e86e27..f3b461c708a8 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -364,7 +364,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
*fcntl_count = 0;
*flock_count = 0;
- ctx = inode->i_flctx;
+ ctx = locks_inode_context(inode);
if (ctx) {
spin_lock(&ctx->flc_lock);
list_for_each_entry(lock, &ctx->flc_posix, fl_list)
@@ -418,7 +418,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
int num_fcntl_locks, int num_flock_locks)
{
struct file_lock *lock;
- struct file_lock_context *ctx = inode->i_flctx;
+ struct file_lock_context *ctx = locks_inode_context(inode);
int err = 0;
int seen_fcntl = 0;
int seen_flock = 0;
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 3fbabc98e1f7..7dac21ee6ce7 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -29,7 +29,7 @@ static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
return -1;
/* pick */
- n = prandom_u32_max(n);
+ n = get_random_u32_below(n);
for (j = 0, i = 0; i < m->possible_max_rank; i++) {
if (CEPH_MDS_IS_READY(i, ignore_laggy))
j++;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 864cdaa0d2bd..e4151852184e 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -763,7 +763,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
struct ceph_mds_snap_realm *ri; /* encoded */
__le64 *snaps; /* encoded */
__le64 *prior_parent_snaps; /* encoded */
- struct ceph_snap_realm *realm = NULL;
+ struct ceph_snap_realm *realm;
struct ceph_snap_realm *first_realm = NULL;
struct ceph_snap_realm *realm_to_rebuild = NULL;
int rebuild_snapcs;
@@ -774,6 +774,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
dout("%s deletion=%d\n", __func__, deletion);
more:
+ realm = NULL;
rebuild_snapcs = 0;
ceph_decode_need(&p, e, sizeof(*ri), bad);
ri = p;
diff --git a/fs/cifs/cached_dir.c b/fs/cifs/cached_dir.c
index fe88b67c863f..60399081046a 100644
--- a/fs/cifs/cached_dir.c
+++ b/fs/cifs/cached_dir.c
@@ -253,8 +253,10 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
dentry = dget(cifs_sb->root);
else {
dentry = path_to_dentry(cifs_sb, path);
- if (IS_ERR(dentry))
+ if (IS_ERR(dentry)) {
+ rc = -ENOENT;
goto oshr_free;
+ }
}
cfid->dentry = dentry;
cfid->tcon = tcon;
@@ -338,6 +340,27 @@ smb2_close_cached_fid(struct kref *ref)
free_cached_dir(cfid);
}
+void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
+ const char *name, struct cifs_sb_info *cifs_sb)
+{
+ struct cached_fid *cfid = NULL;
+ int rc;
+
+ rc = open_cached_dir(xid, tcon, name, cifs_sb, true, &cfid);
+ if (rc) {
+ cifs_dbg(FYI, "no cached dir found for rmdir(%s)\n", name);
+ return;
+ }
+ spin_lock(&cfid->cfids->cfid_list_lock);
+ if (cfid->has_lease) {
+ cfid->has_lease = false;
+ kref_put(&cfid->refcount, smb2_close_cached_fid);
+ }
+ spin_unlock(&cfid->cfids->cfid_list_lock);
+ close_cached_dir(cfid);
+}
+
+
void close_cached_dir(struct cached_fid *cfid)
{
kref_put(&cfid->refcount, smb2_close_cached_fid);
@@ -378,22 +401,20 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
{
struct cached_fids *cfids = tcon->cfids;
struct cached_fid *cfid, *q;
- struct list_head entry;
+ LIST_HEAD(entry);
- INIT_LIST_HEAD(&entry);
spin_lock(&cfids->cfid_list_lock);
list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
- list_del(&cfid->entry);
- list_add(&cfid->entry, &entry);
+ list_move(&cfid->entry, &entry);
cfids->num_entries--;
cfid->is_open = false;
+ cfid->on_list = false;
/* To prevent race with smb2_cached_lease_break() */
kref_get(&cfid->refcount);
}
spin_unlock(&cfids->cfid_list_lock);
list_for_each_entry_safe(cfid, q, &entry, entry) {
- cfid->on_list = false;
list_del(&cfid->entry);
cancel_work_sync(&cfid->lease_break);
if (cfid->has_lease) {
@@ -518,15 +539,13 @@ struct cached_fids *init_cached_dirs(void)
void free_cached_dirs(struct cached_fids *cfids)
{
struct cached_fid *cfid, *q;
- struct list_head entry;
+ LIST_HEAD(entry);
- INIT_LIST_HEAD(&entry);
spin_lock(&cfids->cfid_list_lock);
list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
cfid->on_list = false;
cfid->is_open = false;
- list_del(&cfid->entry);
- list_add(&cfid->entry, &entry);
+ list_move(&cfid->entry, &entry);
}
spin_unlock(&cfids->cfid_list_lock);
diff --git a/fs/cifs/cached_dir.h b/fs/cifs/cached_dir.h
index e536304ca2ce..2f4e764c9ca9 100644
--- a/fs/cifs/cached_dir.h
+++ b/fs/cifs/cached_dir.h
@@ -69,6 +69,10 @@ extern int open_cached_dir_by_dentry(struct cifs_tcon *tcon,
struct dentry *dentry,
struct cached_fid **cfid);
extern void close_cached_dir(struct cached_fid *cfid);
+extern void drop_cached_dir_by_name(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ const char *name,
+ struct cifs_sb_info *cifs_sb);
extern void close_all_cached_dirs(struct cifs_sb_info *cifs_sb);
extern void invalidate_all_cached_dirs(struct cifs_tcon *tcon);
extern int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 9d8f077a23c4..040267ed8a64 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1147,8 +1147,32 @@ const struct inode_operations cifs_file_inode_ops = {
.set_acl = cifs_set_acl,
};
+const char *cifs_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *done)
+{
+ char *target_path;
+
+ target_path = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!target_path)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock(&inode->i_lock);
+ if (likely(CIFS_I(inode)->symlink_target)) {
+ strscpy(target_path, CIFS_I(inode)->symlink_target, PATH_MAX);
+ } else {
+ kfree(target_path);
+ target_path = ERR_PTR(-EOPNOTSUPP);
+ }
+ spin_unlock(&inode->i_lock);
+
+ if (!IS_ERR(target_path))
+ set_delayed_call(done, kfree_link, target_path);
+
+ return target_path;
+}
+
const struct inode_operations cifs_symlink_inode_ops = {
- .get_link = simple_get_link,
+ .get_link = cifs_get_link,
.permission = cifs_permission,
.listxattr = cifs_listxattr,
};
@@ -1261,7 +1285,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
rc = filemap_write_and_wait_range(src_inode->i_mapping, off,
off + len - 1);
if (rc)
- goto out;
+ goto unlock;
/* should we flush first and last page first */
truncate_inode_pages(&target_inode->i_data, 0);
@@ -1277,6 +1301,8 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
* that target is updated on the server
*/
CIFS_I(target_inode)->time = 0;
+
+unlock:
/* although unlocking in the reverse order from locking is not
* strictly necessary here it is a little cleaner to be consistent
*/
@@ -1306,8 +1332,11 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
ssize_t rc;
struct cifsFileInfo *cfile = dst_file->private_data;
- if (cfile->swapfile)
- return -EOPNOTSUPP;
+ if (cfile->swapfile) {
+ rc = -EOPNOTSUPP;
+ free_xid(xid);
+ return rc;
+ }
rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
len, flags);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 5b4a7a32bdc5..388b745a978e 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -153,6 +153,6 @@ extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
/* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 39
-#define CIFS_VERSION "2.39"
+#define SMB3_PRODUCT_BUILD 40
+#define CIFS_VERSION "2.40"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ffb291579bb9..e80252a83225 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -759,7 +759,7 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
{
struct msghdr smb_msg = {};
struct kvec iov = {.iov_base = buf, .iov_len = to_read};
- iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read);
+ iov_iter_kvec(&smb_msg.msg_iter, ITER_DEST, &iov, 1, to_read);
return cifs_readv_from_socket(server, &smb_msg);
}
@@ -774,7 +774,7 @@ cifs_discard_from_socket(struct TCP_Server_Info *server, size_t to_read)
* and cifs_readv_from_socket sets msg_control and msg_controllen
* so little to initialize in struct msghdr
*/
- iov_iter_discard(&smb_msg.msg_iter, READ, to_read);
+ iov_iter_discard(&smb_msg.msg_iter, ITER_DEST, to_read);
return cifs_readv_from_socket(server, &smb_msg);
}
@@ -786,7 +786,7 @@ cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
struct msghdr smb_msg = {};
struct bio_vec bv = {
.bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
- iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read);
+ iov_iter_bvec(&smb_msg.msg_iter, ITER_DEST, &bv, 1, to_read);
return cifs_readv_from_socket(server, &smb_msg);
}
@@ -1584,6 +1584,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
server->session_key.response = NULL;
server->session_key.len = 0;
kfree(server->hostname);
+ server->hostname = NULL;
task = xchg(&server->tsk, NULL);
if (task)
@@ -3854,9 +3855,13 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id);
out:
- free_xid(mnt_ctx.xid);
cifs_try_adding_channels(cifs_sb, mnt_ctx.ses);
- return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
+ rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
+ if (rc)
+ goto error;
+
+ free_xid(mnt_ctx.xid);
+ return rc;
error:
dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id);
@@ -3883,8 +3888,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
goto error;
}
+ rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
+ if (rc)
+ goto error;
+
free_xid(mnt_ctx.xid);
- return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
+ return rc;
error:
mount_put_conns(&mnt_ctx);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index a5c73c2af3a2..8b1c37158556 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -543,8 +543,10 @@ int cifs_create(struct user_namespace *mnt_userns, struct inode *inode,
cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
inode, direntry, direntry);
- if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb))))
- return -EIO;
+ if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb)))) {
+ rc = -EIO;
+ goto out_free_xid;
+ }
tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
rc = PTR_ERR(tlink);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index f6ffee514c34..87b56b1ae117 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1413,7 +1413,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
struct inode *inode = d_inode(cfile->dentry);
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct file_lock *flock;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
unsigned int count = 0, i;
int rc = 0, xid, type;
struct list_head locks_to_send, *el;
@@ -1885,11 +1885,13 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
struct cifsFileInfo *cfile;
__u32 type;
- rc = -EACCES;
xid = get_xid();
- if (!(fl->fl_flags & FL_FLOCK))
- return -ENOLCK;
+ if (!(fl->fl_flags & FL_FLOCK)) {
+ rc = -ENOLCK;
+ free_xid(xid);
+ return rc;
+ }
cfile = (struct cifsFileInfo *)file->private_data;
tcon = tlink_tcon(cfile->tlink);
@@ -1908,8 +1910,9 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
* if no lock or unlock then nothing to do since we do not
* know what it is
*/
+ rc = -EOPNOTSUPP;
free_xid(xid);
- return -EOPNOTSUPP;
+ return rc;
}
rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
@@ -2431,12 +2434,16 @@ cifs_writev_complete(struct work_struct *work)
struct cifs_writedata *
cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
{
+ struct cifs_writedata *writedata = NULL;
struct page **pages =
kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
- if (pages)
- return cifs_writedata_direct_alloc(pages, complete);
+ if (pages) {
+ writedata = cifs_writedata_direct_alloc(pages, complete);
+ if (!writedata)
+ kvfree(pages);
+ }
- return NULL;
+ return writedata;
}
struct cifs_writedata *
@@ -3296,6 +3303,9 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
cifs_uncached_writev_complete);
if (!wdata) {
rc = -ENOMEM;
+ for (i = 0; i < nr_pages; i++)
+ put_page(pagevec[i]);
+ kvfree(pagevec);
add_credits_and_wake_if(server, credits, 0);
break;
}
@@ -3522,7 +3532,7 @@ static ssize_t __cifs_writev(
ctx->iter = *from;
ctx->len = len;
} else {
- rc = setup_aio_ctx_iter(ctx, from, WRITE);
+ rc = setup_aio_ctx_iter(ctx, from, ITER_SOURCE);
if (rc) {
kref_put(&ctx->refcount, cifs_aio_ctx_release);
return rc;
@@ -4266,7 +4276,7 @@ static ssize_t __cifs_readv(
ctx->iter = *to;
ctx->len = len;
} else {
- rc = setup_aio_ctx_iter(ctx, to, READ);
+ rc = setup_aio_ctx_iter(ctx, to, ITER_DEST);
if (rc) {
kref_put(&ctx->refcount, cifs_aio_ctx_release);
return rc;
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index a1751b956318..f6f3a6b75601 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -150,7 +150,7 @@ static int fscache_fallback_read_page(struct inode *inode, struct page *page)
bvec[0].bv_page = page;
bvec[0].bv_offset = 0;
bvec[0].bv_len = PAGE_SIZE;
- iov_iter_bvec(&iter, READ, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
+ iov_iter_bvec(&iter, ITER_DEST, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
ret = fscache_begin_read_operation(&cres, cookie);
if (ret < 0)
@@ -180,7 +180,7 @@ static int fscache_fallback_write_page(struct inode *inode, struct page *page,
bvec[0].bv_page = page;
bvec[0].bv_offset = 0;
bvec[0].bv_len = PAGE_SIZE;
- iov_iter_bvec(&iter, WRITE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
+ iov_iter_bvec(&iter, ITER_SOURCE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
ret = fscache_begin_write_operation(&cres, cookie);
if (ret < 0)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 7cf96e581d24..4e2ca3c6e5c0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -215,11 +215,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
kfree(cifs_i->symlink_target);
cifs_i->symlink_target = fattr->cf_symlink_target;
fattr->cf_symlink_target = NULL;
-
- if (unlikely(!cifs_i->symlink_target))
- inode->i_link = ERR_PTR(-EOPNOTSUPP);
- else
- inode->i_link = cifs_i->symlink_target;
}
spin_unlock(&inode->i_lock);
@@ -368,8 +363,10 @@ cifs_get_file_info_unix(struct file *filp)
if (cfile->symlink_target) {
fattr.cf_symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL);
- if (!fattr.cf_symlink_target)
- return -ENOMEM;
+ if (!fattr.cf_symlink_target) {
+ rc = -ENOMEM;
+ goto cifs_gfiunix_out;
+ }
}
rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->fid.netfid, &find_data);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 89d5fa887364..6419ec47c2a8 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -343,7 +343,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
rc = put_user(ExtAttrBits &
FS_FL_USER_VISIBLE,
(int __user *)arg);
- if (rc != EOPNOTSUPP)
+ if (rc != -EOPNOTSUPP)
break;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
@@ -373,7 +373,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
* pSMBFile->fid.netfid,
* extAttrBits,
* &ExtAttrMask);
- * if (rc != EOPNOTSUPP)
+ * if (rc != -EOPNOTSUPP)
* break;
*/
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index da51ffd02928..3e68d8208cf5 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -400,6 +400,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
{
struct smb_hdr *buf = (struct smb_hdr *)buffer;
struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf;
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct cifsInodeInfo *pCifsInode;
@@ -464,9 +465,12 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE))
return false;
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(srv) ? srv->primary_server : srv;
+
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &srv->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid != buf->Tid)
continue;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 0435d1dfa9e1..9e7d9f0baa18 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -302,14 +302,14 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
/* now drop the ref to the current iface */
if (old_iface && iface) {
- kref_put(&old_iface->refcount, release_iface);
cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n",
&old_iface->sockaddr,
&iface->sockaddr);
- } else if (old_iface) {
kref_put(&old_iface->refcount, release_iface);
+ } else if (old_iface) {
cifs_dbg(FYI, "releasing ref to iface: %pIS\n",
&old_iface->sockaddr);
+ kref_put(&old_iface->refcount, release_iface);
} else {
WARN_ON(!iface);
cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr);
@@ -496,6 +496,7 @@ out:
cifs_put_tcp_session(chan->server, 0);
}
+ free_xid(xid);
return rc;
}
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index a6640e6ea58b..68e08c85fbb8 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -655,6 +655,7 @@ int
smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
struct cifs_sb_info *cifs_sb)
{
+ drop_cached_dir_by_name(xid, tcon, name, cifs_sb);
return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
CREATE_NOT_FILE, ACL_NO_MODE,
NULL, SMB2_OP_RMDIR, NULL, NULL, NULL);
@@ -698,6 +699,7 @@ smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon,
{
struct cifsFileInfo *cfile;
+ drop_cached_dir_by_name(xid, tcon, from_name, cifs_sb);
cifs_get_writable_path(tcon, from_name, FIND_WR_WITH_DELETE, &cfile);
return smb2_set_path_attr(xid, tcon, from_name, to_name,
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index a38720477966..572293c18e16 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -135,6 +135,7 @@ static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len,
int
smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server)
{
+ struct TCP_Server_Info *pserver;
struct smb2_hdr *shdr = (struct smb2_hdr *)buf;
struct smb2_pdu *pdu = (struct smb2_pdu *)shdr;
int hdr_size = sizeof(struct smb2_hdr);
@@ -143,6 +144,9 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server)
__u32 calc_len; /* calculated length */
__u64 mid;
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
/*
* Add function to do table lookup of StructureSize by command
* ie Validate the wct via smb2_struct_sizes table above
@@ -155,7 +159,7 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server)
/* decrypt frame now that it is completely read in */
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(iter, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(iter, &pserver->smb_ses_list, smb_ses_list) {
if (iter->Suid == le64_to_cpu(thdr->SessionId)) {
ses = iter;
break;
@@ -608,51 +612,52 @@ smb2_tcon_find_pending_open_lease(struct cifs_tcon *tcon,
}
static bool
-smb2_is_valid_lease_break(char *buffer)
+smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
{
struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer;
- struct TCP_Server_Info *server;
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct cifs_pending_open *open;
cifs_dbg(FYI, "Checking for lease break\n");
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
- list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
- spin_lock(&tcon->open_file_lock);
- cifs_stats_inc(
- &tcon->stats.cifs_stats.num_oplock_brks);
- if (smb2_tcon_has_lease(tcon, rsp)) {
- spin_unlock(&tcon->open_file_lock);
- spin_unlock(&cifs_tcp_ses_lock);
- return true;
- }
- open = smb2_tcon_find_pending_open_lease(tcon,
- rsp);
- if (open) {
- __u8 lease_key[SMB2_LEASE_KEY_SIZE];
- struct tcon_link *tlink;
-
- tlink = cifs_get_tlink(open->tlink);
- memcpy(lease_key, open->lease_key,
- SMB2_LEASE_KEY_SIZE);
- spin_unlock(&tcon->open_file_lock);
- spin_unlock(&cifs_tcp_ses_lock);
- smb2_queue_pending_open_break(tlink,
- lease_key,
- rsp->NewLeaseState);
- return true;
- }
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+ spin_lock(&tcon->open_file_lock);
+ cifs_stats_inc(
+ &tcon->stats.cifs_stats.num_oplock_brks);
+ if (smb2_tcon_has_lease(tcon, rsp)) {
spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
+ return true;
+ }
+ open = smb2_tcon_find_pending_open_lease(tcon,
+ rsp);
+ if (open) {
+ __u8 lease_key[SMB2_LEASE_KEY_SIZE];
+ struct tcon_link *tlink;
+
+ tlink = cifs_get_tlink(open->tlink);
+ memcpy(lease_key, open->lease_key,
+ SMB2_LEASE_KEY_SIZE);
+ spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
+ smb2_queue_pending_open_break(tlink,
+ lease_key,
+ rsp->NewLeaseState);
+ return true;
+ }
+ spin_unlock(&tcon->open_file_lock);
- if (cached_dir_lease_break(tcon, rsp->LeaseKey)) {
- spin_unlock(&cifs_tcp_ses_lock);
- return true;
- }
+ if (cached_dir_lease_break(tcon, rsp->LeaseKey)) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return true;
}
}
}
@@ -671,6 +676,7 @@ bool
smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
{
struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer;
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
struct cifsInodeInfo *cinode;
@@ -684,16 +690,19 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
if (rsp->StructureSize !=
smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) {
if (le16_to_cpu(rsp->StructureSize) == 44)
- return smb2_is_valid_lease_break(buffer);
+ return smb2_is_valid_lease_break(buffer, server);
else
return false;
}
cifs_dbg(FYI, "oplock level 0x%x\n", rsp->OplockLevel);
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 17b25153cb68..32b3877b538a 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -530,6 +530,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
p = buf;
spin_lock(&ses->iface_lock);
+ ses->iface_count = 0;
/*
* Go through iface_list and do kref_put to remove
* any unused ifaces. ifaces in use will be removed
@@ -651,9 +652,9 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
kref_put(&iface->refcount, release_iface);
} else
list_add_tail(&info->iface_head, &ses->iface_list);
- spin_unlock(&ses->iface_lock);
ses->iface_count++;
+ spin_unlock(&ses->iface_lock);
ses->iface_last_update = jiffies;
next_iface:
nb_iface++;
@@ -1115,6 +1116,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
COMPOUND_FID, current->tgid,
FILE_FULL_EA_INFORMATION,
SMB2_O_INFO_FILE, 0, data, size);
+ if (rc)
+ goto sea_exit;
smb2_set_next_command(tcon, &rqst[1]);
smb2_set_related(&rqst[1]);
@@ -1125,6 +1128,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
rqst[2].rq_nvec = 1;
rc = SMB2_close_init(tcon, server,
&rqst[2], COMPOUND_FID, COMPOUND_FID, false);
+ if (rc)
+ goto sea_exit;
smb2_set_related(&rqst[2]);
rc = compound_send_recv(xid, ses, server,
@@ -2301,14 +2306,18 @@ static void
smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
{
struct smb2_hdr *shdr = (struct smb2_hdr *)buf;
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
if (shdr->Status != STATUS_NETWORK_NAME_DELETED)
return;
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) {
spin_lock(&tcon->tc_lock);
@@ -4263,21 +4272,23 @@ init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign)
static int
smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
{
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
u8 *ses_enc_key;
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
- if (ses->Suid == ses_id) {
- spin_lock(&ses->ses_lock);
- ses_enc_key = enc ? ses->smb3encryptionkey :
- ses->smb3decryptionkey;
- memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE);
- spin_unlock(&ses->ses_lock);
- spin_unlock(&cifs_tcp_ses_lock);
- return 0;
- }
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (ses->Suid == ses_id) {
+ spin_lock(&ses->ses_lock);
+ ses_enc_key = enc ? ses->smb3encryptionkey :
+ ses->smb3decryptionkey;
+ memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE);
+ spin_unlock(&ses->ses_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
}
}
spin_unlock(&cifs_tcp_ses_lock);
@@ -4712,13 +4723,13 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
return 0;
}
- iov_iter_bvec(&iter, WRITE, bvec, npages, data_len);
+ iov_iter_bvec(&iter, ITER_SOURCE, bvec, npages, data_len);
} else if (buf_len >= data_offset + data_len) {
/* read response payload is in buf */
WARN_ONCE(npages > 0, "read data can be either in buf or in pages");
iov.iov_base = buf + data_offset;
iov.iov_len = data_len;
- iov_iter_kvec(&iter, WRITE, &iov, 1, data_len);
+ iov_iter_kvec(&iter, ITER_SOURCE, &iov, 1, data_len);
} else {
/* read response payload cannot be in both buf and pages */
WARN_ONCE(1, "buf can not contain only a part of read data");
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index a2384509ea84..a5695748a89b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1341,14 +1341,13 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
static void
SMB2_sess_free_buffer(struct SMB2_sess_data *sess_data)
{
- int i;
+ struct kvec *iov = sess_data->iov;
- /* zero the session data before freeing, as it might contain sensitive info (keys, etc) */
- for (i = 0; i < 2; i++)
- if (sess_data->iov[i].iov_base)
- memzero_explicit(sess_data->iov[i].iov_base, sess_data->iov[i].iov_len);
+ /* iov[1] is already freed by caller */
+ if (sess_data->buf0_type != CIFS_NO_BUFFER && iov[0].iov_base)
+ memzero_explicit(iov[0].iov_base, iov[0].iov_len);
- free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base);
+ free_rsp_buf(sess_data->buf0_type, iov[0].iov_base);
sess_data->buf0_type = CIFS_NO_BUFFER;
}
@@ -1531,7 +1530,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
&blob_length, ses, server,
sess_data->nls_cp);
if (rc)
- goto out_err;
+ goto out;
if (use_spnego) {
/* BB eventually need to add this */
@@ -1578,7 +1577,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
}
out:
- memzero_explicit(ntlmssp_blob, blob_length);
+ kfree_sensitive(ntlmssp_blob);
SMB2_sess_free_buffer(sess_data);
if (!rc) {
sess_data->result = 0;
@@ -1662,7 +1661,7 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
}
#endif
out:
- memzero_explicit(ntlmssp_blob, blob_length);
+ kfree_sensitive(ntlmssp_blob);
SMB2_sess_free_buffer(sess_data);
kfree_sensitive(ses->ntlmssp);
ses->ntlmssp = NULL;
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 8e3f26e6f6b9..381babc1212c 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -77,18 +77,19 @@ static
int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key)
{
struct cifs_chan *chan;
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses = NULL;
- struct TCP_Server_Info *it = NULL;
int i;
int rc = 0;
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(it, &cifs_tcp_ses_list, tcp_ses_list) {
- list_for_each_entry(ses, &it->smb_ses_list, smb_ses_list) {
- if (ses->Suid == ses_id)
- goto found;
- }
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (ses->Suid == ses_id)
+ goto found;
}
cifs_server_dbg(VFS, "%s: Could not find session 0x%llx\n",
__func__, ses_id);
@@ -136,9 +137,13 @@ out:
static struct cifs_ses *
smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id)
{
+ struct TCP_Server_Info *pserver;
struct cifs_ses *ses;
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ /* If server is a channel, select the primary channel */
+ pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
+ list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
if (ses->Suid != ses_id)
continue;
++ses->ses_count;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 575fa8f58342..3851d0aaa288 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -347,7 +347,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
.iov_base = &rfc1002_marker,
.iov_len = 4
};
- iov_iter_kvec(&smb_msg.msg_iter, WRITE, &hiov, 1, 4);
+ iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, &hiov, 1, 4);
rc = smb_send_kvec(server, &smb_msg, &sent);
if (rc < 0)
goto unmask;
@@ -368,7 +368,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
size += iov[i].iov_len;
}
- iov_iter_kvec(&smb_msg.msg_iter, WRITE, iov, n_vec, size);
+ iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, iov, n_vec, size);
rc = smb_send_kvec(server, &smb_msg, &sent);
if (rc < 0)
@@ -384,7 +384,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
rqst_page_get_length(&rqst[j], i, &bvec.bv_len,
&bvec.bv_offset);
- iov_iter_bvec(&smb_msg.msg_iter, WRITE,
+ iov_iter_bvec(&smb_msg.msg_iter, ITER_SOURCE,
&bvec, 1, bvec.bv_len);
rc = smb_send_kvec(server, &smb_msg, &sent);
if (rc < 0)
diff --git a/fs/coredump.c b/fs/coredump.c
index 7bad7785e8e6..9a745d08c57f 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -325,6 +325,10 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
err = cn_printf(cn, "%lu",
rlimit(RLIMIT_CORE));
break;
+ /* CPU the task ran on */
+ case 'C':
+ err = cn_printf(cn, "%d", cprm->cpu);
+ break;
default:
break;
}
@@ -525,7 +529,6 @@ void do_coredump(const kernel_siginfo_t *siginfo)
static atomic_t core_dump_count = ATOMIC_INIT(0);
struct coredump_params cprm = {
.siginfo = siginfo,
- .regs = signal_pt_regs(),
.limit = rlimit(RLIMIT_CORE),
/*
* We must use the same mm->flags while dumping core to avoid
@@ -534,6 +537,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
*/
.mm_flags = mm->flags,
.vma_meta = NULL,
+ .cpu = raw_smp_processor_id(),
};
audit_core_dumps(siginfo->si_signo);
@@ -853,7 +857,7 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
if (dump_interrupted())
return 0;
pos = file->f_pos;
- iov_iter_bvec(&iter, WRITE, &bvec, 1, PAGE_SIZE);
+ iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
n = __kernel_write_iter(cprm->file, &iter, &pos);
if (n != PAGE_SIZE)
return 0;
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index 1cca09aa43f8..2a24b1f0ae68 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -205,14 +205,19 @@ static int allocate_filesystem_keyring(struct super_block *sb)
}
/*
- * This is called at unmount time to release all encryption keys that have been
- * added to the filesystem, along with the keyring that contains them.
+ * Release all encryption keys that have been added to the filesystem, along
+ * with the keyring that contains them.
*
- * Note that besides clearing and freeing memory, this might need to evict keys
- * from the keyslots of an inline crypto engine. Therefore, this must be called
- * while the filesystem's underlying block device(s) are still available.
+ * This is called at unmount time. The filesystem's underlying block device(s)
+ * are still available at this time; this is important because after user file
+ * accesses have been allowed, this function may need to evict keys from the
+ * keyslots of an inline crypto engine, which requires the block device(s).
+ *
+ * This is also called when the super_block is being freed. This is needed to
+ * avoid a memory leak if mounting fails after the "test_dummy_encryption"
+ * option was processed, as in that case the unmount-time call isn't made.
*/
-void fscrypt_sb_delete(struct super_block *sb)
+void fscrypt_destroy_keyring(struct super_block *sb)
{
struct fscrypt_keyring *keyring = sb->s_master_keys;
size_t i;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index ddb3fc258df9..b54f470e0d03 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -378,8 +378,8 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf,
}
EXPORT_SYMBOL_GPL(debugfs_attr_read);
-ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
- size_t len, loff_t *ppos)
+static ssize_t debugfs_attr_write_xsigned(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos, bool is_signed)
{
struct dentry *dentry = F_DENTRY(file);
ssize_t ret;
@@ -387,12 +387,28 @@ ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
ret = debugfs_file_get(dentry);
if (unlikely(ret))
return ret;
- ret = simple_attr_write(file, buf, len, ppos);
+ if (is_signed)
+ ret = simple_attr_write_signed(file, buf, len, ppos);
+ else
+ ret = simple_attr_write(file, buf, len, ppos);
debugfs_file_put(dentry);
return ret;
}
+
+ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return debugfs_attr_write_xsigned(file, buf, len, ppos, false);
+}
EXPORT_SYMBOL_GPL(debugfs_attr_write);
+ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return debugfs_attr_write_xsigned(file, buf, len, ppos, true);
+}
+EXPORT_SYMBOL_GPL(debugfs_attr_write_signed);
+
static struct dentry *debugfs_create_mode_unsafe(const char *name, umode_t mode,
struct dentry *parent, void *value,
const struct file_operations *fops,
@@ -738,11 +754,11 @@ static int debugfs_atomic_t_get(void *data, u64 *val)
*val = atomic_read((atomic_t *)data);
return 0;
}
-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t, debugfs_atomic_t_get,
debugfs_atomic_t_set, "%lld\n");
-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL,
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_ro, debugfs_atomic_t_get, NULL,
"%lld\n");
-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set,
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_wo, NULL, debugfs_atomic_t_set,
"%lld\n");
/**
diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c
index a0ef63cfcecb..9e4f47808bd5 100644
--- a/fs/efivarfs/vars.c
+++ b/fs/efivarfs/vars.c
@@ -651,22 +651,6 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes,
if (err)
return err;
- /*
- * Ensure that the available space hasn't shrunk below the safe level
- */
- status = check_var_size(attributes, *size + ucs2_strsize(name, 1024));
- if (status != EFI_SUCCESS) {
- if (status != EFI_UNSUPPORTED) {
- err = efi_status_to_err(status);
- goto out;
- }
-
- if (*size > 65536) {
- err = -ENOSPC;
- goto out;
- }
- }
-
status = efivar_set_variable_locked(name, vendor, attributes, *size,
data, false);
if (status != EFI_SUCCESS) {
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 998cd26a1b3b..4c837be3b6e3 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -75,11 +75,15 @@ static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
rcu_read_lock();
xas_for_each(&xas, folio, last_page) {
- unsigned int pgpos =
- (folio_index(folio) - start_page) * PAGE_SIZE;
- unsigned int pgend = pgpos + folio_size(folio);
+ unsigned int pgpos, pgend;
bool pg_failed = false;
+ if (xas_retry(&xas, folio))
+ continue;
+
+ pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
+ pgend = pgpos + folio_size(folio);
+
for (;;) {
if (!subreq) {
pg_failed = true;
@@ -190,7 +194,7 @@ static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
atomic_inc(&rreq->nr_outstanding);
- iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
+ iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages,
start + done, subreq->len);
ret = fscache_read(cres, subreq->start, &iter,
@@ -286,23 +290,26 @@ static int erofs_fscache_data_read(struct address_space *mapping,
if (IS_ERR(src))
return PTR_ERR(src);
- iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE);
- if (copy_to_iter(src + offset, size, &iter) != size)
+ iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
+ if (copy_to_iter(src + offset, size, &iter) != size) {
+ erofs_put_metabuf(&buf);
return -EFAULT;
+ }
iov_iter_zero(PAGE_SIZE - size, &iter);
erofs_put_metabuf(&buf);
return PAGE_SIZE;
}
- count = min_t(size_t, map.m_llen - (pos - map.m_la), len);
- DBG_BUGON(!count || count % PAGE_SIZE);
-
if (!(map.m_flags & EROFS_MAP_MAPPED)) {
- iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count);
+ count = len;
+ iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
iov_iter_zero(count, &iter);
return count;
}
+ count = min_t(size_t, map.m_llen - (pos - map.m_la), len);
+ DBG_BUGON(!count || count % PAGE_SIZE);
+
mdev = (struct erofs_map_dev) {
.m_deviceid = map.m_deviceid,
.m_pa = map.m_pa,
@@ -403,13 +410,13 @@ static void erofs_fscache_domain_put(struct erofs_domain *domain)
static int erofs_fscache_register_volume(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
- char *domain_id = sbi->opt.domain_id;
+ char *domain_id = sbi->domain_id;
struct fscache_volume *volume;
char *name;
int ret = 0;
name = kasprintf(GFP_KERNEL, "erofs,%s",
- domain_id ? domain_id : sbi->opt.fsid);
+ domain_id ? domain_id : sbi->fsid);
if (!name)
return -ENOMEM;
@@ -435,7 +442,7 @@ static int erofs_fscache_init_domain(struct super_block *sb)
if (!domain)
return -ENOMEM;
- domain->domain_id = kstrdup(sbi->opt.domain_id, GFP_KERNEL);
+ domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
if (!domain->domain_id) {
kfree(domain);
return -ENOMEM;
@@ -472,7 +479,7 @@ static int erofs_fscache_register_domain(struct super_block *sb)
mutex_lock(&erofs_domain_list_lock);
list_for_each_entry(domain, &erofs_domain_list, list) {
- if (!strcmp(domain->domain_id, sbi->opt.domain_id)) {
+ if (!strcmp(domain->domain_id, sbi->domain_id)) {
sbi->domain = domain;
sbi->volume = domain->volume;
refcount_inc(&domain->ref);
@@ -590,14 +597,17 @@ struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
struct super_block *psb = erofs_pseudo_mnt->mnt_sb;
mutex_lock(&erofs_domain_cookies_lock);
+ spin_lock(&psb->s_inode_list_lock);
list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
ctx = inode->i_private;
if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
continue;
igrab(inode);
+ spin_unlock(&psb->s_inode_list_lock);
mutex_unlock(&erofs_domain_cookies_lock);
return ctx;
}
+ spin_unlock(&psb->s_inode_list_lock);
ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode);
mutex_unlock(&erofs_domain_cookies_lock);
return ctx;
@@ -606,7 +616,7 @@ struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
char *name, bool need_inode)
{
- if (EROFS_SB(sb)->opt.domain_id)
+ if (EROFS_SB(sb)->domain_id)
return erofs_domain_register_cookie(sb, name, need_inode);
return erofs_fscache_acquire_cookie(sb, name, need_inode);
}
@@ -638,7 +648,7 @@ int erofs_fscache_register_fs(struct super_block *sb)
struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_fscache *fscache;
- if (sbi->opt.domain_id)
+ if (sbi->domain_id)
ret = erofs_fscache_register_domain(sb);
else
ret = erofs_fscache_register_volume(sb);
@@ -646,7 +656,7 @@ int erofs_fscache_register_fs(struct super_block *sb)
return ret;
/* acquired domain/volume will be relinquished in kill_sb() on error */
- fscache = erofs_fscache_register_cookie(sb, sbi->opt.fsid, true);
+ fscache = erofs_fscache_register_cookie(sb, sbi->fsid, true);
if (IS_ERR(fscache))
return PTR_ERR(fscache);
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 1701df48c446..05dc68627722 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -75,8 +75,6 @@ struct erofs_mount_opts {
unsigned int max_sync_decompress_pages;
#endif
unsigned int mount_opt;
- char *fsid;
- char *domain_id;
};
struct erofs_dev_context {
@@ -89,6 +87,8 @@ struct erofs_dev_context {
struct erofs_fs_context {
struct erofs_mount_opts opt;
struct erofs_dev_context *devs;
+ char *fsid;
+ char *domain_id;
};
/* all filesystem-wide lz4 configurations */
@@ -170,6 +170,8 @@ struct erofs_sb_info {
struct fscache_volume *volume;
struct erofs_fscache *s_fscache;
struct erofs_domain *domain;
+ char *fsid;
+ char *domain_id;
};
#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 2cf96ce1c32e..1c7dcca702b3 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -579,9 +579,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
break;
case Opt_fsid:
#ifdef CONFIG_EROFS_FS_ONDEMAND
- kfree(ctx->opt.fsid);
- ctx->opt.fsid = kstrdup(param->string, GFP_KERNEL);
- if (!ctx->opt.fsid)
+ kfree(ctx->fsid);
+ ctx->fsid = kstrdup(param->string, GFP_KERNEL);
+ if (!ctx->fsid)
return -ENOMEM;
#else
errorfc(fc, "fsid option not supported");
@@ -589,9 +589,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
break;
case Opt_domain_id:
#ifdef CONFIG_EROFS_FS_ONDEMAND
- kfree(ctx->opt.domain_id);
- ctx->opt.domain_id = kstrdup(param->string, GFP_KERNEL);
- if (!ctx->opt.domain_id)
+ kfree(ctx->domain_id);
+ ctx->domain_id = kstrdup(param->string, GFP_KERNEL);
+ if (!ctx->domain_id)
return -ENOMEM;
#else
errorfc(fc, "domain_id option not supported");
@@ -728,10 +728,12 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_fs_info = sbi;
sbi->opt = ctx->opt;
- ctx->opt.fsid = NULL;
- ctx->opt.domain_id = NULL;
sbi->devs = ctx->devs;
ctx->devs = NULL;
+ sbi->fsid = ctx->fsid;
+ ctx->fsid = NULL;
+ sbi->domain_id = ctx->domain_id;
+ ctx->domain_id = NULL;
if (erofs_is_fscache_mode(sb)) {
sb->s_blocksize = EROFS_BLKSIZ;
@@ -820,7 +822,7 @@ static int erofs_fc_get_tree(struct fs_context *fc)
{
struct erofs_fs_context *ctx = fc->fs_private;
- if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->opt.fsid)
+ if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid)
return get_tree_nodev(fc, erofs_fc_fill_super);
return get_tree_bdev(fc, erofs_fc_fill_super);
@@ -834,6 +836,9 @@ static int erofs_fc_reconfigure(struct fs_context *fc)
DBG_BUGON(!sb_rdonly(sb));
+ if (ctx->fsid || ctx->domain_id)
+ erofs_info(sb, "ignoring reconfiguration for fsid|domain_id.");
+
if (test_opt(&ctx->opt, POSIX_ACL))
fc->sb_flags |= SB_POSIXACL;
else
@@ -873,8 +878,8 @@ static void erofs_fc_free(struct fs_context *fc)
struct erofs_fs_context *ctx = fc->fs_private;
erofs_free_dev_context(ctx->devs);
- kfree(ctx->opt.fsid);
- kfree(ctx->opt.domain_id);
+ kfree(ctx->fsid);
+ kfree(ctx->domain_id);
kfree(ctx);
}
@@ -944,8 +949,8 @@ static void erofs_kill_sb(struct super_block *sb)
erofs_free_dev_context(sbi->devs);
fs_put_dax(sbi->dax_dev, NULL);
erofs_fscache_unregister_fs(sb);
- kfree(sbi->opt.fsid);
- kfree(sbi->opt.domain_id);
+ kfree(sbi->fsid);
+ kfree(sbi->domain_id);
kfree(sbi);
sb->s_fs_info = NULL;
}
@@ -1098,10 +1103,10 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
if (test_opt(opt, DAX_NEVER))
seq_puts(seq, ",dax=never");
#ifdef CONFIG_EROFS_FS_ONDEMAND
- if (opt->fsid)
- seq_printf(seq, ",fsid=%s", opt->fsid);
- if (opt->domain_id)
- seq_printf(seq, ",domain_id=%s", opt->domain_id);
+ if (sbi->fsid)
+ seq_printf(seq, ",fsid=%s", sbi->fsid);
+ if (sbi->domain_id)
+ seq_printf(seq, ",domain_id=%s", sbi->domain_id);
#endif
return 0;
}
diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
index 783bb7b21b51..fd476961f742 100644
--- a/fs/erofs/sysfs.c
+++ b/fs/erofs/sysfs.c
@@ -210,14 +210,14 @@ int erofs_register_sysfs(struct super_block *sb)
int err;
if (erofs_is_fscache_mode(sb)) {
- if (sbi->opt.domain_id) {
- str = kasprintf(GFP_KERNEL, "%s,%s", sbi->opt.domain_id,
- sbi->opt.fsid);
+ if (sbi->domain_id) {
+ str = kasprintf(GFP_KERNEL, "%s,%s", sbi->domain_id,
+ sbi->fsid);
if (!str)
return -ENOMEM;
name = str;
} else {
- name = sbi->opt.fsid;
+ name = sbi->fsid;
}
} else {
name = sb->s_id;
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 559380a535af..b792d424d774 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -660,6 +660,9 @@ static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos,
u8 *src, *dst;
unsigned int i, cnt;
+ if (!packed_inode)
+ return -EFSCORRUPTED;
+
pos += EROFS_I(inode)->z_fragmentoff;
for (i = 0; i < len; i += cnt) {
cnt = min_t(unsigned int, len - i,
@@ -813,15 +816,14 @@ retry:
++spiltted;
if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
fe->pcl->multibases = true;
-
- if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
- !(map->m_flags & EROFS_MAP_PARTIAL_REF) &&
- fe->pcl->length == map->m_llen)
- fe->pcl->partial = false;
if (fe->pcl->length < offset + end - map->m_la) {
fe->pcl->length = offset + end - map->m_la;
fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK;
}
+ if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
+ !(map->m_flags & EROFS_MAP_PARTIAL_REF) &&
+ fe->pcl->length == map->m_llen)
+ fe->pcl->partial = false;
next_part:
/* shorten the remaining extent to update progress */
map->m_llen = offset + cur - map->m_la;
@@ -888,15 +890,13 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) {
unsigned int pgnr;
- struct page *oldpage;
pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT;
DBG_BUGON(pgnr >= be->nr_pages);
- oldpage = be->decompressed_pages[pgnr];
- be->decompressed_pages[pgnr] = bvec->page;
-
- if (!oldpage)
+ if (!be->decompressed_pages[pgnr]) {
+ be->decompressed_pages[pgnr] = bvec->page;
return;
+ }
}
/* (cold path) one pcluster is requested multiple times */
@@ -1415,8 +1415,8 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
struct block_device *last_bdev;
unsigned int nr_bios = 0;
struct bio *bio = NULL;
- /* initialize to 1 to make skip psi_memstall_leave unless needed */
- unsigned long pflags = 1;
+ unsigned long pflags;
+ int memstall = 0;
bi_private = jobqueueset_init(sb, q, fgq, force_fg);
qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head;
@@ -1466,14 +1466,18 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
if (bio && (cur != last_index + 1 ||
last_bdev != mdev.m_bdev)) {
submit_bio_retry:
- if (!pflags)
- psi_memstall_leave(&pflags);
submit_bio(bio);
+ if (memstall) {
+ psi_memstall_leave(&pflags);
+ memstall = 0;
+ }
bio = NULL;
}
- if (unlikely(PageWorkingset(page)))
+ if (unlikely(PageWorkingset(page)) && !memstall) {
psi_memstall_enter(&pflags);
+ memstall = 1;
+ }
if (!bio) {
bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
@@ -1503,9 +1507,9 @@ submit_bio_retry:
} while (owned_head != Z_EROFS_PCLUSTER_TAIL);
if (bio) {
- if (!pflags)
- psi_memstall_leave(&pflags);
submit_bio(bio);
+ if (memstall)
+ psi_memstall_leave(&pflags);
}
/*
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index e7f04c4fbb81..d98c95212985 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -126,10 +126,10 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
}
/*
- * bit 31: I/O error occurred on this page
- * bit 0 - 30: remaining parts to complete this page
+ * bit 30: I/O error occurred on this page
+ * bit 0 - 29: remaining parts to complete this page
*/
-#define Z_EROFS_PAGE_EIO (1 << 31)
+#define Z_EROFS_PAGE_EIO (1 << 30)
static inline void z_erofs_onlinepage_init(struct page *page)
{
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 44c27ef39c43..0bb66927e3d0 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -57,8 +57,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
vi->xattr_isize, 8);
- kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos),
- EROFS_KMAP_ATOMIC);
+ kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
if (IS_ERR(kaddr)) {
err = PTR_ERR(kaddr);
goto out_unlock;
@@ -73,7 +72,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
vi->z_tailextent_headlcn = 0;
- goto unmap_done;
+ goto done;
}
vi->z_advise = le16_to_cpu(h->h_advise);
vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
@@ -85,7 +84,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
err = -EOPNOTSUPP;
- goto unmap_done;
+ goto out_put_metabuf;
}
vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
@@ -95,7 +94,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
vi->nid);
err = -EFSCORRUPTED;
- goto unmap_done;
+ goto out_put_metabuf;
}
if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
@@ -103,12 +102,8 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
vi->nid);
err = -EFSCORRUPTED;
- goto unmap_done;
+ goto out_put_metabuf;
}
-unmap_done:
- erofs_put_metabuf(&buf);
- if (err)
- goto out_unlock;
if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
struct erofs_map_blocks map = {
@@ -127,7 +122,7 @@ unmap_done:
err = -EFSCORRUPTED;
}
if (err < 0)
- goto out_unlock;
+ goto out_put_metabuf;
}
if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
@@ -141,11 +136,14 @@ unmap_done:
EROFS_GET_BLOCKS_FINDTAIL);
erofs_put_metabuf(&map.buf);
if (err < 0)
- goto out_unlock;
+ goto out_put_metabuf;
}
+done:
/* paired with smp_mb() at the beginning of the function */
smp_mb();
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
+out_put_metabuf:
+ erofs_put_metabuf(&buf);
out_unlock:
clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
return err;
diff --git a/fs/exec.c b/fs/exec.c
index 349a5da91efe..089a743f636b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -64,6 +64,7 @@
#include <linux/io_uring.h>
#include <linux/syscall_user_dispatch.h>
#include <linux/coredump.h>
+#include <linux/time_namespace.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -171,7 +172,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
exit:
fput(file);
out:
- return error;
+ return error;
}
#endif /* #ifdef CONFIG_USELIB */
@@ -199,7 +200,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
{
struct page *page;
int ret;
- unsigned int gup_flags = FOLL_FORCE;
+ unsigned int gup_flags = 0;
#ifdef CONFIG_STACK_GROWSUP
if (write) {
@@ -842,16 +843,13 @@ int setup_arg_pages(struct linux_binprm *bprm,
* will align it up.
*/
rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
+
+ stack_expand = min(rlim_stack, stack_size + stack_expand);
+
#ifdef CONFIG_STACK_GROWSUP
- if (stack_size + stack_expand > rlim_stack)
- stack_base = vma->vm_start + rlim_stack;
- else
- stack_base = vma->vm_end + stack_expand;
+ stack_base = vma->vm_start + stack_expand;
#else
- if (stack_size + stack_expand > rlim_stack)
- stack_base = vma->vm_end - rlim_stack;
- else
- stack_base = vma->vm_start - stack_expand;
+ stack_base = vma->vm_end - stack_expand;
#endif
current->mm->start_stack = bprm->p;
ret = expand_stack(vma, stack_base);
@@ -1012,7 +1010,6 @@ static int exec_mmap(struct mm_struct *mm)
active_mm = tsk->active_mm;
tsk->active_mm = mm;
tsk->mm = mm;
- lru_gen_add_mm(mm);
/*
* This prevents preemption while active_mm is being loaded and
* it and mm are being updated, which could cause problems for
@@ -1025,6 +1022,7 @@ static int exec_mmap(struct mm_struct *mm)
activate_mm(active_mm, mm);
if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
local_irq_enable();
+ lru_gen_add_mm(mm);
task_unlock(tsk);
lru_gen_use_mm(mm);
if (old_mm) {
@@ -1197,11 +1195,11 @@ static int unshare_sighand(struct task_struct *me)
return -ENOMEM;
refcount_set(&newsighand->count, 1);
- memcpy(newsighand->action, oldsighand->action,
- sizeof(newsighand->action));
write_lock_irq(&tasklist_lock);
spin_lock(&oldsighand->siglock);
+ memcpy(newsighand->action, oldsighand->action,
+ sizeof(newsighand->action));
rcu_assign_pointer(me->sighand, newsighand);
spin_unlock(&oldsighand->siglock);
write_unlock_irq(&tasklist_lock);
@@ -1297,6 +1295,10 @@ int begin_new_exec(struct linux_binprm * bprm)
bprm->mm = NULL;
+ retval = exec_task_namespaces();
+ if (retval)
+ goto out_unlock;
+
#ifdef CONFIG_POSIX_TIMERS
spin_lock_irq(&me->sighand->siglock);
posix_cpu_timers_exit(me);
@@ -1568,6 +1570,12 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
if (task_no_new_privs(current))
bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
+ /*
+ * If another task is sharing our fs, we cannot safely
+ * suid exec because the differently privileged task
+ * will be able to manipulate the current directory, etc.
+ * It would be nice to force an unshare instead...
+ */
t = p;
n_fs = 1;
spin_lock(&p->fs->lock);
@@ -1748,6 +1756,7 @@ static int search_binary_handler(struct linux_binprm *bprm)
return retval;
}
+/* binfmt handlers will call back into begin_new_exec() on success. */
static int exec_binprm(struct linux_binprm *bprm)
{
pid_t old_pid, old_vpid;
@@ -1806,6 +1815,11 @@ static int bprm_execve(struct linux_binprm *bprm,
if (retval)
return retval;
+ /*
+ * Check for unsafe execution states before exec_binprm(), which
+ * will call back into begin_new_exec(), into bprm_creds_from_file(),
+ * where setuid-ness is evaluated.
+ */
check_unsafe_exec(bprm);
current->in_execve = 1;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index f4944c4dee60..78b8686d9a4a 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -277,7 +277,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
int best_ndir = inodes_per_group;
int best_group = -1;
- parent_group = prandom_u32_max(ngroups);
+ parent_group = get_random_u32_below(ngroups);
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext2_get_group_desc (sb, group, NULL);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f1956288307f..6c399a8b22b3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5184,6 +5184,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
* and it is decreased till we reach start.
*/
again:
+ ret = 0;
if (SHIFT == SHIFT_LEFT)
iterator = &start;
else
@@ -5227,14 +5228,21 @@ again:
ext4_ext_get_actual_len(extent);
} else {
extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
- if (le32_to_cpu(extent->ee_block) > 0)
+ if (le32_to_cpu(extent->ee_block) > start)
*iterator = le32_to_cpu(extent->ee_block) - 1;
- else
- /* Beginning is reached, end of the loop */
+ else if (le32_to_cpu(extent->ee_block) == start)
iterator = NULL;
- /* Update path extent in case we need to stop */
- while (le32_to_cpu(extent->ee_block) < start)
+ else {
+ extent = EXT_LAST_EXTENT(path[depth].p_hdr);
+ while (le32_to_cpu(extent->ee_block) >= start)
+ extent--;
+
+ if (extent == EXT_LAST_EXTENT(path[depth].p_hdr))
+ break;
+
extent++;
+ iterator = NULL;
+ }
path[depth].p_ext = extent;
}
ret = ext4_ext_shift_path_extents(path, shift, inode,
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index ef05bfa87798..0f6d0a80467d 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -1521,6 +1521,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
struct ext4_iloc iloc;
int inode_len, ino, ret, tag = tl->fc_tag;
struct ext4_extent_header *eh;
+ size_t off_gen = offsetof(struct ext4_inode, i_generation);
memcpy(&fc_inode, val, sizeof(fc_inode));
@@ -1548,8 +1549,8 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
raw_inode = ext4_raw_inode(&iloc);
memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
- memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
- inode_len - offsetof(struct ext4_inode, i_generation));
+ memcpy((u8 *)raw_inode + off_gen, (u8 *)raw_fc_inode + off_gen,
+ inode_len - off_gen);
if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
if (eh->eh_magic != EXT4_EXT_MAGIC) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 67a257a69758..9aa8b18bdac1 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -465,7 +465,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4fs_dirhash(parent, qstr->name, qstr->len, &hinfo);
parent_group = hinfo.hash % ngroups;
} else
- parent_group = prandom_u32_max(ngroups);
+ parent_group = get_random_u32_below(ngroups);
for (i = 0; i < ngroups; i++) {
g = (parent_group + i) % ngroups;
get_orlov_stats(sb, g, flex_size, &stats);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index ded535535b27..95dfea28bf4e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -145,9 +145,8 @@ static int ext4_update_backup_sb(struct super_block *sb,
if (ext4_has_metadata_csum(sb) &&
es->s_checksum != ext4_superblock_csum(sb, es)) {
ext4_msg(sb, KERN_ERR, "Invalid checksum for backup "
- "superblock %llu\n", sb_block);
+ "superblock %llu", sb_block);
unlock_buffer(bh);
- err = -EFSBADCRC;
goto out_bh;
}
func(es, arg);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 0a220ec9862d..a19a9661646e 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -424,7 +424,8 @@ int ext4_ext_migrate(struct inode *inode)
* already is extent-based, error out.
*/
if (!ext4_has_feature_extents(inode->i_sb) ||
- (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
+ ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
+ ext4_has_inline_data(inode))
return -EINVAL;
if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 588cb09c5291..4681fff6665f 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -262,13 +262,7 @@ void ext4_stop_mmpd(struct ext4_sb_info *sbi)
*/
static unsigned int mmp_new_seq(void)
{
- u32 new_seq;
-
- do {
- new_seq = get_random_u32();
- } while (new_seq > EXT4_MMP_SEQ_MAX);
-
- return new_seq;
+ return get_random_u32_below(EXT4_MMP_SEQ_MAX + 1);
}
/*
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b8a91d74fdd1..27a863e1120e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2259,8 +2259,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
memset(de, 0, len); /* wipe old data */
de = (struct ext4_dir_entry_2 *) data2;
top = data2 + len;
- while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
+ while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) {
+ if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len,
+ (data2 + (blocksize - csum_size) -
+ (char *) de))) {
+ brelse(bh2);
+ brelse(bh);
+ return -EFSCORRUPTED;
+ }
de = de2;
+ }
de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
(char *) de, blocksize);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 6dfe9ccae0c5..46b87ffeb304 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1158,6 +1158,7 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
while (group < sbi->s_groups_count) {
struct buffer_head *bh;
ext4_fsblk_t backup_block;
+ struct ext4_super_block *es;
/* Out of journal space, and can't get more - abort - so sad */
err = ext4_resize_ensure_credits_batch(handle, 1);
@@ -1186,6 +1187,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
memcpy(bh->b_data, data, size);
if (rest)
memset(bh->b_data + size, 0, rest);
+ es = (struct ext4_super_block *) bh->b_data;
+ es->s_block_group_nr = cpu_to_le16(group);
+ if (ext4_has_metadata_csum(sb))
+ es->s_checksum = ext4_superblock_csum(sb, es);
set_buffer_uptodate(bh);
unlock_buffer(bh);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 989365b878a6..63ef74eb8091 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1741,10 +1741,6 @@ static const struct fs_parameter_spec ext4_param_specs[] = {
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
-static const char deprecated_msg[] =
- "Mount option \"%s\" will be removed by %s\n"
- "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
-
#define MOPT_SET 0x0001
#define MOPT_CLEAR 0x0002
#define MOPT_NOSUPPORT 0x0004
@@ -3782,7 +3778,7 @@ cont_thread:
}
if (!progress) {
elr->lr_next_sched = jiffies +
- prandom_u32_max(EXT4_DEF_LI_MAX_START_DELAY * HZ);
+ get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
}
if (time_before(elr->lr_next_sched, next_wakeup))
next_wakeup = elr->lr_next_sched;
@@ -3929,8 +3925,7 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
* spread the inode table initialization requests
* better.
*/
- elr->lr_next_sched = jiffies + prandom_u32_max(
- EXT4_DEF_LI_MAX_START_DELAY * HZ);
+ elr->lr_next_sched = jiffies + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
return elr;
}
@@ -4885,7 +4880,7 @@ out:
flush_work(&sbi->s_error_work);
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
- return err;
+ return -EINVAL;
}
static int ext4_journal_data_mode_check(struct super_block *sb)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 4546e01b2ee0..536d332d9e2e 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -282,7 +282,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
/* let's select beginning hot/small space first in no_heap mode*/
if (f2fs_need_rand_seg(sbi))
- p->offset = prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec);
+ p->offset = get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
else if (test_opt(sbi, NOHEAP) &&
(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
p->offset = 0;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index acf3d3fa4363..b304692c0cf5 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2534,7 +2534,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
sanity_check_seg_type(sbi, seg_type);
if (f2fs_need_rand_seg(sbi))
- return prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec);
+ return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
/* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi))
@@ -2588,7 +2588,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
curseg->alloc_type = LFS;
if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
curseg->fragment_remained_chunk =
- prandom_u32_max(sbi->max_fragment_chunk) + 1;
+ get_random_u32_inclusive(1, sbi->max_fragment_chunk);
}
static int __next_free_blkoff(struct f2fs_sb_info *sbi,
@@ -2625,9 +2625,9 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
/* To allocate block chunks in different sizes, use random number */
if (--seg->fragment_remained_chunk <= 0) {
seg->fragment_remained_chunk =
- prandom_u32_max(sbi->max_fragment_chunk) + 1;
+ get_random_u32_inclusive(1, sbi->max_fragment_chunk);
seg->next_blkoff +=
- prandom_u32_max(sbi->max_fragment_hole) + 1;
+ get_random_u32_inclusive(1, sbi->max_fragment_hole);
}
}
}
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c
index af191371c352..3626eb585a98 100644
--- a/fs/fat/nfs.c
+++ b/fs/fat/nfs.c
@@ -17,7 +17,7 @@ struct fat_fid {
#define FAT_FID_SIZE_WITHOUT_PARENT 3
#define FAT_FID_SIZE_WITH_PARENT (sizeof(struct fat_fid)/sizeof(u32))
-/**
+/*
* Look up a directory inode given its starting cluster.
*/
static struct inode *fat_dget(struct super_block *sb, int i_logstart)
@@ -135,7 +135,7 @@ fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp,
return type;
}
-/**
+/*
* Map a NFS file handle to a corresponding dentry.
* The dentry may or may not be connected to the filesystem root.
*/
diff --git a/fs/file.c b/fs/file.c
index 5f9c802a5d8d..c942c89ca4cd 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -1003,7 +1003,16 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
struct files_struct *files = current->files;
struct file *file;
- if (atomic_read(&files->count) == 1) {
+ /*
+ * If another thread is concurrently calling close_fd() followed
+ * by put_files_struct(), we must not observe the old table
+ * entry combined with the new refcount - otherwise we could
+ * return a file that is concurrently being freed.
+ *
+ * atomic_read_acquire() pairs with atomic_dec_and_test() in
+ * put_files_struct().
+ */
+ if (atomic_read_acquire(&files->count) == 1) {
file = files_lookup_fd_raw(files, fd);
if (!file || unlikely(file->f_mode & mask))
return 0;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 443f83382b9b..9958d4020771 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1712,18 +1712,26 @@ static int writeback_single_inode(struct inode *inode,
wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
/*
- * If the inode is now fully clean, then it can be safely removed from
- * its writeback list (if any). Otherwise the flusher threads are
- * responsible for the writeback lists.
+ * If the inode is freeing, its i_io_list shoudn't be updated
+ * as it can be finally deleted at this moment.
*/
- if (!(inode->i_state & I_DIRTY_ALL))
- inode_cgwb_move_to_attached(inode, wb);
- else if (!(inode->i_state & I_SYNC_QUEUED)) {
- if ((inode->i_state & I_DIRTY))
- redirty_tail_locked(inode, wb);
- else if (inode->i_state & I_DIRTY_TIME) {
- inode->dirtied_when = jiffies;
- inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
+ if (!(inode->i_state & I_FREEING)) {
+ /*
+ * If the inode is now fully clean, then it can be safely
+ * removed from its writeback list (if any). Otherwise the
+ * flusher threads are responsible for the writeback lists.
+ */
+ if (!(inode->i_state & I_DIRTY_ALL))
+ inode_cgwb_move_to_attached(inode, wb);
+ else if (!(inode->i_state & I_SYNC_QUEUED)) {
+ if ((inode->i_state & I_DIRTY))
+ redirty_tail_locked(inode, wb);
+ else if (inode->i_state & I_DIRTY_TIME) {
+ inode->dirtied_when = jiffies;
+ inode_io_list_move_locked(inode,
+ wb,
+ &wb->b_dirty_time);
+ }
}
}
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 451d8a077e12..bce2492186d0 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -605,6 +605,14 @@ again:
set_bit(FSCACHE_COOKIE_DO_PREP_TO_WRITE, &cookie->flags);
queue = true;
}
+ /*
+ * We could race with cookie_lru which may set LRU_DISCARD bit
+ * but has yet to run the cookie state machine. If this happens
+ * and another thread tries to use the cookie, clear LRU_DISCARD
+ * so we don't end up withdrawing the cookie while in use.
+ */
+ if (test_and_clear_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags))
+ fscache_see_cookie(cookie, fscache_cookie_see_lru_discard_clear);
break;
case FSCACHE_COOKIE_STATE_FAILED:
diff --git a/fs/fscache/io.c b/fs/fscache/io.c
index 3af3b08a9bb3..0d2b8dec8f82 100644
--- a/fs/fscache/io.c
+++ b/fs/fscache/io.c
@@ -286,7 +286,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
* taken into account.
*/
- iov_iter_xarray(&iter, WRITE, &mapping->i_pages, start, len);
+ iov_iter_xarray(&iter, ITER_SOURCE, &mapping->i_pages, start, len);
fscache_write(cres, start, &iter, fscache_wreq_done, wreq);
return;
diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c
index a058e0136bfe..ab8ceddf9efa 100644
--- a/fs/fscache/volume.c
+++ b/fs/fscache/volume.c
@@ -203,7 +203,11 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
struct fscache_volume *volume;
struct fscache_cache *cache;
size_t klen, hlen;
- char *key;
+ u8 *key;
+
+ klen = strlen(volume_key);
+ if (klen > NAME_MAX)
+ return NULL;
if (!coherency_data)
coherency_len = 0;
@@ -229,7 +233,6 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
/* Stick the length on the front of the key and pad it out to make
* hashing easier.
*/
- klen = strlen(volume_key);
hlen = round_up(1 + klen + 1, sizeof(__le32));
key = kzalloc(hlen, GFP_KERNEL);
if (!key)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 1a3afd469e3a..89f4741728ba 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2963,11 +2963,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
.mode = mode
};
int err;
- bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
- (mode & (FALLOC_FL_PUNCH_HOLE |
- FALLOC_FL_ZERO_RANGE));
-
- bool block_faults = FUSE_IS_DAX(inode) && lock_inode;
+ bool block_faults = FUSE_IS_DAX(inode) &&
+ (!(mode & FALLOC_FL_KEEP_SIZE) ||
+ (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)));
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_ZERO_RANGE))
@@ -2976,22 +2974,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (fm->fc->no_fallocate)
return -EOPNOTSUPP;
- if (lock_inode) {
- inode_lock(inode);
- if (block_faults) {
- filemap_invalidate_lock(inode->i_mapping);
- err = fuse_dax_break_layouts(inode, 0, 0);
- if (err)
- goto out;
- }
+ inode_lock(inode);
+ if (block_faults) {
+ filemap_invalidate_lock(inode->i_mapping);
+ err = fuse_dax_break_layouts(inode, 0, 0);
+ if (err)
+ goto out;
+ }
- if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) {
- loff_t endbyte = offset + length - 1;
+ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) {
+ loff_t endbyte = offset + length - 1;
- err = fuse_writeback_range(inode, offset, endbyte);
- if (err)
- goto out;
- }
+ err = fuse_writeback_range(inode, offset, endbyte);
+ if (err)
+ goto out;
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
@@ -3001,6 +2997,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
goto out;
}
+ err = file_modified(file);
+ if (err)
+ goto out;
+
if (!(mode & FALLOC_FL_KEEP_SIZE))
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
@@ -3035,8 +3035,7 @@ out:
if (block_faults)
filemap_invalidate_unlock(inode->i_mapping);
- if (lock_inode)
- inode_unlock(inode);
+ inode_unlock(inode);
fuse_flush_time_update(inode);
diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c
index 61d8afcb10a3..fcce94ace2c2 100644
--- a/fs/fuse/ioctl.c
+++ b/fs/fuse/ioctl.c
@@ -255,7 +255,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
ap.args.in_pages = true;
err = -EFAULT;
- iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size);
+ iov_iter_init(&ii, ITER_SOURCE, in_iov, in_iovs, in_size);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
if (c != PAGE_SIZE && iov_iter_count(&ii))
@@ -324,7 +324,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
goto out;
err = -EFAULT;
- iov_iter_init(&ii, READ, out_iov, out_iovs, transferred);
+ iov_iter_init(&ii, ITER_DEST, out_iov, out_iovs, transferred);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
if (c != PAGE_SIZE && iov_iter_count(&ii))
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index b4e565711045..e8deaacf1832 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -77,8 +77,10 @@ static void fuse_add_dirent_to_cache(struct file *file,
goto unlock;
addr = kmap_local_page(page);
- if (!offset)
+ if (!offset) {
clear_page(addr);
+ SetPageUptodate(page);
+ }
memcpy(addr + offset, dirent, reclen);
kunmap_local(addr);
fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
@@ -516,6 +518,12 @@ retry_locked:
page = find_get_page_flags(file->f_mapping, index,
FGP_ACCESSED | FGP_LOCK);
+ /* Page gone missing, then re-added to cache, but not initialized? */
+ if (page && !PageUptodate(page)) {
+ unlock_page(page);
+ put_page(page);
+ page = NULL;
+ }
spin_lock(&fi->rdc.lock);
if (!page) {
/*
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index c4526f16355d..a0746be3c1de 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -458,6 +458,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
/* panic? */
return -EIO;
+ if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN)
+ return -EIO;
fd.search_key->cat = HFS_I(main_inode)->cat_key;
if (hfs_brec_find(&fd))
/* panic? */
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
index 39f5e343bf4d..fdb0edb8a607 100644
--- a/fs/hfs/trans.c
+++ b/fs/hfs/trans.c
@@ -109,7 +109,7 @@ void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, const struct qstr
if (nls_io) {
wchar_t ch;
- while (srclen > 0) {
+ while (srclen > 0 && dstlen > 0) {
size = nls_io->char2uni(src, srclen, &ch);
if (size < 0) {
ch = '?';
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index a5db2e3b2980..6aa919e59483 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -198,6 +198,8 @@ struct hfsplus_sb_info {
#define HFSPLUS_SB_HFSX 3
#define HFSPLUS_SB_CASEFOLD 4
#define HFSPLUS_SB_NOBARRIER 5
+#define HFSPLUS_SB_UID 6
+#define HFSPLUS_SB_GID 7
static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb)
{
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index aeab83ed1c9c..b675581aa9d0 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -192,11 +192,11 @@ static void hfsplus_get_perms(struct inode *inode,
mode = be16_to_cpu(perms->mode);
i_uid_write(inode, be32_to_cpu(perms->owner));
- if (!i_uid_read(inode) && !mode)
+ if ((test_bit(HFSPLUS_SB_UID, &sbi->flags)) || (!i_uid_read(inode) && !mode))
inode->i_uid = sbi->uid;
i_gid_write(inode, be32_to_cpu(perms->group));
- if (!i_gid_read(inode) && !mode)
+ if ((test_bit(HFSPLUS_SB_GID, &sbi->flags)) || (!i_gid_read(inode) && !mode))
inode->i_gid = sbi->gid;
if (dir) {
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 047e05c57560..c94a58762ad6 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -140,6 +140,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
if (!uid_valid(sbi->uid)) {
pr_err("invalid uid specified\n");
return 0;
+ } else {
+ set_bit(HFSPLUS_SB_UID, &sbi->flags);
}
break;
case opt_gid:
@@ -151,6 +153,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
if (!gid_valid(sbi->gid)) {
pr_err("invalid gid specified\n");
return 0;
+ } else {
+ set_bit(HFSPLUS_SB_GID, &sbi->flags);
}
break;
case opt_part:
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index dd54f67e47fd..df7772335dc0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -328,6 +328,12 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
} else {
unlock_page(page);
+ if (PageHWPoison(page)) {
+ put_page(page);
+ retval = -EIO;
+ break;
+ }
+
/*
* We have the page, copy it to user space buffer.
*/
@@ -1111,13 +1117,6 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
static int hugetlbfs_error_remove_page(struct address_space *mapping,
struct page *page)
{
- struct inode *inode = mapping->host;
- pgoff_t index = page->index;
-
- hugetlb_delete_from_page_cache(page);
- if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
- hugetlb_fix_reserve_counts(inode);
-
return 0;
}
diff --git a/fs/inode.c b/fs/inode.c
index b608528efd3a..762feb8d0c6c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2071,9 +2071,6 @@ static int inode_needs_update_time(struct inode *inode, struct timespec64 *now)
if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
sync_it |= S_VERSION;
- if (!sync_it)
- return 0;
-
return sync_it;
}
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 3990f3e270cb..f33b3baad07c 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -31,10 +31,15 @@ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
+static bool __kernfs_active(struct kernfs_node *kn)
+{
+ return atomic_read(&kn->active) >= 0;
+}
+
static bool kernfs_active(struct kernfs_node *kn)
{
lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem);
- return atomic_read(&kn->active) >= 0;
+ return __kernfs_active(kn);
}
static bool kernfs_lockdep(struct kernfs_node *kn)
@@ -705,7 +710,12 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
goto err_unlock;
}
- if (unlikely(!kernfs_active(kn) || !atomic_inc_not_zero(&kn->count)))
+ /*
+ * We should fail if @kn has never been activated and guarantee success
+ * if the caller knows that @kn is active. Both can be achieved by
+ * __kernfs_active() which tests @kn->active without kernfs_rwsem.
+ */
+ if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count)))
goto err_unlock;
spin_unlock(&kernfs_idr_lock);
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 7ccda91b0dc4..ff0e7a4fcd4d 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -321,7 +321,7 @@ static int check_lock_range(struct file *filp, loff_t start, loff_t end,
unsigned char type)
{
struct file_lock *flock;
- struct file_lock_context *ctx = file_inode(filp)->i_flctx;
+ struct file_lock_context *ctx = locks_inode_context(file_inode(filp));
int error = 0;
if (!ctx || list_empty_careful(&ctx->flc_posix))
@@ -1794,9 +1794,9 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
ret = vfs_copy_file_range(src_fp->filp, src_off,
dst_fp->filp, dst_off, len, 0);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
- ret = generic_copy_file_range(src_fp->filp, src_off,
- dst_fp->filp, dst_off,
- len, 0);
+ ret = vfs_copy_file_range(src_fp->filp, src_off,
+ dst_fp->filp, dst_off, len,
+ COPY_FILE_SPLICE);
if (ret < 0)
return ret;
diff --git a/fs/libfs.c b/fs/libfs.c
index 682d56345a1c..aada4e7c8713 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -995,8 +995,8 @@ out:
EXPORT_SYMBOL_GPL(simple_attr_read);
/* interpret the buffer as a number to call the set function with */
-ssize_t simple_attr_write(struct file *file, const char __user *buf,
- size_t len, loff_t *ppos)
+static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos, bool is_signed)
{
struct simple_attr *attr;
unsigned long long val;
@@ -1017,7 +1017,10 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
goto out;
attr->set_buf[size] = '\0';
- ret = kstrtoull(attr->set_buf, 0, &val);
+ if (is_signed)
+ ret = kstrtoll(attr->set_buf, 0, &val);
+ else
+ ret = kstrtoull(attr->set_buf, 0, &val);
if (ret)
goto out;
ret = attr->set(attr->data, val);
@@ -1027,8 +1030,21 @@ out:
mutex_unlock(&attr->mutex);
return ret;
}
+
+ssize_t simple_attr_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return simple_attr_write_xsigned(file, buf, len, ppos, false);
+}
EXPORT_SYMBOL_GPL(simple_attr_write);
+ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return simple_attr_write_xsigned(file, buf, len, ppos, true);
+}
+EXPORT_SYMBOL_GPL(simple_attr_write_signed);
+
/**
* generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
* @sb: filesystem to do the file handle conversion on
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index e1c4617de771..720684345817 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -207,7 +207,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
{
struct inode *inode = nlmsvc_file_inode(file);
struct file_lock *fl;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct nlm_host *lockhost;
if (!flctx || list_empty_careful(&flctx->flc_posix))
@@ -262,7 +262,7 @@ nlm_file_inuse(struct nlm_file *file)
{
struct inode *inode = nlmsvc_file_inode(file);
struct file_lock *fl;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
return 1;
diff --git a/fs/locks.c b/fs/locks.c
index 607f94a0e789..8f01bee17715 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -175,7 +175,7 @@ locks_get_lock_context(struct inode *inode, int type)
struct file_lock_context *ctx;
/* paired with cmpxchg() below */
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (likely(ctx) || type == F_UNLCK)
goto out;
@@ -194,7 +194,7 @@ locks_get_lock_context(struct inode *inode, int type)
*/
if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
kmem_cache_free(flctx_cache, ctx);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
}
out:
trace_locks_get_lock_context(inode, type, ctx);
@@ -247,7 +247,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list,
void
locks_free_lock_context(struct inode *inode)
{
- struct file_lock_context *ctx = inode->i_flctx;
+ struct file_lock_context *ctx = locks_inode_context(inode);
if (unlikely(ctx)) {
locks_check_ctx_lists(inode);
@@ -891,7 +891,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
void *owner;
void (*func)(void);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx || list_empty_careful(&ctx->flc_posix)) {
fl->fl_type = F_UNLCK;
return;
@@ -1483,7 +1483,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
new_fl->fl_flags = type;
/* typically we will check that ctx is non-NULL before calling */
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx) {
WARN_ON_ONCE(1);
goto free_lock;
@@ -1588,7 +1588,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time)
struct file_lock_context *ctx;
struct file_lock *fl;
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
fl = list_first_entry_or_null(&ctx->flc_lease,
@@ -1634,7 +1634,7 @@ int fcntl_getlease(struct file *filp)
int type = F_UNLCK;
LIST_HEAD(dispose);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
@@ -1823,7 +1823,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
struct file_lock_context *ctx;
LIST_HEAD(dispose);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx) {
trace_generic_delete_lease(inode, NULL);
return error;
@@ -2096,7 +2096,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
* throw a warning to let people know that they don't actually work.
*/
if (cmd & LOCK_MAND) {
- pr_warn_once("Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n");
+ pr_warn_once("%s(%d): Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n", current->comm, current->pid);
return 0;
}
@@ -2146,6 +2146,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
*/
int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
+ WARN_ON_ONCE(filp != fl->fl_file);
if (filp->f_op->lock)
return filp->f_op->lock(filp, F_GETLK, fl);
posix_test_lock(filp, fl);
@@ -2295,6 +2296,7 @@ out:
*/
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
{
+ WARN_ON_ONCE(filp != fl->fl_file);
if (filp->f_op->lock)
return filp->f_op->lock(filp, cmd, fl);
else
@@ -2561,7 +2563,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
* posix_lock_file(). Another process could be setting a lock on this
* file at the same time, but we wouldn't remove that lock anyway.
*/
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx || list_empty(&ctx->flc_posix))
return;
@@ -2634,7 +2636,7 @@ void locks_remove_file(struct file *filp)
{
struct file_lock_context *ctx;
- ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
+ ctx = locks_inode_context(locks_inode(filp));
if (!ctx)
return;
@@ -2663,12 +2665,36 @@ void locks_remove_file(struct file *filp)
*/
int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
+ WARN_ON_ONCE(filp != fl->fl_file);
if (filp->f_op->lock)
return filp->f_op->lock(filp, F_CANCELLK, fl);
return 0;
}
EXPORT_SYMBOL_GPL(vfs_cancel_lock);
+/**
+ * vfs_inode_has_locks - are any file locks held on @inode?
+ * @inode: inode to check for locks
+ *
+ * Return true if there are any FL_POSIX or FL_FLOCK locks currently
+ * set on @inode.
+ */
+bool vfs_inode_has_locks(struct inode *inode)
+{
+ struct file_lock_context *ctx;
+ bool ret;
+
+ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return false;
+
+ spin_lock(&ctx->flc_lock);
+ ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock);
+ spin_unlock(&ctx->flc_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vfs_inode_has_locks);
+
#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -2839,7 +2865,7 @@ void show_fd_locks(struct seq_file *f,
struct file_lock_context *ctx;
int id = 0;
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx)
return;
diff --git a/fs/namei.c b/fs/namei.c
index 1c80fd23cda2..7bfebfa993ed 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3591,6 +3591,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns,
struct inode *dir = d_inode(parentpath->dentry);
struct inode *inode;
int error;
+ int open_flag = file->f_flags;
/* we want directory to be writable */
error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
@@ -3613,7 +3614,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns,
if (error)
return error;
inode = file_inode(file);
- if (!(file->f_flags & O_EXCL)) {
+ if (!(open_flag & O_EXCL)) {
spin_lock(&inode->i_lock);
inode->i_state |= I_LINKABLE;
spin_unlock(&inode->i_lock);
diff --git a/fs/namespace.c b/fs/namespace.c
index df137ba19d37..c80f422084eb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3515,8 +3515,9 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
q = next_mnt(q, new);
if (!q)
break;
+ // an mntns binding we'd skipped?
while (p->mnt.mnt_root != q->mnt.mnt_root)
- p = next_mnt(p, old);
+ p = next_mnt(skip_mnt_tree(p), old);
}
namespace_unlock();
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 0ce535852151..7679a68e8193 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -17,9 +17,9 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
struct folio *folio;
- unsigned int iopos, account = 0;
pgoff_t start_page = rreq->start / PAGE_SIZE;
pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
+ size_t account = 0;
bool subreq_failed = false;
XA_STATE(xas, &rreq->mapping->i_pages, start_page);
@@ -39,18 +39,23 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
*/
subreq = list_first_entry(&rreq->subrequests,
struct netfs_io_subrequest, rreq_link);
- iopos = 0;
subreq_failed = (subreq->error < 0);
trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
rcu_read_lock();
xas_for_each(&xas, folio, last_page) {
- unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
- unsigned int pgend = pgpos + folio_size(folio);
+ loff_t pg_end;
bool pg_failed = false;
+ if (xas_retry(&xas, folio))
+ continue;
+
+ pg_end = folio_pos(folio) + folio_size(folio) - 1;
+
for (;;) {
+ loff_t sreq_end;
+
if (!subreq) {
pg_failed = true;
break;
@@ -58,11 +63,11 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
folio_start_fscache(folio);
pg_failed |= subreq_failed;
- if (pgend < iopos + subreq->len)
+ sreq_end = subreq->start + subreq->len - 1;
+ if (pg_end < sreq_end)
break;
account += subreq->transferred;
- iopos += subreq->len;
if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
subreq = list_next_entry(subreq, rreq_link);
subreq_failed = (subreq->error < 0);
@@ -70,7 +75,8 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
subreq = NULL;
subreq_failed = false;
}
- if (pgend == iopos)
+
+ if (pg_end == sreq_end)
break;
}
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index 428925899282..7f753380e047 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -23,7 +23,7 @@ static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
struct iov_iter iter;
- iov_iter_xarray(&iter, READ, &subreq->rreq->mapping->i_pages,
+ iov_iter_xarray(&iter, ITER_DEST, &subreq->rreq->mapping->i_pages,
subreq->start + subreq->transferred,
subreq->len - subreq->transferred);
iov_iter_zero(iov_iter_count(&iter), &iter);
@@ -49,7 +49,7 @@ static void netfs_read_from_cache(struct netfs_io_request *rreq,
struct iov_iter iter;
netfs_stat(&netfs_n_rh_read);
- iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
+ iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages,
subreq->start + subreq->transferred,
subreq->len - subreq->transferred);
@@ -121,6 +121,9 @@ static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);
xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
+ if (xas_retry(&xas, folio))
+ continue;
+
/* We might have multiple writes from the same huge
* folio, but we mustn't unlock a folio more than once.
*/
@@ -205,7 +208,7 @@ static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
continue;
}
- iov_iter_xarray(&iter, WRITE, &rreq->mapping->i_pages,
+ iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages,
subreq->start, subreq->len);
atomic_inc(&rreq->nr_copy_ops);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index da8da5cdbbc1..f50e025ae406 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -280,7 +280,7 @@ EXPORT_SYMBOL_GPL(nfs_put_client);
static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data)
{
struct nfs_client *clp;
- const struct sockaddr *sap = data->addr;
+ const struct sockaddr *sap = (struct sockaddr *)data->addr;
struct nfs_net *nn = net_generic(data->net, nfs_net_id);
int error;
@@ -666,7 +666,7 @@ static int nfs_init_server(struct nfs_server *server,
struct rpc_timeout timeparms;
struct nfs_client_initdata cl_init = {
.hostname = ctx->nfs_server.hostname,
- .addr = (const struct sockaddr *)&ctx->nfs_server.address,
+ .addr = &ctx->nfs_server._address,
.addrlen = ctx->nfs_server.addrlen,
.nfs_mod = ctx->nfs_mod,
.proto = ctx->nfs_server.protocol,
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5c97cad741a7..cf7365581031 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -146,7 +146,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
{
struct inode *inode = state->inode;
struct file_lock *fl;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct list_head *list;
int status = 0;
@@ -228,8 +228,7 @@ again:
*
*/
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
- fmode_t type,
- const nfs4_stateid *stateid,
+ fmode_t type, const nfs4_stateid *stateid,
unsigned long pagemod_limit)
{
struct nfs_delegation *delegation;
@@ -239,25 +238,24 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL) {
spin_lock(&delegation->lock);
- if (nfs4_is_valid_delegation(delegation, 0)) {
- nfs4_stateid_copy(&delegation->stateid, stateid);
- delegation->type = type;
- delegation->pagemod_limit = pagemod_limit;
- oldcred = delegation->cred;
- delegation->cred = get_cred(cred);
- clear_bit(NFS_DELEGATION_NEED_RECLAIM,
- &delegation->flags);
- spin_unlock(&delegation->lock);
- rcu_read_unlock();
- put_cred(oldcred);
- trace_nfs4_reclaim_delegation(inode, type);
- return;
- }
- /* We appear to have raced with a delegation return. */
+ nfs4_stateid_copy(&delegation->stateid, stateid);
+ delegation->type = type;
+ delegation->pagemod_limit = pagemod_limit;
+ oldcred = delegation->cred;
+ delegation->cred = get_cred(cred);
+ clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+ if (test_and_clear_bit(NFS_DELEGATION_REVOKED,
+ &delegation->flags))
+ atomic_long_inc(&nfs_active_delegations);
spin_unlock(&delegation->lock);
+ rcu_read_unlock();
+ put_cred(oldcred);
+ trace_nfs4_reclaim_delegation(inode, type);
+ } else {
+ rcu_read_unlock();
+ nfs_inode_set_delegation(inode, cred, type, stateid,
+ pagemod_limit);
}
- rcu_read_unlock();
- nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit);
}
static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 58036f657126..f594dac436a7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2489,9 +2489,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
spin_unlock(&dentry->d_lock);
goto out;
}
- if (dentry->d_fsdata)
- /* old devname */
- kfree(dentry->d_fsdata);
+ /* old devname */
+ kfree(dentry->d_fsdata);
dentry->d_fsdata = NFS_FSDATA_BLOCKED;
spin_unlock(&dentry->d_lock);
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index e87d500ad95a..6603b5cee029 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -16,8 +16,9 @@
#include "dns_resolve.h"
ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
- struct sockaddr *sa, size_t salen)
+ struct sockaddr_storage *ss, size_t salen)
{
+ struct sockaddr *sa = (struct sockaddr *)ss;
ssize_t ret;
char *ip_addr = NULL;
int ip_len;
@@ -341,7 +342,7 @@ out:
}
ssize_t nfs_dns_resolve_name(struct net *net, char *name,
- size_t namelen, struct sockaddr *sa, size_t salen)
+ size_t namelen, struct sockaddr_storage *ss, size_t salen)
{
struct nfs_dns_ent key = {
.hostname = name,
@@ -354,7 +355,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name,
ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item);
if (ret == 0) {
if (salen >= item->addrlen) {
- memcpy(sa, &item->addr, item->addrlen);
+ memcpy(ss, &item->addr, item->addrlen);
ret = item->addrlen;
} else
ret = -EOVERFLOW;
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h
index 576ff4b54c82..fe3b172c4de1 100644
--- a/fs/nfs/dns_resolve.h
+++ b/fs/nfs/dns_resolve.h
@@ -32,6 +32,6 @@ extern void nfs_dns_resolver_cache_destroy(struct net *net);
#endif
extern ssize_t nfs_dns_resolve_name(struct net *net, char *name,
- size_t namelen, struct sockaddr *sa, size_t salen);
+ size_t namelen, struct sockaddr_storage *sa, size_t salen);
#endif
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 4da701fd1424..09833ec102fc 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -273,9 +273,9 @@ static const struct constant_table nfs_secflavor_tokens[] = {
* Address family must be initialized, and address must not be
* the ANY address for that family.
*/
-static int nfs_verify_server_address(struct sockaddr *addr)
+static int nfs_verify_server_address(struct sockaddr_storage *addr)
{
- switch (addr->sa_family) {
+ switch (addr->ss_family) {
case AF_INET: {
struct sockaddr_in *sa = (struct sockaddr_in *)addr;
return sa->sin_addr.s_addr != htonl(INADDR_ANY);
@@ -969,7 +969,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_fh *mntfh = ctx->mntfh;
- struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ struct sockaddr_storage *sap = &ctx->nfs_server._address;
int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
int ret;
@@ -1044,7 +1044,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
memcpy(sap, &data->addr, sizeof(data->addr));
ctx->nfs_server.addrlen = sizeof(data->addr);
ctx->nfs_server.port = ntohs(data->addr.sin_port);
- if (sap->sa_family != AF_INET ||
+ if (sap->ss_family != AF_INET ||
!nfs_verify_server_address(sap))
goto out_no_address;
@@ -1200,7 +1200,7 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
struct nfs4_mount_data *data)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
- struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ struct sockaddr_storage *sap = &ctx->nfs_server._address;
int ret;
char *c;
@@ -1314,7 +1314,7 @@ static int nfs_fs_context_validate(struct fs_context *fc)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_subversion *nfs_mod;
- struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ struct sockaddr_storage *sap = &ctx->nfs_server._address;
int max_namelen = PAGE_SIZE;
int max_pathlen = NFS_MAXPATHLEN;
int port = 0;
@@ -1540,7 +1540,7 @@ static int nfs_init_fs_context(struct fs_context *fc)
ctx->version = nfss->nfs_client->rpc_ops->version;
ctx->minorversion = nfss->nfs_client->cl_minorversion;
- memcpy(&ctx->nfs_server.address, &nfss->nfs_client->cl_addr,
+ memcpy(&ctx->nfs_server._address, &nfss->nfs_client->cl_addr,
ctx->nfs_server.addrlen);
if (fc->net_ns != net) {
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index e861d7bae305..e731c00a9fcb 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -252,7 +252,7 @@ static int fscache_fallback_read_page(struct inode *inode, struct page *page)
bvec[0].bv_page = page;
bvec[0].bv_offset = 0;
bvec[0].bv_len = PAGE_SIZE;
- iov_iter_bvec(&iter, READ, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
+ iov_iter_bvec(&iter, ITER_DEST, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
ret = fscache_begin_read_operation(&cres, cookie);
if (ret < 0)
@@ -282,7 +282,7 @@ static int fscache_fallback_write_page(struct inode *inode, struct page *page,
bvec[0].bv_page = page;
bvec[0].bv_offset = 0;
bvec[0].bv_len = PAGE_SIZE;
- iov_iter_bvec(&iter, WRITE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
+ iov_iter_bvec(&iter, ITER_SOURCE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
ret = fscache_begin_write_operation(&cres, cookie);
if (ret < 0)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d914d609b85b..647fc3f547cb 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -69,7 +69,7 @@ static inline fmode_t flags_to_mode(int flags)
struct nfs_client_initdata {
unsigned long init_flags;
const char *hostname; /* Hostname of the server */
- const struct sockaddr *addr; /* Address of the server */
+ const struct sockaddr_storage *addr; /* Address of the server */
const char *nodename; /* Hostname of the client */
const char *ip_addr; /* IP address of the client */
size_t addrlen;
@@ -180,7 +180,7 @@ static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc)
/* mount_clnt.c */
struct nfs_mount_request {
- struct sockaddr *sap;
+ struct sockaddr_storage *sap;
size_t salen;
char *hostname;
char *dirpath;
@@ -223,7 +223,7 @@ extern void nfs4_server_set_init_caps(struct nfs_server *);
extern struct nfs_server *nfs4_create_server(struct fs_context *);
extern struct nfs_server *nfs4_create_referral_server(struct fs_context *);
extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
- struct sockaddr *sap, size_t salen,
+ struct sockaddr_storage *sap, size_t salen,
struct net *net);
extern void nfs_free_server(struct nfs_server *server);
extern struct nfs_server *nfs_clone_server(struct nfs_server *,
@@ -235,7 +235,7 @@ extern int nfs_client_init_status(const struct nfs_client *clp);
extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr,
+ const struct sockaddr_storage *ds_addr,
int ds_addrlen, int ds_proto,
unsigned int ds_timeo,
unsigned int ds_retrans,
@@ -243,7 +243,7 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
struct inode *);
extern struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr, int ds_addrlen,
+ const struct sockaddr_storage *ds_addr, int ds_addrlen,
int ds_proto, unsigned int ds_timeo,
unsigned int ds_retrans);
#ifdef CONFIG_PROC_FS
@@ -894,13 +894,13 @@ static inline bool nfs_error_is_fatal_on_server(int err)
* Select between a default port value and a user-specified port value.
* If a zero value is set, then autobind will be used.
*/
-static inline void nfs_set_port(struct sockaddr *sap, int *port,
+static inline void nfs_set_port(struct sockaddr_storage *sap, int *port,
const unsigned short default_port)
{
if (*port == NFS_UNSPEC_PORT)
*port = default_port;
- rpc_set_port(sap, *port);
+ rpc_set_port((struct sockaddr *)sap, *port);
}
struct nfs_direct_req {
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index c5e3b6b3366a..68e76b626371 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -158,7 +158,7 @@ int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans)
struct rpc_create_args args = {
.net = info->net,
.protocol = info->protocol,
- .address = info->sap,
+ .address = (struct sockaddr *)info->sap,
.addrsize = info->salen,
.timeout = &mnt_timeout,
.servername = info->hostname,
@@ -245,7 +245,7 @@ void nfs_umount(const struct nfs_mount_request *info)
struct rpc_create_args args = {
.net = info->net,
.protocol = IPPROTO_UDP,
- .address = info->sap,
+ .address = (struct sockaddr *)info->sap,
.addrsize = info->salen,
.timeout = &nfs_umnt_timeout,
.servername = info->hostname,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 3295af4110f1..2f336ace7555 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -175,7 +175,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
}
/* for submounts we want the same server; referrals will reassign */
- memcpy(&ctx->nfs_server.address, &client->cl_addr, client->cl_addrlen);
+ memcpy(&ctx->nfs_server._address, &client->cl_addr, client->cl_addrlen);
ctx->nfs_server.addrlen = client->cl_addrlen;
ctx->nfs_server.port = server->port;
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index b49359afac88..669cda757a5c 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -78,7 +78,7 @@ struct nfs_server *nfs3_clone_server(struct nfs_server *source,
* the MDS.
*/
struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr, int ds_addrlen,
+ const struct sockaddr_storage *ds_addr, int ds_addrlen,
int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
{
struct rpc_timeout ds_timeout;
@@ -98,7 +98,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
char buf[INET6_ADDRSTRLEN + 1];
/* fake a hostname because lockd wants it */
- if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
+ if (rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf)) <= 0)
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 13424f0d793b..ecb428512fe1 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -1093,6 +1093,9 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
&args.seq_args, &res.seq_res, 0);
trace_nfs4_clone(src_inode, dst_inode, &args, status);
if (status == 0) {
+ /* a zero-length count means clone to EOF in src */
+ if (count == 0 && res.dst_fattr->valid & NFS_ATTR_FATTR_SIZE)
+ count = nfs_size_to_loff_t(res.dst_fattr->size) - dst_offset;
nfs42_copy_dest_done(dst_inode, dst_offset, count);
status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 400a71e75238..cfef738d765e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -281,7 +281,7 @@ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *,
int nfs4_submount(struct fs_context *, struct nfs_server *);
int nfs4_replace_transport(struct nfs_server *server,
const struct nfs4_fs_locations *locations);
-size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr_storage *ss,
size_t salen, struct net *net, int port);
/* nfs4proc.c */
extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 7a5162afa5c0..d3051b051a56 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -346,6 +346,7 @@ int nfs40_init_client(struct nfs_client *clp)
ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE,
"NFSv4.0 transport Slot table");
if (ret) {
+ nfs4_shutdown_slot_table(tbl);
kfree(tbl);
return ret;
}
@@ -889,7 +890,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
*/
static int nfs4_set_client(struct nfs_server *server,
const char *hostname,
- const struct sockaddr *addr,
+ const struct sockaddr_storage *addr,
const size_t addrlen,
const char *ip_addr,
int proto, const struct rpc_timeout *timeparms,
@@ -924,7 +925,7 @@ static int nfs4_set_client(struct nfs_server *server,
__set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
__set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
- server->port = rpc_get_port(addr);
+ server->port = rpc_get_port((struct sockaddr *)addr);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
@@ -960,7 +961,7 @@ static int nfs4_set_client(struct nfs_server *server,
* the MDS.
*/
struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr, int ds_addrlen,
+ const struct sockaddr_storage *ds_addr, int ds_addrlen,
int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans,
u32 minor_version)
{
@@ -980,7 +981,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
};
char buf[INET6_ADDRSTRLEN + 1];
- if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
+ if (rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf)) <= 0)
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;
@@ -1148,7 +1149,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
/* Get a client record */
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
- &ctx->nfs_server.address,
+ &ctx->nfs_server._address,
ctx->nfs_server.addrlen,
ctx->client_address,
ctx->nfs_server.protocol,
@@ -1238,7 +1239,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT);
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
- &ctx->nfs_server.address,
+ &ctx->nfs_server._address,
ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
XPRT_TRANSPORT_RDMA,
@@ -1254,7 +1255,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
- &ctx->nfs_server.address,
+ &ctx->nfs_server._address,
ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
XPRT_TRANSPORT_TCP,
@@ -1303,14 +1304,14 @@ error:
* Returns zero on success, or a negative errno value.
*/
int nfs4_update_server(struct nfs_server *server, const char *hostname,
- struct sockaddr *sap, size_t salen, struct net *net)
+ struct sockaddr_storage *sap, size_t salen, struct net *net)
{
struct nfs_client *clp = server->nfs_client;
struct rpc_clnt *clnt = server->client;
struct xprt_create xargs = {
.ident = clp->cl_proto,
.net = net,
- .dstaddr = sap,
+ .dstaddr = (struct sockaddr *)sap,
.addrlen = salen,
.servername = hostname,
};
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index f2dbf904c598..9a98595bb160 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -164,16 +164,17 @@ static int nfs4_validate_fspath(struct dentry *dentry,
return 0;
}
-size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr_storage *ss,
size_t salen, struct net *net, int port)
{
+ struct sockaddr *sa = (struct sockaddr *)ss;
ssize_t ret;
ret = rpc_pton(net, string, len, sa, salen);
if (ret == 0) {
ret = rpc_uaddr2sockaddr(net, string, len, sa, salen);
if (ret == 0) {
- ret = nfs_dns_resolve_name(net, string, len, sa, salen);
+ ret = nfs_dns_resolve_name(net, string, len, ss, salen);
if (ret < 0)
ret = 0;
}
@@ -331,7 +332,7 @@ static int try_location(struct fs_context *fc,
ctx->nfs_server.addrlen =
nfs_parse_server_name(buf->data, buf->len,
- &ctx->nfs_server.address,
+ &ctx->nfs_server._address,
sizeof(ctx->nfs_server._address),
fc->net_ns, 0);
if (ctx->nfs_server.addrlen == 0)
@@ -483,14 +484,13 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
char *page, char *page2,
const struct nfs4_fs_location *location)
{
- const size_t addr_bufsize = sizeof(struct sockaddr_storage);
struct net *net = rpc_net_ns(server->client);
- struct sockaddr *sap;
+ struct sockaddr_storage *sap;
unsigned int s;
size_t salen;
int error;
- sap = kmalloc(addr_bufsize, GFP_KERNEL);
+ sap = kmalloc(sizeof(*sap), GFP_KERNEL);
if (sap == NULL)
return -ENOMEM;
@@ -506,10 +506,10 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
continue;
salen = nfs_parse_server_name(buf->data, buf->len,
- sap, addr_bufsize, net, 0);
+ sap, sizeof(*sap), net, 0);
if (salen == 0)
continue;
- rpc_set_port(sap, NFS_PORT);
+ rpc_set_port((struct sockaddr *)sap, NFS_PORT);
error = -ENOMEM;
hostname = kmemdup_nul(buf->data, buf->len, GFP_KERNEL);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e2efcd26336c..86ed5c0142c3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3951,7 +3951,7 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location,
for (i = 0; i < location->nservers; i++) {
struct nfs4_string *srv_loc = &location->servers[i];
- struct sockaddr addr;
+ struct sockaddr_storage addr;
size_t addrlen;
struct xprt_create xprt_args = {
.ident = 0,
@@ -3974,7 +3974,7 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location,
clp->cl_net, server->port);
if (!addrlen)
return;
- xprt_args.dstaddr = &addr;
+ xprt_args.dstaddr = (struct sockaddr *)&addr;
xprt_args.addrlen = addrlen;
servername = kmalloc(srv_loc->len + 1, GFP_KERNEL);
if (!servername)
@@ -7138,6 +7138,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
{
struct nfs4_lockdata *data = calldata;
struct nfs4_lock_state *lsp = data->lsp;
+ struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry));
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
@@ -7145,8 +7146,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
data->rpc_status = task->tk_status;
switch (task->tk_status) {
case 0:
- renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
- data->timestamp);
+ renew_lease(server, data->timestamp);
if (data->arg.new_lock && !data->cancelled) {
data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
@@ -7167,6 +7167,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
if (!nfs4_stateid_match(&data->arg.open_stateid,
&lsp->ls_state->open_stateid))
goto out_restart;
+ else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN)
+ goto out_restart;
} else if (!nfs4_stateid_match(&data->arg.lock_stateid,
&lsp->ls_stateid))
goto out_restart;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index c3503fb26fa2..dd18344648f3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1501,7 +1501,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
struct file_lock *fl;
struct nfs4_lock_state *lsp;
int status = 0;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct list_head *list;
if (flctx == NULL)
@@ -1786,6 +1786,7 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
{
+ set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
/* Mark all delegations for reclaim */
nfs_delegation_mark_reclaim(clp);
nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
@@ -2670,6 +2671,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
if (status < 0)
goto out_error;
nfs4_state_end_reclaim_reboot(clp);
+ continue;
}
/* Detect expired delegations... */
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 317cedfa52bf..16be6dae524f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -1055,7 +1055,7 @@ static unsigned int nfs_coalesce_size(struct nfs_page *prev,
if (prev) {
if (!nfs_match_open_context(nfs_req_openctx(req), nfs_req_openctx(prev)))
return 0;
- flctx = d_inode(nfs_req_openctx(req)->dentry)->i_flctx;
+ flctx = locks_inode_context(d_inode(nfs_req_openctx(req)->dentry));
if (flctx != NULL &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock)) &&
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 987c88ddeaf0..5d035dd2d7bf 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -821,7 +821,7 @@ static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
static struct nfs_client *(*get_v3_ds_connect)(
struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr,
+ const struct sockaddr_storage *ds_addr,
int ds_addrlen,
int ds_proto,
unsigned int ds_timeo,
@@ -882,7 +882,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
continue;
}
clp = get_v3_ds_connect(mds_srv,
- (struct sockaddr *)&da->da_addr,
+ &da->da_addr,
da->da_addrlen, da->da_transport,
timeo, retrans);
if (IS_ERR(clp))
@@ -951,7 +951,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
put_cred(xprtdata.cred);
} else {
clp = nfs4_set_ds_client(mds_srv,
- (struct sockaddr *)&da->da_addr,
+ &da->da_addr,
da->da_addrlen,
da->da_transport, timeo,
retrans, minor_version);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ee66ffdb985e..05ae23657527 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -822,8 +822,7 @@ static int nfs_request_mount(struct fs_context *fc,
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_mount_request request = {
- .sap = (struct sockaddr *)
- &ctx->mount_server.address,
+ .sap = &ctx->mount_server._address,
.dirpath = ctx->nfs_server.export_path,
.protocol = ctx->mount_server.protocol,
.fh = root_fh,
@@ -854,7 +853,7 @@ static int nfs_request_mount(struct fs_context *fc,
* Construct the mount server's address.
*/
if (ctx->mount_server.address.sa_family == AF_UNSPEC) {
- memcpy(request.sap, &ctx->nfs_server.address,
+ memcpy(request.sap, &ctx->nfs_server._address,
ctx->nfs_server.addrlen);
ctx->mount_server.addrlen = ctx->nfs_server.addrlen;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f41d24b54fd1..80c240e50952 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1185,7 +1185,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct nfs_lock_context *l_ctx;
- struct file_lock_context *flctx = file_inode(file)->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(file_inode(file));
struct nfs_page *req;
int do_flush, status;
/*
@@ -1321,7 +1321,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page,
struct inode *inode, unsigned int pagelen)
{
int ret;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct file_lock *fl;
if (file->f_flags & O_DSYNC)
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 29a62db155fb..ec3fceb92236 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -893,9 +893,8 @@ __nfsd_file_cache_purge(struct net *net)
nf = rhashtable_walk_next(&iter);
while (!IS_ERR_OR_NULL(nf)) {
- if (net && nf->nf_net != net)
- continue;
- nfsd_file_unhash_and_dispose(nf, &dispose);
+ if (!net || nf->nf_net == net)
+ nfsd_file_unhash_and_dispose(nf, &dispose);
nf = rhashtable_walk_next(&iter);
}
@@ -1077,6 +1076,7 @@ retry:
goto open_file;
nfsd_file_slab_free(&nf->nf_rcu);
+ nf = NULL;
if (ret == -EEXIST)
goto retry;
trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4e718500a00c..da8d0ea66229 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4758,7 +4758,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
static bool nfsd4_deleg_present(const struct inode *inode)
{
- struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx);
+ struct file_lock_context *ctx = locks_inode_context(inode);
return ctx && !list_empty_careful(&ctx->flc_lease);
}
@@ -5382,6 +5382,7 @@ nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp,
if (err)
return -EAGAIN;
+ exp_put(exp);
dput(child);
if (child != file_dentry(fp->fi_deleg_file->nf_file))
return -EAGAIN;
@@ -5896,7 +5897,7 @@ nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
nf = stp->st_stid.sc_file;
- ctx = nf->fi_inode->i_flctx;
+ ctx = locks_inode_context(nf->fi_inode);
if (!ctx)
continue;
if (locks_owner_has_blockers(ctx, lo))
@@ -7712,7 +7713,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
}
inode = locks_inode(nf->nf_file);
- flctx = inode->i_flctx;
+ flctx = locks_inode_context(inode);
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
spin_lock(&flctx->flc_lock);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 6a29bcfc9390..dc74a947a440 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1458,12 +1458,14 @@ static __net_init int nfsd_init_net(struct net *net)
goto out_drc_error;
retval = nfsd_reply_cache_init(nn);
if (retval)
- goto out_drc_error;
+ goto out_cache_error;
get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
seqlock_init(&nn->writeverf_lock);
return 0;
+out_cache_error:
+ nfsd4_leases_net_shutdown(nn);
out_drc_error:
nfsd_idmap_shutdown(net);
out_idmap_error:
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index d73434200df9..8c52b6c9d31a 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -392,8 +392,8 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
skip_pseudoflavor_check:
/* Finally, check access permissions. */
error = nfsd_permission(rqstp, exp, dentry, access);
- trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
out:
+ trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
if (error == nfserr_stale)
nfsd_stats_fh_stale_inc(exp);
return error;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 06a96e955bd0..d4b6839bb459 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -254,7 +254,10 @@ TRACE_EVENT_CONDITION(nfsd_fh_verify_err,
rqstp->rq_xprt->xpt_remotelen);
__entry->xid = be32_to_cpu(rqstp->rq_xid);
__entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
- __entry->inode = d_inode(fhp->fh_dentry);
+ if (fhp->fh_dentry)
+ __entry->inode = d_inode(fhp->fh_dentry);
+ else
+ __entry->inode = NULL;
__entry->type = type;
__entry->access = access;
__entry->error = be32_to_cpu(error);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4eded0729108..08a929607641 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -596,8 +596,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
- ret = generic_copy_file_range(src, src_pos, dst, dst_pos,
- count, 0);
+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
+ COPY_FILE_SPLICE);
return ret;
}
@@ -871,10 +871,11 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct svc_rqst *rqstp = sd->u.data;
struct page *page = buf->page; // may be a compound one
unsigned offset = buf->offset;
+ struct page *last_page;
- page += offset / PAGE_SIZE;
- for (int i = sd->len; i > 0; i -= PAGE_SIZE)
- svc_rqst_replace_page(rqstp, page++);
+ last_page = page + (offset + sd->len - 1) / PAGE_SIZE;
+ for (page += offset / PAGE_SIZE; page <= last_page; page++)
+ svc_rqst_replace_page(rqstp, page);
if (rqstp->rq_res.page_len == 0) // first call
rqstp->rq_res.page_base = offset % PAGE_SIZE;
rqstp->rq_res.page_len += sd->len;
@@ -942,7 +943,7 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
ssize_t host_err;
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
- iov_iter_kvec(&iter, READ, vec, vlen, *count);
+ iov_iter_kvec(&iter, ITER_DEST, vec, vlen, *count);
host_err = vfs_iter_read(file, &iter, &ppos, 0);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
@@ -1032,7 +1033,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
if (stable && !use_wgather)
flags |= RWF_SYNC;
- iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
+ iov_iter_kvec(&iter, ITER_SOURCE, vec, vlen, *cnt);
since = READ_ONCE(file->f_wb_err);
if (verf)
nfsd_copy_write_verifier(verf, nn);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 3b55e239705f..9930fa901039 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat,
kunmap_atomic(kaddr);
nilfs_dat_commit_entry(dat, req);
+
+ if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) {
+ nilfs_error(dat->i_sb,
+ "state inconsistency probably due to duplicate use of vblocknr = %llu",
+ (unsigned long long)req->pr_entry_nr);
+ return;
+ }
nilfs_palloc_commit_free_entry(dat, req);
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index b4cebad21b48..3335ef352915 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -317,7 +317,7 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb)
struct the_nilfs *nilfs = sb->s_fs_info;
struct nilfs_sc_info *sci = nilfs->ns_writer;
- if (!sci || !sci->sc_flush_request)
+ if (sb_rdonly(sb) || unlikely(!sci) || !sci->sc_flush_request)
return;
set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
@@ -2242,7 +2242,7 @@ int nilfs_construct_segment(struct super_block *sb)
struct nilfs_sc_info *sci = nilfs->ns_writer;
struct nilfs_transaction_info *ti;
- if (!sci)
+ if (sb_rdonly(sb) || unlikely(!sci))
return -EROFS;
/* A call inside transactions causes a deadlock. */
@@ -2280,7 +2280,7 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
struct nilfs_transaction_info ti;
int err = 0;
- if (!sci)
+ if (sb_rdonly(sb) || unlikely(!sci))
return -EROFS;
nilfs_transaction_lock(sb, &ti, 0);
@@ -2776,11 +2776,12 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
if (nilfs->ns_writer) {
/*
- * This happens if the filesystem was remounted
- * read/write after nilfs_error degenerated it into a
- * read-only mount.
+ * This happens if the filesystem is made read-only by
+ * __nilfs_error or nilfs_remount and then remounted
+ * read/write. In these cases, reuse the existing
+ * writer.
*/
- nilfs_detach_log_writer(sb);
+ return 0;
}
nilfs->ns_writer = nilfs_segctor_new(sb, root);
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 77ff8e95421f..dc359b56fdfa 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
{
struct buffer_head *bh;
+ void *kaddr;
+ struct nilfs_segment_usage *su;
int ret;
+ down_write(&NILFS_MDT(sufile)->mi_sem);
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
if (!ret) {
mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
+ kaddr = kmap_atomic(bh->b_page);
+ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+ nilfs_segment_usage_set_dirty(su);
+ kunmap_atomic(kaddr);
brelse(bh);
}
+ up_write(&NILFS_MDT(sufile)->mi_sem);
return ret;
}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index ba108f915391..6edb6e0dd61f 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1133,8 +1133,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
goto out;
if (*flags & SB_RDONLY) {
- /* Shutting down log writer */
- nilfs_detach_log_writer(sb);
sb->s_flags |= SB_RDONLY;
/*
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 3b4a079c9617..2064e6473d30 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -13,6 +13,7 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/random.h>
+#include <linux/log2.h>
#include <linux/crc32.h>
#include "nilfs.h"
#include "segment.h"
@@ -193,6 +194,34 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
}
/**
+ * nilfs_get_blocksize - get block size from raw superblock data
+ * @sb: super block instance
+ * @sbp: superblock raw data buffer
+ * @blocksize: place to store block size
+ *
+ * nilfs_get_blocksize() calculates the block size from the block size
+ * exponent information written in @sbp and stores it in @blocksize,
+ * or aborts with an error message if it's too large.
+ *
+ * Return Value: On success, 0 is returned. If the block size is too
+ * large, -EINVAL is returned.
+ */
+static int nilfs_get_blocksize(struct super_block *sb,
+ struct nilfs_super_block *sbp, int *blocksize)
+{
+ unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size);
+
+ if (unlikely(shift_bits >
+ ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS)) {
+ nilfs_err(sb, "too large filesystem blocksize: 2 ^ %u KiB",
+ shift_bits);
+ return -EINVAL;
+ }
+ *blocksize = BLOCK_SIZE << shift_bits;
+ return 0;
+}
+
+/**
* load_nilfs - load and recover the nilfs
* @nilfs: the_nilfs structure to be released
* @sb: super block instance used to recover past segment
@@ -245,11 +274,15 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);
/* verify consistency between two super blocks */
- blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size);
+ err = nilfs_get_blocksize(sb, sbp[0], &blocksize);
+ if (err)
+ goto scan_error;
+
if (blocksize != nilfs->ns_blocksize) {
nilfs_warn(sb,
"blocksize differs between two super blocks (%d != %d)",
blocksize, nilfs->ns_blocksize);
+ err = -EINVAL;
goto scan_error;
}
@@ -443,11 +476,33 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
return crc == le32_to_cpu(sbp->s_sum);
}
-static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
+/**
+ * nilfs_sb2_bad_offset - check the location of the second superblock
+ * @sbp: superblock raw data buffer
+ * @offset: byte offset of second superblock calculated from device size
+ *
+ * nilfs_sb2_bad_offset() checks if the position on the second
+ * superblock is valid or not based on the filesystem parameters
+ * stored in @sbp. If @offset points to a location within the segment
+ * area, or if the parameters themselves are not normal, it is
+ * determined to be invalid.
+ *
+ * Return Value: true if invalid, false if valid.
+ */
+static bool nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
{
- return offset < ((le64_to_cpu(sbp->s_nsegments) *
- le32_to_cpu(sbp->s_blocks_per_segment)) <<
- (le32_to_cpu(sbp->s_log_block_size) + 10));
+ unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size);
+ u32 blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
+ u64 nsegments = le64_to_cpu(sbp->s_nsegments);
+ u64 index;
+
+ if (blocks_per_segment < NILFS_SEG_MIN_BLOCKS ||
+ shift_bits > ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS)
+ return true;
+
+ index = offset >> (shift_bits + BLOCK_SIZE_BITS);
+ do_div(index, blocks_per_segment);
+ return index < nsegments;
}
static void nilfs_release_super_block(struct the_nilfs *nilfs)
@@ -586,9 +641,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto failed_sbh;
- blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
- if (blocksize < NILFS_MIN_BLOCK_SIZE ||
- blocksize > NILFS_MAX_BLOCK_SIZE) {
+ err = nilfs_get_blocksize(sb, sbp, &blocksize);
+ if (err)
+ goto failed_sbh;
+
+ if (blocksize < NILFS_MIN_BLOCK_SIZE) {
nilfs_err(sb,
"couldn't mount because of unsupported filesystem blocksize %d",
blocksize);
@@ -690,9 +747,7 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
{
unsigned long ncleansegs;
- down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
- up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
*nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment;
return 0;
}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index b13d344d40b6..60b97c92e2b2 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -335,7 +335,7 @@ static void o2hb_arm_timeout(struct o2hb_region *reg)
/* negotiate timeout must be less than write timeout. */
schedule_delayed_work(&reg->hr_nego_timeout_work,
msecs_to_jiffies(O2HB_NEGO_TIMEOUT_MS));
- memset(reg->hr_nego_node_bitmap, 0, sizeof(reg->hr_nego_node_bitmap));
+ bitmap_zero(reg->hr_nego_node_bitmap, O2NM_MAX_NODES);
}
static void o2hb_disarm_timeout(struct o2hb_region *reg)
@@ -375,7 +375,7 @@ static void o2hb_nego_timeout(struct work_struct *work)
if (reg->hr_last_hb_status)
return;
- o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
+ o2hb_fill_node_map(live_node_bitmap, O2NM_MAX_NODES);
/* lowest node as master node to make negotiate decision. */
master_node = find_first_bit(live_node_bitmap, O2NM_MAX_NODES);
@@ -386,8 +386,8 @@ static void o2hb_nego_timeout(struct work_struct *work)
config_item_name(&reg->hr_item), reg->hr_bdev);
set_bit(master_node, reg->hr_nego_node_bitmap);
}
- if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap,
- sizeof(reg->hr_nego_node_bitmap))) {
+ if (!bitmap_equal(reg->hr_nego_node_bitmap, live_node_bitmap,
+ O2NM_MAX_NODES)) {
/* check negotiate bitmap every second to do timeout
* approve decision.
*/
@@ -856,8 +856,8 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg)
* live nodes heartbeat on it. In other words, the region has been
* added to all nodes.
*/
- if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap,
- sizeof(o2hb_live_node_bitmap)))
+ if (!bitmap_equal(reg->hr_live_node_bitmap, o2hb_live_node_bitmap,
+ O2NM_MAX_NODES))
goto unlock;
printk(KERN_NOTICE "o2hb: Region %s (%pg) is now a quorum device\n",
@@ -1087,7 +1087,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
* If a node is not configured but is in the livemap, we still need
* to read the slot so as to be able to remove it from the livemap.
*/
- o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
+ o2hb_fill_node_map(live_node_bitmap, O2NM_MAX_NODES);
i = -1;
while ((i = find_next_bit(live_node_bitmap,
O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
@@ -1437,11 +1437,11 @@ void o2hb_init(void)
for (i = 0; i < ARRAY_SIZE(o2hb_live_slots); i++)
INIT_LIST_HEAD(&o2hb_live_slots[i]);
- memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
- memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap));
- memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap));
- memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
- memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
+ bitmap_zero(o2hb_live_node_bitmap, O2NM_MAX_NODES);
+ bitmap_zero(o2hb_region_bitmap, O2NM_MAX_REGIONS);
+ bitmap_zero(o2hb_live_region_bitmap, O2NM_MAX_REGIONS);
+ bitmap_zero(o2hb_quorum_region_bitmap, O2NM_MAX_REGIONS);
+ bitmap_zero(o2hb_failed_region_bitmap, O2NM_MAX_REGIONS);
o2hb_dependent_users = 0;
@@ -1450,23 +1450,21 @@ void o2hb_init(void)
/* if we're already in a callback then we're already serialized by the sem */
static void o2hb_fill_node_map_from_callback(unsigned long *map,
- unsigned bytes)
+ unsigned int bits)
{
- BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long)));
-
- memcpy(map, &o2hb_live_node_bitmap, bytes);
+ bitmap_copy(map, o2hb_live_node_bitmap, bits);
}
/*
* get a map of all nodes that are heartbeating in any regions
*/
-void o2hb_fill_node_map(unsigned long *map, unsigned bytes)
+void o2hb_fill_node_map(unsigned long *map, unsigned int bits)
{
/* callers want to serialize this map and callbacks so that they
* can trust that they don't miss nodes coming to the party */
down_read(&o2hb_callback_sem);
spin_lock(&o2hb_live_lock);
- o2hb_fill_node_map_from_callback(map, bytes);
+ o2hb_fill_node_map_from_callback(map, bits);
spin_unlock(&o2hb_live_lock);
up_read(&o2hb_callback_sem);
}
@@ -2460,7 +2458,7 @@ int o2hb_check_node_heartbeating_no_sem(u8 node_num)
unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
spin_lock(&o2hb_live_lock);
- o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map));
+ o2hb_fill_node_map_from_callback(testing_map, O2NM_MAX_NODES);
spin_unlock(&o2hb_live_lock);
if (!test_bit(node_num, testing_map)) {
mlog(ML_HEARTBEAT,
@@ -2477,7 +2475,7 @@ int o2hb_check_node_heartbeating_from_callback(u8 node_num)
{
unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
- o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map));
+ o2hb_fill_node_map_from_callback(testing_map, O2NM_MAX_NODES);
if (!test_bit(node_num, testing_map)) {
mlog(ML_HEARTBEAT,
"node (%u) does not have heartbeating enabled.\n",
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h
index 1d4100abf6f8..8ef8c1b9eeb7 100644
--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -59,7 +59,7 @@ int o2hb_register_callback(const char *region_uuid,
void o2hb_unregister_callback(const char *region_uuid,
struct o2hb_callback_func *hc);
void o2hb_fill_node_map(unsigned long *map,
- unsigned bytes);
+ unsigned int bits);
void o2hb_exit(void);
void o2hb_init(void);
int o2hb_check_node_heartbeating_no_sem(u8 node_num);
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 7524994e3199..35c05c18de59 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -438,7 +438,7 @@ static int o2net_fill_bitmap(char *buf, int len)
unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
int i = -1, out = 0;
- o2net_fill_node_map(map, sizeof(map));
+ o2net_fill_node_map(map, O2NM_MAX_NODES);
while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES)
out += scnprintf(buf + out, PAGE_SIZE - out, "%d ", i);
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 27fee68f860a..2f61d39e4e50 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -54,7 +54,7 @@ int o2nm_configured_node_map(unsigned long *map, unsigned bytes)
return -EINVAL;
read_lock(&cluster->cl_nodes_lock);
- memcpy(map, cluster->cl_nodes_bitmap, sizeof(cluster->cl_nodes_bitmap));
+ bitmap_copy(map, cluster->cl_nodes_bitmap, O2NM_MAX_NODES);
read_unlock(&cluster->cl_nodes_lock);
return 0;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index f660c0dbdb63..37d222bdfc8c 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -900,7 +900,7 @@ static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
{
struct kvec vec = { .iov_len = len, .iov_base = data, };
struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
- iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, len);
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, len);
return sock_recvmsg(sock, &msg, MSG_DONTWAIT);
}
@@ -990,14 +990,12 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,
}
/* Get a map of all nodes to which this node is currently connected to */
-void o2net_fill_node_map(unsigned long *map, unsigned bytes)
+void o2net_fill_node_map(unsigned long *map, unsigned int bits)
{
struct o2net_sock_container *sc;
int node, ret;
- BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long)));
-
- memset(map, 0, bytes);
+ bitmap_zero(map, bits);
for (node = 0; node < O2NM_MAX_NODES; ++node) {
if (!o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret))
continue;
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index fd2022712167..20f790a47484 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -1094,7 +1094,7 @@ static inline enum dlm_status dlm_err_to_dlm_status(int err)
static inline void dlm_node_iter_init(unsigned long *map,
struct dlm_node_iter *iter)
{
- memcpy(iter->node_map, map, sizeof(iter->node_map));
+ bitmap_copy(iter->node_map, map, O2NM_MAX_NODES);
iter->curnode = -1;
}
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index c4eccd499db8..5c04dde99981 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1576,8 +1576,8 @@ static int dlm_should_restart_join(struct dlm_ctxt *dlm,
spin_lock(&dlm->spinlock);
/* For now, we restart the process if the node maps have
* changed at all */
- ret = memcmp(ctxt->live_map, dlm->live_nodes_map,
- sizeof(dlm->live_nodes_map));
+ ret = !bitmap_equal(ctxt->live_map, dlm->live_nodes_map,
+ O2NM_MAX_NODES);
spin_unlock(&dlm->spinlock);
if (ret)
@@ -1604,13 +1604,11 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
/* group sem locking should work for us here -- we're already
* registered for heartbeat events so filling this should be
* atomic wrt getting those handlers called. */
- o2hb_fill_node_map(dlm->live_nodes_map, sizeof(dlm->live_nodes_map));
+ o2hb_fill_node_map(dlm->live_nodes_map, O2NM_MAX_NODES);
spin_lock(&dlm->spinlock);
- memcpy(ctxt->live_map, dlm->live_nodes_map, sizeof(ctxt->live_map));
-
+ bitmap_copy(ctxt->live_map, dlm->live_nodes_map, O2NM_MAX_NODES);
__dlm_set_joining_node(dlm, dlm->node_num);
-
spin_unlock(&dlm->spinlock);
node = -1;
@@ -1643,8 +1641,7 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
* yes_resp_map. Copy that into our domain map and send a join
* assert message to clean up everyone elses state. */
spin_lock(&dlm->spinlock);
- memcpy(dlm->domain_map, ctxt->yes_resp_map,
- sizeof(ctxt->yes_resp_map));
+ bitmap_copy(dlm->domain_map, ctxt->yes_resp_map, O2NM_MAX_NODES);
set_bit(dlm->node_num, dlm->domain_map);
spin_unlock(&dlm->spinlock);
@@ -2009,9 +2006,9 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
mlog(0, "dlm->recovery_map=%p, &(dlm->recovery_map[0])=%p\n",
dlm->recovery_map, &(dlm->recovery_map[0]));
- memset(dlm->recovery_map, 0, sizeof(dlm->recovery_map));
- memset(dlm->live_nodes_map, 0, sizeof(dlm->live_nodes_map));
- memset(dlm->domain_map, 0, sizeof(dlm->domain_map));
+ bitmap_zero(dlm->recovery_map, O2NM_MAX_NODES);
+ bitmap_zero(dlm->live_nodes_map, O2NM_MAX_NODES);
+ bitmap_zero(dlm->domain_map, O2NM_MAX_NODES);
dlm->dlm_thread_task = NULL;
dlm->dlm_reco_thread_task = NULL;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 227da5b1b6ab..d610da8e2f24 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -258,12 +258,12 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
mle->type = type;
INIT_HLIST_NODE(&mle->master_hash_node);
INIT_LIST_HEAD(&mle->hb_events);
- memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
+ bitmap_zero(mle->maybe_map, O2NM_MAX_NODES);
spin_lock_init(&mle->spinlock);
init_waitqueue_head(&mle->wq);
atomic_set(&mle->woken, 0);
kref_init(&mle->mle_refs);
- memset(mle->response_map, 0, sizeof(mle->response_map));
+ bitmap_zero(mle->response_map, O2NM_MAX_NODES);
mle->master = O2NM_MAX_NODES;
mle->new_master = O2NM_MAX_NODES;
mle->inuse = 0;
@@ -290,8 +290,8 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle,
atomic_inc(&dlm->mle_cur_count[mle->type]);
/* copy off the node_map and register hb callbacks on our copy */
- memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map));
- memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map));
+ bitmap_copy(mle->node_map, dlm->domain_map, O2NM_MAX_NODES);
+ bitmap_copy(mle->vote_map, dlm->domain_map, O2NM_MAX_NODES);
clear_bit(dlm->node_num, mle->vote_map);
clear_bit(dlm->node_num, mle->node_map);
@@ -572,7 +572,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
spin_unlock(&dlm->track_lock);
memset(res->lvb, 0, DLM_LVB_LEN);
- memset(res->refmap, 0, sizeof(res->refmap));
+ bitmap_zero(res->refmap, O2NM_MAX_NODES);
}
struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
@@ -1036,10 +1036,10 @@ recheck:
spin_lock(&mle->spinlock);
m = mle->master;
- map_changed = (memcmp(mle->vote_map, mle->node_map,
- sizeof(mle->vote_map)) != 0);
- voting_done = (memcmp(mle->vote_map, mle->response_map,
- sizeof(mle->vote_map)) == 0);
+ map_changed = !bitmap_equal(mle->vote_map, mle->node_map,
+ O2NM_MAX_NODES);
+ voting_done = bitmap_equal(mle->vote_map, mle->response_map,
+ O2NM_MAX_NODES);
/* restart if we hit any errors */
if (map_changed) {
@@ -1277,11 +1277,11 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
/* now blank out everything, as if we had never
* contacted anyone */
- memset(mle->maybe_map, 0, sizeof(mle->maybe_map));
- memset(mle->response_map, 0, sizeof(mle->response_map));
+ bitmap_zero(mle->maybe_map, O2NM_MAX_NODES);
+ bitmap_zero(mle->response_map, O2NM_MAX_NODES);
/* reset the vote_map to the current node_map */
- memcpy(mle->vote_map, mle->node_map,
- sizeof(mle->node_map));
+ bitmap_copy(mle->vote_map, mle->node_map,
+ O2NM_MAX_NODES);
/* put myself into the maybe map */
if (mle->type != DLM_MLE_BLOCK)
set_bit(dlm->node_num, mle->maybe_map);
@@ -2094,7 +2094,7 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
flags = item->u.am.flags;
spin_lock(&dlm->spinlock);
- memcpy(nodemap, dlm->domain_map, sizeof(nodemap));
+ bitmap_copy(nodemap, dlm->domain_map, O2NM_MAX_NODES);
spin_unlock(&dlm->spinlock);
clear_bit(dlm->node_num, nodemap);
@@ -3447,7 +3447,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
ret = 0;
}
- memset(iter.node_map, 0, sizeof(iter.node_map));
+ bitmap_zero(iter.node_map, O2NM_MAX_NODES);
set_bit(old_master, iter.node_map);
mlog(0, "doing assert master of %.*s back to %u\n",
res->lockname.len, res->lockname.name, old_master);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 52ad342fec3e..50da8af988c1 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -733,7 +733,7 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
struct dlm_reco_node_data *ndata;
spin_lock(&dlm->spinlock);
- memcpy(dlm->reco.node_map, dlm->domain_map, sizeof(dlm->domain_map));
+ bitmap_copy(dlm->reco.node_map, dlm->domain_map, O2NM_MAX_NODES);
/* nodes can only be removed (by dying) after dropping
* this lock, and death will be trapped later, so this should do */
spin_unlock(&dlm->spinlock);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 126671e6caed..3fb98b4569a2 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -157,7 +157,7 @@ static void ocfs2_queue_replay_slots(struct ocfs2_super *osb,
replay_map->rm_state = REPLAY_DONE;
}
-static void ocfs2_free_replay_slots(struct ocfs2_super *osb)
+void ocfs2_free_replay_slots(struct ocfs2_super *osb)
{
struct ocfs2_replay_map *replay_map = osb->replay_map;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 969d0aa28718..41c382f68529 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -150,6 +150,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb);
void ocfs2_recovery_exit(struct ocfs2_super *osb);
int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
+void ocfs2_free_replay_slots(struct ocfs2_super *osb);
/*
* Journal Control:
* Initialize, Load, Shutdown, Wipe a journal.
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index c5ffded7ac92..a8fd51afb794 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -232,6 +232,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns,
handle_t *handle = NULL;
struct ocfs2_super *osb;
struct ocfs2_dinode *dirfe;
+ struct ocfs2_dinode *fe = NULL;
struct buffer_head *new_fe_bh = NULL;
struct inode *inode = NULL;
struct ocfs2_alloc_context *inode_ac = NULL;
@@ -382,6 +383,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns,
goto leave;
}
+ fe = (struct ocfs2_dinode *) new_fe_bh->b_data;
if (S_ISDIR(mode)) {
status = ocfs2_fill_new_dir(osb, handle, dir, inode,
new_fe_bh, data_ac, meta_ac);
@@ -454,8 +456,11 @@ roll_back:
leave:
if (status < 0 && did_quota_inode)
dquot_free_inode(inode);
- if (handle)
+ if (handle) {
+ if (status < 0 && fe)
+ ocfs2_set_links_count(fe, 0);
ocfs2_commit_trans(osb, handle);
+ }
ocfs2_inode_unlock(dir, 1);
if (did_block_signals)
@@ -632,18 +637,9 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
return status;
}
- status = __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
+ return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
parent_fe_bh, handle, inode_ac,
fe_blkno, suballoc_loc, suballoc_bit);
- if (status < 0) {
- u64 bg_blkno = ocfs2_which_suballoc_group(fe_blkno, suballoc_bit);
- int tmp = ocfs2_free_suballoc_bits(handle, inode_ac->ac_inode,
- inode_ac->ac_bh, suballoc_bit, bg_blkno, 1);
- if (tmp)
- mlog_errno(tmp);
- }
-
- return status;
}
static int ocfs2_mkdir(struct user_namespace *mnt_userns,
@@ -2028,8 +2024,11 @@ bail:
ocfs2_clusters_to_bytes(osb->sb, 1));
if (status < 0 && did_quota_inode)
dquot_free_inode(inode);
- if (handle)
+ if (handle) {
+ if (status < 0 && fe)
+ ocfs2_set_links_count(fe, 0);
ocfs2_commit_trans(osb, handle);
+ }
ocfs2_inode_unlock(dir, 1);
if (did_block_signals)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 740b64238312..a503c553bab2 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -560,8 +560,7 @@ static inline unsigned int ocfs2_read_links_count(struct ocfs2_dinode *di)
u32 nlink = le16_to_cpu(di->i_links_count);
u32 hi = le16_to_cpu(di->i_links_count_hi);
- if (di->i_dyn_features & cpu_to_le16(OCFS2_INDEXED_DIR_FL))
- nlink |= (hi << OCFS2_LINKS_HI_SHIFT);
+ nlink |= (hi << OCFS2_LINKS_HI_SHIFT);
return nlink;
}
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 88f75f7f02d7..c973c03f6fd8 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -273,17 +273,17 @@ static int o2cb_cluster_check(void)
*/
#define O2CB_MAP_STABILIZE_COUNT 60
for (i = 0; i < O2CB_MAP_STABILIZE_COUNT; ++i) {
- o2hb_fill_node_map(hbmap, sizeof(hbmap));
+ o2hb_fill_node_map(hbmap, O2NM_MAX_NODES);
if (!test_bit(node_num, hbmap)) {
printk(KERN_ERR "o2cb: %s heartbeat has not been "
"started.\n", (o2hb_global_heartbeat_active() ?
"Global" : "Local"));
return -EINVAL;
}
- o2net_fill_node_map(netmap, sizeof(netmap));
+ o2net_fill_node_map(netmap, O2NM_MAX_NODES);
/* Force set the current node to allow easy compare */
set_bit(node_num, netmap);
- if (!memcmp(hbmap, netmap, sizeof(hbmap)))
+ if (bitmap_equal(hbmap, netmap, O2NM_MAX_NODES))
return 0;
if (i < O2CB_MAP_STABILIZE_COUNT - 1)
msleep(1000);
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 317126261523..a8d5ca98fa57 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -669,6 +669,8 @@ static struct ctl_table_header *ocfs2_table_header;
static int __init ocfs2_stack_glue_init(void)
{
+ int ret;
+
strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);
ocfs2_table_header = register_sysctl("fs/ocfs2/nm", ocfs2_nm_table);
@@ -678,7 +680,11 @@ static int __init ocfs2_stack_glue_init(void)
return -ENOMEM; /* or something. */
}
- return ocfs2_sysfs_init();
+ ret = ocfs2_sysfs_init();
+ if (ret)
+ unregister_sysctl_table(ocfs2_table_header);
+
+ return ret;
}
static void __exit ocfs2_stack_glue_exit(void)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 42c993e53924..0b0e6a132101 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1159,6 +1159,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
out_dismount:
atomic_set(&osb->vol_state, VOLUME_DISABLED);
wake_up(&osb->osb_mount_event);
+ ocfs2_free_replay_slots(osb);
ocfs2_dismount_volume(sb, 1);
goto out;
@@ -1822,12 +1823,14 @@ static int ocfs2_mount_volume(struct super_block *sb)
status = ocfs2_truncate_log_init(osb);
if (status < 0) {
mlog_errno(status);
- goto out_system_inodes;
+ goto out_check_volume;
}
ocfs2_super_unlock(osb, 1);
return 0;
+out_check_volume:
+ ocfs2_free_replay_slots(osb);
out_system_inodes:
if (osb->local_alloc_state == OCFS2_LA_ENABLED)
ocfs2_shutdown_local_alloc(osb);
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 3d65accfae17..370bd3bbf5e4 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -53,7 +53,7 @@ static int orangefs_writepage_locked(struct page *page,
bv.bv_len = wlen;
bv.bv_offset = off % PAGE_SIZE;
WARN_ON(wlen == 0);
- iov_iter_bvec(&iter, WRITE, &bv, 1, wlen);
+ iov_iter_bvec(&iter, ITER_SOURCE, &bv, 1, wlen);
ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen,
len, wr, NULL, NULL);
@@ -112,7 +112,7 @@ static int orangefs_writepages_work(struct orangefs_writepages *ow,
else
ow->bv[i].bv_offset = 0;
}
- iov_iter_bvec(&iter, WRITE, ow->bv, ow->npages, ow->len);
+ iov_iter_bvec(&iter, ITER_SOURCE, ow->bv, ow->npages, ow->len);
WARN_ON(ow->off >= len);
if (ow->off + ow->len > len)
@@ -270,7 +270,7 @@ static void orangefs_readahead(struct readahead_control *rac)
offset = readahead_pos(rac);
i_pages = &rac->mapping->i_pages;
- iov_iter_xarray(&iter, READ, i_pages, offset, readahead_length(rac));
+ iov_iter_xarray(&iter, ITER_DEST, i_pages, offset, readahead_length(rac));
/* read in the pages. */
if ((ret = wait_for_direct_io(ORANGEFS_IO_READ, inode,
@@ -303,7 +303,7 @@ static int orangefs_read_folio(struct file *file, struct folio *folio)
bv.bv_page = &folio->page;
bv.bv_len = folio_size(folio);
bv.bv_offset = 0;
- iov_iter_bvec(&iter, READ, &bv, 1, folio_size(folio));
+ iov_iter_bvec(&iter, ITER_DEST, &bv, 1, folio_size(folio));
ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter,
folio_size(folio), inode->i_size, NULL, NULL, file);
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
index fa762c5fbcb2..91fe1597af7b 100644
--- a/fs/proc/cmdline.c
+++ b/fs/proc/cmdline.c
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include "internal.h"
static int cmdline_proc_show(struct seq_file *m, void *v)
{
@@ -13,7 +14,10 @@ static int cmdline_proc_show(struct seq_file *m, void *v)
static int __init proc_cmdline_init(void)
{
- proc_create_single("cmdline", 0, NULL, cmdline_proc_show);
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_single("cmdline", 0, NULL, cmdline_proc_show);
+ pde->size = saved_command_line_len + 1;
return 0;
}
fs_initcall(proc_cmdline_init);
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index dfe6ce3505ce..e0758fe7936d 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -33,7 +33,16 @@ static int show_console_dev(struct seq_file *m, void *v)
if (con->device) {
const struct tty_driver *driver;
int index;
+
+ /*
+ * Take console_lock to serialize device() callback with
+ * other console operations. For example, fg_console is
+ * modified under console_lock when switching vt.
+ */
+ console_lock();
driver = con->device(con, &index);
+ console_unlock();
+
if (driver) {
dev = MKDEV(driver->major, driver->minor_start);
dev += index;
@@ -63,7 +72,12 @@ static void *c_start(struct seq_file *m, loff_t *pos)
struct console *con;
loff_t off = 0;
- console_lock();
+ /*
+ * Hold the console_list_lock to guarantee safe traversal of the
+ * console list. SRCU cannot be used because there is no
+ * place to store the SRCU cookie.
+ */
+ console_list_lock();
for_each_console(con)
if (off++ == *pos)
break;
@@ -74,13 +88,14 @@ static void *c_start(struct seq_file *m, loff_t *pos)
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
struct console *con = v;
+
++*pos;
- return con->next;
+ return hlist_entry_safe(con->node.next, struct console, node);
}
static void c_stop(struct seq_file *m, void *v)
{
- console_unlock();
+ console_list_unlock();
}
static const struct seq_operations consoles_op = {
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 913bef0d2a36..fc46d6fe080c 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -7,6 +7,7 @@
#include <linux/namei.h>
#include <linux/pid.h>
#include <linux/ptrace.h>
+#include <linux/bitmap.h>
#include <linux/security.h>
#include <linux/file.h>
#include <linux/seq_file.h>
@@ -279,6 +280,30 @@ out:
return 0;
}
+static int proc_readfd_count(struct inode *inode, loff_t *count)
+{
+ struct task_struct *p = get_proc_task(inode);
+ struct fdtable *fdt;
+
+ if (!p)
+ return -ENOENT;
+
+ task_lock(p);
+ if (p->files) {
+ rcu_read_lock();
+
+ fdt = files_fdtable(p->files);
+ *count = bitmap_weight(fdt->open_fds, fdt->max_fds);
+
+ rcu_read_unlock();
+ }
+ task_unlock(p);
+
+ put_task_struct(p);
+
+ return 0;
+}
+
static int proc_readfd(struct file *file, struct dir_context *ctx)
{
return proc_readfd_common(file, ctx, proc_fd_instantiate);
@@ -319,9 +344,29 @@ int proc_fd_permission(struct user_namespace *mnt_userns,
return rv;
}
+static int proc_fd_getattr(struct user_namespace *mnt_userns,
+ const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+ int rv = 0;
+
+ generic_fillattr(&init_user_ns, inode, stat);
+
+ /* If it's a directory, put the number of open fds there */
+ if (S_ISDIR(inode->i_mode)) {
+ rv = proc_readfd_count(inode, &stat->size);
+ if (rv < 0)
+ return rv;
+ }
+
+ return rv;
+}
+
const struct inode_operations proc_fd_inode_operations = {
.lookup = proc_lookupfd,
.permission = proc_fd_permission,
+ .getattr = proc_fd_getattr,
.setattr = proc_setattr,
};
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 5101131e6047..440960110a42 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -115,7 +115,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#endif
show_val_kb(m, "PageTables: ",
global_node_page_state(NR_PAGETABLE));
- show_val_kb(m, "SecPageTables: ",
+ show_val_kb(m, "SecPageTables: ",
global_node_page_state(NR_SECONDARY_PAGETABLE));
show_val_kb(m, "NFS_Unstable: ", 0);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8b4f3073f8f5..8a74cdcc9af0 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -902,7 +902,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
goto out_put_mm;
hold_task_mempolicy(priv);
- vma = mas_find(&mas, 0);
+ vma = mas_find(&mas, ULONG_MAX);
if (unlikely(!vma))
goto empty_set;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index f2aa86c421f2..09a81e4b1273 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -199,7 +199,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
struct kvec kvec = { .iov_base = buf, .iov_len = count };
struct iov_iter iter;
- iov_iter_kvec(&iter, READ, &kvec, 1, count);
+ iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count);
return read_from_oldmem(&iter, count, ppos, false);
}
@@ -212,7 +212,7 @@ ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
struct kvec kvec = { .iov_base = buf, .iov_len = count };
struct iov_iter iter;
- iov_iter_kvec(&iter, READ, &kvec, 1, count);
+ iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count);
return read_from_oldmem(&iter, count, ppos,
cc_platform_has(CC_ATTR_MEM_ENCRYPT));
@@ -437,7 +437,7 @@ static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf)
offset = (loff_t) index << PAGE_SHIFT;
kvec.iov_base = page_address(page);
kvec.iov_len = PAGE_SIZE;
- iov_iter_kvec(&iter, READ, &kvec, 1, PAGE_SIZE);
+ iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, PAGE_SIZE);
rc = __read_vmcore(&iter, &offset);
if (rc < 0) {
@@ -1567,6 +1567,7 @@ static int __init vmcore_init(void)
return rc;
rc = parse_crash_elf_headers();
if (rc) {
+ elfcorehdr_free(elfcorehdr_addr);
pr_warn("Kdump: vmcore not initialized\n");
return rc;
}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 0c034ea39954..cbc0b468c1ab 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -89,6 +89,11 @@ static char *compress =
module_param(compress, charp, 0444);
MODULE_PARM_DESC(compress, "compression to use");
+/* How much of the kernel log to snapshot */
+unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES;
+module_param(kmsg_bytes, ulong, 0444);
+MODULE_PARM_DESC(kmsg_bytes, "amount of kernel log to snapshot (in bytes)");
+
/* Compression parameters */
static struct crypto_comp *tfm;
@@ -100,9 +105,6 @@ struct pstore_zbackend {
static char *big_oops_buf;
static size_t big_oops_buf_sz;
-/* How much of the console log to snapshot */
-unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES;
-
void pstore_set_kmsg_bytes(int bytes)
{
kmsg_bytes = bytes;
@@ -391,6 +393,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
const char *why;
unsigned int part = 1;
unsigned long flags = 0;
+ int saved_ret = 0;
int ret;
why = kmsg_dump_reason_str(reason);
@@ -461,12 +464,21 @@ static void pstore_dump(struct kmsg_dumper *dumper,
if (ret == 0 && reason == KMSG_DUMP_OOPS) {
pstore_new_entry = 1;
pstore_timer_kick();
+ } else {
+ /* Preserve only the first non-zero returned value. */
+ if (!saved_ret)
+ saved_ret = ret;
}
total += record.size;
part++;
}
spin_unlock_irqrestore(&psinfo->buf_lock, flags);
+
+ if (saved_ret) {
+ pr_err_once("backend (%s) writing error (%d)\n", psinfo->name,
+ saved_ret);
+ }
}
static struct kmsg_dumper pstore_dumper = {
@@ -562,8 +574,9 @@ out:
int pstore_register(struct pstore_info *psi)
{
if (backend && strcmp(backend, psi->name)) {
- pr_warn("ignoring unexpected backend '%s'\n", psi->name);
- return -EPERM;
+ pr_warn("backend '%s' already in use: ignoring '%s'\n",
+ backend, psi->name);
+ return -EBUSY;
}
/* Sanity check flags. */
@@ -662,6 +675,8 @@ void pstore_unregister(struct pstore_info *psi)
psinfo = NULL;
kfree(backend);
backend = NULL;
+
+ pr_info("Unregistered %s as persistent store backend\n", psi->name);
mutex_unlock(&psinfo_lock);
}
EXPORT_SYMBOL_GPL(pstore_unregister);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index fefe3d391d3a..9a5052431fd3 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -18,10 +18,11 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/compiler.h>
-#include <linux/pstore_ram.h>
#include <linux/of.h>
#include <linux/of_address.h>
+
#include "internal.h"
+#include "ram_internal.h"
#define RAMOOPS_KERNMSG_HDR "===="
#define MIN_MEM_SIZE 4096UL
@@ -451,20 +452,28 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
{
int i;
+ /* Free pmsg PRZ */
+ persistent_ram_free(&cxt->mprz);
+
+ /* Free console PRZ */
+ persistent_ram_free(&cxt->cprz);
+
/* Free dump PRZs */
if (cxt->dprzs) {
for (i = 0; i < cxt->max_dump_cnt; i++)
- persistent_ram_free(cxt->dprzs[i]);
+ persistent_ram_free(&cxt->dprzs[i]);
kfree(cxt->dprzs);
+ cxt->dprzs = NULL;
cxt->max_dump_cnt = 0;
}
/* Free ftrace PRZs */
if (cxt->fprzs) {
for (i = 0; i < cxt->max_ftrace_cnt; i++)
- persistent_ram_free(cxt->fprzs[i]);
+ persistent_ram_free(&cxt->fprzs[i]);
kfree(cxt->fprzs);
+ cxt->fprzs = NULL;
cxt->max_ftrace_cnt = 0;
}
}
@@ -548,9 +557,10 @@ static int ramoops_init_przs(const char *name,
while (i > 0) {
i--;
- persistent_ram_free(prz_ar[i]);
+ persistent_ram_free(&prz_ar[i]);
}
kfree(prz_ar);
+ prz_ar = NULL;
goto fail;
}
*paddr += zone_sz;
@@ -735,6 +745,7 @@ static int ramoops_probe(struct platform_device *pdev)
/* Make sure we didn't get bogus platform data pointer. */
if (!pdata) {
pr_err("NULL platform data\n");
+ err = -EINVAL;
goto fail_out;
}
@@ -742,6 +753,7 @@ static int ramoops_probe(struct platform_device *pdev)
!pdata->ftrace_size && !pdata->pmsg_size)) {
pr_err("The memory size and the record/console size must be "
"non-zero\n");
+ err = -EINVAL;
goto fail_out;
}
@@ -772,12 +784,17 @@ static int ramoops_probe(struct platform_device *pdev)
dump_mem_sz, cxt->record_size,
&cxt->max_dump_cnt, 0, 0);
if (err)
- goto fail_out;
+ goto fail_init;
err = ramoops_init_prz("console", dev, cxt, &cxt->cprz, &paddr,
cxt->console_size, 0);
if (err)
- goto fail_init_cprz;
+ goto fail_init;
+
+ err = ramoops_init_prz("pmsg", dev, cxt, &cxt->mprz, &paddr,
+ cxt->pmsg_size, 0);
+ if (err)
+ goto fail_init;
cxt->max_ftrace_cnt = (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)
? nr_cpu_ids
@@ -788,12 +805,7 @@ static int ramoops_probe(struct platform_device *pdev)
(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)
? PRZ_FLAG_NO_LOCK : 0);
if (err)
- goto fail_init_fprz;
-
- err = ramoops_init_prz("pmsg", dev, cxt, &cxt->mprz, &paddr,
- cxt->pmsg_size, 0);
- if (err)
- goto fail_init_mprz;
+ goto fail_init;
cxt->pstore.data = cxt;
/*
@@ -857,11 +869,7 @@ fail_buf:
kfree(cxt->pstore.buf);
fail_clear:
cxt->pstore.bufsize = 0;
- persistent_ram_free(cxt->mprz);
-fail_init_mprz:
-fail_init_fprz:
- persistent_ram_free(cxt->cprz);
-fail_init_cprz:
+fail_init:
ramoops_free_przs(cxt);
fail_out:
return err;
@@ -876,8 +884,6 @@ static int ramoops_remove(struct platform_device *pdev)
kfree(cxt->pstore.buf);
cxt->pstore.bufsize = 0;
- persistent_ram_free(cxt->mprz);
- persistent_ram_free(cxt->cprz);
ramoops_free_przs(cxt);
return 0;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index a89e33719fcf..966191d3a5ba 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -13,13 +13,14 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/memblock.h>
-#include <linux/pstore_ram.h>
#include <linux/rslib.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <asm/page.h>
+#include "ram_internal.h"
+
/**
* struct persistent_ram_buffer - persistent circular RAM buffer
*
@@ -439,7 +440,11 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size,
phys_addr_t addr = page_start + i * PAGE_SIZE;
pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
}
- vaddr = vmap(pages, page_count, VM_MAP, prot);
+ /*
+ * VM_IOREMAP used here to bypass this region during vread()
+ * and kmap_atomic() (i.e. kcore) to avoid __va() failures.
+ */
+ vaddr = vmap(pages, page_count, VM_MAP | VM_IOREMAP, prot);
kfree(pages);
/*
@@ -543,8 +548,14 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
return 0;
}
-void persistent_ram_free(struct persistent_ram_zone *prz)
+void persistent_ram_free(struct persistent_ram_zone **_prz)
{
+ struct persistent_ram_zone *prz;
+
+ if (!_prz)
+ return;
+
+ prz = *_prz;
if (!prz)
return;
@@ -568,6 +579,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
persistent_ram_free_old(prz);
kfree(prz->label);
kfree(prz);
+ *_prz = NULL;
}
struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
@@ -604,6 +616,6 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
return prz;
err:
- persistent_ram_free(prz);
+ persistent_ram_free(&prz);
return ERR_PTR(ret);
}
diff --git a/fs/pstore/ram_internal.h b/fs/pstore/ram_internal.h
new file mode 100644
index 000000000000..5f694698351f
--- /dev/null
+++ b/fs/pstore/ram_internal.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2010 Marco Stornelli <marco.stornelli@gmail.com>
+ * Copyright (C) 2011 Kees Cook <keescook@chromium.org>
+ * Copyright (C) 2011 Google, Inc.
+ */
+
+#include <linux/pstore_ram.h>
+
+/*
+ * Choose whether access to the RAM zone requires locking or not. If a zone
+ * can be written to from different CPUs like with ftrace for example, then
+ * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required.
+ */
+#define PRZ_FLAG_NO_LOCK BIT(0)
+/*
+ * If a PRZ should only have a single-boot lifetime, this marks it as
+ * getting wiped after its contents get copied out after boot.
+ */
+#define PRZ_FLAG_ZAP_OLD BIT(1)
+
+/**
+ * struct persistent_ram_zone - Details of a persistent RAM zone (PRZ)
+ * used as a pstore backend
+ *
+ * @paddr: physical address of the mapped RAM area
+ * @size: size of mapping
+ * @label: unique name of this PRZ
+ * @type: frontend type for this PRZ
+ * @flags: holds PRZ_FLAGS_* bits
+ *
+ * @buffer_lock:
+ * locks access to @buffer "size" bytes and "start" offset
+ * @buffer:
+ * pointer to actual RAM area managed by this PRZ
+ * @buffer_size:
+ * bytes in @buffer->data (not including any trailing ECC bytes)
+ *
+ * @par_buffer:
+ * pointer into @buffer->data containing ECC bytes for @buffer->data
+ * @par_header:
+ * pointer into @buffer->data containing ECC bytes for @buffer header
+ * (i.e. all fields up to @data)
+ * @rs_decoder:
+ * RSLIB instance for doing ECC calculations
+ * @corrected_bytes:
+ * ECC corrected bytes accounting since boot
+ * @bad_blocks:
+ * ECC uncorrectable bytes accounting since boot
+ * @ecc_info:
+ * ECC configuration details
+ *
+ * @old_log:
+ * saved copy of @buffer->data prior to most recent wipe
+ * @old_log_size:
+ * bytes contained in @old_log
+ *
+ */
+struct persistent_ram_zone {
+ phys_addr_t paddr;
+ size_t size;
+ void *vaddr;
+ char *label;
+ enum pstore_type_id type;
+ u32 flags;
+
+ raw_spinlock_t buffer_lock;
+ struct persistent_ram_buffer *buffer;
+ size_t buffer_size;
+
+ char *par_buffer;
+ char *par_header;
+ struct rs_control *rs_decoder;
+ int corrected_bytes;
+ int bad_blocks;
+ struct persistent_ram_ecc_info ecc_info;
+
+ char *old_log;
+ size_t old_log_size;
+};
+
+struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
+ u32 sig, struct persistent_ram_ecc_info *ecc_info,
+ unsigned int memtype, u32 flags, char *label);
+void persistent_ram_free(struct persistent_ram_zone **_prz);
+void persistent_ram_zap(struct persistent_ram_zone *prz);
+
+int persistent_ram_write(struct persistent_ram_zone *prz, const void *s,
+ unsigned int count);
+int persistent_ram_write_user(struct persistent_ram_zone *prz,
+ const void __user *s, unsigned int count);
+
+void persistent_ram_save_old(struct persistent_ram_zone *prz);
+size_t persistent_ram_old_size(struct persistent_ram_zone *prz);
+void *persistent_ram_old(struct persistent_ram_zone *prz);
+void persistent_ram_free_old(struct persistent_ram_zone *prz);
+ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
+ char *str, size_t len);
diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
index 017d0d4ad329..2770746bb7aa 100644
--- a/fs/pstore/zone.c
+++ b/fs/pstore/zone.c
@@ -761,7 +761,7 @@ static inline int notrace psz_kmsg_write_record(struct psz_context *cxt,
/* avoid destroying old data, allocate a new one */
len = zone->buffer_size + sizeof(*zone->buffer);
zone->oldbuf = zone->buffer;
- zone->buffer = kzalloc(len, GFP_KERNEL);
+ zone->buffer = kzalloc(len, GFP_ATOMIC);
if (!zone->buffer) {
zone->buffer = zone->oldbuf;
return -ENOMEM;
diff --git a/fs/read_write.c b/fs/read_write.c
index 328ce8cf9a85..7a2ff6157eda 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -384,7 +384,7 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = (ppos ? *ppos : 0);
- iov_iter_ubuf(&iter, READ, buf, len);
+ iov_iter_ubuf(&iter, ITER_DEST, buf, len);
ret = call_read_iter(filp, &kiocb, &iter);
BUG_ON(ret == -EIOCBQUEUED);
@@ -424,7 +424,7 @@ ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = pos ? *pos : 0;
- iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len);
+ iov_iter_kvec(&iter, ITER_DEST, &iov, 1, iov.iov_len);
ret = file->f_op->read_iter(&kiocb, &iter);
if (ret > 0) {
if (pos)
@@ -486,7 +486,7 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = (ppos ? *ppos : 0);
- iov_iter_ubuf(&iter, WRITE, (void __user *)buf, len);
+ iov_iter_ubuf(&iter, ITER_SOURCE, (void __user *)buf, len);
ret = call_write_iter(filp, &kiocb, &iter);
BUG_ON(ret == -EIOCBQUEUED);
@@ -533,7 +533,7 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
.iov_len = min_t(size_t, count, MAX_RW_COUNT),
};
struct iov_iter iter;
- iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
+ iov_iter_kvec(&iter, ITER_SOURCE, &iov, 1, iov.iov_len);
return __kernel_write_iter(file, &iter, pos);
}
/*
@@ -911,7 +911,7 @@ static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
struct iov_iter iter;
ssize_t ret;
- ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
+ ret = import_iovec(ITER_DEST, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
if (ret >= 0) {
ret = do_iter_read(file, &iter, pos, flags);
kfree(iov);
@@ -928,7 +928,7 @@ static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
struct iov_iter iter;
ssize_t ret;
- ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
+ ret = import_iovec(ITER_SOURCE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
if (ret >= 0) {
file_start_write(file);
ret = do_iter_write(file, &iter, pos, flags);
@@ -1388,6 +1388,8 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags)
{
+ lockdep_assert(sb_write_started(file_inode(file_out)->i_sb));
+
return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
}
@@ -1424,7 +1426,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
* and several different sets of file_operations, but they all end up
* using the same ->copy_file_range() function pointer.
*/
- if (file_out->f_op->copy_file_range) {
+ if (flags & COPY_FILE_SPLICE) {
+ /* cross sb splice is allowed */
+ } else if (file_out->f_op->copy_file_range) {
if (file_in->f_op->copy_file_range !=
file_out->f_op->copy_file_range)
return -EXDEV;
@@ -1474,8 +1478,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
size_t len, unsigned int flags)
{
ssize_t ret;
+ bool splice = flags & COPY_FILE_SPLICE;
- if (flags != 0)
+ if (flags & ~COPY_FILE_SPLICE)
return -EINVAL;
ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
@@ -1501,14 +1506,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* same sb using clone, but for filesystems where both clone and copy
* are supported (e.g. nfs,cifs), we only call the copy method.
*/
- if (file_out->f_op->copy_file_range) {
+ if (!splice && file_out->f_op->copy_file_range) {
ret = file_out->f_op->copy_file_range(file_in, pos_in,
file_out, pos_out,
len, flags);
goto done;
}
- if (file_in->f_op->remap_file_range &&
+ if (!splice && file_in->f_op->remap_file_range &&
file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
ret = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out,
@@ -1528,6 +1533,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* consistent story about which filesystems support copy_file_range()
* and which filesystems do not, that will allow userspace tools to
* make consistent desicions w.r.t using copy_file_range().
+ *
+ * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE.
*/
ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
flags);
@@ -1582,6 +1589,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
pos_out = f_out.file->f_pos;
}
+ ret = -EINVAL;
+ if (flags != 0)
+ goto out;
+
ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
flags);
if (ret > 0) {
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 9456a2032224..f5fdaf3b1572 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -156,7 +156,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
ssize_t ret;
init_sync_kiocb(&kiocb, file);
- iov_iter_init(&iter, READ, &iov, 1, size);
+ iov_iter_init(&iter, ITER_DEST, &iov, 1, size);
kiocb.ki_pos = *ppos;
ret = seq_read_iter(&kiocb, &iter);
diff --git a/fs/splice.c b/fs/splice.c
index 0878b852b355..5969b7a1d353 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -303,7 +303,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
struct kiocb kiocb;
int ret;
- iov_iter_pipe(&to, READ, pipe, len);
+ iov_iter_pipe(&to, ITER_DEST, pipe, len);
init_sync_kiocb(&kiocb, in);
kiocb.ki_pos = *ppos;
ret = call_read_iter(in, &kiocb, &to);
@@ -682,7 +682,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
n++;
}
- iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left);
+ iov_iter_bvec(&from, ITER_SOURCE, array, n, sd.total_len - left);
ret = vfs_iter_write(out, &from, &sd.pos, 0);
if (ret <= 0)
break;
@@ -1263,9 +1263,9 @@ static int vmsplice_type(struct fd f, int *type)
if (!f.file)
return -EBADF;
if (f.file->f_mode & FMODE_WRITE) {
- *type = WRITE;
+ *type = ITER_SOURCE;
} else if (f.file->f_mode & FMODE_READ) {
- *type = READ;
+ *type = ITER_DEST;
} else {
fdput(f);
return -EBADF;
@@ -1314,7 +1314,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
if (!iov_iter_count(&iter))
error = 0;
- else if (iov_iter_rw(&iter) == WRITE)
+ else if (type == ITER_SOURCE)
error = vmsplice_to_pipe(f.file, &iter, flags);
else
error = vmsplice_to_user(f.file, &iter, flags);
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 916e78fabcaa..60fc98bdf421 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -54,9 +54,35 @@ config SQUASHFS_FILE_DIRECT
endchoice
+config SQUASHFS_DECOMP_SINGLE
+ depends on SQUASHFS
+ def_bool n
+
+config SQUASHFS_DECOMP_MULTI
+ depends on SQUASHFS
+ def_bool n
+
+config SQUASHFS_DECOMP_MULTI_PERCPU
+ depends on SQUASHFS
+ def_bool n
+
+config SQUASHFS_CHOICE_DECOMP_BY_MOUNT
+ bool "Select the parallel decompression mode during mount"
+ depends on SQUASHFS
+ default n
+ select SQUASHFS_DECOMP_SINGLE
+ select SQUASHFS_DECOMP_MULTI
+ select SQUASHFS_DECOMP_MULTI_PERCPU
+ select SQUASHFS_MOUNT_DECOMP_THREADS
+ help
+ Compile all parallel decompression modes and specify the
+ decompression mode by setting "threads=" during mount.
+ default Decompressor parallelisation is SQUASHFS_DECOMP_SINGLE
+
choice
- prompt "Decompressor parallelisation options"
+ prompt "Select decompression parallel mode at compile time"
depends on SQUASHFS
+ depends on !SQUASHFS_CHOICE_DECOMP_BY_MOUNT
help
Squashfs now supports three parallelisation options for
decompression. Each one exhibits various trade-offs between
@@ -64,15 +90,17 @@ choice
If in doubt, select "Single threaded compression"
-config SQUASHFS_DECOMP_SINGLE
+config SQUASHFS_COMPILE_DECOMP_SINGLE
bool "Single threaded compression"
+ select SQUASHFS_DECOMP_SINGLE
help
Traditionally Squashfs has used single-threaded decompression.
Only one block (data or metadata) can be decompressed at any
one time. This limits CPU and memory usage to a minimum.
-config SQUASHFS_DECOMP_MULTI
+config SQUASHFS_COMPILE_DECOMP_MULTI
bool "Use multiple decompressors for parallel I/O"
+ select SQUASHFS_DECOMP_MULTI
help
By default Squashfs uses a single decompressor but it gives
poor performance on parallel I/O workloads when using multiple CPU
@@ -85,8 +113,9 @@ config SQUASHFS_DECOMP_MULTI
decompressors per core. It dynamically allocates decompressors
on a demand basis.
-config SQUASHFS_DECOMP_MULTI_PERCPU
+config SQUASHFS_COMPILE_DECOMP_MULTI_PERCPU
bool "Use percpu multiple decompressors for parallel I/O"
+ select SQUASHFS_DECOMP_MULTI_PERCPU
help
By default Squashfs uses a single decompressor but it gives
poor performance on parallel I/O workloads when using multiple CPU
@@ -95,9 +124,21 @@ config SQUASHFS_DECOMP_MULTI_PERCPU
This decompressor implementation uses a maximum of one
decompressor per core. It uses percpu variables to ensure
decompression is load-balanced across the cores.
-
endchoice
+config SQUASHFS_MOUNT_DECOMP_THREADS
+ bool "Add the mount parameter 'threads=' for squashfs"
+ depends on SQUASHFS
+ depends on SQUASHFS_DECOMP_MULTI
+ default n
+ help
+ Use threads= to set the decompression parallel mode and the number of threads.
+ If SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y
+ threads=<single|multi|percpu|1|2|3|...>
+ else
+ threads=<2|3|...>
+ The upper limit is num_online_cpus() * 2.
+
config SQUASHFS_XATTR
bool "Squashfs XATTR support"
depends on SQUASHFS
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 833aca92301f..bed3bb8b27fa 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -216,7 +216,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
res = -EIO;
goto out_free_bio;
}
- res = squashfs_decompress(msblk, bio, offset, length, output);
+ res = msblk->thread_ops->decompress(msblk, bio, offset, length, output);
} else {
res = copy_bio_to_actor(bio, output, offset, length);
}
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index d57bef91ab08..8893cb9b4198 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -134,7 +134,7 @@ void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags)
if (IS_ERR(comp_opts))
return comp_opts;
- stream = squashfs_decompressor_create(msblk, comp_opts);
+ stream = msblk->thread_ops->create(msblk, comp_opts);
if (IS_ERR(stream))
kfree(comp_opts);
diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c
index db9f12a3ea05..416c53eedbd1 100644
--- a/fs/squashfs/decompressor_multi.c
+++ b/fs/squashfs/decompressor_multi.c
@@ -29,12 +29,11 @@
#define MAX_DECOMPRESSOR (num_online_cpus() * 2)
-int squashfs_max_decompressors(void)
+static int squashfs_max_decompressors(void)
{
return MAX_DECOMPRESSOR;
}
-
struct squashfs_stream {
void *comp_opts;
struct list_head strm_list;
@@ -59,7 +58,7 @@ static void put_decomp_stream(struct decomp_stream *decomp_strm,
wake_up(&stream->wait);
}
-void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+static void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
void *comp_opts)
{
struct squashfs_stream *stream;
@@ -103,7 +102,7 @@ out:
}
-void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+static void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
{
struct squashfs_stream *stream = msblk->stream;
if (stream) {
@@ -145,7 +144,7 @@ static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk,
* If there is no available decomp and already full,
* let's wait for releasing decomp from other users.
*/
- if (stream->avail_decomp >= MAX_DECOMPRESSOR)
+ if (stream->avail_decomp >= msblk->max_thread_num)
goto wait;
/* Let's allocate new decomp */
@@ -161,7 +160,7 @@ static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk,
}
stream->avail_decomp++;
- WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR);
+ WARN_ON(stream->avail_decomp > msblk->max_thread_num);
mutex_unlock(&stream->mutex);
break;
@@ -180,7 +179,7 @@ wait:
}
-int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
+static int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
int offset, int length,
struct squashfs_page_actor *output)
{
@@ -195,3 +194,10 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
msblk->decompressor->name);
return res;
}
+
+const struct squashfs_decompressor_thread_ops squashfs_decompressor_multi = {
+ .create = squashfs_decompressor_create,
+ .destroy = squashfs_decompressor_destroy,
+ .decompress = squashfs_decompress,
+ .max_decompressors = squashfs_max_decompressors,
+};
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c
index b881b9283b7f..1dfadf76ed9a 100644
--- a/fs/squashfs/decompressor_multi_percpu.c
+++ b/fs/squashfs/decompressor_multi_percpu.c
@@ -25,7 +25,7 @@ struct squashfs_stream {
local_lock_t lock;
};
-void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+static void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
void *comp_opts)
{
struct squashfs_stream *stream;
@@ -59,7 +59,7 @@ out:
return ERR_PTR(err);
}
-void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+static void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
{
struct squashfs_stream __percpu *percpu =
(struct squashfs_stream __percpu *) msblk->stream;
@@ -75,19 +75,21 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
}
}
-int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
+static int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
int offset, int length, struct squashfs_page_actor *output)
{
struct squashfs_stream *stream;
+ struct squashfs_stream __percpu *percpu =
+ (struct squashfs_stream __percpu *) msblk->stream;
int res;
- local_lock(&msblk->stream->lock);
- stream = this_cpu_ptr(msblk->stream);
+ local_lock(&percpu->lock);
+ stream = this_cpu_ptr(percpu);
res = msblk->decompressor->decompress(msblk, stream->stream, bio,
offset, length, output);
- local_unlock(&msblk->stream->lock);
+ local_unlock(&percpu->lock);
if (res < 0)
ERROR("%s decompression failed, data probably corrupt\n",
@@ -96,7 +98,14 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
return res;
}
-int squashfs_max_decompressors(void)
+static int squashfs_max_decompressors(void)
{
return num_possible_cpus();
}
+
+const struct squashfs_decompressor_thread_ops squashfs_decompressor_percpu = {
+ .create = squashfs_decompressor_create,
+ .destroy = squashfs_decompressor_destroy,
+ .decompress = squashfs_decompress,
+ .max_decompressors = squashfs_max_decompressors,
+};
diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c
index 4eb3d083d45e..6f161887710b 100644
--- a/fs/squashfs/decompressor_single.c
+++ b/fs/squashfs/decompressor_single.c
@@ -24,7 +24,7 @@ struct squashfs_stream {
struct mutex mutex;
};
-void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+static void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
void *comp_opts)
{
struct squashfs_stream *stream;
@@ -49,7 +49,7 @@ out:
return ERR_PTR(err);
}
-void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+static void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
{
struct squashfs_stream *stream = msblk->stream;
@@ -59,7 +59,7 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
}
}
-int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
+static int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
int offset, int length,
struct squashfs_page_actor *output)
{
@@ -78,7 +78,14 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
return res;
}
-int squashfs_max_decompressors(void)
+static int squashfs_max_decompressors(void)
{
return 1;
}
+
+const struct squashfs_decompressor_thread_ops squashfs_decompressor_single = {
+ .create = squashfs_decompressor_create,
+ .destroy = squashfs_decompressor_destroy,
+ .decompress = squashfs_decompress,
+ .max_decompressors = squashfs_max_decompressors,
+};
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index e56510964b22..8ba8c4c50770 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -506,8 +506,9 @@ static int squashfs_readahead_fragment(struct page **page,
squashfs_i(inode)->fragment_size);
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
+ int error = buffer->error;
- if (buffer->error)
+ if (error)
goto out;
expected += squashfs_i(inode)->fragment_offset;
@@ -529,7 +530,7 @@ static int squashfs_readahead_fragment(struct page **page,
out:
squashfs_cache_put(buffer);
- return buffer->error;
+ return error;
}
static void squashfs_readahead(struct readahead_control *ractl)
@@ -557,6 +558,13 @@ static void squashfs_readahead(struct readahead_control *ractl)
int res, bsize;
u64 block = 0;
unsigned int expected;
+ struct page *last_page;
+
+ expected = start >> msblk->block_log == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
+
+ max_pages = (expected + PAGE_SIZE - 1) >> PAGE_SHIFT;
nr_pages = __readahead_batch(ractl, pages, max_pages);
if (!nr_pages)
@@ -566,13 +574,10 @@ static void squashfs_readahead(struct readahead_control *ractl)
goto skip_pages;
index = pages[0]->index >> shift;
+
if ((pages[nr_pages - 1]->index >> shift) != index)
goto skip_pages;
- expected = index == file_end ?
- (i_size_read(inode) & (msblk->block_size - 1)) :
- msblk->block_size;
-
if (index == file_end && squashfs_i(inode)->fragment_block !=
SQUASHFS_INVALID_BLK) {
res = squashfs_readahead_fragment(pages, nr_pages,
@@ -593,15 +598,15 @@ static void squashfs_readahead(struct readahead_control *ractl)
res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
- squashfs_page_actor_free(actor);
+ last_page = squashfs_page_actor_free(actor);
if (res == expected) {
int bytes;
/* Last page (if present) may have trailing bytes not filled */
bytes = res % PAGE_SIZE;
- if (pages[nr_pages - 1]->index == file_end && bytes)
- memzero_page(pages[nr_pages - 1], bytes,
+ if (index == file_end && bytes && last_page)
+ memzero_page(last_page, bytes,
PAGE_SIZE - bytes);
for (i = 0; i < nr_pages; i++) {
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index 54b93bf4a25c..81af6c4ca115 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c
@@ -71,11 +71,13 @@ static void *handle_next_page(struct squashfs_page_actor *actor)
(actor->next_index != actor->page[actor->next_page]->index)) {
actor->next_index++;
actor->returned_pages++;
+ actor->last_page = NULL;
return actor->alloc_buffer ? actor->tmp_buffer : ERR_PTR(-ENOMEM);
}
actor->next_index++;
actor->returned_pages++;
+ actor->last_page = actor->page[actor->next_page];
return actor->pageaddr = kmap_local_page(actor->page[actor->next_page++]);
}
@@ -125,6 +127,7 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_
actor->returned_pages = 0;
actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1);
actor->pageaddr = NULL;
+ actor->last_page = NULL;
actor->alloc_buffer = msblk->decompressor->alloc_buffer;
actor->squashfs_first_page = direct_first_page;
actor->squashfs_next_page = direct_next_page;
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 95ffbb543d91..97d4983559b1 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -16,6 +16,7 @@ struct squashfs_page_actor {
void *(*squashfs_first_page)(struct squashfs_page_actor *);
void *(*squashfs_next_page)(struct squashfs_page_actor *);
void (*squashfs_finish_page)(struct squashfs_page_actor *);
+ struct page *last_page;
int pages;
int length;
int next_page;
@@ -29,10 +30,13 @@ extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
extern struct squashfs_page_actor *squashfs_page_actor_init_special(
struct squashfs_sb_info *msblk,
struct page **page, int pages, int length);
-static inline void squashfs_page_actor_free(struct squashfs_page_actor *actor)
+static inline struct page *squashfs_page_actor_free(struct squashfs_page_actor *actor)
{
+ struct page *last_page = actor->last_page;
+
kfree(actor->tmp_buffer);
kfree(actor);
+ return last_page;
}
static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
{
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 9783e01c8100..a6164fdf9435 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -38,11 +38,24 @@ extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
extern void *squashfs_decompressor_setup(struct super_block *, unsigned short);
/* decompressor_xxx.c */
-extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *);
-extern void squashfs_decompressor_destroy(struct squashfs_sb_info *);
-extern int squashfs_decompress(struct squashfs_sb_info *, struct bio *,
- int, int, struct squashfs_page_actor *);
-extern int squashfs_max_decompressors(void);
+
+struct squashfs_decompressor_thread_ops {
+ void * (*create)(struct squashfs_sb_info *msblk, void *comp_opts);
+ void (*destroy)(struct squashfs_sb_info *msblk);
+ int (*decompress)(struct squashfs_sb_info *msblk, struct bio *bio,
+ int offset, int length, struct squashfs_page_actor *output);
+ int (*max_decompressors)(void);
+};
+
+#ifdef CONFIG_SQUASHFS_DECOMP_SINGLE
+extern const struct squashfs_decompressor_thread_ops squashfs_decompressor_single;
+#endif
+#ifdef CONFIG_SQUASHFS_DECOMP_MULTI
+extern const struct squashfs_decompressor_thread_ops squashfs_decompressor_multi;
+#endif
+#ifdef CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU
+extern const struct squashfs_decompressor_thread_ops squashfs_decompressor_percpu;
+#endif
/* export.c */
extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 1e90c2575f9b..659082e9e51d 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -53,7 +53,7 @@ struct squashfs_sb_info {
__le64 *xattr_id_table;
struct mutex meta_index_mutex;
struct meta_index *meta_index;
- struct squashfs_stream *stream;
+ void *stream;
__le64 *inode_lookup_table;
u64 inode_table;
u64 directory_table;
@@ -66,5 +66,7 @@ struct squashfs_sb_info {
int xattr_ids;
unsigned int ids;
bool panic_on_errors;
+ const struct squashfs_decompressor_thread_ops *thread_ops;
+ int max_thread_num;
};
#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 32565dafa7f3..7d5265a39d20 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -47,10 +47,13 @@ enum Opt_errors {
enum squashfs_param {
Opt_errors,
+ Opt_threads,
};
struct squashfs_mount_opts {
enum Opt_errors errors;
+ const struct squashfs_decompressor_thread_ops *thread_ops;
+ int thread_num;
};
static const struct constant_table squashfs_param_errors[] = {
@@ -61,9 +64,66 @@ static const struct constant_table squashfs_param_errors[] = {
static const struct fs_parameter_spec squashfs_fs_parameters[] = {
fsparam_enum("errors", Opt_errors, squashfs_param_errors),
+ fsparam_string("threads", Opt_threads),
{}
};
+
+static int squashfs_parse_param_threads_str(const char *str, struct squashfs_mount_opts *opts)
+{
+#ifdef CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT
+ if (strcmp(str, "single") == 0) {
+ opts->thread_ops = &squashfs_decompressor_single;
+ return 0;
+ }
+ if (strcmp(str, "multi") == 0) {
+ opts->thread_ops = &squashfs_decompressor_multi;
+ return 0;
+ }
+ if (strcmp(str, "percpu") == 0) {
+ opts->thread_ops = &squashfs_decompressor_percpu;
+ return 0;
+ }
+#endif
+ return -EINVAL;
+}
+
+static int squashfs_parse_param_threads_num(const char *str, struct squashfs_mount_opts *opts)
+{
+#ifdef CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS
+ int ret;
+ unsigned long num;
+
+ ret = kstrtoul(str, 0, &num);
+ if (ret != 0)
+ return -EINVAL;
+ if (num > 1) {
+ opts->thread_ops = &squashfs_decompressor_multi;
+ if (num > opts->thread_ops->max_decompressors())
+ return -EINVAL;
+ opts->thread_num = (int)num;
+ return 0;
+ }
+#ifdef CONFIG_SQUASHFS_DECOMP_SINGLE
+ if (num == 1) {
+ opts->thread_ops = &squashfs_decompressor_single;
+ opts->thread_num = 1;
+ return 0;
+ }
+#endif
+#endif /* !CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS */
+ return -EINVAL;
+}
+
+static int squashfs_parse_param_threads(const char *str, struct squashfs_mount_opts *opts)
+{
+ int ret = squashfs_parse_param_threads_str(str, opts);
+
+ if (ret == 0)
+ return ret;
+ return squashfs_parse_param_threads_num(str, opts);
+}
+
static int squashfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct squashfs_mount_opts *opts = fc->fs_private;
@@ -78,6 +138,10 @@ static int squashfs_parse_param(struct fs_context *fc, struct fs_parameter *para
case Opt_errors:
opts->errors = result.uint_32;
break;
+ case Opt_threads:
+ if (squashfs_parse_param_threads(param->string, opts) != 0)
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
@@ -133,6 +197,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
return -ENOMEM;
}
msblk = sb->s_fs_info;
+ msblk->thread_ops = opts->thread_ops;
msblk->panic_on_errors = (opts->errors == Opt_errors_panic);
@@ -168,6 +233,12 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
goto failed_mount;
}
+ if (opts->thread_num == 0) {
+ msblk->max_thread_num = msblk->thread_ops->max_decompressors();
+ } else {
+ msblk->max_thread_num = opts->thread_num;
+ }
+
/* Check the MAJOR & MINOR versions and lookup compression type */
msblk->decompressor = supported_squashfs_filesystem(
fc,
@@ -252,7 +323,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
/* Allocate read_page block */
msblk->read_page = squashfs_cache_init("data",
- squashfs_max_decompressors(), msblk->block_size);
+ msblk->max_thread_num, msblk->block_size);
if (msblk->read_page == NULL) {
errorf(fc, "Failed to allocate read_page block");
goto failed_mount;
@@ -383,7 +454,7 @@ failed_mount:
squashfs_cache_delete(msblk->block_cache);
squashfs_cache_delete(msblk->fragment_cache);
squashfs_cache_delete(msblk->read_page);
- squashfs_decompressor_destroy(msblk);
+ msblk->thread_ops->destroy(msblk);
kfree(msblk->inode_lookup_table);
kfree(msblk->fragment_index);
kfree(msblk->id_table);
@@ -435,6 +506,19 @@ static int squashfs_show_options(struct seq_file *s, struct dentry *root)
else
seq_puts(s, ",errors=continue");
+#ifdef CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT
+ if (msblk->thread_ops == &squashfs_decompressor_single) {
+ seq_puts(s, ",threads=single");
+ return 0;
+ }
+ if (msblk->thread_ops == &squashfs_decompressor_percpu) {
+ seq_puts(s, ",threads=percpu");
+ return 0;
+ }
+#endif
+#ifdef CONFIG_SQUASHFS_MOUNT_DECOMP_THREADS
+ seq_printf(s, ",threads=%d", msblk->max_thread_num);
+#endif
return 0;
}
@@ -446,6 +530,16 @@ static int squashfs_init_fs_context(struct fs_context *fc)
if (!opts)
return -ENOMEM;
+#ifdef CONFIG_SQUASHFS_DECOMP_SINGLE
+ opts->thread_ops = &squashfs_decompressor_single;
+#elif defined(CONFIG_SQUASHFS_DECOMP_MULTI)
+ opts->thread_ops = &squashfs_decompressor_multi;
+#elif defined(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU)
+ opts->thread_ops = &squashfs_decompressor_percpu;
+#else
+#error "fail: unknown squashfs decompression thread mode?"
+#endif
+ opts->thread_num = 0;
fc->fs_private = opts;
fc->ops = &squashfs_context_ops;
return 0;
@@ -478,7 +572,7 @@ static void squashfs_put_super(struct super_block *sb)
squashfs_cache_delete(sbi->block_cache);
squashfs_cache_delete(sbi->fragment_cache);
squashfs_cache_delete(sbi->read_page);
- squashfs_decompressor_destroy(sbi);
+ sbi->thread_ops->destroy(sbi);
kfree(sbi->id_table);
kfree(sbi->fragment_index);
kfree(sbi->meta_index);
diff --git a/fs/super.c b/fs/super.c
index 6a82660e1adb..12c08cb20405 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -291,6 +291,7 @@ static void __put_super(struct super_block *s)
WARN_ON(s->s_inode_lru.node);
WARN_ON(!list_empty(&s->s_mounts));
security_sb_free(s);
+ fscrypt_destroy_keyring(s);
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
call_rcu(&s->rcu, destroy_super_rcu);
@@ -479,7 +480,7 @@ void generic_shutdown_super(struct super_block *sb)
evict_inodes(sb);
/* only nonzero refcount inodes can have marks */
fsnotify_sb_delete(sb);
- fscrypt_sb_delete(sb);
+ fscrypt_destroy_keyring(sb);
security_sb_delete(sb);
if (sb->s_dio_done_wq) {
@@ -1111,55 +1112,14 @@ static int test_single_super(struct super_block *s, struct fs_context *fc)
return 1;
}
-/**
- * vfs_get_super - Get a superblock with a search key set in s_fs_info.
- * @fc: The filesystem context holding the parameters
- * @keying: How to distinguish superblocks
- * @fill_super: Helper to initialise a new superblock
- *
- * Search for a superblock and create a new one if not found. The search
- * criterion is controlled by @keying. If the search fails, a new superblock
- * is created and @fill_super() is called to initialise it.
- *
- * @keying can take one of a number of values:
- *
- * (1) vfs_get_single_super - Only one superblock of this type may exist on the
- * system. This is typically used for special system filesystems.
- *
- * (2) vfs_get_keyed_super - Multiple superblocks may exist, but they must have
- * distinct keys (where the key is in s_fs_info). Searching for the same
- * key again will turn up the superblock for that key.
- *
- * (3) vfs_get_independent_super - Multiple superblocks may exist and are
- * unkeyed. Each call will get a new superblock.
- *
- * A permissions check is made by sget_fc() unless we're getting a superblock
- * for a kernel-internal mount or a submount.
- */
-int vfs_get_super(struct fs_context *fc,
- enum vfs_get_super_keying keying,
- int (*fill_super)(struct super_block *sb,
- struct fs_context *fc))
+static int vfs_get_super(struct fs_context *fc, bool reconf,
+ int (*test)(struct super_block *, struct fs_context *),
+ int (*fill_super)(struct super_block *sb,
+ struct fs_context *fc))
{
- int (*test)(struct super_block *, struct fs_context *);
struct super_block *sb;
int err;
- switch (keying) {
- case vfs_get_single_super:
- case vfs_get_single_reconf_super:
- test = test_single_super;
- break;
- case vfs_get_keyed_super:
- test = test_keyed_super;
- break;
- case vfs_get_independent_super:
- test = NULL;
- break;
- default:
- BUG();
- }
-
sb = sget_fc(fc, test, set_anon_super_fc);
if (IS_ERR(sb))
return PTR_ERR(sb);
@@ -1173,7 +1133,7 @@ int vfs_get_super(struct fs_context *fc,
fc->root = dget(sb->s_root);
} else {
fc->root = dget(sb->s_root);
- if (keying == vfs_get_single_reconf_super) {
+ if (reconf) {
err = reconfigure_super(fc);
if (err < 0) {
dput(fc->root);
@@ -1189,13 +1149,12 @@ error:
deactivate_locked_super(sb);
return err;
}
-EXPORT_SYMBOL(vfs_get_super);
int get_tree_nodev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
{
- return vfs_get_super(fc, vfs_get_independent_super, fill_super);
+ return vfs_get_super(fc, false, NULL, fill_super);
}
EXPORT_SYMBOL(get_tree_nodev);
@@ -1203,7 +1162,7 @@ int get_tree_single(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
{
- return vfs_get_super(fc, vfs_get_single_super, fill_super);
+ return vfs_get_super(fc, false, test_single_super, fill_super);
}
EXPORT_SYMBOL(get_tree_single);
@@ -1211,7 +1170,7 @@ int get_tree_single_reconf(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
{
- return vfs_get_super(fc, vfs_get_single_reconf_super, fill_super);
+ return vfs_get_super(fc, true, test_single_super, fill_super);
}
EXPORT_SYMBOL(get_tree_single_reconf);
@@ -1221,7 +1180,7 @@ int get_tree_keyed(struct fs_context *fc,
void *key)
{
fc->s_fs_info = key;
- return vfs_get_super(fc, vfs_get_keyed_super, fill_super);
+ return vfs_get_super(fc, false, test_keyed_super, fill_super);
}
EXPORT_SYMBOL(get_tree_keyed);
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index d4ec9bb97de9..3b8567564e7e 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -438,7 +438,7 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size)
res += blocks;
direct = 1;
}
- return blocks;
+ return res;
}
int sysv_getattr(struct user_namespace *mnt_userns, const struct path *path,
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 3f128b9fdfbb..9c9d3f0e36a4 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2467,7 +2467,7 @@ error_dump:
static inline int chance(unsigned int n, unsigned int out_of)
{
- return !!(prandom_u32_max(out_of) + 1 <= n);
+ return !!(get_random_u32_below(out_of) + 1 <= n);
}
@@ -2485,13 +2485,13 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
if (chance(1, 2)) {
d->pc_delay = 1;
/* Fail within 1 minute */
- delay = prandom_u32_max(60000);
+ delay = get_random_u32_below(60000);
d->pc_timeout = jiffies;
d->pc_timeout += msecs_to_jiffies(delay);
ubifs_warn(c, "failing after %lums", delay);
} else {
d->pc_delay = 2;
- delay = prandom_u32_max(10000);
+ delay = get_random_u32_below(10000);
/* Fail within 10000 operations */
d->pc_cnt_max = delay;
ubifs_warn(c, "failing after %lu calls", delay);
@@ -2571,7 +2571,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
unsigned int from, to, ffs = chance(1, 2);
unsigned char *p = (void *)buf;
- from = prandom_u32_max(len);
+ from = get_random_u32_below(len);
/* Corruption span max to end of write unit */
to = min(len, ALIGN(from + 1, c->max_write_size));
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index cfbc31f709f4..c4d079328b92 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1970,28 +1970,28 @@ static int dbg_populate_lsave(struct ubifs_info *c)
if (!dbg_is_chk_gen(c))
return 0;
- if (prandom_u32_max(4))
+ if (get_random_u32_below(4))
return 0;
for (i = 0; i < c->lsave_cnt; i++)
c->lsave[i] = c->main_first;
list_for_each_entry(lprops, &c->empty_list, list)
- c->lsave[prandom_u32_max(c->lsave_cnt)] = lprops->lnum;
+ c->lsave[get_random_u32_below(c->lsave_cnt)] = lprops->lnum;
list_for_each_entry(lprops, &c->freeable_list, list)
- c->lsave[prandom_u32_max(c->lsave_cnt)] = lprops->lnum;
+ c->lsave[get_random_u32_below(c->lsave_cnt)] = lprops->lnum;
list_for_each_entry(lprops, &c->frdi_idx_list, list)
- c->lsave[prandom_u32_max(c->lsave_cnt)] = lprops->lnum;
+ c->lsave[get_random_u32_below(c->lsave_cnt)] = lprops->lnum;
heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
for (i = 0; i < heap->cnt; i++)
- c->lsave[prandom_u32_max(c->lsave_cnt)] = heap->arr[i]->lnum;
+ c->lsave[get_random_u32_below(c->lsave_cnt)] = heap->arr[i]->lnum;
heap = &c->lpt_heap[LPROPS_DIRTY - 1];
for (i = 0; i < heap->cnt; i++)
- c->lsave[prandom_u32_max(c->lsave_cnt)] = heap->arr[i]->lnum;
+ c->lsave[get_random_u32_below(c->lsave_cnt)] = heap->arr[i]->lnum;
heap = &c->lpt_heap[LPROPS_FREE - 1];
for (i = 0; i < heap->cnt; i++)
- c->lsave[prandom_u32_max(c->lsave_cnt)] = heap->arr[i]->lnum;
+ c->lsave[get_random_u32_below(c->lsave_cnt)] = heap->arr[i]->lnum;
return 1;
}
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 01362ad5f804..a55e04822d16 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -700,7 +700,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
c->ilebs[c->ileb_cnt++] = lnum;
dbg_cmt("LEB %d", lnum);
}
- if (dbg_is_chk_index(c) && !prandom_u32_max(8))
+ if (dbg_is_chk_index(c) && !get_random_u32_below(8))
return -ENOSPC;
return 0;
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index fb4c30e05245..ae7bc13a5298 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -240,7 +240,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
poffset - lfi);
else {
if (!copy_name) {
- copy_name = kmalloc(UDF_NAME_LEN,
+ copy_name = kmalloc(UDF_NAME_LEN_CS0,
GFP_NOFS);
if (!copy_name) {
fi = ERR_PTR(-ENOMEM);
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 07c81ab3fd4d..98ac37e34e3d 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1630,17 +1630,20 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
NULL_VM_UFFD_CTX, anon_vma_name(vma));
if (prev) {
vma = prev;
+ mas_pause(&mas);
goto next;
}
if (vma->vm_start < start) {
ret = split_vma(mm, vma, start, 1);
if (ret)
break;
+ mas_pause(&mas);
}
if (vma->vm_end > end) {
ret = split_vma(mm, vma, end, 0);
if (ret)
break;
+ mas_pause(&mas);
}
next:
/*
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 517a138faa66..191b22b9a35b 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -133,6 +133,21 @@ xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno)
return true;
}
+static inline bool
+xfs_verify_agbext(
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
+ xfs_agblock_t len)
+{
+ if (agbno + len <= agbno)
+ return false;
+
+ if (!xfs_verify_agbno(pag, agbno))
+ return false;
+
+ return xfs_verify_agbno(pag, agbno + len - 1);
+}
+
/*
* Verify that an AG inode number pointer neither points outside the AG
* nor points at static metadata.
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 6261599bb389..989cf341779b 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -263,11 +263,7 @@ xfs_alloc_get_rec(
goto out_bad_rec;
/* check for valid extent range, including overflow */
- if (!xfs_verify_agbno(pag, *bno))
- goto out_bad_rec;
- if (*bno > *bno + *len)
- goto out_bad_rec;
- if (!xfs_verify_agbno(pag, *bno + *len - 1))
+ if (!xfs_verify_agbext(pag, *bno, *len))
goto out_bad_rec;
return 0;
@@ -1520,7 +1516,7 @@ xfs_alloc_ag_vextent_lastblock(
#ifdef DEBUG
/* Randomly don't execute the first algorithm. */
- if (prandom_u32_max(2))
+ if (get_random_u32_below(2))
return 0;
#endif
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index d9b66306a9a7..cb9e950a911d 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -146,6 +146,8 @@ xfs_dir3_leaf_check_int(
xfs_dir2_leaf_tail_t *ltp;
int stale;
int i;
+ bool isleaf1 = (hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
+ hdr->magic == XFS_DIR3_LEAF1_MAGIC);
ltp = xfs_dir2_leaf_tail_p(geo, leaf);
@@ -158,8 +160,7 @@ xfs_dir3_leaf_check_int(
return __this_address;
/* Leaves and bests don't overlap in leaf format. */
- if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
- hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
+ if (isleaf1 &&
(char *)&hdr->ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
return __this_address;
@@ -175,6 +176,10 @@ xfs_dir3_leaf_check_int(
}
if (hdr->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
+ if (isleaf1 && xfs_dir2_dataptr_to_db(geo,
+ be32_to_cpu(hdr->ents[i].address)) >=
+ be32_to_cpu(ltp->bestcount))
+ return __this_address;
}
if (hdr->stale != stale)
return __this_address;
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index b55bdfa9c8a8..371dc07233e0 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1564,20 +1564,6 @@ struct xfs_rmap_rec {
#define RMAPBT_UNUSED_OFFSET_BITLEN 7
#define RMAPBT_OFFSET_BITLEN 54
-#define XFS_RMAP_ATTR_FORK (1 << 0)
-#define XFS_RMAP_BMBT_BLOCK (1 << 1)
-#define XFS_RMAP_UNWRITTEN (1 << 2)
-#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \
- XFS_RMAP_BMBT_BLOCK)
-#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN)
-struct xfs_rmap_irec {
- xfs_agblock_t rm_startblock; /* extent start block */
- xfs_extlen_t rm_blockcount; /* extent length */
- uint64_t rm_owner; /* extent owner */
- uint64_t rm_offset; /* offset within the owner */
- unsigned int rm_flags; /* state flags */
-};
-
/*
* Key structure
*
@@ -1626,7 +1612,7 @@ unsigned int xfs_refc_block(struct xfs_mount *mp);
* on the startblock. This speeds up mount time deletion of stale
* staging extents because they're all at the right side of the tree.
*/
-#define XFS_REFC_COW_START ((xfs_agblock_t)(1U << 31))
+#define XFS_REFC_COWFLAG (1U << 31)
#define REFCNTBT_COWFLAG_BITLEN 1
#define REFCNTBT_AGBLOCK_BITLEN 31
@@ -1640,12 +1626,6 @@ struct xfs_refcount_key {
__be32 rc_startblock; /* starting block number */
};
-struct xfs_refcount_irec {
- xfs_agblock_t rc_startblock; /* starting block number */
- xfs_extlen_t rc_blockcount; /* count of free blocks */
- xfs_nlink_t rc_refcount; /* number of inodes linked here */
-};
-
#define MAXREFCOUNT ((xfs_nlink_t)~0U)
#define MAXREFCEXTLEN ((xfs_extlen_t)~0U)
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 94db50eb706a..5118dedf9267 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -636,7 +636,7 @@ xfs_ialloc_ag_alloc(
/* randomly do sparse inode allocations */
if (xfs_has_sparseinodes(tp->t_mountp) &&
igeo->ialloc_min_blks < igeo->ialloc_blks)
- do_sparse = prandom_u32_max(2);
+ do_sparse = get_random_u32_below(2);
#endif
/*
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index b351b9dc6561..f13e0809dc63 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -613,25 +613,49 @@ typedef struct xfs_efi_log_format {
uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */
- xfs_extent_t efi_extents[1]; /* array of extents to free */
+ xfs_extent_t efi_extents[]; /* array of extents to free */
} xfs_efi_log_format_t;
+static inline size_t
+xfs_efi_log_format_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efi_log_format) +
+ nr * sizeof(struct xfs_extent);
+}
+
typedef struct xfs_efi_log_format_32 {
uint16_t efi_type; /* efi log item type */
uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */
- xfs_extent_32_t efi_extents[1]; /* array of extents to free */
+ xfs_extent_32_t efi_extents[]; /* array of extents to free */
} __attribute__((packed)) xfs_efi_log_format_32_t;
+static inline size_t
+xfs_efi_log_format32_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efi_log_format_32) +
+ nr * sizeof(struct xfs_extent_32);
+}
+
typedef struct xfs_efi_log_format_64 {
uint16_t efi_type; /* efi log item type */
uint16_t efi_size; /* size of this item */
uint32_t efi_nextents; /* # extents to free */
uint64_t efi_id; /* efi identifier */
- xfs_extent_64_t efi_extents[1]; /* array of extents to free */
+ xfs_extent_64_t efi_extents[]; /* array of extents to free */
} xfs_efi_log_format_64_t;
+static inline size_t
+xfs_efi_log_format64_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efi_log_format_64) +
+ nr * sizeof(struct xfs_extent_64);
+}
+
/*
* This is the structure used to lay out an efd log item in the
* log. The efd_extents array is a variable size array whose
@@ -642,25 +666,49 @@ typedef struct xfs_efd_log_format {
uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */
- xfs_extent_t efd_extents[1]; /* array of extents freed */
+ xfs_extent_t efd_extents[]; /* array of extents freed */
} xfs_efd_log_format_t;
+static inline size_t
+xfs_efd_log_format_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efd_log_format) +
+ nr * sizeof(struct xfs_extent);
+}
+
typedef struct xfs_efd_log_format_32 {
uint16_t efd_type; /* efd log item type */
uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */
- xfs_extent_32_t efd_extents[1]; /* array of extents freed */
+ xfs_extent_32_t efd_extents[]; /* array of extents freed */
} __attribute__((packed)) xfs_efd_log_format_32_t;
+static inline size_t
+xfs_efd_log_format32_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efd_log_format_32) +
+ nr * sizeof(struct xfs_extent_32);
+}
+
typedef struct xfs_efd_log_format_64 {
uint16_t efd_type; /* efd log item type */
uint16_t efd_size; /* size of this item */
uint32_t efd_nextents; /* # of extents freed */
uint64_t efd_efi_id; /* id of corresponding efi */
- xfs_extent_64_t efd_extents[1]; /* array of extents freed */
+ xfs_extent_64_t efd_extents[]; /* array of extents freed */
} xfs_efd_log_format_64_t;
+static inline size_t
+xfs_efd_log_format64_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct xfs_efd_log_format_64) +
+ nr * sizeof(struct xfs_extent_64);
+}
+
/*
* RUI/RUD (reverse mapping) log format definitions
*/
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 64b910caafaa..3f34bafe18dd 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -46,13 +46,16 @@ STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
int
xfs_refcount_lookup_le(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
+ trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_LE);
cur->bc_rec.rc.rc_startblock = bno;
cur->bc_rec.rc.rc_blockcount = 0;
+ cur->bc_rec.rc.rc_domain = domain;
return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}
@@ -63,13 +66,16 @@ xfs_refcount_lookup_le(
int
xfs_refcount_lookup_ge(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
+ trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_GE);
cur->bc_rec.rc.rc_startblock = bno;
cur->bc_rec.rc.rc_blockcount = 0;
+ cur->bc_rec.rc.rc_domain = domain;
return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}
@@ -80,13 +86,16 @@ xfs_refcount_lookup_ge(
int
xfs_refcount_lookup_eq(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t bno,
int *stat)
{
- trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
+ trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ xfs_refcount_encode_startblock(bno, domain),
XFS_LOOKUP_LE);
cur->bc_rec.rc.rc_startblock = bno;
cur->bc_rec.rc.rc_blockcount = 0;
+ cur->bc_rec.rc.rc_domain = domain;
return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}
@@ -96,7 +105,17 @@ xfs_refcount_btrec_to_irec(
const union xfs_btree_rec *rec,
struct xfs_refcount_irec *irec)
{
- irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
+ uint32_t start;
+
+ start = be32_to_cpu(rec->refc.rc_startblock);
+ if (start & XFS_REFC_COWFLAG) {
+ start &= ~XFS_REFC_COWFLAG;
+ irec->rc_domain = XFS_REFC_DOMAIN_COW;
+ } else {
+ irec->rc_domain = XFS_REFC_DOMAIN_SHARED;
+ }
+
+ irec->rc_startblock = start;
irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}
@@ -114,7 +133,6 @@ xfs_refcount_get_rec(
struct xfs_perag *pag = cur->bc_ag.pag;
union xfs_btree_rec *rec;
int error;
- xfs_agblock_t realstart;
error = xfs_btree_get_rec(cur, &rec, stat);
if (error || !*stat)
@@ -124,22 +142,11 @@ xfs_refcount_get_rec(
if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
goto out_bad_rec;
- /* handle special COW-staging state */
- realstart = irec->rc_startblock;
- if (realstart & XFS_REFC_COW_START) {
- if (irec->rc_refcount != 1)
- goto out_bad_rec;
- realstart &= ~XFS_REFC_COW_START;
- } else if (irec->rc_refcount < 2) {
+ if (!xfs_refcount_check_domain(irec))
goto out_bad_rec;
- }
/* check for valid extent range, including overflow */
- if (!xfs_verify_agbno(pag, realstart))
- goto out_bad_rec;
- if (realstart > realstart + irec->rc_blockcount)
- goto out_bad_rec;
- if (!xfs_verify_agbno(pag, realstart + irec->rc_blockcount - 1))
+ if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
goto out_bad_rec;
if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
@@ -169,12 +176,17 @@ xfs_refcount_update(
struct xfs_refcount_irec *irec)
{
union xfs_btree_rec rec;
+ uint32_t start;
int error;
trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
- rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
+
+ start = xfs_refcount_encode_startblock(irec->rc_startblock,
+ irec->rc_domain);
+ rec.refc.rc_startblock = cpu_to_be32(start);
rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
+
error = xfs_btree_update(cur, &rec);
if (error)
trace_xfs_refcount_update_error(cur->bc_mp,
@@ -196,9 +208,12 @@ xfs_refcount_insert(
int error;
trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
+
cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
+ cur->bc_rec.rc.rc_domain = irec->rc_domain;
+
error = xfs_btree_insert(cur, i);
if (error)
goto out_error;
@@ -244,7 +259,8 @@ xfs_refcount_delete(
}
if (error)
goto out_error;
- error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
+ error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock,
+ &found_rec);
out_error:
if (error)
trace_xfs_refcount_delete_error(cur->bc_mp,
@@ -343,6 +359,7 @@ xfs_refc_next(
STATIC int
xfs_refcount_split_extent(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t agbno,
bool *shape_changed)
{
@@ -351,7 +368,7 @@ xfs_refcount_split_extent(
int error;
*shape_changed = false;
- error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
+ error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec);
if (error)
goto out_error;
if (!found_rec)
@@ -364,6 +381,8 @@ xfs_refcount_split_extent(
error = -EFSCORRUPTED;
goto out_error;
}
+ if (rcext.rc_domain != domain)
+ return 0;
if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
return 0;
@@ -415,6 +434,9 @@ xfs_refcount_merge_center_extents(
trace_xfs_refcount_merge_center_extents(cur->bc_mp,
cur->bc_ag.pag->pag_agno, left, center, right);
+ ASSERT(left->rc_domain == center->rc_domain);
+ ASSERT(right->rc_domain == center->rc_domain);
+
/*
* Make sure the center and right extents are not in the btree.
* If the center extent was synthesized, the first delete call
@@ -423,8 +445,8 @@ xfs_refcount_merge_center_extents(
* call removes the center and the second one removes the right
* extent.
*/
- error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_ge(cur, center->rc_domain,
+ center->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -451,8 +473,8 @@ xfs_refcount_merge_center_extents(
}
/* Enlarge the left extent. */
- error = xfs_refcount_lookup_le(cur, left->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, left->rc_domain,
+ left->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -491,10 +513,12 @@ xfs_refcount_merge_left_extent(
trace_xfs_refcount_merge_left_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, left, cleft);
+ ASSERT(left->rc_domain == cleft->rc_domain);
+
/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
if (cleft->rc_refcount > 1) {
- error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, cleft->rc_domain,
+ cleft->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -512,8 +536,8 @@ xfs_refcount_merge_left_extent(
}
/* Enlarge the left extent. */
- error = xfs_refcount_lookup_le(cur, left->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, left->rc_domain,
+ left->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -552,13 +576,15 @@ xfs_refcount_merge_right_extent(
trace_xfs_refcount_merge_right_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, cright, right);
+ ASSERT(right->rc_domain == cright->rc_domain);
+
/*
* If the extent ending at agbno+aglen (cright) wasn't synthesized,
* remove it.
*/
if (cright->rc_refcount > 1) {
- error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, cright->rc_domain,
+ cright->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -576,8 +602,8 @@ xfs_refcount_merge_right_extent(
}
/* Enlarge the right extent. */
- error = xfs_refcount_lookup_le(cur, right->rc_startblock,
- &found_rec);
+ error = xfs_refcount_lookup_le(cur, right->rc_domain,
+ right->rc_startblock, &found_rec);
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
@@ -600,8 +626,6 @@ out_error:
return error;
}
-#define XFS_FIND_RCEXT_SHARED 1
-#define XFS_FIND_RCEXT_COW 2
/*
* Find the left extent and the one after it (cleft). This function assumes
* that we've already split any extent crossing agbno.
@@ -611,16 +635,16 @@ xfs_refcount_find_left_extents(
struct xfs_btree_cur *cur,
struct xfs_refcount_irec *left,
struct xfs_refcount_irec *cleft,
+ enum xfs_refc_domain domain,
xfs_agblock_t agbno,
- xfs_extlen_t aglen,
- int flags)
+ xfs_extlen_t aglen)
{
struct xfs_refcount_irec tmp;
int error;
int found_rec;
left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
- error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
+ error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec);
if (error)
goto out_error;
if (!found_rec)
@@ -634,11 +658,9 @@ xfs_refcount_find_left_extents(
goto out_error;
}
- if (xfs_refc_next(&tmp) != agbno)
- return 0;
- if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
+ if (tmp.rc_domain != domain)
return 0;
- if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
+ if (xfs_refc_next(&tmp) != agbno)
return 0;
/* We have a left extent; retrieve (or invent) the next right one */
*left = tmp;
@@ -655,6 +677,9 @@ xfs_refcount_find_left_extents(
goto out_error;
}
+ if (tmp.rc_domain != domain)
+ goto not_found;
+
/* if tmp starts at the end of our range, just use that */
if (tmp.rc_startblock == agbno)
*cleft = tmp;
@@ -671,8 +696,10 @@ xfs_refcount_find_left_extents(
cleft->rc_blockcount = min(aglen,
tmp.rc_startblock - agbno);
cleft->rc_refcount = 1;
+ cleft->rc_domain = domain;
}
} else {
+not_found:
/*
* No extents, so pretend that there's one covering the whole
* range.
@@ -680,6 +707,7 @@ xfs_refcount_find_left_extents(
cleft->rc_startblock = agbno;
cleft->rc_blockcount = aglen;
cleft->rc_refcount = 1;
+ cleft->rc_domain = domain;
}
trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
left, cleft, agbno);
@@ -700,16 +728,16 @@ xfs_refcount_find_right_extents(
struct xfs_btree_cur *cur,
struct xfs_refcount_irec *right,
struct xfs_refcount_irec *cright,
+ enum xfs_refc_domain domain,
xfs_agblock_t agbno,
- xfs_extlen_t aglen,
- int flags)
+ xfs_extlen_t aglen)
{
struct xfs_refcount_irec tmp;
int error;
int found_rec;
right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
- error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
+ error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec);
if (error)
goto out_error;
if (!found_rec)
@@ -723,11 +751,9 @@ xfs_refcount_find_right_extents(
goto out_error;
}
- if (tmp.rc_startblock != agbno + aglen)
- return 0;
- if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
+ if (tmp.rc_domain != domain)
return 0;
- if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
+ if (tmp.rc_startblock != agbno + aglen)
return 0;
/* We have a right extent; retrieve (or invent) the next left one */
*right = tmp;
@@ -744,6 +770,9 @@ xfs_refcount_find_right_extents(
goto out_error;
}
+ if (tmp.rc_domain != domain)
+ goto not_found;
+
/* if tmp ends at the end of our range, just use that */
if (xfs_refc_next(&tmp) == agbno + aglen)
*cright = tmp;
@@ -760,8 +789,10 @@ xfs_refcount_find_right_extents(
cright->rc_blockcount = right->rc_startblock -
cright->rc_startblock;
cright->rc_refcount = 1;
+ cright->rc_domain = domain;
}
} else {
+not_found:
/*
* No extents, so pretend that there's one covering the whole
* range.
@@ -769,6 +800,7 @@ xfs_refcount_find_right_extents(
cright->rc_startblock = agbno;
cright->rc_blockcount = aglen;
cright->rc_refcount = 1;
+ cright->rc_domain = domain;
}
trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
cright, right, agbno + aglen);
@@ -794,10 +826,10 @@ xfs_refc_valid(
STATIC int
xfs_refcount_merge_extents(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t *agbno,
xfs_extlen_t *aglen,
enum xfs_refc_adjust_op adjust,
- int flags,
bool *shape_changed)
{
struct xfs_refcount_irec left = {0}, cleft = {0};
@@ -812,12 +844,12 @@ xfs_refcount_merge_extents(
* just below (agbno + aglen) [cright], and just above (agbno + aglen)
* [right].
*/
- error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
- *aglen, flags);
+ error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain,
+ *agbno, *aglen);
if (error)
return error;
- error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
- *aglen, flags);
+ error = xfs_refcount_find_right_extents(cur, &right, &cright, domain,
+ *agbno, *aglen);
if (error)
return error;
@@ -870,7 +902,7 @@ xfs_refcount_merge_extents(
aglen);
}
- return error;
+ return 0;
}
/*
@@ -933,7 +965,8 @@ xfs_refcount_adjust_extents(
if (*aglen == 0)
return 0;
- error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
+ error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno,
+ &found_rec);
if (error)
goto out_error;
@@ -941,10 +974,11 @@ xfs_refcount_adjust_extents(
error = xfs_refcount_get_rec(cur, &ext, &found_rec);
if (error)
goto out_error;
- if (!found_rec) {
+ if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) {
ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
ext.rc_blockcount = 0;
ext.rc_refcount = 0;
+ ext.rc_domain = XFS_REFC_DOMAIN_SHARED;
}
/*
@@ -957,6 +991,8 @@ xfs_refcount_adjust_extents(
tmp.rc_blockcount = min(*aglen,
ext.rc_startblock - *agbno);
tmp.rc_refcount = 1 + adj;
+ tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;
+
trace_xfs_refcount_modify_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, &tmp);
@@ -986,15 +1022,30 @@ xfs_refcount_adjust_extents(
(*agbno) += tmp.rc_blockcount;
(*aglen) -= tmp.rc_blockcount;
- error = xfs_refcount_lookup_ge(cur, *agbno,
+ /* Stop if there's nothing left to modify */
+ if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
+ break;
+
+ /* Move the cursor to the start of ext. */
+ error = xfs_refcount_lookup_ge(cur,
+ XFS_REFC_DOMAIN_SHARED, *agbno,
&found_rec);
if (error)
goto out_error;
}
- /* Stop if there's nothing left to modify */
- if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
- break;
+ /*
+ * A previous step trimmed agbno/aglen such that the end of the
+ * range would not be in the middle of the record. If this is
+ * no longer the case, something is seriously wrong with the
+ * btree. Make sure we never feed the synthesized record into
+ * the processing loop below.
+ */
+ if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
+ XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
/*
* Adjust the reference count and either update the tree
@@ -1070,13 +1121,15 @@ xfs_refcount_adjust(
/*
* Ensure that no rcextents cross the boundary of the adjustment range.
*/
- error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
+ agbno, &shape_changed);
if (error)
goto out_error;
if (shape_changed)
shape_changes++;
- error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
+ agbno + aglen, &shape_changed);
if (error)
goto out_error;
if (shape_changed)
@@ -1085,8 +1138,8 @@ xfs_refcount_adjust(
/*
* Try to merge with the left or right extents of the range.
*/
- error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
- XFS_FIND_RCEXT_SHARED, &shape_changed);
+ error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
+ new_agbno, new_aglen, adj, &shape_changed);
if (error)
goto out_error;
if (shape_changed)
@@ -1125,6 +1178,32 @@ xfs_refcount_finish_one_cleanup(
}
/*
+ * Set up a continuation a deferred refcount operation by updating the intent.
+ * Checks to make sure we're not going to run off the end of the AG.
+ */
+static inline int
+xfs_refcount_continue_op(
+ struct xfs_btree_cur *cur,
+ xfs_fsblock_t startblock,
+ xfs_agblock_t new_agbno,
+ xfs_extlen_t new_len,
+ xfs_fsblock_t *new_fsbno)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_perag *pag = cur->bc_ag.pag;
+
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len)))
+ return -EFSCORRUPTED;
+
+ *new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+
+ ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len));
+ ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno));
+
+ return 0;
+}
+
+/*
* Process one of the deferred refcount operations. We pass back the
* btree cursor to maintain our lock on the btree between calls.
* This saves time and eliminates a buffer deadlock between the
@@ -1191,12 +1270,20 @@ xfs_refcount_finish_one(
case XFS_REFCOUNT_INCREASE:
error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
new_len, XFS_REFCOUNT_ADJUST_INCREASE);
- *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+ if (error)
+ goto out_drop;
+ if (*new_len > 0)
+ error = xfs_refcount_continue_op(rcur, startblock,
+ new_agbno, *new_len, new_fsb);
break;
case XFS_REFCOUNT_DECREASE:
error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
new_len, XFS_REFCOUNT_ADJUST_DECREASE);
- *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+ if (error)
+ goto out_drop;
+ if (*new_len > 0)
+ error = xfs_refcount_continue_op(rcur, startblock,
+ new_agbno, *new_len, new_fsb);
break;
case XFS_REFCOUNT_ALLOC_COW:
*new_fsb = startblock + blockcount;
@@ -1307,7 +1394,8 @@ xfs_refcount_find_shared(
*flen = 0;
/* Try to find a refcount extent that crosses the start */
- error = xfs_refcount_lookup_le(cur, agbno, &have);
+ error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno,
+ &have);
if (error)
goto out_error;
if (!have) {
@@ -1325,6 +1413,8 @@ xfs_refcount_find_shared(
error = -EFSCORRUPTED;
goto out_error;
}
+ if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
+ goto done;
/* If the extent ends before the start, look at the next one */
if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
@@ -1340,6 +1430,8 @@ xfs_refcount_find_shared(
error = -EFSCORRUPTED;
goto out_error;
}
+ if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
+ goto done;
}
/* If the extent starts after the range we want, bail out */
@@ -1371,7 +1463,8 @@ xfs_refcount_find_shared(
error = -EFSCORRUPTED;
goto out_error;
}
- if (tmp.rc_startblock >= agbno + aglen ||
+ if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED ||
+ tmp.rc_startblock >= agbno + aglen ||
tmp.rc_startblock != *fbno + *flen)
break;
*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
@@ -1455,17 +1548,23 @@ xfs_refcount_adjust_cow_extents(
return 0;
/* Find any overlapping refcount records */
- error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
+ error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno,
+ &found_rec);
if (error)
goto out_error;
error = xfs_refcount_get_rec(cur, &ext, &found_rec);
if (error)
goto out_error;
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec &&
+ ext.rc_domain != XFS_REFC_DOMAIN_COW)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
if (!found_rec) {
- ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
- XFS_REFC_COW_START;
+ ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
ext.rc_blockcount = 0;
ext.rc_refcount = 0;
+ ext.rc_domain = XFS_REFC_DOMAIN_COW;
}
switch (adj) {
@@ -1480,6 +1579,8 @@ xfs_refcount_adjust_cow_extents(
tmp.rc_startblock = agbno;
tmp.rc_blockcount = aglen;
tmp.rc_refcount = 1;
+ tmp.rc_domain = XFS_REFC_DOMAIN_COW;
+
trace_xfs_refcount_modify_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, &tmp);
@@ -1542,24 +1643,24 @@ xfs_refcount_adjust_cow(
bool shape_changed;
int error;
- agbno += XFS_REFC_COW_START;
-
/*
* Ensure that no rcextents cross the boundary of the adjustment range.
*/
- error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
+ agbno, &shape_changed);
if (error)
goto out_error;
- error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
+ agbno + aglen, &shape_changed);
if (error)
goto out_error;
/*
* Try to merge with the left or right extents of the range.
*/
- error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
- XFS_FIND_RCEXT_COW, &shape_changed);
+ error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno,
+ &aglen, adj, &shape_changed);
if (error)
goto out_error;
@@ -1666,10 +1767,18 @@ xfs_refcount_recover_extent(
be32_to_cpu(rec->refc.rc_refcount) != 1))
return -EFSCORRUPTED;
- rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0);
+ rr = kmalloc(sizeof(struct xfs_refcount_recovery),
+ GFP_KERNEL | __GFP_NOFAIL);
+ INIT_LIST_HEAD(&rr->rr_list);
xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
- list_add_tail(&rr->rr_list, debris);
+ if (XFS_IS_CORRUPT(cur->bc_mp,
+ rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
+ kfree(rr);
+ return -EFSCORRUPTED;
+ }
+
+ list_add_tail(&rr->rr_list, debris);
return 0;
}
@@ -1687,10 +1796,11 @@ xfs_refcount_recover_cow_leftovers(
union xfs_btree_irec low;
union xfs_btree_irec high;
xfs_fsblock_t fsb;
- xfs_agblock_t agbno;
int error;
- if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
+ /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */
+ BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG);
+ if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS)
return -EOPNOTSUPP;
INIT_LIST_HEAD(&debris);
@@ -1717,7 +1827,7 @@ xfs_refcount_recover_cow_leftovers(
/* Find all the leftover CoW staging extents. */
memset(&low, 0, sizeof(low));
memset(&high, 0, sizeof(high));
- low.rc.rc_startblock = XFS_REFC_COW_START;
+ low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW;
high.rc.rc_startblock = -1U;
error = xfs_btree_query_range(cur, &low, &high,
xfs_refcount_recover_extent, &debris);
@@ -1738,8 +1848,8 @@ xfs_refcount_recover_cow_leftovers(
&rr->rr_rrec);
/* Free the orphan record */
- agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
- fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno);
+ fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
+ rr->rr_rrec.rc_startblock);
xfs_refcount_free_cow_extent(tp, fsb,
rr->rr_rrec.rc_blockcount);
@@ -1751,7 +1861,7 @@ xfs_refcount_recover_cow_leftovers(
goto out_free;
list_del(&rr->rr_list);
- kmem_free(rr);
+ kfree(rr);
}
return error;
@@ -1761,7 +1871,7 @@ out_free:
/* Free the leftover list */
list_for_each_entry_safe(rr, n, &debris, rr_list) {
list_del(&rr->rr_list);
- kmem_free(rr);
+ kfree(rr);
}
return error;
}
@@ -1770,6 +1880,7 @@ out_free:
int
xfs_refcount_has_record(
struct xfs_btree_cur *cur,
+ enum xfs_refc_domain domain,
xfs_agblock_t bno,
xfs_extlen_t len,
bool *exists)
@@ -1781,6 +1892,7 @@ xfs_refcount_has_record(
low.rc.rc_startblock = bno;
memset(&high, 0xFF, sizeof(high));
high.rc.rc_startblock = bno + len - 1;
+ low.rc.rc_domain = high.rc.rc_domain = domain;
return xfs_btree_has_record(cur, &low, &high, exists);
}
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index e8b322de7f3d..452f30556f5a 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -14,14 +14,33 @@ struct xfs_bmbt_irec;
struct xfs_refcount_irec;
extern int xfs_refcount_lookup_le(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, int *stat);
+ enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat);
extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, int *stat);
+ enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat);
extern int xfs_refcount_lookup_eq(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, int *stat);
+ enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat);
extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur,
struct xfs_refcount_irec *irec, int *stat);
+static inline uint32_t
+xfs_refcount_encode_startblock(
+ xfs_agblock_t startblock,
+ enum xfs_refc_domain domain)
+{
+ uint32_t start;
+
+ /*
+ * low level btree operations need to handle the generic btree range
+ * query functions (which set rc_domain == -1U), so we check that the
+ * domain is /not/ shared.
+ */
+ start = startblock & ~XFS_REFC_COWFLAG;
+ if (domain != XFS_REFC_DOMAIN_SHARED)
+ start |= XFS_REFC_COWFLAG;
+
+ return start;
+}
+
enum xfs_refcount_intent_type {
XFS_REFCOUNT_INCREASE = 1,
XFS_REFCOUNT_DECREASE,
@@ -36,6 +55,18 @@ struct xfs_refcount_intent {
xfs_fsblock_t ri_startblock;
};
+/* Check that the refcount is appropriate for the record domain. */
+static inline bool
+xfs_refcount_check_domain(
+ const struct xfs_refcount_irec *irec)
+{
+ if (irec->rc_domain == XFS_REFC_DOMAIN_COW && irec->rc_refcount != 1)
+ return false;
+ if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED && irec->rc_refcount < 2)
+ return false;
+ return true;
+}
+
void xfs_refcount_increase_extent(struct xfs_trans *tp,
struct xfs_bmbt_irec *irec);
void xfs_refcount_decrease_extent(struct xfs_trans *tp,
@@ -79,7 +110,8 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
#define XFS_REFCOUNT_ITEM_OVERHEAD 32
extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
- xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
+ enum xfs_refc_domain domain, xfs_agblock_t bno,
+ xfs_extlen_t len, bool *exists);
union xfs_btree_rec;
extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec,
struct xfs_refcount_irec *irec);
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 316c1ec0c3c2..e1f789866683 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -13,6 +13,7 @@
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_refcount_btree.h"
+#include "xfs_refcount.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -160,7 +161,12 @@ xfs_refcountbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
{
- rec->refc.rc_startblock = cpu_to_be32(cur->bc_rec.rc.rc_startblock);
+ const struct xfs_refcount_irec *irec = &cur->bc_rec.rc;
+ uint32_t start;
+
+ start = xfs_refcount_encode_startblock(irec->rc_startblock,
+ irec->rc_domain);
+ rec->refc.rc_startblock = cpu_to_be32(start);
rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount);
rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount);
}
@@ -182,10 +188,13 @@ xfs_refcountbt_key_diff(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
- struct xfs_refcount_irec *rec = &cur->bc_rec.rc;
const struct xfs_refcount_key *kp = &key->refc;
+ const struct xfs_refcount_irec *irec = &cur->bc_rec.rc;
+ uint32_t start;
- return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
+ start = xfs_refcount_encode_startblock(irec->rc_startblock,
+ irec->rc_domain);
+ return (int64_t)be32_to_cpu(kp->rc_startblock) - start;
}
STATIC int64_t
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 094dfc897ebc..b56aca1e7c66 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -235,13 +235,8 @@ xfs_rmap_get_rec(
goto out_bad_rec;
} else {
/* check for valid extent range, including overflow */
- if (!xfs_verify_agbno(pag, irec->rm_startblock))
- goto out_bad_rec;
- if (irec->rm_startblock >
- irec->rm_startblock + irec->rm_blockcount)
- goto out_bad_rec;
- if (!xfs_verify_agbno(pag,
- irec->rm_startblock + irec->rm_blockcount - 1))
+ if (!xfs_verify_agbext(pag, irec->rm_startblock,
+ irec->rm_blockcount))
goto out_bad_rec;
}
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 2c4ad6e4bb14..5b2f27cbdb80 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -422,7 +422,7 @@ xfs_calc_itruncate_reservation_minlogsize(
/*
* In renaming a files we can modify:
- * the four inodes involved: 4 * inode size
+ * the five inodes involved: 5 * inode size
* the two directory btrees: 2 * (max depth + v2) * dir block size
* the two directory bmap btrees: 2 * max depth * block size
* And the bmap_finish transaction can free dir and bmap blocks (two sets
@@ -437,7 +437,7 @@ xfs_calc_rename_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 4) +
+ max((xfs_calc_inode_res(mp, 5) +
xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index a6b7d98cf68f..5ebdda7e1078 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -166,6 +166,36 @@ typedef struct xfs_bmbt_irec
xfs_exntst_t br_state; /* extent state */
} xfs_bmbt_irec_t;
+enum xfs_refc_domain {
+ XFS_REFC_DOMAIN_SHARED = 0,
+ XFS_REFC_DOMAIN_COW,
+};
+
+#define XFS_REFC_DOMAIN_STRINGS \
+ { XFS_REFC_DOMAIN_SHARED, "shared" }, \
+ { XFS_REFC_DOMAIN_COW, "cow" }
+
+struct xfs_refcount_irec {
+ xfs_agblock_t rc_startblock; /* starting block number */
+ xfs_extlen_t rc_blockcount; /* count of free blocks */
+ xfs_nlink_t rc_refcount; /* number of inodes linked here */
+ enum xfs_refc_domain rc_domain; /* shared or cow staging extent? */
+};
+
+#define XFS_RMAP_ATTR_FORK (1 << 0)
+#define XFS_RMAP_BMBT_BLOCK (1 << 1)
+#define XFS_RMAP_UNWRITTEN (1 << 2)
+#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \
+ XFS_RMAP_BMBT_BLOCK)
+#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN)
+struct xfs_rmap_irec {
+ xfs_agblock_t rm_startblock; /* extent start block */
+ xfs_extlen_t rm_blockcount; /* extent length */
+ uint64_t rm_owner; /* extent owner */
+ uint64_t rm_offset; /* offset within the owner */
+ unsigned int rm_flags; /* state flags */
+};
+
/* per-AG block reservation types */
enum xfs_ag_resv_type {
XFS_AG_RESV_NONE = 0,
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index ab427b4d7fe0..3b38f4e2a537 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -100,9 +100,7 @@ xchk_allocbt_rec(
bno = be32_to_cpu(rec->alloc.ar_startblock);
len = be32_to_cpu(rec->alloc.ar_blockcount);
- if (bno + len <= bno ||
- !xfs_verify_agbno(pag, bno) ||
- !xfs_verify_agbno(pag, bno + len - 1))
+ if (!xfs_verify_agbext(pag, bno, len))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
xchk_allocbt_xref(bs->sc, bno, len);
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index e1026e07bf94..e312be7cd375 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -108,9 +108,8 @@ xchk_iallocbt_chunk(
xfs_agblock_t bno;
bno = XFS_AGINO_TO_AGBNO(mp, agino);
- if (bno + len <= bno ||
- !xfs_verify_agbno(pag, bno) ||
- !xfs_verify_agbno(pag, bno + len - 1))
+
+ if (!xfs_verify_agbext(pag, bno, len))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
xchk_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len);
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index c68b767dc08f..a26ee0f24ef2 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -269,15 +269,13 @@ done:
STATIC void
xchk_refcountbt_xref_rmap(
struct xfs_scrub *sc,
- xfs_agblock_t bno,
- xfs_extlen_t len,
- xfs_nlink_t refcount)
+ const struct xfs_refcount_irec *irec)
{
struct xchk_refcnt_check refchk = {
- .sc = sc,
- .bno = bno,
- .len = len,
- .refcount = refcount,
+ .sc = sc,
+ .bno = irec->rc_startblock,
+ .len = irec->rc_blockcount,
+ .refcount = irec->rc_refcount,
.seen = 0,
};
struct xfs_rmap_irec low;
@@ -291,9 +289,9 @@ xchk_refcountbt_xref_rmap(
/* Cross-reference with the rmapbt to confirm the refcount. */
memset(&low, 0, sizeof(low));
- low.rm_startblock = bno;
+ low.rm_startblock = irec->rc_startblock;
memset(&high, 0xFF, sizeof(high));
- high.rm_startblock = bno + len - 1;
+ high.rm_startblock = irec->rc_startblock + irec->rc_blockcount - 1;
INIT_LIST_HEAD(&refchk.fragments);
error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
@@ -302,7 +300,7 @@ xchk_refcountbt_xref_rmap(
goto out_free;
xchk_refcountbt_process_rmap_fragments(&refchk);
- if (refcount != refchk.seen)
+ if (irec->rc_refcount != refchk.seen)
xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
out_free:
@@ -315,17 +313,16 @@ out_free:
/* Cross-reference with the other btrees. */
STATIC void
xchk_refcountbt_xref(
- struct xfs_scrub *sc,
- xfs_agblock_t agbno,
- xfs_extlen_t len,
- xfs_nlink_t refcount)
+ struct xfs_scrub *sc,
+ const struct xfs_refcount_irec *irec)
{
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return;
- xchk_xref_is_used_space(sc, agbno, len);
- xchk_xref_is_not_inode_chunk(sc, agbno, len);
- xchk_refcountbt_xref_rmap(sc, agbno, len, refcount);
+ xchk_xref_is_used_space(sc, irec->rc_startblock, irec->rc_blockcount);
+ xchk_xref_is_not_inode_chunk(sc, irec->rc_startblock,
+ irec->rc_blockcount);
+ xchk_refcountbt_xref_rmap(sc, irec);
}
/* Scrub a refcountbt record. */
@@ -334,35 +331,27 @@ xchk_refcountbt_rec(
struct xchk_btree *bs,
const union xfs_btree_rec *rec)
{
+ struct xfs_refcount_irec irec;
xfs_agblock_t *cow_blocks = bs->private;
struct xfs_perag *pag = bs->cur->bc_ag.pag;
- xfs_agblock_t bno;
- xfs_extlen_t len;
- xfs_nlink_t refcount;
- bool has_cowflag;
- bno = be32_to_cpu(rec->refc.rc_startblock);
- len = be32_to_cpu(rec->refc.rc_blockcount);
- refcount = be32_to_cpu(rec->refc.rc_refcount);
+ xfs_refcount_btrec_to_irec(rec, &irec);
- /* Only CoW records can have refcount == 1. */
- has_cowflag = (bno & XFS_REFC_COW_START);
- if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
+ /* Check the domain and refcount are not incompatible. */
+ if (!xfs_refcount_check_domain(&irec))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
- if (has_cowflag)
- (*cow_blocks) += len;
+
+ if (irec.rc_domain == XFS_REFC_DOMAIN_COW)
+ (*cow_blocks) += irec.rc_blockcount;
/* Check the extent. */
- bno &= ~XFS_REFC_COW_START;
- if (bno + len <= bno ||
- !xfs_verify_agbno(pag, bno) ||
- !xfs_verify_agbno(pag, bno + len - 1))
+ if (!xfs_verify_agbext(pag, irec.rc_startblock, irec.rc_blockcount))
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
- if (refcount == 0)
+ if (irec.rc_refcount == 0)
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
- xchk_refcountbt_xref(bs->sc, bno, len, refcount);
+ xchk_refcountbt_xref(bs->sc, &irec);
return 0;
}
@@ -426,7 +415,6 @@ xchk_xref_is_cow_staging(
xfs_extlen_t len)
{
struct xfs_refcount_irec rc;
- bool has_cowflag;
int has_refcount;
int error;
@@ -434,8 +422,8 @@ xchk_xref_is_cow_staging(
return;
/* Find the CoW staging extent. */
- error = xfs_refcount_lookup_le(sc->sa.refc_cur,
- agbno + XFS_REFC_COW_START, &has_refcount);
+ error = xfs_refcount_lookup_le(sc->sa.refc_cur, XFS_REFC_DOMAIN_COW,
+ agbno, &has_refcount);
if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
return;
if (!has_refcount) {
@@ -451,9 +439,8 @@ xchk_xref_is_cow_staging(
return;
}
- /* CoW flag must be set, refcount must be 1. */
- has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START);
- if (!has_cowflag || rc.rc_refcount != 1)
+ /* CoW lookup returned a shared extent record? */
+ if (rc.rc_domain != XFS_REFC_DOMAIN_COW)
xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
/* Must be at least as long as what was passed in */
@@ -477,7 +464,8 @@ xchk_xref_is_not_shared(
if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
return;
- error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
+ error = xfs_refcount_has_record(sc->sa.refc_cur, XFS_REFC_DOMAIN_SHARED,
+ agbno, len, &shared);
if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
return;
if (shared)
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index cf5ce607dc05..2788a6f2edcd 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -245,28 +245,6 @@ xfs_attri_init(
return attrip;
}
-/*
- * Copy an attr format buffer from the given buf, and into the destination attr
- * format structure.
- */
-STATIC int
-xfs_attri_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_attri_log_format *dst_attr_fmt)
-{
- struct xfs_attri_log_format *src_attr_fmt = buf->i_addr;
- size_t len;
-
- len = sizeof(struct xfs_attri_log_format);
- if (buf->i_len != len) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
- }
-
- memcpy((char *)dst_attr_fmt, (char *)src_attr_fmt, len);
- return 0;
-}
-
static inline struct xfs_attrd_log_item *ATTRD_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_attrd_log_item, attrd_item);
@@ -731,24 +709,50 @@ xlog_recover_attri_commit_pass2(
struct xfs_attri_log_nameval *nv;
const void *attr_value = NULL;
const void *attr_name;
- int error;
+ size_t len;
attri_formatp = item->ri_buf[0].i_addr;
attr_name = item->ri_buf[1].i_addr;
/* Validate xfs_attri_log_format before the large memory allocation */
+ len = sizeof(struct xfs_attri_log_format);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
if (!xfs_attri_validate(mp, attri_formatp)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ /* Validate the attr name */
+ if (item->ri_buf[1].i_len !=
+ xlog_calc_iovec_len(attri_formatp->alfi_name_len)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[1].i_addr, item->ri_buf[1].i_len);
return -EFSCORRUPTED;
}
- if (attri_formatp->alfi_value_len)
+ /* Validate the attr value, if present */
+ if (attri_formatp->alfi_value_len != 0) {
+ if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr,
+ item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
attr_value = item->ri_buf[2].i_addr;
+ }
/*
* Memory alloc failure will cause replay to abort. We attach the
@@ -760,9 +764,7 @@ xlog_recover_attri_commit_pass2(
attri_formatp->alfi_value_len);
attrip = xfs_attri_init(mp, nv);
- error = xfs_attri_copy_format(&item->ri_buf[0], &attrip->attri_format);
- if (error)
- goto out;
+ memcpy(&attrip->attri_format, attri_formatp, len);
/*
* The ATTRI has two references. One for the ATTRD and one for ATTRI to
@@ -774,10 +776,6 @@ xlog_recover_attri_commit_pass2(
xfs_attri_release(attrip);
xfs_attri_log_nameval_put(nv);
return 0;
-out:
- xfs_attri_item_free(attrip);
- xfs_attri_log_nameval_put(nv);
- return error;
}
/*
@@ -842,7 +840,8 @@ xlog_recover_attrd_commit_pass2(
attrd_formatp = item->ri_buf[0].i_addr;
if (item->ri_buf[0].i_len != sizeof(struct xfs_attrd_log_format)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 51f66e982484..41323da523d1 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -608,28 +608,18 @@ static const struct xfs_item_ops xfs_bui_item_ops = {
.iop_relog = xfs_bui_item_relog,
};
-/*
- * Copy an BUI format buffer from the given buf, and into the destination
- * BUI format structure. The BUI/BUD items were designed not to need any
- * special alignment handling.
- */
-static int
+static inline void
xfs_bui_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_bui_log_format *dst_bui_fmt)
+ struct xfs_bui_log_format *dst,
+ const struct xfs_bui_log_format *src)
{
- struct xfs_bui_log_format *src_bui_fmt;
- uint len;
+ unsigned int i;
- src_bui_fmt = buf->i_addr;
- len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents);
+ memcpy(dst, src, offsetof(struct xfs_bui_log_format, bui_extents));
- if (buf->i_len == len) {
- memcpy(dst_bui_fmt, src_bui_fmt, len);
- return 0;
- }
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
+ for (i = 0; i < src->bui_nextents; i++)
+ memcpy(&dst->bui_extents[i], &src->bui_extents[i],
+ sizeof(struct xfs_map_extent));
}
/*
@@ -646,23 +636,34 @@ xlog_recover_bui_commit_pass2(
struct xlog_recover_item *item,
xfs_lsn_t lsn)
{
- int error;
struct xfs_mount *mp = log->l_mp;
struct xfs_bui_log_item *buip;
struct xfs_bui_log_format *bui_formatp;
+ size_t len;
bui_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len < xfs_bui_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
- buip = xfs_bui_init(mp);
- error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
- if (error) {
- xfs_bui_item_free(buip);
- return error;
+
+ len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
}
+
+ buip = xfs_bui_init(mp);
+ xfs_bui_copy_format(&buip->bui_format, bui_formatp);
atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
/*
* Insert the intent into the AIL directly and drop one reference so
@@ -696,7 +697,8 @@ xlog_recover_bud_commit_pass2(
bud_formatp = item->ri_buf[0].i_addr;
if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 7db588ed0be5..822e6a0e9d1a 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -234,13 +234,18 @@ int
xfs_errortag_init(
struct xfs_mount *mp)
{
+ int ret;
+
mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX,
KM_MAYFAIL);
if (!mp->m_errortag)
return -ENOMEM;
- return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
- &mp->m_kobj, "errortag");
+ ret = xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
+ &mp->m_kobj, "errortag");
+ if (ret)
+ kmem_free(mp->m_errortag);
+ return ret;
}
void
@@ -274,7 +279,7 @@ xfs_errortag_test(
ASSERT(error_tag < XFS_ERRTAG_MAX);
randfactor = mp->m_errortag[error_tag];
- if (!randfactor || prandom_u32_max(randfactor))
+ if (!randfactor || get_random_u32_below(randfactor))
return false;
xfs_warn_ratelimited(mp,
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 27ccfcd82f04..d5130d1fcfae 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -66,27 +66,16 @@ xfs_efi_release(
xfs_efi_item_free(efip);
}
-/*
- * This returns the number of iovecs needed to log the given efi item.
- * We only need 1 iovec for an efi item. It just logs the efi_log_format
- * structure.
- */
-static inline int
-xfs_efi_item_sizeof(
- struct xfs_efi_log_item *efip)
-{
- return sizeof(struct xfs_efi_log_format) +
- (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t);
-}
-
STATIC void
xfs_efi_item_size(
struct xfs_log_item *lip,
int *nvecs,
int *nbytes)
{
+ struct xfs_efi_log_item *efip = EFI_ITEM(lip);
+
*nvecs += 1;
- *nbytes += xfs_efi_item_sizeof(EFI_ITEM(lip));
+ *nbytes += xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents);
}
/*
@@ -112,7 +101,7 @@ xfs_efi_item_format(
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT,
&efip->efi_format,
- xfs_efi_item_sizeof(efip));
+ xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents));
}
@@ -155,13 +144,11 @@ xfs_efi_init(
{
struct xfs_efi_log_item *efip;
- uint size;
ASSERT(nextents > 0);
if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
- size = (uint)(sizeof(struct xfs_efi_log_item) +
- ((nextents - 1) * sizeof(xfs_extent_t)));
- efip = kmem_zalloc(size, 0);
+ efip = kzalloc(xfs_efi_log_item_sizeof(nextents),
+ GFP_KERNEL | __GFP_NOFAIL);
} else {
efip = kmem_cache_zalloc(xfs_efi_cache,
GFP_KERNEL | __GFP_NOFAIL);
@@ -188,15 +175,17 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
{
xfs_efi_log_format_t *src_efi_fmt = buf->i_addr;
uint i;
- uint len = sizeof(xfs_efi_log_format_t) +
- (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t);
- uint len32 = sizeof(xfs_efi_log_format_32_t) +
- (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_32_t);
- uint len64 = sizeof(xfs_efi_log_format_64_t) +
- (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_64_t);
+ uint len = xfs_efi_log_format_sizeof(src_efi_fmt->efi_nextents);
+ uint len32 = xfs_efi_log_format32_sizeof(src_efi_fmt->efi_nextents);
+ uint len64 = xfs_efi_log_format64_sizeof(src_efi_fmt->efi_nextents);
if (buf->i_len == len) {
- memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len);
+ memcpy(dst_efi_fmt, src_efi_fmt,
+ offsetof(struct xfs_efi_log_format, efi_extents));
+ for (i = 0; i < src_efi_fmt->efi_nextents; i++)
+ memcpy(&dst_efi_fmt->efi_extents[i],
+ &src_efi_fmt->efi_extents[i],
+ sizeof(struct xfs_extent));
return 0;
} else if (buf->i_len == len32) {
xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr;
@@ -227,7 +216,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
}
return 0;
}
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, NULL, buf->i_addr,
+ buf->i_len);
return -EFSCORRUPTED;
}
@@ -246,27 +236,16 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp)
kmem_cache_free(xfs_efd_cache, efdp);
}
-/*
- * This returns the number of iovecs needed to log the given efd item.
- * We only need 1 iovec for an efd item. It just logs the efd_log_format
- * structure.
- */
-static inline int
-xfs_efd_item_sizeof(
- struct xfs_efd_log_item *efdp)
-{
- return sizeof(xfs_efd_log_format_t) +
- (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t);
-}
-
STATIC void
xfs_efd_item_size(
struct xfs_log_item *lip,
int *nvecs,
int *nbytes)
{
+ struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+
*nvecs += 1;
- *nbytes += xfs_efd_item_sizeof(EFD_ITEM(lip));
+ *nbytes += xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents);
}
/*
@@ -291,7 +270,7 @@ xfs_efd_item_format(
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT,
&efdp->efd_format,
- xfs_efd_item_sizeof(efdp));
+ xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents));
}
/*
@@ -340,9 +319,8 @@ xfs_trans_get_efd(
ASSERT(nextents > 0);
if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
- efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) +
- (nextents - 1) * sizeof(struct xfs_extent),
- 0);
+ efdp = kzalloc(xfs_efd_log_item_sizeof(nextents),
+ GFP_KERNEL | __GFP_NOFAIL);
} else {
efdp = kmem_cache_zalloc(xfs_efd_cache,
GFP_KERNEL | __GFP_NOFAIL);
@@ -733,6 +711,12 @@ xlog_recover_efi_commit_pass2(
efi_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
if (error) {
@@ -769,12 +753,24 @@ xlog_recover_efd_commit_pass2(
xfs_lsn_t lsn)
{
struct xfs_efd_log_format *efd_formatp;
+ int buflen = item->ri_buf[0].i_len;
efd_formatp = item->ri_buf[0].i_addr;
- ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) +
- ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) ||
- (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) +
- ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t)))));
+
+ if (buflen < sizeof(struct xfs_efd_log_format)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ efd_formatp, buflen);
+ return -EFSCORRUPTED;
+ }
+
+ if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof(
+ efd_formatp->efd_nextents) &&
+ item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof(
+ efd_formatp->efd_nextents)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ efd_formatp, buflen);
+ return -EFSCORRUPTED;
+ }
xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id);
return 0;
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 186d0f2137f1..da6a5afa607c 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -52,6 +52,14 @@ struct xfs_efi_log_item {
xfs_efi_log_format_t efi_format;
};
+static inline size_t
+xfs_efi_log_item_sizeof(
+ unsigned int nr)
+{
+ return offsetof(struct xfs_efi_log_item, efi_format) +
+ xfs_efi_log_format_sizeof(nr);
+}
+
/*
* This is the "extent free done" log item. It is used to log
* the fact that some extents earlier mentioned in an efi item
@@ -64,6 +72,14 @@ struct xfs_efd_log_item {
xfs_efd_log_format_t efd_format;
};
+static inline size_t
+xfs_efd_log_item_sizeof(
+ unsigned int nr)
+{
+ return offsetof(struct xfs_efd_log_item, efd_format) +
+ xfs_efd_log_format_sizeof(nr);
+}
+
/*
* Max number of extents in fast allocation path.
*/
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c6c80265c0b2..e462d39c840e 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1261,7 +1261,7 @@ xfs_file_llseek(
}
#ifdef CONFIG_FS_DAX
-static int
+static inline vm_fault_t
xfs_dax_fault(
struct vm_fault *vmf,
enum page_entry_size pe_size,
@@ -1274,14 +1274,15 @@ xfs_dax_fault(
&xfs_read_iomap_ops);
}
#else
-static int
+static inline vm_fault_t
xfs_dax_fault(
struct vm_fault *vmf,
enum page_entry_size pe_size,
bool write_fault,
pfn_t *pfn)
{
- return 0;
+ ASSERT(0);
+ return VM_FAULT_SIGBUS;
}
#endif
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c000b74dd203..aa303be11576 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2818,7 +2818,7 @@ retry:
* Lock all the participating inodes. Depending upon whether
* the target_name exists in the target directory, and
* whether the target directory is the same as the source
- * directory, we can lock from 2 to 4 inodes.
+ * directory, we can lock from 2 to 5 inodes.
*/
xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 17e923b9c5fa..322eb2ee6c55 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2552,6 +2552,8 @@ xlog_recover_process_intents(
for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
lip != NULL;
lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
+ const struct xfs_item_ops *ops;
+
if (!xlog_item_is_intent(lip))
break;
@@ -2567,13 +2569,17 @@ xlog_recover_process_intents(
* deferred ops, you /must/ attach them to the capture list in
* the recover routine or else those subsequent intents will be
* replayed in the wrong order!
+ *
+ * The recovery function can free the log item, so we must not
+ * access lip after it returns.
*/
spin_unlock(&ailp->ail_lock);
- error = lip->li_ops->iop_recover(lip, &capture_list);
+ ops = lip->li_ops;
+ error = ops->iop_recover(lip, &capture_list);
spin_lock(&ailp->ail_lock);
if (error) {
trace_xlog_intent_recovery_failed(log->l_mp, error,
- lip->li_ops->iop_recover);
+ ops->iop_recover);
break;
}
}
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 758702b9495f..9737b5a9f405 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -118,10 +118,10 @@ xfs_check_ondisk_structs(void)
/* log structures */
XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88);
XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24);
- XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 28);
- XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 32);
- XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 28);
- XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32, 12);
XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176);
@@ -134,6 +134,21 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_attri_log_format, 40);
XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bui_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_cui_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_cud_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rui_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16);
+
+ XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_rui_log_format, rui_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_efi_log_format, efi_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16);
+ XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16);
/*
* The v5 superblock format extended several v4 header structures with
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 7e97bf19793d..858e3e9eb4a8 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -523,7 +523,9 @@ xfs_cui_item_recover(
type = refc_type;
break;
default:
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ &cuip->cui_format,
+ sizeof(cuip->cui_format));
error = -EFSCORRUPTED;
goto abort_error;
}
@@ -536,7 +538,8 @@ xfs_cui_item_recover(
&new_fsb, &new_len, &rcur);
if (error == -EFSCORRUPTED)
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- refc, sizeof(*refc));
+ &cuip->cui_format,
+ sizeof(cuip->cui_format));
if (error)
goto abort_error;
@@ -622,28 +625,18 @@ static const struct xfs_item_ops xfs_cui_item_ops = {
.iop_relog = xfs_cui_item_relog,
};
-/*
- * Copy an CUI format buffer from the given buf, and into the destination
- * CUI format structure. The CUI/CUD items were designed not to need any
- * special alignment handling.
- */
-static int
+static inline void
xfs_cui_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_cui_log_format *dst_cui_fmt)
+ struct xfs_cui_log_format *dst,
+ const struct xfs_cui_log_format *src)
{
- struct xfs_cui_log_format *src_cui_fmt;
- uint len;
+ unsigned int i;
- src_cui_fmt = buf->i_addr;
- len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);
+ memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents));
- if (buf->i_len == len) {
- memcpy(dst_cui_fmt, src_cui_fmt, len);
- return 0;
- }
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
+ for (i = 0; i < src->cui_nextents; i++)
+ memcpy(&dst->cui_extents[i], &src->cui_extents[i],
+ sizeof(struct xfs_phys_extent));
}
/*
@@ -660,19 +653,28 @@ xlog_recover_cui_commit_pass2(
struct xlog_recover_item *item,
xfs_lsn_t lsn)
{
- int error;
struct xfs_mount *mp = log->l_mp;
struct xfs_cui_log_item *cuip;
struct xfs_cui_log_format *cui_formatp;
+ size_t len;
cui_formatp = item->ri_buf[0].i_addr;
- cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
- error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
- if (error) {
- xfs_cui_item_free(cuip);
- return error;
+ if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
}
+
+ len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
+ xfs_cui_copy_format(&cuip->cui_format, cui_formatp);
atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
/*
* Insert the intent into the AIL directly and drop one reference so
@@ -706,7 +708,8 @@ xlog_recover_cud_commit_pass2(
cud_formatp = item->ri_buf[0].i_addr;
if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index fef92e02f3bb..534504ede1a3 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -155,31 +155,6 @@ xfs_rui_init(
return ruip;
}
-/*
- * Copy an RUI format buffer from the given buf, and into the destination
- * RUI format structure. The RUI/RUD items were designed not to need any
- * special alignment handling.
- */
-STATIC int
-xfs_rui_copy_format(
- struct xfs_log_iovec *buf,
- struct xfs_rui_log_format *dst_rui_fmt)
-{
- struct xfs_rui_log_format *src_rui_fmt;
- uint len;
-
- src_rui_fmt = buf->i_addr;
- len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents);
-
- if (buf->i_len != len) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
- return -EFSCORRUPTED;
- }
-
- memcpy(dst_rui_fmt, src_rui_fmt, len);
- return 0;
-}
-
static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip)
{
return container_of(lip, struct xfs_rud_log_item, rud_item);
@@ -582,7 +557,9 @@ xfs_rui_item_recover(
type = XFS_RMAP_FREE;
break;
default:
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ &ruip->rui_format,
+ sizeof(ruip->rui_format));
error = -EFSCORRUPTED;
goto abort_error;
}
@@ -652,6 +629,20 @@ static const struct xfs_item_ops xfs_rui_item_ops = {
.iop_relog = xfs_rui_item_relog,
};
+static inline void
+xfs_rui_copy_format(
+ struct xfs_rui_log_format *dst,
+ const struct xfs_rui_log_format *src)
+{
+ unsigned int i;
+
+ memcpy(dst, src, offsetof(struct xfs_rui_log_format, rui_extents));
+
+ for (i = 0; i < src->rui_nextents; i++)
+ memcpy(&dst->rui_extents[i], &src->rui_extents[i],
+ sizeof(struct xfs_map_extent));
+}
+
/*
* This routine is called to create an in-core extent rmap update
* item from the rui format structure which was logged on disk.
@@ -666,19 +657,28 @@ xlog_recover_rui_commit_pass2(
struct xlog_recover_item *item,
xfs_lsn_t lsn)
{
- int error;
struct xfs_mount *mp = log->l_mp;
struct xfs_rui_log_item *ruip;
struct xfs_rui_log_format *rui_formatp;
+ size_t len;
rui_formatp = item->ri_buf[0].i_addr;
- ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
- error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
- if (error) {
- xfs_rui_item_free(ruip);
- return error;
+ if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
}
+
+ ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
+ xfs_rui_copy_format(&ruip->rui_format, rui_formatp);
atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
/*
* Insert the intent into the AIL directly and drop one reference so
@@ -711,7 +711,11 @@ xlog_recover_rud_commit_pass2(
struct xfs_rud_log_format *rud_formatp;
rud_formatp = item->ri_buf[0].i_addr;
- ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ rud_formatp, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
xlog_recover_release_intent(log, XFS_LI_RUI, rud_formatp->rud_rui_id);
return 0;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f029c6702dda..ee4b429a2f2c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -2028,18 +2028,14 @@ xfs_init_caches(void)
goto out_destroy_trans_cache;
xfs_efd_cache = kmem_cache_create("xfs_efd_item",
- (sizeof(struct xfs_efd_log_item) +
- (XFS_EFD_MAX_FAST_EXTENTS - 1) *
- sizeof(struct xfs_extent)),
- 0, 0, NULL);
+ xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS),
+ 0, 0, NULL);
if (!xfs_efd_cache)
goto out_destroy_buf_item_cache;
xfs_efi_cache = kmem_cache_create("xfs_efi_item",
- (sizeof(struct xfs_efi_log_item) +
- (XFS_EFI_MAX_FAST_EXTENTS - 1) *
- sizeof(struct xfs_extent)),
- 0, 0, NULL);
+ xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS),
+ 0, 0, NULL);
if (!xfs_efi_cache)
goto out_destroy_efd_cache;
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index 43585850f154..513095e353a5 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -33,10 +33,15 @@ xfs_sysfs_init(
const char *name)
{
struct kobject *parent;
+ int err;
parent = parent_kobj ? &parent_kobj->kobject : NULL;
init_completion(&kobj->complete);
- return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name);
+ err = kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name);
+ if (err)
+ kobject_put(&kobj->kobject);
+
+ return err;
}
static inline void
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index cb7c81ba7fa3..372d871bccc5 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -799,6 +799,9 @@ TRACE_DEFINE_ENUM(PE_SIZE_PTE);
TRACE_DEFINE_ENUM(PE_SIZE_PMD);
TRACE_DEFINE_ENUM(PE_SIZE_PUD);
+TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED);
+TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW);
+
TRACE_EVENT(xfs_filemap_fault,
TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
bool write_fault),
@@ -2925,6 +2928,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, domain)
__field(xfs_agblock_t, startblock)
__field(xfs_extlen_t, blockcount)
__field(xfs_nlink_t, refcount)
@@ -2932,13 +2936,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->domain = irec->rc_domain;
__entry->startblock = irec->rc_startblock;
__entry->blockcount = irec->rc_blockcount;
__entry->refcount = irec->rc_refcount;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS),
__entry->startblock,
__entry->blockcount,
__entry->refcount)
@@ -2958,6 +2964,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, domain)
__field(xfs_agblock_t, startblock)
__field(xfs_extlen_t, blockcount)
__field(xfs_nlink_t, refcount)
@@ -2966,14 +2973,16 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->domain = irec->rc_domain;
__entry->startblock = irec->rc_startblock;
__entry->blockcount = irec->rc_blockcount;
__entry->refcount = irec->rc_refcount;
__entry->agbno = agbno;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS),
__entry->startblock,
__entry->blockcount,
__entry->refcount,
@@ -2994,9 +3003,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, i1_domain)
__field(xfs_agblock_t, i1_startblock)
__field(xfs_extlen_t, i1_blockcount)
__field(xfs_nlink_t, i1_refcount)
+ __field(enum xfs_refc_domain, i2_domain)
__field(xfs_agblock_t, i2_startblock)
__field(xfs_extlen_t, i2_blockcount)
__field(xfs_nlink_t, i2_refcount)
@@ -3004,20 +3015,24 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
__entry->i1_refcount = i1->rc_refcount;
+ __entry->i2_domain = i2->rc_domain;
__entry->i2_startblock = i2->rc_startblock;
__entry->i2_blockcount = i2->rc_blockcount;
__entry->i2_refcount = i2->rc_refcount;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- "
- "agbno 0x%x fsbcount 0x%x refcount %u",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
+ "dom %s agbno 0x%x fsbcount 0x%x refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i1_startblock,
__entry->i1_blockcount,
__entry->i1_refcount,
+ __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i2_startblock,
__entry->i2_blockcount,
__entry->i2_refcount)
@@ -3038,9 +3053,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, i1_domain)
__field(xfs_agblock_t, i1_startblock)
__field(xfs_extlen_t, i1_blockcount)
__field(xfs_nlink_t, i1_refcount)
+ __field(enum xfs_refc_domain, i2_domain)
__field(xfs_agblock_t, i2_startblock)
__field(xfs_extlen_t, i2_blockcount)
__field(xfs_nlink_t, i2_refcount)
@@ -3049,21 +3066,25 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
__entry->i1_refcount = i1->rc_refcount;
+ __entry->i2_domain = i2->rc_domain;
__entry->i2_startblock = i2->rc_startblock;
__entry->i2_blockcount = i2->rc_blockcount;
__entry->i2_refcount = i2->rc_refcount;
__entry->agbno = agbno;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- "
- "agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
+ "dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i1_startblock,
__entry->i1_blockcount,
__entry->i1_refcount,
+ __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i2_startblock,
__entry->i2_blockcount,
__entry->i2_refcount,
@@ -3086,12 +3107,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
+ __field(enum xfs_refc_domain, i1_domain)
__field(xfs_agblock_t, i1_startblock)
__field(xfs_extlen_t, i1_blockcount)
__field(xfs_nlink_t, i1_refcount)
+ __field(enum xfs_refc_domain, i2_domain)
__field(xfs_agblock_t, i2_startblock)
__field(xfs_extlen_t, i2_blockcount)
__field(xfs_nlink_t, i2_refcount)
+ __field(enum xfs_refc_domain, i3_domain)
__field(xfs_agblock_t, i3_startblock)
__field(xfs_extlen_t, i3_blockcount)
__field(xfs_nlink_t, i3_refcount)
@@ -3099,27 +3123,33 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
+ __entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
__entry->i1_refcount = i1->rc_refcount;
+ __entry->i2_domain = i2->rc_domain;
__entry->i2_startblock = i2->rc_startblock;
__entry->i2_blockcount = i2->rc_blockcount;
__entry->i2_refcount = i2->rc_refcount;
+ __entry->i3_domain = i3->rc_domain;
__entry->i3_startblock = i3->rc_startblock;
__entry->i3_blockcount = i3->rc_blockcount;
__entry->i3_refcount = i3->rc_refcount;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- "
- "agbno 0x%x fsbcount 0x%x refcount %u -- "
- "agbno 0x%x fsbcount 0x%x refcount %u",
+ TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
+ "dom %s agbno 0x%x fsbcount 0x%x refcount %u -- "
+ "dom %s agbno 0x%x fsbcount 0x%x refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
+ __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i1_startblock,
__entry->i1_blockcount,
__entry->i1_refcount,
+ __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i2_startblock,
__entry->i2_blockcount,
__entry->i2_refcount,
+ __print_symbolic(__entry->i3_domain, XFS_REFC_DOMAIN_STRINGS),
__entry->i3_startblock,
__entry->i3_blockcount,
__entry->i3_refcount)
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 16fbf2a1144c..f51df7d94ef7 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -730,11 +730,10 @@ void
xfs_ail_push_all_sync(
struct xfs_ail *ailp)
{
- struct xfs_log_item *lip;
DEFINE_WAIT(wait);
spin_lock(&ailp->ail_lock);
- while ((lip = xfs_ail_max(ailp)) != NULL) {
+ while (xfs_ail_max(ailp) != NULL) {
prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE);
wake_up_process(ailp->ail_task);
spin_unlock(&ailp->ail_lock);
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 860f0b1032c6..2c53fbb8d918 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -41,6 +41,13 @@ static void zonefs_account_active(struct inode *inode)
return;
/*
+ * For zones that transitioned to the offline or readonly condition,
+ * we only need to clear the active state.
+ */
+ if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY))
+ goto out;
+
+ /*
* If the zone is active, that is, if it is explicitly open or
* partially written, check if it was already accounted as active.
*/
@@ -53,6 +60,7 @@ static void zonefs_account_active(struct inode *inode)
return;
}
+out:
/* The zone is not active. If it was, update the active count */
if (zi->i_flags & ZONEFS_ZONE_ACTIVE) {
zi->i_flags &= ~ZONEFS_ZONE_ACTIVE;
@@ -324,6 +332,7 @@ static loff_t zonefs_check_zone_condition(struct inode *inode,
inode->i_flags |= S_IMMUTABLE;
inode->i_mode &= ~0777;
zone->wp = zone->start;
+ zi->i_flags |= ZONEFS_ZONE_OFFLINE;
return 0;
case BLK_ZONE_COND_READONLY:
/*
@@ -342,8 +351,10 @@ static loff_t zonefs_check_zone_condition(struct inode *inode,
zone->cond = BLK_ZONE_COND_OFFLINE;
inode->i_mode &= ~0777;
zone->wp = zone->start;
+ zi->i_flags |= ZONEFS_ZONE_OFFLINE;
return 0;
}
+ zi->i_flags |= ZONEFS_ZONE_READONLY;
inode->i_mode &= ~0222;
return i_size_read(inode);
case BLK_ZONE_COND_FULL:
@@ -478,8 +489,7 @@ static void __zonefs_io_error(struct inode *inode, bool write)
struct super_block *sb = inode->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
unsigned int noio_flag;
- unsigned int nr_zones =
- zi->i_zone_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
+ unsigned int nr_zones = 1;
struct zonefs_ioerr_data err = {
.inode = inode,
.write = write,
@@ -487,6 +497,15 @@ static void __zonefs_io_error(struct inode *inode, bool write)
int ret;
/*
+ * The only files that have more than one zone are conventional zone
+ * files with aggregated conventional zones, for which the inode zone
+ * size is always larger than the device zone size.
+ */
+ if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev))
+ nr_zones = zi->i_zone_size >>
+ (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
+
+ /*
* Memory allocations in blkdev_report_zones() can trigger a memory
* reclaim which may in turn cause a recursion into zonefs as well as
* struct request allocations for the same device. The former case may
@@ -1407,6 +1426,14 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
zi->i_ztype = type;
zi->i_zsector = zone->start;
zi->i_zone_size = zone->len << SECTOR_SHIFT;
+ if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT &&
+ !(sbi->s_features & ZONEFS_F_AGGRCNV)) {
+ zonefs_err(sb,
+ "zone size %llu doesn't match device's zone sectors %llu\n",
+ zi->i_zone_size,
+ bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT);
+ return -EINVAL;
+ }
zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE,
zone->capacity << SECTOR_SHIFT);
@@ -1456,11 +1483,11 @@ static struct dentry *zonefs_create_inode(struct dentry *parent,
struct inode *dir = d_inode(parent);
struct dentry *dentry;
struct inode *inode;
- int ret;
+ int ret = -ENOMEM;
dentry = d_alloc_name(parent, name);
if (!dentry)
- return NULL;
+ return ERR_PTR(ret);
inode = new_inode(parent->d_sb);
if (!inode)
@@ -1485,7 +1512,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent,
dput:
dput(dentry);
- return NULL;
+ return ERR_PTR(ret);
}
struct zonefs_zone_data {
@@ -1505,7 +1532,7 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
struct blk_zone *zone, *next, *end;
const char *zgroup_name;
char *file_name;
- struct dentry *dir;
+ struct dentry *dir, *dent;
unsigned int n = 0;
int ret;
@@ -1523,8 +1550,8 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
zgroup_name = "seq";
dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type);
- if (!dir) {
- ret = -ENOMEM;
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
goto free;
}
@@ -1570,8 +1597,9 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
* Use the file number within its group as file name.
*/
snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n);
- if (!zonefs_create_inode(dir, file_name, zone, type)) {
- ret = -ENOMEM;
+ dent = zonefs_create_inode(dir, file_name, zone, type);
+ if (IS_ERR(dent)) {
+ ret = PTR_ERR(dent);
goto free;
}
@@ -1905,18 +1933,18 @@ static int __init zonefs_init(void)
if (ret)
return ret;
- ret = register_filesystem(&zonefs_type);
+ ret = zonefs_sysfs_init();
if (ret)
goto destroy_inodecache;
- ret = zonefs_sysfs_init();
+ ret = register_filesystem(&zonefs_type);
if (ret)
- goto unregister_fs;
+ goto sysfs_exit;
return 0;
-unregister_fs:
- unregister_filesystem(&zonefs_type);
+sysfs_exit:
+ zonefs_sysfs_exit();
destroy_inodecache:
zonefs_destroy_inodecache();
@@ -1925,9 +1953,9 @@ destroy_inodecache:
static void __exit zonefs_exit(void)
{
+ unregister_filesystem(&zonefs_type);
zonefs_sysfs_exit();
zonefs_destroy_inodecache();
- unregister_filesystem(&zonefs_type);
}
MODULE_AUTHOR("Damien Le Moal");
diff --git a/fs/zonefs/sysfs.c b/fs/zonefs/sysfs.c
index 9cb6755ce39a..9920689dc098 100644
--- a/fs/zonefs/sysfs.c
+++ b/fs/zonefs/sysfs.c
@@ -15,11 +15,6 @@ struct zonefs_sysfs_attr {
ssize_t (*show)(struct zonefs_sb_info *sbi, char *buf);
};
-static inline struct zonefs_sysfs_attr *to_attr(struct attribute *attr)
-{
- return container_of(attr, struct zonefs_sysfs_attr, attr);
-}
-
#define ZONEFS_SYSFS_ATTR_RO(name) \
static struct zonefs_sysfs_attr zonefs_sysfs_attr_##name = __ATTR_RO(name)
diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
index 4b3de66c3233..1dbe78119ff1 100644
--- a/fs/zonefs/zonefs.h
+++ b/fs/zonefs/zonefs.h
@@ -39,8 +39,10 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone)
return ZONEFS_ZTYPE_SEQ;
}
-#define ZONEFS_ZONE_OPEN (1 << 0)
-#define ZONEFS_ZONE_ACTIVE (1 << 1)
+#define ZONEFS_ZONE_OPEN (1U << 0)
+#define ZONEFS_ZONE_ACTIVE (1U << 1)
+#define ZONEFS_ZONE_OFFLINE (1U << 2)
+#define ZONEFS_ZONE_READONLY (1U << 3)
/*
* In-memory inode data.