summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c8
-rw-r--r--fs/ceph/addr.c85
-rw-r--r--fs/ceph/caps.c55
-rw-r--r--fs/ceph/dir.c17
-rw-r--r--fs/ceph/file.c3
-rw-r--r--fs/ceph/mds_client.c4
-rw-r--r--fs/ceph/metric.c8
-rw-r--r--fs/ceph/snap.c9
-rw-r--r--fs/ceph/super.h13
-rw-r--r--fs/dax.c14
-rw-r--r--fs/debugfs/file.c9
-rw-r--r--fs/f2fs/checkpoint.c7
-rw-r--r--fs/f2fs/compress.c41
-rw-r--r--fs/f2fs/data.c71
-rw-r--r--fs/f2fs/dir.c9
-rw-r--r--fs/f2fs/f2fs.h124
-rw-r--r--fs/f2fs/file.c77
-rw-r--r--fs/f2fs/gc.c43
-rw-r--r--fs/f2fs/inode.c207
-rw-r--r--fs/f2fs/iostat.c1
-rw-r--r--fs/f2fs/namei.c32
-rw-r--r--fs/f2fs/node.c45
-rw-r--r--fs/f2fs/node.h3
-rw-r--r--fs/f2fs/recovery.c75
-rw-r--r--fs/f2fs/segment.c162
-rw-r--r--fs/f2fs/super.c252
-rw-r--r--fs/f2fs/sysfs.c279
-rw-r--r--fs/f2fs/xattr.c6
-rw-r--r--fs/f2fs/xattr.h1
-rw-r--r--fs/file_table.c14
-rw-r--r--fs/fuse/virtio_fs.c3
-rw-r--r--fs/gfs2/aops.c19
-rw-r--r--fs/gfs2/bmap.c4
-rw-r--r--fs/gfs2/file.c5
-rw-r--r--fs/gfs2/glock.c4
-rw-r--r--fs/gfs2/glops.c69
-rw-r--r--fs/gfs2/incore.h12
-rw-r--r--fs/gfs2/lock_dlm.c23
-rw-r--r--fs/gfs2/log.c11
-rw-r--r--fs/gfs2/lops.c21
-rw-r--r--fs/gfs2/ops_fstype.c15
-rw-r--r--fs/gfs2/quota.c26
-rw-r--r--fs/gfs2/recovery.c28
-rw-r--r--fs/gfs2/rgrp.c2
-rw-r--r--fs/gfs2/super.c215
-rw-r--r--fs/gfs2/super.h1
-rw-r--r--fs/gfs2/sys.c4
-rw-r--r--fs/gfs2/trans.c3
-rw-r--r--fs/gfs2/util.c49
-rw-r--r--fs/gfs2/util.h3
-rw-r--r--fs/inode.c6
-rw-r--r--fs/iomap/buffered-io.c2
-rw-r--r--fs/kernfs/dir.c2
-rw-r--r--fs/lockd/clntlock.c6
-rw-r--r--fs/namei.c3
-rw-r--r--fs/nfs/client.c32
-rw-r--r--fs/nfs/fs_context.c67
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs3client.c13
-rw-r--r--fs/nfs/nfs42proc.c25
-rw-r--r--fs/nfs/nfs42xattr.c79
-rw-r--r--fs/nfs/nfs42xdr.c621
-rw-r--r--fs/nfs/nfs4client.c44
-rw-r--r--fs/nfs/nfs4proc.c9
-rw-r--r--fs/nfs/nfs4state.c3
-rw-r--r--fs/nfs/super.c18
-rw-r--r--fs/nfs/sysfs.c235
-rw-r--r--fs/nfs/sysfs.h10
-rw-r--r--fs/notify/fanotify/fanotify_user.c14
-rw-r--r--fs/ntfs3/attrib.c2
-rw-r--r--fs/ntfs3/attrlist.c7
-rw-r--r--fs/ntfs3/bitmap.c10
-rw-r--r--fs/ntfs3/file.c6
-rw-r--r--fs/ntfs3/frecord.c58
-rw-r--r--fs/ntfs3/fslog.c40
-rw-r--r--fs/ntfs3/fsntfs.c99
-rw-r--r--fs/ntfs3/index.c20
-rw-r--r--fs/ntfs3/inode.c23
-rw-r--r--fs/ntfs3/lznt.c6
-rw-r--r--fs/ntfs3/namei.c31
-rw-r--r--fs/ntfs3/ntfs.h117
-rw-r--r--fs/ntfs3/ntfs_fs.h31
-rw-r--r--fs/ntfs3/record.c14
-rw-r--r--fs/ntfs3/run.c4
-rw-r--r--fs/ntfs3/super.c280
-rw-r--r--fs/ntfs3/xattr.c20
-rw-r--r--fs/ocfs2/Kconfig6
-rw-r--r--fs/overlayfs/overlayfs.h41
-rw-r--r--fs/overlayfs/params.c532
-rw-r--r--fs/overlayfs/params.h42
-rw-r--r--fs/overlayfs/super.c530
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/smb/client/cifsglob.h2
-rw-r--r--fs/smb/client/smbdirect.c2
-rw-r--r--fs/squashfs/block.c27
-rw-r--r--fs/sysfs/group.c12
-rw-r--r--fs/xfs/libxfs/xfs_ag.c2
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c289
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h24
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c8
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c3
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c32
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c3
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c22
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c8
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c10
-rw-r--r--fs/xfs/libxfs/xfs_sb.c56
-rw-r--r--fs/xfs/xfs_extent_busy.c36
-rw-r--r--fs/xfs/xfs_extent_busy.h6
-rw-r--r--fs/xfs/xfs_extfree_item.c75
-rw-r--r--fs/xfs/xfs_fsmap.c255
-rw-r--r--fs/xfs/xfs_log.c47
-rw-r--r--fs/xfs/xfs_notify_failure.c9
-rw-r--r--fs/xfs/xfs_reflink.c3
-rw-r--r--fs/xfs/xfs_trace.h25
-rw-r--r--fs/xfs/xfs_trans_ail.c2
119 files changed, 4017 insertions, 2250 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9c7fd6fd8095..e1c45341719b 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -413,17 +413,19 @@ static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t
afs_op_set_vnode(op, 0, vnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
- op->store.write_iter = iter;
op->store.pos = pos;
op->store.size = size;
- op->store.i_size = max(pos + size, vnode->netfs.remote_i_size);
op->store.laundering = laundering;
- op->mtime = vnode->netfs.inode.i_mtime;
op->flags |= AFS_OPERATION_UNINTR;
op->ops = &afs_store_data_operation;
try_next_key:
afs_begin_vnode_operation(op);
+
+ op->store.write_iter = iter;
+ op->store.i_size = max(pos + size, vnode->netfs.remote_i_size);
+ op->mtime = vnode->netfs.inode.i_mtime;
+
afs_wait_for_operation(op);
switch (op->error) {
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6bb251a4d613..59cbfb80edbd 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -187,16 +187,42 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
struct inode *inode = rreq->inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_layout *lo = &ci->i_layout;
+ unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
+ loff_t end = rreq->start + rreq->len, new_end;
+ struct ceph_netfs_request_data *priv = rreq->netfs_priv;
+ unsigned long max_len;
u32 blockoff;
- u64 blockno;
- /* Expand the start downward */
- blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
- rreq->start = blockno * lo->stripe_unit;
- rreq->len += blockoff;
+ if (priv) {
+ /* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */
+ if (priv->file_ra_disabled)
+ max_pages = 0;
+ else
+ max_pages = priv->file_ra_pages;
+
+ }
- /* Now, round up the length to the next block */
- rreq->len = roundup(rreq->len, lo->stripe_unit);
+ /* Readahead is disabled */
+ if (!max_pages)
+ return;
+
+ max_len = max_pages << PAGE_SHIFT;
+
+ /*
+ * Try to expand the length forward by rounding up it to the next
+ * block, but do not exceed the file size, unless the original
+ * request already exceeds it.
+ */
+ new_end = min(round_up(end, lo->stripe_unit), rreq->i_size);
+ if (new_end > end && new_end <= rreq->start + max_len)
+ rreq->len = new_end - rreq->start;
+
+ /* Try to expand the start downward */
+ div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
+ if (rreq->len + blockoff <= max_len) {
+ rreq->start -= blockoff;
+ rreq->len += blockoff;
+ }
}
static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
@@ -362,18 +388,28 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
{
struct inode *inode = rreq->inode;
int got = 0, want = CEPH_CAP_FILE_CACHE;
+ struct ceph_netfs_request_data *priv;
int ret = 0;
if (rreq->origin != NETFS_READAHEAD)
return 0;
+ priv = kzalloc(sizeof(*priv), GFP_NOFS);
+ if (!priv)
+ return -ENOMEM;
+
if (file) {
struct ceph_rw_context *rw_ctx;
struct ceph_file_info *fi = file->private_data;
+ priv->file_ra_pages = file->f_ra.ra_pages;
+ priv->file_ra_disabled = file->f_mode & FMODE_RANDOM;
+
rw_ctx = ceph_find_rw_context(fi);
- if (rw_ctx)
+ if (rw_ctx) {
+ rreq->netfs_priv = priv;
return 0;
+ }
}
/*
@@ -383,27 +419,40 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got);
if (ret < 0) {
dout("start_read %p, error getting cap\n", inode);
- return ret;
+ goto out;
}
if (!(got & want)) {
dout("start_read %p, no cache cap\n", inode);
- return -EACCES;
+ ret = -EACCES;
+ goto out;
+ }
+ if (ret == 0) {
+ ret = -EACCES;
+ goto out;
}
- if (ret == 0)
- return -EACCES;
- rreq->netfs_priv = (void *)(uintptr_t)got;
- return 0;
+ priv->caps = got;
+ rreq->netfs_priv = priv;
+
+out:
+ if (ret < 0)
+ kfree(priv);
+
+ return ret;
}
static void ceph_netfs_free_request(struct netfs_io_request *rreq)
{
- struct ceph_inode_info *ci = ceph_inode(rreq->inode);
- int got = (uintptr_t)rreq->netfs_priv;
+ struct ceph_netfs_request_data *priv = rreq->netfs_priv;
+
+ if (!priv)
+ return;
- if (got)
- ceph_put_cap_refs(ci, got);
+ if (priv->caps)
+ ceph_put_cap_refs(ceph_inode(rreq->inode), priv->caps);
+ kfree(priv);
+ rreq->netfs_priv = NULL;
}
const struct netfs_request_ops ceph_netfs_ops = {
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2321e5ddb664..e2bb0d0072da 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3109,6 +3109,12 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
}
if (had & CEPH_CAP_FILE_WR) {
if (--ci->i_wr_ref == 0) {
+ /*
+ * The Fb caps will always be took and released
+ * together with the Fw caps.
+ */
+ WARN_ON_ONCE(ci->i_wb_ref);
+
last++;
check_flushsnaps = true;
if (ci->i_wrbuffer_ref_head == 0 &&
@@ -3560,6 +3566,15 @@ static void handle_cap_grant(struct inode *inode,
}
BUG_ON(cap->issued & ~cap->implemented);
+ /* don't let check_caps skip sending a response to MDS for revoke msgs */
+ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
+ cap->mds_wanted = 0;
+ if (cap == ci->i_auth_cap)
+ check_caps = 1; /* check auth cap only */
+ else
+ check_caps = 2; /* check all caps */
+ }
+
if (extra_info->inline_version > 0 &&
extra_info->inline_version >= ci->i_inline_version) {
ci->i_inline_version = extra_info->inline_version;
@@ -4086,6 +4101,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
struct cap_extra_info extra_info = {};
bool queue_trunc;
bool close_sessions = false;
+ bool do_cap_release = false;
dout("handle_caps from mds%d\n", session->s_mds);
@@ -4192,17 +4208,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
- if (op == CEPH_CAP_OP_IMPORT) {
- cap = ceph_get_cap(mdsc, NULL);
- cap->cap_ino = vino.ino;
- cap->queue_release = 1;
- cap->cap_id = le64_to_cpu(h->cap_id);
- cap->mseq = mseq;
- cap->seq = seq;
- cap->issue_seq = seq;
- spin_lock(&session->s_cap_lock);
- __ceph_queue_cap_release(session, cap);
- spin_unlock(&session->s_cap_lock);
+ switch (op) {
+ case CEPH_CAP_OP_IMPORT:
+ case CEPH_CAP_OP_REVOKE:
+ case CEPH_CAP_OP_GRANT:
+ do_cap_release = true;
+ break;
+ default:
+ break;
}
goto flush_cap_releases;
}
@@ -4252,6 +4265,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
inode, ceph_ino(inode), ceph_snap(inode),
session->s_mds);
spin_unlock(&ci->i_ceph_lock);
+ switch (op) {
+ case CEPH_CAP_OP_REVOKE:
+ case CEPH_CAP_OP_GRANT:
+ do_cap_release = true;
+ break;
+ default:
+ break;
+ }
goto flush_cap_releases;
}
@@ -4302,6 +4323,18 @@ flush_cap_releases:
* along for the mds (who clearly thinks we still have this
* cap).
*/
+ if (do_cap_release) {
+ cap = ceph_get_cap(mdsc, NULL);
+ cap->cap_ino = vino.ino;
+ cap->queue_release = 1;
+ cap->cap_id = le64_to_cpu(h->cap_id);
+ cap->mseq = mseq;
+ cap->seq = seq;
+ cap->issue_seq = seq;
+ spin_lock(&session->s_cap_lock);
+ __ceph_queue_cap_release(session, cap);
+ spin_unlock(&session->s_cap_lock);
+ }
ceph_flush_cap_releases(mdsc, session);
goto done;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index cb67ac821f0e..4a2b39d9a61a 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -886,7 +886,8 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_args.mknod.mode = cpu_to_le32(mode);
req->r_args.mknod.rdev = cpu_to_le32(rdev);
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
+ CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
if (as_ctx.pagelist) {
req->r_pagelist = as_ctx.pagelist;
@@ -953,7 +954,8 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
+ CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
if (as_ctx.pagelist) {
req->r_pagelist = as_ctx.pagelist;
@@ -1022,7 +1024,8 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
ihold(dir);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_args.mkdir.mode = cpu_to_le32(mode);
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
+ CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
if (as_ctx.pagelist) {
req->r_pagelist = as_ctx.pagelist;
@@ -1079,7 +1082,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_parent = dir;
ihold(dir);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
/* release LINK_SHARED on source inode (mds will lock it) */
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
@@ -1218,7 +1221,7 @@ retry:
req->r_num_caps = 2;
req->r_parent = dir;
ihold(dir);
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
req->r_inode_drop = ceph_drop_caps_for_unlink(inode);
@@ -1320,9 +1323,9 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir,
req->r_parent = new_dir;
ihold(new_dir);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
- req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
/* release LINK_RDCACHE on source inode (mds will lock it) */
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index b1925232dc08..63efe5389783 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -791,7 +791,8 @@ retry:
if (flags & O_CREAT) {
struct ceph_file_layout lo;
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
+ CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
if (as_ctx.pagelist) {
req->r_pagelist = as_ctx.pagelist;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 4c0f22acf53d..66048a86c480 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -645,6 +645,7 @@ bad:
err = -EIO;
out_bad:
pr_err("mds parse_reply err %d\n", err);
+ ceph_msg_dump(msg);
return err;
}
@@ -3538,6 +3539,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
bad:
pr_err("mdsc_handle_forward decode error err=%d\n", err);
+ ceph_msg_dump(msg);
}
static int __decode_session_metadata(void **p, void *end,
@@ -5258,6 +5260,7 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
bad:
pr_err("error decoding fsmap %d. Shutting down mount.\n", err);
ceph_umount_begin(mdsc->fsc->sb);
+ ceph_msg_dump(msg);
err_out:
mutex_lock(&mdsc->mutex);
mdsc->mdsmap_err = err;
@@ -5326,6 +5329,7 @@ bad_unlock:
bad:
pr_err("error decoding mdsmap %d. Shutting down mount.\n", err);
ceph_umount_begin(mdsc->fsc->sb);
+ ceph_msg_dump(msg);
return;
}
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index c47347d2e84e..cce78d769f55 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -36,6 +36,14 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
s32 items = 0;
s32 len;
+ /* Do not send the metrics until the MDS rank is ready */
+ mutex_lock(&mdsc->mutex);
+ if (ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) != CEPH_MDS_STATE_ACTIVE) {
+ mutex_unlock(&mdsc->mutex);
+ return false;
+ }
+ mutex_unlock(&mdsc->mutex);
+
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
+ sizeof(*meta) + sizeof(*dlease) + sizeof(*files)
+ sizeof(*icaps) + sizeof(*inodes) + sizeof(*rsize)
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 2e73ba62bd7a..343d738448dc 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -675,14 +675,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
return 0;
}
- /* Fb cap still in use, delay it */
- if (ci->i_wb_ref) {
+ /*
+ * Defer flushing the capsnap if the dirty buffer not flushed yet.
+ * And trigger to flush the buffer immediately.
+ */
+ if (ci->i_wrbuffer_ref) {
dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu "
"used WRBUFFER, delaying\n", __func__, inode,
ceph_vinop(inode), capsnap, capsnap->context,
capsnap->context->seq, ceph_cap_string(capsnap->dirty),
capsnap->size);
- capsnap->writing = 1;
+ ceph_queue_writeback(inode);
return 0;
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index d24bf0db5234..3bfddf34d488 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -451,6 +451,19 @@ struct ceph_inode_info {
unsigned long i_work_mask;
};
+struct ceph_netfs_request_data {
+ int caps;
+
+ /*
+ * Maximum size of a file readahead request.
+ * The fadvise could update the bdi's default ra_pages.
+ */
+ unsigned int file_ra_pages;
+
+ /* Set it if fadvise disables file readahead entirely */
+ bool file_ra_disabled;
+};
+
static inline struct ceph_inode_info *
ceph_inode(const struct inode *inode)
{
diff --git a/fs/dax.c b/fs/dax.c
index 2ababb89918d..906ecbd541a3 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1148,7 +1148,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size,
if (!zero_edge) {
ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL);
if (ret)
- return ret;
+ return dax_mem2blk_err(ret);
}
if (copy_all) {
@@ -1310,7 +1310,7 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
out_unlock:
dax_read_unlock(id);
- return ret;
+ return dax_mem2blk_err(ret);
}
int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
@@ -1342,7 +1342,8 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
ret = dax_direct_access(iomap->dax_dev, pgoff, 1, DAX_ACCESS, &kaddr,
NULL);
if (ret < 0)
- return ret;
+ return dax_mem2blk_err(ret);
+
memset(kaddr + offset, 0, size);
if (iomap->flags & IOMAP_F_SHARED)
ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap,
@@ -1498,7 +1499,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
DAX_ACCESS, &kaddr, NULL);
- if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
+ if (map_len == -EHWPOISON && iov_iter_rw(iter) == WRITE) {
map_len = dax_direct_access(dax_dev, pgoff,
PHYS_PFN(size), DAX_RECOVERY_WRITE,
&kaddr, NULL);
@@ -1506,7 +1507,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
recovery = true;
}
if (map_len < 0) {
- ret = map_len;
+ ret = dax_mem2blk_err(map_len);
break;
}
@@ -1830,7 +1831,6 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
vm_fault_t ret = VM_FAULT_FALLBACK;
pgoff_t max_pgoff;
void *entry;
- int error;
if (vmf->flags & FAULT_FLAG_WRITE)
iter.flags |= IOMAP_WRITE;
@@ -1877,7 +1877,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
}
iter.pos = (loff_t)xas.xa_index << PAGE_SHIFT;
- while ((error = iomap_iter(&iter, ops)) > 0) {
+ while (iomap_iter(&iter, ops) > 0) {
if (iomap_length(&iter) < PMD_SIZE)
continue; /* actually breaks out of the loop */
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 1f971c880dde..b7711888dd17 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -940,15 +940,6 @@ static const struct file_operations fops_str_wo = {
* This function creates a file in debugfs with the given name that
* contains the value of the variable @value. If the @mode variable is so
* set, it can be read from, and written to.
- *
- * This function will return a pointer to a dentry if it succeeds. This
- * pointer must be passed to the debugfs_remove() function when the file is
- * to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be
- * returned.
- *
- * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will
- * be returned.
*/
void debugfs_create_str(const char *name, umode_t mode,
struct dentry *parent, char **value)
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 64b3860f50ee..8fd3b7f9fb88 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
unsigned char reason)
{
f2fs_build_fault_attr(sbi, 0, 0);
- set_ckpt_flags(sbi, CP_ERROR_FLAG);
- if (!end_io) {
+ if (!end_io)
f2fs_flush_merged_writes(sbi);
-
- f2fs_handle_stop(sbi, reason);
- }
+ f2fs_handle_critical_error(sbi, reason, end_io);
}
/*
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 11653fa79289..236d890f560b 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -55,6 +55,7 @@ struct f2fs_compress_ops {
int (*init_decompress_ctx)(struct decompress_io_ctx *dic);
void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic);
int (*decompress_pages)(struct decompress_io_ctx *dic);
+ bool (*is_level_valid)(int level);
};
static unsigned int offset_in_cluster(struct compress_ctx *cc, pgoff_t index)
@@ -308,17 +309,25 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic)
return 0;
}
+static bool lz4_is_level_valid(int lvl)
+{
+#ifdef CONFIG_F2FS_FS_LZ4HC
+ return !lvl || (lvl >= LZ4HC_MIN_CLEVEL && lvl <= LZ4HC_MAX_CLEVEL);
+#else
+ return lvl == 0;
+#endif
+}
+
static const struct f2fs_compress_ops f2fs_lz4_ops = {
.init_compress_ctx = lz4_init_compress_ctx,
.destroy_compress_ctx = lz4_destroy_compress_ctx,
.compress_pages = lz4_compress_pages,
.decompress_pages = lz4_decompress_pages,
+ .is_level_valid = lz4_is_level_valid,
};
#endif
#ifdef CONFIG_F2FS_FS_ZSTD
-#define F2FS_ZSTD_DEFAULT_CLEVEL 1
-
static int zstd_init_compress_ctx(struct compress_ctx *cc)
{
zstd_parameters params;
@@ -327,6 +336,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc)
unsigned int workspace_size;
unsigned char level = F2FS_I(cc->inode)->i_compress_level;
+ /* Need to remain this for backward compatibility */
if (!level)
level = F2FS_ZSTD_DEFAULT_CLEVEL;
@@ -477,6 +487,11 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic)
return 0;
}
+static bool zstd_is_level_valid(int lvl)
+{
+ return lvl >= zstd_min_clevel() && lvl <= zstd_max_clevel();
+}
+
static const struct f2fs_compress_ops f2fs_zstd_ops = {
.init_compress_ctx = zstd_init_compress_ctx,
.destroy_compress_ctx = zstd_destroy_compress_ctx,
@@ -484,6 +499,7 @@ static const struct f2fs_compress_ops f2fs_zstd_ops = {
.init_decompress_ctx = zstd_init_decompress_ctx,
.destroy_decompress_ctx = zstd_destroy_decompress_ctx,
.decompress_pages = zstd_decompress_pages,
+ .is_level_valid = zstd_is_level_valid,
};
#endif
@@ -542,6 +558,16 @@ bool f2fs_is_compress_backend_ready(struct inode *inode)
return f2fs_cops[F2FS_I(inode)->i_compress_algorithm];
}
+bool f2fs_is_compress_level_valid(int alg, int lvl)
+{
+ const struct f2fs_compress_ops *cops = f2fs_cops[alg];
+
+ if (cops->is_level_valid)
+ return cops->is_level_valid(lvl);
+
+ return lvl == 0;
+}
+
static mempool_t *compress_page_pool;
static int num_compress_pages = 512;
module_param(num_compress_pages, uint, 0444);
@@ -743,8 +769,8 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
ret = -EFSCORRUPTED;
/* Avoid f2fs_commit_super in irq context */
- if (in_task)
- f2fs_save_errors(sbi, ERROR_FAIL_DECOMPRESSION);
+ if (!in_task)
+ f2fs_handle_error_async(sbi, ERROR_FAIL_DECOMPRESSION);
else
f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION);
goto out_release;
@@ -1215,6 +1241,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
unsigned int last_index = cc->cluster_size - 1;
loff_t psize;
int i, err;
+ bool quota_inode = IS_NOQUOTA(inode);
/* we should bypass data pages to proceed the kworker jobs */
if (unlikely(f2fs_cp_error(sbi))) {
@@ -1222,7 +1249,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
goto out_free;
}
- if (IS_NOQUOTA(inode)) {
+ if (quota_inode) {
/*
* We need to wait for node_write to avoid block allocation during
* checkpoint. This can only happen to quota writes which can cause
@@ -1344,7 +1371,7 @@ unlock_continue:
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
f2fs_put_dnode(&dn);
- if (IS_NOQUOTA(inode))
+ if (quota_inode)
f2fs_up_read(&sbi->node_write);
else
f2fs_unlock_op(sbi);
@@ -1370,7 +1397,7 @@ out_put_cic:
out_put_dnode:
f2fs_put_dnode(&dn);
out_unlock_op:
- if (IS_NOQUOTA(inode))
+ if (quota_inode)
f2fs_up_read(&sbi->node_write);
else
f2fs_unlock_op(sbi);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7165b1202f53..5882afe71d82 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -383,6 +383,17 @@ static void f2fs_write_end_io(struct bio *bio)
bio_put(bio);
}
+#ifdef CONFIG_BLK_DEV_ZONED
+static void f2fs_zone_write_end_io(struct bio *bio)
+{
+ struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
+
+ bio->bi_private = io->bi_private;
+ complete(&io->zone_wait);
+ f2fs_write_end_io(bio);
+}
+#endif
+
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
block_t blk_addr, sector_t *sector)
{
@@ -639,6 +650,11 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
+#ifdef CONFIG_BLK_DEV_ZONED
+ init_completion(&sbi->write_io[i][j].zone_wait);
+ sbi->write_io[i][j].zone_pending_bio = NULL;
+ sbi->write_io[i][j].bi_private = NULL;
+#endif
}
}
@@ -965,6 +981,26 @@ alloc_new:
return 0;
}
+#ifdef CONFIG_BLK_DEV_ZONED
+static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+ int devi = 0;
+
+ if (f2fs_is_multi_device(sbi)) {
+ devi = f2fs_target_device_index(sbi, blkaddr);
+ if (blkaddr < FDEV(devi).start_blk ||
+ blkaddr > FDEV(devi).end_blk) {
+ f2fs_err(sbi, "Invalid block %x", blkaddr);
+ return false;
+ }
+ blkaddr -= FDEV(devi).start_blk;
+ }
+ return bdev_zoned_model(FDEV(devi).bdev) == BLK_ZONED_HM &&
+ f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
+ (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
+}
+#endif
+
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
@@ -975,6 +1011,16 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
f2fs_bug_on(sbi, is_read_io(fio->op));
f2fs_down_write(&io->io_rwsem);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
+ wait_for_completion_io(&io->zone_wait);
+ bio_put(io->zone_pending_bio);
+ io->zone_pending_bio = NULL;
+ io->bi_private = NULL;
+ }
+#endif
+
next:
if (fio->in_list) {
spin_lock(&io->io_lock);
@@ -1038,6 +1084,18 @@ skip:
if (fio->in_list)
goto next;
out:
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
+ is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
+ bio_get(io->bio);
+ reinit_completion(&io->zone_wait);
+ io->bi_private = io->bio->bi_private;
+ io->bio->bi_private = io;
+ io->bio->bi_end_io = f2fs_zone_write_end_io;
+ io->zone_pending_bio = io->bio;
+ __submit_merged_bio(io);
+ }
+#endif
if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
!f2fs_is_checkpoint_ready(sbi))
__submit_merged_bio(io);
@@ -2173,7 +2231,6 @@ submit_and_realloc:
f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
F2FS_BLKSIZE);
*last_block_in_bio = block_nr;
- goto out;
out:
*bio_ret = bio;
return ret;
@@ -2775,6 +2832,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
unsigned offset = 0;
bool need_balance_fs = false;
+ bool quota_inode = IS_NOQUOTA(inode);
int err = 0;
struct f2fs_io_info fio = {
.sbi = sbi,
@@ -2807,6 +2865,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
if (S_ISDIR(inode->i_mode) &&
!is_sbi_flag_set(sbi, SBI_IS_CLOSE))
goto redirty_out;
+
+ /* keep data pages in remount-ro mode */
+ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
+ goto redirty_out;
goto out;
}
@@ -2832,19 +2894,19 @@ write:
goto out;
/* Dentry/quota blocks are controlled by checkpoint */
- if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
+ if (S_ISDIR(inode->i_mode) || quota_inode) {
/*
* We need to wait for node_write to avoid block allocation during
* checkpoint. This can only happen to quota writes which can cause
* the below discard race condition.
*/
- if (IS_NOQUOTA(inode))
+ if (quota_inode)
f2fs_down_read(&sbi->node_write);
fio.need_lock = LOCK_DONE;
err = f2fs_do_write_data_page(&fio);
- if (IS_NOQUOTA(inode))
+ if (quota_inode)
f2fs_up_read(&sbi->node_write);
goto done;
@@ -4067,7 +4129,6 @@ const struct address_space_operations f2fs_dblock_aops = {
.migrate_folio = filemap_migrate_folio,
.invalidate_folio = f2fs_invalidate_folio,
.release_folio = f2fs_release_folio,
- .direct_IO = noop_direct_IO,
.bmap = f2fs_bmap,
.swap_activate = f2fs_swap_activate,
.swap_deactivate = f2fs_swap_deactivate,
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 887e55988450..d635c58cf5a3 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -775,8 +775,15 @@ int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname,
{
int err = -EAGAIN;
- if (f2fs_has_inline_dentry(dir))
+ if (f2fs_has_inline_dentry(dir)) {
+ /*
+ * Should get i_xattr_sem to keep the lock order:
+ * i_xattr_sem -> inode_page lock used by f2fs_setxattr.
+ */
+ f2fs_down_read(&F2FS_I(dir)->i_xattr_sem);
err = f2fs_add_inline_entry(dir, fname, inode, ino, mode);
+ f2fs_up_read(&F2FS_I(dir)->i_xattr_sem);
+ }
if (err == -EAGAIN)
err = f2fs_add_regular_entry(dir, fname, inode, ino, mode);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d211ee89c158..c7cb2177b252 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -80,34 +80,34 @@ extern const char *f2fs_fault_name[FAULT_MAX];
/*
* For mount options
*/
-#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002
-#define F2FS_MOUNT_DISCARD 0x00000004
-#define F2FS_MOUNT_NOHEAP 0x00000008
-#define F2FS_MOUNT_XATTR_USER 0x00000010
-#define F2FS_MOUNT_POSIX_ACL 0x00000020
-#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
-#define F2FS_MOUNT_INLINE_XATTR 0x00000080
-#define F2FS_MOUNT_INLINE_DATA 0x00000100
-#define F2FS_MOUNT_INLINE_DENTRY 0x00000200
-#define F2FS_MOUNT_FLUSH_MERGE 0x00000400
-#define F2FS_MOUNT_NOBARRIER 0x00000800
-#define F2FS_MOUNT_FASTBOOT 0x00001000
-#define F2FS_MOUNT_READ_EXTENT_CACHE 0x00002000
-#define F2FS_MOUNT_DATA_FLUSH 0x00008000
-#define F2FS_MOUNT_FAULT_INJECTION 0x00010000
-#define F2FS_MOUNT_USRQUOTA 0x00080000
-#define F2FS_MOUNT_GRPQUOTA 0x00100000
-#define F2FS_MOUNT_PRJQUOTA 0x00200000
-#define F2FS_MOUNT_QUOTA 0x00400000
-#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000
-#define F2FS_MOUNT_RESERVE_ROOT 0x01000000
-#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000
-#define F2FS_MOUNT_NORECOVERY 0x04000000
-#define F2FS_MOUNT_ATGC 0x08000000
-#define F2FS_MOUNT_MERGE_CHECKPOINT 0x10000000
-#define F2FS_MOUNT_GC_MERGE 0x20000000
-#define F2FS_MOUNT_COMPRESS_CACHE 0x40000000
-#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x80000000
+#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000001
+#define F2FS_MOUNT_DISCARD 0x00000002
+#define F2FS_MOUNT_NOHEAP 0x00000004
+#define F2FS_MOUNT_XATTR_USER 0x00000008
+#define F2FS_MOUNT_POSIX_ACL 0x00000010
+#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000020
+#define F2FS_MOUNT_INLINE_XATTR 0x00000040
+#define F2FS_MOUNT_INLINE_DATA 0x00000080
+#define F2FS_MOUNT_INLINE_DENTRY 0x00000100
+#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
+#define F2FS_MOUNT_NOBARRIER 0x00000400
+#define F2FS_MOUNT_FASTBOOT 0x00000800
+#define F2FS_MOUNT_READ_EXTENT_CACHE 0x00001000
+#define F2FS_MOUNT_DATA_FLUSH 0x00002000
+#define F2FS_MOUNT_FAULT_INJECTION 0x00004000
+#define F2FS_MOUNT_USRQUOTA 0x00008000
+#define F2FS_MOUNT_GRPQUOTA 0x00010000
+#define F2FS_MOUNT_PRJQUOTA 0x00020000
+#define F2FS_MOUNT_QUOTA 0x00040000
+#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00080000
+#define F2FS_MOUNT_RESERVE_ROOT 0x00100000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x00200000
+#define F2FS_MOUNT_NORECOVERY 0x00400000
+#define F2FS_MOUNT_ATGC 0x00800000
+#define F2FS_MOUNT_MERGE_CHECKPOINT 0x01000000
+#define F2FS_MOUNT_GC_MERGE 0x02000000
+#define F2FS_MOUNT_COMPRESS_CACHE 0x04000000
+#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x08000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -162,6 +162,7 @@ struct f2fs_mount_info {
int fs_mode; /* fs mode: LFS or ADAPTIVE */
int bggc_mode; /* bggc mode: off, on or sync */
int memory_mode; /* memory mode */
+ int errors; /* errors parameter */
int discard_unit; /*
* discard command's offset/size should
* be aligned to this unit: block,
@@ -185,21 +186,21 @@ struct f2fs_mount_info {
unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
};
-#define F2FS_FEATURE_ENCRYPT 0x0001
-#define F2FS_FEATURE_BLKZONED 0x0002
-#define F2FS_FEATURE_ATOMIC_WRITE 0x0004
-#define F2FS_FEATURE_EXTRA_ATTR 0x0008
-#define F2FS_FEATURE_PRJQUOTA 0x0010
-#define F2FS_FEATURE_INODE_CHKSUM 0x0020
-#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040
-#define F2FS_FEATURE_QUOTA_INO 0x0080
-#define F2FS_FEATURE_INODE_CRTIME 0x0100
-#define F2FS_FEATURE_LOST_FOUND 0x0200
-#define F2FS_FEATURE_VERITY 0x0400
-#define F2FS_FEATURE_SB_CHKSUM 0x0800
-#define F2FS_FEATURE_CASEFOLD 0x1000
-#define F2FS_FEATURE_COMPRESSION 0x2000
-#define F2FS_FEATURE_RO 0x4000
+#define F2FS_FEATURE_ENCRYPT 0x00000001
+#define F2FS_FEATURE_BLKZONED 0x00000002
+#define F2FS_FEATURE_ATOMIC_WRITE 0x00000004
+#define F2FS_FEATURE_EXTRA_ATTR 0x00000008
+#define F2FS_FEATURE_PRJQUOTA 0x00000010
+#define F2FS_FEATURE_INODE_CHKSUM 0x00000020
+#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x00000040
+#define F2FS_FEATURE_QUOTA_INO 0x00000080
+#define F2FS_FEATURE_INODE_CRTIME 0x00000100
+#define F2FS_FEATURE_LOST_FOUND 0x00000200
+#define F2FS_FEATURE_VERITY 0x00000400
+#define F2FS_FEATURE_SB_CHKSUM 0x00000800
+#define F2FS_FEATURE_CASEFOLD 0x00001000
+#define F2FS_FEATURE_COMPRESSION 0x00002000
+#define F2FS_FEATURE_RO 0x00004000
#define __F2FS_HAS_FEATURE(raw_super, mask) \
((raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -1175,6 +1176,7 @@ enum iostat_type {
/* other */
FS_DISCARD_IO, /* discard */
FS_FLUSH_IO, /* flush */
+ FS_ZONE_RESET_IO, /* zone reset */
NR_IO_TYPE,
};
@@ -1217,6 +1219,11 @@ struct f2fs_bio_info {
struct bio *bio; /* bios to merge */
sector_t last_block_in_bio; /* last block number */
struct f2fs_io_info fio; /* store buffered io info. */
+#ifdef CONFIG_BLK_DEV_ZONED
+ struct completion zone_wait; /* condition value for the previous open zone to close */
+ struct bio *zone_pending_bio; /* pending bio for the previous zone */
+ void *bi_private; /* previous bi_private for pending bio */
+#endif
struct f2fs_rwsem io_rwsem; /* blocking op for bio */
spinlock_t io_lock; /* serialize DATA/NODE IOs */
struct list_head io_list; /* track fios */
@@ -1370,6 +1377,12 @@ enum {
MEMORY_MODE_LOW, /* memory mode for low memry devices */
};
+enum errors_option {
+ MOUNT_ERRORS_READONLY, /* remount fs ro on errors */
+ MOUNT_ERRORS_CONTINUE, /* continue on errors */
+ MOUNT_ERRORS_PANIC, /* panic on errors */
+};
+
static inline int f2fs_test_bit(unsigned int nr, char *addr);
static inline void f2fs_set_bit(unsigned int nr, char *addr);
static inline void f2fs_clear_bit(unsigned int nr, char *addr);
@@ -1427,6 +1440,8 @@ struct compress_data {
#define F2FS_COMPRESSED_PAGE_MAGIC 0xF5F2C000
+#define F2FS_ZSTD_DEFAULT_CLEVEL 1
+
#define COMPRESS_LEVEL_OFFSET 8
/* compress context */
@@ -1721,8 +1736,14 @@ struct f2fs_sb_info {
struct workqueue_struct *post_read_wq; /* post read workqueue */
- unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
- spinlock_t error_lock; /* protect errors array */
+ /*
+ * If we are in irq context, let's update error information into
+ * on-disk superblock in the work.
+ */
+ struct work_struct s_error_work;
+ unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
+ unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */
+ spinlock_t error_lock; /* protect errors/stop_reason array */
bool error_dirty; /* errors of sb is dirty */
struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
@@ -2941,6 +2962,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */
+#define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL)
+
/* Flags that should be inherited by new inodes from their parent. */
#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \
F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \
@@ -3394,6 +3417,8 @@ static inline int get_inline_xattr_addrs(struct inode *inode)
((is_inode_flag_set(i, FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
+#define F2FS_MIN_EXTRA_ATTR_SIZE (sizeof(__le32))
+
#define F2FS_TOTAL_EXTRA_ATTR_SIZE \
(offsetof(struct f2fs_inode, i_extra_end) - \
offsetof(struct f2fs_inode, i_extra_isize)) \
@@ -3432,7 +3457,6 @@ static inline bool __is_valid_data_blkaddr(block_t blkaddr)
* file.c
*/
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
-void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate(struct inode *inode);
@@ -3541,9 +3565,11 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
int f2fs_quota_sync(struct super_block *sb, int type);
loff_t max_file_blocks(struct inode *inode);
void f2fs_quota_off_umount(struct super_block *sb);
-void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
+void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
+ bool irq_context);
void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
+void f2fs_handle_error_async(struct f2fs_sb_info *sbi, unsigned char error);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi);
@@ -3815,7 +3841,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
-int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
+int f2fs_resize_fs(struct file *filp, __u64 block_count);
int __init f2fs_create_garbage_collection_cache(void);
void f2fs_destroy_garbage_collection_cache(void);
/* victim selection function for cleaning and SSR */
@@ -4213,6 +4239,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock);
void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
bool f2fs_is_compress_backend_ready(struct inode *inode);
+bool f2fs_is_compress_level_valid(int alg, int lvl);
int __init f2fs_init_compress_mempool(void);
void f2fs_destroy_compress_mempool(void);
void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task);
@@ -4277,6 +4304,7 @@ static inline bool f2fs_is_compress_backend_ready(struct inode *inode)
/* not support compression */
return false;
}
+static inline bool f2fs_is_compress_level_valid(int alg, int lvl) { return false; }
static inline struct page *f2fs_compress_control_page(struct page *page)
{
WARN_ON_ONCE(1);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 2435111a8532..093039dee992 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -149,8 +149,6 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
zero_user_segment(page, offset, PAGE_SIZE);
}
set_page_dirty(page);
- if (!PageUptodate(page))
- SetPageUptodate(page);
f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
f2fs_update_time(sbi, REQ_TIME);
@@ -546,7 +544,8 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
if (err)
return err;
- filp->f_mode |= FMODE_NOWAIT;
+ filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+ filp->f_mode |= FMODE_CAN_ODIRECT;
return dquot_file_open(inode, filp);
}
@@ -627,11 +626,6 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->ofs_in_node, nr_free);
}
-void f2fs_truncate_data_blocks(struct dnode_of_data *dn)
-{
- f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
-}
-
static int truncate_partial_data_page(struct inode *inode, u64 from,
bool cache_only)
{
@@ -2225,7 +2219,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
ret = 0;
f2fs_stop_checkpoint(sbi, false,
STOP_CP_REASON_SHUTDOWN);
- set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
trace_f2fs_shutdown(sbi, in, ret);
}
return ret;
@@ -2238,7 +2231,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
- set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
thaw_bdev(sb->s_bdev);
break;
case F2FS_GOING_DOWN_METASYNC:
@@ -2247,16 +2239,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
- set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NOSYNC:
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
- set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_METAFLUSH:
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
- set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NEED_FSCK:
set_sbi_flag(sbi, SBI_NEED_FSCK);
@@ -2593,6 +2582,11 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
inode_lock(inode);
+ if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
+ err = -EINVAL;
+ goto unlock_out;
+ }
+
/* if in-place-update policy is enabled, don't waste time here */
set_inode_flag(inode, FI_OPU_WRITE);
if (f2fs_should_update_inplace(inode, NULL)) {
@@ -2717,6 +2711,7 @@ clear_out:
clear_inode_flag(inode, FI_SKIP_WRITES);
out:
clear_inode_flag(inode, FI_OPU_WRITE);
+unlock_out:
inode_unlock(inode);
if (!err)
range->len = (u64)total << PAGE_SHIFT;
@@ -2876,6 +2871,17 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
out_src:
f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+ if (ret)
+ goto out_unlock;
+
+ src->i_mtime = src->i_ctime = current_time(src);
+ f2fs_mark_inode_dirty_sync(src, false);
+ if (src != dst) {
+ dst->i_mtime = dst->i_ctime = current_time(dst);
+ f2fs_mark_inode_dirty_sync(dst, false);
+ }
+ f2fs_update_time(sbi, REQ_TIME);
+
out_unlock:
if (src != dst)
inode_unlock(dst);
@@ -3278,7 +3284,7 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
sizeof(block_count)))
return -EFAULT;
- return f2fs_resize_fs(sbi, block_count);
+ return f2fs_resize_fs(filp, block_count);
}
static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
@@ -3375,18 +3381,29 @@ out:
return err;
}
-static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg)
+static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks)
{
- struct inode *inode = file_inode(filp);
- __u64 blocks;
-
if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
return -EOPNOTSUPP;
if (!f2fs_compressed_file(inode))
return -EINVAL;
- blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
+ *blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
+
+ return 0;
+}
+
+static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ __u64 blocks;
+ int ret;
+
+ ret = f2fs_get_compress_blocks(inode, &blocks);
+ if (ret < 0)
+ return ret;
+
return put_user(blocks, (u64 __user *)arg);
}
@@ -3455,7 +3472,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
int ret;
int writecount;
- if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
+ if (!f2fs_sb_has_compression(sbi))
return -EOPNOTSUPP;
if (!f2fs_compressed_file(inode))
@@ -3468,7 +3485,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
if (ret)
return ret;
- f2fs_balance_fs(F2FS_I_SB(inode), true);
+ f2fs_balance_fs(sbi, true);
inode_lock(inode);
@@ -3488,13 +3505,15 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
if (ret)
goto out;
+ if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
+ ret = -EPERM;
+ goto out;
+ }
+
set_inode_flag(inode, FI_COMPRESS_RELEASED);
inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode, true);
- if (!atomic_read(&F2FS_I(inode)->i_compr_blocks))
- goto out;
-
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
@@ -3625,7 +3644,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
unsigned int reserved_blocks = 0;
int ret;
- if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
+ if (!f2fs_sb_has_compression(sbi))
return -EOPNOTSUPP;
if (!f2fs_compressed_file(inode))
@@ -3641,7 +3660,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
goto out;
- f2fs_balance_fs(F2FS_I_SB(inode), true);
+ f2fs_balance_fs(sbi, true);
inode_lock(inode);
@@ -4035,7 +4054,7 @@ static int f2fs_ioc_decompress_file(struct file *filp)
if (!f2fs_compressed_file(inode))
return -EINVAL;
- f2fs_balance_fs(F2FS_I_SB(inode), true);
+ f2fs_balance_fs(sbi, true);
file_start_write(filp);
inode_lock(inode);
@@ -4110,7 +4129,7 @@ static int f2fs_ioc_compress_file(struct file *filp)
if (!f2fs_compressed_file(inode))
return -EINVAL;
- f2fs_balance_fs(F2FS_I_SB(inode), true);
+ f2fs_balance_fs(sbi, true);
file_start_write(filp);
inode_lock(inode);
@@ -4238,7 +4257,7 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case FS_IOC_SETFSLABEL:
return f2fs_ioc_setfslabel(filp, arg);
case F2FS_IOC_GET_COMPRESS_BLOCKS:
- return f2fs_get_compress_blocks(filp, arg);
+ return f2fs_ioc_get_compress_blocks(filp, arg);
case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
return f2fs_release_compress_blocks(filp, arg);
case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 61c5f9d26018..01effd3fcb6c 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
if (gc_th->gc_wake)
gc_th->gc_wake = false;
- if (try_to_freeze()) {
+ if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
stat_other_skip_bggc_count(sbi);
continue;
}
@@ -1797,7 +1797,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
{
int gc_type = gc_control->init_gc_type;
unsigned int segno = gc_control->victim_segno;
- int sec_freed = 0, seg_freed = 0, total_freed = 0;
+ int sec_freed = 0, seg_freed = 0, total_freed = 0, total_sec_freed = 0;
int ret = 0;
struct cp_control cpc;
struct gc_inode_list gc_list = {
@@ -1842,6 +1842,8 @@ gc_more:
ret = f2fs_write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
+ /* Reset due to checkpoint */
+ sec_freed = 0;
}
}
@@ -1866,15 +1868,17 @@ retry:
gc_control->should_migrate_blocks);
total_freed += seg_freed;
- if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
+ if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) {
sec_freed++;
+ total_sec_freed++;
+ }
if (gc_type == FG_GC) {
sbi->cur_victim_sec = NULL_SEGNO;
if (has_enough_free_secs(sbi, sec_freed, 0)) {
if (!gc_control->no_bg_gc &&
- sec_freed < gc_control->nr_free_secs)
+ total_sec_freed < gc_control->nr_free_secs)
goto go_gc_more;
goto stop;
}
@@ -1901,6 +1905,8 @@ retry:
ret = f2fs_write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
+ /* Reset due to checkpoint */
+ sec_freed = 0;
}
go_gc_more:
segno = NULL_SEGNO;
@@ -1913,7 +1919,7 @@ stop:
if (gc_type == FG_GC)
f2fs_unpin_all_sections(sbi, true);
- trace_f2fs_gc_end(sbi->sb, ret, total_freed, sec_freed,
+ trace_f2fs_gc_end(sbi->sb, ret, total_freed, total_sec_freed,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
@@ -1927,7 +1933,7 @@ stop:
put_gc_inode(&gc_list);
if (gc_control->err_gc_skipped && !ret)
- ret = sec_freed ? 0 : -EAGAIN;
+ ret = total_sec_freed ? 0 : -EAGAIN;
return ret;
}
@@ -2099,8 +2105,9 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
}
}
-int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
+int f2fs_resize_fs(struct file *filp, __u64 block_count)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
__u64 old_block_count, shrunk_blocks;
struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
unsigned int secs;
@@ -2138,12 +2145,18 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
return -EINVAL;
}
+ err = mnt_want_write_file(filp);
+ if (err)
+ return err;
+
shrunk_blocks = old_block_count - block_count;
secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
/* stop other GC */
- if (!f2fs_down_write_trylock(&sbi->gc_lock))
- return -EAGAIN;
+ if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
+ err = -EAGAIN;
+ goto out_drop_write;
+ }
/* stop CP to protect MAIN_SEC in free_segment_range */
f2fs_lock_op(sbi);
@@ -2163,10 +2176,20 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
out_unlock:
f2fs_unlock_op(sbi);
f2fs_up_write(&sbi->gc_lock);
+out_drop_write:
+ mnt_drop_write_file(filp);
if (err)
return err;
- freeze_super(sbi->sb);
+ err = freeze_super(sbi->sb);
+ if (err)
+ return err;
+
+ if (f2fs_readonly(sbi->sb)) {
+ thaw_super(sbi->sb);
+ return -EROFS;
+ }
+
f2fs_down_write(&sbi->gc_lock);
f2fs_down_write(&sbi->cp_global_sem);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index cf4327ad106c..09e986b050c6 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -10,6 +10,8 @@
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/sched/mm.h>
+#include <linux/lz4.h>
+#include <linux/zstd.h>
#include "f2fs.h"
#include "node.h"
@@ -202,6 +204,80 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
}
+static bool sanity_check_compress_inode(struct inode *inode,
+ struct f2fs_inode *ri)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned char clevel;
+
+ if (ri->i_compress_algorithm >= COMPRESS_MAX) {
+ f2fs_warn(sbi,
+ "%s: inode (ino=%lx) has unsupported compress algorithm: %u, run fsck to fix",
+ __func__, inode->i_ino, ri->i_compress_algorithm);
+ goto err;
+ }
+ if (le64_to_cpu(ri->i_compr_blocks) >
+ SECTOR_TO_BLOCK(inode->i_blocks)) {
+ f2fs_warn(sbi,
+ "%s: inode (ino=%lx) has inconsistent i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix",
+ __func__, inode->i_ino, le64_to_cpu(ri->i_compr_blocks),
+ SECTOR_TO_BLOCK(inode->i_blocks));
+ goto err;
+ }
+ if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
+ ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) {
+ f2fs_warn(sbi,
+ "%s: inode (ino=%lx) has unsupported log cluster size: %u, run fsck to fix",
+ __func__, inode->i_ino, ri->i_log_cluster_size);
+ goto err;
+ }
+
+ clevel = le16_to_cpu(ri->i_compress_flag) >>
+ COMPRESS_LEVEL_OFFSET;
+ switch (ri->i_compress_algorithm) {
+ case COMPRESS_LZO:
+#ifdef CONFIG_F2FS_FS_LZO
+ if (clevel)
+ goto err_level;
+#endif
+ break;
+ case COMPRESS_LZORLE:
+#ifdef CONFIG_F2FS_FS_LZORLE
+ if (clevel)
+ goto err_level;
+#endif
+ break;
+ case COMPRESS_LZ4:
+#ifdef CONFIG_F2FS_FS_LZ4
+#ifdef CONFIG_F2FS_FS_LZ4HC
+ if (clevel &&
+ (clevel < LZ4HC_MIN_CLEVEL || clevel > LZ4HC_MAX_CLEVEL))
+ goto err_level;
+#else
+ if (clevel)
+ goto err_level;
+#endif
+#endif
+ break;
+ case COMPRESS_ZSTD:
+#ifdef CONFIG_F2FS_FS_ZSTD
+ if (clevel < zstd_min_clevel() || clevel > zstd_max_clevel())
+ goto err_level;
+#endif
+ break;
+ default:
+ goto err_level;
+ }
+
+ return true;
+err_level:
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported compress level: %u, run fsck to fix",
+ __func__, inode->i_ino, clevel);
+err:
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return false;
+}
+
static bool sanity_check_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -225,41 +301,77 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
- if (f2fs_sb_has_flexible_inline_xattr(sbi)
- && !f2fs_has_extra_attr(inode)) {
+ if (f2fs_has_extra_attr(inode)) {
+ if (!f2fs_sb_has_extra_attr(sbi)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off",
+ __func__, inode->i_ino);
+ return false;
+ }
+ if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
+ fi->i_extra_isize < F2FS_MIN_EXTRA_ATTR_SIZE ||
+ fi->i_extra_isize % sizeof(__le32)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu",
+ __func__, inode->i_ino, fi->i_extra_isize,
+ F2FS_TOTAL_EXTRA_ATTR_SIZE);
+ return false;
+ }
+ if (f2fs_sb_has_flexible_inline_xattr(sbi) &&
+ f2fs_has_inline_xattr(inode) &&
+ (!fi->i_inline_xattr_size ||
+ fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu",
+ __func__, inode->i_ino, fi->i_inline_xattr_size,
+ MAX_INLINE_XATTR_SIZE);
+ return false;
+ }
+ if (f2fs_sb_has_compression(sbi) &&
+ fi->i_flags & F2FS_COMPR_FL &&
+ F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
+ i_compress_flag)) {
+ if (!sanity_check_compress_inode(inode, ri))
+ return false;
+ }
+ } else if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.",
__func__, inode->i_ino);
return false;
}
- if (f2fs_has_extra_attr(inode) &&
- !f2fs_sb_has_extra_attr(sbi)) {
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off",
- __func__, inode->i_ino);
- return false;
- }
-
- if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
- fi->i_extra_isize % sizeof(__le32)) {
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu",
- __func__, inode->i_ino, fi->i_extra_isize,
- F2FS_TOTAL_EXTRA_ATTR_SIZE);
- return false;
- }
-
- if (f2fs_has_extra_attr(inode) &&
- f2fs_sb_has_flexible_inline_xattr(sbi) &&
- f2fs_has_inline_xattr(inode) &&
- (!fi->i_inline_xattr_size ||
- fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu",
- __func__, inode->i_ino, fi->i_inline_xattr_size,
- MAX_INLINE_XATTR_SIZE);
- return false;
+ if (!f2fs_sb_has_extra_attr(sbi)) {
+ if (f2fs_sb_has_project_quota(sbi)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
+ __func__, inode->i_ino, F2FS_FEATURE_PRJQUOTA);
+ return false;
+ }
+ if (f2fs_sb_has_inode_chksum(sbi)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
+ __func__, inode->i_ino, F2FS_FEATURE_INODE_CHKSUM);
+ return false;
+ }
+ if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
+ __func__, inode->i_ino, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR);
+ return false;
+ }
+ if (f2fs_sb_has_inode_crtime(sbi)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
+ __func__, inode->i_ino, F2FS_FEATURE_INODE_CRTIME);
+ return false;
+ }
+ if (f2fs_sb_has_compression(sbi)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
+ __func__, inode->i_ino, F2FS_FEATURE_COMPRESSION);
+ return false;
+ }
}
if (f2fs_sanity_check_inline_data(inode)) {
@@ -283,39 +395,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
- if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) &&
- fi->i_flags & F2FS_COMPR_FL &&
- F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
- i_log_cluster_size)) {
- if (ri->i_compress_algorithm >= COMPRESS_MAX) {
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported "
- "compress algorithm: %u, run fsck to fix",
- __func__, inode->i_ino,
- ri->i_compress_algorithm);
- return false;
- }
- if (le64_to_cpu(ri->i_compr_blocks) >
- SECTOR_TO_BLOCK(inode->i_blocks)) {
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_warn(sbi, "%s: inode (ino=%lx) has inconsistent "
- "i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix",
- __func__, inode->i_ino,
- le64_to_cpu(ri->i_compr_blocks),
- SECTOR_TO_BLOCK(inode->i_blocks));
- return false;
- }
- if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
- ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) {
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported "
- "log cluster size: %u, run fsck to fix",
- __func__, inode->i_ino,
- ri->i_log_cluster_size);
- return false;
- }
- }
-
return true;
}
@@ -442,7 +521,7 @@ static int do_read_inode(struct inode *inode)
if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) &&
(fi->i_flags & F2FS_COMPR_FL)) {
if (F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
- i_log_cluster_size)) {
+ i_compress_flag)) {
unsigned short compress_flag;
atomic_set(&fi->i_compr_blocks,
@@ -680,7 +759,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
if (f2fs_sb_has_compression(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
- i_log_cluster_size)) {
+ i_compress_flag)) {
unsigned short compress_flag;
ri->i_compr_blocks =
diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
index 3d5bfb1ad585..f8703038e1d8 100644
--- a/fs/f2fs/iostat.c
+++ b/fs/f2fs/iostat.c
@@ -80,6 +80,7 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
seq_puts(seq, "[OTHER]\n");
IOSTAT_INFO_SHOW("fs discard", FS_DISCARD_IO);
IOSTAT_INFO_SHOW("fs flush", FS_FLUSH_IO);
+ IOSTAT_INFO_SHOW("fs zone reset", FS_ZONE_RESET_IO);
return 0;
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index ad597b417fea..bee0568888da 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -23,7 +23,7 @@
#include <trace/events/f2fs.h>
static inline bool is_extension_exist(const unsigned char *s, const char *sub,
- bool tmp_ext)
+ bool tmp_ext, bool tmp_dot)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
@@ -49,13 +49,27 @@ static inline bool is_extension_exist(const unsigned char *s, const char *sub,
for (i = 1; i < slen - sublen; i++) {
if (s[i] != '.')
continue;
- if (!strncasecmp(s + i + 1, sub, sublen))
- return true;
+ if (!strncasecmp(s + i + 1, sub, sublen)) {
+ if (!tmp_dot)
+ return true;
+ if (i == slen - sublen - 1 || s[i + 1 + sublen] == '.')
+ return true;
+ }
}
return false;
}
+static inline bool is_temperature_extension(const unsigned char *s, const char *sub)
+{
+ return is_extension_exist(s, sub, true, false);
+}
+
+static inline bool is_compress_extension(const unsigned char *s, const char *sub)
+{
+ return is_extension_exist(s, sub, true, true);
+}
+
int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
bool hot, bool set)
{
@@ -148,7 +162,7 @@ static void set_compress_new_inode(struct f2fs_sb_info *sbi, struct inode *dir,
cold_count = le32_to_cpu(sbi->raw_super->extension_count);
hot_count = sbi->raw_super->hot_ext_count;
for (i = cold_count; i < cold_count + hot_count; i++)
- if (is_extension_exist(name, extlist[i], false))
+ if (is_temperature_extension(name, extlist[i]))
break;
f2fs_up_read(&sbi->sb_lock);
if (i < (cold_count + hot_count))
@@ -156,12 +170,12 @@ static void set_compress_new_inode(struct f2fs_sb_info *sbi, struct inode *dir,
/* Don't compress unallowed extension. */
for (i = 0; i < noext_cnt; i++)
- if (is_extension_exist(name, noext[i], false))
+ if (is_compress_extension(name, noext[i]))
return;
/* Compress wanting extension. */
for (i = 0; i < ext_cnt; i++) {
- if (is_extension_exist(name, ext[i], false)) {
+ if (is_compress_extension(name, ext[i])) {
set_compress_context(inode);
return;
}
@@ -189,7 +203,7 @@ static void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode,
cold_count = le32_to_cpu(sbi->raw_super->extension_count);
hot_count = sbi->raw_super->hot_ext_count;
for (i = 0; i < cold_count + hot_count; i++)
- if (is_extension_exist(name, extlist[i], true))
+ if (is_temperature_extension(name, extlist[i]))
break;
f2fs_up_read(&sbi->sb_lock);
@@ -576,8 +590,8 @@ out_splice:
}
#endif
new = d_splice_alias(inode, dentry);
- err = PTR_ERR_OR_ZERO(new);
- trace_f2fs_lookup_end(dir, dentry, ino, !new ? -ENOENT : err);
+ trace_f2fs_lookup_end(dir, !IS_ERR_OR_NULL(new) ? new : dentry,
+ ino, IS_ERR(new) ? PTR_ERR(new) : err);
return new;
out_iput:
iput(inode);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index bd1dad523796..ee2e1dd64f25 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -925,6 +925,7 @@ static int truncate_node(struct dnode_of_data *dn)
static int truncate_dnode(struct dnode_of_data *dn)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct page *page;
int err;
@@ -932,19 +933,30 @@ static int truncate_dnode(struct dnode_of_data *dn)
return 1;
/* get direct node */
- page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
+ page = f2fs_get_node_page(sbi, dn->nid);
if (PTR_ERR(page) == -ENOENT)
return 1;
else if (IS_ERR(page))
return PTR_ERR(page);
+ if (IS_INODE(page) || ino_of_node(page) != dn->inode->i_ino) {
+ f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u",
+ dn->inode->i_ino, dn->nid, ino_of_node(page));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE);
+ f2fs_put_page(page, 1);
+ return -EFSCORRUPTED;
+ }
+
/* Make dnode_of_data for parameter */
dn->node_page = page;
dn->ofs_in_node = 0;
- f2fs_truncate_data_blocks(dn);
+ f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
err = truncate_node(dn);
- if (err)
+ if (err) {
+ f2fs_put_page(page, 1);
return err;
+ }
return 1;
}
@@ -1596,6 +1608,9 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
trace_f2fs_writepage(page, NODE);
if (unlikely(f2fs_cp_error(sbi))) {
+ /* keep node pages in remount-ro mode */
+ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
+ goto redirty_out;
ClearPageUptodate(page);
dec_page_count(sbi, F2FS_DIRTY_NODES);
unlock_page(page);
@@ -2063,7 +2078,6 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
struct list_head *head = &sbi->fsync_node_list;
unsigned long flags;
unsigned int cur_seq_id = 0;
- int ret2, ret = 0;
while (seq_id && cur_seq_id < seq_id) {
spin_lock_irqsave(&sbi->fsync_node_lock, flags);
@@ -2084,16 +2098,9 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
f2fs_wait_on_page_writeback(page, NODE, true, false);
put_page(page);
-
- if (ret)
- break;
}
- ret2 = filemap_check_errors(NODE_MAPPING(sbi));
- if (!ret)
- ret = ret2;
-
- return ret;
+ return filemap_check_errors(NODE_MAPPING(sbi));
}
static int f2fs_write_node_pages(struct address_space *mapping,
@@ -3065,7 +3072,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_journal *journal = curseg->journal;
- struct nat_entry_set *setvec[SETVEC_SIZE];
+ struct nat_entry_set *setvec[NAT_VEC_SIZE];
struct nat_entry_set *set, *tmp;
unsigned int found;
nid_t set_idx = 0;
@@ -3098,7 +3105,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
remove_nats_in_journal(sbi);
while ((found = __gang_lookup_nat_set(nm_i,
- set_idx, SETVEC_SIZE, setvec))) {
+ set_idx, NAT_VEC_SIZE, setvec))) {
unsigned idx;
set_idx = setvec[found - 1]->set + 1;
@@ -3319,8 +3326,9 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i, *next_i;
- struct nat_entry *natvec[NATVEC_SIZE];
- struct nat_entry_set *setvec[SETVEC_SIZE];
+ void *vec[NAT_VEC_SIZE];
+ struct nat_entry **natvec = (struct nat_entry **)vec;
+ struct nat_entry_set **setvec = (struct nat_entry_set **)vec;
nid_t nid = 0;
unsigned int found;
@@ -3343,7 +3351,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
/* destroy nat cache */
f2fs_down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_cache(nm_i,
- nid, NATVEC_SIZE, natvec))) {
+ nid, NAT_VEC_SIZE, natvec))) {
unsigned idx;
nid = nat_get_nid(natvec[found - 1]) + 1;
@@ -3359,8 +3367,9 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
/* destroy nat set cache */
nid = 0;
+ memset(vec, 0, sizeof(void *) * NAT_VEC_SIZE);
while ((found = __gang_lookup_nat_set(nm_i,
- nid, SETVEC_SIZE, setvec))) {
+ nid, NAT_VEC_SIZE, setvec))) {
unsigned idx;
nid = setvec[found - 1]->set + 1;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 906fb67a99da..5bd16a95eef8 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -35,8 +35,7 @@
#define DEF_RF_NODE_BLOCKS 0
/* vector size for gang look-up from nat cache that consists of radix tree */
-#define NATVEC_SIZE 64
-#define SETVEC_SIZE 32
+#define NAT_VEC_SIZE 32
/* return value for read_node_page */
#define LOCKED_PAGE 1
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 58c1a0096f7d..4e7d4ceeb084 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -360,21 +360,63 @@ static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi,
return ra_blocks;
}
+/* Detect looped node chain with Floyd's cycle detection algorithm. */
+static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr,
+ block_t *blkaddr_fast, bool *is_detecting)
+{
+ unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
+ struct page *page = NULL;
+ int i;
+
+ if (!*is_detecting)
+ return 0;
+
+ for (i = 0; i < 2; i++) {
+ if (!f2fs_is_valid_blkaddr(sbi, *blkaddr_fast, META_POR)) {
+ *is_detecting = false;
+ return 0;
+ }
+
+ page = f2fs_get_tmp_page(sbi, *blkaddr_fast);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ if (!is_recoverable_dnode(page)) {
+ f2fs_put_page(page, 1);
+ *is_detecting = false;
+ return 0;
+ }
+
+ ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, *blkaddr_fast,
+ next_blkaddr_of_node(page));
+
+ *blkaddr_fast = next_blkaddr_of_node(page);
+ f2fs_put_page(page, 1);
+
+ f2fs_ra_meta_pages_cond(sbi, *blkaddr_fast, ra_blocks);
+ }
+
+ if (*blkaddr_fast == blkaddr) {
+ f2fs_notice(sbi, "%s: Detect looped node chain on blkaddr:%u."
+ " Run fsck to fix it.", __func__, blkaddr);
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
bool check_only)
{
struct curseg_info *curseg;
struct page *page = NULL;
- block_t blkaddr;
- unsigned int loop_cnt = 0;
- unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
- unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
- valid_user_blocks(sbi);
+ block_t blkaddr, blkaddr_fast;
+ bool is_detecting = true;
int err = 0;
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+ blkaddr_fast = blkaddr;
while (1) {
struct fsync_inode_entry *entry;
@@ -418,10 +460,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
quota_inode);
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
- if (err == -ENOENT) {
- err = 0;
+ if (err == -ENOENT)
goto next;
- }
f2fs_put_page(page, 1);
break;
}
@@ -431,25 +471,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
if (IS_INODE(page) && is_dent_dnode(page))
entry->last_dentry = blkaddr;
next:
- /* sanity check in order to detect looped node chain */
- if (++loop_cnt >= free_blocks ||
- blkaddr == next_blkaddr_of_node(page)) {
- f2fs_notice(sbi, "%s: detect looped node chain, blkaddr:%u, next:%u",
- __func__, blkaddr,
- next_blkaddr_of_node(page));
- f2fs_put_page(page, 1);
- err = -EINVAL;
- break;
- }
-
- ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr,
- next_blkaddr_of_node(page));
-
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
f2fs_put_page(page, 1);
- f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
+ err = sanity_check_node_chain(sbi, blkaddr, &blkaddr_fast,
+ &is_detecting);
+ if (err)
+ break;
}
return err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 6db410f1bb8c..0457d620011f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1196,6 +1196,45 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi,
+ struct discard_cmd *dc, blk_opf_t flag,
+ struct list_head *wait_list,
+ unsigned int *issued)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct block_device *bdev = dc->bdev;
+ struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS);
+ unsigned long flags;
+
+ trace_f2fs_issue_reset_zone(bdev, dc->di.start);
+
+ spin_lock_irqsave(&dc->lock, flags);
+ dc->state = D_SUBMIT;
+ dc->bio_ref++;
+ spin_unlock_irqrestore(&dc->lock, flags);
+
+ if (issued)
+ (*issued)++;
+
+ atomic_inc(&dcc->queued_discard);
+ dc->queued++;
+ list_move_tail(&dc->list, wait_list);
+
+ /* sanity check on discard range */
+ __check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len);
+
+ bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start);
+ bio->bi_private = dc;
+ bio->bi_end_io = f2fs_submit_discard_endio;
+ submit_bio(bio);
+
+ atomic_inc(&dcc->issued_discard);
+ f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE);
+}
+#endif
+
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
@@ -1217,6 +1256,13 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
return 0;
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) {
+ __submit_zone_reset_cmd(sbi, dc, flag, wait_list, issued);
+ return 0;
+ }
+#endif
+
trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);
lstart = dc->di.lstart;
@@ -1461,6 +1507,19 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
}
}
+#ifdef CONFIG_BLK_DEV_ZONED
+static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi,
+ struct block_device *bdev, block_t blkstart, block_t lblkstart,
+ block_t blklen)
+{
+ trace_f2fs_queue_reset_zone(bdev, blkstart);
+
+ mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
+ __insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen);
+ mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
+}
+#endif
+
static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
@@ -1724,6 +1783,19 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
mutex_lock(&dcc->cmd_lock);
dc = __lookup_discard_cmd(sbi, blkaddr);
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) {
+ /* force submit zone reset */
+ if (dc->state == D_PREP)
+ __submit_zone_reset_cmd(sbi, dc, REQ_SYNC,
+ &dcc->wait_list, NULL);
+ dc->ref++;
+ mutex_unlock(&dcc->cmd_lock);
+ /* wait zone reset */
+ __wait_one_discard_bio(sbi, dc);
+ return;
+ }
+#endif
if (dc) {
if (dc->state == D_PREP) {
__punch_discard_cmd(sbi, dc, blkaddr);
@@ -1876,9 +1948,15 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
blkstart, blklen);
return -EIO;
}
- trace_f2fs_issue_reset_zone(bdev, blkstart);
- return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
- sector, nr_sects, GFP_NOFS);
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
+ trace_f2fs_issue_reset_zone(bdev, blkstart);
+ return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
+ sector, nr_sects, GFP_NOFS);
+ }
+
+ __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
+ return 0;
}
/* For conventional zones, use regular discard if supported */
@@ -2115,7 +2193,7 @@ find_next:
len = next_pos - cur_pos;
if (f2fs_sb_has_blkzoned(sbi) ||
- (force && len < cpc->trim_minlen))
+ !force || len < cpc->trim_minlen)
goto skip;
f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
@@ -4768,17 +4846,17 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
{
unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
block_t zone_block, wp_block, last_valid_block;
- unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
int i, s, b, ret;
struct seg_entry *se;
if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
return 0;
- wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
+ wp_block = fdev->start_blk + (zone->wp >> sbi->log_sectors_per_block);
wp_segno = GET_SEGNO(sbi, wp_block);
wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
- zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
+ zone_block = fdev->start_blk + (zone->start >>
+ sbi->log_sectors_per_block);
zone_segno = GET_SEGNO(sbi, zone_block);
zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
@@ -4811,39 +4889,52 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
}
/*
- * If last valid block is beyond the write pointer, report the
- * inconsistency. This inconsistency does not cause write error
- * because the zone will not be selected for write operation until
- * it get discarded. Just report it.
+ * The write pointer matches with the valid blocks or
+ * already points to the end of the zone.
*/
- if (last_valid_block >= wp_block) {
- f2fs_notice(sbi, "Valid block beyond write pointer: "
- "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
- GET_SEGNO(sbi, last_valid_block),
- GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
- wp_segno, wp_blkoff);
+ if ((last_valid_block + 1 == wp_block) ||
+ (zone->wp == zone->start + zone->len))
return 0;
- }
- /*
- * If there is no valid block in the zone and if write pointer is
- * not at zone start, reset the write pointer.
- */
- if (last_valid_block + 1 == zone_block && zone->wp != zone->start) {
+ if (last_valid_block + 1 == zone_block) {
+ /*
+ * If there is no valid block in the zone and if write pointer
+ * is not at zone start, reset the write pointer.
+ */
f2fs_notice(sbi,
"Zone without valid block has non-zero write "
"pointer. Reset the write pointer: wp[0x%x,0x%x]",
wp_segno, wp_blkoff);
ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
- zone->len >> log_sectors_per_block);
- if (ret) {
+ zone->len >> sbi->log_sectors_per_block);
+ if (ret)
f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
fdev->path, ret);
- return ret;
- }
+
+ return ret;
}
- return 0;
+ /*
+ * If there are valid blocks and the write pointer doesn't
+ * match with them, we need to report the inconsistency and
+ * fill the zone till the end to close the zone. This inconsistency
+ * does not cause write error because the zone will not be selected
+ * for write operation until it get discarded.
+ */
+ f2fs_notice(sbi, "Valid blocks are not aligned with write pointer: "
+ "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
+ GET_SEGNO(sbi, last_valid_block),
+ GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
+ wp_segno, wp_blkoff);
+
+ ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
+ zone->len - (zone->wp - zone->start),
+ GFP_NOFS, 0);
+ if (ret)
+ f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)",
+ fdev->path, ret);
+
+ return ret;
}
static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
@@ -4876,7 +4967,6 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
struct blk_zone zone;
unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
block_t cs_zone_block, wp_block;
- unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
sector_t zone_sector;
int err;
@@ -4888,8 +4978,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
return 0;
/* report zone for the sector the curseg points to */
- zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
- << log_sectors_per_block;
+ zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) <<
+ sbi->log_sectors_per_block;
err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
report_one_zone_cb, &zone);
if (err != 1) {
@@ -4901,10 +4991,10 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
return 0;
- wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
+ wp_block = zbd->start_blk + (zone.wp >> sbi->log_sectors_per_block);
wp_segno = GET_SEGNO(sbi, wp_block);
wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
- wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
+ wp_sector_off = zone.wp & GENMASK(sbi->log_sectors_per_block - 1, 0);
if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
wp_sector_off == 0)
@@ -4931,8 +5021,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
if (!zbd)
return 0;
- zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
- << log_sectors_per_block;
+ zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) <<
+ sbi->log_sectors_per_block;
err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
report_one_zone_cb, &zone);
if (err != 1) {
@@ -4950,7 +5040,7 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
"Reset the zone: curseg[0x%x,0x%x]",
type, cs->segno, cs->next_blkoff);
err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block,
- zone.len >> log_sectors_per_block);
+ zone.len >> sbi->log_sectors_per_block);
if (err) {
f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
zbd->path, err);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index e34197a70dc1..ca31163da00a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -164,6 +164,7 @@ enum {
Opt_discard_unit,
Opt_memory_mode,
Opt_age_extent_cache,
+ Opt_errors,
Opt_err,
};
@@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
{Opt_discard_unit, "discard_unit=%s"},
{Opt_memory_mode, "memory=%s"},
{Opt_age_extent_cache, "age_extent_cache"},
+ {Opt_errors, "errors=%s"},
{Opt_err, NULL},
};
@@ -587,14 +589,12 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str)
{
#ifdef CONFIG_F2FS_FS_LZ4HC
unsigned int level;
-#endif
if (strlen(str) == 3) {
- F2FS_OPTION(sbi).compress_level = 0;
+ F2FS_OPTION(sbi).compress_level = LZ4HC_DEFAULT_CLEVEL;
return 0;
}
-#ifdef CONFIG_F2FS_FS_LZ4HC
str += 3;
if (str[0] != ':') {
@@ -604,7 +604,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str)
if (kstrtouint(str + 1, 10, &level))
return -EINVAL;
- if (level < LZ4HC_MIN_CLEVEL || level > LZ4HC_MAX_CLEVEL) {
+ if (!f2fs_is_compress_level_valid(COMPRESS_LZ4, level)) {
f2fs_info(sbi, "invalid lz4hc compress level: %d", level);
return -EINVAL;
}
@@ -612,6 +612,10 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str)
F2FS_OPTION(sbi).compress_level = level;
return 0;
#else
+ if (strlen(str) == 3) {
+ F2FS_OPTION(sbi).compress_level = 0;
+ return 0;
+ }
f2fs_info(sbi, "kernel doesn't support lz4hc compression");
return -EINVAL;
#endif
@@ -625,7 +629,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
int len = 4;
if (strlen(str) == len) {
- F2FS_OPTION(sbi).compress_level = 0;
+ F2FS_OPTION(sbi).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
return 0;
}
@@ -638,7 +642,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
if (kstrtouint(str + 1, 10, &level))
return -EINVAL;
- if (!level || level > zstd_max_clevel()) {
+ if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) {
f2fs_info(sbi, "invalid zstd compress level: %d", level);
return -EINVAL;
}
@@ -1268,6 +1272,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
case Opt_age_extent_cache:
set_opt(sbi, AGE_EXTENT_CACHE);
break;
+ case Opt_errors:
+ name = match_strdup(&args[0]);
+ if (!name)
+ return -ENOMEM;
+ if (!strcmp(name, "remount-ro")) {
+ F2FS_OPTION(sbi).errors =
+ MOUNT_ERRORS_READONLY;
+ } else if (!strcmp(name, "continue")) {
+ F2FS_OPTION(sbi).errors =
+ MOUNT_ERRORS_CONTINUE;
+ } else if (!strcmp(name, "panic")) {
+ F2FS_OPTION(sbi).errors =
+ MOUNT_ERRORS_PANIC;
+ } else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p);
@@ -1340,7 +1363,7 @@ default_check:
return -EINVAL;
}
- min_size = sizeof(struct f2fs_xattr_header) / sizeof(__le32);
+ min_size = MIN_INLINE_XATTR_SIZE;
max_size = MAX_INLINE_XATTR_SIZE;
if (F2FS_OPTION(sbi).inline_xattr_size < min_size ||
@@ -1550,6 +1573,7 @@ static void f2fs_put_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
int i;
+ int err = 0;
bool done;
/* unregister procfs/sysfs entries in advance to avoid race case */
@@ -1576,7 +1600,7 @@ static void f2fs_put_super(struct super_block *sb)
struct cp_control cpc = {
.reason = CP_UMOUNT,
};
- f2fs_write_checkpoint(sbi, &cpc);
+ err = f2fs_write_checkpoint(sbi, &cpc);
}
/* be sure to wait for any on-going discard commands */
@@ -1585,7 +1609,7 @@ static void f2fs_put_super(struct super_block *sb)
struct cp_control cpc = {
.reason = CP_UMOUNT | CP_TRIMMED,
};
- f2fs_write_checkpoint(sbi, &cpc);
+ err = f2fs_write_checkpoint(sbi, &cpc);
}
/*
@@ -1602,6 +1626,19 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
+ if (err) {
+ truncate_inode_pages_final(NODE_MAPPING(sbi));
+ truncate_inode_pages_final(META_MAPPING(sbi));
+ }
+
+ for (i = 0; i < NR_COUNT_TYPE; i++) {
+ if (!get_pages(sbi, i))
+ continue;
+ f2fs_err(sbi, "detect filesystem reference count leak during "
+ "umount, type: %d, count: %lld", i, get_pages(sbi, i));
+ f2fs_bug_on(sbi, 1);
+ }
+
f2fs_bug_on(sbi, sbi->fsync_node_num);
f2fs_destroy_compress_inode(sbi);
@@ -1622,6 +1659,9 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_destroy_node_manager(sbi);
f2fs_destroy_segment_manager(sbi);
+ /* flush s_error_work before sbi destroy */
+ flush_work(&sbi->s_error_work);
+
f2fs_destroy_post_read_wq(sbi);
kvfree(sbi->ckpt);
@@ -2052,12 +2092,32 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
seq_printf(seq, ",memory=%s", "low");
+ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
+ seq_printf(seq, ",errors=%s", "remount-ro");
+ else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
+ seq_printf(seq, ",errors=%s", "continue");
+ else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
+ seq_printf(seq, ",errors=%s", "panic");
+
return 0;
}
-static void default_options(struct f2fs_sb_info *sbi)
+static void default_options(struct f2fs_sb_info *sbi, bool remount)
{
/* init some FS parameters */
+ if (!remount) {
+ set_opt(sbi, READ_EXTENT_CACHE);
+ clear_opt(sbi, DISABLE_CHECKPOINT);
+
+ if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi))
+ set_opt(sbi, DISCARD);
+
+ if (f2fs_sb_has_blkzoned(sbi))
+ F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION;
+ else
+ F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_BLOCK;
+ }
+
if (f2fs_sb_has_readonly(sbi))
F2FS_OPTION(sbi).active_logs = NR_CURSEG_RO_TYPE;
else
@@ -2080,29 +2140,23 @@ static void default_options(struct f2fs_sb_info *sbi)
}
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
+ F2FS_OPTION(sbi).errors = MOUNT_ERRORS_CONTINUE;
sbi->sb->s_flags &= ~SB_INLINECRYPT;
set_opt(sbi, INLINE_XATTR);
set_opt(sbi, INLINE_DATA);
set_opt(sbi, INLINE_DENTRY);
- set_opt(sbi, READ_EXTENT_CACHE);
set_opt(sbi, NOHEAP);
- clear_opt(sbi, DISABLE_CHECKPOINT);
set_opt(sbi, MERGE_CHECKPOINT);
F2FS_OPTION(sbi).unusable_cap = 0;
sbi->sb->s_flags |= SB_LAZYTIME;
if (!f2fs_is_readonly(sbi))
set_opt(sbi, FLUSH_MERGE);
- if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi))
- set_opt(sbi, DISCARD);
- if (f2fs_sb_has_blkzoned(sbi)) {
+ if (f2fs_sb_has_blkzoned(sbi))
F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
- F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION;
- } else {
+ else
F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
- F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_BLOCK;
- }
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
@@ -2274,13 +2328,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
}
- default_options(sbi);
+ default_options(sbi, true);
/* parse mount options */
err = parse_options(sb, data, true);
if (err)
goto restore_opts;
+ /* flush outstanding errors before changing fs state */
+ flush_work(&sbi->s_error_work);
+
/*
* Previous and new state of filesystem is RO,
* so skip checking GC and FLUSH_MERGE conditions.
@@ -2713,6 +2770,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
{
struct inode *qf_inode;
unsigned long qf_inum;
+ unsigned long qf_flag = F2FS_QUOTA_DEFAULT_FL;
int err;
BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb)));
@@ -2728,7 +2786,15 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
}
/* Don't account quota for quota files to avoid recursion */
+ inode_lock(qf_inode);
qf_inode->i_flags |= S_NOQUOTA;
+
+ if ((F2FS_I(qf_inode)->i_flags & qf_flag) != qf_flag) {
+ F2FS_I(qf_inode)->i_flags |= qf_flag;
+ f2fs_set_inode_flags(qf_inode);
+ }
+ inode_unlock(qf_inode);
+
err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
iput(qf_inode);
return err;
@@ -2859,18 +2925,29 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
return -EBUSY;
}
+ if (path->dentry->d_sb != sb)
+ return -EXDEV;
+
err = f2fs_quota_sync(sb, type);
if (err)
return err;
- err = dquot_quota_on(sb, type, format_id, path);
+ inode = d_inode(path->dentry);
+
+ err = filemap_fdatawrite(inode->i_mapping);
if (err)
return err;
- inode = d_inode(path->dentry);
+ err = filemap_fdatawait(inode->i_mapping);
+ if (err)
+ return err;
+
+ err = dquot_quota_on(sb, type, format_id, path);
+ if (err)
+ return err;
inode_lock(inode);
- F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL;
+ F2FS_I(inode)->i_flags |= F2FS_QUOTA_DEFAULT_FL;
f2fs_set_inode_flags(inode);
inode_unlock(inode);
f2fs_mark_inode_dirty_sync(inode, false);
@@ -2895,7 +2972,7 @@ static int __f2fs_quota_off(struct super_block *sb, int type)
goto out_put;
inode_lock(inode);
- F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL);
+ F2FS_I(inode)->i_flags &= ~F2FS_QUOTA_DEFAULT_FL;
f2fs_set_inode_flags(inode);
inode_unlock(inode);
f2fs_mark_inode_dirty_sync(inode, false);
@@ -3926,55 +4003,73 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
return err;
}
-void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
+static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->error_lock, flags);
+ if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
+ sbi->stop_reason[reason]++;
+ spin_unlock_irqrestore(&sbi->error_lock, flags);
+}
+
+static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+ unsigned long flags;
int err;
f2fs_down_write(&sbi->sb_lock);
- if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
- raw_super->s_stop_reason[reason]++;
+ spin_lock_irqsave(&sbi->error_lock, flags);
+ if (sbi->error_dirty) {
+ memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
+ MAX_F2FS_ERRORS);
+ sbi->error_dirty = false;
+ }
+ memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
+ spin_unlock_irqrestore(&sbi->error_lock, flags);
err = f2fs_commit_super(sbi, false);
- if (err)
- f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
- reason, err);
+
f2fs_up_write(&sbi->sb_lock);
+ if (err)
+ f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
}
void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
{
- spin_lock(&sbi->error_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->error_lock, flags);
if (!test_bit(flag, (unsigned long *)sbi->errors)) {
set_bit(flag, (unsigned long *)sbi->errors);
sbi->error_dirty = true;
}
- spin_unlock(&sbi->error_lock);
+ spin_unlock_irqrestore(&sbi->error_lock, flags);
}
static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
{
+ unsigned long flags;
bool need_update = false;
- spin_lock(&sbi->error_lock);
+ spin_lock_irqsave(&sbi->error_lock, flags);
if (sbi->error_dirty) {
memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
MAX_F2FS_ERRORS);
sbi->error_dirty = false;
need_update = true;
}
- spin_unlock(&sbi->error_lock);
+ spin_unlock_irqrestore(&sbi->error_lock, flags);
return need_update;
}
-void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
+static void f2fs_record_errors(struct f2fs_sb_info *sbi, unsigned char error)
{
int err;
- f2fs_save_errors(sbi, error);
-
f2fs_down_write(&sbi->sb_lock);
if (!f2fs_update_errors(sbi))
@@ -3988,6 +4083,83 @@ out_unlock:
f2fs_up_write(&sbi->sb_lock);
}
+void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
+{
+ f2fs_save_errors(sbi, error);
+ f2fs_record_errors(sbi, error);
+}
+
+void f2fs_handle_error_async(struct f2fs_sb_info *sbi, unsigned char error)
+{
+ f2fs_save_errors(sbi, error);
+
+ if (!sbi->error_dirty)
+ return;
+ if (!test_bit(error, (unsigned long *)sbi->errors))
+ return;
+ schedule_work(&sbi->s_error_work);
+}
+
+static bool system_going_down(void)
+{
+ return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
+ || system_state == SYSTEM_RESTART;
+}
+
+void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
+ bool irq_context)
+{
+ struct super_block *sb = sbi->sb;
+ bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
+ bool continue_fs = !shutdown &&
+ F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
+
+ set_ckpt_flags(sbi, CP_ERROR_FLAG);
+
+ if (!f2fs_hw_is_readonly(sbi)) {
+ save_stop_reason(sbi, reason);
+
+ if (irq_context && !shutdown)
+ schedule_work(&sbi->s_error_work);
+ else
+ f2fs_record_stop_reason(sbi);
+ }
+
+ /*
+ * We force ERRORS_RO behavior when system is rebooting. Otherwise we
+ * could panic during 'reboot -f' as the underlying device got already
+ * disabled.
+ */
+ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
+ !shutdown && !system_going_down() &&
+ !is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
+ panic("F2FS-fs (device %s): panic forced after error\n",
+ sb->s_id);
+
+ if (shutdown)
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
+
+ /* continue filesystem operators if errors=continue */
+ if (continue_fs || f2fs_readonly(sb))
+ return;
+
+ f2fs_warn(sbi, "Remounting filesystem read-only");
+ /*
+ * Make sure updated value of ->s_mount_flags will be visible before
+ * ->s_flags update
+ */
+ smp_wmb();
+ sb->s_flags |= SB_RDONLY;
+}
+
+static void f2fs_record_error_work(struct work_struct *work)
+{
+ struct f2fs_sb_info *sbi = container_of(work,
+ struct f2fs_sb_info, s_error_work);
+
+ f2fs_record_stop_reason(sbi);
+}
+
static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
@@ -4220,14 +4392,16 @@ try_onemore:
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;
+ INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
+ memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
/* precompute checksum seed for metadata */
if (f2fs_sb_has_inode_chksum(sbi))
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
sizeof(raw_super->uuid));
- default_options(sbi);
+ default_options(sbi, false);
/* parse mount options */
options = kstrdup((const char *)data, GFP_KERNEL);
if (data && !options) {
@@ -4617,6 +4791,8 @@ free_sm:
f2fs_destroy_segment_manager(sbi);
stop_ckpt_thread:
f2fs_stop_ckpt_thread(sbi);
+ /* flush s_error_work before sbi destroy */
+ flush_work(&sbi->s_error_work);
f2fs_destroy_post_read_wq(sbi);
free_devices:
destroy_device_list(sbi);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 8ea05340bad9..48b7e0073884 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -842,68 +842,160 @@ static struct f2fs_attr f2fs_attr_##_name = { \
#define F2FS_GENERAL_RO_ATTR(name) \
static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
-#define F2FS_STAT_ATTR(_struct_type, _struct_name, _name, _elname) \
-static struct f2fs_attr f2fs_attr_##_name = { \
- .attr = {.name = __stringify(_name), .mode = 0444 }, \
- .show = f2fs_sbi_show, \
- .struct_type = _struct_type, \
- .offset = offsetof(struct _struct_name, _elname), \
-}
+#ifdef CONFIG_F2FS_STAT_FS
+#define STAT_INFO_RO_ATTR(name, elname) \
+ F2FS_RO_ATTR(STAT_INFO, f2fs_stat_info, name, elname)
+#endif
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time,
- urgent_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_request, max_discard_request);
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_issue_time);
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time);
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time);
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_io_aware_gran, discard_io_aware_gran);
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_urgent_util, discard_urgent_util);
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_ordered_discard, max_ordered_discard);
-F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, max_roll_forward_node_blocks, max_rf_node_blocks);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
- interval_time[DISCARD_TIME]);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
- umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
-#ifdef CONFIG_F2FS_IOSTAT
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms);
+#define GC_THREAD_RW_ATTR(name, elname) \
+ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, name, elname)
+
+#define SM_INFO_RW_ATTR(name, elname) \
+ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, name, elname)
+
+#define SM_INFO_GENERAL_RW_ATTR(elname) \
+ SM_INFO_RW_ATTR(elname, elname)
+
+#define DCC_INFO_RW_ATTR(name, elname) \
+ F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, name, elname)
+
+#define DCC_INFO_GENERAL_RW_ATTR(elname) \
+ DCC_INFO_RW_ATTR(elname, elname)
+
+#define NM_INFO_RW_ATTR(name, elname) \
+ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, name, elname)
+
+#define NM_INFO_GENERAL_RW_ATTR(elname) \
+ NM_INFO_RW_ATTR(elname, elname)
+
+#define F2FS_SBI_RW_ATTR(name, elname) \
+ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, name, elname)
+
+#define F2FS_SBI_GENERAL_RW_ATTR(elname) \
+ F2FS_SBI_RW_ATTR(elname, elname)
+
+#define F2FS_SBI_GENERAL_RO_ATTR(elname) \
+ F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, elname, elname)
+
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+#define FAULT_INFO_GENERAL_RW_ATTR(type, elname) \
+ F2FS_RW_ATTR(type, f2fs_fault_info, elname, elname)
#endif
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_io_bytes, max_io_bytes);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
+
+#define RESERVED_BLOCKS_GENERAL_RW_ATTR(elname) \
+ F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, elname, elname)
+
+#define CPRC_INFO_GENERAL_RW_ATTR(elname) \
+ F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, elname, elname)
+
+#define ATGC_INFO_RW_ATTR(name, elname) \
+ F2FS_RW_ATTR(ATGC_INFO, atgc_management, name, elname)
+
+/* GC_THREAD ATTR */
+GC_THREAD_RW_ATTR(gc_urgent_sleep_time, urgent_sleep_time);
+GC_THREAD_RW_ATTR(gc_min_sleep_time, min_sleep_time);
+GC_THREAD_RW_ATTR(gc_max_sleep_time, max_sleep_time);
+GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time);
+
+/* SM_INFO ATTR */
+SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments);
+SM_INFO_GENERAL_RW_ATTR(ipu_policy);
+SM_INFO_GENERAL_RW_ATTR(min_ipu_util);
+SM_INFO_GENERAL_RW_ATTR(min_fsync_blocks);
+SM_INFO_GENERAL_RW_ATTR(min_seq_blocks);
+SM_INFO_GENERAL_RW_ATTR(min_hot_blocks);
+SM_INFO_GENERAL_RW_ATTR(min_ssr_sections);
+
+/* DCC_INFO ATTR */
+DCC_INFO_RW_ATTR(max_small_discards, max_discards);
+DCC_INFO_GENERAL_RW_ATTR(max_discard_request);
+DCC_INFO_GENERAL_RW_ATTR(min_discard_issue_time);
+DCC_INFO_GENERAL_RW_ATTR(mid_discard_issue_time);
+DCC_INFO_GENERAL_RW_ATTR(max_discard_issue_time);
+DCC_INFO_GENERAL_RW_ATTR(discard_io_aware_gran);
+DCC_INFO_GENERAL_RW_ATTR(discard_urgent_util);
+DCC_INFO_GENERAL_RW_ATTR(discard_granularity);
+DCC_INFO_GENERAL_RW_ATTR(max_ordered_discard);
+
+/* NM_INFO ATTR */
+NM_INFO_RW_ATTR(max_roll_forward_node_blocks, max_rf_node_blocks);
+NM_INFO_GENERAL_RW_ATTR(ram_thresh);
+NM_INFO_GENERAL_RW_ATTR(ra_nid_pages);
+NM_INFO_GENERAL_RW_ATTR(dirty_nats_ratio);
+
+/* F2FS_SBI ATTR */
F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);
+F2FS_SBI_RW_ATTR(gc_idle, gc_mode);
+F2FS_SBI_RW_ATTR(gc_urgent, gc_mode);
+F2FS_SBI_RW_ATTR(cp_interval, interval_time[CP_TIME]);
+F2FS_SBI_RW_ATTR(idle_interval, interval_time[REQ_TIME]);
+F2FS_SBI_RW_ATTR(discard_idle_interval, interval_time[DISCARD_TIME]);
+F2FS_SBI_RW_ATTR(gc_idle_interval, interval_time[GC_TIME]);
+F2FS_SBI_RW_ATTR(umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
+F2FS_SBI_RW_ATTR(gc_pin_file_thresh, gc_pin_file_threshold);
+F2FS_SBI_RW_ATTR(gc_reclaimed_segments, gc_reclaimed_segs);
+F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
+F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
+F2FS_SBI_GENERAL_RW_ATTR(dir_level);
+#ifdef CONFIG_F2FS_IOSTAT
+F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
+F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
+#endif
+F2FS_SBI_GENERAL_RW_ATTR(readdir_ra);
+F2FS_SBI_GENERAL_RW_ATTR(max_io_bytes);
+F2FS_SBI_GENERAL_RW_ATTR(data_io_flag);
+F2FS_SBI_GENERAL_RW_ATTR(node_io_flag);
+F2FS_SBI_GENERAL_RW_ATTR(gc_remaining_trials);
+F2FS_SBI_GENERAL_RW_ATTR(seq_file_ra_mul);
+F2FS_SBI_GENERAL_RW_ATTR(gc_segment_mode);
+F2FS_SBI_GENERAL_RW_ATTR(max_fragment_chunk);
+F2FS_SBI_GENERAL_RW_ATTR(max_fragment_hole);
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+F2FS_SBI_GENERAL_RW_ATTR(compr_written_block);
+F2FS_SBI_GENERAL_RW_ATTR(compr_saved_block);
+F2FS_SBI_GENERAL_RW_ATTR(compr_new_inode);
+F2FS_SBI_GENERAL_RW_ATTR(compress_percent);
+F2FS_SBI_GENERAL_RW_ATTR(compress_watermark);
+#endif
+/* atomic write */
+F2FS_SBI_GENERAL_RO_ATTR(current_atomic_write);
+F2FS_SBI_GENERAL_RW_ATTR(peak_atomic_write);
+F2FS_SBI_GENERAL_RW_ATTR(committed_atomic_block);
+F2FS_SBI_GENERAL_RW_ATTR(revoked_atomic_block);
+/* block age extent cache */
+F2FS_SBI_GENERAL_RW_ATTR(hot_data_age_threshold);
+F2FS_SBI_GENERAL_RW_ATTR(warm_data_age_threshold);
+F2FS_SBI_GENERAL_RW_ATTR(last_age_weight);
+#ifdef CONFIG_BLK_DEV_ZONED
+F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec);
+#endif
+
+/* STAT_INFO ATTR */
+#ifdef CONFIG_F2FS_STAT_FS
+STAT_INFO_RO_ATTR(cp_foreground_calls, cp_count);
+STAT_INFO_RO_ATTR(cp_background_calls, bg_cp_count);
+STAT_INFO_RO_ATTR(gc_foreground_calls, call_count);
+STAT_INFO_RO_ATTR(gc_background_calls, bg_gc);
+#endif
+
+/* FAULT_INFO ATTR */
#ifdef CONFIG_F2FS_FAULT_INJECTION
-F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
-F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
+FAULT_INFO_GENERAL_RW_ATTR(FAULT_INFO_RATE, inject_rate);
+FAULT_INFO_GENERAL_RW_ATTR(FAULT_INFO_TYPE, inject_type);
#endif
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_remaining_trials, gc_remaining_trials);
-F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio);
+
+/* RESERVED_BLOCKS ATTR */
+RESERVED_BLOCKS_GENERAL_RW_ATTR(reserved_blocks);
+
+/* CPRC_INFO ATTR */
+CPRC_INFO_GENERAL_RW_ATTR(ckpt_thread_ioprio);
+
+/* ATGC_INFO ATTR */
+ATGC_INFO_RW_ATTR(atgc_candidate_ratio, candidate_ratio);
+ATGC_INFO_RW_ATTR(atgc_candidate_count, max_candidate_count);
+ATGC_INFO_RW_ATTR(atgc_age_weight, age_weight);
+ATGC_INFO_RW_ATTR(atgc_age_threshold, age_threshold);
+
F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(free_segments);
F2FS_GENERAL_RO_ATTR(ovp_segments);
@@ -917,10 +1009,6 @@ F2FS_GENERAL_RO_ATTR(main_blkaddr);
F2FS_GENERAL_RO_ATTR(pending_discard);
F2FS_GENERAL_RO_ATTR(gc_mode);
#ifdef CONFIG_F2FS_STAT_FS
-F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count);
-F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count);
-F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, gc_foreground_calls, call_count);
-F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, gc_background_calls, bg_gc);
F2FS_GENERAL_RO_ATTR(moved_blocks_background);
F2FS_GENERAL_RO_ATTR(moved_blocks_foreground);
F2FS_GENERAL_RO_ATTR(avg_vblocks);
@@ -935,8 +1023,6 @@ F2FS_FEATURE_RO_ATTR(encrypted_casefold);
#endif /* CONFIG_FS_ENCRYPTION */
#ifdef CONFIG_BLK_DEV_ZONED
F2FS_FEATURE_RO_ATTR(block_zoned);
-F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, unusable_blocks_per_sec,
- unusable_blocks_per_sec);
#endif
F2FS_FEATURE_RO_ATTR(atomic_write);
F2FS_FEATURE_RO_ATTR(extra_attr);
@@ -956,37 +1042,9 @@ F2FS_FEATURE_RO_ATTR(casefold);
F2FS_FEATURE_RO_ATTR(readonly);
#ifdef CONFIG_F2FS_FS_COMPRESSION
F2FS_FEATURE_RO_ATTR(compression);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_written_block, compr_written_block);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_saved_block, compr_saved_block);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_new_inode, compr_new_inode);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compress_percent, compress_percent);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compress_watermark, compress_watermark);
#endif
F2FS_FEATURE_RO_ATTR(pin_file);
-/* For ATGC */
-F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_candidate_ratio, candidate_ratio);
-F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_candidate_count, max_candidate_count);
-F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_weight, age_weight);
-F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold);
-
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_chunk, max_fragment_chunk);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_hole, max_fragment_hole);
-
-/* For atomic write */
-F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, current_atomic_write, current_atomic_write);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, peak_atomic_write, peak_atomic_write);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_block);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block);
-
-/* For block age extent cache */
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, hot_data_age_threshold, hot_data_age_threshold);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, warm_data_age_threshold, warm_data_age_threshold);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, last_age_weight, last_age_weight);
-
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_urgent_sleep_time),
@@ -1386,12 +1444,19 @@ int __init f2fs_init_sysfs(void)
ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype,
NULL, "features");
- if (ret) {
- kobject_put(&f2fs_feat);
- kset_unregister(&f2fs_kset);
- } else {
- f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
+ if (ret)
+ goto put_kobject;
+
+ f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
+ if (!f2fs_proc_root) {
+ ret = -ENOMEM;
+ goto put_kobject;
}
+
+ return 0;
+put_kobject:
+ kobject_put(&f2fs_feat);
+ kset_unregister(&f2fs_kset);
return ret;
}
@@ -1430,23 +1495,24 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
if (err)
goto put_feature_list_kobj;
- if (f2fs_proc_root)
- sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
+ sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
+ if (!sbi->s_proc) {
+ err = -ENOMEM;
+ goto put_feature_list_kobj;
+ }
- if (sbi->s_proc) {
- proc_create_single_data("segment_info", 0444, sbi->s_proc,
+ proc_create_single_data("segment_info", 0444, sbi->s_proc,
segment_info_seq_show, sb);
- proc_create_single_data("segment_bits", 0444, sbi->s_proc,
+ proc_create_single_data("segment_bits", 0444, sbi->s_proc,
segment_bits_seq_show, sb);
#ifdef CONFIG_F2FS_IOSTAT
- proc_create_single_data("iostat_info", 0444, sbi->s_proc,
+ proc_create_single_data("iostat_info", 0444, sbi->s_proc,
iostat_info_seq_show, sb);
#endif
- proc_create_single_data("victim_bits", 0444, sbi->s_proc,
+ proc_create_single_data("victim_bits", 0444, sbi->s_proc,
victim_bits_seq_show, sb);
- proc_create_single_data("discard_plist_info", 0444, sbi->s_proc,
+ proc_create_single_data("discard_plist_info", 0444, sbi->s_proc,
discard_plist_seq_show, sb);
- }
return 0;
put_feature_list_kobj:
kobject_put(&sbi->s_feature_list_kobj);
@@ -1462,8 +1528,7 @@ put_sb_kobj:
void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
{
- if (sbi->s_proc)
- remove_proc_subtree(sbi->sb->s_id, f2fs_proc_root);
+ remove_proc_subtree(sbi->sb->s_id, f2fs_proc_root);
kobject_put(&sbi->s_stat_kobj);
wait_for_completion(&sbi->s_stat_kobj_unregister);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 213805d3592c..476b186b90a6 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -528,10 +528,12 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
- f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
+ if (!ipage)
+ f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
error = lookup_all_xattrs(inode, ipage, index, len, name,
&entry, &base_addr, &base_size, &is_inline);
- f2fs_up_read(&F2FS_I(inode)->i_xattr_sem);
+ if (!ipage)
+ f2fs_up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 416d652774a3..b1811c392e6f 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -83,6 +83,7 @@ struct f2fs_xattr_entry {
sizeof(struct f2fs_xattr_header) - \
sizeof(struct f2fs_xattr_entry))
+#define MIN_INLINE_XATTR_SIZE (sizeof(struct f2fs_xattr_header) / sizeof(__le32))
#define MAX_INLINE_XATTR_SIZE \
(DEF_ADDRS_PER_INODE - \
F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - \
diff --git a/fs/file_table.c b/fs/file_table.c
index e06c68e2d757..fc7d677ff5ad 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -160,7 +160,7 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
f->f_cred = get_cred(cred);
error = security_file_alloc(f);
if (unlikely(error)) {
- file_free_rcu(&f->f_rcuhead);
+ put_cred(f->f_cred);
return error;
}
@@ -208,8 +208,10 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
return ERR_PTR(-ENOMEM);
error = init_file(f, flags, cred);
- if (unlikely(error))
+ if (unlikely(error)) {
+ kmem_cache_free(filp_cachep, f);
return ERR_PTR(error);
+ }
percpu_counter_inc(&nr_files);
@@ -240,8 +242,10 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
return ERR_PTR(-ENOMEM);
error = init_file(f, flags, cred);
- if (unlikely(error))
+ if (unlikely(error)) {
+ kmem_cache_free(filp_cachep, f);
return ERR_PTR(error);
+ }
f->f_mode |= FMODE_NOACCOUNT;
@@ -265,8 +269,10 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
return ERR_PTR(-ENOMEM);
error = init_file(&ff->file, flags, cred);
- if (unlikely(error))
+ if (unlikely(error)) {
+ kfree(ff);
return ERR_PTR(error);
+ }
ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
return &ff->file;
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 4d8d4f16c727..5f1be1da92ce 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -775,7 +775,8 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
NULL);
if (rc < 0)
- return rc;
+ return dax_mem2blk_err(rc);
+
memset(kaddr, 0, nr_pages << PAGE_SHIFT);
dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
return 0;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 1c407eba1e30..ae49256b7c8c 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -432,10 +432,10 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
if (error)
return error;
- kaddr = kmap_atomic(page);
+ kaddr = kmap_local_page(page);
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
flush_dcache_page(page);
brelse(dibh);
SetPageUptodate(page);
@@ -489,18 +489,18 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
unsigned copied = 0;
unsigned amt;
struct page *page;
- void *p;
do {
+ page = read_cache_page(mapping, index, gfs2_read_folio, NULL);
+ if (IS_ERR(page)) {
+ if (PTR_ERR(page) == -EINTR)
+ continue;
+ return PTR_ERR(page);
+ }
amt = size - copied;
if (offset + size > PAGE_SIZE)
amt = PAGE_SIZE - offset;
- page = read_cache_page(mapping, index, gfs2_read_folio, NULL);
- if (IS_ERR(page))
- return PTR_ERR(page);
- p = kmap_atomic(page);
- memcpy(buf + copied, p + offset, amt);
- kunmap_atomic(p);
+ memcpy_from_page(buf + copied, page, offset, amt);
put_page(page);
copied += amt;
index++;
@@ -751,7 +751,6 @@ static const struct address_space_operations gfs2_aops = {
.release_folio = iomap_release_folio,
.invalidate_folio = iomap_invalidate_folio,
.bmap = gfs2_bmap,
- .direct_IO = noop_direct_IO,
.migrate_folio = filemap_migrate_folio,
.is_partially_uptodate = iomap_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index c739b258a2d9..8d611fbcf0bd 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1729,8 +1729,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
if (offset >= maxsize) {
/*
- * The starting point lies beyond the allocated meta-data;
- * there are no blocks do deallocate.
+ * The starting point lies beyond the allocated metadata;
+ * there are no blocks to deallocate.
*/
return 0;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index f146447eac63..1bf3c4453516 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -630,6 +630,9 @@ int gfs2_open_common(struct inode *inode, struct file *file)
ret = generic_file_open(inode, file);
if (ret)
return ret;
+
+ if (!gfs2_is_jdata(GFS2_I(inode)))
+ file->f_mode |= FMODE_CAN_ODIRECT;
}
fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS);
@@ -1030,8 +1033,8 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
}
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh);
-retry:
if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
+retry:
window_size -= fault_in_iov_iter_readable(from, window_size);
if (!window_size) {
ret = -EFAULT;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 5adc7d85dbf3..1438e7465e30 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -145,8 +145,8 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
*
* We need to allow some glocks to be enqueued, dequeued, promoted, and demoted
* when we're withdrawn. For example, to maintain metadata integrity, we should
- * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like
- * iopen or the transaction glocks may be safely used because none of their
+ * disallow the use of inode and rgrp glocks when withdrawn. Other glocks like
+ * the iopen or freeze glock may be safely used because none of their
* metadata goes through the journal. So in general, we should disallow all
* glocks that are journaled, and allow all the others. One exception is:
* we need to allow our active journal to be promoted and demoted so others
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 01d433ed6ce7..54319328b16b 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -236,7 +236,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
truncate_inode_pages_range(mapping, start, end);
}
-static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
+static void gfs2_rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl,
const char *fs_id_buf)
{
struct gfs2_rgrpd *rgd = gl->gl_object;
@@ -536,72 +536,53 @@ static int inode_go_held(struct gfs2_holder *gh)
*
*/
-static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
+static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl,
const char *fs_id_buf)
{
struct gfs2_inode *ip = gl->gl_object;
- struct inode *inode;
- unsigned long nrpages;
+ const struct inode *inode = &ip->i_inode;
if (ip == NULL)
return;
- inode = &ip->i_inode;
- xa_lock_irq(&inode->i_data.i_pages);
- nrpages = inode->i_data.nrpages;
- xa_unlock_irq(&inode->i_data.i_pages);
-
gfs2_print_dbg(seq, "%s I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu "
"p:%lu\n", fs_id_buf,
(unsigned long long)ip->i_no_formal_ino,
(unsigned long long)ip->i_no_addr,
- IF2DT(ip->i_inode.i_mode), ip->i_flags,
+ IF2DT(inode->i_mode), ip->i_flags,
(unsigned int)ip->i_diskflags,
- (unsigned long long)i_size_read(inode), nrpages);
+ (unsigned long long)i_size_read(inode),
+ inode->i_data.nrpages);
}
/**
- * freeze_go_sync - promote/demote the freeze glock
+ * freeze_go_callback - A cluster node is requesting a freeze
* @gl: the glock
+ * @remote: true if this came from a different cluster node
*/
-static int freeze_go_sync(struct gfs2_glock *gl)
+static void freeze_go_callback(struct gfs2_glock *gl, bool remote)
{
- int error = 0;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ struct super_block *sb = sdp->sd_vfs;
+
+ if (!remote ||
+ gl->gl_state != LM_ST_SHARED ||
+ gl->gl_demote_state != LM_ST_UNLOCKED)
+ return;
/*
- * We need to check gl_state == LM_ST_SHARED here and not gl_req ==
- * LM_ST_EXCLUSIVE. That's because when any node does a freeze,
- * all the nodes should have the freeze glock in SH mode and they all
- * call do_xmote: One for EX and the others for UN. They ALL must
- * freeze locally, and they ALL must queue freeze work. The freeze_work
- * calls freeze_func, which tries to reacquire the freeze glock in SH,
- * effectively waiting for the thaw on the node who holds it in EX.
- * Once thawed, the work func acquires the freeze glock in
- * SH and everybody goes back to thawed.
+ * Try to get an active super block reference to prevent racing with
+ * unmount (see trylock_super()). But note that unmount isn't the only
+ * place where a write lock on s_umount is taken, and we can fail here
+ * because of things like remount as well.
*/
- if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) &&
- !test_bit(SDF_NORECOVERY, &sdp->sd_flags)) {
- atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
- error = freeze_super(sdp->sd_vfs);
- if (error) {
- fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
- error);
- if (gfs2_withdrawn(sdp)) {
- atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
- return 0;
- }
- gfs2_assert_withdraw(sdp, 0);
- }
- queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
- if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
- gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
- GFS2_LFC_FREEZE_GO_SYNC);
- else /* read-only mounts */
- atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
+ if (down_read_trylock(&sb->s_umount)) {
+ atomic_inc(&sb->s_active);
+ up_read(&sb->s_umount);
+ if (!queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work))
+ deactivate_super(sb);
}
- return 0;
}
/**
@@ -761,9 +742,9 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
};
const struct gfs2_glock_operations gfs2_freeze_glops = {
- .go_sync = freeze_go_sync,
.go_xmote_bh = freeze_go_xmote_bh,
.go_demote_ok = freeze_go_demote_ok,
+ .go_callback = freeze_go_callback,
.go_type = LM_TYPE_NONDISK,
.go_flags = GLOF_NONDISK,
};
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 79485329118b..04f2d78e8658 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -221,7 +221,7 @@ struct gfs2_glock_operations {
int (*go_demote_ok) (const struct gfs2_glock *gl);
int (*go_instantiate) (struct gfs2_glock *gl);
int (*go_held)(struct gfs2_holder *gh);
- void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl,
+ void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl,
const char *fs_id_buf);
void (*go_callback)(struct gfs2_glock *gl, bool remote);
void (*go_free)(struct gfs2_glock *gl);
@@ -600,7 +600,7 @@ enum {
SDF_RORECOVERY = 7, /* read only recovery */
SDF_SKIP_DLM_UNLOCK = 8,
SDF_FORCE_AIL_FLUSH = 9,
- SDF_FS_FROZEN = 10,
+ SDF_FREEZE_INITIATOR = 10,
SDF_WITHDRAWING = 11, /* Will withdraw eventually */
SDF_WITHDRAW_IN_PROG = 12, /* Withdraw is in progress */
SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */
@@ -608,12 +608,7 @@ enum {
withdrawing */
SDF_DEACTIVATING = 15,
SDF_EVICTING = 16,
-};
-
-enum gfs2_freeze_state {
- SFS_UNFROZEN = 0,
- SFS_STARTING_FREEZE = 1,
- SFS_FROZEN = 2,
+ SDF_FROZEN = 17,
};
#define GFS2_FSNAME_LEN 256
@@ -841,7 +836,6 @@ struct gfs2_sbd {
/* For quiescing the filesystem */
struct gfs2_holder sd_freeze_gh;
- atomic_t sd_freeze_state;
struct mutex sd_freeze_mutex;
char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2];
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 71911bf9ab34..54911294687c 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -296,10 +296,8 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int error;
- if (gl->gl_lksb.sb_lkid == 0) {
- gfs2_glock_free(gl);
- return;
- }
+ if (gl->gl_lksb.sb_lkid == 0)
+ goto out_free;
clear_bit(GLF_BLOCKING, &gl->gl_flags);
gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
@@ -307,17 +305,13 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
gfs2_update_request_times(gl);
/* don't want to call dlm if we've unmounted the lock protocol */
- if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
- gfs2_glock_free(gl);
- return;
- }
+ if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
+ goto out_free;
/* don't want to skip dlm_unlock writing the lvb when lock has one */
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
- !gl->gl_lksb.sb_lvbptr) {
- gfs2_glock_free(gl);
- return;
- }
+ !gl->gl_lksb.sb_lvbptr)
+ goto out_free;
again:
error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
@@ -331,8 +325,11 @@ again:
fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n",
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number, error);
- return;
}
+ return;
+
+out_free:
+ gfs2_glock_free(gl);
}
static void gdlm_cancel(struct gfs2_glock *gl)
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index d750d1128bed..aa568796207c 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -914,9 +914,8 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
{
blk_opf_t op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
- enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
- gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
+ gfs2_assert_withdraw(sdp, !test_bit(SDF_FROZEN, &sdp->sd_flags));
if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
gfs2_ordered_wait(sdp);
@@ -1036,7 +1035,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
{
struct gfs2_trans *tr = NULL;
unsigned int reserved_blocks = 0, used_blocks = 0;
- enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
+ bool frozen = test_bit(SDF_FROZEN, &sdp->sd_flags);
unsigned int first_log_head;
unsigned int reserved_revokes = 0;
@@ -1067,7 +1066,7 @@ repeat:
if (tr) {
sdp->sd_log_tr = NULL;
tr->tr_first = first_log_head;
- if (unlikely (state == SFS_FROZEN)) {
+ if (unlikely(frozen)) {
if (gfs2_assert_withdraw_delayed(sdp,
!tr->tr_num_buf_new && !tr->tr_num_databuf_new))
goto out_withdraw;
@@ -1092,7 +1091,7 @@ repeat:
if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- if (unlikely(state == SFS_FROZEN))
+ if (unlikely(frozen))
if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes))
goto out_withdraw;
@@ -1136,8 +1135,6 @@ repeat:
if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
GFS2_LOG_HEAD_FLUSH_FREEZE))
gfs2_log_shutdown(sdp);
- if (flags & GFS2_LOG_HEAD_FLUSH_FREEZE)
- atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
}
out_end:
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 1902413d5d12..251322b01631 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -427,10 +427,11 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct gfs2_log_header_host lh;
- void *kaddr = kmap_atomic(page);
+ void *kaddr;
unsigned int offset;
bool ret = false;
+ kaddr = kmap_local_page(page);
for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
if (lh.lh_sequence >= head->lh_sequence)
@@ -441,7 +442,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
}
}
}
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
return ret;
}
@@ -626,11 +627,11 @@ static void gfs2_check_magic(struct buffer_head *bh)
__be32 *ptr;
clear_buffer_escaped(bh);
- kaddr = kmap_atomic(bh->b_page);
+ kaddr = kmap_local_page(bh->b_page);
ptr = kaddr + bh_offset(bh);
if (*ptr == cpu_to_be32(GFS2_MAGIC))
set_buffer_escaped(bh);
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
}
static int blocknr_cmp(void *priv, const struct list_head *a,
@@ -696,14 +697,12 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
lock_buffer(bd2->bd_bh);
if (buffer_escaped(bd2->bd_bh)) {
- void *kaddr;
+ void *p;
+
page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
- ptr = page_address(page);
- kaddr = kmap_atomic(bd2->bd_bh->b_page);
- memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
- bd2->bd_bh->b_size);
- kunmap_atomic(kaddr);
- *(__be32 *)ptr = 0;
+ p = page_address(page);
+ memcpy_from_page(p, page, bh_offset(bd2->bd_bh), bd2->bd_bh->b_size);
+ *(__be32 *)p = 0;
clear_buffer_escaped(bd2->bd_bh);
unlock_buffer(bd2->bd_bh);
brelse(bd2->bd_bh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cd962985b058..8a27957dbfee 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -135,7 +135,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
init_rwsem(&sdp->sd_log_flush_lock);
atomic_set(&sdp->sd_log_in_flight, 0);
init_waitqueue_head(&sdp->sd_log_flush_wait);
- atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
mutex_init(&sdp->sd_freeze_mutex);
return sdp;
@@ -434,7 +433,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
error = gfs2_glock_get(sdp, GFS2_FREEZE_LOCK, &gfs2_freeze_glops,
CREATE, &sdp->sd_freeze_gl);
if (error) {
- fs_err(sdp, "can't create transaction glock: %d\n", error);
+ fs_err(sdp, "can't create freeze glock: %d\n", error);
goto fail_rename;
}
@@ -1140,7 +1139,6 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
int silent = fc->sb_flags & SB_SILENT;
struct gfs2_sbd *sdp;
struct gfs2_holder mount_gh;
- struct gfs2_holder freeze_gh;
int error;
sdp = init_sbd(sb);
@@ -1269,15 +1267,15 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
}
}
- error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
+ error = gfs2_freeze_lock_shared(sdp);
if (error)
goto fail_per_node;
if (!sb_rdonly(sb))
error = gfs2_make_fs_rw(sdp);
- gfs2_freeze_unlock(&freeze_gh);
if (error) {
+ gfs2_freeze_unlock(&sdp->sd_freeze_gh);
if (sdp->sd_quotad_process)
kthread_stop(sdp->sd_quotad_process);
sdp->sd_quotad_process = NULL;
@@ -1590,12 +1588,6 @@ static int gfs2_reconfigure(struct fs_context *fc)
fc->sb_flags |= SB_RDONLY;
if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) {
- struct gfs2_holder freeze_gh;
-
- error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
- if (error)
- return -EINVAL;
-
if (fc->sb_flags & SB_RDONLY) {
gfs2_make_fs_ro(sdp);
} else {
@@ -1603,7 +1595,6 @@ static int gfs2_reconfigure(struct fs_context *fc)
if (error)
errorfc(fc, "unable to remount read-write");
}
- gfs2_freeze_unlock(&freeze_gh);
}
sdp->sd_args = *newargs;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 1ed17226d9ed..704192b73605 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -75,6 +75,9 @@
#define GFS2_QD_HASH_SIZE BIT(GFS2_QD_HASH_SHIFT)
#define GFS2_QD_HASH_MASK (GFS2_QD_HASH_SIZE - 1)
+#define QC_CHANGE 0
+#define QC_SYNC 1
+
/* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */
/* -> sd_bitmap_lock */
static DEFINE_SPINLOCK(qd_lock);
@@ -470,7 +473,6 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
spin_unlock(&qd_lock);
if (qd) {
- gfs2_assert_warn(sdp, qd->qd_change_sync);
error = bh_get(qd);
if (error) {
clear_bit(QDF_LOCKED, &qd->qd_flags);
@@ -591,6 +593,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
if (gfs2_assert_warn(sdp, !ip->i_qadata->qa_qd_num) ||
gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) {
error = -EIO;
+ gfs2_qa_put(ip);
goto out;
}
@@ -661,7 +664,7 @@ static int sort_qd(const void *a, const void *b)
return 0;
}
-static void do_qc(struct gfs2_quota_data *qd, s64 change)
+static void do_qc(struct gfs2_quota_data *qd, s64 change, int qc_type)
{
struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
@@ -686,16 +689,18 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
qd->qd_change = x;
spin_unlock(&qd_lock);
- if (!x) {
+ if (qc_type == QC_CHANGE) {
+ if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
+ qd_hold(qd);
+ slot_hold(qd);
+ }
+ } else {
gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
clear_bit(QDF_CHANGE, &qd->qd_flags);
qc->qc_flags = 0;
qc->qc_id = 0;
slot_put(qd);
qd_put(qd);
- } else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
- qd_hold(qd);
- slot_hold(qd);
}
if (change < 0) /* Reset quiet flag if we freed some blocks */
@@ -711,7 +716,6 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
struct address_space *mapping = inode->i_mapping;
struct page *page;
struct buffer_head *bh;
- void *kaddr;
u64 blk;
unsigned bsize = sdp->sd_sb.sb_bsize, bnum = 0, boff = 0;
unsigned to_write = bytes, pg_off = off;
@@ -763,10 +767,8 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
}
/* Write to the page, now that we have setup the buffer(s) */
- kaddr = kmap_atomic(page);
- memcpy(kaddr + off, buf, bytes);
+ memcpy_to_page(page, off, buf, bytes);
flush_dcache_page(page);
- kunmap_atomic(kaddr);
unlock_page(page);
put_page(page);
@@ -955,7 +957,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
if (error)
goto out_end_trans;
- do_qc(qd, -qd->qd_change_sync);
+ do_qc(qd, -qd->qd_change_sync, QC_SYNC);
set_bit(QDF_REFRESH, &qd->qd_flags);
}
@@ -1281,7 +1283,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
if (qid_eq(qd->qd_id, make_kqid_uid(uid)) ||
qid_eq(qd->qd_id, make_kqid_gid(gid))) {
- do_qc(qd, change);
+ do_qc(qd, change, QC_CHANGE);
}
}
}
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 2bb085a72e8e..9c7a9f640bad 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -404,7 +404,7 @@ void gfs2_recover_func(struct work_struct *work)
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct gfs2_log_header_host head;
- struct gfs2_holder j_gh, ji_gh, thaw_gh;
+ struct gfs2_holder j_gh, ji_gh;
ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep;
int ro = 0;
unsigned int pass;
@@ -420,10 +420,10 @@ void gfs2_recover_func(struct work_struct *work)
if (sdp->sd_args.ar_spectator)
goto fail;
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
- fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
+ fs_info(sdp, "jid=%u: Trying to acquire journal glock...\n",
jd->jd_jid);
jlocked = 1;
- /* Acquire the journal lock so we can do recovery */
+ /* Acquire the journal glock so we can do recovery */
error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
LM_ST_EXCLUSIVE,
@@ -465,14 +465,14 @@ void gfs2_recover_func(struct work_struct *work)
ktime_ms_delta(t_jhd, t_jlck));
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
- fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
- jd->jd_jid);
-
- /* Acquire a shared hold on the freeze lock */
+ mutex_lock(&sdp->sd_freeze_mutex);
- error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY);
- if (error)
+ if (test_bit(SDF_FROZEN, &sdp->sd_flags)) {
+ mutex_unlock(&sdp->sd_freeze_mutex);
+ fs_warn(sdp, "jid=%u: Can't replay: filesystem "
+ "is frozen\n", jd->jd_jid);
goto fail_gunlock_ji;
+ }
if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
ro = 1;
@@ -496,7 +496,7 @@ void gfs2_recover_func(struct work_struct *work)
fs_warn(sdp, "jid=%u: Can't replay: read-only block "
"device\n", jd->jd_jid);
error = -EROFS;
- goto fail_gunlock_thaw;
+ goto fail_gunlock_nofreeze;
}
t_tlck = ktime_get();
@@ -514,7 +514,7 @@ void gfs2_recover_func(struct work_struct *work)
lops_after_scan(jd, error, pass);
if (error) {
up_read(&sdp->sd_log_flush_lock);
- goto fail_gunlock_thaw;
+ goto fail_gunlock_nofreeze;
}
}
@@ -522,7 +522,7 @@ void gfs2_recover_func(struct work_struct *work)
clean_journal(jd, &head);
up_read(&sdp->sd_log_flush_lock);
- gfs2_freeze_unlock(&thaw_gh);
+ mutex_unlock(&sdp->sd_freeze_mutex);
t_rep = ktime_get();
fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
"jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
@@ -543,8 +543,8 @@ void gfs2_recover_func(struct work_struct *work)
fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
goto done;
-fail_gunlock_thaw:
- gfs2_freeze_unlock(&thaw_gh);
+fail_gunlock_nofreeze:
+ mutex_unlock(&sdp->sd_freeze_mutex);
fail_gunlock_ji:
if (jlocked) {
gfs2_glock_dq_uninit(&ji_gh);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3b9b76e980ad..9308190895c8 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -2584,8 +2584,8 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
- rgrp_unlock_local(rgd);
be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
+ rgrp_unlock_local(rgd);
gfs2_statfs_change(sdp, 0, +1, -1);
trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index a84bf6444bba..9f4d5d6549ee 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -332,7 +332,12 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
struct lfcc *lfcc;
LIST_HEAD(list);
struct gfs2_log_header_host lh;
- int error;
+ int error, error2;
+
+ /*
+ * Grab all the journal glocks in SH mode. We are *probably* doing
+ * that to prevent recovery.
+ */
list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
@@ -349,11 +354,13 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
list_add(&lfcc->list, &list);
}
+ gfs2_freeze_unlock(&sdp->sd_freeze_gh);
+
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE,
LM_FLAG_NOEXP | GL_NOPID,
&sdp->sd_freeze_gh);
if (error)
- goto out;
+ goto relock_shared;
list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
error = gfs2_jdesc_check(jd);
@@ -368,8 +375,14 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
}
}
- if (error)
- gfs2_freeze_unlock(&sdp->sd_freeze_gh);
+ if (!error)
+ goto out; /* success */
+
+ gfs2_freeze_unlock(&sdp->sd_freeze_gh);
+
+relock_shared:
+ error2 = gfs2_freeze_lock_shared(sdp);
+ gfs2_assert_withdraw(sdp, !error2);
out:
while (!list_empty(&list)) {
@@ -463,7 +476,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
* @flags: The type of dirty
*
* Unfortunately it can be called under any combination of inode
- * glock and transaction lock, so we have to check carefully.
+ * glock and freeze glock, so we have to check carefully.
*
* At the moment this deals only with atime - it should be possible
* to expand that role in future, once a review of the locking has
@@ -615,6 +628,8 @@ restart:
/* Release stuff */
+ gfs2_freeze_unlock(&sdp->sd_freeze_gh);
+
iput(sdp->sd_jindex);
iput(sdp->sd_statfs_inode);
iput(sdp->sd_rindex);
@@ -669,59 +684,109 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
return sdp->sd_log_error;
}
-void gfs2_freeze_func(struct work_struct *work)
+static int gfs2_freeze_locally(struct gfs2_sbd *sdp)
{
- int error;
- struct gfs2_holder freeze_gh;
- struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
struct super_block *sb = sdp->sd_vfs;
+ int error;
- atomic_inc(&sb->s_active);
- error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
- if (error) {
- gfs2_assert_withdraw(sdp, 0);
- } else {
- atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
- error = thaw_super(sb);
- if (error) {
- fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n",
- error);
- gfs2_assert_withdraw(sdp, 0);
+ error = freeze_super(sb);
+ if (error)
+ return error;
+
+ if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
+ gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
+ GFS2_LFC_FREEZE_GO_SYNC);
+ if (gfs2_withdrawn(sdp)) {
+ thaw_super(sb);
+ return -EIO;
}
- gfs2_freeze_unlock(&freeze_gh);
}
+ return 0;
+}
+
+static int gfs2_do_thaw(struct gfs2_sbd *sdp)
+{
+ struct super_block *sb = sdp->sd_vfs;
+ int error;
+
+ error = gfs2_freeze_lock_shared(sdp);
+ if (error)
+ goto fail;
+ error = thaw_super(sb);
+ if (!error)
+ return 0;
+
+fail:
+ fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n", error);
+ gfs2_assert_withdraw(sdp, 0);
+ return error;
+}
+
+void gfs2_freeze_func(struct work_struct *work)
+{
+ struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
+ struct super_block *sb = sdp->sd_vfs;
+ int error;
+
+ mutex_lock(&sdp->sd_freeze_mutex);
+ error = -EBUSY;
+ if (test_bit(SDF_FROZEN, &sdp->sd_flags))
+ goto freeze_failed;
+
+ error = gfs2_freeze_locally(sdp);
+ if (error)
+ goto freeze_failed;
+
+ gfs2_freeze_unlock(&sdp->sd_freeze_gh);
+ set_bit(SDF_FROZEN, &sdp->sd_flags);
+
+ error = gfs2_do_thaw(sdp);
+ if (error)
+ goto out;
+
+ clear_bit(SDF_FROZEN, &sdp->sd_flags);
+ goto out;
+
+freeze_failed:
+ fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error);
+
+out:
+ mutex_unlock(&sdp->sd_freeze_mutex);
deactivate_super(sb);
- clear_bit_unlock(SDF_FS_FROZEN, &sdp->sd_flags);
- wake_up_bit(&sdp->sd_flags, SDF_FS_FROZEN);
- return;
}
/**
- * gfs2_freeze - prevent further writes to the filesystem
+ * gfs2_freeze_super - prevent further writes to the filesystem
* @sb: the VFS structure for the filesystem
*
*/
-static int gfs2_freeze(struct super_block *sb)
+static int gfs2_freeze_super(struct super_block *sb)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
- mutex_lock(&sdp->sd_freeze_mutex);
- if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) {
- error = -EBUSY;
+ if (!mutex_trylock(&sdp->sd_freeze_mutex))
+ return -EBUSY;
+ error = -EBUSY;
+ if (test_bit(SDF_FROZEN, &sdp->sd_flags))
goto out;
- }
for (;;) {
- if (gfs2_withdrawn(sdp)) {
- error = -EINVAL;
+ error = gfs2_freeze_locally(sdp);
+ if (error) {
+ fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
+ error);
goto out;
}
error = gfs2_lock_fs_check_clean(sdp);
if (!error)
- break;
+ break; /* success */
+
+ error = gfs2_do_thaw(sdp);
+ if (error)
+ goto out;
if (error == -EBUSY)
fs_err(sdp, "waiting for recovery before freeze\n");
@@ -735,32 +800,58 @@ static int gfs2_freeze(struct super_block *sb)
fs_err(sdp, "retrying...\n");
msleep(1000);
}
- set_bit(SDF_FS_FROZEN, &sdp->sd_flags);
+
out:
+ if (!error) {
+ set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
+ set_bit(SDF_FROZEN, &sdp->sd_flags);
+ }
mutex_unlock(&sdp->sd_freeze_mutex);
return error;
}
/**
- * gfs2_unfreeze - reallow writes to the filesystem
+ * gfs2_thaw_super - reallow writes to the filesystem
* @sb: the VFS structure for the filesystem
*
*/
-static int gfs2_unfreeze(struct super_block *sb)
+static int gfs2_thaw_super(struct super_block *sb)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
+ int error;
- mutex_lock(&sdp->sd_freeze_mutex);
- if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
- !gfs2_holder_initialized(&sdp->sd_freeze_gh)) {
- mutex_unlock(&sdp->sd_freeze_mutex);
- return -EINVAL;
+ if (!mutex_trylock(&sdp->sd_freeze_mutex))
+ return -EBUSY;
+ error = -EINVAL;
+ if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags))
+ goto out;
+
+ gfs2_freeze_unlock(&sdp->sd_freeze_gh);
+
+ error = gfs2_do_thaw(sdp);
+
+ if (!error) {
+ clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
+ clear_bit(SDF_FROZEN, &sdp->sd_flags);
}
+out:
+ mutex_unlock(&sdp->sd_freeze_mutex);
+ return error;
+}
+
+void gfs2_thaw_freeze_initiator(struct super_block *sb)
+{
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+
+ mutex_lock(&sdp->sd_freeze_mutex);
+ if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags))
+ goto out;
gfs2_freeze_unlock(&sdp->sd_freeze_gh);
+
+out:
mutex_unlock(&sdp->sd_freeze_mutex);
- return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE);
}
/**
@@ -1004,7 +1095,14 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
{
struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
struct gfs2_args *args = &sdp->sd_args;
- int val;
+ unsigned int logd_secs, statfs_slow, statfs_quantum, quota_quantum;
+
+ spin_lock(&sdp->sd_tune.gt_spin);
+ logd_secs = sdp->sd_tune.gt_logd_secs;
+ quota_quantum = sdp->sd_tune.gt_quota_quantum;
+ statfs_quantum = sdp->sd_tune.gt_statfs_quantum;
+ statfs_slow = sdp->sd_tune.gt_statfs_slow;
+ spin_unlock(&sdp->sd_tune.gt_spin);
if (is_ancestor(root, sdp->sd_master_dir))
seq_puts(s, ",meta");
@@ -1059,17 +1157,14 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
}
if (args->ar_discard)
seq_puts(s, ",discard");
- val = sdp->sd_tune.gt_logd_secs;
- if (val != 30)
- seq_printf(s, ",commit=%d", val);
- val = sdp->sd_tune.gt_statfs_quantum;
- if (val != 30)
- seq_printf(s, ",statfs_quantum=%d", val);
- else if (sdp->sd_tune.gt_statfs_slow)
+ if (logd_secs != 30)
+ seq_printf(s, ",commit=%d", logd_secs);
+ if (statfs_quantum != 30)
+ seq_printf(s, ",statfs_quantum=%d", statfs_quantum);
+ else if (statfs_slow)
seq_puts(s, ",statfs_quantum=0");
- val = sdp->sd_tune.gt_quota_quantum;
- if (val != 60)
- seq_printf(s, ",quota_quantum=%d", val);
+ if (quota_quantum != 60)
+ seq_printf(s, ",quota_quantum=%d", quota_quantum);
if (args->ar_statfs_percent)
seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
@@ -1131,9 +1226,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
return -EIO;
}
- error = gfs2_rindex_update(sdp);
- if (error)
- return error;
+ gfs2_rindex_update(sdp);
error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
if (error)
@@ -1334,9 +1427,6 @@ static int evict_unlinked_inode(struct inode *inode)
goto out;
}
- if (ip->i_gl)
- gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
-
/*
* As soon as we clear the bitmap for the dinode, gfs2_create_inode()
* can get called to recreate it, or even gfs2_inode_lookup() if the
@@ -1350,6 +1440,9 @@ static int evict_unlinked_inode(struct inode *inode)
*/
ret = gfs2_dinode_dealloc(ip);
+ if (!ret && ip->i_gl)
+ gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
+
out:
return ret;
}
@@ -1528,8 +1621,8 @@ const struct super_operations gfs2_super_ops = {
.evict_inode = gfs2_evict_inode,
.put_super = gfs2_put_super,
.sync_fs = gfs2_sync_fs,
- .freeze_super = gfs2_freeze,
- .thaw_super = gfs2_unfreeze,
+ .freeze_super = gfs2_freeze_super,
+ .thaw_super = gfs2_thaw_super,
.statfs = gfs2_statfs,
.drop_inode = gfs2_drop_inode,
.show_options = gfs2_show_options,
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 58d13fd77aed..bba58629bc45 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -46,6 +46,7 @@ extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc,
extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh);
extern int gfs2_statfs_sync(struct super_block *sb, int type);
extern void gfs2_freeze_func(struct work_struct *work);
+extern void gfs2_thaw_freeze_initiator(struct super_block *sb);
extern void free_local_statfs_inodes(struct gfs2_sbd *sdp);
extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 454dc2ff8b5e..2dfbe2f188dd 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -82,6 +82,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
"RO Recovery: %d\n"
"Skip DLM Unlock: %d\n"
"Force AIL Flush: %d\n"
+ "FS Freeze Initiator: %d\n"
"FS Frozen: %d\n"
"Withdrawing: %d\n"
"Withdraw In Prog: %d\n"
@@ -111,7 +112,8 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
test_bit(SDF_RORECOVERY, &f),
test_bit(SDF_SKIP_DLM_UNLOCK, &f),
test_bit(SDF_FORCE_AIL_FLUSH, &f),
- test_bit(SDF_FS_FROZEN, &f),
+ test_bit(SDF_FREEZE_INITIATOR, &f),
+ test_bit(SDF_FROZEN, &f),
test_bit(SDF_WITHDRAWING, &f),
test_bit(SDF_WITHDRAW_IN_PROG, &f),
test_bit(SDF_REMOTE_WITHDRAW, &f),
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 63fec11ef2ce..ec1631257978 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -233,7 +233,6 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
struct gfs2_bufdata *bd;
struct gfs2_meta_header *mh;
struct gfs2_trans *tr = current->journal_info;
- enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
lock_buffer(bh);
if (buffer_pinned(bh)) {
@@ -267,7 +266,7 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
(unsigned long long)bd->bd_bh->b_blocknr);
BUG();
}
- if (unlikely(state == SFS_FROZEN)) {
+ if (unlikely(test_bit(SDF_FROZEN, &sdp->sd_flags))) {
fs_info(sdp, "GFS2:adding buf while frozen\n");
gfs2_assert_withdraw(sdp, 0);
}
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 7a6aeffcdf5c..dac22b1c1a2e 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -93,21 +93,18 @@ out_unlock:
}
/**
- * gfs2_freeze_lock - hold the freeze glock
+ * gfs2_freeze_lock_shared - hold the freeze glock
* @sdp: the superblock
- * @freeze_gh: pointer to the requested holder
- * @caller_flags: any additional flags needed by the caller
*/
-int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
- int caller_flags)
+int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp)
{
- int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags;
int error;
- error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags,
- freeze_gh);
- if (error && error != GLR_TRYFAILED)
- fs_err(sdp, "can't lock the freeze lock: %d\n", error);
+ error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
+ LM_FLAG_NOEXP | GL_EXACT,
+ &sdp->sd_freeze_gh);
+ if (error)
+ fs_err(sdp, "can't lock the freeze glock: %d\n", error);
return error;
}
@@ -124,7 +121,6 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
struct gfs2_inode *ip;
struct gfs2_glock *i_gl;
u64 no_formal_ino;
- int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
int ret = 0;
int tries;
@@ -152,24 +148,18 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
*/
clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
if (!sb_rdonly(sdp->sd_vfs)) {
- struct gfs2_holder freeze_gh;
-
- gfs2_holder_mark_uninitialized(&freeze_gh);
- if (sdp->sd_freeze_gl &&
- !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
- ret = gfs2_freeze_lock(sdp, &freeze_gh,
- log_write_allowed ? 0 : LM_FLAG_TRY);
- if (ret == GLR_TRYFAILED)
- ret = 0;
- }
- if (!ret)
- gfs2_make_fs_ro(sdp);
+ bool locked = mutex_trylock(&sdp->sd_freeze_mutex);
+
+ gfs2_make_fs_ro(sdp);
+
+ if (locked)
+ mutex_unlock(&sdp->sd_freeze_mutex);
+
/*
* Dequeue any pending non-system glock holders that can no
* longer be granted because the file system is withdrawn.
*/
gfs2_gl_dq_holders(sdp);
- gfs2_freeze_unlock(&freeze_gh);
}
if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
@@ -187,15 +177,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
}
sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq(&sdp->sd_jinode_gh);
- if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
- /* Make sure gfs2_unfreeze works if partially-frozen */
- flush_work(&sdp->sd_freeze_work);
- atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
- thaw_super(sdp->sd_vfs);
- } else {
- wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE,
- TASK_UNINTERRUPTIBLE);
- }
+ gfs2_thaw_freeze_initiator(sdp->sd_vfs);
+ wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
/*
* holder_uninit to force glock_put, to force dlm to let go
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 78ec190f4155..cdb839529175 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -149,8 +149,7 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
bool verbose);
-extern int gfs2_freeze_lock(struct gfs2_sbd *sdp,
- struct gfs2_holder *freeze_gh, int caller_flags);
+extern int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp);
extern void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh);
#define gfs2_io_error(sdp) \
diff --git a/fs/inode.c b/fs/inode.c
index d37fad91c8da..8fefb69e1f84 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1156,8 +1156,10 @@ lock:
*/
void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
- WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
- WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
+ if (inode1)
+ WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
+ if (inode2)
+ WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
lock_two_inodes(inode1, inode2, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
}
EXPORT_SYMBOL(lock_two_nondirectories);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index a4fa81af60d9..adb92cdb24b0 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1076,7 +1076,7 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
{
loff_t start_byte;
loff_t end_byte;
- int blocksize = i_blocksize(inode);
+ unsigned int blocksize = i_blocksize(inode);
if (iomap->type != IOMAP_DELALLOC)
return 0;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 45b6919903e6..5a1a4af9d3d2 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -655,7 +655,9 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
return kn;
err_out3:
+ spin_lock(&kernfs_idr_lock);
idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
+ spin_unlock(&kernfs_idr_lock);
err_out2:
kmem_cache_free(kernfs_node_cache, kn);
err_out1:
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index e3972aa3045a..5d85715be763 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -93,6 +93,12 @@ void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, struct
block->b_status = nlm_lck_blocked;
}
+struct rpc_clnt *nlmclnt_rpc_clnt(struct nlm_host *host)
+{
+ return host->h_rpcclnt;
+}
+EXPORT_SYMBOL_GPL(nlmclnt_rpc_clnt);
+
/*
* Queue up a lock for blocking so that the GRANTED request can see it
*/
diff --git a/fs/namei.c b/fs/namei.c
index 91171da719c5..e56ff39a79bc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4874,8 +4874,7 @@ int vfs_rename(struct renamedata *rd)
d_exchange(old_dentry, new_dentry);
}
out:
- if (source)
- inode_unlock(source);
+ inode_unlock(source);
if (target)
inode_unlock(target);
dput(new_dentry);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f50e025ae406..e4c5f193ed5e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -184,6 +184,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_net = get_net(cl_init->net);
clp->cl_principal = "*";
+ clp->cl_xprtsec = cl_init->xprtsec;
return clp;
error_cleanup:
@@ -326,6 +327,10 @@ again:
sap))
continue;
+ /* Match the xprt security policy */
+ if (clp->cl_xprtsec.policy != data->xprtsec.policy)
+ continue;
+
refcount_inc(&clp->cl_count);
return clp;
}
@@ -458,6 +463,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
switch (proto) {
case XPRT_TRANSPORT_TCP:
+ case XPRT_TRANSPORT_TCP_TLS:
case XPRT_TRANSPORT_RDMA:
if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_TCP_RETRANS;
@@ -510,6 +516,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
.version = clp->rpc_ops->version,
.authflavor = flavor,
.cred = cl_init->cred,
+ .xprtsec = cl_init->xprtsec,
};
if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
@@ -592,6 +599,7 @@ static int nfs_start_lockd(struct nfs_server *server)
server->nlm_host = host;
server->destroy = nfs_destroy_server;
+ nfs_sysfs_link_rpc_client(server, nlmclnt_rpc_clnt(host), NULL);
return 0;
}
@@ -621,6 +629,7 @@ int nfs_init_server_rpcclient(struct nfs_server *server,
if (server->flags & NFS_MOUNT_SOFT)
server->client->cl_softrtry = 1;
+ nfs_sysfs_link_rpc_client(server, server->client, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient);
@@ -675,6 +684,7 @@ static int nfs_init_server(struct nfs_server *server,
.cred = server->cred,
.nconnect = ctx->nfs_server.nconnect,
.init_flags = (1UL << NFS_CS_REUSEPORT),
+ .xprtsec = ctx->xprtsec,
};
struct nfs_client *clp;
int error;
@@ -690,6 +700,8 @@ static int nfs_init_server(struct nfs_server *server,
return PTR_ERR(clp);
server->nfs_client = clp;
+ nfs_sysfs_add_server(server);
+ nfs_sysfs_link_rpc_client(server, clp->cl_rpcclient, "_state");
/* Initialise the client representation from the mount data */
server->flags = ctx->flags;
@@ -944,6 +956,8 @@ void nfs_server_remove_lists(struct nfs_server *server)
}
EXPORT_SYMBOL_GPL(nfs_server_remove_lists);
+static DEFINE_IDA(s_sysfs_ids);
+
/*
* Allocate and initialise a server record
*/
@@ -955,6 +969,12 @@ struct nfs_server *nfs_alloc_server(void)
if (!server)
return NULL;
+ server->s_sysfs_id = ida_alloc(&s_sysfs_ids, GFP_KERNEL);
+ if (server->s_sysfs_id < 0) {
+ kfree(server);
+ return NULL;
+ }
+
server->client = server->client_acl = ERR_PTR(-EINVAL);
/* Zero out the NFS state stuff */
@@ -1001,6 +1021,12 @@ void nfs_free_server(struct nfs_server *server)
nfs_put_client(server->nfs_client);
+ if (server->kobj.state_initialized) {
+ nfs_sysfs_remove_server(server);
+ kobject_put(&server->kobj);
+ }
+ ida_free(&s_sysfs_ids, server->s_sysfs_id);
+
ida_destroy(&server->lockowner_id);
ida_destroy(&server->openowner_id);
nfs_free_iostats(server->io_stats);
@@ -1102,6 +1128,11 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
server->fsid = fattr->fsid;
+ nfs_sysfs_add_server(server);
+
+ nfs_sysfs_link_rpc_client(server,
+ server->nfs_client->cl_rpcclient, "_state");
+
error = nfs_init_server_rpcclient(server,
source->client->cl_timeout,
flavor);
@@ -1385,6 +1416,7 @@ error_0:
void nfs_fs_proc_exit(void)
{
remove_proc_subtree("fs/nfsfs", NULL);
+ ida_destroy(&s_sysfs_ids);
}
#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 9bcd53d5c7d4..853e8d609bb3 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -18,6 +18,9 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs4_mount.h>
+
+#include <net/handshake.h>
+
#include "nfs.h"
#include "internal.h"
@@ -88,6 +91,7 @@ enum nfs_param {
Opt_vers,
Opt_wsize,
Opt_write,
+ Opt_xprtsec,
};
enum {
@@ -194,6 +198,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_string("vers", Opt_vers),
fsparam_enum ("write", Opt_write, nfs_param_enums_write),
fsparam_u32 ("wsize", Opt_wsize),
+ fsparam_string("xprtsec", Opt_xprtsec),
{}
};
@@ -267,6 +272,20 @@ static const struct constant_table nfs_secflavor_tokens[] = {
{}
};
+enum {
+ Opt_xprtsec_none,
+ Opt_xprtsec_tls,
+ Opt_xprtsec_mtls,
+ nr__Opt_xprtsec
+};
+
+static const struct constant_table nfs_xprtsec_policies[] = {
+ { "none", Opt_xprtsec_none },
+ { "tls", Opt_xprtsec_tls },
+ { "mtls", Opt_xprtsec_mtls },
+ {}
+};
+
/*
* Sanity-check a server address provided by the mount command.
*
@@ -320,9 +339,21 @@ static int nfs_validate_transport_protocol(struct fs_context *fc,
default:
ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
}
+
+ if (ctx->xprtsec.policy != RPC_XPRTSEC_NONE)
+ switch (ctx->nfs_server.protocol) {
+ case XPRT_TRANSPORT_TCP:
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP_TLS;
+ break;
+ default:
+ goto out_invalid_xprtsec_policy;
+ }
+
return 0;
out_invalid_transport_udp:
return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
+out_invalid_xprtsec_policy:
+ return nfs_invalf(fc, "NFS: Transport does not support xprtsec");
}
/*
@@ -430,6 +461,29 @@ static int nfs_parse_security_flavors(struct fs_context *fc,
return 0;
}
+static int nfs_parse_xprtsec_policy(struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+
+ trace_nfs_mount_assign(param->key, param->string);
+
+ switch (lookup_constant(nfs_xprtsec_policies, param->string, -1)) {
+ case Opt_xprtsec_none:
+ ctx->xprtsec.policy = RPC_XPRTSEC_NONE;
+ break;
+ case Opt_xprtsec_tls:
+ ctx->xprtsec.policy = RPC_XPRTSEC_TLS_ANON;
+ break;
+ case Opt_xprtsec_mtls:
+ ctx->xprtsec.policy = RPC_XPRTSEC_TLS_X509;
+ break;
+ default:
+ return nfs_invalf(fc, "NFS: Unrecognized transport security policy");
+ }
+ return 0;
+}
+
static int nfs_parse_version_string(struct fs_context *fc,
const char *string)
{
@@ -696,6 +750,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
if (ret < 0)
return ret;
break;
+ case Opt_xprtsec:
+ ret = nfs_parse_xprtsec_policy(fc, param);
+ if (ret < 0)
+ return ret;
+ break;
case Opt_proto:
if (!param->string)
@@ -791,16 +850,19 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
ctx->mount_server.addrlen = len;
break;
case Opt_nconnect:
+ trace_nfs_mount_assign(param->key, param->string);
if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS)
goto out_of_bounds;
ctx->nfs_server.nconnect = result.uint_32;
break;
case Opt_max_connect:
+ trace_nfs_mount_assign(param->key, param->string);
if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS)
goto out_of_bounds;
ctx->nfs_server.max_connect = result.uint_32;
break;
case Opt_lookupcache:
+ trace_nfs_mount_assign(param->key, param->string);
switch (result.uint_32) {
case Opt_lookupcache_all:
ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE);
@@ -817,6 +879,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
}
break;
case Opt_local_lock:
+ trace_nfs_mount_assign(param->key, param->string);
switch (result.uint_32) {
case Opt_local_lock_all:
ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK |
@@ -837,6 +900,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
}
break;
case Opt_write:
+ trace_nfs_mount_assign(param->key, param->string);
switch (result.uint_32) {
case Opt_write_lazy:
ctx->flags &=
@@ -1569,6 +1633,9 @@ static int nfs_init_fs_context(struct fs_context *fc)
ctx->selected_flavor = RPC_AUTH_MAXFLAVOR;
ctx->minorversion = 0;
ctx->need_mount = true;
+ ctx->xprtsec.policy = RPC_XPRTSEC_NONE;
+ ctx->xprtsec.cert_serial = TLS_NO_CERT;
+ ctx->xprtsec.privkey_serial = TLS_NO_PRIVKEY;
fc->s_iflags |= SB_I_STABLE_WRITES;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a910b9a638c5..8172dd4135a1 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -845,7 +845,7 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID |
STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME |
- STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME |
+ STATX_INO | STATX_SIZE | STATX_BLOCKS |
STATX_CHANGE_COOKIE;
if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b5f21d35d30e..913c09806c7f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -81,6 +81,7 @@ struct nfs_client_initdata {
struct net *net;
const struct rpc_timeout *timeparms;
const struct cred *cred;
+ struct xprtsec_parms xprtsec;
};
/*
@@ -101,6 +102,7 @@ struct nfs_fs_context {
unsigned int bsize;
struct nfs_auth_info auth_info;
rpc_authflavor_t selected_flavor;
+ struct xprtsec_parms xprtsec;
char *client_address;
unsigned int version;
unsigned int minorversion;
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 669cda757a5c..eff3802c5e03 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -4,6 +4,8 @@
#include <linux/sunrpc/addr.h>
#include "internal.h"
#include "nfs3_fs.h"
+#include "netns.h"
+#include "sysfs.h"
#ifdef CONFIG_NFS_V3_ACL
static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
@@ -31,6 +33,8 @@ static void nfs_init_server_aclclient(struct nfs_server *server)
if (IS_ERR(server->client_acl))
goto out_noacl;
+ nfs_sysfs_link_rpc_client(server, server->client_acl, NULL);
+
/* No errors! Assume that Sun nfsacls are supported */
server->caps |= NFS_CAP_ACLS;
return;
@@ -93,6 +97,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
.net = mds_clp->cl_net,
.timeparms = &ds_timeout,
.cred = mds_srv->cred,
+ .xprtsec = mds_clp->cl_xprtsec,
};
struct nfs_client *clp;
char buf[INET6_ADDRSTRLEN + 1];
@@ -102,8 +107,12 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;
- if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
- cl_init.nconnect = mds_clp->cl_nconnect;
+ switch (ds_proto) {
+ case XPRT_TRANSPORT_TCP:
+ case XPRT_TRANSPORT_TCP_TLS:
+ if (mds_clp->cl_nconnect > 1)
+ cl_init.nconnect = mds_clp->cl_nconnect;
+ }
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 93e306bf4430..63802d195556 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -1190,15 +1190,19 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
const void *buf, size_t buflen, int flags)
{
struct nfs_server *server = NFS_SERVER(inode);
+ __u32 bitmask[NFS_BITMASK_SZ];
struct page *pages[NFS4XATTR_MAXPAGES];
struct nfs42_setxattrargs arg = {
.fh = NFS_FH(inode),
+ .bitmask = bitmask,
.xattr_pages = pages,
.xattr_len = buflen,
.xattr_name = name,
.xattr_flags = flags,
};
- struct nfs42_setxattrres res;
+ struct nfs42_setxattrres res = {
+ .server = server,
+ };
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETXATTR],
.rpc_argp = &arg,
@@ -1210,13 +1214,22 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
if (buflen > server->sxasize)
return -ERANGE;
+ res.fattr = nfs_alloc_fattr();
+ if (!res.fattr)
+ return -ENOMEM;
+
if (buflen > 0) {
np = nfs4_buf_to_pages_noslab(buf, buflen, arg.xattr_pages);
- if (np < 0)
- return np;
+ if (np < 0) {
+ ret = np;
+ goto out;
+ }
} else
np = 0;
+ nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask,
+ inode, NFS_INO_INVALID_CHANGE);
+
ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
&res.seq_res, 1);
trace_nfs4_setxattr(inode, name, ret);
@@ -1224,9 +1237,13 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
for (; np > 0; np--)
put_page(pages[np - 1]);
- if (!ret)
+ if (!ret) {
nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
+ ret = nfs_post_op_update_inode(inode, res.fattr);
+ }
+out:
+ kfree(res.fattr);
return ret;
}
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
index 76ae11834206..911f634ba3da 100644
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -991,6 +991,29 @@ static void nfs4_xattr_cache_init_once(void *p)
INIT_LIST_HEAD(&cache->dispose);
}
+static int nfs4_xattr_shrinker_init(struct shrinker *shrinker,
+ struct list_lru *lru, const char *name)
+{
+ int ret = 0;
+
+ ret = register_shrinker(shrinker, name);
+ if (ret)
+ return ret;
+
+ ret = list_lru_init_memcg(lru, shrinker);
+ if (ret)
+ unregister_shrinker(shrinker);
+
+ return ret;
+}
+
+static void nfs4_xattr_shrinker_destroy(struct shrinker *shrinker,
+ struct list_lru *lru)
+{
+ unregister_shrinker(shrinker);
+ list_lru_destroy(lru);
+}
+
int __init nfs4_xattr_cache_init(void)
{
int ret = 0;
@@ -1002,44 +1025,30 @@ int __init nfs4_xattr_cache_init(void)
if (nfs4_xattr_cache_cachep == NULL)
return -ENOMEM;
- ret = list_lru_init_memcg(&nfs4_xattr_large_entry_lru,
- &nfs4_xattr_large_entry_shrinker);
- if (ret)
- goto out4;
-
- ret = list_lru_init_memcg(&nfs4_xattr_entry_lru,
- &nfs4_xattr_entry_shrinker);
- if (ret)
- goto out3;
-
- ret = list_lru_init_memcg(&nfs4_xattr_cache_lru,
- &nfs4_xattr_cache_shrinker);
- if (ret)
- goto out2;
-
- ret = register_shrinker(&nfs4_xattr_cache_shrinker, "nfs-xattr_cache");
+ ret = nfs4_xattr_shrinker_init(&nfs4_xattr_cache_shrinker,
+ &nfs4_xattr_cache_lru,
+ "nfs-xattr_cache");
if (ret)
goto out1;
- ret = register_shrinker(&nfs4_xattr_entry_shrinker, "nfs-xattr_entry");
+ ret = nfs4_xattr_shrinker_init(&nfs4_xattr_entry_shrinker,
+ &nfs4_xattr_entry_lru,
+ "nfs-xattr_entry");
if (ret)
- goto out;
+ goto out2;
- ret = register_shrinker(&nfs4_xattr_large_entry_shrinker,
- "nfs-xattr_large_entry");
+ ret = nfs4_xattr_shrinker_init(&nfs4_xattr_large_entry_shrinker,
+ &nfs4_xattr_large_entry_lru,
+ "nfs-xattr_large_entry");
if (!ret)
return 0;
- unregister_shrinker(&nfs4_xattr_entry_shrinker);
-out:
- unregister_shrinker(&nfs4_xattr_cache_shrinker);
-out1:
- list_lru_destroy(&nfs4_xattr_cache_lru);
+ nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker,
+ &nfs4_xattr_entry_lru);
out2:
- list_lru_destroy(&nfs4_xattr_entry_lru);
-out3:
- list_lru_destroy(&nfs4_xattr_large_entry_lru);
-out4:
+ nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker,
+ &nfs4_xattr_cache_lru);
+out1:
kmem_cache_destroy(nfs4_xattr_cache_cachep);
return ret;
@@ -1047,11 +1056,11 @@ out4:
void nfs4_xattr_cache_exit(void)
{
- unregister_shrinker(&nfs4_xattr_large_entry_shrinker);
- unregister_shrinker(&nfs4_xattr_entry_shrinker);
- unregister_shrinker(&nfs4_xattr_cache_shrinker);
- list_lru_destroy(&nfs4_xattr_large_entry_lru);
- list_lru_destroy(&nfs4_xattr_entry_lru);
- list_lru_destroy(&nfs4_xattr_cache_lru);
+ nfs4_xattr_shrinker_destroy(&nfs4_xattr_large_entry_shrinker,
+ &nfs4_xattr_large_entry_lru);
+ nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker,
+ &nfs4_xattr_entry_lru);
+ nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker,
+ &nfs4_xattr_cache_lru);
kmem_cache_destroy(nfs4_xattr_cache_cachep);
}
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index a6df815a140c..95234208dc9e 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -7,6 +7,9 @@
#include "nfs42.h"
+/* Not limited by NFS itself, limited by the generic xattr code */
+#define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX)
+
#define encode_fallocate_maxsz (encode_stateid_maxsz + \
2 /* offset */ + \
2 /* length */)
@@ -89,6 +92,18 @@
2 /* dst offset */ + \
2 /* count */)
#define decode_clone_maxsz (op_decode_hdr_maxsz)
+#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \
+ nfs4_xattr_name_maxsz)
+#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz)
+#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \
+ 1 + nfs4_xattr_name_maxsz + 1)
+#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
+#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1)
+#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 + 1)
+#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \
+ nfs4_xattr_name_maxsz)
+#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \
+ decode_change_info_maxsz)
#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
@@ -186,55 +201,40 @@
decode_putfh_maxsz + \
decode_clone_maxsz + \
decode_getattr_maxsz)
-
-/* Not limited by NFS itself, limited by the generic xattr code */
-#define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX)
-
-#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \
- nfs4_xattr_name_maxsz)
-#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz)
-#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \
- 1 + nfs4_xattr_name_maxsz + 1)
-#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz)
-#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1)
-#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 + 1)
-#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \
- nfs4_xattr_name_maxsz)
-#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \
- decode_change_info_maxsz)
-
-#define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \
- encode_sequence_maxsz + \
- encode_putfh_maxsz + \
- encode_getxattr_maxsz)
-#define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \
- decode_sequence_maxsz + \
- decode_putfh_maxsz + \
- decode_getxattr_maxsz)
-#define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \
- encode_sequence_maxsz + \
- encode_putfh_maxsz + \
- encode_setxattr_maxsz)
-#define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \
- decode_sequence_maxsz + \
- decode_putfh_maxsz + \
- decode_setxattr_maxsz)
-#define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \
- encode_sequence_maxsz + \
- encode_putfh_maxsz + \
- encode_listxattrs_maxsz)
-#define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \
- decode_sequence_maxsz + \
- decode_putfh_maxsz + \
- decode_listxattrs_maxsz)
-#define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \
- encode_sequence_maxsz + \
- encode_putfh_maxsz + \
- encode_removexattr_maxsz)
-#define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \
- decode_sequence_maxsz + \
- decode_putfh_maxsz + \
- decode_removexattr_maxsz)
+#define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getxattr_maxsz)
+#define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_getxattr_maxsz)
+#define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_setxattr_maxsz + \
+ encode_getattr_maxsz)
+#define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_setxattr_maxsz + \
+ decode_getattr_maxsz)
+#define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_listxattrs_maxsz)
+#define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_listxattrs_maxsz)
+#define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_removexattr_maxsz)
+#define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_removexattr_maxsz)
/*
* These values specify the maximum amount of data that is not
@@ -317,6 +317,18 @@ static void encode_copy(struct xdr_stream *xdr,
encode_nl4_server(xdr, args->cp_src);
}
+static void encode_copy_commit(struct xdr_stream *xdr,
+ const struct nfs42_copy_args *args,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr);
+ p = reserve_space(xdr, 12);
+ p = xdr_encode_hyper(p, args->dst_pos);
+ *p = cpu_to_be32(args->count);
+}
+
static void encode_offload_cancel(struct xdr_stream *xdr,
const struct nfs42_offload_status_args *args,
struct compound_hdr *hdr)
@@ -452,20 +464,6 @@ static void encode_setxattr(struct xdr_stream *xdr,
xdr_write_pages(xdr, arg->xattr_pages, 0, arg->xattr_len);
}
-static int decode_setxattr(struct xdr_stream *xdr,
- struct nfs4_change_info *cinfo)
-{
- int status;
-
- status = decode_op_hdr(xdr, OP_SETXATTR);
- if (status)
- goto out;
- status = decode_change_info(xdr, cinfo);
-out:
- return status;
-}
-
-
static void encode_getxattr(struct xdr_stream *xdr, const char *name,
struct compound_hdr *hdr)
{
@@ -473,43 +471,6 @@ static void encode_getxattr(struct xdr_stream *xdr, const char *name,
encode_string(xdr, strlen(name), name);
}
-static int decode_getxattr(struct xdr_stream *xdr,
- struct nfs42_getxattrres *res,
- struct rpc_rqst *req)
-{
- int status;
- __be32 *p;
- u32 len, rdlen;
-
- status = decode_op_hdr(xdr, OP_GETXATTR);
- if (status)
- return status;
-
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- return -EIO;
-
- len = be32_to_cpup(p);
-
- /*
- * Only check against the page length here. The actual
- * requested length may be smaller, but that is only
- * checked against after possibly caching a valid reply.
- */
- if (len > req->rq_rcv_buf.page_len)
- return -ERANGE;
-
- res->xattr_len = len;
-
- if (len > 0) {
- rdlen = xdr_read_pages(xdr, len);
- if (rdlen < len)
- return -EIO;
- }
-
- return 0;
-}
-
static void encode_removexattr(struct xdr_stream *xdr, const char *name,
struct compound_hdr *hdr)
{
@@ -517,21 +478,6 @@ static void encode_removexattr(struct xdr_stream *xdr, const char *name,
encode_string(xdr, strlen(name), name);
}
-
-static int decode_removexattr(struct xdr_stream *xdr,
- struct nfs4_change_info *cinfo)
-{
- int status;
-
- status = decode_op_hdr(xdr, OP_REMOVEXATTR);
- if (status)
- goto out;
-
- status = decode_change_info(xdr, cinfo);
-out:
- return status;
-}
-
static void encode_listxattrs(struct xdr_stream *xdr,
const struct nfs42_listxattrsargs *arg,
struct compound_hdr *hdr)
@@ -553,104 +499,6 @@ static void encode_listxattrs(struct xdr_stream *xdr,
*p = cpu_to_be32(arg->count + 8 + 4);
}
-static int decode_listxattrs(struct xdr_stream *xdr,
- struct nfs42_listxattrsres *res)
-{
- int status;
- __be32 *p;
- u32 count, len, ulen;
- size_t left, copied;
- char *buf;
-
- status = decode_op_hdr(xdr, OP_LISTXATTRS);
- if (status) {
- /*
- * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL
- * should be translated to ERANGE.
- */
- if (status == -ETOOSMALL)
- status = -ERANGE;
- /*
- * Special case: for LISTXATTRS, NFS4ERR_NOXATTR
- * should be translated to success with zero-length reply.
- */
- if (status == -ENODATA) {
- res->eof = true;
- status = 0;
- }
- goto out;
- }
-
- p = xdr_inline_decode(xdr, 8);
- if (unlikely(!p))
- return -EIO;
-
- xdr_decode_hyper(p, &res->cookie);
-
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- return -EIO;
-
- left = res->xattr_len;
- buf = res->xattr_buf;
-
- count = be32_to_cpup(p);
- copied = 0;
-
- /*
- * We have asked for enough room to encode the maximum number
- * of possible attribute names, so everything should fit.
- *
- * But, don't rely on that assumption. Just decode entries
- * until they don't fit anymore, just in case the server did
- * something odd.
- */
- while (count--) {
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- return -EIO;
-
- len = be32_to_cpup(p);
- if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) {
- status = -ERANGE;
- goto out;
- }
-
- p = xdr_inline_decode(xdr, len);
- if (unlikely(!p))
- return -EIO;
-
- ulen = len + XATTR_USER_PREFIX_LEN + 1;
- if (buf) {
- if (ulen > left) {
- status = -ERANGE;
- goto out;
- }
-
- memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
- memcpy(buf + XATTR_USER_PREFIX_LEN, p, len);
-
- buf[ulen - 1] = 0;
- buf += ulen;
- left -= ulen;
- }
- copied += ulen;
- }
-
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- return -EIO;
-
- res->eof = be32_to_cpup(p);
- res->copied = copied;
-
-out:
- if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX)
- status = -E2BIG;
-
- return status;
-}
-
/*
* Encode ALLOCATE request
*/
@@ -671,18 +519,6 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
encode_nops(&hdr);
}
-static void encode_copy_commit(struct xdr_stream *xdr,
- const struct nfs42_copy_args *args,
- struct compound_hdr *hdr)
-{
- __be32 *p;
-
- encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr);
- p = reserve_space(xdr, 12);
- p = xdr_encode_hyper(p, args->dst_pos);
- *p = cpu_to_be32(args->count);
-}
-
/*
* Encode COPY request
*/
@@ -871,6 +707,90 @@ static void nfs4_xdr_enc_layouterror(struct rpc_rqst *req,
encode_nops(&hdr);
}
+/*
+ * Encode SETXATTR request
+ */
+static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_setxattrargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_setxattr(xdr, args, &hdr);
+ encode_getfattr(xdr, args->bitmask, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
+ * Encode GETXATTR request
+ */
+static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_getxattrargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+ uint32_t replen;
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ replen = hdr.replen + op_decode_hdr_maxsz + 1;
+ encode_getxattr(xdr, args->xattr_name, &hdr);
+
+ rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len,
+ replen);
+
+ encode_nops(&hdr);
+}
+
+/*
+ * Encode LISTXATTR request
+ */
+static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfs42_listxattrsargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+ uint32_t replen;
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1;
+ encode_listxattrs(xdr, args, &hdr);
+
+ rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen);
+
+ encode_nops(&hdr);
+}
+
+/*
+ * Encode REMOVEXATTR request
+ */
+static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfs42_removexattrargs *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->fh, &hdr);
+ encode_removexattr(xdr, args->xattr_name, &hdr);
+ encode_nops(&hdr);
+}
+
static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_ALLOCATE);
@@ -1192,6 +1112,168 @@ static int decode_layouterror(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_LAYOUTERROR);
}
+static int decode_setxattr(struct xdr_stream *xdr,
+ struct nfs4_change_info *cinfo)
+{
+ int status;
+
+ status = decode_op_hdr(xdr, OP_SETXATTR);
+ if (status)
+ goto out;
+ status = decode_change_info(xdr, cinfo);
+out:
+ return status;
+}
+
+static int decode_getxattr(struct xdr_stream *xdr,
+ struct nfs42_getxattrres *res,
+ struct rpc_rqst *req)
+{
+ int status;
+ __be32 *p;
+ u32 len, rdlen;
+
+ status = decode_op_hdr(xdr, OP_GETXATTR);
+ if (status)
+ return status;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ len = be32_to_cpup(p);
+
+ /*
+ * Only check against the page length here. The actual
+ * requested length may be smaller, but that is only
+ * checked against after possibly caching a valid reply.
+ */
+ if (len > req->rq_rcv_buf.page_len)
+ return -ERANGE;
+
+ res->xattr_len = len;
+
+ if (len > 0) {
+ rdlen = xdr_read_pages(xdr, len);
+ if (rdlen < len)
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int decode_removexattr(struct xdr_stream *xdr,
+ struct nfs4_change_info *cinfo)
+{
+ int status;
+
+ status = decode_op_hdr(xdr, OP_REMOVEXATTR);
+ if (status)
+ goto out;
+
+ status = decode_change_info(xdr, cinfo);
+out:
+ return status;
+}
+
+static int decode_listxattrs(struct xdr_stream *xdr,
+ struct nfs42_listxattrsres *res)
+{
+ int status;
+ __be32 *p;
+ u32 count, len, ulen;
+ size_t left, copied;
+ char *buf;
+
+ status = decode_op_hdr(xdr, OP_LISTXATTRS);
+ if (status) {
+ /*
+ * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL
+ * should be translated to ERANGE.
+ */
+ if (status == -ETOOSMALL)
+ status = -ERANGE;
+ /*
+ * Special case: for LISTXATTRS, NFS4ERR_NOXATTR
+ * should be translated to success with zero-length reply.
+ */
+ if (status == -ENODATA) {
+ res->eof = true;
+ status = 0;
+ }
+ goto out;
+ }
+
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ return -EIO;
+
+ xdr_decode_hyper(p, &res->cookie);
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ left = res->xattr_len;
+ buf = res->xattr_buf;
+
+ count = be32_to_cpup(p);
+ copied = 0;
+
+ /*
+ * We have asked for enough room to encode the maximum number
+ * of possible attribute names, so everything should fit.
+ *
+ * But, don't rely on that assumption. Just decode entries
+ * until they don't fit anymore, just in case the server did
+ * something odd.
+ */
+ while (count--) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ len = be32_to_cpup(p);
+ if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) {
+ status = -ERANGE;
+ goto out;
+ }
+
+ p = xdr_inline_decode(xdr, len);
+ if (unlikely(!p))
+ return -EIO;
+
+ ulen = len + XATTR_USER_PREFIX_LEN + 1;
+ if (buf) {
+ if (ulen > left) {
+ status = -ERANGE;
+ goto out;
+ }
+
+ memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+ memcpy(buf + XATTR_USER_PREFIX_LEN, p, len);
+
+ buf[ulen - 1] = 0;
+ buf += ulen;
+ left -= ulen;
+ }
+ copied += ulen;
+ }
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+
+ res->eof = be32_to_cpup(p);
+ res->copied = copied;
+
+out:
+ if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX)
+ status = -E2BIG;
+
+ return status;
+}
+
/*
* Decode ALLOCATE request
*/
@@ -1481,22 +1563,9 @@ out:
return status;
}
-#ifdef CONFIG_NFS_V4_2
-static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
- const void *data)
-{
- const struct nfs42_setxattrargs *args = data;
- struct compound_hdr hdr = {
- .minorversion = nfs4_xdr_minorversion(&args->seq_args),
- };
-
- encode_compound_hdr(xdr, req, &hdr);
- encode_sequence(xdr, &args->seq_args, &hdr);
- encode_putfh(xdr, args->fh, &hdr);
- encode_setxattr(xdr, args, &hdr);
- encode_nops(&hdr);
-}
-
+/*
+ * Decode SETXATTR request
+ */
static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
void *data)
{
@@ -1513,33 +1582,17 @@ static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
status = decode_putfh(xdr);
if (status)
goto out;
-
status = decode_setxattr(xdr, &res->cinfo);
+ if (status)
+ goto out;
+ status = decode_getfattr(xdr, res->fattr, res->server);
out:
return status;
}
-static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
- const void *data)
-{
- const struct nfs42_getxattrargs *args = data;
- struct compound_hdr hdr = {
- .minorversion = nfs4_xdr_minorversion(&args->seq_args),
- };
- uint32_t replen;
-
- encode_compound_hdr(xdr, req, &hdr);
- encode_sequence(xdr, &args->seq_args, &hdr);
- encode_putfh(xdr, args->fh, &hdr);
- replen = hdr.replen + op_decode_hdr_maxsz + 1;
- encode_getxattr(xdr, args->xattr_name, &hdr);
-
- rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len,
- replen);
-
- encode_nops(&hdr);
-}
-
+/*
+ * Decode GETXATTR request
+ */
static int nfs4_xdr_dec_getxattr(struct rpc_rqst *rqstp,
struct xdr_stream *xdr, void *data)
{
@@ -1561,26 +1614,9 @@ out:
return status;
}
-static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req,
- struct xdr_stream *xdr, const void *data)
-{
- const struct nfs42_listxattrsargs *args = data;
- struct compound_hdr hdr = {
- .minorversion = nfs4_xdr_minorversion(&args->seq_args),
- };
- uint32_t replen;
-
- encode_compound_hdr(xdr, req, &hdr);
- encode_sequence(xdr, &args->seq_args, &hdr);
- encode_putfh(xdr, args->fh, &hdr);
- replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1;
- encode_listxattrs(xdr, args, &hdr);
-
- rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen);
-
- encode_nops(&hdr);
-}
-
+/*
+ * Decode LISTXATTR request
+ */
static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp,
struct xdr_stream *xdr, void *data)
{
@@ -1604,21 +1640,9 @@ out:
return status;
}
-static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req,
- struct xdr_stream *xdr, const void *data)
-{
- const struct nfs42_removexattrargs *args = data;
- struct compound_hdr hdr = {
- .minorversion = nfs4_xdr_minorversion(&args->seq_args),
- };
-
- encode_compound_hdr(xdr, req, &hdr);
- encode_sequence(xdr, &args->seq_args, &hdr);
- encode_putfh(xdr, args->fh, &hdr);
- encode_removexattr(xdr, args->xattr_name, &hdr);
- encode_nops(&hdr);
-}
-
+/*
+ * Decode REMOVEXATTR request
+ */
static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
struct xdr_stream *xdr, void *data)
{
@@ -1640,5 +1664,4 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req,
out:
return status;
}
-#endif
#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index d3051b051a56..d9114a754db7 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -18,6 +18,7 @@
#include "nfs4idmap.h"
#include "pnfs.h"
#include "netns.h"
+#include "sysfs.h"
#define NFSDBG_FACILITY NFSDBG_CLIENT
@@ -896,7 +897,8 @@ static int nfs4_set_client(struct nfs_server *server,
int proto, const struct rpc_timeout *timeparms,
u32 minorversion, unsigned int nconnect,
unsigned int max_connect,
- struct net *net)
+ struct net *net,
+ struct xprtsec_parms *xprtsec)
{
struct nfs_client_initdata cl_init = {
.hostname = hostname,
@@ -909,6 +911,7 @@ static int nfs4_set_client(struct nfs_server *server,
.net = net,
.timeparms = timeparms,
.cred = server->cred,
+ .xprtsec = *xprtsec,
};
struct nfs_client *clp;
@@ -916,8 +919,11 @@ static int nfs4_set_client(struct nfs_server *server,
__set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
else
cl_init.max_connect = max_connect;
- if (proto == XPRT_TRANSPORT_TCP)
+ switch (proto) {
+ case XPRT_TRANSPORT_TCP:
+ case XPRT_TRANSPORT_TCP_TLS:
cl_init.nconnect = nconnect;
+ }
if (server->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -947,6 +953,9 @@ static int nfs4_set_client(struct nfs_server *server,
set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
server->nfs_client = clp;
+ nfs_sysfs_add_server(server);
+ nfs_sysfs_link_rpc_client(server, clp->cl_rpcclient, "_state");
+
return 0;
}
@@ -978,6 +987,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
.net = mds_clp->cl_net,
.timeparms = &ds_timeout,
.cred = mds_srv->cred,
+ .xprtsec = mds_srv->nfs_client->cl_xprtsec,
};
char buf[INET6_ADDRSTRLEN + 1];
@@ -985,9 +995,13 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;
- if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
- cl_init.nconnect = mds_clp->cl_nconnect;
- cl_init.max_connect = NFS_MAX_TRANSPORTS;
+ switch (ds_proto) {
+ case XPRT_TRANSPORT_TCP:
+ case XPRT_TRANSPORT_TCP_TLS:
+ if (mds_clp->cl_nconnect > 1) {
+ cl_init.nconnect = mds_clp->cl_nconnect;
+ cl_init.max_connect = NFS_MAX_TRANSPORTS;
+ }
}
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
@@ -1157,7 +1171,8 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
ctx->minorversion,
ctx->nfs_server.nconnect,
ctx->nfs_server.max_connect,
- fc->net_ns);
+ fc->net_ns,
+ &ctx->xprtsec);
if (error < 0)
return error;
@@ -1219,8 +1234,8 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
+ int proto, error;
bool auth_probe;
- int error;
server = nfs_alloc_server();
if (!server)
@@ -1247,23 +1262,28 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
parent_client->cl_mvops->minor_version,
parent_client->cl_nconnect,
parent_client->cl_max_connect,
- parent_client->cl_net);
+ parent_client->cl_net,
+ &parent_client->cl_xprtsec);
if (!error)
goto init_server;
#endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */
+ proto = XPRT_TRANSPORT_TCP;
+ if (parent_client->cl_xprtsec.policy != RPC_XPRTSEC_NONE)
+ proto = XPRT_TRANSPORT_TCP_TLS;
rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
&ctx->nfs_server._address,
ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
- XPRT_TRANSPORT_TCP,
+ proto,
parent_server->client->cl_timeout,
parent_client->cl_mvops->minor_version,
parent_client->cl_nconnect,
parent_client->cl_max_connect,
- parent_client->cl_net);
+ parent_client->cl_net,
+ &parent_client->cl_xprtsec);
if (error < 0)
goto error;
@@ -1314,6 +1334,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
.dstaddr = (struct sockaddr *)sap,
.addrlen = salen,
.servername = hostname,
+ /* cel: bleh. We might need to pass TLS parameters here */
};
char buf[INET6_ADDRSTRLEN + 1];
struct sockaddr_storage address;
@@ -1336,7 +1357,8 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
error = nfs4_set_client(server, hostname, sap, salen, buf,
clp->cl_proto, clnt->cl_timeout,
clp->cl_minorversion,
- clp->cl_nconnect, clp->cl_max_connect, net);
+ clp->cl_nconnect, clp->cl_max_connect,
+ net, &clp->cl_xprtsec);
clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
if (error != 0) {
nfs_server_insert_lists(server);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d3665390c4cb..e1a886b58354 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -921,6 +921,7 @@ out:
out_noaction:
return ret;
session_recover:
+ set_bit(NFS4_SLOT_TBL_DRAINING, &session->fc_slot_table.slot_tbl_state);
nfs4_schedule_session_recovery(session, status);
dprintk("%s ERROR: %d Reset session\n", __func__, status);
nfs41_sequence_free_slot(res);
@@ -7159,7 +7160,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
{
struct nfs4_lockdata *data = calldata;
struct nfs4_lock_state *lsp = data->lsp;
- struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry));
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
@@ -7167,7 +7167,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
data->rpc_status = task->tk_status;
switch (task->tk_status) {
case 0:
- renew_lease(server, data->timestamp);
+ renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
+ data->timestamp);
if (data->arg.new_lock && !data->cancelled) {
data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
@@ -7188,8 +7189,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
if (!nfs4_stateid_match(&data->arg.open_stateid,
&lsp->ls_state->open_stateid))
goto out_restart;
- else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN)
- goto out_restart;
} else if (!nfs4_stateid_match(&data->arg.lock_stateid,
&lsp->ls_stateid))
goto out_restart;
@@ -9371,7 +9370,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
return;
trace_nfs4_sequence(clp, task->tk_status);
- if (task->tk_status < 0) {
+ if (task->tk_status < 0 && !task->tk_client->cl_shutdown) {
dprintk("%s ERROR %d\n", __func__, task->tk_status);
if (refcount_read(&clp->cl_count) == 1)
return;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index bbe49315d99e..e079987af4a3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1210,6 +1210,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
struct task_struct *task;
char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
+ if (clp->cl_rpcclient->cl_shutdown)
+ return;
+
set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
wake_up_var(&clp->cl_state);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 30e53e93049e..2284f749d892 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -59,6 +59,8 @@
#include <linux/uaccess.h>
#include <linux/nfs_ssc.h>
+#include <uapi/linux/tls.h>
+
#include "nfs4_fs.h"
#include "callback.h"
#include "delegation.h"
@@ -68,6 +70,8 @@
#include "nfs4session.h"
#include "pnfs.h"
#include "nfs.h"
+#include "netns.h"
+#include "sysfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -491,6 +495,16 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ);
seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries);
seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
+ switch (clp->cl_xprtsec.policy) {
+ case RPC_XPRTSEC_TLS_ANON:
+ seq_puts(m, ",xprtsec=tls");
+ break;
+ case RPC_XPRTSEC_TLS_X509:
+ seq_puts(m, ",xprtsec=mtls");
+ break;
+ default:
+ break;
+ }
if (version != 4)
nfs_show_mountd_options(m, nfss, showdefaults);
@@ -1077,6 +1091,7 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
&sb->s_blocksize_bits);
nfs_super_set_maxbytes(sb, server->maxfilesize);
+ nfs_sysfs_move_server_to_sb(sb);
server->has_sec_mnt_opts = ctx->has_sec_mnt_opts;
}
@@ -1319,13 +1334,14 @@ error_splat_super:
}
/*
- * Destroy an NFS2/3 superblock
+ * Destroy an NFS superblock
*/
void nfs_kill_super(struct super_block *s)
{
struct nfs_server *server = NFS_SB(s);
dev_t dev = s->s_dev;
+ nfs_sysfs_move_sb_to_server(server);
generic_shutdown_super(s);
nfs_fscache_release_super_cookie(s);
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 0cbcd2dfa732..acda8f033d30 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -12,17 +12,18 @@
#include <linux/string.h>
#include <linux/nfs_fs.h>
#include <linux/rcupdate.h>
+#include <linux/lockd/lockd.h>
#include "nfs4_fs.h"
#include "netns.h"
#include "sysfs.h"
-struct kobject *nfs_client_kobj;
-static struct kset *nfs_client_kset;
+static struct kset *nfs_kset;
-static void nfs_netns_object_release(struct kobject *kobj)
+static void nfs_kset_release(struct kobject *kobj)
{
- kfree(kobj);
+ struct kset *kset = container_of(kobj, struct kset, kobj);
+ kfree(kset);
}
static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type(
@@ -31,46 +32,42 @@ static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type(
return &net_ns_type_operations;
}
-static struct kobj_type nfs_netns_object_type = {
- .release = nfs_netns_object_release,
+static struct kobj_type nfs_kset_type = {
+ .release = nfs_kset_release,
.sysfs_ops = &kobj_sysfs_ops,
.child_ns_type = nfs_netns_object_child_ns_type,
};
-static struct kobject *nfs_netns_object_alloc(const char *name,
- struct kset *kset, struct kobject *parent)
+int nfs_sysfs_init(void)
{
- struct kobject *kobj;
+ int ret;
+
+ nfs_kset = kzalloc(sizeof(*nfs_kset), GFP_KERNEL);
+ if (!nfs_kset)
+ return -ENOMEM;
- kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
- if (kobj) {
- kobj->kset = kset;
- if (kobject_init_and_add(kobj, &nfs_netns_object_type,
- parent, "%s", name) == 0)
- return kobj;
- kobject_put(kobj);
+ ret = kobject_set_name(&nfs_kset->kobj, "nfs");
+ if (ret) {
+ kfree(nfs_kset);
+ return ret;
}
- return NULL;
-}
-int nfs_sysfs_init(void)
-{
- nfs_client_kset = kset_create_and_add("nfs", NULL, fs_kobj);
- if (!nfs_client_kset)
- return -ENOMEM;
- nfs_client_kobj = nfs_netns_object_alloc("net", nfs_client_kset, NULL);
- if (!nfs_client_kobj) {
- kset_unregister(nfs_client_kset);
- nfs_client_kset = NULL;
- return -ENOMEM;
+ nfs_kset->kobj.parent = fs_kobj;
+ nfs_kset->kobj.ktype = &nfs_kset_type;
+ nfs_kset->kobj.kset = NULL;
+
+ ret = kset_register(nfs_kset);
+ if (ret) {
+ kfree(nfs_kset);
+ return ret;
}
+
return 0;
}
void nfs_sysfs_exit(void)
{
- kobject_put(nfs_client_kobj);
- kset_unregister(nfs_client_kset);
+ kset_unregister(nfs_kset);
}
static ssize_t nfs_netns_identifier_show(struct kobject *kobj,
@@ -127,7 +124,6 @@ static void nfs_netns_client_release(struct kobject *kobj)
kobject);
kfree(rcu_dereference_raw(c->identifier));
- kfree(c);
}
static const void *nfs_netns_client_namespace(const struct kobject *kobj)
@@ -151,6 +147,25 @@ static struct kobj_type nfs_netns_client_type = {
.namespace = nfs_netns_client_namespace,
};
+static void nfs_netns_object_release(struct kobject *kobj)
+{
+ struct nfs_netns_client *c = container_of(kobj,
+ struct nfs_netns_client,
+ nfs_net_kobj);
+ kfree(c);
+}
+
+static const void *nfs_netns_namespace(const struct kobject *kobj)
+{
+ return container_of(kobj, struct nfs_netns_client, nfs_net_kobj)->net;
+}
+
+static struct kobj_type nfs_netns_object_type = {
+ .release = nfs_netns_object_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .namespace = nfs_netns_namespace,
+};
+
static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent,
struct net *net)
{
@@ -159,10 +174,19 @@ static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent,
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (p) {
p->net = net;
- p->kobject.kset = nfs_client_kset;
+ p->kobject.kset = nfs_kset;
+ p->nfs_net_kobj.kset = nfs_kset;
+
+ if (kobject_init_and_add(&p->nfs_net_kobj, &nfs_netns_object_type,
+ parent, "net") != 0) {
+ kobject_put(&p->nfs_net_kobj);
+ return NULL;
+ }
+
if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type,
- parent, "nfs_client") == 0)
+ &p->nfs_net_kobj, "nfs_client") == 0)
return p;
+
kobject_put(&p->kobject);
}
return NULL;
@@ -172,7 +196,7 @@ void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net)
{
struct nfs_netns_client *clp;
- clp = nfs_netns_client_alloc(nfs_client_kobj, net);
+ clp = nfs_netns_client_alloc(&nfs_kset->kobj, net);
if (clp) {
netns->nfs_client = clp;
kobject_uevent(&clp->kobject, KOBJ_ADD);
@@ -187,6 +211,149 @@ void nfs_netns_sysfs_destroy(struct nfs_net *netns)
kobject_uevent(&clp->kobject, KOBJ_REMOVE);
kobject_del(&clp->kobject);
kobject_put(&clp->kobject);
+ kobject_del(&clp->nfs_net_kobj);
+ kobject_put(&clp->nfs_net_kobj);
netns->nfs_client = NULL;
}
}
+
+static bool shutdown_match_client(const struct rpc_task *task, const void *data)
+{
+ return true;
+}
+
+static void shutdown_client(struct rpc_clnt *clnt)
+{
+ clnt->cl_shutdown = 1;
+ rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL);
+}
+
+static ssize_t
+shutdown_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct nfs_server *server = container_of(kobj, struct nfs_server, kobj);
+ bool shutdown = server->flags & NFS_MOUNT_SHUTDOWN;
+ return sysfs_emit(buf, "%d\n", shutdown);
+}
+
+static ssize_t
+shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct nfs_server *server;
+ int ret, val;
+
+ server = container_of(kobj, struct nfs_server, kobj);
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val != 1)
+ return -EINVAL;
+
+ /* already shut down? */
+ if (server->flags & NFS_MOUNT_SHUTDOWN)
+ goto out;
+
+ server->flags |= NFS_MOUNT_SHUTDOWN;
+ shutdown_client(server->client);
+ shutdown_client(server->nfs_client->cl_rpcclient);
+
+ if (!IS_ERR(server->client_acl))
+ shutdown_client(server->client_acl);
+
+ if (server->nlm_host)
+ shutdown_client(server->nlm_host->h_rpcclnt);
+out:
+ return count;
+}
+
+static struct kobj_attribute nfs_sysfs_attr_shutdown = __ATTR_RW(shutdown);
+
+#define RPC_CLIENT_NAME_SIZE 64
+
+void nfs_sysfs_link_rpc_client(struct nfs_server *server,
+ struct rpc_clnt *clnt, const char *uniq)
+{
+ char name[RPC_CLIENT_NAME_SIZE];
+ int ret;
+
+ strcpy(name, clnt->cl_program->name);
+ strcat(name, uniq ? uniq : "");
+ strcat(name, "_client");
+
+ ret = sysfs_create_link_nowarn(&server->kobj,
+ &clnt->cl_sysfs->kobject, name);
+ if (ret < 0)
+ pr_warn("NFS: can't create link to %s in sysfs (%d)\n",
+ name, ret);
+}
+EXPORT_SYMBOL_GPL(nfs_sysfs_link_rpc_client);
+
+static void nfs_sysfs_sb_release(struct kobject *kobj)
+{
+ /* no-op: why? see lib/kobject.c kobject_cleanup() */
+}
+
+static const void *nfs_netns_server_namespace(const struct kobject *kobj)
+{
+ return container_of(kobj, struct nfs_server, kobj)->nfs_client->cl_net;
+}
+
+static struct kobj_type nfs_sb_ktype = {
+ .release = nfs_sysfs_sb_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .namespace = nfs_netns_server_namespace,
+ .child_ns_type = nfs_netns_object_child_ns_type,
+};
+
+void nfs_sysfs_add_server(struct nfs_server *server)
+{
+ int ret;
+
+ ret = kobject_init_and_add(&server->kobj, &nfs_sb_ktype,
+ &nfs_kset->kobj, "server-%d", server->s_sysfs_id);
+ if (ret < 0) {
+ pr_warn("NFS: nfs sysfs add server-%d failed (%d)\n",
+ server->s_sysfs_id, ret);
+ return;
+ }
+ ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_shutdown.attr,
+ nfs_netns_server_namespace(&server->kobj));
+ if (ret < 0)
+ pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n",
+ server->s_sysfs_id, ret);
+}
+EXPORT_SYMBOL_GPL(nfs_sysfs_add_server);
+
+void nfs_sysfs_move_server_to_sb(struct super_block *s)
+{
+ struct nfs_server *server = s->s_fs_info;
+ int ret;
+
+ ret = kobject_rename(&server->kobj, s->s_id);
+ if (ret < 0)
+ pr_warn("NFS: rename sysfs %s failed (%d)\n",
+ server->kobj.name, ret);
+}
+
+void nfs_sysfs_move_sb_to_server(struct nfs_server *server)
+{
+ const char *s;
+ int ret = -ENOMEM;
+
+ s = kasprintf(GFP_KERNEL, "server-%d", server->s_sysfs_id);
+ if (s)
+ ret = kobject_rename(&server->kobj, s);
+ if (ret < 0)
+ pr_warn("NFS: rename sysfs %s failed (%d)\n",
+ server->kobj.name, ret);
+}
+
+/* unlink, not dec-ref */
+void nfs_sysfs_remove_server(struct nfs_server *server)
+{
+ kobject_del(&server->kobj);
+}
diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h
index 5501ef573c32..c5d1990cade5 100644
--- a/fs/nfs/sysfs.h
+++ b/fs/nfs/sysfs.h
@@ -10,11 +10,12 @@
struct nfs_netns_client {
struct kobject kobject;
+ struct kobject nfs_net_kobj;
struct net *net;
const char __rcu *identifier;
};
-extern struct kobject *nfs_client_kobj;
+extern struct kobject *nfs_net_kobj;
extern int nfs_sysfs_init(void);
extern void nfs_sysfs_exit(void);
@@ -22,4 +23,11 @@ extern void nfs_sysfs_exit(void);
void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net);
void nfs_netns_sysfs_destroy(struct nfs_net *netns);
+void nfs_sysfs_link_rpc_client(struct nfs_server *server,
+ struct rpc_clnt *clnt, const char *sysfs_prefix);
+void nfs_sysfs_add_server(struct nfs_server *s);
+void nfs_sysfs_move_server_to_sb(struct super_block *s);
+void nfs_sysfs_move_sb_to_server(struct nfs_server *s);
+void nfs_sysfs_remove_server(struct nfs_server *s);
+
#endif
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 95d7d8790bc3..f69c451018e3 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1623,6 +1623,20 @@ static int fanotify_events_supported(struct fsnotify_group *group,
return -EINVAL;
/*
+ * mount and sb marks are not allowed on kernel internal pseudo fs,
+ * like pipe_mnt, because that would subscribe to events on all the
+ * anonynous pipes in the system.
+ *
+ * SB_NOUSER covers all of the internal pseudo fs whose objects are not
+ * exposed to user's mount namespace, but there are other SB_KERNMOUNT
+ * fs, like nsfs, debugfs, for which the value of allowing sb and mount
+ * mark is questionable. For now we leave them alone.
+ */
+ if (mark_type != FAN_MARK_INODE &&
+ path->mnt->mnt_sb->s_flags & SB_NOUSER)
+ return -EINVAL;
+
+ /*
* We shouldn't have allowed setting dirent events and the directory
* flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode,
* but because we always allowed it, error only when using new APIs.
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
index 0b8bc66377db..a9d82bbb4729 100644
--- a/fs/ntfs3/attrib.c
+++ b/fs/ntfs3/attrib.c
@@ -573,7 +573,7 @@ add_alloc_in_same_attr_seg:
sbi, run, vcn, lcn, to_allocate, &pre_alloc,
is_mft ? ALLOCATE_MFT : ALLOCATE_DEF, &alen,
is_mft ? 0 :
- (sbi->record_size -
+ (sbi->record_size -
le32_to_cpu(rec->used) + 8) /
3 +
1,
diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c
index c0c6bcbc8c05..42631b31adf1 100644
--- a/fs/ntfs3/attrlist.c
+++ b/fs/ntfs3/attrlist.c
@@ -52,7 +52,7 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr)
if (!attr->non_res) {
lsize = le32_to_cpu(attr->res.data_size);
- le = kmalloc(al_aligned(lsize), GFP_NOFS);
+ le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN);
if (!le) {
err = -ENOMEM;
goto out;
@@ -80,7 +80,7 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr)
if (err < 0)
goto out;
- le = kmalloc(al_aligned(lsize), GFP_NOFS);
+ le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN);
if (!le) {
err = -ENOMEM;
goto out;
@@ -375,8 +375,7 @@ bool al_remove_le(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le)
* al_delete_le - Delete first le from the list which matches its parameters.
*/
bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn,
- const __le16 *name, size_t name_len,
- const struct MFT_REF *ref)
+ const __le16 *name, u8 name_len, const struct MFT_REF *ref)
{
u16 size;
struct ATTR_LIST_ENTRY *le;
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
index 9a6c6a09d70c..107e808e06ea 100644
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -287,8 +287,8 @@ static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
/* Check bits before 'bit'. */
ib = wnd->zone_bit == wnd->zone_end ||
bit < wnd->zone_end ?
- 0 :
- wnd->zone_end;
+ 0 :
+ wnd->zone_end;
while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) {
bit -= 1;
@@ -298,8 +298,8 @@ static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
/* Check bits after 'end_in'. */
ib = wnd->zone_bit == wnd->zone_end ||
end_in > wnd->zone_bit ?
- wnd->nbits :
- wnd->zone_bit;
+ wnd->nbits :
+ wnd->zone_bit;
while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) {
end_in += 1;
@@ -418,7 +418,7 @@ static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len)
n3 = rb_first(&wnd->count_tree);
wnd->extent_max =
n3 ? rb_entry(n3, struct e_node, count.node)->count.key :
- 0;
+ 0;
return;
}
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 9be3e8edf4f3..1d6c824246c4 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -179,7 +179,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
{
int err = 0;
struct address_space *mapping = inode->i_mapping;
- u32 blocksize = 1 << inode->i_blkbits;
+ u32 blocksize = i_blocksize(inode);
pgoff_t idx = vbo >> PAGE_SHIFT;
u32 from = vbo & (PAGE_SIZE - 1);
pgoff_t idx_end = (vbo_to + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -192,7 +192,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
for (; idx < idx_end; idx += 1, from = 0) {
page_off = (loff_t)idx << PAGE_SHIFT;
to = (page_off + PAGE_SIZE) > vbo_to ? (vbo_to - page_off) :
- PAGE_SIZE;
+ PAGE_SIZE;
iblock = page_off >> inode->i_blkbits;
page = find_or_create_page(mapping, idx,
@@ -1078,7 +1078,7 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;
ret = is_compressed(ni) ? ntfs_compress_write(iocb, from) :
- __generic_file_write_iter(iocb, from);
+ __generic_file_write_iter(iocb, from);
out:
inode_unlock(inode);
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 2bfcf1a989c9..16bd9faa2d28 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -77,7 +77,7 @@ struct ATTR_STD_INFO *ni_std(struct ntfs_inode *ni)
attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL);
return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO)) :
- NULL;
+ NULL;
}
/*
@@ -92,7 +92,7 @@ struct ATTR_STD_INFO5 *ni_std5(struct ntfs_inode *ni)
attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL);
return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO5)) :
- NULL;
+ NULL;
}
/*
@@ -236,6 +236,7 @@ struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr,
return attr;
out:
+ ntfs_inode_err(&ni->vfs_inode, "failed to parse mft record");
ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR);
return NULL;
}
@@ -384,7 +385,7 @@ bool ni_add_subrecord(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi)
* ni_remove_attr - Remove all attributes for the given type/name/id.
*/
int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
- const __le16 *name, size_t name_len, bool base_only,
+ const __le16 *name, u8 name_len, bool base_only,
const __le16 *id)
{
int err;
@@ -517,6 +518,9 @@ out:
*/
static int ni_repack(struct ntfs_inode *ni)
{
+#if 1
+ return 0;
+#else
int err = 0;
struct ntfs_sb_info *sbi = ni->mi.sbi;
struct mft_inode *mi, *mi_p = NULL;
@@ -639,6 +643,7 @@ static int ni_repack(struct ntfs_inode *ni)
run_close(&run);
return err;
+#endif
}
/*
@@ -813,10 +818,8 @@ int ni_create_attr_list(struct ntfs_inode *ni)
* Looks like one record_size is always enough.
*/
le = kmalloc(al_aligned(rs), GFP_NOFS);
- if (!le) {
- err = -ENOMEM;
- goto out;
- }
+ if (!le)
+ return -ENOMEM;
mi_get_ref(&ni->mi, &le->ref);
ni->attr_list.le = le;
@@ -865,15 +868,16 @@ int ni_create_attr_list(struct ntfs_inode *ni)
if (to_free > free_b) {
err = -EINVAL;
- goto out1;
+ goto out;
}
}
/* Allocate child MFT. */
err = ntfs_look_free_mft(sbi, &rno, is_mft, ni, &mi);
if (err)
- goto out1;
+ goto out;
+ err = -EINVAL;
/* Call mi_remove_attr() in reverse order to keep pointers 'arr_move' valid. */
while (to_free > 0) {
struct ATTRIB *b = arr_move[--nb];
@@ -882,7 +886,8 @@ int ni_create_attr_list(struct ntfs_inode *ni)
attr = mi_insert_attr(mi, b->type, Add2Ptr(b, name_off),
b->name_len, asize, name_off);
- WARN_ON(!attr);
+ if (!attr)
+ goto out;
mi_get_ref(mi, &le_b[nb]->ref);
le_b[nb]->id = attr->id;
@@ -892,17 +897,20 @@ int ni_create_attr_list(struct ntfs_inode *ni)
attr->id = le_b[nb]->id;
/* Remove from primary record. */
- WARN_ON(!mi_remove_attr(NULL, &ni->mi, b));
+ if (!mi_remove_attr(NULL, &ni->mi, b))
+ goto out;
if (to_free <= asize)
break;
to_free -= asize;
- WARN_ON(!nb);
+ if (!nb)
+ goto out;
}
attr = mi_insert_attr(&ni->mi, ATTR_LIST, NULL, 0,
lsize + SIZEOF_RESIDENT, SIZEOF_RESIDENT);
- WARN_ON(!attr);
+ if (!attr)
+ goto out;
attr->non_res = 0;
attr->flags = 0;
@@ -916,14 +924,12 @@ int ni_create_attr_list(struct ntfs_inode *ni)
ni->attr_list.dirty = false;
mark_inode_dirty(&ni->vfs_inode);
- goto out;
+ return 0;
-out1:
+out:
kfree(ni->attr_list.le);
ni->attr_list.le = NULL;
ni->attr_list.size = 0;
-
-out:
return err;
}
@@ -1638,14 +1644,13 @@ int ni_delete_all(struct ntfs_inode *ni)
* Return: File name attribute by its value.
*/
struct ATTR_FILE_NAME *ni_fname_name(struct ntfs_inode *ni,
- const struct cpu_str *uni,
+ const struct le_str *uni,
const struct MFT_REF *home_dir,
struct mft_inode **mi,
struct ATTR_LIST_ENTRY **le)
{
struct ATTRIB *attr = NULL;
struct ATTR_FILE_NAME *fname;
- struct le_str *fns;
if (le)
*le = NULL;
@@ -1669,10 +1674,9 @@ next:
if (uni->len != fname->name_len)
goto next;
- fns = (struct le_str *)&fname->name_len;
- if (ntfs_cmp_names_cpu(uni, fns, NULL, false))
+ if (ntfs_cmp_names(uni->name, uni->len, fname->name, uni->len, NULL,
+ false))
goto next;
-
return fname;
}
@@ -1757,8 +1761,8 @@ int ni_new_attr_flags(struct ntfs_inode *ni, enum FILE_ATTRIBUTE new_fa)
/* Resize nonresident empty attribute in-place only. */
new_asize = (new_aflags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED)) ?
- (SIZEOF_NONRESIDENT_EX + 8) :
- (SIZEOF_NONRESIDENT + 8);
+ (SIZEOF_NONRESIDENT_EX + 8) :
+ (SIZEOF_NONRESIDENT + 8);
if (!mi_resize_attr(mi, attr, new_asize - le32_to_cpu(attr->size)))
return -EOPNOTSUPP;
@@ -2910,7 +2914,7 @@ int ni_remove_name(struct ntfs_inode *dir_ni, struct ntfs_inode *ni,
/* Find name in record. */
mi_get_ref(&dir_ni->mi, &de_name->home);
- fname = ni_fname_name(ni, (struct cpu_str *)&de_name->name_len,
+ fname = ni_fname_name(ni, (struct le_str *)&de_name->name_len,
&de_name->home, &mi, &le);
if (!fname)
return -ENOENT;
@@ -3160,8 +3164,8 @@ static bool ni_update_parent(struct ntfs_inode *ni, struct NTFS_DUP_INFO *dup,
__le64 valid_le;
dup->alloc_size = is_attr_ext(attr) ?
- attr->nres.total_size :
- attr->nres.alloc_size;
+ attr->nres.total_size :
+ attr->nres.alloc_size;
dup->data_size = attr->nres.data_size;
if (new_valid > data_size)
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
index 57762c5fe68b..12f28cdf5c83 100644
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -828,8 +828,8 @@ static inline struct RESTART_TABLE *extend_rsttbl(struct RESTART_TABLE *tbl,
memcpy(rt + 1, tbl + 1, esize * used);
rt->free_goal = free_goal == ~0u ?
- cpu_to_le32(~0u) :
- cpu_to_le32(sizeof(struct RESTART_TABLE) +
+ cpu_to_le32(~0u) :
+ cpu_to_le32(sizeof(struct RESTART_TABLE) +
free_goal * esize);
if (tbl->first_free) {
@@ -1090,8 +1090,8 @@ static inline u64 base_lsn(struct ntfs_log *log,
<< log->file_data_bits) +
((((is_log_record_end(hdr) &&
h_lsn <= le64_to_cpu(hdr->record_hdr.last_end_lsn)) ?
- le16_to_cpu(hdr->record_hdr.next_record_off) :
- log->page_size) +
+ le16_to_cpu(hdr->record_hdr.next_record_off) :
+ log->page_size) +
lsn) >>
3);
@@ -1299,8 +1299,8 @@ static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size,
log->clst_per_page = 1;
log->first_page = major_ver >= 2 ?
- 0x22 * page_size :
- ((sys_page_size << 1) + (page_size << 1));
+ 0x22 * page_size :
+ ((sys_page_size << 1) + (page_size << 1));
log->major_ver = major_ver;
log->minor_ver = minor_ver;
}
@@ -1513,8 +1513,8 @@ static u32 current_log_avail(struct ntfs_log *log)
* If there is no oldest lsn then start at the first page of the file.
*/
oldest_off = (log->l_flags & NTFSLOG_NO_OLDEST_LSN) ?
- log->first_page :
- (log->oldest_lsn_off & ~log->sys_page_mask);
+ log->first_page :
+ (log->oldest_lsn_off & ~log->sys_page_mask);
/*
* We will use the next log page offset to compute the next free page.
@@ -1522,9 +1522,9 @@ static u32 current_log_avail(struct ntfs_log *log)
* If we are at the first page then use the end of the file.
*/
next_free_off = (log->l_flags & NTFSLOG_REUSE_TAIL) ?
- log->next_page + log->page_size :
+ log->next_page + log->page_size :
log->next_page == log->first_page ? log->l_size :
- log->next_page;
+ log->next_page;
/* If the two offsets are the same then there is no available space. */
if (oldest_off == next_free_off)
@@ -1535,8 +1535,8 @@ static u32 current_log_avail(struct ntfs_log *log)
*/
free_bytes =
oldest_off < next_free_off ?
- log->total_avail_pages - (next_free_off - oldest_off) :
- oldest_off - next_free_off;
+ log->total_avail_pages - (next_free_off - oldest_off) :
+ oldest_off - next_free_off;
free_bytes >>= log->page_bits;
return free_bytes * log->reserved;
@@ -1671,7 +1671,7 @@ next_tail:
best_lsn1 = first_tail ? base_lsn(log, first_tail, first_file_off) : 0;
best_lsn2 = second_tail ? base_lsn(log, second_tail, second_file_off) :
- 0;
+ 0;
if (first_tail && second_tail) {
if (best_lsn1 > best_lsn2) {
@@ -1767,7 +1767,7 @@ tail_read:
page_cnt = page_pos = 1;
curpage_off = seq_base == log->seq_num ? min(log->next_page, page_off) :
- log->next_page;
+ log->next_page;
wrapped_file =
curpage_off == log->first_page &&
@@ -1826,8 +1826,8 @@ use_cur_page:
((lsn_cur >> log->file_data_bits) +
((curpage_off <
(lsn_to_vbo(log, lsn_cur) & ~log->page_mask)) ?
- 1 :
- 0)) != expected_seq) {
+ 1 :
+ 0)) != expected_seq) {
goto check_tail;
}
@@ -2643,8 +2643,8 @@ static inline bool check_index_root(const struct ATTRIB *attr,
const struct INDEX_ROOT *root = resident_data(attr);
u8 index_bits = le32_to_cpu(root->index_block_size) >=
sbi->cluster_size ?
- sbi->cluster_bits :
- SECTOR_SHIFT;
+ sbi->cluster_bits :
+ SECTOR_SHIFT;
u8 block_clst = root->index_block_clst;
if (le32_to_cpu(attr->res.data_size) < sizeof(struct INDEX_ROOT) ||
@@ -3861,9 +3861,9 @@ check_restart_area:
/* If we have a valid page then grab a pointer to the restart area. */
ra2 = rst_info.valid_page ?
- Add2Ptr(rst_info.r_page,
+ Add2Ptr(rst_info.r_page,
le16_to_cpu(rst_info.r_page->ra_off)) :
- NULL;
+ NULL;
if (rst_info.chkdsk_was_run ||
(ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) {
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 28cc421102e5..33afee0f5559 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -9,6 +9,7 @@
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/kernel.h>
+#include <linux/nls.h>
#include "debug.h"
#include "ntfs.h"
@@ -173,12 +174,12 @@ int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes,
fo = le16_to_cpu(rhdr->fix_off);
fn = simple ? ((bytes >> SECTOR_SHIFT) + 1) :
- le16_to_cpu(rhdr->fix_num);
+ le16_to_cpu(rhdr->fix_num);
/* Check errors. */
if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- ||
fn * SECTOR_SIZE > bytes) {
- return -EINVAL; /* Native chkntfs returns ok! */
+ return -E_NTFS_CORRUPT;
}
/* Get fixup pointer. */
@@ -1661,7 +1662,8 @@ int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run,
return 0;
}
-struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir)
+struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno,
+ enum RECORD_FLAG flag)
{
int err = 0;
struct super_block *sb = sbi->sb;
@@ -1673,8 +1675,7 @@ struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir)
ni = ntfs_i(inode);
- err = mi_format_new(&ni->mi, sbi, rno, dir ? RECORD_FLAG_DIR : 0,
- false);
+ err = mi_format_new(&ni->mi, sbi, rno, flag, false);
if (err)
goto out;
@@ -1937,7 +1938,7 @@ int ntfs_security_init(struct ntfs_sb_info *sbi)
break;
sii_e = (struct NTFS_DE_SII *)ne;
- if (le16_to_cpu(ne->view.data_size) < SIZEOF_SECURITY_HDR)
+ if (le16_to_cpu(ne->view.data_size) < sizeof(sii_e->sec_hdr))
continue;
next_id = le32_to_cpu(sii_e->sec_id) + 1;
@@ -1998,18 +1999,18 @@ int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id,
goto out;
t32 = le32_to_cpu(sii_e->sec_hdr.size);
- if (t32 < SIZEOF_SECURITY_HDR) {
+ if (t32 < sizeof(struct SECURITY_HDR)) {
err = -EINVAL;
goto out;
}
- if (t32 > SIZEOF_SECURITY_HDR + 0x10000) {
+ if (t32 > sizeof(struct SECURITY_HDR) + 0x10000) {
/* Looks like too big security. 0x10000 - is arbitrary big number. */
err = -EFBIG;
goto out;
}
- *size = t32 - SIZEOF_SECURITY_HDR;
+ *size = t32 - sizeof(struct SECURITY_HDR);
p = kmalloc(*size, GFP_NOFS);
if (!p) {
@@ -2023,14 +2024,14 @@ int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id,
if (err)
goto out;
- if (memcmp(&d_security, &sii_e->sec_hdr, SIZEOF_SECURITY_HDR)) {
+ if (memcmp(&d_security, &sii_e->sec_hdr, sizeof(d_security))) {
err = -EINVAL;
goto out;
}
err = ntfs_read_run_nb(sbi, &ni->file.run,
le64_to_cpu(sii_e->sec_hdr.off) +
- SIZEOF_SECURITY_HDR,
+ sizeof(struct SECURITY_HDR),
p, *size, NULL);
if (err)
goto out;
@@ -2069,7 +2070,7 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi,
struct NTFS_DE_SDH sdh_e;
struct NTFS_DE_SII sii_e;
struct SECURITY_HDR *d_security;
- u32 new_sec_size = size_sd + SIZEOF_SECURITY_HDR;
+ u32 new_sec_size = size_sd + sizeof(struct SECURITY_HDR);
u32 aligned_sec_size = ALIGN(new_sec_size, 16);
struct SECURITY_KEY hash_key;
struct ntfs_fnd *fnd_sdh = NULL;
@@ -2207,14 +2208,14 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi,
/* Fill SII entry. */
sii_e.de.view.data_off =
cpu_to_le16(offsetof(struct NTFS_DE_SII, sec_hdr));
- sii_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR);
+ sii_e.de.view.data_size = cpu_to_le16(sizeof(struct SECURITY_HDR));
sii_e.de.view.res = 0;
- sii_e.de.size = cpu_to_le16(SIZEOF_SII_DIRENTRY);
+ sii_e.de.size = cpu_to_le16(sizeof(struct NTFS_DE_SII));
sii_e.de.key_size = cpu_to_le16(sizeof(d_security->key.sec_id));
sii_e.de.flags = 0;
sii_e.de.res = 0;
sii_e.sec_id = d_security->key.sec_id;
- memcpy(&sii_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR);
+ memcpy(&sii_e.sec_hdr, d_security, sizeof(struct SECURITY_HDR));
err = indx_insert_entry(indx_sii, ni, &sii_e.de, NULL, NULL, 0);
if (err)
@@ -2223,7 +2224,7 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi,
/* Fill SDH entry. */
sdh_e.de.view.data_off =
cpu_to_le16(offsetof(struct NTFS_DE_SDH, sec_hdr));
- sdh_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR);
+ sdh_e.de.view.data_size = cpu_to_le16(sizeof(struct SECURITY_HDR));
sdh_e.de.view.res = 0;
sdh_e.de.size = cpu_to_le16(SIZEOF_SDH_DIRENTRY);
sdh_e.de.key_size = cpu_to_le16(sizeof(sdh_e.key));
@@ -2231,7 +2232,7 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi,
sdh_e.de.res = 0;
sdh_e.key.hash = d_security->key.hash;
sdh_e.key.sec_id = d_security->key.sec_id;
- memcpy(&sdh_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR);
+ memcpy(&sdh_e.sec_hdr, d_security, sizeof(struct SECURITY_HDR));
sdh_e.magic[0] = cpu_to_le16('I');
sdh_e.magic[1] = cpu_to_le16('I');
@@ -2522,7 +2523,8 @@ out:
/*
* run_deallocate - Deallocate clusters.
*/
-int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim)
+int run_deallocate(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+ bool trim)
{
CLST lcn, len;
size_t idx = 0;
@@ -2578,13 +2580,13 @@ static inline bool name_has_forbidden_chars(const struct le_str *fname)
return false;
}
-static inline bool is_reserved_name(struct ntfs_sb_info *sbi,
+static inline bool is_reserved_name(const struct ntfs_sb_info *sbi,
const struct le_str *fname)
{
int port_digit;
const __le16 *name = fname->name;
int len = fname->len;
- u16 *upcase = sbi->upcase;
+ const u16 *upcase = sbi->upcase;
/* check for 3 chars reserved names (device names) */
/* name by itself or with any extension is forbidden */
@@ -2618,3 +2620,60 @@ bool valid_windows_name(struct ntfs_sb_info *sbi, const struct le_str *fname)
return !name_has_forbidden_chars(fname) &&
!is_reserved_name(sbi, fname);
}
+
+/*
+ * ntfs_set_label - updates current ntfs label.
+ */
+int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len)
+{
+ int err;
+ struct ATTRIB *attr;
+ struct ntfs_inode *ni = sbi->volume.ni;
+ const u8 max_ulen = 0x80; /* TODO: use attrdef to get maximum length */
+ /* Allocate PATH_MAX bytes. */
+ struct cpu_str *uni = __getname();
+
+ if (!uni)
+ return -ENOMEM;
+
+ err = ntfs_nls_to_utf16(sbi, label, len, uni, (PATH_MAX - 2) / 2,
+ UTF16_LITTLE_ENDIAN);
+ if (err < 0)
+ goto out;
+
+ if (uni->len > max_ulen) {
+ ntfs_warn(sbi->sb, "new label is too long");
+ err = -EFBIG;
+ goto out;
+ }
+
+ ni_lock(ni);
+
+ /* Ignore any errors. */
+ ni_remove_attr(ni, ATTR_LABEL, NULL, 0, false, NULL);
+
+ err = ni_insert_resident(ni, uni->len * sizeof(u16), ATTR_LABEL, NULL,
+ 0, &attr, NULL, NULL);
+ if (err < 0)
+ goto unlock_out;
+
+ /* write new label in on-disk struct. */
+ memcpy(resident_data(attr), uni->name, uni->len * sizeof(u16));
+
+ /* update cached value of current label. */
+ if (len >= ARRAY_SIZE(sbi->volume.label))
+ len = ARRAY_SIZE(sbi->volume.label) - 1;
+ memcpy(sbi->volume.label, label, len);
+ sbi->volume.label[len] = 0;
+ mark_inode_dirty_sync(&ni->vfs_inode);
+
+unlock_out:
+ ni_unlock(ni);
+
+ if (!err)
+ err = _ni_write_inode(&ni->vfs_inode, 0);
+
+out:
+ __putname(uni);
+ return err;
+} \ No newline at end of file
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 0a48d2d67219..124c6e822623 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -432,8 +432,8 @@ next_run:
nbits = 8 * (data_size - vbo);
ok = nbits > from ?
- (*fn)((ulong *)bh->b_data, from, nbits, ret) :
- false;
+ (*fn)((ulong *)bh->b_data, from, nbits, ret) :
+ false;
put_bh(bh);
if (ok) {
@@ -1113,6 +1113,12 @@ ok:
*node = in;
out:
+ if (err == -E_NTFS_CORRUPT) {
+ ntfs_inode_err(&ni->vfs_inode, "directory corrupted");
+ ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR);
+ err = -EINVAL;
+ }
+
if (ib != in->index)
kfree(ib);
@@ -1676,8 +1682,8 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
/* Create alloc and bitmap attributes (if not). */
err = run_is_empty(&indx->alloc_run) ?
- indx_create_allocate(indx, ni, &new_vbn) :
- indx_add_allocate(indx, ni, &new_vbn);
+ indx_create_allocate(indx, ni, &new_vbn) :
+ indx_add_allocate(indx, ni, &new_vbn);
/* Layout of record may be changed, so rescan root. */
root = indx_get_root(indx, ni, &attr, &mi);
@@ -1868,8 +1874,8 @@ indx_insert_into_buffer(struct ntfs_index *indx, struct ntfs_inode *ni,
(*indx->cmp)(new_de + 1, le16_to_cpu(new_de->key_size),
up_e + 1, le16_to_cpu(up_e->key_size),
ctx) < 0 ?
- hdr2 :
- hdr1,
+ hdr2 :
+ hdr1,
new_de, NULL, ctx);
indx_mark_used(indx, ni, new_vbn >> indx->idx2vbn_bits);
@@ -2340,7 +2346,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
re, ctx,
fnd->level - 1,
fnd) :
- indx_insert_into_root(indx, ni, re, e,
+ indx_insert_into_root(indx, ni, re, e,
ctx, fnd, 0);
kfree(re);
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 6c560245eef4..dc7e7ab701c6 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -263,7 +263,7 @@ next_attr:
goto next_attr;
run = ino == MFT_REC_BITMAP ? &sbi->used.bitmap.run :
- &ni->file.run;
+ &ni->file.run;
break;
case ATTR_ROOT:
@@ -291,8 +291,8 @@ next_attr:
goto out;
mode = sb->s_root ?
- (S_IFDIR | (0777 & sbi->options->fs_dmask_inv)) :
- (S_IFDIR | 0777);
+ (S_IFDIR | (0777 & sbi->options->fs_dmask_inv)) :
+ (S_IFDIR | 0777);
goto next_attr;
case ATTR_ALLOC:
@@ -450,7 +450,7 @@ end_enum:
inode->i_op = &ntfs_file_inode_operations;
inode->i_fop = &ntfs_file_operations;
inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr :
- &ntfs_aops;
+ &ntfs_aops;
if (ino != MFT_REC_MFT)
init_rwsem(&ni->file.run_lock);
} else if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
@@ -787,7 +787,7 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
ret = blockdev_direct_IO(iocb, inode, iter,
wr ? ntfs_get_block_direct_IO_W :
- ntfs_get_block_direct_IO_R);
+ ntfs_get_block_direct_IO_R);
if (ret > 0)
end = vbo + ret;
@@ -1191,11 +1191,11 @@ out:
* - ntfs_symlink
* - ntfs_mkdir
* - ntfs_atomic_open
- *
+ *
* NOTE: if fnd != NULL (ntfs_atomic_open) then @dir is locked
*/
-struct inode *ntfs_create_inode(struct mnt_idmap *idmap,
- struct inode *dir, struct dentry *dentry,
+struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry,
const struct cpu_str *uni, umode_t mode,
dev_t dev, const char *symname, u32 size,
struct ntfs_fnd *fnd)
@@ -1309,7 +1309,7 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap,
if (err)
goto out2;
- ni = ntfs_new_inode(sbi, ino, fa & FILE_ATTRIBUTE_DIRECTORY);
+ ni = ntfs_new_inode(sbi, ino, S_ISDIR(mode) ? RECORD_FLAG_DIR : 0);
if (IS_ERR(ni)) {
err = PTR_ERR(ni);
ni = NULL;
@@ -1437,8 +1437,7 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap,
root = Add2Ptr(attr, sizeof(I30_NAME) + SIZEOF_RESIDENT);
memcpy(root, dir_root, offsetof(struct INDEX_ROOT, ihdr));
- root->ihdr.de_off =
- cpu_to_le32(sizeof(struct INDEX_HDR)); // 0x10
+ root->ihdr.de_off = cpu_to_le32(sizeof(struct INDEX_HDR));
root->ihdr.used = cpu_to_le32(sizeof(struct INDEX_HDR) +
sizeof(struct NTFS_DE));
root->ihdr.total = root->ihdr.used;
@@ -1605,7 +1604,7 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap,
inode->i_op = &ntfs_file_inode_operations;
inode->i_fop = &ntfs_file_operations;
inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr :
- &ntfs_aops;
+ &ntfs_aops;
init_rwsem(&ni->file.run_lock);
} else {
inode->i_op = &ntfs_special_inode_operations;
diff --git a/fs/ntfs3/lznt.c b/fs/ntfs3/lznt.c
index 61e161c7c567..4aae598d6d88 100644
--- a/fs/ntfs3/lznt.c
+++ b/fs/ntfs3/lznt.c
@@ -297,7 +297,7 @@ next:
struct lznt *get_lznt_ctx(int level)
{
struct lznt *r = kzalloc(level ? offsetof(struct lznt, hash) :
- sizeof(struct lznt),
+ sizeof(struct lznt),
GFP_NOFS);
if (r)
@@ -393,8 +393,8 @@ ssize_t decompress_lznt(const void *cmpr, size_t cmpr_size, void *unc,
} else {
/* This chunk does not contain compressed data. */
unc_use = unc_chunk + LZNT_CHUNK_SIZE > unc_end ?
- unc_end - unc_chunk :
- LZNT_CHUNK_SIZE;
+ unc_end - unc_chunk :
+ LZNT_CHUNK_SIZE;
if (cmpr_chunk + sizeof(chunk_hdr) + unc_use >
cmpr_end) {
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index 9736b1e4a0f6..70f8c859e0ad 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -109,8 +109,8 @@ static int ntfs_create(struct mnt_idmap *idmap, struct inode *dir,
{
struct inode *inode;
- inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFREG | mode,
- 0, NULL, 0, NULL);
+ inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFREG | mode, 0,
+ NULL, 0, NULL);
return IS_ERR(inode) ? PTR_ERR(inode) : 0;
}
@@ -125,8 +125,8 @@ static int ntfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
{
struct inode *inode;
- inode = ntfs_create_inode(idmap, dir, dentry, NULL, mode, rdev,
- NULL, 0, NULL);
+ inode = ntfs_create_inode(idmap, dir, dentry, NULL, mode, rdev, NULL, 0,
+ NULL);
return IS_ERR(inode) ? PTR_ERR(inode) : 0;
}
@@ -199,8 +199,8 @@ static int ntfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
u32 size = strlen(symname);
struct inode *inode;
- inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFLNK | 0777,
- 0, symname, size, NULL);
+ inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFLNK | 0777, 0,
+ symname, size, NULL);
return IS_ERR(inode) ? PTR_ERR(inode) : 0;
}
@@ -213,8 +213,8 @@ static int ntfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
{
struct inode *inode;
- inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFDIR | mode,
- 0, NULL, 0, NULL);
+ inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFDIR | mode, 0,
+ NULL, 0, NULL);
return IS_ERR(inode) ? PTR_ERR(inode) : 0;
}
@@ -422,19 +422,10 @@ static int ntfs_atomic_open(struct inode *dir, struct dentry *dentry,
* fnd contains tree's path to insert to.
* If fnd is not NULL then dir is locked.
*/
-
- /*
- * Unfortunately I don't know how to get here correct 'struct nameidata *nd'
- * or 'struct mnt_idmap *idmap'.
- * See atomic_open in fs/namei.c.
- * This is why xfstest/633 failed.
- * Looks like ntfs_atomic_open must accept 'struct mnt_idmap *idmap' as argument.
- */
-
- inode = ntfs_create_inode(&nop_mnt_idmap, dir, dentry, uni, mode, 0,
- NULL, 0, fnd);
+ inode = ntfs_create_inode(mnt_idmap(file->f_path.mnt), dir, dentry, uni,
+ mode, 0, NULL, 0, fnd);
err = IS_ERR(inode) ? PTR_ERR(inode) :
- finish_open(file, dentry, ntfs_file_open);
+ finish_open(file, dentry, ntfs_file_open);
dput(d);
out2:
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index 90151e56c122..98b76d1b09e7 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -95,11 +95,10 @@ enum RECORD_NUM {
MFT_REC_BITMAP = 6,
MFT_REC_BOOT = 7,
MFT_REC_BADCLUST = 8,
- //MFT_REC_QUOTA = 9,
- MFT_REC_SECURE = 9, // NTFS 3.0
+ MFT_REC_SECURE = 9,
MFT_REC_UPCASE = 10,
- MFT_REC_EXTEND = 11, // NTFS 3.0
- MFT_REC_RESERVED = 11,
+ MFT_REC_EXTEND = 11,
+ MFT_REC_RESERVED = 12,
MFT_REC_FREE = 16,
MFT_REC_USER = 24,
};
@@ -109,7 +108,6 @@ enum ATTR_TYPE {
ATTR_STD = cpu_to_le32(0x10),
ATTR_LIST = cpu_to_le32(0x20),
ATTR_NAME = cpu_to_le32(0x30),
- // ATTR_VOLUME_VERSION on Nt4
ATTR_ID = cpu_to_le32(0x40),
ATTR_SECURE = cpu_to_le32(0x50),
ATTR_LABEL = cpu_to_le32(0x60),
@@ -118,7 +116,6 @@ enum ATTR_TYPE {
ATTR_ROOT = cpu_to_le32(0x90),
ATTR_ALLOC = cpu_to_le32(0xA0),
ATTR_BITMAP = cpu_to_le32(0xB0),
- // ATTR_SYMLINK on Nt4
ATTR_REPARSE = cpu_to_le32(0xC0),
ATTR_EA_INFO = cpu_to_le32(0xD0),
ATTR_EA = cpu_to_le32(0xE0),
@@ -144,6 +141,7 @@ enum FILE_ATTRIBUTE {
FILE_ATTRIBUTE_ENCRYPTED = cpu_to_le32(0x00004000),
FILE_ATTRIBUTE_VALID_FLAGS = cpu_to_le32(0x00007fb7),
FILE_ATTRIBUTE_DIRECTORY = cpu_to_le32(0x10000000),
+ FILE_ATTRIBUTE_INDEX = cpu_to_le32(0x20000000)
};
static_assert(sizeof(enum FILE_ATTRIBUTE) == 4);
@@ -266,7 +264,7 @@ enum RECORD_FLAG {
RECORD_FLAG_IN_USE = cpu_to_le16(0x0001),
RECORD_FLAG_DIR = cpu_to_le16(0x0002),
RECORD_FLAG_SYSTEM = cpu_to_le16(0x0004),
- RECORD_FLAG_UNKNOWN = cpu_to_le16(0x0008),
+ RECORD_FLAG_INDEX = cpu_to_le16(0x0008),
};
/* MFT Record structure. */
@@ -290,6 +288,15 @@ struct MFT_REC {
#define MFTRECORD_FIXUP_OFFSET_1 offsetof(struct MFT_REC, res)
#define MFTRECORD_FIXUP_OFFSET_3 offsetof(struct MFT_REC, fixups)
+/*
+ * define MFTRECORD_FIXUP_OFFSET as MFTRECORD_FIXUP_OFFSET_3 (0x30)
+ * to format new mft records with bigger header (as current ntfs.sys does)
+ *
+ * define MFTRECORD_FIXUP_OFFSET as MFTRECORD_FIXUP_OFFSET_1 (0x2A)
+ * to format new mft records with smaller header (as old ntfs.sys did)
+ * Both variants are valid.
+ */
+#define MFTRECORD_FIXUP_OFFSET MFTRECORD_FIXUP_OFFSET_1
static_assert(MFTRECORD_FIXUP_OFFSET_1 == 0x2A);
static_assert(MFTRECORD_FIXUP_OFFSET_3 == 0x30);
@@ -331,18 +338,18 @@ struct ATTR_NONRESIDENT {
__le64 svcn; // 0x10: Starting VCN of this segment.
__le64 evcn; // 0x18: End VCN of this segment.
__le16 run_off; // 0x20: Offset to packed runs.
- // Unit of Compression size for this stream, expressed
- // as a log of the cluster size.
+ // Unit of Compression size for this stream, expressed
+ // as a log of the cluster size.
//
- // 0 means file is not compressed
- // 1, 2, 3, and 4 are potentially legal values if the
- // stream is compressed, however the implementation
- // may only choose to use 4, or possibly 3. Note
- // that 4 means cluster size time 16. If convenient
- // the implementation may wish to accept a
- // reasonable range of legal values here (1-5?),
- // even if the implementation only generates
- // a smaller set of values itself.
+ // 0 means file is not compressed
+ // 1, 2, 3, and 4 are potentially legal values if the
+ // stream is compressed, however the implementation
+ // may only choose to use 4, or possibly 3.
+ // Note that 4 means cluster size time 16.
+ // If convenient the implementation may wish to accept a
+ // reasonable range of legal values here (1-5?),
+ // even if the implementation only generates
+ // a smaller set of values itself.
u8 c_unit; // 0x22:
u8 res1[5]; // 0x23:
__le64 alloc_size; // 0x28: The allocated size of attribute in bytes.
@@ -836,16 +843,22 @@ static_assert(sizeof(struct ATTR_DEF_ENTRY) == 0xa0);
/* Object ID (0x40) */
struct OBJECT_ID {
struct GUID ObjId; // 0x00: Unique Id assigned to file.
- struct GUID BirthVolumeId; // 0x10: Birth Volume Id is the Object Id of the Volume on.
- // which the Object Id was allocated. It never changes.
- struct GUID BirthObjectId; // 0x20: Birth Object Id is the first Object Id that was
- // ever assigned to this MFT Record. I.e. If the Object Id
- // is changed for some reason, this field will reflect the
- // original value of the Object Id.
- struct GUID DomainId; // 0x30: Domain Id is currently unused but it is intended to be
- // used in a network environment where the local machine is
- // part of a Windows 2000 Domain. This may be used in a Windows
- // 2000 Advanced Server managed domain.
+
+ // Birth Volume Id is the Object Id of the Volume on.
+ // which the Object Id was allocated. It never changes.
+ struct GUID BirthVolumeId; //0x10:
+
+ // Birth Object Id is the first Object Id that was
+ // ever assigned to this MFT Record. I.e. If the Object Id
+ // is changed for some reason, this field will reflect the
+ // original value of the Object Id.
+ struct GUID BirthObjectId; // 0x20:
+
+ // Domain Id is currently unused but it is intended to be
+ // used in a network environment where the local machine is
+ // part of a Windows 2000 Domain. This may be used in a Windows
+ // 2000 Advanced Server managed domain.
+ struct GUID DomainId; // 0x30:
};
static_assert(sizeof(struct OBJECT_ID) == 0x40);
@@ -855,32 +868,35 @@ struct NTFS_DE_O {
struct NTFS_DE de;
struct GUID ObjId; // 0x10: Unique Id assigned to file.
struct MFT_REF ref; // 0x20: MFT record number with this file.
- struct GUID BirthVolumeId; // 0x28: Birth Volume Id is the Object Id of the Volume on
- // which the Object Id was allocated. It never changes.
- struct GUID BirthObjectId; // 0x38: Birth Object Id is the first Object Id that was
- // ever assigned to this MFT Record. I.e. If the Object Id
- // is changed for some reason, this field will reflect the
- // original value of the Object Id.
- // This field is valid if data_size == 0x48.
- struct GUID BirthDomainId; // 0x48: Domain Id is currently unused but it is intended
- // to be used in a network environment where the local
- // machine is part of a Windows 2000 Domain. This may be
- // used in a Windows 2000 Advanced Server managed domain.
+
+ // Birth Volume Id is the Object Id of the Volume on
+ // which the Object Id was allocated. It never changes.
+ struct GUID BirthVolumeId; // 0x28:
+
+ // Birth Object Id is the first Object Id that was
+ // ever assigned to this MFT Record. I.e. If the Object Id
+ // is changed for some reason, this field will reflect the
+ // original value of the Object Id.
+ // This field is valid if data_size == 0x48.
+ struct GUID BirthObjectId; // 0x38:
+
+ // Domain Id is currently unused but it is intended
+ // to be used in a network environment where the local
+ // machine is part of a Windows 2000 Domain. This may be
+ // used in a Windows 2000 Advanced Server managed domain.
+ struct GUID BirthDomainId; // 0x48:
};
static_assert(sizeof(struct NTFS_DE_O) == 0x58);
-#define NTFS_OBJECT_ENTRY_DATA_SIZE1 \
- 0x38 // struct NTFS_DE_O.BirthDomainId is not used
-#define NTFS_OBJECT_ENTRY_DATA_SIZE2 \
- 0x48 // struct NTFS_DE_O.BirthDomainId is used
-
/* Q Directory entry structure ( rule = 0x11 ) */
struct NTFS_DE_Q {
struct NTFS_DE de;
__le32 owner_id; // 0x10: Unique Id assigned to file
+
+ /* here is 0x30 bytes of user quota. NOTE: 4 byte aligned! */
__le32 Version; // 0x14: 0x02
- __le32 flags2; // 0x18: Quota flags, see above
+ __le32 Flags; // 0x18: Quota flags, see above
__le64 BytesUsed; // 0x1C:
__le64 ChangeTime; // 0x24:
__le64 WarningLimit; // 0x28:
@@ -888,9 +904,9 @@ struct NTFS_DE_Q {
__le64 ExceededTime; // 0x3C:
// SID is placed here
-}; // sizeof() = 0x44
+}__packed; // sizeof() = 0x44
-#define SIZEOF_NTFS_DE_Q 0x44
+static_assert(sizeof(struct NTFS_DE_Q) == 0x44);
#define SecurityDescriptorsBlockSize 0x40000 // 256K
#define SecurityDescriptorMaxSize 0x20000 // 128K
@@ -912,7 +928,7 @@ struct SECURITY_HDR {
*/
} __packed;
-#define SIZEOF_SECURITY_HDR 0x14
+static_assert(sizeof(struct SECURITY_HDR) == 0x14);
/* SII Directory entry structure */
struct NTFS_DE_SII {
@@ -921,7 +937,8 @@ struct NTFS_DE_SII {
struct SECURITY_HDR sec_hdr; // 0x14:
} __packed;
-#define SIZEOF_SII_DIRENTRY 0x28
+static_assert(offsetof(struct NTFS_DE_SII, sec_hdr) == 0x14);
+static_assert(sizeof(struct NTFS_DE_SII) == 0x28);
/* SDH Directory entry structure */
struct NTFS_DE_SDH {
@@ -1155,7 +1172,7 @@ struct REPARSE_DATA_BUFFER {
#define FILE_NEED_EA 0x80 // See ntifs.h
/*
- *FILE_NEED_EA, indicates that the file to which the EA belongs cannot be
+ * FILE_NEED_EA, indicates that the file to which the EA belongs cannot be
* interpreted without understanding the associated extended attributes.
*/
struct EA_INFO {
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index eb01f7e76479..629403ede6e5 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -53,6 +53,8 @@ enum utf16_endian;
#define E_NTFS_NONRESIDENT 556
/* NTFS specific error code about punch hole. */
#define E_NTFS_NOTALIGNED 557
+/* NTFS specific error code when on-disk struct is corrupted. */
+#define E_NTFS_CORRUPT 558
/* sbi->flags */
@@ -274,7 +276,7 @@ struct ntfs_sb_info {
__le16 flags; // Cached current VOLUME_INFO::flags, VOLUME_FLAG_DIRTY.
u8 major_ver;
u8 minor_ver;
- char label[65];
+ char label[256];
bool real_dirty; // Real fs state.
} volume;
@@ -284,7 +286,6 @@ struct ntfs_sb_info {
struct ntfs_inode *ni;
u32 next_id;
u64 next_off;
-
__le32 def_security_id;
} security;
@@ -312,6 +313,7 @@ struct ntfs_sb_info {
struct ntfs_mount_options *options;
struct ratelimit_state msg_ratelimit;
+ struct proc_dir_entry *procdir;
};
/* One MFT record(usually 1024 bytes), consists of attributes. */
@@ -465,8 +467,7 @@ int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name,
struct ATTR_LIST_ENTRY **new_le);
bool al_remove_le(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le);
bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn,
- const __le16 *name, size_t name_len,
- const struct MFT_REF *ref);
+ const __le16 *name, u8 name_len, const struct MFT_REF *ref);
int al_update(struct ntfs_inode *ni, int sync);
static inline size_t al_aligned(size_t size)
{
@@ -525,7 +526,7 @@ struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
int ni_load_all_mi(struct ntfs_inode *ni);
bool ni_add_subrecord(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi);
int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type,
- const __le16 *name, size_t name_len, bool base_only,
+ const __le16 *name, u8 name_len, bool base_only,
const __le16 *id);
int ni_create_attr_list(struct ntfs_inode *ni);
int ni_expand_list(struct ntfs_inode *ni);
@@ -542,7 +543,7 @@ void ni_remove_attr_le(struct ntfs_inode *ni, struct ATTRIB *attr,
struct mft_inode *mi, struct ATTR_LIST_ENTRY *le);
int ni_delete_all(struct ntfs_inode *ni);
struct ATTR_FILE_NAME *ni_fname_name(struct ntfs_inode *ni,
- const struct cpu_str *uni,
+ const struct le_str *uni,
const struct MFT_REF *home,
struct mft_inode **mi,
struct ATTR_LIST_ENTRY **entry);
@@ -629,7 +630,7 @@ int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run);
int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run,
u64 vbo, u64 *lbo, u64 *bytes);
struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST nRec,
- bool dir);
+ enum RECORD_FLAG flag);
extern const u8 s_default_security[0x50];
bool is_sd_valid(const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 len);
int ntfs_security_init(struct ntfs_sb_info *sbi);
@@ -647,8 +648,10 @@ int ntfs_insert_reparse(struct ntfs_sb_info *sbi, __le32 rtag,
int ntfs_remove_reparse(struct ntfs_sb_info *sbi, __le32 rtag,
const struct MFT_REF *ref);
void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim);
-int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim);
+int run_deallocate(struct ntfs_sb_info *sbi, const struct runs_tree *run,
+ bool trim);
bool valid_windows_name(struct ntfs_sb_info *sbi, const struct le_str *name);
+int ntfs_set_label(struct ntfs_sb_info *sbi, u8 *label, int len);
/* Globals from index.c */
int indx_used_bit(struct ntfs_index *indx, struct ntfs_inode *ni, size_t *bit);
@@ -706,8 +709,8 @@ int ntfs_sync_inode(struct inode *inode);
int ntfs_flush_inodes(struct super_block *sb, struct inode *i1,
struct inode *i2);
int inode_write_data(struct inode *inode, const void *data, size_t bytes);
-struct inode *ntfs_create_inode(struct mnt_idmap *idmap,
- struct inode *dir, struct dentry *dentry,
+struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry,
const struct cpu_str *uni, umode_t mode,
dev_t dev, const char *symname, u32 size,
struct ntfs_fnd *fnd);
@@ -736,7 +739,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr);
// TODO: id?
struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr,
enum ATTR_TYPE type, const __le16 *name,
- size_t name_len, const __le16 *id);
+ u8 name_len, const __le16 *id);
static inline struct ATTRIB *rec_find_attr_le(struct mft_inode *rec,
struct ATTR_LIST_ENTRY *le)
{
@@ -856,12 +859,12 @@ unsigned long ntfs_names_hash(const u16 *name, size_t len, const u16 *upcase,
/* globals from xattr.c */
#ifdef CONFIG_NTFS3_FS_POSIX_ACL
-struct posix_acl *ntfs_get_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, int type);
+struct posix_acl *ntfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry,
+ int type);
int ntfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
int ntfs_init_acl(struct mnt_idmap *idmap, struct inode *inode,
- struct inode *dir);
+ struct inode *dir);
#else
#define ntfs_get_acl NULL
#define ntfs_set_acl NULL
diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c
index 2a281cead2bc..c12ebffc94da 100644
--- a/fs/ntfs3/record.c
+++ b/fs/ntfs3/record.c
@@ -124,7 +124,7 @@ int mi_read(struct mft_inode *mi, bool is_mft)
struct rw_semaphore *rw_lock = NULL;
if (is_mounted(sbi)) {
- if (!is_mft) {
+ if (!is_mft && mft_ni) {
rw_lock = &mft_ni->file.run_lock;
down_read(rw_lock);
}
@@ -148,7 +148,7 @@ int mi_read(struct mft_inode *mi, bool is_mft)
ni_lock(mft_ni);
down_write(rw_lock);
}
- err = attr_load_runs_vcn(mft_ni, ATTR_DATA, NULL, 0, &mft_ni->file.run,
+ err = attr_load_runs_vcn(mft_ni, ATTR_DATA, NULL, 0, run,
vbo >> sbi->cluster_bits);
if (rw_lock) {
up_write(rw_lock);
@@ -180,6 +180,12 @@ ok:
return 0;
out:
+ if (err == -E_NTFS_CORRUPT) {
+ ntfs_err(sbi->sb, "mft corrupted");
+ ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
+ err = -EINVAL;
+ }
+
return err;
}
@@ -296,7 +302,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
*/
struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr,
enum ATTR_TYPE type, const __le16 *name,
- size_t name_len, const __le16 *id)
+ u8 name_len, const __le16 *id)
{
u32 type_in = le32_to_cpu(type);
u32 atype;
@@ -382,6 +388,8 @@ int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno,
rec->seq = cpu_to_le16(seq);
rec->flags = RECORD_FLAG_IN_USE | flags;
+ if (MFTRECORD_FIXUP_OFFSET == MFTRECORD_FIXUP_OFFSET_3)
+ rec->mft_record = cpu_to_le32(rno);
mi->dirty = true;
diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c
index 47612d16c027..cb8cf0161177 100644
--- a/fs/ntfs3/run.c
+++ b/fs/ntfs3/run.c
@@ -434,8 +434,8 @@ requires_new_range:
if (should_add_tail) {
tail_lcn = r->lcn == SPARSE_LCN ?
- SPARSE_LCN :
- (r->lcn + Tovcn);
+ SPARSE_LCN :
+ (r->lcn + Tovcn);
tail_vcn = r->vcn + Tovcn;
tail_len = r->len - Tovcn;
}
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 5158dd31fd97..1a02072b6b0e 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -57,6 +57,7 @@
#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/nls.h>
+#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/statfs.h>
@@ -116,8 +117,8 @@ void ntfs_inode_printk(struct inode *inode, const char *fmt, ...)
/* Use static allocated buffer, if possible. */
name = atomic_dec_and_test(&s_name_buf_cnt) ?
- s_name_buf :
- kmalloc(sizeof(s_name_buf), GFP_NOFS);
+ s_name_buf :
+ kmalloc(sizeof(s_name_buf), GFP_NOFS);
if (name) {
struct dentry *de = d_find_alias(inode);
@@ -257,6 +258,7 @@ enum Opt {
Opt_err,
};
+// clang-format off
static const struct fs_parameter_spec ntfs_fs_parameters[] = {
fsparam_u32("uid", Opt_uid),
fsparam_u32("gid", Opt_gid),
@@ -277,9 +279,13 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = {
fsparam_flag_no("nocase", Opt_nocase),
{}
};
+// clang-format on
/*
* Load nls table or if @nls is utf8 then return NULL.
+ *
+ * It is good idea to use here "const char *nls".
+ * But load_nls accepts "char*".
*/
static struct nls_table *ntfs_load_nls(char *nls)
{
@@ -436,6 +442,103 @@ static int ntfs_fs_reconfigure(struct fs_context *fc)
return 0;
}
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry *proc_info_root;
+
+/*
+ * ntfs3_volinfo:
+ *
+ * The content of /proc/fs/ntfs3/<dev>/volinfo
+ *
+ * ntfs3.1
+ * cluster size
+ * number of clusters
+*/
+static int ntfs3_volinfo(struct seq_file *m, void *o)
+{
+ struct super_block *sb = m->private;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+
+ seq_printf(m, "ntfs%d.%d\n%u\n%zu\n", sbi->volume.major_ver,
+ sbi->volume.minor_ver, sbi->cluster_size,
+ sbi->used.bitmap.nbits);
+
+ return 0;
+}
+
+static int ntfs3_volinfo_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ntfs3_volinfo, pde_data(inode));
+}
+
+/* read /proc/fs/ntfs3/<dev>/label */
+static int ntfs3_label_show(struct seq_file *m, void *o)
+{
+ struct super_block *sb = m->private;
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+
+ seq_printf(m, "%s\n", sbi->volume.label);
+
+ return 0;
+}
+
+/* write /proc/fs/ntfs3/<dev>/label */
+static ssize_t ntfs3_label_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ int err;
+ struct super_block *sb = pde_data(file_inode(file));
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ ssize_t ret = count;
+ u8 *label = kmalloc(count, GFP_NOFS);
+
+ if (!label)
+ return -ENOMEM;
+
+ if (copy_from_user(label, buffer, ret)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ while (ret > 0 && label[ret - 1] == '\n')
+ ret -= 1;
+
+ err = ntfs_set_label(sbi, label, ret);
+
+ if (err < 0) {
+ ntfs_err(sb, "failed (%d) to write label", err);
+ ret = err;
+ goto out;
+ }
+
+ *ppos += count;
+ ret = count;
+out:
+ kfree(label);
+ return ret;
+}
+
+static int ntfs3_label_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ntfs3_label_show, pde_data(inode));
+}
+
+static const struct proc_ops ntfs3_volinfo_fops = {
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
+ .proc_open = ntfs3_volinfo_open,
+};
+
+static const struct proc_ops ntfs3_label_fops = {
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
+ .proc_open = ntfs3_label_open,
+ .proc_write = ntfs3_label_write,
+};
+
+#endif
+
static struct kmem_cache *ntfs_inode_cachep;
static struct inode *ntfs_alloc_inode(struct super_block *sb)
@@ -510,6 +613,16 @@ static void ntfs_put_super(struct super_block *sb)
{
struct ntfs_sb_info *sbi = sb->s_fs_info;
+#ifdef CONFIG_PROC_FS
+ // Remove /proc/fs/ntfs3/..
+ if (sbi->procdir) {
+ remove_proc_entry("label", sbi->procdir);
+ remove_proc_entry("volinfo", sbi->procdir);
+ remove_proc_entry(sb->s_id, proc_info_root);
+ sbi->procdir = NULL;
+ }
+#endif
+
/* Mark rw ntfs as clear, if possible. */
ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
@@ -711,9 +824,16 @@ static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot)
/*
* ntfs_init_from_boot - Init internal info from on-disk boot sector.
+ *
+ * NTFS mount begins from boot - special formatted 512 bytes.
+ * There are two boots: the first and the last 512 bytes of volume.
+ * The content of boot is not changed during ntfs life.
+ *
+ * NOTE: ntfs.sys checks only first (primary) boot.
+ * chkdsk checks both boots.
*/
static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
- u64 dev_size)
+ u64 dev_size, struct NTFS_BOOT **boot2)
{
struct ntfs_sb_info *sbi = sb->s_fs_info;
int err;
@@ -724,6 +844,8 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
struct MFT_REC *rec;
u16 fn, ao;
u8 cluster_bits;
+ u32 boot_off = 0;
+ const char *hint = "Primary boot";
sbi->volume.blocks = dev_size >> PAGE_SHIFT;
@@ -731,11 +853,12 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
if (!bh)
return -EIO;
+check_boot:
err = -EINVAL;
- boot = (struct NTFS_BOOT *)bh->b_data;
+ boot = (struct NTFS_BOOT *)Add2Ptr(bh->b_data, boot_off);
if (memcmp(boot->system_id, "NTFS ", sizeof("NTFS ") - 1)) {
- ntfs_err(sb, "Boot's signature is not NTFS.");
+ ntfs_err(sb, "%s signature is not NTFS.", hint);
goto out;
}
@@ -748,14 +871,16 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
boot->bytes_per_sector[0];
if (boot_sector_size < SECTOR_SIZE ||
!is_power_of_2(boot_sector_size)) {
- ntfs_err(sb, "Invalid bytes per sector %u.", boot_sector_size);
+ ntfs_err(sb, "%s: invalid bytes per sector %u.", hint,
+ boot_sector_size);
goto out;
}
/* cluster size: 512, 1K, 2K, 4K, ... 2M */
sct_per_clst = true_sectors_per_clst(boot);
if ((int)sct_per_clst < 0 || !is_power_of_2(sct_per_clst)) {
- ntfs_err(sb, "Invalid sectors per cluster %u.", sct_per_clst);
+ ntfs_err(sb, "%s: invalid sectors per cluster %u.", hint,
+ sct_per_clst);
goto out;
}
@@ -771,20 +896,20 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
if (mlcn * sct_per_clst >= sectors || mlcn2 * sct_per_clst >= sectors) {
ntfs_err(
sb,
- "Start of MFT 0x%llx (0x%llx) is out of volume 0x%llx.",
- mlcn, mlcn2, sectors);
+ "%s: start of MFT 0x%llx (0x%llx) is out of volume 0x%llx.",
+ hint, mlcn, mlcn2, sectors);
goto out;
}
sbi->record_size = record_size =
boot->record_size < 0 ? 1 << (-boot->record_size) :
- (u32)boot->record_size << cluster_bits;
+ (u32)boot->record_size << cluster_bits;
sbi->record_bits = blksize_bits(record_size);
sbi->attr_size_tr = (5 * record_size >> 4); // ~320 bytes
/* Check MFT record size. */
if (record_size < SECTOR_SIZE || !is_power_of_2(record_size)) {
- ntfs_err(sb, "Invalid bytes per MFT record %u (%d).",
+ ntfs_err(sb, "%s: invalid bytes per MFT record %u (%d).", hint,
record_size, boot->record_size);
goto out;
}
@@ -796,18 +921,18 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
}
sbi->index_size = boot->index_size < 0 ?
- 1u << (-boot->index_size) :
- (u32)boot->index_size << cluster_bits;
+ 1u << (-boot->index_size) :
+ (u32)boot->index_size << cluster_bits;
/* Check index record size. */
if (sbi->index_size < SECTOR_SIZE || !is_power_of_2(sbi->index_size)) {
- ntfs_err(sb, "Invalid bytes per index %u(%d).", sbi->index_size,
- boot->index_size);
+ ntfs_err(sb, "%s: invalid bytes per index %u(%d).", hint,
+ sbi->index_size, boot->index_size);
goto out;
}
if (sbi->index_size > MAXIMUM_BYTES_PER_INDEX) {
- ntfs_err(sb, "Unsupported bytes per index %u.",
+ ntfs_err(sb, "%s: unsupported bytes per index %u.", hint,
sbi->index_size);
goto out;
}
@@ -834,7 +959,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
/* Compare boot's cluster and sector. */
if (sbi->cluster_size < boot_sector_size) {
- ntfs_err(sb, "Invalid bytes per cluster (%u).",
+ ntfs_err(sb, "%s: invalid bytes per cluster (%u).", hint,
sbi->cluster_size);
goto out;
}
@@ -850,7 +975,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
}
sbi->max_bytes_per_attr =
- record_size - ALIGN(MFTRECORD_FIXUP_OFFSET_1, 8) -
+ record_size - ALIGN(MFTRECORD_FIXUP_OFFSET, 8) -
ALIGN(((record_size >> SECTOR_SHIFT) * sizeof(short)), 8) -
ALIGN(sizeof(enum ATTR_TYPE), 8);
@@ -892,10 +1017,10 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
sbi->new_rec = rec;
rec->rhdr.sign = NTFS_FILE_SIGNATURE;
- rec->rhdr.fix_off = cpu_to_le16(MFTRECORD_FIXUP_OFFSET_1);
+ rec->rhdr.fix_off = cpu_to_le16(MFTRECORD_FIXUP_OFFSET);
fn = (sbi->record_size >> SECTOR_SHIFT) + 1;
rec->rhdr.fix_num = cpu_to_le16(fn);
- ao = ALIGN(MFTRECORD_FIXUP_OFFSET_1 + sizeof(short) * fn, 8);
+ ao = ALIGN(MFTRECORD_FIXUP_OFFSET + sizeof(short) * fn, 8);
rec->attr_off = cpu_to_le16(ao);
rec->used = cpu_to_le32(ao + ALIGN(sizeof(enum ATTR_TYPE), 8));
rec->total = cpu_to_le32(sbi->record_size);
@@ -930,7 +1055,34 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
err = 0;
+ if (bh->b_blocknr && !sb_rdonly(sb)) {
+ /*
+ * Alternative boot is ok but primary is not ok.
+ * Do not update primary boot here 'cause it may be faked boot.
+ * Let ntfs to be mounted and update boot later.
+ */
+ *boot2 = kmemdup(boot, sizeof(*boot), GFP_NOFS | __GFP_NOWARN);
+ }
+
out:
+ if (err == -EINVAL && !bh->b_blocknr && dev_size > PAGE_SHIFT) {
+ u32 block_size = min_t(u32, sector_size, PAGE_SIZE);
+ u64 lbo = dev_size - sizeof(*boot);
+
+ /*
+ * Try alternative boot (last sector)
+ */
+ brelse(bh);
+
+ sb_set_blocksize(sb, block_size);
+ bh = ntfs_bread(sb, lbo >> blksize_bits(block_size));
+ if (!bh)
+ return -EINVAL;
+
+ boot_off = lbo & (block_size - 1);
+ hint = "Alternative boot";
+ goto check_boot;
+ }
brelse(bh);
return err;
@@ -955,6 +1107,8 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
struct ATTR_DEF_ENTRY *t;
u16 *shared;
struct MFT_REF ref;
+ bool ro = sb_rdonly(sb);
+ struct NTFS_BOOT *boot2 = NULL;
ref.high = 0;
@@ -985,7 +1139,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
/* Parse boot. */
err = ntfs_init_from_boot(sb, bdev_logical_block_size(bdev),
- bdev_nr_bytes(bdev));
+ bdev_nr_bytes(bdev), &boot2);
if (err)
goto out;
@@ -1035,6 +1189,10 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
sbi->volume.minor_ver = info->minor_ver;
sbi->volume.flags = info->flags;
sbi->volume.ni = ni;
+ if (info->flags & VOLUME_FLAG_DIRTY) {
+ sbi->volume.real_dirty = true;
+ ntfs_info(sb, "It is recommened to use chkdsk.");
+ }
/* Load $MFTMirr to estimate recs_mirr. */
ref.low = cpu_to_le32(MFT_REC_MIRR);
@@ -1069,21 +1227,16 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
iput(inode);
- if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) {
- if (!sb_rdonly(sb)) {
- ntfs_warn(sb,
- "failed to replay log file. Can't mount rw!");
- err = -EINVAL;
- goto out;
- }
- } else if (sbi->volume.flags & VOLUME_FLAG_DIRTY) {
- if (!sb_rdonly(sb) && !options->force) {
- ntfs_warn(
- sb,
- "volume is dirty and \"force\" flag is not set!");
- err = -EINVAL;
- goto out;
- }
+ if ((sbi->flags & NTFS_FLAGS_NEED_REPLAY) && !ro) {
+ ntfs_warn(sb, "failed to replay log file. Can't mount rw!");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if ((sbi->volume.flags & VOLUME_FLAG_DIRTY) && !ro && !options->force) {
+ ntfs_warn(sb, "volume is dirty and \"force\" flag is not set!");
+ err = -EINVAL;
+ goto out;
}
/* Load $MFT. */
@@ -1173,7 +1326,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
bad_len += len;
bad_frags += 1;
- if (sb_rdonly(sb))
+ if (ro)
continue;
if (wnd_set_used_safe(&sbi->used.bitmap, lcn, len, &tt) || tt) {
@@ -1368,6 +1521,44 @@ load_root:
goto put_inode_out;
}
+ if (boot2) {
+ /*
+ * Alternative boot is ok but primary is not ok.
+ * Volume is recognized as NTFS. Update primary boot.
+ */
+ struct buffer_head *bh0 = sb_getblk(sb, 0);
+ if (bh0) {
+ if (buffer_locked(bh0))
+ __wait_on_buffer(bh0);
+
+ lock_buffer(bh0);
+ memcpy(bh0->b_data, boot2, sizeof(*boot2));
+ set_buffer_uptodate(bh0);
+ mark_buffer_dirty(bh0);
+ unlock_buffer(bh0);
+ if (!sync_dirty_buffer(bh0))
+ ntfs_warn(sb, "primary boot is updated");
+ put_bh(bh0);
+ }
+
+ kfree(boot2);
+ }
+
+#ifdef CONFIG_PROC_FS
+ /* Create /proc/fs/ntfs3/.. */
+ if (proc_info_root) {
+ struct proc_dir_entry *e = proc_mkdir(sb->s_id, proc_info_root);
+ static_assert((S_IRUGO | S_IWUSR) == 0644);
+ if (e) {
+ proc_create_data("volinfo", S_IRUGO, e,
+ &ntfs3_volinfo_fops, sb);
+ proc_create_data("label", S_IRUGO | S_IWUSR, e,
+ &ntfs3_label_fops, sb);
+ sbi->procdir = e;
+ }
+ }
+#endif
+
return 0;
put_inode_out:
@@ -1380,6 +1571,7 @@ out:
put_mount_options(sbi->options);
put_ntfs(sbi);
sb->s_fs_info = NULL;
+ kfree(boot2);
return err;
}
@@ -1473,12 +1665,14 @@ static void ntfs_fs_free(struct fs_context *fc)
put_mount_options(opts);
}
+// clang-format off
static const struct fs_context_operations ntfs_context_ops = {
.parse_param = ntfs_fs_parse_param,
.get_tree = ntfs_fs_get_tree,
.reconfigure = ntfs_fs_reconfigure,
.free = ntfs_fs_free,
};
+// clang-format on
/*
* ntfs_init_fs_context - Initialize sbi and opts
@@ -1559,6 +1753,12 @@ static int __init init_ntfs_fs(void)
if (IS_ENABLED(CONFIG_NTFS3_LZX_XPRESS))
pr_info("ntfs3: Read-only LZX/Xpress compression included\n");
+
+#ifdef CONFIG_PROC_FS
+ /* Create "/proc/fs/ntfs3" */
+ proc_info_root = proc_mkdir("fs/ntfs3", NULL);
+#endif
+
err = ntfs3_init_bitmap();
if (err)
return err;
@@ -1590,6 +1790,12 @@ static void __exit exit_ntfs_fs(void)
kmem_cache_destroy(ntfs_inode_cachep);
unregister_filesystem(&ntfs_fs_type);
ntfs3_exit_bitmap();
+
+#ifdef CONFIG_PROC_FS
+ if (proc_info_root)
+ remove_proc_entry("fs/ntfs3", NULL);
+#endif
+
}
MODULE_LICENSE("GPL");
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index c3de60a4543f..023f314e8950 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -24,7 +24,7 @@
static inline size_t unpacked_ea_size(const struct EA_FULL *ea)
{
return ea->size ? le32_to_cpu(ea->size) :
- ALIGN(struct_size(ea, name,
+ ALIGN(struct_size(ea, name,
1 + ea->name_len +
le16_to_cpu(ea->elength)),
4);
@@ -141,6 +141,7 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea,
memset(Add2Ptr(ea_p, size), 0, add_bytes);
+ err = -EINVAL;
/* Check all attributes for consistency. */
for (off = 0; off < size; off += ea_size) {
const struct EA_FULL *ef = Add2Ptr(ea_p, off);
@@ -214,6 +215,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
ea = Add2Ptr(ea_all, off);
ea_size = unpacked_ea_size(ea);
+ if (!ea->name_len)
+ break;
+
if (buffer) {
if (ret + ea->name_len + 1 > bytes_per_buffer) {
err = -ERANGE;
@@ -524,8 +528,8 @@ out:
/*
* ntfs_get_acl - inode_operations::get_acl
*/
-struct posix_acl *ntfs_get_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, int type)
+struct posix_acl *ntfs_get_acl(struct mnt_idmap *idmap, struct dentry *dentry,
+ int type)
{
struct inode *inode = d_inode(dentry);
struct ntfs_inode *ni = ntfs_i(inode);
@@ -592,8 +596,7 @@ static noinline int ntfs_set_acl_ex(struct mnt_idmap *idmap,
case ACL_TYPE_ACCESS:
/* Do not change i_mode if we are in init_acl */
if (acl && !init_acl) {
- err = posix_acl_update_mode(idmap, inode, &mode,
- &acl);
+ err = posix_acl_update_mode(idmap, inode, &mode, &acl);
if (err)
return err;
}
@@ -816,10 +819,9 @@ out:
* ntfs_setxattr - inode_operations::setxattr
*/
static noinline int ntfs_setxattr(const struct xattr_handler *handler,
- struct mnt_idmap *idmap,
- struct dentry *de, struct inode *inode,
- const char *name, const void *value,
- size_t size, int flags)
+ struct mnt_idmap *idmap, struct dentry *de,
+ struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
{
int err = -EINVAL;
struct ntfs_inode *ni = ntfs_i(inode);
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index 304d12186ccd..3123da7cfb30 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -17,9 +17,9 @@ config OCFS2_FS
You'll want to install the ocfs2-tools package in order to at least
get "mount.ocfs2".
- Project web page: https://oss.oracle.com/projects/ocfs2
- Tools web page: https://oss.oracle.com/projects/ocfs2-tools
- OCFS2 mailing lists: https://oss.oracle.com/projects/ocfs2/mailman/
+ Project web page: https://ocfs2.wiki.kernel.org/
+ Tools web page: https://github.com/markfasheh/ocfs2-tools
+ OCFS2 mailing lists: https://subspace.kernel.org/lists.linux.dev.html
For more information on OCFS2, see the file
<file:Documentation/filesystems/ocfs2.rst>.
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 4142d1a457ff..9402591f12aa 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -70,14 +70,6 @@ enum {
OVL_XINO_ON,
};
-/* The set of options that user requested explicitly via mount options */
-struct ovl_opt_set {
- bool metacopy;
- bool redirect;
- bool nfs_export;
- bool index;
-};
-
/*
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
* where:
@@ -368,30 +360,6 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
}
-
-/* params.c */
-#define OVL_MAX_STACK 500
-
-struct ovl_fs_context_layer {
- char *name;
- struct path path;
-};
-
-struct ovl_fs_context {
- struct path upper;
- struct path work;
- size_t capacity;
- size_t nr; /* includes nr_data */
- size_t nr_data;
- struct ovl_opt_set set;
- struct ovl_fs_context_layer *lower;
-};
-
-int ovl_parse_param_upperdir(const char *name, struct fs_context *fc,
- bool workdir);
-int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc);
-void ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx);
-
/* util.c */
int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
@@ -791,3 +759,12 @@ int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
/* export.c */
extern const struct export_operations ovl_export_operations;
+
+/* super.c */
+int ovl_fill_super(struct super_block *sb, struct fs_context *fc);
+
+/* Will this overlay be forced to mount/remount ro? */
+static inline bool ovl_force_readonly(struct ovl_fs *ofs)
+{
+ return (!ovl_upper_mnt(ofs) || !ofs->workdir);
+}
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
index d17d6b483dd0..a63160dbb0f9 100644
--- a/fs/overlayfs/params.c
+++ b/fs/overlayfs/params.c
@@ -1,12 +1,124 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/fs.h>
+#include <linux/module.h>
#include <linux/namei.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/posix_acl_xattr.h>
+#include <linux/seq_file.h>
#include <linux/xattr.h>
#include "overlayfs.h"
+#include "params.h"
+
+static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
+module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
+MODULE_PARM_DESC(redirect_dir,
+ "Default to on or off for the redirect_dir feature");
+
+static bool ovl_redirect_always_follow =
+ IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
+module_param_named(redirect_always_follow, ovl_redirect_always_follow,
+ bool, 0644);
+MODULE_PARM_DESC(redirect_always_follow,
+ "Follow redirects even if redirect_dir feature is turned off");
+
+static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
+module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
+MODULE_PARM_DESC(xino_auto,
+ "Auto enable xino feature");
+
+static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
+module_param_named(index, ovl_index_def, bool, 0644);
+MODULE_PARM_DESC(index,
+ "Default to on or off for the inodes index feature");
+
+static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
+module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
+MODULE_PARM_DESC(nfs_export,
+ "Default to on or off for the NFS export feature");
+
+static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
+module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
+MODULE_PARM_DESC(metacopy,
+ "Default to on or off for the metadata only copy up feature");
+
+enum {
+ Opt_lowerdir,
+ Opt_upperdir,
+ Opt_workdir,
+ Opt_default_permissions,
+ Opt_redirect_dir,
+ Opt_index,
+ Opt_uuid,
+ Opt_nfs_export,
+ Opt_userxattr,
+ Opt_xino,
+ Opt_metacopy,
+ Opt_volatile,
+};
+
+static const struct constant_table ovl_parameter_bool[] = {
+ { "on", true },
+ { "off", false },
+ {}
+};
+
+static const struct constant_table ovl_parameter_xino[] = {
+ { "off", OVL_XINO_OFF },
+ { "auto", OVL_XINO_AUTO },
+ { "on", OVL_XINO_ON },
+ {}
+};
+
+const char *ovl_xino_mode(struct ovl_config *config)
+{
+ return ovl_parameter_xino[config->xino].name;
+}
+
+static int ovl_xino_def(void)
+{
+ return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
+}
+
+const struct constant_table ovl_parameter_redirect_dir[] = {
+ { "off", OVL_REDIRECT_OFF },
+ { "follow", OVL_REDIRECT_FOLLOW },
+ { "nofollow", OVL_REDIRECT_NOFOLLOW },
+ { "on", OVL_REDIRECT_ON },
+ {}
+};
+
+static const char *ovl_redirect_mode(struct ovl_config *config)
+{
+ return ovl_parameter_redirect_dir[config->redirect_mode].name;
+}
+
+static int ovl_redirect_mode_def(void)
+{
+ return ovl_redirect_dir_def ? OVL_REDIRECT_ON :
+ ovl_redirect_always_follow ? OVL_REDIRECT_FOLLOW :
+ OVL_REDIRECT_NOFOLLOW;
+}
+
+#define fsparam_string_empty(NAME, OPT) \
+ __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
+
+const struct fs_parameter_spec ovl_parameter_spec[] = {
+ fsparam_string_empty("lowerdir", Opt_lowerdir),
+ fsparam_string("upperdir", Opt_upperdir),
+ fsparam_string("workdir", Opt_workdir),
+ fsparam_flag("default_permissions", Opt_default_permissions),
+ fsparam_enum("redirect_dir", Opt_redirect_dir, ovl_parameter_redirect_dir),
+ fsparam_enum("index", Opt_index, ovl_parameter_bool),
+ fsparam_enum("uuid", Opt_uuid, ovl_parameter_bool),
+ fsparam_enum("nfs_export", Opt_nfs_export, ovl_parameter_bool),
+ fsparam_flag("userxattr", Opt_userxattr),
+ fsparam_enum("xino", Opt_xino, ovl_parameter_xino),
+ fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool),
+ fsparam_flag("volatile", Opt_volatile),
+ {}
+};
static ssize_t ovl_parse_param_split_lowerdirs(char *str)
{
@@ -110,8 +222,8 @@ static int ovl_mount_dir(const char *name, struct path *path)
return err;
}
-int ovl_parse_param_upperdir(const char *name, struct fs_context *fc,
- bool workdir)
+static int ovl_parse_param_upperdir(const char *name, struct fs_context *fc,
+ bool workdir)
{
int err;
struct ovl_fs *ofs = fc->s_fs_info;
@@ -154,7 +266,7 @@ int ovl_parse_param_upperdir(const char *name, struct fs_context *fc,
return 0;
}
-void ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx)
+static void ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx)
{
for (size_t nr = 0; nr < ctx->nr; nr++) {
path_put(&ctx->lower[nr].path);
@@ -179,7 +291,7 @@ void ovl_parse_param_drop_lowerdir(struct ovl_fs_context *ctx)
* Append data "/lower5" as data lower layer. This requires that
* there's at least one regular lower layer present.
*/
-int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
+static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
{
int err;
struct ovl_fs_context *ctx = fc->fs_private;
@@ -387,3 +499,415 @@ out_err:
/* Intentionally don't realloc to a smaller size. */
return err;
}
+
+static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ int err = 0;
+ struct fs_parse_result result;
+ struct ovl_fs *ofs = fc->s_fs_info;
+ struct ovl_config *config = &ofs->config;
+ struct ovl_fs_context *ctx = fc->fs_private;
+ int opt;
+
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ /*
+ * On remount overlayfs has always ignored all mount
+ * options no matter if malformed or not so for
+ * backwards compatibility we do the same here.
+ */
+ if (fc->oldapi)
+ return 0;
+
+ /*
+ * Give us the freedom to allow changing mount options
+ * with the new mount api in the future. So instead of
+ * silently ignoring everything we report a proper
+ * error. This is only visible for users of the new
+ * mount api.
+ */
+ return invalfc(fc, "No changes allowed in reconfigure");
+ }
+
+ opt = fs_parse(fc, ovl_parameter_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_lowerdir:
+ err = ovl_parse_param_lowerdir(param->string, fc);
+ break;
+ case Opt_upperdir:
+ fallthrough;
+ case Opt_workdir:
+ err = ovl_parse_param_upperdir(param->string, fc,
+ (Opt_workdir == opt));
+ break;
+ case Opt_default_permissions:
+ config->default_permissions = true;
+ break;
+ case Opt_redirect_dir:
+ config->redirect_mode = result.uint_32;
+ if (config->redirect_mode == OVL_REDIRECT_OFF) {
+ config->redirect_mode = ovl_redirect_always_follow ?
+ OVL_REDIRECT_FOLLOW :
+ OVL_REDIRECT_NOFOLLOW;
+ }
+ ctx->set.redirect = true;
+ break;
+ case Opt_index:
+ config->index = result.uint_32;
+ ctx->set.index = true;
+ break;
+ case Opt_uuid:
+ config->uuid = result.uint_32;
+ break;
+ case Opt_nfs_export:
+ config->nfs_export = result.uint_32;
+ ctx->set.nfs_export = true;
+ break;
+ case Opt_xino:
+ config->xino = result.uint_32;
+ break;
+ case Opt_metacopy:
+ config->metacopy = result.uint_32;
+ ctx->set.metacopy = true;
+ break;
+ case Opt_volatile:
+ config->ovl_volatile = true;
+ break;
+ case Opt_userxattr:
+ config->userxattr = true;
+ break;
+ default:
+ pr_err("unrecognized mount option \"%s\" or missing value\n",
+ param->key);
+ return -EINVAL;
+ }
+
+ return err;
+}
+
+static int ovl_get_tree(struct fs_context *fc)
+{
+ return get_tree_nodev(fc, ovl_fill_super);
+}
+
+static inline void ovl_fs_context_free(struct ovl_fs_context *ctx)
+{
+ ovl_parse_param_drop_lowerdir(ctx);
+ path_put(&ctx->upper);
+ path_put(&ctx->work);
+ kfree(ctx->lower);
+ kfree(ctx);
+}
+
+static void ovl_free(struct fs_context *fc)
+{
+ struct ovl_fs *ofs = fc->s_fs_info;
+ struct ovl_fs_context *ctx = fc->fs_private;
+
+ /*
+ * ofs is stored in the fs_context when it is initialized.
+ * ofs is transferred to the superblock on a successful mount,
+ * but if an error occurs before the transfer we have to free
+ * it here.
+ */
+ if (ofs)
+ ovl_free_fs(ofs);
+
+ if (ctx)
+ ovl_fs_context_free(ctx);
+}
+
+static int ovl_reconfigure(struct fs_context *fc)
+{
+ struct super_block *sb = fc->root->d_sb;
+ struct ovl_fs *ofs = sb->s_fs_info;
+ struct super_block *upper_sb;
+ int ret = 0;
+
+ if (!(fc->sb_flags & SB_RDONLY) && ovl_force_readonly(ofs))
+ return -EROFS;
+
+ if (fc->sb_flags & SB_RDONLY && !sb_rdonly(sb)) {
+ upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
+ if (ovl_should_sync(ofs)) {
+ down_read(&upper_sb->s_umount);
+ ret = sync_filesystem(upper_sb);
+ up_read(&upper_sb->s_umount);
+ }
+ }
+
+ return ret;
+}
+
+static const struct fs_context_operations ovl_context_ops = {
+ .parse_param = ovl_parse_param,
+ .get_tree = ovl_get_tree,
+ .reconfigure = ovl_reconfigure,
+ .free = ovl_free,
+};
+
+/*
+ * This is called during fsopen() and will record the user namespace of
+ * the caller in fc->user_ns since we've raised FS_USERNS_MOUNT. We'll
+ * need it when we actually create the superblock to verify that the
+ * process creating the superblock is in the same user namespace as
+ * process that called fsopen().
+ */
+int ovl_init_fs_context(struct fs_context *fc)
+{
+ struct ovl_fs_context *ctx;
+ struct ovl_fs *ofs;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
+ if (!ctx)
+ return -ENOMEM;
+
+ /*
+ * By default we allocate for three lower layers. It's likely
+ * that it'll cover most users.
+ */
+ ctx->lower = kmalloc_array(3, sizeof(*ctx->lower), GFP_KERNEL_ACCOUNT);
+ if (!ctx->lower)
+ goto out_err;
+ ctx->capacity = 3;
+
+ ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
+ if (!ofs)
+ goto out_err;
+
+ ofs->config.redirect_mode = ovl_redirect_mode_def();
+ ofs->config.index = ovl_index_def;
+ ofs->config.uuid = true;
+ ofs->config.nfs_export = ovl_nfs_export_def;
+ ofs->config.xino = ovl_xino_def();
+ ofs->config.metacopy = ovl_metacopy_def;
+
+ fc->s_fs_info = ofs;
+ fc->fs_private = ctx;
+ fc->ops = &ovl_context_ops;
+ return 0;
+
+out_err:
+ ovl_fs_context_free(ctx);
+ return -ENOMEM;
+
+}
+
+void ovl_free_fs(struct ovl_fs *ofs)
+{
+ struct vfsmount **mounts;
+ unsigned i;
+
+ iput(ofs->workbasedir_trap);
+ iput(ofs->indexdir_trap);
+ iput(ofs->workdir_trap);
+ dput(ofs->whiteout);
+ dput(ofs->indexdir);
+ dput(ofs->workdir);
+ if (ofs->workdir_locked)
+ ovl_inuse_unlock(ofs->workbasedir);
+ dput(ofs->workbasedir);
+ if (ofs->upperdir_locked)
+ ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
+
+ /* Hack! Reuse ofs->layers as a vfsmount array before freeing it */
+ mounts = (struct vfsmount **) ofs->layers;
+ for (i = 0; i < ofs->numlayer; i++) {
+ iput(ofs->layers[i].trap);
+ mounts[i] = ofs->layers[i].mnt;
+ kfree(ofs->layers[i].name);
+ }
+ kern_unmount_array(mounts, ofs->numlayer);
+ kfree(ofs->layers);
+ for (i = 0; i < ofs->numfs; i++)
+ free_anon_bdev(ofs->fs[i].pseudo_dev);
+ kfree(ofs->fs);
+
+ kfree(ofs->config.upperdir);
+ kfree(ofs->config.workdir);
+ if (ofs->creator_cred)
+ put_cred(ofs->creator_cred);
+ kfree(ofs);
+}
+
+int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
+ struct ovl_config *config)
+{
+ struct ovl_opt_set set = ctx->set;
+
+ if (ctx->nr_data > 0 && !config->metacopy) {
+ pr_err("lower data-only dirs require metacopy support.\n");
+ return -EINVAL;
+ }
+
+ /* Workdir/index are useless in non-upper mount */
+ if (!config->upperdir) {
+ if (config->workdir) {
+ pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
+ config->workdir);
+ kfree(config->workdir);
+ config->workdir = NULL;
+ }
+ if (config->index && set.index) {
+ pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
+ set.index = false;
+ }
+ config->index = false;
+ }
+
+ if (!config->upperdir && config->ovl_volatile) {
+ pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
+ config->ovl_volatile = false;
+ }
+
+ /*
+ * This is to make the logic below simpler. It doesn't make any other
+ * difference, since redirect_dir=on is only used for upper.
+ */
+ if (!config->upperdir && config->redirect_mode == OVL_REDIRECT_FOLLOW)
+ config->redirect_mode = OVL_REDIRECT_ON;
+
+ /* Resolve metacopy -> redirect_dir dependency */
+ if (config->metacopy && config->redirect_mode != OVL_REDIRECT_ON) {
+ if (set.metacopy && set.redirect) {
+ pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
+ ovl_redirect_mode(config));
+ return -EINVAL;
+ }
+ if (set.redirect) {
+ /*
+ * There was an explicit redirect_dir=... that resulted
+ * in this conflict.
+ */
+ pr_info("disabling metacopy due to redirect_dir=%s\n",
+ ovl_redirect_mode(config));
+ config->metacopy = false;
+ } else {
+ /* Automatically enable redirect otherwise. */
+ config->redirect_mode = OVL_REDIRECT_ON;
+ }
+ }
+
+ /* Resolve nfs_export -> index dependency */
+ if (config->nfs_export && !config->index) {
+ if (!config->upperdir &&
+ config->redirect_mode != OVL_REDIRECT_NOFOLLOW) {
+ pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
+ config->nfs_export = false;
+ } else if (set.nfs_export && set.index) {
+ pr_err("conflicting options: nfs_export=on,index=off\n");
+ return -EINVAL;
+ } else if (set.index) {
+ /*
+ * There was an explicit index=off that resulted
+ * in this conflict.
+ */
+ pr_info("disabling nfs_export due to index=off\n");
+ config->nfs_export = false;
+ } else {
+ /* Automatically enable index otherwise. */
+ config->index = true;
+ }
+ }
+
+ /* Resolve nfs_export -> !metacopy dependency */
+ if (config->nfs_export && config->metacopy) {
+ if (set.nfs_export && set.metacopy) {
+ pr_err("conflicting options: nfs_export=on,metacopy=on\n");
+ return -EINVAL;
+ }
+ if (set.metacopy) {
+ /*
+ * There was an explicit metacopy=on that resulted
+ * in this conflict.
+ */
+ pr_info("disabling nfs_export due to metacopy=on\n");
+ config->nfs_export = false;
+ } else {
+ /*
+ * There was an explicit nfs_export=on that resulted
+ * in this conflict.
+ */
+ pr_info("disabling metacopy due to nfs_export=on\n");
+ config->metacopy = false;
+ }
+ }
+
+
+ /* Resolve userxattr -> !redirect && !metacopy dependency */
+ if (config->userxattr) {
+ if (set.redirect &&
+ config->redirect_mode != OVL_REDIRECT_NOFOLLOW) {
+ pr_err("conflicting options: userxattr,redirect_dir=%s\n",
+ ovl_redirect_mode(config));
+ return -EINVAL;
+ }
+ if (config->metacopy && set.metacopy) {
+ pr_err("conflicting options: userxattr,metacopy=on\n");
+ return -EINVAL;
+ }
+ /*
+ * Silently disable default setting of redirect and metacopy.
+ * This shall be the default in the future as well: these
+ * options must be explicitly enabled if used together with
+ * userxattr.
+ */
+ config->redirect_mode = OVL_REDIRECT_NOFOLLOW;
+ config->metacopy = false;
+ }
+
+ return 0;
+}
+
+/**
+ * ovl_show_options
+ * @m: the seq_file handle
+ * @dentry: The dentry to query
+ *
+ * Prints the mount options for a given superblock.
+ * Returns zero; does not fail.
+ */
+int ovl_show_options(struct seq_file *m, struct dentry *dentry)
+{
+ struct super_block *sb = dentry->d_sb;
+ struct ovl_fs *ofs = sb->s_fs_info;
+ size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer;
+ const struct ovl_layer *data_layers = &ofs->layers[nr_merged_lower];
+
+ /* ofs->layers[0] is the upper layer */
+ seq_printf(m, ",lowerdir=%s", ofs->layers[1].name);
+ /* dump regular lower layers */
+ for (nr = 2; nr < nr_merged_lower; nr++)
+ seq_printf(m, ":%s", ofs->layers[nr].name);
+ /* dump data lower layers */
+ for (nr = 0; nr < ofs->numdatalayer; nr++)
+ seq_printf(m, "::%s", data_layers[nr].name);
+ if (ofs->config.upperdir) {
+ seq_show_option(m, "upperdir", ofs->config.upperdir);
+ seq_show_option(m, "workdir", ofs->config.workdir);
+ }
+ if (ofs->config.default_permissions)
+ seq_puts(m, ",default_permissions");
+ if (ofs->config.redirect_mode != ovl_redirect_mode_def())
+ seq_printf(m, ",redirect_dir=%s",
+ ovl_redirect_mode(&ofs->config));
+ if (ofs->config.index != ovl_index_def)
+ seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
+ if (!ofs->config.uuid)
+ seq_puts(m, ",uuid=off");
+ if (ofs->config.nfs_export != ovl_nfs_export_def)
+ seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
+ "on" : "off");
+ if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(ofs))
+ seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config));
+ if (ofs->config.metacopy != ovl_metacopy_def)
+ seq_printf(m, ",metacopy=%s",
+ ofs->config.metacopy ? "on" : "off");
+ if (ofs->config.ovl_volatile)
+ seq_puts(m, ",volatile");
+ if (ofs->config.userxattr)
+ seq_puts(m, ",userxattr");
+ return 0;
+}
diff --git a/fs/overlayfs/params.h b/fs/overlayfs/params.h
new file mode 100644
index 000000000000..8750da68ab2a
--- /dev/null
+++ b/fs/overlayfs/params.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+
+struct ovl_fs;
+struct ovl_config;
+
+extern const struct fs_parameter_spec ovl_parameter_spec[];
+extern const struct constant_table ovl_parameter_redirect_dir[];
+
+/* The set of options that user requested explicitly via mount options */
+struct ovl_opt_set {
+ bool metacopy;
+ bool redirect;
+ bool nfs_export;
+ bool index;
+};
+
+#define OVL_MAX_STACK 500
+
+struct ovl_fs_context_layer {
+ char *name;
+ struct path path;
+};
+
+struct ovl_fs_context {
+ struct path upper;
+ struct path work;
+ size_t capacity;
+ size_t nr; /* includes nr_data */
+ size_t nr_data;
+ struct ovl_opt_set set;
+ struct ovl_fs_context_layer *lower;
+};
+
+int ovl_init_fs_context(struct fs_context *fc);
+void ovl_free_fs(struct ovl_fs *ofs);
+int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
+ struct ovl_config *config);
+int ovl_show_options(struct seq_file *m, struct dentry *dentry);
+const char *ovl_xino_mode(struct ovl_config *config);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index c14c52560fd6..5b069f1a1e44 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -19,6 +19,7 @@
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include "overlayfs.h"
+#include "params.h"
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
@@ -27,38 +28,6 @@ MODULE_LICENSE("GPL");
struct ovl_dir_cache;
-static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
-module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
-MODULE_PARM_DESC(redirect_dir,
- "Default to on or off for the redirect_dir feature");
-
-static bool ovl_redirect_always_follow =
- IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
-module_param_named(redirect_always_follow, ovl_redirect_always_follow,
- bool, 0644);
-MODULE_PARM_DESC(redirect_always_follow,
- "Follow redirects even if redirect_dir feature is turned off");
-
-static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
-module_param_named(index, ovl_index_def, bool, 0644);
-MODULE_PARM_DESC(index,
- "Default to on or off for the inodes index feature");
-
-static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
-module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
-MODULE_PARM_DESC(nfs_export,
- "Default to on or off for the NFS export feature");
-
-static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
-module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
-MODULE_PARM_DESC(xino_auto,
- "Auto enable xino feature");
-
-static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
-module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
-MODULE_PARM_DESC(metacopy,
- "Default to on or off for the metadata only copy up feature");
-
static struct dentry *ovl_d_real(struct dentry *dentry,
const struct inode *inode)
{
@@ -211,43 +180,6 @@ static void ovl_destroy_inode(struct inode *inode)
kfree(oi->lowerdata_redirect);
}
-static void ovl_free_fs(struct ovl_fs *ofs)
-{
- struct vfsmount **mounts;
- unsigned i;
-
- iput(ofs->workbasedir_trap);
- iput(ofs->indexdir_trap);
- iput(ofs->workdir_trap);
- dput(ofs->whiteout);
- dput(ofs->indexdir);
- dput(ofs->workdir);
- if (ofs->workdir_locked)
- ovl_inuse_unlock(ofs->workbasedir);
- dput(ofs->workbasedir);
- if (ofs->upperdir_locked)
- ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
-
- /* Hack! Reuse ofs->layers as a vfsmount array before freeing it */
- mounts = (struct vfsmount **) ofs->layers;
- for (i = 0; i < ofs->numlayer; i++) {
- iput(ofs->layers[i].trap);
- mounts[i] = ofs->layers[i].mnt;
- kfree(ofs->layers[i].name);
- }
- kern_unmount_array(mounts, ofs->numlayer);
- kfree(ofs->layers);
- for (i = 0; i < ofs->numfs; i++)
- free_anon_bdev(ofs->fs[i].pseudo_dev);
- kfree(ofs->fs);
-
- kfree(ofs->config.upperdir);
- kfree(ofs->config.workdir);
- if (ofs->creator_cred)
- put_cred(ofs->creator_cred);
- kfree(ofs);
-}
-
static void ovl_put_super(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
@@ -323,122 +255,6 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
return err;
}
-/* Will this overlay be forced to mount/remount ro? */
-static bool ovl_force_readonly(struct ovl_fs *ofs)
-{
- return (!ovl_upper_mnt(ofs) || !ofs->workdir);
-}
-
-static const struct constant_table ovl_parameter_redirect_dir[] = {
- { "off", OVL_REDIRECT_OFF },
- { "follow", OVL_REDIRECT_FOLLOW },
- { "nofollow", OVL_REDIRECT_NOFOLLOW },
- { "on", OVL_REDIRECT_ON },
- {}
-};
-
-static const char *ovl_redirect_mode(struct ovl_config *config)
-{
- return ovl_parameter_redirect_dir[config->redirect_mode].name;
-}
-
-static int ovl_redirect_mode_def(void)
-{
- return ovl_redirect_dir_def ? OVL_REDIRECT_ON :
- ovl_redirect_always_follow ? OVL_REDIRECT_FOLLOW :
- OVL_REDIRECT_NOFOLLOW;
-}
-
-static const struct constant_table ovl_parameter_xino[] = {
- { "off", OVL_XINO_OFF },
- { "auto", OVL_XINO_AUTO },
- { "on", OVL_XINO_ON },
- {}
-};
-
-static const char *ovl_xino_mode(struct ovl_config *config)
-{
- return ovl_parameter_xino[config->xino].name;
-}
-
-static inline int ovl_xino_def(void)
-{
- return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
-}
-
-/**
- * ovl_show_options
- * @m: the seq_file handle
- * @dentry: The dentry to query
- *
- * Prints the mount options for a given superblock.
- * Returns zero; does not fail.
- */
-static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
-{
- struct super_block *sb = dentry->d_sb;
- struct ovl_fs *ofs = sb->s_fs_info;
- size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer;
- const struct ovl_layer *data_layers = &ofs->layers[nr_merged_lower];
-
- /* ofs->layers[0] is the upper layer */
- seq_printf(m, ",lowerdir=%s", ofs->layers[1].name);
- /* dump regular lower layers */
- for (nr = 2; nr < nr_merged_lower; nr++)
- seq_printf(m, ":%s", ofs->layers[nr].name);
- /* dump data lower layers */
- for (nr = 0; nr < ofs->numdatalayer; nr++)
- seq_printf(m, "::%s", data_layers[nr].name);
- if (ofs->config.upperdir) {
- seq_show_option(m, "upperdir", ofs->config.upperdir);
- seq_show_option(m, "workdir", ofs->config.workdir);
- }
- if (ofs->config.default_permissions)
- seq_puts(m, ",default_permissions");
- if (ofs->config.redirect_mode != ovl_redirect_mode_def())
- seq_printf(m, ",redirect_dir=%s",
- ovl_redirect_mode(&ofs->config));
- if (ofs->config.index != ovl_index_def)
- seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
- if (!ofs->config.uuid)
- seq_puts(m, ",uuid=off");
- if (ofs->config.nfs_export != ovl_nfs_export_def)
- seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
- "on" : "off");
- if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(ofs))
- seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config));
- if (ofs->config.metacopy != ovl_metacopy_def)
- seq_printf(m, ",metacopy=%s",
- ofs->config.metacopy ? "on" : "off");
- if (ofs->config.ovl_volatile)
- seq_puts(m, ",volatile");
- if (ofs->config.userxattr)
- seq_puts(m, ",userxattr");
- return 0;
-}
-
-static int ovl_reconfigure(struct fs_context *fc)
-{
- struct super_block *sb = fc->root->d_sb;
- struct ovl_fs *ofs = sb->s_fs_info;
- struct super_block *upper_sb;
- int ret = 0;
-
- if (!(fc->sb_flags & SB_RDONLY) && ovl_force_readonly(ofs))
- return -EROFS;
-
- if (fc->sb_flags & SB_RDONLY && !sb_rdonly(sb)) {
- upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
- if (ovl_should_sync(ofs)) {
- down_read(&upper_sb->s_umount);
- ret = sync_filesystem(upper_sb);
- up_read(&upper_sb->s_umount);
- }
- }
-
- return ret;
-}
-
static const struct super_operations ovl_super_operations = {
.alloc_inode = ovl_alloc_inode,
.free_inode = ovl_free_inode,
@@ -450,262 +266,6 @@ static const struct super_operations ovl_super_operations = {
.show_options = ovl_show_options,
};
-enum {
- Opt_lowerdir,
- Opt_upperdir,
- Opt_workdir,
- Opt_default_permissions,
- Opt_redirect_dir,
- Opt_index,
- Opt_uuid,
- Opt_nfs_export,
- Opt_userxattr,
- Opt_xino,
- Opt_metacopy,
- Opt_volatile,
-};
-
-static const struct constant_table ovl_parameter_bool[] = {
- { "on", true },
- { "off", false },
- {}
-};
-
-#define fsparam_string_empty(NAME, OPT) \
- __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
-
-static const struct fs_parameter_spec ovl_parameter_spec[] = {
- fsparam_string_empty("lowerdir", Opt_lowerdir),
- fsparam_string("upperdir", Opt_upperdir),
- fsparam_string("workdir", Opt_workdir),
- fsparam_flag("default_permissions", Opt_default_permissions),
- fsparam_enum("redirect_dir", Opt_redirect_dir, ovl_parameter_redirect_dir),
- fsparam_enum("index", Opt_index, ovl_parameter_bool),
- fsparam_enum("uuid", Opt_uuid, ovl_parameter_bool),
- fsparam_enum("nfs_export", Opt_nfs_export, ovl_parameter_bool),
- fsparam_flag("userxattr", Opt_userxattr),
- fsparam_enum("xino", Opt_xino, ovl_parameter_xino),
- fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool),
- fsparam_flag("volatile", Opt_volatile),
- {}
-};
-
-static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
-{
- int err = 0;
- struct fs_parse_result result;
- struct ovl_fs *ofs = fc->s_fs_info;
- struct ovl_config *config = &ofs->config;
- struct ovl_fs_context *ctx = fc->fs_private;
- int opt;
-
- if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
- /*
- * On remount overlayfs has always ignored all mount
- * options no matter if malformed or not so for
- * backwards compatibility we do the same here.
- */
- if (fc->oldapi)
- return 0;
-
- /*
- * Give us the freedom to allow changing mount options
- * with the new mount api in the future. So instead of
- * silently ignoring everything we report a proper
- * error. This is only visible for users of the new
- * mount api.
- */
- return invalfc(fc, "No changes allowed in reconfigure");
- }
-
- opt = fs_parse(fc, ovl_parameter_spec, param, &result);
- if (opt < 0)
- return opt;
-
- switch (opt) {
- case Opt_lowerdir:
- err = ovl_parse_param_lowerdir(param->string, fc);
- break;
- case Opt_upperdir:
- fallthrough;
- case Opt_workdir:
- err = ovl_parse_param_upperdir(param->string, fc,
- (Opt_workdir == opt));
- break;
- case Opt_default_permissions:
- config->default_permissions = true;
- break;
- case Opt_redirect_dir:
- config->redirect_mode = result.uint_32;
- if (config->redirect_mode == OVL_REDIRECT_OFF) {
- config->redirect_mode = ovl_redirect_always_follow ?
- OVL_REDIRECT_FOLLOW :
- OVL_REDIRECT_NOFOLLOW;
- }
- ctx->set.redirect = true;
- break;
- case Opt_index:
- config->index = result.uint_32;
- ctx->set.index = true;
- break;
- case Opt_uuid:
- config->uuid = result.uint_32;
- break;
- case Opt_nfs_export:
- config->nfs_export = result.uint_32;
- ctx->set.nfs_export = true;
- break;
- case Opt_xino:
- config->xino = result.uint_32;
- break;
- case Opt_metacopy:
- config->metacopy = result.uint_32;
- ctx->set.metacopy = true;
- break;
- case Opt_volatile:
- config->ovl_volatile = true;
- break;
- case Opt_userxattr:
- config->userxattr = true;
- break;
- default:
- pr_err("unrecognized mount option \"%s\" or missing value\n",
- param->key);
- return -EINVAL;
- }
-
- return err;
-}
-
-static int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
- struct ovl_config *config)
-{
- struct ovl_opt_set set = ctx->set;
-
- if (ctx->nr_data > 0 && !config->metacopy) {
- pr_err("lower data-only dirs require metacopy support.\n");
- return -EINVAL;
- }
-
- /* Workdir/index are useless in non-upper mount */
- if (!config->upperdir) {
- if (config->workdir) {
- pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
- config->workdir);
- kfree(config->workdir);
- config->workdir = NULL;
- }
- if (config->index && set.index) {
- pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
- set.index = false;
- }
- config->index = false;
- }
-
- if (!config->upperdir && config->ovl_volatile) {
- pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
- config->ovl_volatile = false;
- }
-
- /*
- * This is to make the logic below simpler. It doesn't make any other
- * difference, since redirect_dir=on is only used for upper.
- */
- if (!config->upperdir && config->redirect_mode == OVL_REDIRECT_FOLLOW)
- config->redirect_mode = OVL_REDIRECT_ON;
-
- /* Resolve metacopy -> redirect_dir dependency */
- if (config->metacopy && config->redirect_mode != OVL_REDIRECT_ON) {
- if (set.metacopy && set.redirect) {
- pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
- ovl_redirect_mode(config));
- return -EINVAL;
- }
- if (set.redirect) {
- /*
- * There was an explicit redirect_dir=... that resulted
- * in this conflict.
- */
- pr_info("disabling metacopy due to redirect_dir=%s\n",
- ovl_redirect_mode(config));
- config->metacopy = false;
- } else {
- /* Automatically enable redirect otherwise. */
- config->redirect_mode = OVL_REDIRECT_ON;
- }
- }
-
- /* Resolve nfs_export -> index dependency */
- if (config->nfs_export && !config->index) {
- if (!config->upperdir &&
- config->redirect_mode != OVL_REDIRECT_NOFOLLOW) {
- pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
- config->nfs_export = false;
- } else if (set.nfs_export && set.index) {
- pr_err("conflicting options: nfs_export=on,index=off\n");
- return -EINVAL;
- } else if (set.index) {
- /*
- * There was an explicit index=off that resulted
- * in this conflict.
- */
- pr_info("disabling nfs_export due to index=off\n");
- config->nfs_export = false;
- } else {
- /* Automatically enable index otherwise. */
- config->index = true;
- }
- }
-
- /* Resolve nfs_export -> !metacopy dependency */
- if (config->nfs_export && config->metacopy) {
- if (set.nfs_export && set.metacopy) {
- pr_err("conflicting options: nfs_export=on,metacopy=on\n");
- return -EINVAL;
- }
- if (set.metacopy) {
- /*
- * There was an explicit metacopy=on that resulted
- * in this conflict.
- */
- pr_info("disabling nfs_export due to metacopy=on\n");
- config->nfs_export = false;
- } else {
- /*
- * There was an explicit nfs_export=on that resulted
- * in this conflict.
- */
- pr_info("disabling metacopy due to nfs_export=on\n");
- config->metacopy = false;
- }
- }
-
-
- /* Resolve userxattr -> !redirect && !metacopy dependency */
- if (config->userxattr) {
- if (set.redirect &&
- config->redirect_mode != OVL_REDIRECT_NOFOLLOW) {
- pr_err("conflicting options: userxattr,redirect_dir=%s\n",
- ovl_redirect_mode(config));
- return -EINVAL;
- }
- if (config->metacopy && set.metacopy) {
- pr_err("conflicting options: userxattr,metacopy=on\n");
- return -EINVAL;
- }
- /*
- * Silently disable default setting of redirect and metacopy.
- * This shall be the default in the future as well: these
- * options must be explicitly enabled if used together with
- * userxattr.
- */
- config->redirect_mode = OVL_REDIRECT_NOFOLLOW;
- config->metacopy = false;
- }
-
- return 0;
-}
-
#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"
@@ -1758,7 +1318,7 @@ static struct dentry *ovl_get_root(struct super_block *sb,
return root;
}
-static int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
+int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct ovl_fs *ofs = sb->s_fs_info;
struct ovl_fs_context *ctx = fc->fs_private;
@@ -1919,92 +1479,6 @@ out_err:
return err;
}
-static int ovl_get_tree(struct fs_context *fc)
-{
- return get_tree_nodev(fc, ovl_fill_super);
-}
-
-static inline void ovl_fs_context_free(struct ovl_fs_context *ctx)
-{
- ovl_parse_param_drop_lowerdir(ctx);
- path_put(&ctx->upper);
- path_put(&ctx->work);
- kfree(ctx->lower);
- kfree(ctx);
-}
-
-static void ovl_free(struct fs_context *fc)
-{
- struct ovl_fs *ofs = fc->s_fs_info;
- struct ovl_fs_context *ctx = fc->fs_private;
-
- /*
- * ofs is stored in the fs_context when it is initialized.
- * ofs is transferred to the superblock on a successful mount,
- * but if an error occurs before the transfer we have to free
- * it here.
- */
- if (ofs)
- ovl_free_fs(ofs);
-
- if (ctx)
- ovl_fs_context_free(ctx);
-}
-
-static const struct fs_context_operations ovl_context_ops = {
- .parse_param = ovl_parse_param,
- .get_tree = ovl_get_tree,
- .reconfigure = ovl_reconfigure,
- .free = ovl_free,
-};
-
-/*
- * This is called during fsopen() and will record the user namespace of
- * the caller in fc->user_ns since we've raised FS_USERNS_MOUNT. We'll
- * need it when we actually create the superblock to verify that the
- * process creating the superblock is in the same user namespace as
- * process that called fsopen().
- */
-static int ovl_init_fs_context(struct fs_context *fc)
-{
- struct ovl_fs_context *ctx;
- struct ovl_fs *ofs;
-
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
- if (!ctx)
- return -ENOMEM;
-
- /*
- * By default we allocate for three lower layers. It's likely
- * that it'll cover most users.
- */
- ctx->lower = kmalloc_array(3, sizeof(*ctx->lower), GFP_KERNEL_ACCOUNT);
- if (!ctx->lower)
- goto out_err;
- ctx->capacity = 3;
-
- ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
- if (!ofs)
- goto out_err;
-
- ofs->config.redirect_mode = ovl_redirect_mode_def();
- ofs->config.index = ovl_index_def;
- ofs->config.uuid = true;
- ofs->config.nfs_export = ovl_nfs_export_def;
- ofs->config.xino = ovl_xino_def();
- ofs->config.metacopy = ovl_metacopy_def;
-
- fc->s_fs_info = ofs;
- fc->fs_private = ctx;
- fc->ops = &ovl_context_ops;
- return 0;
-
-out_err:
- ovl_fs_context_free(ctx);
- return -ENOMEM;
-
-}
-
static struct file_system_type ovl_fs_type = {
.owner = THIS_MODULE,
.name = "overlay",
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 5d0cf59c4926..9cb32e1a78a0 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -199,7 +199,7 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
ent->addr = (unsigned long)page_to_virt(p);
ent->size = nr_pages << PAGE_SHIFT;
- if (!virt_addr_valid(ent->addr))
+ if (!virt_addr_valid((void *)ent->addr))
goto free_out;
/* cut not-mapped area. ....from ppc-32 code. */
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 4e5488975415..5ea42653126e 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -29,7 +29,7 @@ static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;
/* Support for permanently empty directories */
-struct ctl_table sysctl_mount_point[] = {
+static struct ctl_table sysctl_mount_point[] = {
{.type = SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY }
};
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index ca2da713c5fe..b5808fe3469a 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -2179,7 +2179,7 @@ static inline void cifs_sg_set_buf(struct sg_table *sgtable,
} while (buflen);
} else {
sg_set_page(&sgtable->sgl[sgtable->nents++],
- virt_to_page(addr), buflen, off);
+ virt_to_page((void *)addr), buflen, off);
}
}
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index 223e17c16b60..2a2aec8c6112 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -2500,7 +2500,7 @@ static ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter,
if (is_vmalloc_or_module_addr((void *)kaddr))
page = vmalloc_to_page((void *)kaddr);
else
- page = virt_to_page(kaddr);
+ page = virt_to_page((void *)kaddr);
if (!smb_set_sge(rdma, page, off, seg))
return -EIO;
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 6aa9c2e1e8eb..581ce9519339 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -166,6 +166,26 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
return 0;
}
+static struct page *squashfs_get_cache_page(struct address_space *mapping,
+ pgoff_t index)
+{
+ struct page *page;
+
+ if (!mapping)
+ return NULL;
+
+ page = find_get_page(mapping, index);
+ if (!page)
+ return NULL;
+
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return NULL;
+ }
+
+ return page;
+}
+
static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
struct bio **biop, int *block_offset)
{
@@ -190,11 +210,10 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
for (i = 0; i < page_count; ++i) {
unsigned int len =
min_t(unsigned int, PAGE_SIZE - offset, total_len);
- struct page *page = NULL;
+ pgoff_t index = (read_start >> PAGE_SHIFT) + i;
+ struct page *page;
- if (cache_mapping)
- page = find_get_page(cache_mapping,
- (read_start >> PAGE_SHIFT) + i);
+ page = squashfs_get_cache_page(cache_mapping, index);
if (!page)
page = alloc_page(GFP_NOIO);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index eeb0e3099421..138676463336 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -118,11 +118,13 @@ static int internal_create_group(struct kobject *kobj, int update,
/* Updates may happen before the object has been instantiated */
if (unlikely(update && !kobj->sd))
return -EINVAL;
+
if (!grp->attrs && !grp->bin_attrs) {
- WARN(1, "sysfs: (bin_)attrs not set by subsystem for group: %s/%s\n",
- kobj->name, grp->name ?: "");
- return -EINVAL;
+ pr_debug("sysfs: (bin_)attrs not set by subsystem for group: %s/%s, skipping\n",
+ kobj->name, grp->name ?: "");
+ return 0;
}
+
kobject_get_ownership(kobj, &uid, &gid);
if (grp->name) {
if (update) {
@@ -142,8 +144,10 @@ static int internal_create_group(struct kobject *kobj, int update,
return PTR_ERR(kn);
}
}
- } else
+ } else {
kn = kobj->sd;
+ }
+
kernfs_get(kn);
error = create_files(kn, kobj, uid, gid, grp, update);
if (error) {
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index ee84835ebc66..e9cc481b4ddf 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -985,7 +985,7 @@ xfs_ag_shrink_space(
goto resv_err;
err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
- true);
+ XFS_AG_RESV_NONE, true);
if (err2)
goto resv_err;
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index c20fe99405d8..3069194527dd 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1536,7 +1536,8 @@ xfs_alloc_ag_vextent_lastblock(
*/
STATIC int
xfs_alloc_ag_vextent_near(
- struct xfs_alloc_arg *args)
+ struct xfs_alloc_arg *args,
+ uint32_t alloc_flags)
{
struct xfs_alloc_cur acur = {};
int error; /* error code */
@@ -1555,6 +1556,8 @@ xfs_alloc_ag_vextent_near(
if (args->agbno > args->max_agbno)
args->agbno = args->max_agbno;
+ /* Retry once quickly if we find busy extents before blocking. */
+ alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
restart:
len = 0;
@@ -1610,9 +1613,20 @@ restart:
*/
if (!acur.len) {
if (acur.busy) {
+ /*
+ * Our only valid extents must have been busy. Flush and
+ * retry the allocation again. If we get an -EAGAIN
+ * error, we're being told that a deadlock was avoided
+ * and the current transaction needs committing before
+ * the allocation can be retried.
+ */
trace_xfs_alloc_near_busy(args);
- xfs_extent_busy_flush(args->mp, args->pag,
- acur.busy_gen);
+ error = xfs_extent_busy_flush(args->tp, args->pag,
+ acur.busy_gen, alloc_flags);
+ if (error)
+ goto out;
+
+ alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
goto restart;
}
trace_xfs_alloc_size_neither(args);
@@ -1635,22 +1649,25 @@ out:
* and of the form k * prod + mod unless there's nothing that large.
* Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
*/
-STATIC int /* error */
+static int
xfs_alloc_ag_vextent_size(
- xfs_alloc_arg_t *args) /* allocation argument structure */
+ struct xfs_alloc_arg *args,
+ uint32_t alloc_flags)
{
- struct xfs_agf *agf = args->agbp->b_addr;
- struct xfs_btree_cur *bno_cur; /* cursor for bno btree */
- struct xfs_btree_cur *cnt_cur; /* cursor for cnt btree */
- int error; /* error result */
- xfs_agblock_t fbno; /* start of found freespace */
- xfs_extlen_t flen; /* length of found freespace */
- int i; /* temp status variable */
- xfs_agblock_t rbno; /* returned block number */
- xfs_extlen_t rlen; /* length of returned extent */
- bool busy;
- unsigned busy_gen;
+ struct xfs_agf *agf = args->agbp->b_addr;
+ struct xfs_btree_cur *bno_cur;
+ struct xfs_btree_cur *cnt_cur;
+ xfs_agblock_t fbno; /* start of found freespace */
+ xfs_extlen_t flen; /* length of found freespace */
+ xfs_agblock_t rbno; /* returned block number */
+ xfs_extlen_t rlen; /* length of returned extent */
+ bool busy;
+ unsigned busy_gen;
+ int error;
+ int i;
+ /* Retry once quickly if we find busy extents before blocking. */
+ alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
restart:
/*
* Allocate and initialize a cursor for the by-size btree.
@@ -1708,19 +1725,25 @@ restart:
error = xfs_btree_increment(cnt_cur, 0, &i);
if (error)
goto error0;
- if (i == 0) {
- /*
- * Our only valid extents must have been busy.
- * Make it unbusy by forcing the log out and
- * retrying.
- */
- xfs_btree_del_cursor(cnt_cur,
- XFS_BTREE_NOERROR);
- trace_xfs_alloc_size_busy(args);
- xfs_extent_busy_flush(args->mp,
- args->pag, busy_gen);
- goto restart;
- }
+ if (i)
+ continue;
+
+ /*
+ * Our only valid extents must have been busy. Flush and
+ * retry the allocation again. If we get an -EAGAIN
+ * error, we're being told that a deadlock was avoided
+ * and the current transaction needs committing before
+ * the allocation can be retried.
+ */
+ trace_xfs_alloc_size_busy(args);
+ error = xfs_extent_busy_flush(args->tp, args->pag,
+ busy_gen, alloc_flags);
+ if (error)
+ goto error0;
+
+ alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
+ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+ goto restart;
}
}
@@ -1800,9 +1823,21 @@ restart:
args->len = rlen;
if (rlen < args->minlen) {
if (busy) {
- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+ /*
+ * Our only valid extents must have been busy. Flush and
+ * retry the allocation again. If we get an -EAGAIN
+ * error, we're being told that a deadlock was avoided
+ * and the current transaction needs committing before
+ * the allocation can be retried.
+ */
trace_xfs_alloc_size_busy(args);
- xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
+ error = xfs_extent_busy_flush(args->tp, args->pag,
+ busy_gen, alloc_flags);
+ if (error)
+ goto error0;
+
+ alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
+ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
goto restart;
}
goto out_nominleft;
@@ -2435,23 +2470,25 @@ static int
xfs_defer_agfl_block(
struct xfs_trans *tp,
xfs_agnumber_t agno,
- xfs_fsblock_t agbno,
+ xfs_agblock_t agbno,
struct xfs_owner_info *oinfo)
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_extent_free_item *xefi;
+ xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
ASSERT(xfs_extfree_item_cache != NULL);
ASSERT(oinfo != NULL);
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
+ return -EFSCORRUPTED;
+
xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
GFP_KERNEL | __GFP_NOFAIL);
- xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
+ xefi->xefi_startblock = fsbno;
xefi->xefi_blockcount = 1;
xefi->xefi_owner = oinfo->oi_owner;
-
- if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock)))
- return -EFSCORRUPTED;
+ xefi->xefi_agresv = XFS_AG_RESV_AGFL;
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
@@ -2470,6 +2507,7 @@ __xfs_free_extent_later(
xfs_fsblock_t bno,
xfs_filblks_t len,
const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type,
bool skip_discard)
{
struct xfs_extent_free_item *xefi;
@@ -2490,6 +2528,7 @@ __xfs_free_extent_later(
ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
ASSERT(xfs_extfree_item_cache != NULL);
+ ASSERT(type != XFS_AG_RESV_AGFL);
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
return -EFSCORRUPTED;
@@ -2498,6 +2537,7 @@ __xfs_free_extent_later(
GFP_KERNEL | __GFP_NOFAIL);
xefi->xefi_startblock = bno;
xefi->xefi_blockcount = (xfs_extlen_t)len;
+ xefi->xefi_agresv = type;
if (skip_discard)
xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
if (oinfo) {
@@ -2568,7 +2608,7 @@ out:
int /* error */
xfs_alloc_fix_freelist(
struct xfs_alloc_arg *args, /* allocation argument structure */
- int flags) /* XFS_ALLOC_FLAG_... */
+ uint32_t alloc_flags)
{
struct xfs_mount *mp = args->mp;
struct xfs_perag *pag = args->pag;
@@ -2584,7 +2624,7 @@ xfs_alloc_fix_freelist(
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
if (!xfs_perag_initialised_agf(pag)) {
- error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
+ error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
if (error) {
/* Couldn't lock the AGF so skip this AG. */
if (error == -EAGAIN)
@@ -2600,13 +2640,13 @@ xfs_alloc_fix_freelist(
*/
if (xfs_perag_prefers_metadata(pag) &&
(args->datatype & XFS_ALLOC_USERDATA) &&
- (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
- ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
+ (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
+ ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
goto out_agbp_relse;
}
need = xfs_alloc_min_freelist(mp, pag);
- if (!xfs_alloc_space_available(args, need, flags |
+ if (!xfs_alloc_space_available(args, need, alloc_flags |
XFS_ALLOC_FLAG_CHECK))
goto out_agbp_relse;
@@ -2615,7 +2655,7 @@ xfs_alloc_fix_freelist(
* Can fail if we're not blocking on locks, and it's held.
*/
if (!agbp) {
- error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
+ error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
if (error) {
/* Couldn't lock the AGF so skip this AG. */
if (error == -EAGAIN)
@@ -2630,7 +2670,7 @@ xfs_alloc_fix_freelist(
/* If there isn't enough total space or single-extent, reject it. */
need = xfs_alloc_min_freelist(mp, pag);
- if (!xfs_alloc_space_available(args, need, flags))
+ if (!xfs_alloc_space_available(args, need, alloc_flags))
goto out_agbp_relse;
#ifdef DEBUG
@@ -2668,11 +2708,12 @@ xfs_alloc_fix_freelist(
*/
memset(&targs, 0, sizeof(targs));
/* struct copy below */
- if (flags & XFS_ALLOC_FLAG_NORMAP)
+ if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
else
targs.oinfo = XFS_RMAP_OINFO_AG;
- while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
+ while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
+ pag->pagf_flcount > need) {
error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
if (error)
goto out_agbp_relse;
@@ -2700,7 +2741,7 @@ xfs_alloc_fix_freelist(
targs.resv = XFS_AG_RESV_AGFL;
/* Allocate as many blocks as possible at once. */
- error = xfs_alloc_ag_vextent_size(&targs);
+ error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
if (error)
goto out_agflbp_relse;
@@ -2710,7 +2751,7 @@ xfs_alloc_fix_freelist(
* on a completely full ag.
*/
if (targs.agbno == NULLAGBLOCK) {
- if (flags & XFS_ALLOC_FLAG_FREEING)
+ if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
break;
goto out_agflbp_relse;
}
@@ -2916,6 +2957,47 @@ xfs_alloc_put_freelist(
}
/*
+ * Check that this AGF/AGI header's sequence number and length matches the AG
+ * number and size in fsblocks.
+ */
+xfs_failaddr_t
+xfs_validate_ag_length(
+ struct xfs_buf *bp,
+ uint32_t seqno,
+ uint32_t length)
+{
+ struct xfs_mount *mp = bp->b_mount;
+ /*
+ * During growfs operations, the perag is not fully initialised,
+ * so we can't use it for any useful checking. growfs ensures we can't
+ * use it by using uncached buffers that don't have the perag attached
+ * so we can detect and avoid this problem.
+ */
+ if (bp->b_pag && seqno != bp->b_pag->pag_agno)
+ return __this_address;
+
+ /*
+ * Only the last AG in the filesystem is allowed to be shorter
+ * than the AG size recorded in the superblock.
+ */
+ if (length != mp->m_sb.sb_agblocks) {
+ /*
+ * During growfs, the new last AG can get here before we
+ * have updated the superblock. Give it a pass on the seqno
+ * check.
+ */
+ if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
+ return __this_address;
+ if (length < XFS_MIN_AG_BLOCKS)
+ return __this_address;
+ if (length > mp->m_sb.sb_agblocks)
+ return __this_address;
+ }
+
+ return NULL;
+}
+
+/*
* Verify the AGF is consistent.
*
* We do not verify the AGFL indexes in the AGF are fully consistent here
@@ -2934,6 +3016,9 @@ xfs_agf_verify(
{
struct xfs_mount *mp = bp->b_mount;
struct xfs_agf *agf = bp->b_addr;
+ xfs_failaddr_t fa;
+ uint32_t agf_seqno = be32_to_cpu(agf->agf_seqno);
+ uint32_t agf_length = be32_to_cpu(agf->agf_length);
if (xfs_has_crc(mp)) {
if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
@@ -2945,18 +3030,26 @@ xfs_agf_verify(
if (!xfs_verify_magic(bp, agf->agf_magicnum))
return __this_address;
- if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
- be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
- be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
- be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
- be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
+ if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
return __this_address;
- if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks)
+ /*
+ * Both agf_seqno and agf_length need to validated before anything else
+ * block number related in the AGF or AGFL can be checked.
+ */
+ fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
+ if (fa)
+ return fa;
+
+ if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
+ return __this_address;
+ if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
+ return __this_address;
+ if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
return __this_address;
if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
- be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
+ be32_to_cpu(agf->agf_freeblks) > agf_length)
return __this_address;
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
@@ -2967,38 +3060,28 @@ xfs_agf_verify(
mp->m_alloc_maxlevels)
return __this_address;
- if (xfs_has_rmapbt(mp) &&
- (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
- mp->m_rmap_maxlevels))
- return __this_address;
-
- if (xfs_has_rmapbt(mp) &&
- be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
+ if (xfs_has_lazysbcount(mp) &&
+ be32_to_cpu(agf->agf_btreeblks) > agf_length)
return __this_address;
- /*
- * during growfs operations, the perag is not fully initialised,
- * so we can't use it for any useful checking. growfs ensures we can't
- * use it by using uncached buffers that don't have the perag attached
- * so we can detect and avoid this problem.
- */
- if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
- return __this_address;
+ if (xfs_has_rmapbt(mp)) {
+ if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
+ return __this_address;
- if (xfs_has_lazysbcount(mp) &&
- be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
- return __this_address;
+ if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
+ be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
+ mp->m_rmap_maxlevels)
+ return __this_address;
+ }
- if (xfs_has_reflink(mp) &&
- be32_to_cpu(agf->agf_refcount_blocks) >
- be32_to_cpu(agf->agf_length))
- return __this_address;
+ if (xfs_has_reflink(mp)) {
+ if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
+ return __this_address;
- if (xfs_has_reflink(mp) &&
- (be32_to_cpu(agf->agf_refcount_level) < 1 ||
- be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels))
- return __this_address;
+ if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
+ be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
+ return __this_address;
+ }
return NULL;
}
@@ -3226,7 +3309,7 @@ xfs_alloc_vextent_check_args(
static int
xfs_alloc_vextent_prepare_ag(
struct xfs_alloc_arg *args,
- uint32_t flags)
+ uint32_t alloc_flags)
{
bool need_pag = !args->pag;
int error;
@@ -3235,7 +3318,7 @@ xfs_alloc_vextent_prepare_ag(
args->pag = xfs_perag_get(args->mp, args->agno);
args->agbp = NULL;
- error = xfs_alloc_fix_freelist(args, flags);
+ error = xfs_alloc_fix_freelist(args, alloc_flags);
if (error) {
trace_xfs_alloc_vextent_nofix(args);
if (need_pag)
@@ -3357,6 +3440,7 @@ xfs_alloc_vextent_this_ag(
{
struct xfs_mount *mp = args->mp;
xfs_agnumber_t minimum_agno;
+ uint32_t alloc_flags = 0;
int error;
ASSERT(args->pag != NULL);
@@ -3375,9 +3459,9 @@ xfs_alloc_vextent_this_ag(
return error;
}
- error = xfs_alloc_vextent_prepare_ag(args, 0);
+ error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
if (!error && args->agbp)
- error = xfs_alloc_ag_vextent_size(args);
+ error = xfs_alloc_ag_vextent_size(args, alloc_flags);
return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
}
@@ -3406,20 +3490,20 @@ xfs_alloc_vextent_iterate_ags(
xfs_agnumber_t minimum_agno,
xfs_agnumber_t start_agno,
xfs_agblock_t target_agbno,
- uint32_t flags)
+ uint32_t alloc_flags)
{
struct xfs_mount *mp = args->mp;
xfs_agnumber_t restart_agno = minimum_agno;
xfs_agnumber_t agno;
int error = 0;
- if (flags & XFS_ALLOC_FLAG_TRYLOCK)
+ if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
restart_agno = 0;
restart:
for_each_perag_wrap_range(mp, start_agno, restart_agno,
mp->m_sb.sb_agcount, agno, args->pag) {
args->agno = agno;
- error = xfs_alloc_vextent_prepare_ag(args, flags);
+ error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
if (error)
break;
if (!args->agbp) {
@@ -3433,10 +3517,10 @@ restart:
*/
if (args->agno == start_agno && target_agbno) {
args->agbno = target_agbno;
- error = xfs_alloc_ag_vextent_near(args);
+ error = xfs_alloc_ag_vextent_near(args, alloc_flags);
} else {
args->agbno = 0;
- error = xfs_alloc_ag_vextent_size(args);
+ error = xfs_alloc_ag_vextent_size(args, alloc_flags);
}
break;
}
@@ -3453,8 +3537,8 @@ restart:
* constraining flags by the caller, drop them and retry the allocation
* without any constraints being set.
*/
- if (flags) {
- flags = 0;
+ if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
+ alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
restart_agno = minimum_agno;
goto restart;
}
@@ -3482,6 +3566,7 @@ xfs_alloc_vextent_start_ag(
xfs_agnumber_t start_agno;
xfs_agnumber_t rotorstep = xfs_rotorstep;
bool bump_rotor = false;
+ uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
int error;
ASSERT(args->pag == NULL);
@@ -3508,7 +3593,7 @@ xfs_alloc_vextent_start_ag(
start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
- XFS_FSB_TO_AGBNO(mp, target), XFS_ALLOC_FLAG_TRYLOCK);
+ XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
if (bump_rotor) {
if (args->agno == start_agno)
@@ -3535,6 +3620,7 @@ xfs_alloc_vextent_first_ag(
struct xfs_mount *mp = args->mp;
xfs_agnumber_t minimum_agno;
xfs_agnumber_t start_agno;
+ uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
int error;
ASSERT(args->pag == NULL);
@@ -3553,7 +3639,7 @@ xfs_alloc_vextent_first_ag(
start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
- XFS_FSB_TO_AGBNO(mp, target), 0);
+ XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
}
@@ -3606,6 +3692,7 @@ xfs_alloc_vextent_near_bno(
struct xfs_mount *mp = args->mp;
xfs_agnumber_t minimum_agno;
bool needs_perag = args->pag == NULL;
+ uint32_t alloc_flags = 0;
int error;
if (!needs_perag)
@@ -3626,9 +3713,9 @@ xfs_alloc_vextent_near_bno(
if (needs_perag)
args->pag = xfs_perag_grab(mp, args->agno);
- error = xfs_alloc_vextent_prepare_ag(args, 0);
+ error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
if (!error && args->agbp)
- error = xfs_alloc_ag_vextent_near(args);
+ error = xfs_alloc_ag_vextent_near(args, alloc_flags);
return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
}
@@ -3756,15 +3843,11 @@ xfs_alloc_query_range(
xfs_alloc_query_range_fn fn,
void *priv)
{
- union xfs_btree_irec low_brec;
- union xfs_btree_irec high_brec;
- struct xfs_alloc_query_range_info query;
+ union xfs_btree_irec low_brec = { .a = *low_rec };
+ union xfs_btree_irec high_brec = { .a = *high_rec };
+ struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn };
ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
- low_brec.a = *low_rec;
- high_brec.a = *high_rec;
- query.priv = priv;
- query.fn = fn;
return xfs_btree_query_range(cur, &low_brec, &high_brec,
xfs_alloc_query_range_helper, &query);
}
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 85ac470be0da..6bb8d295c321 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -19,11 +19,12 @@ unsigned int xfs_agfl_size(struct xfs_mount *mp);
/*
* Flags for xfs_alloc_fix_freelist.
*/
-#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */
-#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
-#define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */
-#define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */
-#define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */
+#define XFS_ALLOC_FLAG_TRYLOCK (1U << 0) /* use trylock for buffer locking */
+#define XFS_ALLOC_FLAG_FREEING (1U << 1) /* indicate caller is freeing extents*/
+#define XFS_ALLOC_FLAG_NORMAP (1U << 2) /* don't modify the rmapbt */
+#define XFS_ALLOC_FLAG_NOSHRINK (1U << 3) /* don't shrink the freelist */
+#define XFS_ALLOC_FLAG_CHECK (1U << 4) /* test only, don't modify args */
+#define XFS_ALLOC_FLAG_TRYFLUSH (1U << 5) /* don't wait in busy extent flush */
/*
* Argument structure for xfs_alloc routines.
@@ -195,7 +196,7 @@ int xfs_alloc_read_agfl(struct xfs_perag *pag, struct xfs_trans *tp,
struct xfs_buf **bpp);
int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t,
struct xfs_buf *, struct xfs_owner_info *);
-int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
+int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, uint32_t alloc_flags);
int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag,
struct xfs_buf **agbp);
@@ -232,7 +233,7 @@ xfs_buf_to_agfl_bno(
int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
- bool skip_discard);
+ enum xfs_ag_resv_type type, bool skip_discard);
/*
* List of extents to be free "later".
@@ -245,6 +246,7 @@ struct xfs_extent_free_item {
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
struct xfs_perag *xefi_pag;
unsigned int xefi_flags;
+ enum xfs_ag_resv_type xefi_agresv;
};
void xfs_extent_free_get_group(struct xfs_mount *mp,
@@ -259,9 +261,10 @@ xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
- const struct xfs_owner_info *oinfo)
+ const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type)
{
- return __xfs_free_extent_later(tp, bno, len, oinfo, false);
+ return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);
}
@@ -270,4 +273,7 @@ extern struct kmem_cache *xfs_extfree_item_cache;
int __init xfs_extfree_intent_init_cache(void);
void xfs_extfree_intent_destroy_cache(void);
+xfs_failaddr_t xfs_validate_ag_length(struct xfs_buf *bp, uint32_t seqno,
+ uint32_t length);
+
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index beee51ad75ce..2580ae47209a 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -2293,8 +2293,6 @@ xfs_attr3_leaf_unbalance(
trace_xfs_attr_leaf_unbalance(state->args);
- drop_leaf = drop_blk->bp->b_addr;
- save_leaf = save_blk->bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf);
xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf);
entry = xfs_attr3_leaf_entryp(drop_leaf);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index fef35696adb7..30c931b38853 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -574,7 +574,8 @@ xfs_bmap_btree_to_extents(
return error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
- error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
+ error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
+ XFS_AG_RESV_NONE);
if (error)
return error;
@@ -5236,8 +5237,9 @@ xfs_bmap_del_extent_real(
} else {
error = __xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
- (bflags & XFS_BMAPI_NODISCARD) ||
- del->br_state == XFS_EXT_UNWRITTEN);
+ XFS_AG_RESV_NONE,
+ ((bflags & XFS_BMAPI_NODISCARD) ||
+ del->br_state == XFS_EXT_UNWRITTEN));
if (error)
goto done;
}
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 36564ae3084f..bf3f1b36fdd2 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -271,7 +271,8 @@ xfs_bmbt_free_block(
int error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
- error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
+ error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
+ XFS_AG_RESV_NONE);
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 34600f94c2f4..b83e54c70906 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1853,8 +1853,8 @@ xfs_difree_inode_chunk(
/* not sparse, calculate extent info directly */
return xfs_free_extent_later(tp,
XFS_AGB_TO_FSB(mp, agno, sagbno),
- M_IGEO(mp)->ialloc_blks,
- &XFS_RMAP_OINFO_INODES);
+ M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
+ XFS_AG_RESV_NONE);
}
/* holemask is only 16-bits (fits in an unsigned long) */
@@ -1899,8 +1899,8 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
error = xfs_free_extent_later(tp,
- XFS_AGB_TO_FSB(mp, agno, agbno),
- contigblk, &XFS_RMAP_OINFO_INODES);
+ XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
+ &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
if (error)
return error;
@@ -2486,11 +2486,14 @@ xfs_ialloc_log_agi(
static xfs_failaddr_t
xfs_agi_verify(
- struct xfs_buf *bp)
+ struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_mount;
- struct xfs_agi *agi = bp->b_addr;
- int i;
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_agi *agi = bp->b_addr;
+ xfs_failaddr_t fa;
+ uint32_t agi_seqno = be32_to_cpu(agi->agi_seqno);
+ uint32_t agi_length = be32_to_cpu(agi->agi_length);
+ int i;
if (xfs_has_crc(mp)) {
if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
@@ -2507,6 +2510,10 @@ xfs_agi_verify(
if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
return __this_address;
+ fa = xfs_validate_ag_length(bp, agi_seqno, agi_length);
+ if (fa)
+ return fa;
+
if (be32_to_cpu(agi->agi_level) < 1 ||
be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels)
return __this_address;
@@ -2516,15 +2523,6 @@ xfs_agi_verify(
be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels))
return __this_address;
- /*
- * during growfs operations, the perag is not fully initialised,
- * so we can't use it for any useful checking. growfs ensures we can't
- * use it by using uncached buffers that don't have the perag attached
- * so we can detect and avoid this problem.
- */
- if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
- return __this_address;
-
for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
if (agi->agi_unlinked[i] == cpu_to_be32(NULLAGINO))
continue;
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 5a945ae21b5d..9258f01c0015 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -160,8 +160,7 @@ __xfs_inobt_free_block(
xfs_inobt_mod_blockcount(cur, -1);
fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
- return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
- XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
+ return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
&XFS_RMAP_OINFO_INOBT, resv);
}
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index b6e21433925c..646b3fa362ad 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1152,7 +1152,8 @@ xfs_refcount_adjust_extents(
cur->bc_ag.pag->pag_agno,
tmp.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
- tmp.rc_blockcount, NULL);
+ tmp.rc_blockcount, NULL,
+ XFS_AG_RESV_NONE);
if (error)
goto out_error;
}
@@ -1213,7 +1214,8 @@ xfs_refcount_adjust_extents(
cur->bc_ag.pag->pag_agno,
ext.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
- ext.rc_blockcount, NULL);
+ ext.rc_blockcount, NULL,
+ XFS_AG_RESV_NONE);
if (error)
goto out_error;
}
@@ -1919,8 +1921,13 @@ xfs_refcount_recover_cow_leftovers(
struct xfs_buf *agbp;
struct xfs_refcount_recovery *rr, *n;
struct list_head debris;
- union xfs_btree_irec low;
- union xfs_btree_irec high;
+ union xfs_btree_irec low = {
+ .rc.rc_domain = XFS_REFC_DOMAIN_COW,
+ };
+ union xfs_btree_irec high = {
+ .rc.rc_domain = XFS_REFC_DOMAIN_COW,
+ .rc.rc_startblock = -1U,
+ };
xfs_fsblock_t fsb;
int error;
@@ -1951,10 +1958,6 @@ xfs_refcount_recover_cow_leftovers(
cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
/* Find all the leftover CoW staging extents. */
- memset(&low, 0, sizeof(low));
- memset(&high, 0, sizeof(high));
- low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW;
- high.rc.rc_startblock = -1U;
error = xfs_btree_query_range(cur, &low, &high,
xfs_refcount_recover_extent, &debris);
xfs_btree_del_cursor(cur, error);
@@ -1981,7 +1984,8 @@ xfs_refcount_recover_cow_leftovers(
/* Free the block. */
error = xfs_free_extent_later(tp, fsb,
- rr->rr_rrec.rc_blockcount, NULL);
+ rr->rr_rrec.rc_blockcount, NULL,
+ XFS_AG_RESV_NONE);
if (error)
goto out_trans;
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index d4afc5f4e6a5..5c3987d8dc24 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -106,19 +106,13 @@ xfs_refcountbt_free_block(
struct xfs_buf *agbp = cur->bc_ag.agbp;
struct xfs_agf *agf = agbp->b_addr;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
- int error;
trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
be32_add_cpu(&agf->agf_refcount_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
- error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
- XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
+ return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
- if (error)
- return error;
-
- return error;
}
STATIC int
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index f4dc23b3b837..fbb0b2637463 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2389,14 +2389,10 @@ xfs_rmap_query_range(
xfs_rmap_query_range_fn fn,
void *priv)
{
- union xfs_btree_irec low_brec;
- union xfs_btree_irec high_brec;
- struct xfs_rmap_query_range_info query;
+ union xfs_btree_irec low_brec = { .r = *low_rec };
+ union xfs_btree_irec high_brec = { .r = *high_rec };
+ struct xfs_rmap_query_range_info query = { .priv = priv, .fn = fn };
- low_brec.r = *low_rec;
- high_brec.r = *high_rec;
- query.priv = priv;
- query.fn = fn;
return xfs_btree_query_range(cur, &low_brec, &high_brec,
xfs_rmap_query_range_helper, &query);
}
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index ba0f17bc1dc0..5e174685a77c 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -412,7 +412,6 @@ xfs_validate_sb_common(
sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
- sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE ||
sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES ||
XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES ||
@@ -430,6 +429,61 @@ xfs_validate_sb_common(
return -EFSCORRUPTED;
}
+ /*
+ * Logs that are too large are not supported at all. Reject them
+ * outright. Logs that are too small are tolerated on v4 filesystems,
+ * but we can only check that when mounting the log. Hence we skip
+ * those checks here.
+ */
+ if (sbp->sb_logblocks > XFS_MAX_LOG_BLOCKS) {
+ xfs_notice(mp,
+ "Log size 0x%x blocks too large, maximum size is 0x%llx blocks",
+ sbp->sb_logblocks, XFS_MAX_LOG_BLOCKS);
+ return -EFSCORRUPTED;
+ }
+
+ if (XFS_FSB_TO_B(mp, sbp->sb_logblocks) > XFS_MAX_LOG_BYTES) {
+ xfs_warn(mp,
+ "log size 0x%llx bytes too large, maximum size is 0x%llx bytes",
+ XFS_FSB_TO_B(mp, sbp->sb_logblocks),
+ XFS_MAX_LOG_BYTES);
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Do not allow filesystems with corrupted log sector or stripe units to
+ * be mounted. We cannot safely size the iclogs or write to the log if
+ * the log stripe unit is not valid.
+ */
+ if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) {
+ if (sbp->sb_logsectsize != (1U << sbp->sb_logsectlog)) {
+ xfs_notice(mp,
+ "log sector size in bytes/log2 (0x%x/0x%x) must match",
+ sbp->sb_logsectsize, 1U << sbp->sb_logsectlog);
+ return -EFSCORRUPTED;
+ }
+ } else if (sbp->sb_logsectsize || sbp->sb_logsectlog) {
+ xfs_notice(mp,
+ "log sector size in bytes/log2 (0x%x/0x%x) are not zero",
+ sbp->sb_logsectsize, sbp->sb_logsectlog);
+ return -EFSCORRUPTED;
+ }
+
+ if (sbp->sb_logsunit > 1) {
+ if (sbp->sb_logsunit % sbp->sb_blocksize) {
+ xfs_notice(mp,
+ "log stripe unit 0x%x bytes must be a multiple of block size",
+ sbp->sb_logsunit);
+ return -EFSCORRUPTED;
+ }
+ if (sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE) {
+ xfs_notice(mp,
+ "log stripe unit 0x%x bytes over maximum size (0x%x bytes)",
+ sbp->sb_logsunit, XLOG_MAX_RECORD_BSIZE);
+ return -EFSCORRUPTED;
+ }
+ }
+
/* Validate the realtime geometry; stolen from xfs_repair */
if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE ||
sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) {
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index f3d328e4a440..7c2fdc71e42d 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -566,20 +566,45 @@ xfs_extent_busy_clear(
/*
* Flush out all busy extents for this AG.
+ *
+ * If the current transaction is holding busy extents, the caller may not want
+ * to wait for committed busy extents to resolve. If we are being told just to
+ * try a flush or progress has been made since we last skipped a busy extent,
+ * return immediately to allow the caller to try again.
+ *
+ * If we are freeing extents, we might actually be holding the only free extents
+ * in the transaction busy list and the log force won't resolve that situation.
+ * In this case, we must return -EAGAIN to avoid a deadlock by informing the
+ * caller it needs to commit the busy extents it holds before retrying the
+ * extent free operation.
*/
-void
+int
xfs_extent_busy_flush(
- struct xfs_mount *mp,
+ struct xfs_trans *tp,
struct xfs_perag *pag,
- unsigned busy_gen)
+ unsigned busy_gen,
+ uint32_t alloc_flags)
{
DEFINE_WAIT (wait);
int error;
- error = xfs_log_force(mp, XFS_LOG_SYNC);
+ error = xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
if (error)
- return;
+ return error;
+
+ /* Avoid deadlocks on uncommitted busy extents. */
+ if (!list_empty(&tp->t_busy)) {
+ if (alloc_flags & XFS_ALLOC_FLAG_TRYFLUSH)
+ return 0;
+
+ if (busy_gen != READ_ONCE(pag->pagb_gen))
+ return 0;
+
+ if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
+ return -EAGAIN;
+ }
+ /* Wait for committed busy extents to resolve. */
do {
prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
if (busy_gen != READ_ONCE(pag->pagb_gen))
@@ -588,6 +613,7 @@ xfs_extent_busy_flush(
} while (1);
finish_wait(&pag->pagb_wait, &wait);
+ return 0;
}
void
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h
index 4a118131059f..c37bf87e6781 100644
--- a/fs/xfs/xfs_extent_busy.h
+++ b/fs/xfs/xfs_extent_busy.h
@@ -51,9 +51,9 @@ bool
xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno,
xfs_extlen_t *len, unsigned *busy_gen);
-void
-xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag,
- unsigned busy_gen);
+int
+xfs_extent_busy_flush(struct xfs_trans *tp, struct xfs_perag *pag,
+ unsigned busy_gen, uint32_t alloc_flags);
void
xfs_extent_busy_wait_all(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index f9e36b810663..f1a5ecf099aa 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -337,6 +337,34 @@ xfs_trans_get_efd(
}
/*
+ * Fill the EFD with all extents from the EFI when we need to roll the
+ * transaction and continue with a new EFI.
+ *
+ * This simply copies all the extents in the EFI to the EFD rather than make
+ * assumptions about which extents in the EFI have already been processed. We
+ * currently keep the xefi list in the same order as the EFI extent list, but
+ * that may not always be the case. Copying everything avoids leaving a landmine
+ * were we fail to cancel all the extents in an EFI if the xefi list is
+ * processed in a different order to the extents in the EFI.
+ */
+static void
+xfs_efd_from_efi(
+ struct xfs_efd_log_item *efdp)
+{
+ struct xfs_efi_log_item *efip = efdp->efd_efip;
+ uint i;
+
+ ASSERT(efip->efi_format.efi_nextents > 0);
+ ASSERT(efdp->efd_next_extent < efip->efi_format.efi_nextents);
+
+ for (i = 0; i < efip->efi_format.efi_nextents; i++) {
+ efdp->efd_format.efd_extents[i] =
+ efip->efi_format.efi_extents[i];
+ }
+ efdp->efd_next_extent = efip->efi_format.efi_nextents;
+}
+
+/*
* Free an extent and log it to the EFD. Note that the transaction is marked
* dirty regardless of whether the extent free succeeds or fails to support the
* EFI/EFD lifecycle rules.
@@ -365,7 +393,7 @@ xfs_trans_free_extent(
agbno, xefi->xefi_blockcount);
error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
- xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE,
+ xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv,
xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
/*
@@ -378,6 +406,17 @@ xfs_trans_free_extent(
tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE;
set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
+ /*
+ * If we need a new transaction to make progress, the caller will log a
+ * new EFI with the current contents. It will also log an EFD to cancel
+ * the existing EFI, and so we need to copy all the unprocessed extents
+ * in this EFI to the EFD so this works correctly.
+ */
+ if (error == -EAGAIN) {
+ xfs_efd_from_efi(efdp);
+ return error;
+ }
+
next_extent = efdp->efd_next_extent;
ASSERT(next_extent < efdp->efd_format.efd_nextents);
extp = &(efdp->efd_format.efd_extents[next_extent]);
@@ -495,6 +534,13 @@ xfs_extent_free_finish_item(
error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi);
+ /*
+ * Don't free the XEFI if we need a new transaction to complete
+ * processing of it.
+ */
+ if (error == -EAGAIN)
+ return error;
+
xfs_extent_free_put_group(xefi);
kmem_cache_free(xfs_extfree_item_cache, xefi);
return error;
@@ -620,6 +666,7 @@ xfs_efi_item_recover(
struct xfs_trans *tp;
int i;
int error = 0;
+ bool requeue_only = false;
/*
* First check the validity of the extents described by the
@@ -644,6 +691,7 @@ xfs_efi_item_recover(
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
struct xfs_extent_free_item fake = {
.xefi_owner = XFS_RMAP_OWN_UNKNOWN,
+ .xefi_agresv = XFS_AG_RESV_NONE,
};
struct xfs_extent *extp;
@@ -652,9 +700,28 @@ xfs_efi_item_recover(
fake.xefi_startblock = extp->ext_start;
fake.xefi_blockcount = extp->ext_len;
- xfs_extent_free_get_group(mp, &fake);
- error = xfs_trans_free_extent(tp, efdp, &fake);
- xfs_extent_free_put_group(&fake);
+ if (!requeue_only) {
+ xfs_extent_free_get_group(mp, &fake);
+ error = xfs_trans_free_extent(tp, efdp, &fake);
+ xfs_extent_free_put_group(&fake);
+ }
+
+ /*
+ * If we can't free the extent without potentially deadlocking,
+ * requeue the rest of the extents to a new so that they get
+ * run again later with a new transaction context.
+ */
+ if (error == -EAGAIN || requeue_only) {
+ error = xfs_free_extent_later(tp, fake.xefi_startblock,
+ fake.xefi_blockcount,
+ &XFS_RMAP_OINFO_ANY_OWNER,
+ fake.xefi_agresv);
+ if (!error) {
+ requeue_only = true;
+ continue;
+ }
+ }
+
if (error == -EFSCORRUPTED)
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
extp, sizeof(*extp));
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 59e7d1a14b67..10403ba9b58f 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -160,9 +160,18 @@ struct xfs_getfsmap_info {
struct xfs_buf *agf_bp; /* AGF, for refcount queries */
struct xfs_perag *pag; /* AG info, if applicable */
xfs_daddr_t next_daddr; /* next daddr we expect */
+ /* daddr of low fsmap key when we're using the rtbitmap */
+ xfs_daddr_t low_daddr;
u64 missing_owner; /* owner of holes */
u32 dev; /* device id */
- struct xfs_rmap_irec low; /* low rmap key */
+ /*
+ * Low rmap key for the query. If low.rm_blockcount is nonzero, this
+ * is the second (or later) call to retrieve the recordset in pieces.
+ * xfs_getfsmap_rec_before_start will compare all records retrieved
+ * by the rmapbt query to filter out any records that start before
+ * the last record.
+ */
+ struct xfs_rmap_irec low;
struct xfs_rmap_irec high; /* high rmap key */
bool last; /* last extent? */
};
@@ -237,16 +246,31 @@ xfs_getfsmap_format(
xfs_fsmap_from_internal(rec, xfm);
}
+static inline bool
+xfs_getfsmap_rec_before_start(
+ struct xfs_getfsmap_info *info,
+ const struct xfs_rmap_irec *rec,
+ xfs_daddr_t rec_daddr)
+{
+ if (info->low_daddr != -1ULL)
+ return rec_daddr < info->low_daddr;
+ if (info->low.rm_blockcount)
+ return xfs_rmap_compare(rec, &info->low) < 0;
+ return false;
+}
+
/*
* Format a reverse mapping for getfsmap, having translated rm_startblock
- * into the appropriate daddr units.
+ * into the appropriate daddr units. Pass in a nonzero @len_daddr if the
+ * length could be larger than rm_blockcount in struct xfs_rmap_irec.
*/
STATIC int
xfs_getfsmap_helper(
struct xfs_trans *tp,
struct xfs_getfsmap_info *info,
const struct xfs_rmap_irec *rec,
- xfs_daddr_t rec_daddr)
+ xfs_daddr_t rec_daddr,
+ xfs_daddr_t len_daddr)
{
struct xfs_fsmap fmr;
struct xfs_mount *mp = tp->t_mountp;
@@ -256,12 +280,15 @@ xfs_getfsmap_helper(
if (fatal_signal_pending(current))
return -EINTR;
+ if (len_daddr == 0)
+ len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+
/*
* Filter out records that start before our startpoint, if the
* caller requested that.
*/
- if (xfs_rmap_compare(rec, &info->low) < 0) {
- rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) {
+ rec_daddr += len_daddr;
if (info->next_daddr < rec_daddr)
info->next_daddr = rec_daddr;
return 0;
@@ -280,7 +307,7 @@ xfs_getfsmap_helper(
info->head->fmh_entries++;
- rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ rec_daddr += len_daddr;
if (info->next_daddr < rec_daddr)
info->next_daddr = rec_daddr;
return 0;
@@ -320,7 +347,7 @@ xfs_getfsmap_helper(
if (error)
return error;
fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
- fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ fmr.fmr_length = len_daddr;
if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
fmr.fmr_flags |= FMR_OF_PREALLOC;
if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
@@ -337,7 +364,7 @@ xfs_getfsmap_helper(
xfs_getfsmap_format(mp, &fmr, info);
out:
- rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ rec_daddr += len_daddr;
if (info->next_daddr < rec_daddr)
info->next_daddr = rec_daddr;
return 0;
@@ -358,7 +385,7 @@ xfs_getfsmap_datadev_helper(
fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock);
rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
- return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
+ return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr, 0);
}
/* Transform a bnobt irec into a fsmap */
@@ -382,7 +409,7 @@ xfs_getfsmap_datadev_bnobt_helper(
irec.rm_offset = 0;
irec.rm_flags = 0;
- return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr);
+ return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr, 0);
}
/* Set rmap flags based on the getfsmap flags */
@@ -409,31 +436,25 @@ xfs_getfsmap_logdev(
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_rmap_irec rmap;
- int error;
+ xfs_daddr_t rec_daddr, len_daddr;
+ xfs_fsblock_t start_fsb, end_fsb;
+ uint64_t eofs;
- /* Set up search keys */
- info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
- info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
- error = xfs_fsmap_owner_to_rmap(&info->low, keys);
- if (error)
- return error;
- info->low.rm_blockcount = 0;
- xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
+ if (keys[0].fmr_physical >= eofs)
+ return 0;
+ start_fsb = XFS_BB_TO_FSBT(mp,
+ keys[0].fmr_physical + keys[0].fmr_length);
+ end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
- error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1);
- if (error)
- return error;
- info->high.rm_startblock = -1U;
- info->high.rm_owner = ULLONG_MAX;
- info->high.rm_offset = ULLONG_MAX;
- info->high.rm_blockcount = 0;
- info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
- info->missing_owner = XFS_FMR_OWN_FREE;
+ /* Adjust the low key if we are continuing from where we left off. */
+ if (keys[0].fmr_length > 0)
+ info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb);
- trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low);
- trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high);
+ trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb);
+ trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb);
- if (keys[0].fmr_physical > 0)
+ if (start_fsb > 0)
return 0;
/* Fabricate an rmap entry for the external log device. */
@@ -443,7 +464,9 @@ xfs_getfsmap_logdev(
rmap.rm_offset = 0;
rmap.rm_flags = 0;
- return xfs_getfsmap_helper(tp, info, &rmap, 0);
+ rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock);
+ len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount);
+ return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr);
}
#ifdef CONFIG_XFS_RT
@@ -457,72 +480,58 @@ xfs_getfsmap_rtdev_rtbitmap_helper(
{
struct xfs_getfsmap_info *info = priv;
struct xfs_rmap_irec irec;
- xfs_daddr_t rec_daddr;
+ xfs_rtblock_t rtbno;
+ xfs_daddr_t rec_daddr, len_daddr;
+
+ rtbno = rec->ar_startext * mp->m_sb.sb_rextsize;
+ rec_daddr = XFS_FSB_TO_BB(mp, rtbno);
+ irec.rm_startblock = rtbno;
+
+ rtbno = rec->ar_extcount * mp->m_sb.sb_rextsize;
+ len_daddr = XFS_FSB_TO_BB(mp, rtbno);
+ irec.rm_blockcount = rtbno;
- irec.rm_startblock = rec->ar_startext * mp->m_sb.sb_rextsize;
- rec_daddr = XFS_FSB_TO_BB(mp, irec.rm_startblock);
- irec.rm_blockcount = rec->ar_extcount * mp->m_sb.sb_rextsize;
irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
irec.rm_offset = 0;
irec.rm_flags = 0;
- return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
+ return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr);
}
-/* Execute a getfsmap query against the realtime device. */
+/* Execute a getfsmap query against the realtime device rtbitmap. */
STATIC int
-__xfs_getfsmap_rtdev(
+xfs_getfsmap_rtdev_rtbitmap(
struct xfs_trans *tp,
const struct xfs_fsmap *keys,
- int (*query_fn)(struct xfs_trans *,
- struct xfs_getfsmap_info *),
struct xfs_getfsmap_info *info)
{
+
+ struct xfs_rtalloc_rec alow = { 0 };
+ struct xfs_rtalloc_rec ahigh = { 0 };
struct xfs_mount *mp = tp->t_mountp;
- xfs_fsblock_t start_fsb;
- xfs_fsblock_t end_fsb;
+ xfs_rtblock_t start_rtb;
+ xfs_rtblock_t end_rtb;
uint64_t eofs;
- int error = 0;
+ int error;
- eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rextents * mp->m_sb.sb_rextsize);
if (keys[0].fmr_physical >= eofs)
return 0;
- start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
- end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
+ start_rtb = XFS_BB_TO_FSBT(mp,
+ keys[0].fmr_physical + keys[0].fmr_length);
+ end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
- /* Set up search keys */
- info->low.rm_startblock = start_fsb;
- error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
- if (error)
- return error;
- info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
- info->low.rm_blockcount = 0;
- xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
-
- info->high.rm_startblock = end_fsb;
- error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
- if (error)
- return error;
- info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset);
- info->high.rm_blockcount = 0;
- xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
-
- trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low);
- trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high);
+ info->missing_owner = XFS_FMR_OWN_UNKNOWN;
- return query_fn(tp, info);
-}
+ /* Adjust the low key if we are continuing from where we left off. */
+ if (keys[0].fmr_length > 0) {
+ info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb);
+ if (info->low_daddr >= eofs)
+ return 0;
+ }
-/* Actually query the realtime bitmap. */
-STATIC int
-xfs_getfsmap_rtdev_rtbitmap_query(
- struct xfs_trans *tp,
- struct xfs_getfsmap_info *info)
-{
- struct xfs_rtalloc_rec alow = { 0 };
- struct xfs_rtalloc_rec ahigh = { 0 };
- struct xfs_mount *mp = tp->t_mountp;
- int error;
+ trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb);
+ trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb);
xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
@@ -530,8 +539,8 @@ xfs_getfsmap_rtdev_rtbitmap_query(
* Set up query parameters to return free rtextents covering the range
* we want.
*/
- alow.ar_startext = info->low.rm_startblock;
- ahigh.ar_startext = info->high.rm_startblock;
+ alow.ar_startext = start_rtb;
+ ahigh.ar_startext = end_rtb;
do_div(alow.ar_startext, mp->m_sb.sb_rextsize);
if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize))
ahigh.ar_startext++;
@@ -554,18 +563,6 @@ err:
xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
return error;
}
-
-/* Execute a getfsmap query against the realtime device rtbitmap. */
-STATIC int
-xfs_getfsmap_rtdev_rtbitmap(
- struct xfs_trans *tp,
- const struct xfs_fsmap *keys,
- struct xfs_getfsmap_info *info)
-{
- info->missing_owner = XFS_FMR_OWN_UNKNOWN;
- return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query,
- info);
-}
#endif /* CONFIG_XFS_RT */
/* Execute a getfsmap query against the regular data device. */
@@ -606,9 +603,27 @@ __xfs_getfsmap_datadev(
error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
if (error)
return error;
- info->low.rm_blockcount = 0;
+ info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
+ /* Adjust the low key if we are continuing from where we left off. */
+ if (info->low.rm_blockcount == 0) {
+ /* empty */
+ } else if (XFS_RMAP_NON_INODE_OWNER(info->low.rm_owner) ||
+ (info->low.rm_flags & (XFS_RMAP_ATTR_FORK |
+ XFS_RMAP_BMBT_BLOCK |
+ XFS_RMAP_UNWRITTEN))) {
+ info->low.rm_startblock += info->low.rm_blockcount;
+ info->low.rm_owner = 0;
+ info->low.rm_offset = 0;
+
+ start_fsb += info->low.rm_blockcount;
+ if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
+ return 0;
+ } else {
+ info->low.rm_offset += info->low.rm_blockcount;
+ }
+
info->high.rm_startblock = -1U;
info->high.rm_owner = ULLONG_MAX;
info->high.rm_offset = ULLONG_MAX;
@@ -659,12 +674,8 @@ __xfs_getfsmap_datadev(
* Set the AG low key to the start of the AG prior to
* moving on to the next AG.
*/
- if (pag->pag_agno == start_ag) {
- info->low.rm_startblock = 0;
- info->low.rm_owner = 0;
- info->low.rm_offset = 0;
- info->low.rm_flags = 0;
- }
+ if (pag->pag_agno == start_ag)
+ memset(&info->low, 0, sizeof(info->low));
/*
* If this is the last AG, report any gap at the end of it
@@ -791,6 +802,19 @@ xfs_getfsmap_check_keys(
struct xfs_fsmap *low_key,
struct xfs_fsmap *high_key)
{
+ if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
+ if (low_key->fmr_offset)
+ return false;
+ }
+ if (high_key->fmr_flags != -1U &&
+ (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER |
+ FMR_OF_EXTENT_MAP))) {
+ if (high_key->fmr_offset && high_key->fmr_offset != -1ULL)
+ return false;
+ }
+ if (high_key->fmr_length && high_key->fmr_length != -1ULL)
+ return false;
+
if (low_key->fmr_device > high_key->fmr_device)
return false;
if (low_key->fmr_device < high_key->fmr_device)
@@ -834,15 +858,15 @@ xfs_getfsmap_check_keys(
* ----------------
* There are multiple levels of keys and counters at work here:
* xfs_fsmap_head.fmh_keys -- low and high fsmap keys passed in;
- * these reflect fs-wide sector addrs.
+ * these reflect fs-wide sector addrs.
* dkeys -- fmh_keys used to query each device;
- * these are fmh_keys but w/ the low key
- * bumped up by fmr_length.
+ * these are fmh_keys but w/ the low key
+ * bumped up by fmr_length.
* xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this
* is how we detect gaps in the fsmap
records and report them.
* xfs_getfsmap_info.low/high -- per-AG low/high keys computed from
- * dkeys; used to query the metadata.
+ * dkeys; used to query the metadata.
*/
int
xfs_getfsmap(
@@ -863,6 +887,8 @@ xfs_getfsmap(
if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
return -EINVAL;
+ if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1]))
+ return -EINVAL;
use_rmap = xfs_has_rmapbt(mp) &&
has_capability_noaudit(current, CAP_SYS_ADMIN);
@@ -901,26 +927,15 @@ xfs_getfsmap(
* blocks could be mapped to several other files/offsets.
* According to rmapbt record ordering, the minimal next
* possible record for the block range is the next starting
- * offset in the same inode. Therefore, bump the file offset to
- * continue the search appropriately. For all other low key
- * mapping types (attr blocks, metadata), bump the physical
- * offset as there can be no other mapping for the same physical
- * block range.
+ * offset in the same inode. Therefore, each fsmap backend bumps
+ * the file offset to continue the search appropriately. For
+ * all other low key mapping types (attr blocks, metadata), each
+ * fsmap backend bumps the physical offset as there can be no
+ * other mapping for the same physical block range.
*/
dkeys[0] = head->fmh_keys[0];
- if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
- dkeys[0].fmr_physical += dkeys[0].fmr_length;
- dkeys[0].fmr_owner = 0;
- if (dkeys[0].fmr_offset)
- return -EINVAL;
- } else
- dkeys[0].fmr_offset += dkeys[0].fmr_length;
- dkeys[0].fmr_length = 0;
memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
- if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1]))
- return -EINVAL;
-
info.next_daddr = head->fmh_keys[0].fmr_physical +
head->fmh_keys[0].fmr_length;
info.fsmap_recs = fsmap_recs;
@@ -960,6 +975,8 @@ xfs_getfsmap(
info.dev = handlers[i].dev;
info.last = false;
info.pag = NULL;
+ info.low_daddr = -1ULL;
+ info.low.rm_blockcount = 0;
error = handlers[i].fn(tp, dkeys, &info);
if (error)
break;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index fc61cc024023..79004d193e54 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -639,7 +639,6 @@ xfs_log_mount(
int num_bblks)
{
struct xlog *log;
- bool fatal = xfs_has_crc(mp);
int error = 0;
int min_logfsbs;
@@ -663,53 +662,37 @@ xfs_log_mount(
mp->m_log = log;
/*
- * Validate the given log space and drop a critical message via syslog
- * if the log size is too small that would lead to some unexpected
- * situations in transaction log space reservation stage.
+ * Now that we have set up the log and it's internal geometry
+ * parameters, we can validate the given log space and drop a critical
+ * message via syslog if the log size is too small. A log that is too
+ * small can lead to unexpected situations in transaction log space
+ * reservation stage. The superblock verifier has already validated all
+ * the other log geometry constraints, so we don't have to check those
+ * here.
*
- * Note: we can't just reject the mount if the validation fails. This
- * would mean that people would have to downgrade their kernel just to
- * remedy the situation as there is no way to grow the log (short of
- * black magic surgery with xfs_db).
+ * Note: For v4 filesystems, we can't just reject the mount if the
+ * validation fails. This would mean that people would have to
+ * downgrade their kernel just to remedy the situation as there is no
+ * way to grow the log (short of black magic surgery with xfs_db).
*
- * We can, however, reject mounts for CRC format filesystems, as the
+ * We can, however, reject mounts for V5 format filesystems, as the
* mkfs binary being used to make the filesystem should never create a
* filesystem with a log that is too small.
*/
min_logfsbs = xfs_log_calc_minimum_size(mp);
-
if (mp->m_sb.sb_logblocks < min_logfsbs) {
xfs_warn(mp,
"Log size %d blocks too small, minimum size is %d blocks",
mp->m_sb.sb_logblocks, min_logfsbs);
- error = -EINVAL;
- } else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) {
- xfs_warn(mp,
- "Log size %d blocks too large, maximum size is %lld blocks",
- mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS);
- error = -EINVAL;
- } else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) {
- xfs_warn(mp,
- "log size %lld bytes too large, maximum size is %lld bytes",
- XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
- XFS_MAX_LOG_BYTES);
- error = -EINVAL;
- } else if (mp->m_sb.sb_logsunit > 1 &&
- mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) {
- xfs_warn(mp,
- "log stripe unit %u bytes must be a multiple of block size",
- mp->m_sb.sb_logsunit);
- error = -EINVAL;
- fatal = true;
- }
- if (error) {
+
/*
* Log check errors are always fatal on v5; or whenever bad
* metadata leads to a crash.
*/
- if (fatal) {
+ if (xfs_has_crc(mp)) {
xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
ASSERT(0);
+ error = -EINVAL;
goto out_free_log;
}
xfs_crit(mp, "Log size out of supported range.");
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index c4078d0ec108..4a9bbd3fe120 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -114,7 +114,8 @@ xfs_dax_notify_ddev_failure(
int error = 0;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, daddr);
xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno);
- xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp, daddr + bblen);
+ xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp,
+ daddr + bblen - 1);
xfs_agnumber_t end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno);
error = xfs_trans_alloc_empty(mp, &tp);
@@ -210,7 +211,7 @@ xfs_dax_notify_failure(
ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1;
/* Ignore the range out of filesystem area */
- if (offset + len < ddev_start)
+ if (offset + len - 1 < ddev_start)
return -ENXIO;
if (offset > ddev_end)
return -ENXIO;
@@ -222,8 +223,8 @@ xfs_dax_notify_failure(
len -= ddev_start - offset;
offset = 0;
}
- if (offset + len > ddev_end)
- len -= ddev_end - offset;
+ if (offset + len - 1 > ddev_end)
+ len = ddev_end - offset + 1;
return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len),
mf_flags);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index abcc559f3c64..eb9102453aff 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -617,7 +617,8 @@ xfs_reflink_cancel_cow_blocks(
del.br_blockcount);
error = xfs_free_extent_later(*tpp, del.br_startblock,
- del.br_blockcount, NULL);
+ del.br_blockcount, NULL,
+ XFS_AG_RESV_NONE);
if (error)
break;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 4db669203149..f3cc204bb4bf 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3623,6 +3623,31 @@ DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
DEFINE_FSMAP_EVENT(xfs_fsmap_mapping);
+DECLARE_EVENT_CLASS(xfs_fsmap_linear_class,
+ TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno),
+ TP_ARGS(mp, keydev, bno),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, keydev)
+ __field(xfs_fsblock_t, bno)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->keydev = new_decode_dev(keydev);
+ __entry->bno = bno;
+ ),
+ TP_printk("dev %d:%d keydev %d:%d bno 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->keydev), MINOR(__entry->keydev),
+ __entry->bno)
+)
+#define DEFINE_FSMAP_LINEAR_EVENT(name) \
+DEFINE_EVENT(xfs_fsmap_linear_class, name, \
+ TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), \
+ TP_ARGS(mp, keydev, bno))
+DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_low_key_linear);
+DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_high_key_linear);
+
DECLARE_EVENT_CLASS(xfs_getfsmap_class,
TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap),
TP_ARGS(mp, fsmap),
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 7d4109af193e..1098452e7f95 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -823,7 +823,7 @@ xfs_trans_ail_update_bulk(
trace_xfs_ail_insert(lip, 0, lsn);
}
lip->li_lsn = lsn;
- list_add(&lip->li_ail, &tmp);
+ list_add_tail(&lip->li_ail, &tmp);
}
if (!list_empty(&tmp))