summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/internal.h6
-rw-r--r--fs/afs/main.c3
-rw-r--r--fs/afs/server.c14
-rw-r--r--fs/afs/volume.c4
-rw-r--r--fs/aio.c9
-rw-r--r--fs/bcachefs/bcachefs.h12
-rw-r--r--fs/bcachefs/btree_update_interior.c3
-rw-r--r--fs/bcachefs/fs-ioctl.c4
-rw-r--r--fs/bcachefs/fs.c9
-rw-r--r--fs/bcachefs/io_write.c1
-rw-r--r--fs/bcachefs/journal_io.c4
-rw-r--r--fs/bcachefs/journal_reclaim.c2
-rw-r--r--fs/bcachefs/mean_and_variance.h2
-rw-r--r--fs/bcachefs/printbuf.c1
-rw-r--r--fs/bcachefs/recovery.c11
-rw-r--r--fs/bcachefs/sb-members.c2
-rw-r--r--fs/bcachefs/super-io.c2
-rw-r--r--fs/bcachefs/super.c4
-rw-r--r--fs/bcachefs/thread_with_file.c2
-rw-r--r--fs/bcachefs/util.c5
-rw-r--r--fs/btrfs/block-group.c80
-rw-r--r--fs/btrfs/block-group.h7
-rw-r--r--fs/btrfs/defrag.c2
-rw-r--r--fs/btrfs/delalloc-space.c29
-rw-r--r--fs/btrfs/disk-io.c13
-rw-r--r--fs/btrfs/extent_io.c62
-rw-r--r--fs/btrfs/inode.c26
-rw-r--r--fs/btrfs/ioctl.c5
-rw-r--r--fs/btrfs/qgroup.c14
-rw-r--r--fs/btrfs/send.c2
-rw-r--r--fs/btrfs/transaction.c38
-rw-r--r--fs/btrfs/zoned.c1
-rw-r--r--fs/cachefiles/cache.c2
-rw-r--r--fs/cachefiles/daemon.c1
-rw-r--r--fs/ceph/caps.c74
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/mds_client.c57
-rw-r--r--fs/ceph/mds_client.h7
-rw-r--r--fs/ceph/super.h2
-rw-r--r--fs/hugetlbfs/inode.c19
-rw-r--r--fs/namespace.c11
-rw-r--r--fs/netfs/buffered_write.c3
-rw-r--r--fs/netfs/direct_write.c5
-rw-r--r--fs/netfs/io.c2
-rw-r--r--fs/nfsd/nfs4state.c11
-rw-r--r--fs/nilfs2/file.c8
-rw-r--r--fs/nilfs2/recovery.c7
-rw-r--r--fs/nilfs2/segment.c8
-rw-r--r--fs/ntfs3/attrib.c45
-rw-r--r--fs/ntfs3/attrlist.c12
-rw-r--r--fs/ntfs3/bitmap.c4
-rw-r--r--fs/ntfs3/dir.c48
-rw-r--r--fs/ntfs3/file.c76
-rw-r--r--fs/ntfs3/frecord.c19
-rw-r--r--fs/ntfs3/fslog.c232
-rw-r--r--fs/ntfs3/fsntfs.c29
-rw-r--r--fs/ntfs3/index.c8
-rw-r--r--fs/ntfs3/inode.c32
-rw-r--r--fs/ntfs3/namei.c12
-rw-r--r--fs/ntfs3/ntfs.h4
-rw-r--r--fs/ntfs3/ntfs_fs.h29
-rw-r--r--fs/ntfs3/record.c18
-rw-r--r--fs/ntfs3/super.c54
-rw-r--r--fs/ntfs3/xattr.c6
-rw-r--r--fs/overlayfs/copy_up.c14
-rw-r--r--fs/proc/array.c66
-rw-r--r--fs/remap_range.c31
-rw-r--r--fs/smb/client/cached_dir.c1
-rw-r--r--fs/smb/client/cifsglob.h1
-rw-r--r--fs/smb/client/connect.c25
-rw-r--r--fs/smb/client/dfs.c7
-rw-r--r--fs/smb/client/file.c3
-rw-r--r--fs/smb/client/fs_context.c13
-rw-r--r--fs/smb/client/namespace.c16
-rw-r--r--fs/smb/client/readdir.c15
-rw-r--r--fs/smb/client/sess.c3
-rw-r--r--fs/smb/client/smb2ops.c14
-rw-r--r--fs/smb/client/smb2pdu.c16
-rw-r--r--fs/smb/server/misc.c1
-rw-r--r--fs/smb/server/smb2pdu.c8
-rw-r--r--fs/zonefs/file.c42
-rw-r--r--fs/zonefs/super.c66
82 files changed, 1028 insertions, 540 deletions
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 9c03fcf7ffaa..6ce5a612937c 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -321,8 +321,7 @@ struct afs_net {
struct list_head fs_probe_slow; /* List of afs_server to probe at 5m intervals */
struct hlist_head fs_proc; /* procfs servers list */
- struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */
- struct hlist_head fs_addresses6; /* afs_server (by lowest IPv6 addr) */
+ struct hlist_head fs_addresses; /* afs_server (by lowest IPv6 addr) */
seqlock_t fs_addr_lock; /* For fs_addresses[46] */
struct work_struct fs_manager;
@@ -561,8 +560,7 @@ struct afs_server {
struct afs_server __rcu *uuid_next; /* Next server with same UUID */
struct afs_server *uuid_prev; /* Previous server with same UUID */
struct list_head probe_link; /* Link in net->fs_probe_list */
- struct hlist_node addr4_link; /* Link in net->fs_addresses4 */
- struct hlist_node addr6_link; /* Link in net->fs_addresses6 */
+ struct hlist_node addr_link; /* Link in net->fs_addresses6 */
struct hlist_node proc_link; /* Link in net->fs_proc */
struct list_head volumes; /* RCU list of afs_server_entry objects */
struct afs_server *gc_next; /* Next server in manager's list */
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 1b3bd21c168a..a14f6013e316 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -90,8 +90,7 @@ static int __net_init afs_net_init(struct net *net_ns)
INIT_LIST_HEAD(&net->fs_probe_slow);
INIT_HLIST_HEAD(&net->fs_proc);
- INIT_HLIST_HEAD(&net->fs_addresses4);
- INIT_HLIST_HEAD(&net->fs_addresses6);
+ INIT_HLIST_HEAD(&net->fs_addresses);
seqlock_init(&net->fs_addr_lock);
INIT_WORK(&net->fs_manager, afs_manage_servers);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index e169121f603e..038f9d0ae3af 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -38,7 +38,7 @@ struct afs_server *afs_find_server(struct afs_net *net, const struct rxrpc_peer
seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
- hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
+ hlist_for_each_entry_rcu(server, &net->fs_addresses, addr_link) {
estate = rcu_dereference(server->endpoint_state);
alist = estate->addresses;
for (i = 0; i < alist->nr_addrs; i++)
@@ -177,10 +177,8 @@ added_dup:
* bit, but anything we might want to do gets messy and memory
* intensive.
*/
- if (alist->nr_ipv4 > 0)
- hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
- if (alist->nr_addrs > alist->nr_ipv4)
- hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
+ if (alist->nr_addrs > 0)
+ hlist_add_head_rcu(&server->addr_link, &net->fs_addresses);
write_sequnlock(&net->fs_addr_lock);
@@ -511,10 +509,8 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
list_del(&server->probe_link);
hlist_del_rcu(&server->proc_link);
- if (!hlist_unhashed(&server->addr4_link))
- hlist_del_rcu(&server->addr4_link);
- if (!hlist_unhashed(&server->addr6_link))
- hlist_del_rcu(&server->addr6_link);
+ if (!hlist_unhashed(&server->addr_link))
+ hlist_del_rcu(&server->addr_link);
}
write_sequnlock(&net->fs_lock);
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 020ecd45e476..af3a3f57c1b3 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -353,7 +353,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
{
struct afs_server_list *new, *old, *discard;
struct afs_vldb_entry *vldb;
- char idbuf[16];
+ char idbuf[24];
int ret, idsz;
_enter("");
@@ -361,7 +361,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
/* We look up an ID by passing it as a decimal string in the
* operation's name parameter.
*/
- idsz = sprintf(idbuf, "%llu", volume->vid);
+ idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid);
vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
if (IS_ERR(vldb)) {
diff --git a/fs/aio.c b/fs/aio.c
index bb2ff48991f3..da18dbcfcb22 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -593,6 +593,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
struct kioctx *ctx = req->ki_ctx;
unsigned long flags;
+ /*
+ * kiocb didn't come from aio or is neither a read nor a write, hence
+ * ignore it.
+ */
+ if (!(iocb->ki_flags & IOCB_AIO_RW))
+ return;
+
if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
return;
@@ -1509,7 +1516,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
req->ki_complete = aio_complete_rw;
req->private = NULL;
req->ki_pos = iocb->aio_offset;
- req->ki_flags = req->ki_filp->f_iocb_flags;
+ req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW;
if (iocb->aio_flags & IOCB_FLAG_RESFD)
req->ki_flags |= IOCB_EVENTFD;
if (iocb->aio_flags & IOCB_FLAG_IOPRIO) {
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index b80c6c9efd8c..69d0d60d50e3 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -1249,6 +1249,18 @@ static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c)
return stdio;
}
+static inline unsigned metadata_replicas_required(struct bch_fs *c)
+{
+ return min(c->opts.metadata_replicas,
+ c->opts.metadata_replicas_required);
+}
+
+static inline unsigned data_replicas_required(struct bch_fs *c)
+{
+ return min(c->opts.data_replicas,
+ c->opts.data_replicas_required);
+}
+
#define BKEY_PADDED_ONSTACK(key, pad) \
struct { struct bkey_i key; __u64 key ## _pad[pad]; }
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 17a5938aa71a..4530b14ff2c3 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -280,7 +280,8 @@ retry:
writepoint_ptr(&c->btree_write_point),
&devs_have,
res->nr_replicas,
- c->opts.metadata_replicas_required,
+ min(res->nr_replicas,
+ c->opts.metadata_replicas_required),
watermark, 0, cl, &wp);
if (unlikely(ret))
return ERR_PTR(ret);
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 3a4c24c28e7f..3dc8630ff9fe 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -455,6 +455,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
if (IS_ERR(victim))
return PTR_ERR(victim);
+ dir = d_inode(path.dentry);
if (victim->d_sb->s_fs_info != c) {
ret = -EXDEV;
goto err;
@@ -463,14 +464,13 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
ret = -ENOENT;
goto err;
}
- dir = d_inode(path.dentry);
ret = __bch2_unlink(dir, victim, true);
if (!ret) {
fsnotify_rmdir(dir, victim);
d_delete(victim);
}
- inode_unlock(dir);
err:
+ inode_unlock(dir);
dput(victim);
path_put(&path);
return ret;
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index ec419b8e2c43..77ae65542db9 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -435,7 +435,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
bch2_subvol_is_ro(c, inode->ei_subvol) ?:
__bch2_link(c, inode, dir, dentry);
if (unlikely(ret))
- return ret;
+ return bch2_err_class(ret);
ihold(&inode->v);
d_instantiate(dentry, &inode->v);
@@ -487,8 +487,9 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
struct bch_inode_info *dir= to_bch_ei(vdir);
struct bch_fs *c = dir->v.i_sb->s_fs_info;
- return bch2_subvol_is_ro(c, dir->ei_subvol) ?:
+ int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
__bch2_unlink(vdir, dentry, false);
+ return bch2_err_class(ret);
}
static int bch2_symlink(struct mnt_idmap *idmap,
@@ -523,7 +524,7 @@ static int bch2_symlink(struct mnt_idmap *idmap,
return 0;
err:
iput(&inode->v);
- return ret;
+ return bch2_err_class(ret);
}
static int bch2_mkdir(struct mnt_idmap *idmap,
@@ -641,7 +642,7 @@ err:
src_inode,
dst_inode);
- return ret;
+ return bch2_err_class(ret);
}
static void bch2_setattr_copy(struct mnt_idmap *idmap,
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index ef3a53f9045a..2c098ac017b3 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -1564,6 +1564,7 @@ CLOSURE_CALLBACK(bch2_write)
BUG_ON(!op->write_point.v);
BUG_ON(bkey_eq(op->pos, POS_MAX));
+ op->nr_replicas_required = min_t(unsigned, op->nr_replicas_required, op->nr_replicas);
op->start_time = local_clock();
bch2_keylist_init(&op->insert_keys, op->inline_keys);
wbio_init(bio)->put_bio = false;
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index bfd6585e746d..47805193f18c 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1478,6 +1478,8 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w)
c->opts.foreground_target;
unsigned i, replicas = 0, replicas_want =
READ_ONCE(c->opts.metadata_replicas);
+ unsigned replicas_need = min_t(unsigned, replicas_want,
+ READ_ONCE(c->opts.metadata_replicas_required));
rcu_read_lock();
retry:
@@ -1526,7 +1528,7 @@ done:
BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
- return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS;
+ return replicas >= replicas_need ? 0 : -EROFS;
}
static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 820d25e19e5f..2cf626315652 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -205,7 +205,7 @@ void bch2_journal_space_available(struct journal *j)
j->can_discard = can_discard;
- if (nr_online < c->opts.metadata_replicas_required) {
+ if (nr_online < metadata_replicas_required(c)) {
ret = JOURNAL_ERR_insufficient_devices;
goto out;
}
diff --git a/fs/bcachefs/mean_and_variance.h b/fs/bcachefs/mean_and_variance.h
index b2be565bb8f2..64df11ab422b 100644
--- a/fs/bcachefs/mean_and_variance.h
+++ b/fs/bcachefs/mean_and_variance.h
@@ -17,7 +17,7 @@
* Rust and rustc has issues with u128.
*/
-#if defined(__SIZEOF_INT128__) && defined(__KERNEL__)
+#if defined(__SIZEOF_INT128__) && defined(__KERNEL__) && !defined(CONFIG_PARISC)
typedef struct {
unsigned __int128 v;
diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c
index accf246c3233..b27d22925929 100644
--- a/fs/bcachefs/printbuf.c
+++ b/fs/bcachefs/printbuf.c
@@ -56,6 +56,7 @@ void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list args)
va_copy(args2, args);
len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args2);
+ va_end(args2);
} while (len + 1 >= printbuf_remaining(out) &&
!bch2_printbuf_make_room(out, len + 1));
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 9127d0e3ca2f..21e13bb4335b 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -577,8 +577,9 @@ u64 bch2_recovery_passes_from_stable(u64 v)
static bool check_version_upgrade(struct bch_fs *c)
{
- unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version);
unsigned latest_version = bcachefs_metadata_version_current;
+ unsigned latest_compatible = min(latest_version,
+ bch2_latest_compatible_version(c->sb.version));
unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
unsigned new_version = 0;
@@ -597,7 +598,7 @@ static bool check_version_upgrade(struct bch_fs *c)
new_version = latest_version;
break;
case BCH_VERSION_UPGRADE_none:
- new_version = old_version;
+ new_version = min(old_version, latest_version);
break;
}
}
@@ -774,7 +775,7 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
- if (!(c->opts.nochanges && c->opts.norecovery)) {
+ if (!c->opts.nochanges) {
mutex_lock(&c->sb_lock);
bool write_sb = false;
@@ -804,7 +805,7 @@ int bch2_fs_recovery(struct bch_fs *c)
if (bch2_check_version_downgrade(c)) {
struct printbuf buf = PRINTBUF;
- prt_str(&buf, "Version downgrade required:\n");
+ prt_str(&buf, "Version downgrade required:");
__le64 passes = ext->recovery_passes_required[0];
bch2_sb_set_downgrade(c,
@@ -812,7 +813,7 @@ int bch2_fs_recovery(struct bch_fs *c)
BCH_VERSION_MINOR(c->sb.version));
passes = ext->recovery_passes_required[0] & ~passes;
if (passes) {
- prt_str(&buf, " running recovery passes: ");
+ prt_str(&buf, "\n running recovery passes: ");
prt_bitflags(&buf, bch2_recovery_passes,
bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
}
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index a45354d2acde..eff5ce18c69c 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -421,7 +421,7 @@ void bch2_dev_errors_reset(struct bch_dev *ca)
m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
- m->errors_reset_time = ktime_get_real_seconds();
+ m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index d60c7d27a047..36988add581f 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -717,7 +717,7 @@ retry:
if (IS_ERR(sb->bdev_handle)) {
ret = PTR_ERR(sb->bdev_handle);
- goto out;
+ goto err;
}
sb->bdev = sb->bdev_handle->bdev;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index b9911402b175..6b23e11825e6 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1428,10 +1428,10 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED)
? c->opts.metadata_replicas
- : c->opts.metadata_replicas_required,
+ : metadata_replicas_required(c),
!(flags & BCH_FORCE_IF_DATA_DEGRADED)
? c->opts.data_replicas
- : c->opts.data_replicas_required);
+ : data_replicas_required(c));
return nr_rw >= required;
case BCH_MEMBER_STATE_failed:
diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c
index b1c867aa2b58..9220d7de10db 100644
--- a/fs/bcachefs/thread_with_file.c
+++ b/fs/bcachefs/thread_with_file.c
@@ -53,9 +53,9 @@ int bch2_run_thread_with_file(struct thread_with_file *thr,
if (ret)
goto err;
- fd_install(fd, file);
get_task_struct(thr->task);
wake_up_process(thr->task);
+ fd_install(fd, file);
return fd;
err:
if (fd >= 0)
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 56b815fd9fc6..231003b405ef 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -418,14 +418,15 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
bch2_quantiles_update(&stats->quantiles, duration);
}
- if (time_after64(end, stats->last_event)) {
+ if (stats->last_event && time_after64(end, stats->last_event)) {
freq = end - stats->last_event;
mean_and_variance_update(&stats->freq_stats, freq);
mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq);
stats->max_freq = max(stats->max_freq, freq);
stats->min_freq = min(stats->min_freq, freq);
- stats->last_event = end;
}
+
+ stats->last_event = end;
}
static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index a9be9ac99222..378d9103a207 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1455,6 +1455,7 @@ out:
*/
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
{
+ LIST_HEAD(retry_list);
struct btrfs_block_group *block_group;
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
@@ -1476,6 +1477,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->unused_bgs_lock);
while (!list_empty(&fs_info->unused_bgs)) {
+ u64 used;
int trimming;
block_group = list_first_entry(&fs_info->unused_bgs,
@@ -1511,9 +1513,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
goto next;
}
+ spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
- if (block_group->reserved || block_group->pinned ||
- block_group->used || block_group->ro ||
+ if (btrfs_is_block_group_used(block_group) || block_group->ro ||
list_is_singular(&block_group->list)) {
/*
* We want to bail if we made new allocations or have
@@ -1523,10 +1525,49 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
*/
trace_btrfs_skip_unused_block_group(block_group);
spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
up_write(&space_info->groups_sem);
goto next;
}
+
+ /*
+ * The block group may be unused but there may be space reserved
+ * accounting with the existence of that block group, that is,
+ * space_info->bytes_may_use was incremented by a task but no
+ * space was yet allocated from the block group by the task.
+ * That space may or may not be allocated, as we are generally
+ * pessimistic about space reservation for metadata as well as
+ * for data when using compression (as we reserve space based on
+ * the worst case, when data can't be compressed, and before
+ * actually attempting compression, before starting writeback).
+ *
+ * So check if the total space of the space_info minus the size
+ * of this block group is less than the used space of the
+ * space_info - if that's the case, then it means we have tasks
+ * that might be relying on the block group in order to allocate
+ * extents, and add back the block group to the unused list when
+ * we finish, so that we retry later in case no tasks ended up
+ * needing to allocate extents from the block group.
+ */
+ used = btrfs_space_info_used(space_info, true);
+ if (space_info->total_bytes - block_group->length < used) {
+ /*
+ * Add a reference for the list, compensate for the ref
+ * drop under the "next" label for the
+ * fs_info->unused_bgs list.
+ */
+ btrfs_get_block_group(block_group);
+ list_add_tail(&block_group->bg_list, &retry_list);
+
+ trace_btrfs_skip_unused_block_group(block_group);
+ spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
+ up_write(&space_info->groups_sem);
+ goto next;
+ }
+
spin_unlock(&block_group->lock);
+ spin_unlock(&space_info->lock);
/* We don't want to force the issue, only flip if it's ok. */
ret = inc_block_group_ro(block_group, 0);
@@ -1650,12 +1691,16 @@ next:
btrfs_put_block_group(block_group);
spin_lock(&fs_info->unused_bgs_lock);
}
+ list_splice_tail(&retry_list, &fs_info->unused_bgs);
spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
return;
flip_async:
btrfs_end_transaction(trans);
+ spin_lock(&fs_info->unused_bgs_lock);
+ list_splice_tail(&retry_list, &fs_info->unused_bgs);
+ spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_put_block_group(block_group);
btrfs_discard_punt_unused_bgs_list(fs_info);
@@ -2684,6 +2729,37 @@ next:
btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info);
list_del_init(&block_group->bg_list);
clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
+
+ /*
+ * If the block group is still unused, add it to the list of
+ * unused block groups. The block group may have been created in
+ * order to satisfy a space reservation, in which case the
+ * extent allocation only happens later. But often we don't
+ * actually need to allocate space that we previously reserved,
+ * so the block group may become unused for a long time. For
+ * example for metadata we generally reserve space for a worst
+ * possible scenario, but then don't end up allocating all that
+ * space or none at all (due to no need to COW, extent buffers
+ * were already COWed in the current transaction and still
+ * unwritten, tree heights lower than the maximum possible
+ * height, etc). For data we generally reserve the axact amount
+ * of space we are going to allocate later, the exception is
+ * when using compression, as we must reserve space based on the
+ * uncompressed data size, because the compression is only done
+ * when writeback triggered and we don't know how much space we
+ * are actually going to need, so we reserve the uncompressed
+ * size because the data may be uncompressible in the worst case.
+ */
+ if (ret == 0) {
+ bool used;
+
+ spin_lock(&block_group->lock);
+ used = btrfs_is_block_group_used(block_group);
+ spin_unlock(&block_group->lock);
+
+ if (!used)
+ btrfs_mark_bg_unused(block_group);
+ }
}
btrfs_trans_release_chunk_metadata(trans);
}
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index c4a1f01cc1c2..962b11983901 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -257,6 +257,13 @@ static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
return (block_group->start + block_group->length);
}
+static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg)
+{
+ lockdep_assert_held(&bg->lock);
+
+ return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0);
+}
+
static inline bool btrfs_is_block_group_data_only(
struct btrfs_block_group *block_group)
{
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index c276b136ab63..5b0b64571418 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -1046,7 +1046,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
goto add;
/* Skip too large extent */
- if (range_len >= extent_thresh)
+ if (em->len >= extent_thresh)
goto next;
/*
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index 2833e8ef4c09..acf9f4b6c044 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -245,7 +245,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
u64 reserve_size = 0;
u64 qgroup_rsv_size = 0;
- u64 csum_leaves;
unsigned outstanding_extents;
lockdep_assert_held(&inode->lock);
@@ -260,10 +259,12 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
outstanding_extents);
reserve_size += btrfs_calc_metadata_size(fs_info, 1);
}
- csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
- inode->csum_bytes);
- reserve_size += btrfs_calc_insert_metadata_size(fs_info,
- csum_leaves);
+ if (!(inode->flags & BTRFS_INODE_NODATASUM)) {
+ u64 csum_leaves;
+
+ csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
+ reserve_size += btrfs_calc_insert_metadata_size(fs_info, csum_leaves);
+ }
/*
* For qgroup rsv, the calculation is very simple:
* account one nodesize for each outstanding extent
@@ -278,14 +279,20 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
spin_unlock(&block_rsv->lock);
}
-static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
+static void calc_inode_reservations(struct btrfs_inode *inode,
u64 num_bytes, u64 disk_num_bytes,
u64 *meta_reserve, u64 *qgroup_reserve)
{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 nr_extents = count_max_extents(fs_info, num_bytes);
- u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
+ u64 csum_leaves;
u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
+ if (inode->flags & BTRFS_INODE_NODATASUM)
+ csum_leaves = 0;
+ else
+ csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
+
*meta_reserve = btrfs_calc_insert_metadata_size(fs_info,
nr_extents + csum_leaves);
@@ -337,7 +344,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
* everything out and try again, which is bad. This way we just
* over-reserve slightly, and clean up the mess when we are done.
*/
- calc_inode_reservations(fs_info, num_bytes, disk_num_bytes,
+ calc_inode_reservations(inode, num_bytes, disk_num_bytes,
&meta_reserve, &qgroup_reserve);
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true,
noflush);
@@ -359,7 +366,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
nr_extents = count_max_extents(fs_info, num_bytes);
spin_lock(&inode->lock);
btrfs_mod_outstanding_extents(inode, nr_extents);
- inode->csum_bytes += disk_num_bytes;
+ if (!(inode->flags & BTRFS_INODE_NODATASUM))
+ inode->csum_bytes += disk_num_bytes;
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);
@@ -393,7 +401,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
spin_lock(&inode->lock);
- inode->csum_bytes -= num_bytes;
+ if (!(inode->flags & BTRFS_INODE_NODATASUM))
+ inode->csum_bytes -= num_bytes;
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
spin_unlock(&inode->lock);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c6907d533fe8..e71ef97d0a7c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1336,8 +1336,17 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
again:
root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
- /* Shouldn't get preallocated anon_dev for cached roots */
- ASSERT(!anon_dev);
+ /*
+ * Some other caller may have read out the newly inserted
+ * subvolume already (for things like backref walk etc). Not
+ * that common but still possible. In that case, we just need
+ * to free the anon_dev.
+ */
+ if (unlikely(anon_dev)) {
+ free_anon_bdev(anon_dev);
+ anon_dev = 0;
+ }
+
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
btrfs_put_root(root);
return ERR_PTR(-ENOENT);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index cfd2967f04a2..3e19a2475ab3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2689,16 +2689,34 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
* it beyond i_size.
*/
while (cur_offset < end && cur_offset < i_size) {
+ struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
u64 prealloc_start;
+ u64 lockstart;
+ u64 lockend;
u64 prealloc_len = 0;
bool delalloc;
+ lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
+ lockend = round_up(end, inode->root->fs_info->sectorsize);
+
+ /*
+ * We are only locking for the delalloc range because that's the
+ * only thing that can change here. With fiemap we have a lock
+ * on the inode, so no buffered or direct writes can happen.
+ *
+ * However mmaps and normal page writeback will cause this to
+ * change arbitrarily. We have to lock the extent lock here to
+ * make sure that nobody messes with the tree while we're doing
+ * btrfs_find_delalloc_in_range.
+ */
+ lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
delalloc_cached_state,
&delalloc_start,
&delalloc_end);
+ unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
break;
@@ -2866,15 +2884,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
const u64 ino = btrfs_ino(inode);
- struct extent_state *cached_state = NULL;
struct extent_state *delalloc_cached_state = NULL;
struct btrfs_path *path;
struct fiemap_cache cache = { 0 };
struct btrfs_backref_share_check_ctx *backref_ctx;
u64 last_extent_end;
u64 prev_extent_end;
- u64 lockstart;
- u64 lockend;
+ u64 range_start;
+ u64 range_end;
+ const u64 sectorsize = inode->root->fs_info->sectorsize;
bool stopped = false;
int ret;
@@ -2885,12 +2903,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
goto out;
}
- lockstart = round_down(start, inode->root->fs_info->sectorsize);
- lockend = round_up(start + len, inode->root->fs_info->sectorsize);
- prev_extent_end = lockstart;
+ range_start = round_down(start, sectorsize);
+ range_end = round_up(start + len, sectorsize);
+ prev_extent_end = range_start;
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
- lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
if (ret < 0)
@@ -2898,7 +2915,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
btrfs_release_path(path);
path->reada = READA_FORWARD;
- ret = fiemap_search_slot(inode, path, lockstart);
+ ret = fiemap_search_slot(inode, path, range_start);
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
@@ -2910,7 +2927,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
goto check_eof_delalloc;
}
- while (prev_extent_end < lockend) {
+ while (prev_extent_end < range_end) {
struct extent_buffer *leaf = path->nodes[0];
struct btrfs_file_extent_item *ei;
struct btrfs_key key;
@@ -2933,19 +2950,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
* The first iteration can leave us at an extent item that ends
* before our range's start. Move to the next item.
*/
- if (extent_end <= lockstart)
+ if (extent_end <= range_start)
goto next_item;
backref_ctx->curr_leaf_bytenr = leaf->start;
/* We have in implicit hole (NO_HOLES feature enabled). */
if (prev_extent_end < key.offset) {
- const u64 range_end = min(key.offset, lockend) - 1;
+ const u64 hole_end = min(key.offset, range_end) - 1;
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state,
backref_ctx, 0, 0, 0,
- prev_extent_end, range_end);
+ prev_extent_end, hole_end);
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
@@ -2955,7 +2972,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
}
/* We've reached the end of the fiemap range, stop. */
- if (key.offset >= lockend) {
+ if (key.offset >= range_end) {
stopped = true;
break;
}
@@ -3049,29 +3066,41 @@ check_eof_delalloc:
btrfs_free_path(path);
path = NULL;
- if (!stopped && prev_extent_end < lockend) {
+ if (!stopped && prev_extent_end < range_end) {
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state, backref_ctx,
- 0, 0, 0, prev_extent_end, lockend - 1);
+ 0, 0, 0, prev_extent_end, range_end - 1);
if (ret < 0)
goto out_unlock;
- prev_extent_end = lockend;
+ prev_extent_end = range_end;
}
if (cache.cached && cache.offset + cache.len >= last_extent_end) {
const u64 i_size = i_size_read(&inode->vfs_inode);
if (prev_extent_end < i_size) {
+ struct extent_state *cached_state = NULL;
u64 delalloc_start;
u64 delalloc_end;
+ u64 lockstart;
+ u64 lockend;
bool delalloc;
+ lockstart = round_down(prev_extent_end, sectorsize);
+ lockend = round_up(i_size, sectorsize);
+
+ /*
+ * See the comment in fiemap_process_hole as to why
+ * we're doing the locking here.
+ */
+ lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
delalloc = btrfs_find_delalloc_in_range(inode,
prev_extent_end,
i_size - 1,
&delalloc_cached_state,
&delalloc_start,
&delalloc_end);
+ unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
if (!delalloc)
cache.flags |= FIEMAP_EXTENT_LAST;
} else {
@@ -3082,7 +3111,6 @@ check_eof_delalloc:
ret = emit_last_fiemap_cache(fieinfo, &cache);
out_unlock:
- unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
out:
free_extent_state(delalloc_cached_state);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1eb93d3962aa..f88e0ca8331d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3184,8 +3184,23 @@ out:
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
- /* Drop extent maps for the part of the extent we didn't write. */
- btrfs_drop_extent_map_range(inode, unwritten_start, end, false);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(inode))
+ btrfs_drop_extent_map_range(inode, unwritten_start,
+ end, false);
/*
* If the ordered extent had an IOERR or something else went
@@ -10273,6 +10288,13 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE)
return -EINVAL;
+ /*
+ * Compressed extents should always have checksums, so error out if we
+ * have a NOCOW file or inode was created while mounted with NODATASUM.
+ */
+ if (inode->flags & BTRFS_INODE_NODATASUM)
+ return -EINVAL;
+
orig_count = iov_iter_count(from);
/* The extent size must be sane. */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index dfed9dd9c2d7..ac3316e0d11c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3815,6 +3815,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
goto out;
}
+ if (sa->create && is_fstree(sa->qgroupid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 63b426cc7798..5470e1cdf10c 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1736,6 +1736,15 @@ out:
return ret;
}
+static bool qgroup_has_usage(struct btrfs_qgroup *qgroup)
+{
+ return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 ||
+ qgroup->excl > 0 || qgroup->excl_cmpr > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0);
+}
+
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -1755,6 +1764,11 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
goto out;
}
+ if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
/* Check if there are no children of this qgroup */
if (!list_empty(&qgroup->members)) {
ret = -EBUSY;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 2d7519a6ce72..7902298c1f25 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -8111,7 +8111,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
}
if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
- ret = -EINVAL;
+ ret = -EOPNOTSUPP;
goto out;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5b3333ceef04..c52807d97efa 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -564,56 +564,22 @@ static int btrfs_reserve_trans_metadata(struct btrfs_fs_info *fs_info,
u64 num_bytes,
u64 *delayed_refs_bytes)
{
- struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
struct btrfs_space_info *si = fs_info->trans_block_rsv.space_info;
- u64 extra_delayed_refs_bytes = 0;
- u64 bytes;
+ u64 bytes = num_bytes + *delayed_refs_bytes;
int ret;
/*
- * If there's a gap between the size of the delayed refs reserve and
- * its reserved space, than some tasks have added delayed refs or bumped
- * its size otherwise (due to block group creation or removal, or block
- * group item update). Also try to allocate that gap in order to prevent
- * using (and possibly abusing) the global reserve when committing the
- * transaction.
- */
- if (flush == BTRFS_RESERVE_FLUSH_ALL &&
- !btrfs_block_rsv_full(delayed_refs_rsv)) {
- spin_lock(&delayed_refs_rsv->lock);
- if (delayed_refs_rsv->size > delayed_refs_rsv->reserved)
- extra_delayed_refs_bytes = delayed_refs_rsv->size -
- delayed_refs_rsv->reserved;
- spin_unlock(&delayed_refs_rsv->lock);
- }
-
- bytes = num_bytes + *delayed_refs_bytes + extra_delayed_refs_bytes;
-
- /*
* We want to reserve all the bytes we may need all at once, so we only
* do 1 enospc flushing cycle per transaction start.
*/
ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
- if (ret == 0) {
- if (extra_delayed_refs_bytes > 0)
- btrfs_migrate_to_delayed_refs_rsv(fs_info,
- extra_delayed_refs_bytes);
- return 0;
- }
-
- if (extra_delayed_refs_bytes > 0) {
- bytes -= extra_delayed_refs_bytes;
- ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
- if (ret == 0)
- return 0;
- }
/*
* If we are an emergency flush, which can steal from the global block
* reserve, then attempt to not reserve space for the delayed refs, as
* we will consume space for them from the global block reserve.
*/
- if (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
+ if (ret && flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
bytes -= *delayed_refs_bytes;
*delayed_refs_bytes = 0;
ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 168af9d000d1..3a5d69ff25fc 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1670,6 +1670,7 @@ out:
}
bitmap_free(active);
kfree(zone_info);
+ btrfs_free_chunk_map(map);
return ret;
}
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
index 7077f72e6f47..f449f7340aad 100644
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -168,6 +168,8 @@ error_unsupported:
dput(root);
error_open_root:
cachefiles_end_secure(cache, saved_cred);
+ put_cred(cache->cache_cred);
+ cache->cache_cred = NULL;
error_getsec:
fscache_relinquish_cache(cache_cookie);
cache->cache = NULL;
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 3f24905f4066..6465e2574230 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -816,6 +816,7 @@ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache)
cachefiles_put_directory(cache->graveyard);
cachefiles_put_directory(cache->store);
mntput(cache->mnt);
+ put_cred(cache->cache_cred);
kfree(cache->rootdirname);
kfree(cache->secctx);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 9c02f328c966..7fb4aae97412 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1452,7 +1452,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
if (flushing & CEPH_CAP_XATTR_EXCL) {
arg->old_xattr_buf = __ceph_build_xattrs_blob(ci);
arg->xattr_version = ci->i_xattrs.version;
- arg->xattr_buf = ci->i_xattrs.blob;
+ arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob);
} else {
arg->xattr_buf = NULL;
arg->old_xattr_buf = NULL;
@@ -1553,6 +1553,7 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
encode_cap_msg(msg, arg);
ceph_con_send(&arg->session->s_con, msg);
ceph_buffer_put(arg->old_xattr_buf);
+ ceph_buffer_put(arg->xattr_buf);
if (arg->wake)
wake_up_all(&ci->i_cap_wq);
}
@@ -2155,6 +2156,30 @@ retry:
ceph_cap_string(cap->implemented),
ceph_cap_string(revoking));
+ /* completed revocation? going down and there are no caps? */
+ if (revoking) {
+ if ((revoking & cap_used) == 0) {
+ doutc(cl, "completed revocation of %s\n",
+ ceph_cap_string(cap->implemented & ~cap->issued));
+ goto ack;
+ }
+
+ /*
+ * If the "i_wrbuffer_ref" was increased by mmap or generic
+ * cache write just before the ceph_check_caps() is called,
+ * the Fb capability revoking will fail this time. Then we
+ * must wait for the BDI's delayed work to flush the dirty
+ * pages and to release the "i_wrbuffer_ref", which will cost
+ * at most 5 seconds. That means the MDS needs to wait at
+ * most 5 seconds to finished the Fb capability's revocation.
+ *
+ * Let's queue a writeback for it.
+ */
+ if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
+ (revoking & CEPH_CAP_FILE_BUFFER))
+ queue_writeback = true;
+ }
+
if (cap == ci->i_auth_cap &&
(cap->issued & CEPH_CAP_FILE_WR)) {
/* request larger max_size from MDS? */
@@ -2182,30 +2207,6 @@ retry:
}
}
- /* completed revocation? going down and there are no caps? */
- if (revoking) {
- if ((revoking & cap_used) == 0) {
- doutc(cl, "completed revocation of %s\n",
- ceph_cap_string(cap->implemented & ~cap->issued));
- goto ack;
- }
-
- /*
- * If the "i_wrbuffer_ref" was increased by mmap or generic
- * cache write just before the ceph_check_caps() is called,
- * the Fb capability revoking will fail this time. Then we
- * must wait for the BDI's delayed work to flush the dirty
- * pages and to release the "i_wrbuffer_ref", which will cost
- * at most 5 seconds. That means the MDS needs to wait at
- * most 5 seconds to finished the Fb capability's revocation.
- *
- * Let's queue a writeback for it.
- */
- if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
- (revoking & CEPH_CAP_FILE_BUFFER))
- queue_writeback = true;
- }
-
/* want more caps from mds? */
if (want & ~cap->mds_wanted) {
if (want & ~(cap->mds_wanted | cap->issued))
@@ -3215,7 +3216,6 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
enum put_cap_refs_mode {
PUT_CAP_REFS_SYNC = 0,
- PUT_CAP_REFS_NO_CHECK,
PUT_CAP_REFS_ASYNC,
};
@@ -3331,11 +3331,6 @@ void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had)
__ceph_put_cap_refs(ci, had, PUT_CAP_REFS_ASYNC);
}
-void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, int had)
-{
- __ceph_put_cap_refs(ci, had, PUT_CAP_REFS_NO_CHECK);
-}
-
/*
* Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
* context. Adjust per-snap dirty page accounting as appropriate.
@@ -4777,7 +4772,22 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
if (__ceph_caps_dirty(ci)) {
struct ceph_mds_client *mdsc =
ceph_inode_to_fs_client(inode)->mdsc;
- __cap_delay_requeue_front(mdsc, ci);
+
+ doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
+ ceph_vinop(inode));
+ spin_lock(&mdsc->cap_unlink_delay_lock);
+ ci->i_ceph_flags |= CEPH_I_FLUSH;
+ if (!list_empty(&ci->i_cap_delay_list))
+ list_del_init(&ci->i_cap_delay_list);
+ list_add_tail(&ci->i_cap_delay_list,
+ &mdsc->cap_unlink_delay_list);
+ spin_unlock(&mdsc->cap_unlink_delay_lock);
+
+ /*
+ * Fire the work immediately, because the MDS maybe
+ * waiting for caps release.
+ */
+ ceph_queue_cap_unlink_work(mdsc);
}
}
spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 0c25d326afc4..7b2e77517f23 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -78,6 +78,8 @@ struct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry,
if (!inode)
return ERR_PTR(-ENOMEM);
+ inode->i_blkbits = CEPH_FSCRYPT_BLOCK_SHIFT;
+
if (!S_ISLNK(*mode)) {
err = ceph_pre_init_acls(dir, mode, as_ctx);
if (err < 0)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 548d1de379f3..3ab9c268a8bb 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1089,7 +1089,7 @@ void ceph_mdsc_release_request(struct kref *kref)
struct ceph_mds_request *req = container_of(kref,
struct ceph_mds_request,
r_kref);
- ceph_mdsc_release_dir_caps_no_check(req);
+ ceph_mdsc_release_dir_caps_async(req);
destroy_reply_info(&req->r_reply_info);
if (req->r_request)
ceph_msg_put(req->r_request);
@@ -2484,6 +2484,50 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
}
}
+void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc)
+{
+ struct ceph_client *cl = mdsc->fsc->client;
+ if (mdsc->stopping)
+ return;
+
+ if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_unlink_work)) {
+ doutc(cl, "caps unlink work queued\n");
+ } else {
+ doutc(cl, "failed to queue caps unlink work\n");
+ }
+}
+
+static void ceph_cap_unlink_work(struct work_struct *work)
+{
+ struct ceph_mds_client *mdsc =
+ container_of(work, struct ceph_mds_client, cap_unlink_work);
+ struct ceph_client *cl = mdsc->fsc->client;
+
+ doutc(cl, "begin\n");
+ spin_lock(&mdsc->cap_unlink_delay_lock);
+ while (!list_empty(&mdsc->cap_unlink_delay_list)) {
+ struct ceph_inode_info *ci;
+ struct inode *inode;
+
+ ci = list_first_entry(&mdsc->cap_unlink_delay_list,
+ struct ceph_inode_info,
+ i_cap_delay_list);
+ list_del_init(&ci->i_cap_delay_list);
+
+ inode = igrab(&ci->netfs.inode);
+ if (inode) {
+ spin_unlock(&mdsc->cap_unlink_delay_lock);
+ doutc(cl, "on %p %llx.%llx\n", inode,
+ ceph_vinop(inode));
+ ceph_check_caps(ci, CHECK_CAPS_FLUSH);
+ iput(inode);
+ spin_lock(&mdsc->cap_unlink_delay_lock);
+ }
+ }
+ spin_unlock(&mdsc->cap_unlink_delay_lock);
+ doutc(cl, "done\n");
+}
+
/*
* requests
*/
@@ -4261,7 +4305,7 @@ void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
}
}
-void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req)
+void ceph_mdsc_release_dir_caps_async(struct ceph_mds_request *req)
{
struct ceph_client *cl = req->r_mdsc->fsc->client;
int dcaps;
@@ -4269,8 +4313,7 @@ void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req)
dcaps = xchg(&req->r_dir_caps, 0);
if (dcaps) {
doutc(cl, "releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
- ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent),
- dcaps);
+ ceph_put_cap_refs_async(ceph_inode(req->r_parent), dcaps);
}
}
@@ -4306,7 +4349,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
if (req->r_session->s_mds != session->s_mds)
continue;
- ceph_mdsc_release_dir_caps_no_check(req);
+ ceph_mdsc_release_dir_caps_async(req);
__send_request(session, req, true);
}
@@ -5360,6 +5403,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
INIT_LIST_HEAD(&mdsc->cap_delay_list);
INIT_LIST_HEAD(&mdsc->cap_wait_list);
spin_lock_init(&mdsc->cap_delay_lock);
+ INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
+ spin_lock_init(&mdsc->cap_unlink_delay_lock);
INIT_LIST_HEAD(&mdsc->snap_flush_list);
spin_lock_init(&mdsc->snap_flush_lock);
mdsc->last_cap_flush_tid = 1;
@@ -5368,6 +5413,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
spin_lock_init(&mdsc->cap_dirty_lock);
init_waitqueue_head(&mdsc->cap_flushing_wq);
INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
+ INIT_WORK(&mdsc->cap_unlink_work, ceph_cap_unlink_work);
err = ceph_metric_init(&mdsc->metric);
if (err)
goto err_mdsmap;
@@ -5641,6 +5687,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
ceph_cleanup_global_and_empty_realms(mdsc);
cancel_work_sync(&mdsc->cap_reclaim_work);
+ cancel_work_sync(&mdsc->cap_unlink_work);
cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
doutc(cl, "done\n");
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 2e6ddaa13d72..03f8ff00874f 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -462,6 +462,8 @@ struct ceph_mds_client {
unsigned long last_renew_caps; /* last time we renewed our caps */
struct list_head cap_delay_list; /* caps with delayed release */
spinlock_t cap_delay_lock; /* protects cap_delay_list */
+ struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */
+ spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */
struct list_head snap_flush_list; /* cap_snaps ready to flush */
spinlock_t snap_flush_lock;
@@ -475,6 +477,8 @@ struct ceph_mds_client {
struct work_struct cap_reclaim_work;
atomic_t cap_reclaim_pending;
+ struct work_struct cap_unlink_work;
+
/*
* Cap reservations
*
@@ -552,7 +556,7 @@ extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
struct inode *dir,
struct ceph_mds_request *req);
extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req);
-extern void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req);
+extern void ceph_mdsc_release_dir_caps_async(struct ceph_mds_request *req);
static inline void ceph_mdsc_get_request(struct ceph_mds_request *req)
{
kref_get(&req->r_kref);
@@ -574,6 +578,7 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session);
extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
+extern void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc);
extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
int (*cb)(struct inode *, int mds, void *),
void *arg);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b06e2bc86221..b63b4cd9b5b6 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1255,8 +1255,6 @@ extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
extern void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had);
-extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
- int had);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc);
extern void __ceph_remove_capsnap(struct inode *inode,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 671664fed307..d746866ae3b6 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -100,6 +100,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);
+ vm_flags_t vm_flags;
/*
* vma address alignment (but not the pgoff alignment) has
@@ -141,10 +142,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
file_accessed(file);
ret = -ENOMEM;
+
+ vm_flags = vma->vm_flags;
+ /*
+ * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
+ * reserving here. Note: only for SHM hugetlbfs file, the inode
+ * flag S_PRIVATE is set.
+ */
+ if (inode->i_flags & S_PRIVATE)
+ vm_flags |= VM_NORESERVE;
+
if (!hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma,
- vma->vm_flags))
+ vm_flags))
goto out;
ret = 0;
@@ -1354,6 +1365,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
{
struct hugetlbfs_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
+ struct hstate *h;
char *rest;
unsigned long ps;
int opt;
@@ -1398,11 +1410,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
case Opt_pagesize:
ps = memparse(param->string, &rest);
- ctx->hstate = size_to_hstate(ps);
- if (!ctx->hstate) {
+ h = size_to_hstate(ps);
+ if (!h) {
pr_err("Unsupported page size %lu MB\n", ps / SZ_1M);
return -EINVAL;
}
+ ctx->hstate = h;
return 0;
case Opt_min_size:
diff --git a/fs/namespace.c b/fs/namespace.c
index 437f60e96d40..5a51315c6678 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4472,10 +4472,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
/*
* If this is an attached mount make sure it's located in the callers
* mount namespace. If it's not don't let the caller interact with it.
- * If this is a detached mount make sure it has an anonymous mount
- * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE.
+ *
+ * If this mount doesn't have a parent it's most often simply a
+ * detached mount with an anonymous mount namespace. IOW, something
+ * that's simply not attached yet. But there are apparently also users
+ * that do change mount properties on the rootfs itself. That obviously
+ * neither has a parent nor is it a detached mount so we cannot
+ * unconditionally check for detached mounts.
*/
- if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns)))
+ if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt))
goto out;
/*
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index a3059b3168fd..9a0d32e4b422 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -477,6 +477,9 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+ if (!iov_iter_count(from))
+ return 0;
+
if ((iocb->ki_flags & IOCB_DIRECT) ||
test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
return netfs_unbuffered_write_iter(iocb, from);
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 60a40d293c87..bee047e20f5d 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -139,6 +139,9 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+ if (!iov_iter_count(from))
+ return 0;
+
trace_netfs_write_iter(iocb, from);
netfs_stat(&netfs_n_rh_dio_write);
@@ -146,7 +149,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret < 0)
return ret;
ret = generic_write_checks(iocb, from);
- if (ret < 0)
+ if (ret <= 0)
goto out;
ret = file_remove_privs(file);
if (ret < 0)
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index e8ff1e61ce79..4261ad6c55b6 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -748,6 +748,8 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
if (!rreq->submitted) {
netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
+ if (rreq->origin == NETFS_DIO_READ)
+ inode_dio_end(rreq->inode);
ret = 0;
goto out;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6dc6340e2852..7d6c657e0409 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4945,10 +4945,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
*/
fl->fl_break_time = 0;
- spin_lock(&fp->fi_lock);
fp->fi_had_conflict = true;
nfsd_break_one_deleg(dp);
- spin_unlock(&fp->fi_lock);
return false;
}
@@ -5557,12 +5555,13 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (status)
goto out_unlock;
+ status = -EAGAIN;
+ if (fp->fi_had_conflict)
+ goto out_unlock;
+
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
- if (fp->fi_had_conflict)
- status = -EAGAIN;
- else
- status = hash_delegation_locked(dp, fp);
+ status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index bec33b89a075..0e3fc5ba33c7 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -107,7 +107,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
nilfs_transaction_commit(inode->i_sb);
mapped:
- folio_wait_stable(folio);
+ /*
+ * Since checksumming including data blocks is performed to determine
+ * the validity of the log to be written and used for recovery, it is
+ * necessary to wait for writeback to finish here, regardless of the
+ * stable write requirement of the backing device.
+ */
+ folio_wait_writeback(folio);
out:
sb_end_pagefault(inode->i_sb);
return vmf_fs_error(ret);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 0955b657938f..a9b8d77c8c1d 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
struct nilfs_recovery_block *rb,
- struct page *page)
+ loff_t pos, struct page *page)
{
struct buffer_head *bh_org;
+ size_t from = pos & ~PAGE_MASK;
void *kaddr;
bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
@@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
return -EIO;
kaddr = kmap_atomic(page);
- memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
+ memcpy(kaddr + from, bh_org->b_data, bh_org->b_size);
kunmap_atomic(kaddr);
brelse(bh_org);
return 0;
@@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
goto failed_inode;
}
- err = nilfs_recovery_copy_block(nilfs, rb, page);
+ err = nilfs_recovery_copy_block(nilfs, rb, pos, page);
if (unlikely(err))
goto failed_page;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 2590a0860eab..2bfb08052d39 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1703,7 +1703,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- set_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
if (bh->b_folio != bd_folio) {
folio_lock(bd_folio);
@@ -1714,6 +1713,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
}
break;
}
+ set_buffer_async_write(bh);
if (bh->b_folio != fs_folio) {
nilfs_begin_folio_io(fs_folio);
fs_folio = bh->b_folio;
@@ -1800,7 +1800,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
clear_buffer_uptodate(bh);
if (bh->b_folio != bd_folio) {
@@ -1809,6 +1808,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
}
break;
}
+ clear_buffer_async_write(bh);
if (bh->b_folio != fs_folio) {
nilfs_end_folio_io(fs_folio, err);
fs_folio = bh->b_folio;
@@ -1896,8 +1896,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
BIT(BH_NILFS_Redirected));
- set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh == segbuf->sb_super_root) {
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
if (bh->b_folio != bd_folio) {
folio_end_writeback(bd_folio);
bd_folio = bh->b_folio;
@@ -1905,6 +1906,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
update_sr = true;
break;
}
+ set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh->b_folio != fs_folio) {
nilfs_end_folio_io(fs_folio, 0);
fs_folio = bh->b_folio;
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
index 63f70259edc0..7aadf5010999 100644
--- a/fs/ntfs3/attrib.c
+++ b/fs/ntfs3/attrib.c
@@ -886,7 +886,7 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
struct runs_tree *run = &ni->file.run;
struct ntfs_sb_info *sbi;
u8 cluster_bits;
- struct ATTRIB *attr = NULL, *attr_b;
+ struct ATTRIB *attr, *attr_b;
struct ATTR_LIST_ENTRY *le, *le_b;
struct mft_inode *mi, *mi_b;
CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0, alen;
@@ -904,12 +904,8 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
*len = 0;
up_read(&ni->file.run_lock);
- if (*len) {
- if (*lcn != SPARSE_LCN || !new)
- return 0; /* Fast normal way without allocation. */
- else if (clen > *len)
- clen = *len;
- }
+ if (*len && (*lcn != SPARSE_LCN || !new))
+ return 0; /* Fast normal way without allocation. */
/* No cluster in cache or we need to allocate cluster in hole. */
sbi = ni->mi.sbi;
@@ -918,6 +914,17 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
ni_lock(ni);
down_write(&ni->file.run_lock);
+ /* Repeat the code above (under write lock). */
+ if (!run_lookup_entry(run, vcn, lcn, len, NULL))
+ *len = 0;
+
+ if (*len) {
+ if (*lcn != SPARSE_LCN || !new)
+ goto out; /* normal way without allocation. */
+ if (clen > *len)
+ clen = *len;
+ }
+
le_b = NULL;
attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
if (!attr_b) {
@@ -1736,8 +1743,10 @@ repack:
le_b = NULL;
attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL,
0, NULL, &mi_b);
- if (!attr_b)
- return -ENOENT;
+ if (!attr_b) {
+ err = -ENOENT;
+ goto out;
+ }
attr = attr_b;
le = le_b;
@@ -1818,13 +1827,15 @@ ins_ext:
ok:
run_truncate_around(run, vcn);
out:
- if (new_valid > data_size)
- new_valid = data_size;
+ if (attr_b) {
+ if (new_valid > data_size)
+ new_valid = data_size;
- valid_size = le64_to_cpu(attr_b->nres.valid_size);
- if (new_valid != valid_size) {
- attr_b->nres.valid_size = cpu_to_le64(valid_size);
- mi_b->dirty = true;
+ valid_size = le64_to_cpu(attr_b->nres.valid_size);
+ if (new_valid != valid_size) {
+ attr_b->nres.valid_size = cpu_to_le64(valid_size);
+ mi_b->dirty = true;
+ }
}
return err;
@@ -2073,7 +2084,7 @@ next_attr:
/* Update inode size. */
ni->i_valid = valid_size;
- ni->vfs_inode.i_size = data_size;
+ i_size_write(&ni->vfs_inode, data_size);
inode_set_bytes(&ni->vfs_inode, total_size);
ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
mark_inode_dirty(&ni->vfs_inode);
@@ -2488,7 +2499,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
mi_b->dirty = true;
done:
- ni->vfs_inode.i_size += bytes;
+ i_size_write(&ni->vfs_inode, ni->vfs_inode.i_size + bytes);
ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
mark_inode_dirty(&ni->vfs_inode);
diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c
index 7c01735d1219..9f4bd8d26090 100644
--- a/fs/ntfs3/attrlist.c
+++ b/fs/ntfs3/attrlist.c
@@ -29,7 +29,7 @@ static inline bool al_is_valid_le(const struct ntfs_inode *ni,
void al_destroy(struct ntfs_inode *ni)
{
run_close(&ni->attr_list.run);
- kfree(ni->attr_list.le);
+ kvfree(ni->attr_list.le);
ni->attr_list.le = NULL;
ni->attr_list.size = 0;
ni->attr_list.dirty = false;
@@ -127,12 +127,13 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
{
size_t off;
u16 sz;
+ const unsigned le_min_size = le_size(0);
if (!le) {
le = ni->attr_list.le;
} else {
sz = le16_to_cpu(le->size);
- if (sz < sizeof(struct ATTR_LIST_ENTRY)) {
+ if (sz < le_min_size) {
/* Impossible 'cause we should not return such le. */
return NULL;
}
@@ -141,7 +142,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
/* Check boundary. */
off = PtrOffset(ni->attr_list.le, le);
- if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) {
+ if (off + le_min_size > ni->attr_list.size) {
/* The regular end of list. */
return NULL;
}
@@ -149,8 +150,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
sz = le16_to_cpu(le->size);
/* Check le for errors. */
- if (sz < sizeof(struct ATTR_LIST_ENTRY) ||
- off + sz > ni->attr_list.size ||
+ if (sz < le_min_size || off + sz > ni->attr_list.size ||
sz < le->name_off + le->name_len * sizeof(short)) {
return NULL;
}
@@ -318,7 +318,7 @@ int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name,
memcpy(ptr, al->le, off);
memcpy(Add2Ptr(ptr, off + sz), le, old_size - off);
le = Add2Ptr(ptr, off);
- kfree(al->le);
+ kvfree(al->le);
al->le = ptr;
} else {
memmove(Add2Ptr(le, sz), le, old_size - off);
diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c
index 63f14a0232f6..845f9b22deef 100644
--- a/fs/ntfs3/bitmap.c
+++ b/fs/ntfs3/bitmap.c
@@ -124,7 +124,7 @@ void wnd_close(struct wnd_bitmap *wnd)
{
struct rb_node *node, *next;
- kfree(wnd->free_bits);
+ kvfree(wnd->free_bits);
wnd->free_bits = NULL;
run_close(&wnd->run);
@@ -1360,7 +1360,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
memcpy(new_free, wnd->free_bits, wnd->nwnd * sizeof(short));
memset(new_free + wnd->nwnd, 0,
(new_wnd - wnd->nwnd) * sizeof(short));
- kfree(wnd->free_bits);
+ kvfree(wnd->free_bits);
wnd->free_bits = new_free;
}
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index ec0566b322d5..5cf3d9decf64 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -309,11 +309,31 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
return 0;
}
- /* NTFS: symlinks are "dir + reparse" or "file + reparse" */
- if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT)
- dt_type = DT_LNK;
- else
- dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG;
+ /*
+ * NTFS: symlinks are "dir + reparse" or "file + reparse"
+ * Unfortunately reparse attribute is used for many purposes (several dozens).
+ * It is not possible here to know is this name symlink or not.
+ * To get exactly the type of name we should to open inode (read mft).
+ * getattr for opened file (fstat) correctly returns symlink.
+ */
+ dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG;
+
+ /*
+ * It is not reliable to detect the type of name using duplicated information
+ * stored in parent directory.
+ * The only correct way to get the type of name - read MFT record and find ATTR_STD.
+ * The code below is not good idea.
+ * It does additional locks/reads just to get the type of name.
+ * Should we use additional mount option to enable branch below?
+ */
+ if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) &&
+ ino != ni->mi.rno) {
+ struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL);
+ if (!IS_ERR_OR_NULL(inode)) {
+ dt_type = fs_umode_to_dtype(inode->i_mode);
+ iput(inode);
+ }
+ }
return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type);
}
@@ -495,11 +515,9 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
struct INDEX_HDR *hdr;
const struct ATTR_FILE_NAME *fname;
u32 e_size, off, end;
- u64 vbo = 0;
size_t drs = 0, fles = 0, bit = 0;
- loff_t i_size = ni->vfs_inode.i_size;
struct indx_node *node = NULL;
- u8 index_bits = ni->dir.index_bits;
+ size_t max_indx = i_size_read(&ni->vfs_inode) >> ni->dir.index_bits;
if (is_empty)
*is_empty = true;
@@ -518,8 +536,10 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
e = Add2Ptr(hdr, off);
e_size = le16_to_cpu(e->size);
if (e_size < sizeof(struct NTFS_DE) ||
- off + e_size > end)
+ off + e_size > end) {
+ /* Looks like corruption. */
break;
+ }
if (de_is_last(e))
break;
@@ -543,7 +563,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
fles += 1;
}
- if (vbo >= i_size)
+ if (bit >= max_indx)
goto out;
err = indx_used_bit(&ni->dir, ni, &bit);
@@ -553,8 +573,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
if (bit == MINUS_ONE_T)
goto out;
- vbo = (u64)bit << index_bits;
- if (vbo >= i_size)
+ if (bit >= max_indx)
goto out;
err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits,
@@ -564,7 +583,6 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
hdr = &node->index->ihdr;
bit += 1;
- vbo = (u64)bit << ni->dir.idx2vbn_bits;
}
out:
@@ -593,5 +611,9 @@ const struct file_operations ntfs_dir_operations = {
.iterate_shared = ntfs_readdir,
.fsync = generic_file_fsync,
.open = ntfs_file_open,
+ .unlocked_ioctl = ntfs_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ntfs_compat_ioctl,
+#endif
};
// clang-format on
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index a5a30a24ce5d..5418662c80d8 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -48,7 +48,7 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg)
return 0;
}
-static long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
+long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info;
@@ -61,7 +61,7 @@ static long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg)
}
#ifdef CONFIG_COMPAT
-static long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg)
+long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg)
{
return ntfs_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
@@ -188,6 +188,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
u32 bh_next, bh_off, to;
sector_t iblock;
struct folio *folio;
+ bool dirty = false;
for (; idx < idx_end; idx += 1, from = 0) {
page_off = (loff_t)idx << PAGE_SHIFT;
@@ -223,29 +224,27 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to)
/* Ok, it's mapped. Make sure it's up-to-date. */
if (folio_test_uptodate(folio))
set_buffer_uptodate(bh);
-
- if (!buffer_uptodate(bh)) {
- err = bh_read(bh, 0);
- if (err < 0) {
- folio_unlock(folio);
- folio_put(folio);
- goto out;
- }
+ else if (bh_read(bh, 0) < 0) {
+ err = -EIO;
+ folio_unlock(folio);
+ folio_put(folio);
+ goto out;
}
mark_buffer_dirty(bh);
-
} while (bh_off = bh_next, iblock += 1,
head != (bh = bh->b_this_page));
folio_zero_segment(folio, from, to);
+ dirty = true;
folio_unlock(folio);
folio_put(folio);
cond_resched();
}
out:
- mark_inode_dirty(inode);
+ if (dirty)
+ mark_inode_dirty(inode);
return err;
}
@@ -261,6 +260,9 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma)
bool rw = vma->vm_flags & VM_WRITE;
int err;
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
if (is_encrypted(ni)) {
ntfs_inode_warn(inode, "mmap encrypted not supported");
return -EOPNOTSUPP;
@@ -499,10 +501,14 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
ni_lock(ni);
err = attr_punch_hole(ni, vbo, len, &frame_size);
ni_unlock(ni);
+ if (!err)
+ goto ok;
+
if (err != E_NTFS_NOTALIGNED)
goto out;
/* Process not aligned punch. */
+ err = 0;
mask = frame_size - 1;
vbo_a = (vbo + mask) & ~mask;
end_a = end & ~mask;
@@ -525,6 +531,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
ni_lock(ni);
err = attr_punch_hole(ni, vbo_a, end_a - vbo_a, NULL);
ni_unlock(ni);
+ if (err)
+ goto out;
}
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
/*
@@ -564,6 +572,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
ni_lock(ni);
err = attr_insert_range(ni, vbo, len);
ni_unlock(ni);
+ if (err)
+ goto out;
} else {
/* Check new size. */
u8 cluster_bits = sbi->cluster_bits;
@@ -633,11 +643,18 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
&ni->file.run, i_size, &ni->i_valid,
true, NULL);
ni_unlock(ni);
+ if (err)
+ goto out;
} else if (new_size > i_size) {
- inode->i_size = new_size;
+ i_size_write(inode, new_size);
}
}
+ok:
+ err = file_modified(file);
+ if (err)
+ goto out;
+
out:
if (map_locked)
filemap_invalidate_unlock(mapping);
@@ -663,6 +680,9 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
umode_t mode = inode->i_mode;
int err;
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
err = setattr_prepare(idmap, dentry, attr);
if (err)
goto out;
@@ -676,7 +696,7 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
goto out;
}
inode_dio_wait(inode);
- oldsize = inode->i_size;
+ oldsize = i_size_read(inode);
newsize = attr->ia_size;
if (newsize <= oldsize)
@@ -688,7 +708,7 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
goto out;
ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
- inode->i_size = newsize;
+ i_size_write(inode, newsize);
}
setattr_copy(idmap, inode, attr);
@@ -718,6 +738,9 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
struct inode *inode = file->f_mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
if (is_encrypted(ni)) {
ntfs_inode_warn(inode, "encrypted i/o not supported");
return -EOPNOTSUPP;
@@ -752,6 +775,9 @@ static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos,
struct inode *inode = in->f_mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
if (is_encrypted(ni)) {
ntfs_inode_warn(inode, "encrypted i/o not supported");
return -EOPNOTSUPP;
@@ -821,7 +847,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
size_t count = iov_iter_count(from);
loff_t pos = iocb->ki_pos;
struct inode *inode = file_inode(file);
- loff_t i_size = inode->i_size;
+ loff_t i_size = i_size_read(inode);
struct address_space *mapping = inode->i_mapping;
struct ntfs_inode *ni = ntfs_i(inode);
u64 valid = ni->i_valid;
@@ -1028,6 +1054,8 @@ out:
iocb->ki_pos += written;
if (iocb->ki_pos > ni->i_valid)
ni->i_valid = iocb->ki_pos;
+ if (iocb->ki_pos > i_size)
+ i_size_write(inode, iocb->ki_pos);
return written;
}
@@ -1041,8 +1069,12 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t ret;
+ int err;
struct ntfs_inode *ni = ntfs_i(inode);
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
if (is_encrypted(ni)) {
ntfs_inode_warn(inode, "encrypted i/o not supported");
return -EOPNOTSUPP;
@@ -1068,6 +1100,12 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret <= 0)
goto out;
+ err = file_modified(iocb->ki_filp);
+ if (err) {
+ ret = err;
+ goto out;
+ }
+
if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) {
/* Should never be here, see ntfs_file_open(). */
ret = -EOPNOTSUPP;
@@ -1097,6 +1135,9 @@ int ntfs_file_open(struct inode *inode, struct file *file)
{
struct ntfs_inode *ni = ntfs_i(inode);
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
if (unlikely((is_compressed(ni) || is_encrypted(ni)) &&
(file->f_flags & O_DIRECT))) {
return -EOPNOTSUPP;
@@ -1138,7 +1179,8 @@ static int ntfs_file_release(struct inode *inode, struct file *file)
down_write(&ni->file.run_lock);
err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run,
- inode->i_size, &ni->i_valid, false, NULL);
+ i_size_read(inode), &ni->i_valid, false,
+ NULL);
up_write(&ni->file.run_lock);
ni_unlock(ni);
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 3df2d9e34b91..3b42938a9d3b 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -778,7 +778,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni)
run_deallocate(sbi, &ni->attr_list.run, true);
run_close(&ni->attr_list.run);
ni->attr_list.size = 0;
- kfree(ni->attr_list.le);
+ kvfree(ni->attr_list.le);
ni->attr_list.le = NULL;
ni->attr_list.dirty = false;
@@ -927,7 +927,7 @@ int ni_create_attr_list(struct ntfs_inode *ni)
return 0;
out:
- kfree(ni->attr_list.le);
+ kvfree(ni->attr_list.le);
ni->attr_list.le = NULL;
ni->attr_list.size = 0;
return err;
@@ -2099,7 +2099,7 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page)
gfp_t gfp_mask;
struct page *pg;
- if (vbo >= ni->vfs_inode.i_size) {
+ if (vbo >= i_size_read(&ni->vfs_inode)) {
SetPageUptodate(page);
err = 0;
goto out;
@@ -2173,7 +2173,7 @@ int ni_decompress_file(struct ntfs_inode *ni)
{
struct ntfs_sb_info *sbi = ni->mi.sbi;
struct inode *inode = &ni->vfs_inode;
- loff_t i_size = inode->i_size;
+ loff_t i_size = i_size_read(inode);
struct address_space *mapping = inode->i_mapping;
gfp_t gfp_mask = mapping_gfp_mask(mapping);
struct page **pages = NULL;
@@ -2457,6 +2457,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
struct ATTR_LIST_ENTRY *le = NULL;
struct runs_tree *run = &ni->file.run;
u64 valid_size = ni->i_valid;
+ loff_t i_size = i_size_read(&ni->vfs_inode);
u64 vbo_disk;
size_t unc_size;
u32 frame_size, i, npages_disk, ondisk_size;
@@ -2548,7 +2549,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
}
}
- frames = (ni->vfs_inode.i_size - 1) >> frame_bits;
+ frames = (i_size - 1) >> frame_bits;
err = attr_wof_frame_info(ni, attr, run, frame64, frames,
frame_bits, &ondisk_size, &vbo_data);
@@ -2556,8 +2557,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
goto out2;
if (frame64 == frames) {
- unc_size = 1 + ((ni->vfs_inode.i_size - 1) &
- (frame_size - 1));
+ unc_size = 1 + ((i_size - 1) & (frame_size - 1));
ondisk_size = attr_size(attr) - vbo_data;
} else {
unc_size = frame_size;
@@ -3259,6 +3259,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
if (is_bad_inode(inode) || sb_rdonly(sb))
return 0;
+ if (unlikely(ntfs3_forced_shutdown(sb)))
+ return -EIO;
+
if (!ni_trylock(ni)) {
/* 'ni' is under modification, skip for now. */
mark_inode_dirty_sync(inode);
@@ -3288,7 +3291,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
modified = true;
}
- ts = inode_get_mtime(inode);
+ ts = inode_get_ctime(inode);
dup.c_time = kernel2nt(&ts);
if (std->c_time != dup.c_time) {
std->c_time = dup.c_time;
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c
index 98ccb6650858..855519713bf7 100644
--- a/fs/ntfs3/fslog.c
+++ b/fs/ntfs3/fslog.c
@@ -465,7 +465,7 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr)
{
const struct RESTART_AREA *ra;
u16 cl, fl, ul;
- u32 off, l_size, file_dat_bits, file_size_round;
+ u32 off, l_size, seq_bits;
u16 ro = le16_to_cpu(rhdr->ra_off);
u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
@@ -511,13 +511,15 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr)
/* Make sure the sequence number bits match the log file size. */
l_size = le64_to_cpu(ra->l_size);
- file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits);
- file_size_round = 1u << (file_dat_bits + 3);
- if (file_size_round != l_size &&
- (file_size_round < l_size || (file_size_round / 2) > l_size)) {
- return false;
+ seq_bits = sizeof(u64) * 8 + 3;
+ while (l_size) {
+ l_size >>= 1;
+ seq_bits -= 1;
}
+ if (seq_bits != ra->seq_num_bits)
+ return false;
+
/* The log page data offset and record header length must be quad-aligned. */
if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) ||
!IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8))
@@ -974,6 +976,16 @@ skip_looking:
return e;
}
+struct restart_info {
+ u64 last_lsn;
+ struct RESTART_HDR *r_page;
+ u32 vbo;
+ bool chkdsk_was_run;
+ bool valid_page;
+ bool initialized;
+ bool restart;
+};
+
#define RESTART_SINGLE_PAGE_IO cpu_to_le16(0x0001)
#define NTFSLOG_WRAPPED 0x00000001
@@ -987,6 +999,7 @@ struct ntfs_log {
struct ntfs_inode *ni;
u32 l_size;
+ u32 orig_file_size;
u32 sys_page_size;
u32 sys_page_mask;
u32 page_size;
@@ -1040,6 +1053,8 @@ struct ntfs_log {
struct CLIENT_ID client_id;
u32 client_undo_commit;
+
+ struct restart_info rst_info, rst_info2;
};
static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn)
@@ -1105,16 +1120,6 @@ static inline bool verify_client_lsn(struct ntfs_log *log,
lsn <= le64_to_cpu(log->ra->current_lsn) && lsn;
}
-struct restart_info {
- u64 last_lsn;
- struct RESTART_HDR *r_page;
- u32 vbo;
- bool chkdsk_was_run;
- bool valid_page;
- bool initialized;
- bool restart;
-};
-
static int read_log_page(struct ntfs_log *log, u32 vbo,
struct RECORD_PAGE_HDR **buffer, bool *usa_error)
{
@@ -1176,7 +1181,7 @@ out:
* restart page header. It will stop the first time we find a
* valid page header.
*/
-static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
+static int log_read_rst(struct ntfs_log *log, bool first,
struct restart_info *info)
{
u32 skip, vbo;
@@ -1192,7 +1197,7 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
}
/* Loop continuously until we succeed. */
- for (; vbo < l_size; vbo = 2 * vbo + skip, skip = 0) {
+ for (; vbo < log->l_size; vbo = 2 * vbo + skip, skip = 0) {
bool usa_error;
bool brst, bchk;
struct RESTART_AREA *ra;
@@ -1285,22 +1290,17 @@ check_result:
/*
* Ilog_init_pg_hdr - Init @log from restart page header.
*/
-static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size,
- u32 page_size, u16 major_ver, u16 minor_ver)
+static void log_init_pg_hdr(struct ntfs_log *log, u16 major_ver, u16 minor_ver)
{
- log->sys_page_size = sys_page_size;
- log->sys_page_mask = sys_page_size - 1;
- log->page_size = page_size;
- log->page_mask = page_size - 1;
- log->page_bits = blksize_bits(page_size);
+ log->sys_page_size = log->page_size;
+ log->sys_page_mask = log->page_mask;
log->clst_per_page = log->page_size >> log->ni->mi.sbi->cluster_bits;
if (!log->clst_per_page)
log->clst_per_page = 1;
- log->first_page = major_ver >= 2 ?
- 0x22 * page_size :
- ((sys_page_size << 1) + (page_size << 1));
+ log->first_page = major_ver >= 2 ? 0x22 * log->page_size :
+ 4 * log->page_size;
log->major_ver = major_ver;
log->minor_ver = minor_ver;
}
@@ -1308,12 +1308,11 @@ static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size,
/*
* log_create - Init @log in cases when we don't have a restart area to use.
*/
-static void log_create(struct ntfs_log *log, u32 l_size, const u64 last_lsn,
+static void log_create(struct ntfs_log *log, const u64 last_lsn,
u32 open_log_count, bool wrapped, bool use_multi_page)
{
- log->l_size = l_size;
/* All file offsets must be quadword aligned. */
- log->file_data_bits = blksize_bits(l_size) - 3;
+ log->file_data_bits = blksize_bits(log->l_size) - 3;
log->seq_num_mask = (8 << log->file_data_bits) - 1;
log->seq_num_bits = sizeof(u64) * 8 - log->file_data_bits;
log->seq_num = (last_lsn >> log->file_data_bits) + 2;
@@ -3720,10 +3719,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
struct ntfs_sb_info *sbi = ni->mi.sbi;
struct ntfs_log *log;
- struct restart_info rst_info, rst_info2;
- u64 rec_lsn, ra_lsn, checkpt_lsn = 0, rlsn = 0;
+ u64 rec_lsn, checkpt_lsn = 0, rlsn = 0;
struct ATTR_NAME_ENTRY *attr_names = NULL;
- struct ATTR_NAME_ENTRY *ane;
struct RESTART_TABLE *dptbl = NULL;
struct RESTART_TABLE *trtbl = NULL;
const struct RESTART_TABLE *rt;
@@ -3741,9 +3738,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
struct TRANSACTION_ENTRY *tr;
struct DIR_PAGE_ENTRY *dp;
u32 i, bytes_per_attr_entry;
- u32 l_size = ni->vfs_inode.i_size;
- u32 orig_file_size = l_size;
- u32 page_size, vbo, tail, off, dlen;
+ u32 vbo, tail, off, dlen;
u32 saved_len, rec_len, transact_id;
bool use_second_page;
struct RESTART_AREA *ra2, *ra = NULL;
@@ -3758,52 +3753,50 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
u16 t16;
u32 t32;
- /* Get the size of page. NOTE: To replay we can use default page. */
-#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2
- page_size = norm_file_page(PAGE_SIZE, &l_size, true);
-#else
- page_size = norm_file_page(PAGE_SIZE, &l_size, false);
-#endif
- if (!page_size)
- return -EINVAL;
-
log = kzalloc(sizeof(struct ntfs_log), GFP_NOFS);
if (!log)
return -ENOMEM;
log->ni = ni;
- log->l_size = l_size;
- log->one_page_buf = kmalloc(page_size, GFP_NOFS);
+ log->l_size = log->orig_file_size = ni->vfs_inode.i_size;
+ /* Get the size of page. NOTE: To replay we can use default page. */
+#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2
+ log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, true);
+#else
+ log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, false);
+#endif
+ if (!log->page_size) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ log->one_page_buf = kmalloc(log->page_size, GFP_NOFS);
if (!log->one_page_buf) {
err = -ENOMEM;
goto out;
}
- log->page_size = page_size;
- log->page_mask = page_size - 1;
- log->page_bits = blksize_bits(page_size);
+ log->page_mask = log->page_size - 1;
+ log->page_bits = blksize_bits(log->page_size);
/* Look for a restart area on the disk. */
- memset(&rst_info, 0, sizeof(struct restart_info));
- err = log_read_rst(log, l_size, true, &rst_info);
+ err = log_read_rst(log, true, &log->rst_info);
if (err)
goto out;
/* remember 'initialized' */
- *initialized = rst_info.initialized;
+ *initialized = log->rst_info.initialized;
- if (!rst_info.restart) {
- if (rst_info.initialized) {
+ if (!log->rst_info.restart) {
+ if (log->rst_info.initialized) {
/* No restart area but the file is not initialized. */
err = -EINVAL;
goto out;
}
- log_init_pg_hdr(log, page_size, page_size, 1, 1);
- log_create(log, l_size, 0, get_random_u32(), false, false);
-
- log->ra = ra;
+ log_init_pg_hdr(log, 1, 1);
+ log_create(log, 0, get_random_u32(), false, false);
ra = log_create_ra(log);
if (!ra) {
@@ -3820,25 +3813,26 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
* If the restart offset above wasn't zero then we won't
* look for a second restart.
*/
- if (rst_info.vbo)
+ if (log->rst_info.vbo)
goto check_restart_area;
- memset(&rst_info2, 0, sizeof(struct restart_info));
- err = log_read_rst(log, l_size, false, &rst_info2);
+ err = log_read_rst(log, false, &log->rst_info2);
if (err)
goto out;
/* Determine which restart area to use. */
- if (!rst_info2.restart || rst_info2.last_lsn <= rst_info.last_lsn)
+ if (!log->rst_info2.restart ||
+ log->rst_info2.last_lsn <= log->rst_info.last_lsn)
goto use_first_page;
use_second_page = true;
- if (rst_info.chkdsk_was_run && page_size != rst_info.vbo) {
+ if (log->rst_info.chkdsk_was_run &&
+ log->page_size != log->rst_info.vbo) {
struct RECORD_PAGE_HDR *sp = NULL;
bool usa_error;
- if (!read_log_page(log, page_size, &sp, &usa_error) &&
+ if (!read_log_page(log, log->page_size, &sp, &usa_error) &&
sp->rhdr.sign == NTFS_CHKD_SIGNATURE) {
use_second_page = false;
}
@@ -3846,52 +3840,43 @@ int log_replay(struct ntfs_inode *ni, bool *initialized)
}
if (use_second_page) {
- kfree(rst_info.r_page);
- memcpy(&rst_info, &rst_info2, sizeof(struct restart_info));
- rst_info2.r_page = NULL;
+ kfree(log->rst_info.r_page);
+ memcpy(&log->rst_info, &log->rst_info2,
+ sizeof(struct restart_info));
+ log->rst_info2.r_page = NULL;
}
use_first_page:
- kfree(rst_info2.r_page);
+ kfree(log->rst_info2.r_page);
check_restart_area:
/*
* If the restart area is at offset 0, we want
* to write the second restart area first.
*/
- log->init_ra = !!rst_info.vbo;
+ log->init_ra = !!log->rst_info.vbo;
/* If we have a valid page then grab a pointer to the restart area. */
- ra2 = rst_info.valid_page ?
- Add2Ptr(rst_info.r_page,
- le16_to_cpu(rst_info.r_page->ra_off)) :
+ ra2 = log->rst_info.valid_page ?
+ Add2Ptr(log->rst_info.r_page,
+ le16_to_cpu(log->rst_info.r_page->ra_off)) :
NULL;
- if (rst_info.chkdsk_was_run ||
+ if (log->rst_info.chkdsk_was_run ||
(ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) {
bool wrapped = false;
bool use_multi_page = false;
u32 open_log_count;
/* Do some checks based on whether we have a valid log page. */
- if (!rst_info.valid_page) {
- open_log_count = get_random_u32();
- goto init_log_instance;
- }
- open_log_count = le32_to_cpu(ra2->open_log_count);
-
- /*
- * If the restart page size isn't changing then we want to
- * check how much work we need to do.
- */
- if (page_size != le32_to_cpu(rst_info.r_page->sys_page_size))
- goto init_log_instance;
+ open_log_count = log->rst_info.valid_page ?
+ le32_to_cpu(ra2->open_log_count) :
+ get_random_u32();
-init_log_instance:
- log_init_pg_hdr(log, page_size, page_size, 1, 1);
+ log_init_pg_hdr(log, 1, 1);
- log_create(log, l_size, rst_info.last_lsn, open_log_count,
- wrapped, use_multi_page);
+ log_create(log, log->rst_info.last_lsn, open_log_count, wrapped,
+ use_multi_page);
ra = log_create_ra(log);
if (!ra) {
@@ -3916,28 +3901,27 @@ init_log_instance:
* use the log file. We must use the system page size instead of the
* default size if there is not a clean shutdown.
*/
- t32 = le32_to_cpu(rst_info.r_page->sys_page_size);
- if (page_size != t32) {
- l_size = orig_file_size;
- page_size =
- norm_file_page(t32, &l_size, t32 == DefaultLogPageSize);
+ t32 = le32_to_cpu(log->rst_info.r_page->sys_page_size);
+ if (log->page_size != t32) {
+ log->l_size = log->orig_file_size;
+ log->page_size = norm_file_page(t32, &log->l_size,
+ t32 == DefaultLogPageSize);
}
- if (page_size != t32 ||
- page_size != le32_to_cpu(rst_info.r_page->page_size)) {
+ if (log->page_size != t32 ||
+ log->page_size != le32_to_cpu(log->rst_info.r_page->page_size)) {
err = -EINVAL;
goto out;
}
/* If the file size has shrunk then we won't mount it. */
- if (l_size < le64_to_cpu(ra2->l_size)) {
+ if (log->l_size < le64_to_cpu(ra2->l_size)) {
err = -EINVAL;
goto out;
}
- log_init_pg_hdr(log, page_size, page_size,
- le16_to_cpu(rst_info.r_page->major_ver),
- le16_to_cpu(rst_info.r_page->minor_ver));
+ log_init_pg_hdr(log, le16_to_cpu(log->rst_info.r_page->major_ver),
+ le16_to_cpu(log->rst_info.r_page->minor_ver));
log->l_size = le64_to_cpu(ra2->l_size);
log->seq_num_bits = le32_to_cpu(ra2->seq_num_bits);
@@ -3945,7 +3929,7 @@ init_log_instance:
log->seq_num_mask = (8 << log->file_data_bits) - 1;
log->last_lsn = le64_to_cpu(ra2->current_lsn);
log->seq_num = log->last_lsn >> log->file_data_bits;
- log->ra_off = le16_to_cpu(rst_info.r_page->ra_off);
+ log->ra_off = le16_to_cpu(log->rst_info.r_page->ra_off);
log->restart_size = log->sys_page_size - log->ra_off;
log->record_header_len = le16_to_cpu(ra2->rec_hdr_len);
log->ra_size = le16_to_cpu(ra2->ra_len);
@@ -4045,7 +4029,7 @@ find_oldest:
log->current_avail = current_log_avail(log);
/* Remember which restart area to write first. */
- log->init_ra = rst_info.vbo;
+ log->init_ra = log->rst_info.vbo;
process_log:
/* 1.0, 1.1, 2.0 log->major_ver/minor_ver - short values. */
@@ -4105,7 +4089,7 @@ process_log:
log->client_id.seq_num = cr->seq_num;
log->client_id.client_idx = client;
- err = read_rst_area(log, &rst, &ra_lsn);
+ err = read_rst_area(log, &rst, &checkpt_lsn);
if (err)
goto out;
@@ -4114,9 +4098,8 @@ process_log:
bytes_per_attr_entry = !rst->major_ver ? 0x2C : 0x28;
- checkpt_lsn = le64_to_cpu(rst->check_point_start);
- if (!checkpt_lsn)
- checkpt_lsn = ra_lsn;
+ if (rst->check_point_start)
+ checkpt_lsn = le64_to_cpu(rst->check_point_start);
/* Allocate and Read the Transaction Table. */
if (!rst->transact_table_len)
@@ -4330,23 +4313,20 @@ check_attr_table:
lcb = NULL;
check_attribute_names2:
- if (!rst->attr_names_len)
- goto trace_attribute_table;
-
- ane = attr_names;
- if (!oatbl)
- goto trace_attribute_table;
- while (ane->off) {
- /* TODO: Clear table on exit! */
- oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
- t16 = le16_to_cpu(ane->name_bytes);
- oe->name_len = t16 / sizeof(short);
- oe->ptr = ane->name;
- oe->is_attr_name = 2;
- ane = Add2Ptr(ane, sizeof(struct ATTR_NAME_ENTRY) + t16);
- }
-
-trace_attribute_table:
+ if (rst->attr_names_len && oatbl) {
+ struct ATTR_NAME_ENTRY *ane = attr_names;
+ while (ane->off) {
+ /* TODO: Clear table on exit! */
+ oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
+ t16 = le16_to_cpu(ane->name_bytes);
+ oe->name_len = t16 / sizeof(short);
+ oe->ptr = ane->name;
+ oe->is_attr_name = 2;
+ ane = Add2Ptr(ane,
+ sizeof(struct ATTR_NAME_ENTRY) + t16);
+ }
+ }
+
/*
* If the checkpt_lsn is zero, then this is a freshly
* formatted disk and we have no work to do.
@@ -5189,7 +5169,7 @@ out:
kfree(oatbl);
kfree(dptbl);
kfree(attr_names);
- kfree(rst_info.r_page);
+ kfree(log->rst_info.r_page);
kfree(ra);
kfree(log->one_page_buf);
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index fbfe21dbb425..ae2ef5c11868 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -853,7 +853,8 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait)
/*
* sb can be NULL here. In this case sbi->flags should be 0 too.
*/
- if (!sb || !(sbi->flags & NTFS_FLAGS_MFTMIRR))
+ if (!sb || !(sbi->flags & NTFS_FLAGS_MFTMIRR) ||
+ unlikely(ntfs3_forced_shutdown(sb)))
return;
blocksize = sb->s_blocksize;
@@ -1006,6 +1007,30 @@ static inline __le32 security_hash(const void *sd, size_t bytes)
return cpu_to_le32(hash);
}
+/*
+ * simple wrapper for sb_bread_unmovable.
+ */
+struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block)
+{
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+ struct buffer_head *bh;
+
+ if (unlikely(block >= sbi->volume.blocks)) {
+ /* prevent generic message "attempt to access beyond end of device" */
+ ntfs_err(sb, "try to read out of volume at offset 0x%llx",
+ (u64)block << sb->s_blocksize_bits);
+ return NULL;
+ }
+
+ bh = sb_bread_unmovable(sb, block);
+ if (bh)
+ return bh;
+
+ ntfs_err(sb, "failed to read volume at offset 0x%llx",
+ (u64)block << sb->s_blocksize_bits);
+ return NULL;
+}
+
int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer)
{
struct block_device *bdev = sb->s_bdev;
@@ -2128,8 +2153,8 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi,
if (le32_to_cpu(d_security->size) == new_sec_size &&
d_security->key.hash == hash_key.hash &&
!memcmp(d_security + 1, sd, size_sd)) {
- *security_id = d_security->key.sec_id;
/* Such security already exists. */
+ *security_id = d_security->key.sec_id;
err = 0;
goto out;
}
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index cf92b2433f7a..daabaad63aaf 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -1462,7 +1462,7 @@ static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
goto out2;
if (in->name == I30_NAME) {
- ni->vfs_inode.i_size = data_size;
+ i_size_write(&ni->vfs_inode, data_size);
inode_set_bytes(&ni->vfs_inode, alloc_size);
}
@@ -1544,7 +1544,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
}
if (in->name == I30_NAME)
- ni->vfs_inode.i_size = data_size;
+ i_size_write(&ni->vfs_inode, data_size);
*vbn = bit << indx->idx2vbn_bits;
@@ -2090,7 +2090,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni,
return err;
if (in->name == I30_NAME)
- ni->vfs_inode.i_size = new_data;
+ i_size_write(&ni->vfs_inode, new_data);
bpb = bitmap_size(bit);
if (bpb * 8 == nbits)
@@ -2576,7 +2576,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len,
&indx->alloc_run, 0, NULL, false, NULL);
if (in->name == I30_NAME)
- ni->vfs_inode.i_size = 0;
+ i_size_write(&ni->vfs_inode, 0);
err = ni_remove_attr(ni, ATTR_ALLOC, in->name, in->name_len,
false, NULL);
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 5e3d71374918..eb7a8c9fba01 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -345,9 +345,7 @@ next_attr:
inode->i_size = le16_to_cpu(rp.SymbolicLinkReparseBuffer
.PrintNameLength) /
sizeof(u16);
-
ni->i_valid = inode->i_size;
-
/* Clear directory bit. */
if (ni->ni_flags & NI_FLAG_DIR) {
indx_clear(&ni->dir);
@@ -412,7 +410,6 @@ end_enum:
goto out;
if (!is_match && name) {
- /* Reuse rec as buffer for ascii name. */
err = -ENOENT;
goto out;
}
@@ -427,6 +424,7 @@ end_enum:
if (names != le16_to_cpu(rec->hard_links)) {
/* Correct minor error on the fly. Do not mark inode as dirty. */
+ ntfs_inode_warn(inode, "Correct links count -> %u.", names);
rec->hard_links = cpu_to_le16(names);
ni->mi.dirty = true;
}
@@ -653,9 +651,10 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo,
off = vbo & (PAGE_SIZE - 1);
folio_set_bh(bh, folio, off);
- err = bh_read(bh, 0);
- if (err < 0)
+ if (bh_read(bh, 0) < 0) {
+ err = -EIO;
goto out;
+ }
folio_zero_segment(folio, off + voff, off + block_size);
}
}
@@ -853,9 +852,13 @@ static int ntfs_resident_writepage(struct folio *folio,
struct writeback_control *wbc, void *data)
{
struct address_space *mapping = data;
- struct ntfs_inode *ni = ntfs_i(mapping->host);
+ struct inode *inode = mapping->host;
+ struct ntfs_inode *ni = ntfs_i(inode);
int ret;
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
ni_lock(ni);
ret = attr_data_write_resident(ni, &folio->page);
ni_unlock(ni);
@@ -869,7 +872,12 @@ static int ntfs_resident_writepage(struct folio *folio,
static int ntfs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- if (is_resident(ntfs_i(mapping->host)))
+ struct inode *inode = mapping->host;
+
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
+ if (is_resident(ntfs_i(inode)))
return write_cache_pages(mapping, wbc, ntfs_resident_writepage,
mapping);
return mpage_writepages(mapping, wbc, ntfs_get_block);
@@ -889,6 +897,9 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
*pagep = NULL;
if (is_resident(ni)) {
struct page *page =
@@ -974,7 +985,7 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
}
if (pos + err > inode->i_size) {
- inode->i_size = pos + err;
+ i_size_write(inode, pos + err);
dirty = true;
}
@@ -1306,6 +1317,11 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
goto out1;
}
+ if (unlikely(ntfs3_forced_shutdown(sb))) {
+ err = -EIO;
+ goto out2;
+ }
+
/* Mark rw ntfs as dirty. it will be cleared at umount. */
ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index ee3093be5170..cae41db0aaa7 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -181,6 +181,9 @@ static int ntfs_unlink(struct inode *dir, struct dentry *dentry)
struct ntfs_inode *ni = ntfs_i(dir);
int err;
+ if (unlikely(ntfs3_forced_shutdown(dir->i_sb)))
+ return -EIO;
+
ni_lock_dir(ni);
err = ntfs_unlink_inode(dir, dentry);
@@ -199,6 +202,9 @@ static int ntfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
u32 size = strlen(symname);
struct inode *inode;
+ if (unlikely(ntfs3_forced_shutdown(dir->i_sb)))
+ return -EIO;
+
inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFLNK | 0777, 0,
symname, size, NULL);
@@ -227,6 +233,9 @@ static int ntfs_rmdir(struct inode *dir, struct dentry *dentry)
struct ntfs_inode *ni = ntfs_i(dir);
int err;
+ if (unlikely(ntfs3_forced_shutdown(dir->i_sb)))
+ return -EIO;
+
ni_lock_dir(ni);
err = ntfs_unlink_inode(dir, dentry);
@@ -264,6 +273,9 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir,
1024);
static_assert(PATH_MAX >= 4 * 1024);
+ if (unlikely(ntfs3_forced_shutdown(sb)))
+ return -EIO;
+
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index 86aecbb01a92..9c7478150a03 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -523,12 +523,10 @@ struct ATTR_LIST_ENTRY {
__le64 vcn; // 0x08: Starting VCN of this attribute.
struct MFT_REF ref; // 0x10: MFT record number with attribute.
__le16 id; // 0x18: struct ATTRIB ID.
- __le16 name[3]; // 0x1A: Just to align. To get real name can use bNameOffset.
+ __le16 name[]; // 0x1A: To get real name use name_off.
}; // sizeof(0x20)
-static_assert(sizeof(struct ATTR_LIST_ENTRY) == 0x20);
-
static inline u32 le_size(u8 name_len)
{
return ALIGN(offsetof(struct ATTR_LIST_ENTRY, name) +
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index f6706143d14b..79356fd29a14 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -61,6 +61,8 @@ enum utf16_endian;
/* sbi->flags */
#define NTFS_FLAGS_NODISCARD 0x00000001
+/* ntfs in shutdown state. */
+#define NTFS_FLAGS_SHUTDOWN_BIT 0x00000002 /* == 4*/
/* Set when LogFile is replaying. */
#define NTFS_FLAGS_LOG_REPLAYING 0x00000008
/* Set when we changed first MFT's which copy must be updated in $MftMirr. */
@@ -226,7 +228,7 @@ struct ntfs_sb_info {
u64 maxbytes; // Maximum size for normal files.
u64 maxbytes_sparse; // Maximum size for sparse file.
- u32 flags; // See NTFS_FLAGS_XXX.
+ unsigned long flags; // See NTFS_FLAGS_
CLST zone_max; // Maximum MFT zone length in clusters
CLST bad_clusters; // The count of marked bad clusters.
@@ -473,7 +475,7 @@ bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn,
int al_update(struct ntfs_inode *ni, int sync);
static inline size_t al_aligned(size_t size)
{
- return (size + 1023) & ~(size_t)1023;
+ return size_add(size, 1023) & ~(size_t)1023;
}
/* Globals from bitfunc.c */
@@ -500,6 +502,8 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
int ntfs_file_open(struct inode *inode, struct file *file);
int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
+long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg);
+long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg);
extern const struct inode_operations ntfs_special_inode_operations;
extern const struct inode_operations ntfs_file_inode_operations;
extern const struct file_operations ntfs_file_operations;
@@ -584,6 +588,7 @@ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes);
int log_replay(struct ntfs_inode *ni, bool *initialized);
/* Globals from fsntfs.c */
+struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block);
bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes);
int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes,
bool simple);
@@ -872,7 +877,7 @@ int ntfs_init_acl(struct mnt_idmap *idmap, struct inode *inode,
int ntfs_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry);
ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
-extern const struct xattr_handler * const ntfs_xattr_handlers[];
+extern const struct xattr_handler *const ntfs_xattr_handlers[];
int ntfs_save_wsl_perm(struct inode *inode, __le16 *ea_size);
void ntfs_get_wsl_perm(struct inode *inode);
@@ -999,6 +1004,11 @@ static inline struct ntfs_sb_info *ntfs_sb(struct super_block *sb)
return sb->s_fs_info;
}
+static inline int ntfs3_forced_shutdown(struct super_block *sb)
+{
+ return test_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags);
+}
+
/*
* ntfs_up_cluster - Align up on cluster boundary.
*/
@@ -1025,19 +1035,6 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size)
return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
}
-static inline struct buffer_head *ntfs_bread(struct super_block *sb,
- sector_t block)
-{
- struct buffer_head *bh = sb_bread(sb, block);
-
- if (bh)
- return bh;
-
- ntfs_err(sb, "failed to read volume at offset 0x%llx",
- (u64)block << sb->s_blocksize_bits);
- return NULL;
-}
-
static inline struct ntfs_inode *ntfs_i(struct inode *inode)
{
return container_of(inode, struct ntfs_inode, vfs_inode);
diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c
index 53629b1f65e9..6aa3a9d44df1 100644
--- a/fs/ntfs3/record.c
+++ b/fs/ntfs3/record.c
@@ -279,7 +279,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr)
if (t16 > asize)
return NULL;
- if (t16 + le32_to_cpu(attr->res.data_size) > asize)
+ if (le32_to_cpu(attr->res.data_size) > asize - t16)
return NULL;
t32 = sizeof(short) * attr->name_len;
@@ -535,8 +535,20 @@ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi,
return false;
if (ni && is_attr_indexed(attr)) {
- le16_add_cpu(&ni->mi.mrec->hard_links, -1);
- ni->mi.dirty = true;
+ u16 links = le16_to_cpu(ni->mi.mrec->hard_links);
+ struct ATTR_FILE_NAME *fname =
+ attr->type != ATTR_NAME ?
+ NULL :
+ resident_data_ex(attr,
+ SIZEOF_ATTRIBUTE_FILENAME);
+ if (fname && fname->type == FILE_NAME_DOS) {
+ /* Do not decrease links count deleting DOS name. */
+ } else if (!links) {
+ /* minor error. Not critical. */
+ } else {
+ ni->mi.mrec->hard_links = cpu_to_le16(links - 1);
+ ni->mi.dirty = true;
+ }
}
used -= asize;
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 9153dffde950..cef5467fd928 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -122,13 +122,12 @@ void ntfs_inode_printk(struct inode *inode, const char *fmt, ...)
if (name) {
struct dentry *de = d_find_alias(inode);
- const u32 name_len = ARRAY_SIZE(s_name_buf) - 1;
if (de) {
spin_lock(&de->d_lock);
- snprintf(name, name_len, " \"%s\"", de->d_name.name);
+ snprintf(name, sizeof(s_name_buf), " \"%s\"",
+ de->d_name.name);
spin_unlock(&de->d_lock);
- name[name_len] = 0; /* To be sure. */
} else {
name[0] = 0;
}
@@ -625,7 +624,7 @@ static void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
{
kfree(sbi->new_rec);
kvfree(ntfs_put_shared(sbi->upcase));
- kfree(sbi->def_table);
+ kvfree(sbi->def_table);
kfree(sbi->compress.lznt);
#ifdef CONFIG_NTFS3_LZX_XPRESS
xpress_free_decompressor(sbi->compress.xpress);
@@ -715,6 +714,14 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root)
}
/*
+ * ntfs_shutdown - super_operations::shutdown
+ */
+static void ntfs_shutdown(struct super_block *sb)
+{
+ set_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags);
+}
+
+/*
* ntfs_sync_fs - super_operations::sync_fs
*/
static int ntfs_sync_fs(struct super_block *sb, int wait)
@@ -724,6 +731,9 @@ static int ntfs_sync_fs(struct super_block *sb, int wait)
struct ntfs_inode *ni;
struct inode *inode;
+ if (unlikely(ntfs3_forced_shutdown(sb)))
+ return -EIO;
+
ni = sbi->security.ni;
if (ni) {
inode = &ni->vfs_inode;
@@ -763,6 +773,7 @@ static const struct super_operations ntfs_sops = {
.put_super = ntfs_put_super,
.statfs = ntfs_statfs,
.show_options = ntfs_show_options,
+ .shutdown = ntfs_shutdown,
.sync_fs = ntfs_sync_fs,
.write_inode = ntfs3_write_inode,
};
@@ -866,6 +877,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
u16 fn, ao;
u8 cluster_bits;
u32 boot_off = 0;
+ sector_t boot_block = 0;
const char *hint = "Primary boot";
/* Save original dev_size. Used with alternative boot. */
@@ -873,11 +885,11 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
sbi->volume.blocks = dev_size >> PAGE_SHIFT;
- bh = ntfs_bread(sb, 0);
+read_boot:
+ bh = ntfs_bread(sb, boot_block);
if (!bh)
- return -EIO;
+ return boot_block ? -EINVAL : -EIO;
-check_boot:
err = -EINVAL;
/* Corrupted image; do not read OOB */
@@ -1108,26 +1120,24 @@ check_boot:
}
out:
- if (err == -EINVAL && !bh->b_blocknr && dev_size0 > PAGE_SHIFT) {
+ brelse(bh);
+
+ if (err == -EINVAL && !boot_block && dev_size0 > PAGE_SHIFT) {
u32 block_size = min_t(u32, sector_size, PAGE_SIZE);
u64 lbo = dev_size0 - sizeof(*boot);
- /*
- * Try alternative boot (last sector)
- */
- brelse(bh);
-
- sb_set_blocksize(sb, block_size);
- bh = ntfs_bread(sb, lbo >> blksize_bits(block_size));
- if (!bh)
- return -EINVAL;
-
+ boot_block = lbo >> blksize_bits(block_size);
boot_off = lbo & (block_size - 1);
- hint = "Alternative boot";
- dev_size = dev_size0; /* restore original size. */
- goto check_boot;
+ if (boot_block && block_size >= boot_off + sizeof(*boot)) {
+ /*
+ * Try alternative boot (last sector)
+ */
+ sb_set_blocksize(sb, block_size);
+ hint = "Alternative boot";
+ dev_size = dev_size0; /* restore original size. */
+ goto read_boot;
+ }
}
- brelse(bh);
return err;
}
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 4274b6f31cfa..53e7d1fa036a 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -219,6 +219,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer,
if (!ea->name_len)
break;
+ if (ea->name_len > ea_size)
+ break;
+
if (buffer) {
/* Check if we can use field ea->name */
if (off + ea_size > size)
@@ -744,6 +747,9 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de,
int err;
struct ntfs_inode *ni = ntfs_i(inode);
+ if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
+ return -EIO;
+
/* Dispatch request. */
if (!strcmp(name, SYSTEM_DOS_ATTRIB)) {
/* system.dos_attrib */
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index b8e25ca51016..8586e2f5d243 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -265,20 +265,18 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
if (IS_ERR(old_file))
return PTR_ERR(old_file);
+ /* Try to use clone_file_range to clone up within the same fs */
+ cloned = vfs_clone_file_range(old_file, 0, new_file, 0, len, 0);
+ if (cloned == len)
+ goto out_fput;
+
+ /* Couldn't clone, so now we try to copy the data */
error = rw_verify_area(READ, old_file, &old_pos, len);
if (!error)
error = rw_verify_area(WRITE, new_file, &new_pos, len);
if (error)
goto out_fput;
- /* Try to use clone_file_range to clone up within the same fs */
- ovl_start_write(dentry);
- cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
- ovl_end_write(dentry);
- if (cloned == len)
- goto out_fput;
- /* Couldn't clone, so now we try to copy the data */
-
/* Check if lower fs supports seek operation */
if (old_file->f_mode & FMODE_LSEEK)
skip_hole = true;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ff08a8957552..34a47fb0c57f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -477,13 +477,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
int permitted;
struct mm_struct *mm;
unsigned long long start_time;
- unsigned long cmin_flt = 0, cmaj_flt = 0;
- unsigned long min_flt = 0, maj_flt = 0;
- u64 cutime, cstime, utime, stime;
- u64 cgtime, gtime;
+ unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt;
+ u64 cutime, cstime, cgtime, utime, stime, gtime;
unsigned long rsslim = 0;
unsigned long flags;
int exit_code = task->exit_code;
+ struct signal_struct *sig = task->signal;
+ unsigned int seq = 1;
state = *get_task_state(task);
vsize = eip = esp = 0;
@@ -511,12 +511,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
sigemptyset(&sigign);
sigemptyset(&sigcatch);
- cutime = cstime = utime = stime = 0;
- cgtime = gtime = 0;
if (lock_task_sighand(task, &flags)) {
- struct signal_struct *sig = task->signal;
-
if (sig->tty) {
struct pid *pgrp = tty_get_pgrp(sig->tty);
tty_pgrp = pid_nr_ns(pgrp, ns);
@@ -527,28 +523,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
num_threads = get_nr_threads(task);
collect_sigign_sigcatch(task, &sigign, &sigcatch);
- cmin_flt = sig->cmin_flt;
- cmaj_flt = sig->cmaj_flt;
- cutime = sig->cutime;
- cstime = sig->cstime;
- cgtime = sig->cgtime;
rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);
- /* add up live thread stats at the group level */
if (whole) {
- struct task_struct *t;
-
- __for_each_thread(sig, t) {
- min_flt += t->min_flt;
- maj_flt += t->maj_flt;
- gtime += task_gtime(t);
- }
-
- min_flt += sig->min_flt;
- maj_flt += sig->maj_flt;
- thread_group_cputime_adjusted(task, &utime, &stime);
- gtime += sig->gtime;
-
if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
exit_code = sig->group_exit_code;
}
@@ -562,10 +539,41 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
if (permitted && (!whole || num_threads < 2))
wchan = !task_is_running(task);
- if (!whole) {
+
+ do {
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
+ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+
+ cmin_flt = sig->cmin_flt;
+ cmaj_flt = sig->cmaj_flt;
+ cutime = sig->cutime;
+ cstime = sig->cstime;
+ cgtime = sig->cgtime;
+
+ if (whole) {
+ struct task_struct *t;
+
+ min_flt = sig->min_flt;
+ maj_flt = sig->maj_flt;
+ gtime = sig->gtime;
+
+ rcu_read_lock();
+ __for_each_thread(sig, t) {
+ min_flt += t->min_flt;
+ maj_flt += t->maj_flt;
+ gtime += task_gtime(t);
+ }
+ rcu_read_unlock();
+ }
+ } while (need_seqretry(&sig->stats_lock, seq));
+ done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+
+ if (whole) {
+ thread_group_cputime_adjusted(task, &utime, &stime);
+ } else {
+ task_cputime_adjusted(task, &utime, &stime);
min_flt = task->min_flt;
maj_flt = task->maj_flt;
- task_cputime_adjusted(task, &utime, &stime);
gtime = task_gtime(task);
}
diff --git a/fs/remap_range.c b/fs/remap_range.c
index f8c1120b8311..de07f978ce3e 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -373,9 +373,9 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
}
EXPORT_SYMBOL(generic_remap_file_range_prep);
-loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t len, unsigned int remap_flags)
+loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags)
{
loff_t ret;
@@ -391,23 +391,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
if (!file_in->f_op->remap_file_range)
return -EOPNOTSUPP;
- ret = file_in->f_op->remap_file_range(file_in, pos_in,
- file_out, pos_out, len, remap_flags);
- if (ret < 0)
- return ret;
-
- fsnotify_access(file_in);
- fsnotify_modify(file_out);
- return ret;
-}
-EXPORT_SYMBOL(do_clone_file_range);
-
-loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t len, unsigned int remap_flags)
-{
- loff_t ret;
-
ret = remap_verify_area(file_in, pos_in, len, false);
if (ret)
return ret;
@@ -417,10 +400,14 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
return ret;
file_start_write(file_out);
- ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
- remap_flags);
+ ret = file_in->f_op->remap_file_range(file_in, pos_in,
+ file_out, pos_out, len, remap_flags);
file_end_write(file_out);
+ if (ret < 0)
+ return ret;
+ fsnotify_access(file_in);
+ fsnotify_modify(file_out);
return ret;
}
EXPORT_SYMBOL(vfs_clone_file_range);
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 1daeb5714faa..3de5047a7ff9 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -242,6 +242,7 @@ replay_again:
.desired_access = FILE_READ_DATA | FILE_READ_ATTRIBUTES,
.disposition = FILE_OPEN,
.fid = pfid,
+ .replay = !!(retries),
};
rc = SMB2_open_init(tcon, server,
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index c86a72c9d9ec..53c75cfb33ab 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -1378,6 +1378,7 @@ struct cifs_open_parms {
struct cifs_fid *fid;
umode_t mode;
bool reconnect:1;
+ bool replay:1; /* indicates that this open is for a replay */
};
struct cifs_fid {
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index bfd568f89710..ac9595504f4b 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -233,6 +233,12 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) {
/* check if iface is still active */
spin_lock(&ses->chan_lock);
+ if (cifs_ses_get_chan_index(ses, server) ==
+ CIFS_INVAL_CHAN_INDEX) {
+ spin_unlock(&ses->chan_lock);
+ continue;
+ }
+
if (!cifs_chan_is_iface_active(ses, server)) {
spin_unlock(&ses->chan_lock);
cifs_chan_update_iface(ses, server);
@@ -3438,8 +3444,18 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx)
* the user on mount
*/
if ((cifs_sb->ctx->wsize == 0) ||
- (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx)))
- cifs_sb->ctx->wsize = server->ops->negotiate_wsize(tcon, ctx);
+ (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) {
+ cifs_sb->ctx->wsize =
+ round_down(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE);
+ /*
+ * in the very unlikely event that the server sent a max write size under PAGE_SIZE,
+ * (which would get rounded down to 0) then reset wsize to absolute minimum eg 4096
+ */
+ if (cifs_sb->ctx->wsize == 0) {
+ cifs_sb->ctx->wsize = PAGE_SIZE;
+ cifs_dbg(VFS, "wsize too small, reset to minimum ie PAGE_SIZE, usually 4096\n");
+ }
+ }
if ((cifs_sb->ctx->rsize == 0) ||
(cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx)))
cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx);
@@ -4228,6 +4244,11 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
/* only send once per connect */
spin_lock(&tcon->tc_lock);
+
+ /* if tcon is marked for needing reconnect, update state */
+ if (tcon->need_reconnect)
+ tcon->status = TID_NEED_TCON;
+
if (tcon->status == TID_GOOD) {
spin_unlock(&tcon->tc_lock);
return 0;
diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c
index a8a1d386da65..449c59830039 100644
--- a/fs/smb/client/dfs.c
+++ b/fs/smb/client/dfs.c
@@ -565,6 +565,11 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
/* only send once per connect */
spin_lock(&tcon->tc_lock);
+
+ /* if tcon is marked for needing reconnect, update state */
+ if (tcon->need_reconnect)
+ tcon->status = TID_NEED_TCON;
+
if (tcon->status == TID_GOOD) {
spin_unlock(&tcon->tc_lock);
return 0;
@@ -625,8 +630,8 @@ out:
spin_lock(&tcon->tc_lock);
if (tcon->status == TID_IN_TCON)
tcon->status = TID_GOOD;
- spin_unlock(&tcon->tc_lock);
tcon->need_reconnect = false;
+ spin_unlock(&tcon->tc_lock);
}
return rc;
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index b75282c204da..f391c9b803d8 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -175,6 +175,9 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
/* only send once per connect */
spin_lock(&tcon->tc_lock);
+ if (tcon->need_reconnect)
+ tcon->status = TID_NEED_RECON;
+
if (tcon->status != TID_NEED_RECON) {
spin_unlock(&tcon->tc_lock);
return;
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 52cbef2eeb28..4b2f5aa2ea0e 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -211,7 +211,7 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c
switch (match_token(value, cifs_secflavor_tokens, args)) {
case Opt_sec_krb5p:
- cifs_errorf(fc, "sec=krb5p is not supported!\n");
+ cifs_errorf(fc, "sec=krb5p is not supported. Use sec=krb5,seal instead\n");
return 1;
case Opt_sec_krb5i:
ctx->sign = true;
@@ -1111,6 +1111,17 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
case Opt_wsize:
ctx->wsize = result.uint_32;
ctx->got_wsize = true;
+ if (ctx->wsize % PAGE_SIZE != 0) {
+ ctx->wsize = round_down(ctx->wsize, PAGE_SIZE);
+ if (ctx->wsize == 0) {
+ ctx->wsize = PAGE_SIZE;
+ cifs_dbg(VFS, "wsize too small, reset to minimum %ld\n", PAGE_SIZE);
+ } else {
+ cifs_dbg(VFS,
+ "wsize rounded down to %d to multiple of PAGE_SIZE %ld\n",
+ ctx->wsize, PAGE_SIZE);
+ }
+ }
break;
case Opt_acregmax:
ctx->acregmax = HZ * result.uint_32;
diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c
index a6968573b775..4a517b280f2b 100644
--- a/fs/smb/client/namespace.c
+++ b/fs/smb/client/namespace.c
@@ -168,6 +168,21 @@ static char *automount_fullpath(struct dentry *dentry, void *page)
return s;
}
+static void fs_context_set_ids(struct smb3_fs_context *ctx)
+{
+ kuid_t uid = current_fsuid();
+ kgid_t gid = current_fsgid();
+
+ if (ctx->multiuser) {
+ if (!ctx->uid_specified)
+ ctx->linux_uid = uid;
+ if (!ctx->gid_specified)
+ ctx->linux_gid = gid;
+ }
+ if (!ctx->cruid_specified)
+ ctx->cred_uid = uid;
+}
+
/*
* Create a vfsmount that we can automount
*/
@@ -205,6 +220,7 @@ static struct vfsmount *cifs_do_automount(struct path *path)
tmp.leaf_fullpath = NULL;
tmp.UNC = tmp.prepath = NULL;
tmp.dfs_root_ses = NULL;
+ fs_context_set_ids(&tmp);
rc = smb3_fs_context_dup(ctx, &tmp);
if (rc) {
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
index 3b1b01d10f7d..b520eea7bfce 100644
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -307,14 +307,16 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
}
static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr,
- SEARCH_ID_FULL_DIR_INFO *info,
+ const void *info,
struct cifs_sb_info *cifs_sb)
{
+ const FILE_FULL_DIRECTORY_INFO *di = info;
+
__dir_info_to_fattr(fattr, info);
- /* See MS-FSCC 2.4.19 FileIdFullDirectoryInformation */
+ /* See MS-FSCC 2.4.14, 2.4.19 */
if (fattr->cf_cifsattrs & ATTR_REPARSE)
- fattr->cf_cifstag = le32_to_cpu(info->EaSize);
+ fattr->cf_cifstag = le32_to_cpu(di->EaSize);
cifs_fill_common_info(fattr, cifs_sb);
}
@@ -396,7 +398,7 @@ ffirst_retry:
} else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
} else /* not srvinos - BB fixme add check for backlevel? */ {
- cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO;
+ cifsFile->srch_inf.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO;
}
search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME;
@@ -987,10 +989,9 @@ static int cifs_filldir(char *find_entry, struct file *file,
(FIND_FILE_STANDARD_INFO *)find_entry,
cifs_sb);
break;
+ case SMB_FIND_FILE_FULL_DIRECTORY_INFO:
case SMB_FIND_FILE_ID_FULL_DIR_INFO:
- cifs_fulldir_info_to_fattr(&fattr,
- (SEARCH_ID_FULL_DIR_INFO *)find_entry,
- cifs_sb);
+ cifs_fulldir_info_to_fattr(&fattr, find_entry, cifs_sb);
break;
default:
cifs_dir_info_to_fattr(&fattr,
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index ed4bd88dd528..8f37373fd333 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -76,7 +76,7 @@ cifs_ses_get_chan_index(struct cifs_ses *ses,
unsigned int i;
/* if the channel is waiting for termination */
- if (server->terminate)
+ if (server && server->terminate)
return CIFS_INVAL_CHAN_INDEX;
for (i = 0; i < ses->chan_count; i++) {
@@ -88,7 +88,6 @@ cifs_ses_get_chan_index(struct cifs_ses *ses,
if (server)
cifs_dbg(VFS, "unable to get chan index for server: 0x%llx",
server->conn_id);
- WARN_ON(1);
return CIFS_INVAL_CHAN_INDEX;
}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 83c898afc835..4695433fcf39 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -619,7 +619,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
goto out;
}
- while (bytes_left >= sizeof(*p)) {
+ while (bytes_left >= (ssize_t)sizeof(*p)) {
memset(&tmp_iface, 0, sizeof(tmp_iface));
tmp_iface.speed = le64_to_cpu(p->LinkSpeed);
tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0;
@@ -1204,6 +1204,7 @@ replay_again:
.disposition = FILE_OPEN,
.create_options = cifs_create_options(cifs_sb, 0),
.fid = &fid,
+ .replay = !!(retries),
};
rc = SMB2_open_init(tcon, server,
@@ -1569,6 +1570,7 @@ replay_again:
.disposition = FILE_OPEN,
.create_options = cifs_create_options(cifs_sb, create_options),
.fid = &fid,
+ .replay = !!(retries),
};
if (qi.flags & PASSTHRU_FSCTL) {
@@ -2295,6 +2297,7 @@ replay_again:
.disposition = FILE_OPEN,
.create_options = cifs_create_options(cifs_sb, 0),
.fid = fid,
+ .replay = !!(retries),
};
rc = SMB2_open_init(tcon, server,
@@ -2681,6 +2684,7 @@ replay_again:
.disposition = FILE_OPEN,
.create_options = cifs_create_options(cifs_sb, 0),
.fid = &fid,
+ .replay = !!(retries),
};
rc = SMB2_open_init(tcon, server,
@@ -5213,7 +5217,7 @@ static int smb2_create_reparse_symlink(const unsigned int xid,
struct inode *new;
struct kvec iov;
__le16 *path;
- char *sym;
+ char *sym, sep = CIFS_DIR_SEP(cifs_sb);
u16 len, plen;
int rc = 0;
@@ -5227,7 +5231,8 @@ static int smb2_create_reparse_symlink(const unsigned int xid,
.symlink_target = sym,
};
- path = cifs_convert_path_to_utf16(symname, cifs_sb);
+ convert_delimiter(sym, sep);
+ path = cifs_convert_path_to_utf16(sym, cifs_sb);
if (!path) {
rc = -ENOMEM;
goto out;
@@ -5250,7 +5255,10 @@ static int smb2_create_reparse_symlink(const unsigned int xid,
buf->PrintNameLength = cpu_to_le16(plen);
memcpy(buf->PathBuffer, path, plen);
buf->Flags = cpu_to_le32(*symname != '/' ? SYMLINK_FLAG_RELATIVE : 0);
+ if (*sym != sep)
+ buf->Flags = cpu_to_le32(SYMLINK_FLAG_RELATIVE);
+ convert_delimiter(sym, '/');
iov.iov_base = buf;
iov.iov_len = len;
new = smb2_get_reparse_inode(&data, inode->i_sb, xid,
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index c58fa44dd6b0..608ee05491e2 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -2404,8 +2404,13 @@ create_durable_v2_buf(struct cifs_open_parms *oparms)
*/
buf->dcontext.Timeout = cpu_to_le32(oparms->tcon->handle_timeout);
buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT);
- generate_random_uuid(buf->dcontext.CreateGuid);
- memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16);
+
+ /* for replay, we should not overwrite the existing create guid */
+ if (!oparms->replay) {
+ generate_random_uuid(buf->dcontext.CreateGuid);
+ memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16);
+ } else
+ memcpy(buf->dcontext.CreateGuid, pfid->create_guid, 16);
/* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DH2Q" */
buf->Name[0] = 'D';
@@ -3142,6 +3147,7 @@ replay_again:
/* reinitialize for possible replay */
flags = 0;
server = cifs_pick_channel(ses);
+ oparms->replay = !!(retries);
cifs_dbg(FYI, "create/open\n");
if (!ses || !server)
@@ -5206,6 +5212,9 @@ int SMB2_query_directory_init(const unsigned int xid,
case SMB_FIND_FILE_POSIX_INFO:
req->FileInformationClass = SMB_FIND_FILE_POSIX_INFO;
break;
+ case SMB_FIND_FILE_FULL_DIRECTORY_INFO:
+ req->FileInformationClass = FILE_FULL_DIRECTORY_INFORMATION;
+ break;
default:
cifs_tcon_dbg(VFS, "info level %u isn't supported\n",
info_level);
@@ -5275,6 +5284,9 @@ smb2_parse_query_directory(struct cifs_tcon *tcon,
/* note that posix payload are variable size */
info_buf_size = sizeof(struct smb2_posix_info);
break;
+ case SMB_FIND_FILE_FULL_DIRECTORY_INFO:
+ info_buf_size = sizeof(FILE_FULL_DIRECTORY_INFO);
+ break;
default:
cifs_tcon_dbg(VFS, "info level %u isn't supported\n",
srch_inf->info_level);
diff --git a/fs/smb/server/misc.c b/fs/smb/server/misc.c
index 9e8afaa686e3..1a5faa6f6e7b 100644
--- a/fs/smb/server/misc.c
+++ b/fs/smb/server/misc.c
@@ -261,6 +261,7 @@ out_ascii:
/**
* ksmbd_extract_sharename() - get share name from tree connect request
+ * @um: pointer to a unicode_map structure for character encoding handling
* @treename: buffer containing tree name and share name
*
* Return: share name on success, otherwise error
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index ba7a72a6a4f4..0c97d3c86072 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -6173,8 +6173,10 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work)
err = ksmbd_iov_pin_rsp_read(work, (void *)rsp,
offsetof(struct smb2_read_rsp, Buffer),
aux_payload_buf, nbytes);
- if (err)
+ if (err) {
+ kvfree(aux_payload_buf);
goto out;
+ }
kvfree(rpc_resp);
} else {
err = ksmbd_iov_pin_rsp(work, (void *)rsp,
@@ -6384,8 +6386,10 @@ int smb2_read(struct ksmbd_work *work)
err = ksmbd_iov_pin_rsp_read(work, (void *)rsp,
offsetof(struct smb2_read_rsp, Buffer),
aux_payload_buf, nbytes);
- if (err)
+ if (err) {
+ kvfree(aux_payload_buf);
goto out;
+ }
ksmbd_fd_put(work, fp);
return 0;
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 6ab2318a9c8e..dba5dcb62bef 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -348,7 +348,12 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
struct zonefs_inode_info *zi = ZONEFS_I(inode);
if (error) {
- zonefs_io_error(inode, true);
+ /*
+ * For Sync IOs, error recovery is called from
+ * zonefs_file_dio_write().
+ */
+ if (!is_sync_kiocb(iocb))
+ zonefs_io_error(inode, true);
return error;
}
@@ -491,6 +496,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
ret = -EINVAL;
goto inode_unlock;
}
+ /*
+ * Advance the zone write pointer offset. This assumes that the
+ * IO will succeed, which is OK to do because we do not allow
+ * partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO
+ * fails, the error path will correct the write pointer offset.
+ */
+ z->z_wpoffset += count;
+ zonefs_inode_account_active(inode);
mutex_unlock(&zi->i_truncate_mutex);
}
@@ -504,20 +517,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
if (ret == -ENOTBLK)
ret = -EBUSY;
- if (zonefs_zone_is_seq(z) &&
- (ret > 0 || ret == -EIOCBQUEUED)) {
- if (ret > 0)
- count = ret;
-
- /*
- * Update the zone write pointer offset assuming the write
- * operation succeeded. If it did not, the error recovery path
- * will correct it. Also do active seq file accounting.
- */
- mutex_lock(&zi->i_truncate_mutex);
- z->z_wpoffset += count;
- zonefs_inode_account_active(inode);
- mutex_unlock(&zi->i_truncate_mutex);
+ /*
+ * For a failed IO or partial completion, trigger error recovery
+ * to update the zone write pointer offset to a correct value.
+ * For asynchronous IOs, zonefs_file_write_dio_end_io() may already
+ * have executed error recovery if the IO already completed when we
+ * reach here. However, we cannot know that and execute error recovery
+ * again (that will not change anything).
+ */
+ if (zonefs_zone_is_seq(z)) {
+ if (ret > 0 && ret != count)
+ ret = -EIO;
+ if (ret < 0 && ret != -EIOCBQUEUED)
+ zonefs_io_error(inode, true);
}
inode_unlock:
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 93971742613a..b6e8e7c96251 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -246,16 +246,18 @@ static void zonefs_inode_update_mode(struct inode *inode)
z->z_mode = inode->i_mode;
}
-struct zonefs_ioerr_data {
- struct inode *inode;
- bool write;
-};
-
static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
void *data)
{
- struct zonefs_ioerr_data *err = data;
- struct inode *inode = err->inode;
+ struct blk_zone *z = data;
+
+ *z = *zone;
+ return 0;
+}
+
+static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
+ bool write)
+{
struct zonefs_zone *z = zonefs_inode_zone(inode);
struct super_block *sb = inode->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
@@ -270,8 +272,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
data_size = zonefs_check_zone_condition(sb, z, zone);
isize = i_size_read(inode);
if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
- !err->write && isize == data_size)
- return 0;
+ !write && isize == data_size)
+ return;
/*
* At this point, we detected either a bad zone or an inconsistency
@@ -292,7 +294,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
* In all cases, warn about inode size inconsistency and handle the
* IO error according to the zone condition and to the mount options.
*/
- if (zonefs_zone_is_seq(z) && isize != data_size)
+ if (isize != data_size)
zonefs_warn(sb,
"inode %lu: invalid size %lld (should be %lld)\n",
inode->i_ino, isize, data_size);
@@ -352,8 +354,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
zonefs_i_size_write(inode, data_size);
z->z_wpoffset = data_size;
zonefs_inode_account_active(inode);
-
- return 0;
}
/*
@@ -367,23 +367,25 @@ void __zonefs_io_error(struct inode *inode, bool write)
{
struct zonefs_zone *z = zonefs_inode_zone(inode);
struct super_block *sb = inode->i_sb;
- struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
unsigned int noio_flag;
- unsigned int nr_zones = 1;
- struct zonefs_ioerr_data err = {
- .inode = inode,
- .write = write,
- };
+ struct blk_zone zone;
int ret;
/*
- * The only files that have more than one zone are conventional zone
- * files with aggregated conventional zones, for which the inode zone
- * size is always larger than the device zone size.
+ * Conventional zone have no write pointer and cannot become read-only
+ * or offline. So simply fake a report for a single or aggregated zone
+ * and let zonefs_handle_io_error() correct the zone inode information
+ * according to the mount options.
*/
- if (z->z_size > bdev_zone_sectors(sb->s_bdev))
- nr_zones = z->z_size >>
- (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
+ if (!zonefs_zone_is_seq(z)) {
+ zone.start = z->z_sector;
+ zone.len = z->z_size >> SECTOR_SHIFT;
+ zone.wp = zone.start + zone.len;
+ zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
+ zone.cond = BLK_ZONE_COND_NOT_WP;
+ zone.capacity = zone.len;
+ goto handle_io_error;
+ }
/*
* Memory allocations in blkdev_report_zones() can trigger a memory
@@ -394,12 +396,20 @@ void __zonefs_io_error(struct inode *inode, bool write)
* the GFP_NOIO context avoids both problems.
*/
noio_flag = memalloc_noio_save();
- ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones,
- zonefs_io_error_cb, &err);
- if (ret != nr_zones)
+ ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1,
+ zonefs_io_error_cb, &zone);
+ memalloc_noio_restore(noio_flag);
+
+ if (ret != 1) {
zonefs_err(sb, "Get inode %lu zone information failed %d\n",
inode->i_ino, ret);
- memalloc_noio_restore(noio_flag);
+ zonefs_warn(sb, "remounting filesystem read-only\n");
+ sb->s_flags |= SB_RDONLY;
+ return;
+ }
+
+handle_io_error:
+ zonefs_handle_io_error(inode, &zone, write);
}
static struct kmem_cache *zonefs_inode_cachep;