summaryrefslogtreecommitdiff
path: root/fs/f2fs/super.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-04 20:48:47 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-04 20:48:47 +0300
commit6abaa83c7352b31450d7e8c173f674324c16b02b (patch)
treecea534b220e9635bb2af785a954292e416ebdca9 /fs/f2fs/super.c
parent0961f0c00e69672a8e4a2e591355567dbda44389 (diff)
parent9605f75cf36e0bcc0f4ada07b5be712d30107607 (diff)
downloadlinux-6abaa83c7352b31450d7e8c173f674324c16b02b.tar.xz
Merge tag 'f2fs-for-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "In this cycle, we've addressed some performance issues such as lock contention, misbehaving compress_cache, allowing extent_cache for compressed files, and new sysfs to adjust ra_size for fadvise. In order to diagnose the performance issues quickly, we also added an iostat which shows the IO latencies periodically. On the stability side, we've found two memory leakage cases in the error path in compression flow. And, we've also fixed various corner cases in fiemap, quota, checkpoint=disable, zstd, and so on. Enhancements: - avoid long checkpoint latency by releasing nat_tree_lock - collect and show iostats periodically - support extent_cache for compressed files - add a sysfs entry to manage ra_size given fadvise(POSIX_FADV_SEQUENTIAL) - report f2fs GC status via sysfs - add discard_unit=%s in mount option to handle zoned device Bug fixes: - fix two memory leakages when an error happens in the compressed IO flow - fix commpress_cache to get the right LBA - fix fiemap to deal with compressed case correctly - fix wrong EIO returns due to SBI_NEED_FSCK - fix missing writes when enabling checkpoint back - fix quota deadlock - fix zstd level mount option In addition to the above major updates, we've cleaned up several code paths such as dio, unnecessary operations, debugfs/f2fs/status, sanity check, and typos" * tag 'f2fs-for-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (46 commits) f2fs: should put a page beyond EOF when preparing a write f2fs: deallocate compressed pages when error happens f2fs: enable realtime discard iff device supports discard f2fs: guarantee to write dirty data when enabling checkpoint back f2fs: fix to unmap pages from userspace process in punch_hole() f2fs: fix unexpected ENOENT comes from f2fs_map_blocks() f2fs: fix to account missing .skipped_gc_rwsem f2fs: adjust unlock order for cleanup f2fs: Don't create discard thread when device doesn't support realtime discard f2fs: rebuild nat_bits during umount f2fs: introduce periodic iostat io latency traces f2fs: separate out iostat feature f2fs: compress: do sanity check on cluster f2fs: fix description about main_blkaddr node f2fs: convert S_IRUGO to 0444 f2fs: fix to keep compatibility of fault injection interface f2fs: support fault injection for f2fs_kmem_cache_alloc() f2fs: compress: allow write compress released file after truncate to zero f2fs: correct comment in segment.h f2fs: improve sbi status info in debugfs/f2fs/status ...
Diffstat (limited to 'fs/f2fs/super.c')
-rw-r--r--fs/f2fs/super.c242
1 files changed, 182 insertions, 60 deletions
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ce2ab1b85c11..78ebc306ee2b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -33,6 +33,7 @@
#include "segment.h"
#include "xattr.h"
#include "gc.h"
+#include "iostat.h"
#define CREATE_TRACE_POINTS
#include <trace/events/f2fs.h>
@@ -56,6 +57,7 @@ const char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_CHECKPOINT] = "checkpoint error",
[FAULT_DISCARD] = "discard error",
[FAULT_WRITE_IO] = "write IO error",
+ [FAULT_SLAB_ALLOC] = "slab alloc",
};
void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -155,6 +157,7 @@ enum {
Opt_atgc,
Opt_gc_merge,
Opt_nogc_merge,
+ Opt_discard_unit,
Opt_err,
};
@@ -231,6 +234,7 @@ static match_table_t f2fs_tokens = {
{Opt_atgc, "atgc"},
{Opt_gc_merge, "gc_merge"},
{Opt_nogc_merge, "nogc_merge"},
+ {Opt_discard_unit, "discard_unit=%s"},
{Opt_err, NULL},
};
@@ -657,10 +661,14 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
return -EINVAL;
break;
case Opt_discard:
+ if (!f2fs_hw_support_discard(sbi)) {
+ f2fs_warn(sbi, "device does not support discard");
+ break;
+ }
set_opt(sbi, DISCARD);
break;
case Opt_nodiscard:
- if (f2fs_sb_has_blkzoned(sbi)) {
+ if (f2fs_hw_should_discard(sbi)) {
f2fs_warn(sbi, "discard is required for zoned block devices");
return -EINVAL;
}
@@ -1173,6 +1181,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
case Opt_nogc_merge:
clear_opt(sbi, GC_MERGE);
break;
+ case Opt_discard_unit:
+ name = match_strdup(&args[0]);
+ if (!name)
+ return -ENOMEM;
+ if (!strcmp(name, "block")) {
+ F2FS_OPTION(sbi).discard_unit =
+ DISCARD_UNIT_BLOCK;
+ } else if (!strcmp(name, "segment")) {
+ F2FS_OPTION(sbi).discard_unit =
+ DISCARD_UNIT_SEGMENT;
+ } else if (!strcmp(name, "section")) {
+ F2FS_OPTION(sbi).discard_unit =
+ DISCARD_UNIT_SECTION;
+ } else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p);
@@ -1211,6 +1238,14 @@ default_check:
return -EINVAL;
}
#endif
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ if (F2FS_OPTION(sbi).discard_unit !=
+ DISCARD_UNIT_SECTION) {
+ f2fs_info(sbi, "Zoned block device doesn't need small discard, set discard_unit=section by default");
+ F2FS_OPTION(sbi).discard_unit =
+ DISCARD_UNIT_SECTION;
+ }
+ }
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (f2fs_test_compress_extension(sbi)) {
@@ -1271,7 +1306,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
{
struct f2fs_inode_info *fi;
- fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
+ fi = f2fs_kmem_cache_alloc(f2fs_inode_cachep,
+ GFP_F2FS_ZERO, false, F2FS_SB(sb));
if (!fi)
return NULL;
@@ -1541,6 +1577,7 @@ static void f2fs_put_super(struct super_block *sb)
#endif
fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy);
destroy_percpu_info(sbi);
+ f2fs_destroy_iostat(sbi);
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
#ifdef CONFIG_UNICODE
@@ -1924,6 +1961,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
if (test_opt(sbi, ATGC))
seq_puts(seq, ",atgc");
+
+ if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_BLOCK)
+ seq_printf(seq, ",discard_unit=%s", "block");
+ else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
+ seq_printf(seq, ",discard_unit=%s", "segment");
+ else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
+ seq_printf(seq, ",discard_unit=%s", "section");
+
return 0;
}
@@ -1959,11 +2004,15 @@ static void default_options(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).unusable_cap = 0;
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
- set_opt(sbi, DISCARD);
- if (f2fs_sb_has_blkzoned(sbi))
+ if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi))
+ set_opt(sbi, DISCARD);
+ if (f2fs_sb_has_blkzoned(sbi)) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
- else
+ F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION;
+ } else {
F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
+ F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_BLOCK;
+ }
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
@@ -2038,8 +2087,17 @@ restore_flag:
static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
{
+ int retry = DEFAULT_RETRY_IO_COUNT;
+
/* we should flush all the data to keep data consistency */
- sync_inodes_sb(sbi->sb);
+ do {
+ sync_inodes_sb(sbi->sb);
+ cond_resched();
+ congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
+ } while (get_pages(sbi, F2FS_DIRTY_DATA) && retry--);
+
+ if (unlikely(retry < 0))
+ f2fs_warn(sbi, "checkpoint=enable has some unwritten data.");
down_write(&sbi->gc_lock);
f2fs_dirty_to_prefree(sbi);
@@ -2060,12 +2118,15 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
bool need_restart_gc = false, need_stop_gc = false;
bool need_restart_ckpt = false, need_stop_ckpt = false;
bool need_restart_flush = false, need_stop_flush = false;
+ bool need_restart_discard = false, need_stop_discard = false;
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
- bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+ bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
bool no_io_align = !F2FS_IO_ALIGNED(sbi);
bool no_atgc = !test_opt(sbi, ATGC);
+ bool no_discard = !test_opt(sbi, DISCARD);
bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE);
- bool checkpoint_changed;
+ bool block_unit_discard = f2fs_block_unit_discard(sbi);
+ struct discard_cmd_control *dcc;
#ifdef CONFIG_QUOTA
int i, j;
#endif
@@ -2110,8 +2171,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
err = parse_options(sb, data, true);
if (err)
goto restore_opts;
- checkpoint_changed =
- disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
/*
* Previous and new state of filesystem is RO,
@@ -2168,6 +2227,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
+ if (block_unit_discard != f2fs_block_unit_discard(sbi)) {
+ err = -EINVAL;
+ f2fs_warn(sbi, "switch discard_unit option is not allowed");
+ goto restore_opts;
+ }
+
if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
err = -EINVAL;
f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
@@ -2233,11 +2298,26 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
need_stop_flush = true;
}
- if (checkpoint_changed) {
+ if (no_discard == !!test_opt(sbi, DISCARD)) {
+ if (test_opt(sbi, DISCARD)) {
+ err = f2fs_start_discard_thread(sbi);
+ if (err)
+ goto restore_flush;
+ need_stop_discard = true;
+ } else {
+ dcc = SM_I(sbi)->dcc_info;
+ f2fs_stop_discard_thread(sbi);
+ if (atomic_read(&dcc->discard_cmd_cnt))
+ f2fs_issue_discard_timeout(sbi);
+ need_restart_discard = true;
+ }
+ }
+
+ if (enable_checkpoint == !!test_opt(sbi, DISABLE_CHECKPOINT)) {
if (test_opt(sbi, DISABLE_CHECKPOINT)) {
err = f2fs_disable_checkpoint(sbi);
if (err)
- goto restore_flush;
+ goto restore_discard;
} else {
f2fs_enable_checkpoint(sbi);
}
@@ -2257,6 +2337,13 @@ skip:
adjust_unusable_cap_perc(sbi);
*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
return 0;
+restore_discard:
+ if (need_restart_discard) {
+ if (f2fs_start_discard_thread(sbi))
+ f2fs_warn(sbi, "discard has been stopped");
+ } else if (need_stop_discard) {
+ f2fs_stop_discard_thread(sbi);
+ }
restore_flush:
if (need_restart_flush) {
if (f2fs_create_flush_cmd_control(sbi))
@@ -2517,6 +2604,33 @@ static int f2fs_enable_quotas(struct super_block *sb)
return 0;
}
+static int f2fs_quota_sync_file(struct f2fs_sb_info *sbi, int type)
+{
+ struct quota_info *dqopt = sb_dqopt(sbi->sb);
+ struct address_space *mapping = dqopt->files[type]->i_mapping;
+ int ret = 0;
+
+ ret = dquot_writeback_dquots(sbi->sb, type);
+ if (ret)
+ goto out;
+
+ ret = filemap_fdatawrite(mapping);
+ if (ret)
+ goto out;
+
+ /* if we are using journalled quota */
+ if (is_journalled_quota(sbi))
+ goto out;
+
+ ret = filemap_fdatawait(mapping);
+
+ truncate_inode_pages(&dqopt->files[type]->i_data, 0);
+out:
+ if (ret)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ return ret;
+}
+
int f2fs_quota_sync(struct super_block *sb, int type)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -2525,56 +2639,41 @@ int f2fs_quota_sync(struct super_block *sb, int type)
int ret;
/*
- * do_quotactl
- * f2fs_quota_sync
- * down_read(quota_sem)
- * dquot_writeback_dquots()
- * f2fs_dquot_commit
- * block_operation
- * down_read(quota_sem)
- */
- f2fs_lock_op(sbi);
-
- down_read(&sbi->quota_sem);
- ret = dquot_writeback_dquots(sb, type);
- if (ret)
- goto out;
-
- /*
* Now when everything is written we can discard the pagecache so
* that userspace sees the changes.
*/
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- struct address_space *mapping;
if (type != -1 && cnt != type)
continue;
- if (!sb_has_quota_active(sb, cnt))
- continue;
- mapping = dqopt->files[cnt]->i_mapping;
+ if (!sb_has_quota_active(sb, type))
+ return 0;
- ret = filemap_fdatawrite(mapping);
- if (ret)
- goto out;
+ inode_lock(dqopt->files[cnt]);
- /* if we are using journalled quota */
- if (is_journalled_quota(sbi))
- continue;
+ /*
+ * do_quotactl
+ * f2fs_quota_sync
+ * down_read(quota_sem)
+ * dquot_writeback_dquots()
+ * f2fs_dquot_commit
+ * block_operation
+ * down_read(quota_sem)
+ */
+ f2fs_lock_op(sbi);
+ down_read(&sbi->quota_sem);
- ret = filemap_fdatawait(mapping);
- if (ret)
- set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+ ret = f2fs_quota_sync_file(sbi, cnt);
+
+ up_read(&sbi->quota_sem);
+ f2fs_unlock_op(sbi);
- inode_lock(dqopt->files[cnt]);
- truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
inode_unlock(dqopt->files[cnt]);
+
+ if (ret)
+ break;
}
-out:
- if (ret)
- set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
- up_read(&sbi->quota_sem);
- f2fs_unlock_op(sbi);
return ret;
}
@@ -3207,11 +3306,13 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return -EFSCORRUPTED;
}
- if (le32_to_cpu(raw_super->cp_payload) >
- (blocks_per_seg - F2FS_CP_PACKS)) {
- f2fs_info(sbi, "Insane cp_payload (%u > %u)",
+ if (le32_to_cpu(raw_super->cp_payload) >=
+ (blocks_per_seg - F2FS_CP_PACKS -
+ NR_CURSEG_PERSIST_TYPE)) {
+ f2fs_info(sbi, "Insane cp_payload (%u >= %u)",
le32_to_cpu(raw_super->cp_payload),
- blocks_per_seg - F2FS_CP_PACKS);
+ blocks_per_seg - F2FS_CP_PACKS -
+ NR_CURSEG_PERSIST_TYPE);
return -EFSCORRUPTED;
}
@@ -3247,6 +3348,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int cp_pack_start_sum, cp_payload;
block_t user_block_count, valid_user_blocks;
block_t avail_node_count, valid_node_count;
+ unsigned int nat_blocks, nat_bits_bytes, nat_bits_blocks;
int i, j;
total = le32_to_cpu(raw_super->segment_count);
@@ -3377,6 +3479,17 @@ skip_cross:
return 1;
}
+ nat_blocks = nat_segs << log_blocks_per_seg;
+ nat_bits_bytes = nat_blocks / BITS_PER_BYTE;
+ nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
+ if (__is_set_ckpt_flags(ckpt, CP_NAT_BITS_FLAG) &&
+ (cp_payload + F2FS_CP_PACKS +
+ NR_CURSEG_PERSIST_TYPE + nat_bits_blocks >= blocks_per_seg)) {
+ f2fs_warn(sbi, "Insane cp_payload: %u, nat_bits_blocks: %u)",
+ cp_payload, nat_bits_blocks);
+ return -EFSCORRUPTED;
+ }
+
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_err(sbi, "A bug case: need to run fsck");
return 1;
@@ -3409,6 +3522,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
sbi->migration_granularity = sbi->segs_per_sec;
+ sbi->seq_file_ra_mul = MIN_RA_MUL;
sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@ -3768,7 +3882,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
/* adjust parameters according to the volume size */
if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
- sm_i->dcc_info->discard_granularity = 1;
+ if (f2fs_block_unit_discard(sbi))
+ sm_i->dcc_info->discard_granularity = 1;
sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
}
@@ -3889,11 +4004,6 @@ try_onemore:
set_sbi_flag(sbi, SBI_POR_DOING);
spin_lock_init(&sbi->stat_lock);
- /* init iostat info */
- spin_lock_init(&sbi->iostat_lock);
- sbi->iostat_enable = false;
- sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
-
for (i = 0; i < NR_PAGE_TYPE; i++) {
int n = (i == META) ? 1 : NR_TEMP_TYPE;
int j;
@@ -3924,10 +4034,14 @@ try_onemore:
init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);
- err = init_percpu_info(sbi);
+ err = f2fs_init_iostat(sbi);
if (err)
goto free_bio_info;
+ err = init_percpu_info(sbi);
+ if (err)
+ goto free_iostat;
+
if (F2FS_IO_ALIGNED(sbi)) {
sbi->write_io_dummy =
mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
@@ -4259,6 +4373,8 @@ free_io_dummy:
mempool_destroy(sbi->write_io_dummy);
free_percpu:
destroy_percpu_info(sbi);
+free_iostat:
+ f2fs_destroy_iostat(sbi);
free_bio_info:
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
@@ -4401,9 +4517,12 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_post_read_processing();
if (err)
goto free_root_stats;
- err = f2fs_init_bio_entry_cache();
+ err = f2fs_init_iostat_processing();
if (err)
goto free_post_read;
+ err = f2fs_init_bio_entry_cache();
+ if (err)
+ goto free_iostat;
err = f2fs_init_bioset();
if (err)
goto free_bio_enrty_cache;
@@ -4425,6 +4544,8 @@ free_bioset:
f2fs_destroy_bioset();
free_bio_enrty_cache:
f2fs_destroy_bio_entry_cache();
+free_iostat:
+ f2fs_destroy_iostat_processing();
free_post_read:
f2fs_destroy_post_read_processing();
free_root_stats:
@@ -4459,6 +4580,7 @@ static void __exit exit_f2fs_fs(void)
f2fs_destroy_compress_mempool();
f2fs_destroy_bioset();
f2fs_destroy_bio_entry_cache();
+ f2fs_destroy_iostat_processing();
f2fs_destroy_post_read_processing();
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);