From e4544b63a7ee49e7fbebf35ece0a6acd3b9617ae Mon Sep 17 00:00:00 2001 From: Tim Murray Date: Fri, 7 Jan 2022 12:48:44 -0800 Subject: f2fs: move f2fs to use reader-unfair rwsems f2fs rw_semaphores work better if writers can starve readers, especially for the checkpoint thread, because writers are strictly more important than reader threads. This prevents significant priority inversion between low-priority readers that blocked while trying to acquire the read lock and a second acquisition of the write lock that might be blocking high priority work. Signed-off-by: Tim Murray Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 56 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 76e6a3df9aba..9af6c20532ec 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1355,16 +1355,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->i_compr_blocks, 0); - init_rwsem(&fi->i_sem); + init_f2fs_rwsem(&fi->i_sem); spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); INIT_LIST_HEAD(&fi->inmem_ilist); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); - init_rwsem(&fi->i_gc_rwsem[READ]); - init_rwsem(&fi->i_gc_rwsem[WRITE]); - init_rwsem(&fi->i_xattr_sem); + init_f2fs_rwsem(&fi->i_gc_rwsem[READ]); + init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]); + init_f2fs_rwsem(&fi->i_xattr_sem); /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; @@ -2088,7 +2088,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) f2fs_update_time(sbi, DISABLE_TIME); while (!f2fs_time_over(sbi, DISABLE_TIME)) { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); if (err == -ENODATA) { err = 0; @@ -2110,7 +2110,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto restore_flag; } - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); cpc.reason = CP_PAUSE; set_sbi_flag(sbi, SBI_CP_DISABLED); err = f2fs_write_checkpoint(sbi, &cpc); @@ -2122,7 +2122,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) spin_unlock(&sbi->stat_lock); out_unlock: - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); restore_flag: sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return err; @@ -2142,12 +2142,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) if (unlikely(retry < 0)) f2fs_warn(sbi, "checkpoint=enable has some unwritten data."); - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); f2fs_dirty_to_prefree(sbi); clear_sbi_flag(sbi, SBI_CP_DISABLED); set_sbi_flag(sbi, SBI_IS_DIRTY); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); f2fs_sync_fs(sbi->sb, 1); } @@ -2707,18 +2707,18 @@ int f2fs_quota_sync(struct super_block *sb, int type) /* * do_quotactl * f2fs_quota_sync - * down_read(quota_sem) + * f2fs_down_read(quota_sem) * dquot_writeback_dquots() * f2fs_dquot_commit * block_operation - * down_read(quota_sem) + * f2fs_down_read(quota_sem) */ f2fs_lock_op(sbi); - down_read(&sbi->quota_sem); + f2fs_down_read(&sbi->quota_sem); ret = f2fs_quota_sync_file(sbi, cnt); - up_read(&sbi->quota_sem); + f2fs_up_read(&sbi->quota_sem); f2fs_unlock_op(sbi); inode_unlock(dqopt->files[cnt]); @@ -2843,11 +2843,11 @@ static int f2fs_dquot_commit(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; - down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING); + f2fs_down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING); ret = dquot_commit(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); + f2fs_up_read(&sbi->quota_sem); return ret; } @@ -2856,11 +2856,11 @@ static int f2fs_dquot_acquire(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; - down_read(&sbi->quota_sem); + f2fs_down_read(&sbi->quota_sem); ret = dquot_acquire(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); + f2fs_up_read(&sbi->quota_sem); return ret; } @@ -3601,14 +3601,14 @@ static void init_sb_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); - init_rwsem(&sbi->io_order_lock); + init_f2fs_rwsem(&sbi->io_order_lock); spin_lock_init(&sbi->cp_lock); sbi->dirty_device = 0; spin_lock_init(&sbi->dev_lock); - init_rwsem(&sbi->sb_lock); - init_rwsem(&sbi->pin_sem); + init_f2fs_rwsem(&sbi->sb_lock); + init_f2fs_rwsem(&sbi->pin_sem); } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -4067,11 +4067,11 @@ try_onemore: /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; - init_rwsem(&sbi->gc_lock); + init_f2fs_rwsem(&sbi->gc_lock); mutex_init(&sbi->writepages); - init_rwsem(&sbi->cp_global_sem); - init_rwsem(&sbi->node_write); - init_rwsem(&sbi->node_change); + init_f2fs_rwsem(&sbi->cp_global_sem); + init_f2fs_rwsem(&sbi->node_write); + init_f2fs_rwsem(&sbi->node_change); /* disallow all the data/node/meta page writes */ set_sbi_flag(sbi, SBI_POR_DOING); @@ -4092,18 +4092,18 @@ try_onemore: } for (j = HOT; j < n; j++) { - init_rwsem(&sbi->write_io[i][j].io_rwsem); + init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem); sbi->write_io[i][j].sbi = sbi; sbi->write_io[i][j].bio = NULL; spin_lock_init(&sbi->write_io[i][j].io_lock); INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); - init_rwsem(&sbi->write_io[i][j].bio_list_lock); + init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock); } } - init_rwsem(&sbi->cp_rwsem); - init_rwsem(&sbi->quota_sem); + init_f2fs_rwsem(&sbi->cp_rwsem); + init_f2fs_rwsem(&sbi->quota_sem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); -- cgit v1.2.3 From 1018a5463a063715365784704c4e8cdf2eec4b04 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 4 Feb 2022 15:19:46 +0800 Subject: f2fs: introduce F2FS_IPU_HONOR_OPU_WRITE ipu policy Once F2FS_IPU_FORCE policy is enabled in some cases: a) f2fs forces to use F2FS_IPU_FORCE in a small-sized volume b) user sets F2FS_IPU_FORCE policy via sysfs Then we may fail to defragment file due to IPU policy check, it doesn't make sense, let's introduce a new IPU policy to allow OPU during file defragmentation. In small-sized volume, let's enable F2FS_IPU_HONOR_OPU_WRITE policy by default. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 3 ++- fs/f2fs/data.c | 18 +++++++++++++----- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/file.c | 18 +++++++++++------- fs/f2fs/segment.h | 5 ++++- fs/f2fs/super.c | 3 ++- 6 files changed, 34 insertions(+), 16 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 87d3884c90ea..7b50bf82f14d 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -55,8 +55,9 @@ Description: Controls the in-place-update policy. 0x04 F2FS_IPU_UTIL 0x08 F2FS_IPU_SSR_UTIL 0x10 F2FS_IPU_FSYNC - 0x20 F2FS_IPU_ASYNC, + 0x20 F2FS_IPU_ASYNC 0x40 F2FS_IPU_NOCACHE + 0x80 F2FS_IPU_HONOR_OPU_WRITE ==== ================= Refer segment.h for details. diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0f124e8de1d4..6b5f389ba998 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2460,6 +2460,9 @@ static inline bool check_inplace_update_policy(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int policy = SM_I(sbi)->ipu_policy; + if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) && + is_inode_flag_set(inode, FI_OPU_WRITE)) + return false; if (policy & (0x1 << F2FS_IPU_FORCE)) return true; if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi)) @@ -2530,6 +2533,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) return true; + if (is_inode_flag_set(inode, FI_OPU_WRITE)) + return true; + if (fio) { if (page_private_gcing(fio->page)) return true; @@ -3154,8 +3160,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping, f2fs_available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; - /* skip writing during file defragment */ - if (is_inode_flag_set(inode, FI_DO_DEFRAG)) + /* skip writing in file defragment preparing stage */ + if (is_inode_flag_set(inode, FI_SKIP_WRITES)) goto skip_write; trace_f2fs_writepages(mapping->host, wbc, DATA); @@ -3725,6 +3731,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, filemap_invalidate_lock(inode->i_mapping); set_inode_flag(inode, FI_ALIGNED_WRITE); + set_inode_flag(inode, FI_OPU_WRITE); for (; secidx < end_sec; secidx++) { f2fs_down_write(&sbi->pin_sem); @@ -3733,7 +3740,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); f2fs_unlock_op(sbi); - set_inode_flag(inode, FI_DO_DEFRAG); + set_inode_flag(inode, FI_SKIP_WRITES); for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { struct page *page; @@ -3750,7 +3757,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, f2fs_put_page(page, 1); } - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); ret = filemap_fdatawrite(inode->i_mapping); @@ -3761,7 +3768,8 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, } done: - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); + clear_inode_flag(inode, FI_OPU_WRITE); clear_inode_flag(inode, FI_ALIGNED_WRITE); filemap_invalidate_unlock(inode->i_mapping); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 51c1392708e6..3b4bf1c3f1ed 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -740,7 +740,8 @@ enum { FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ - FI_DO_DEFRAG, /* indicate defragment is running */ + FI_SKIP_WRITES, /* should skip data page writeback */ + FI_OPU_WRITE, /* used for opu per file */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */ FI_HOT_DATA, /* indicate file is hot */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6ccdd6e347e2..42fbdcf0ccc9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2559,10 +2559,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, bool fragmented = false; int err; - /* if in-place-update policy is enabled, don't waste time here */ - if (f2fs_should_update_inplace(inode, NULL)) - return -EINVAL; - pg_start = range->start >> PAGE_SHIFT; pg_end = (range->start + range->len) >> PAGE_SHIFT; @@ -2570,6 +2566,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, inode_lock(inode); + /* if in-place-update policy is enabled, don't waste time here */ + set_inode_flag(inode, FI_OPU_WRITE); + if (f2fs_should_update_inplace(inode, NULL)) { + err = -EINVAL; + goto out; + } + /* writeback all dirty pages in the range */ err = filemap_write_and_wait_range(inode->i_mapping, range->start, range->start + range->len - 1); @@ -2651,7 +2654,7 @@ do_map: goto check; } - set_inode_flag(inode, FI_DO_DEFRAG); + set_inode_flag(inode, FI_SKIP_WRITES); idx = map.m_lblk; while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { @@ -2676,15 +2679,16 @@ check: if (map.m_lblk < pg_end && cnt < blk_per_seg) goto do_map; - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); err = filemap_fdatawrite(inode->i_mapping); if (err) goto out; } clear_out: - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); out: + clear_inode_flag(inode, FI_OPU_WRITE); inode_unlock(inode); if (!err) range->len = (u64)total << PAGE_SHIFT; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 0291cd55cf09..5c94caf0c0a1 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -651,7 +651,9 @@ static inline int utilization(struct f2fs_sb_info *sbi) * pages over min_fsync_blocks. (=default option) * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests. * F2FS_IPU_NOCACHE - disable IPU bio cache. - * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode) + * F2FS_IPU_HONOR_OPU_WRITE - use OPU write prior to IPU write if inode has + * FI_OPU_WRITE flag. + * F2FS_IPU_DISABLE - disable IPU. (=default option in LFS mode) */ #define DEF_MIN_IPU_UTIL 70 #define DEF_MIN_FSYNC_BLOCKS 8 @@ -667,6 +669,7 @@ enum { F2FS_IPU_FSYNC, F2FS_IPU_ASYNC, F2FS_IPU_NOCACHE, + F2FS_IPU_HONOR_OPU_WRITE, }; static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9af6c20532ec..806836184ebc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3957,7 +3957,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; if (f2fs_block_unit_discard(sbi)) sm_i->dcc_info->discard_granularity = 1; - sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; + sm_i->ipu_policy = 1 << F2FS_IPU_FORCE | + 1 << F2FS_IPU_HONOR_OPU_WRITE; } sbi->readdir_ra = 1; -- cgit v1.2.3 From 47c8ebcce85ed7113e9e3e3f1d8c6374fa87848e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Jan 2022 13:31:43 -0800 Subject: f2fs: add a way to limit roll forward recovery time This adds a sysfs entry to call checkpoint during fsync() in order to avoid long elapsed time to run roll-forward recovery when booting the device. Default value doesn't enforce the limitation which is same as before. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/checkpoint.c | 1 + fs/f2fs/debug.c | 3 +++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/node.c | 2 ++ fs/f2fs/node.h | 3 +++ fs/f2fs/recovery.c | 4 ++++ fs/f2fs/super.c | 14 ++++++++++++-- fs/f2fs/sysfs.c | 2 ++ 9 files changed, 36 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 7b50bf82f14d..58bf0dc83712 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -568,3 +568,9 @@ Contact: "Daeho Jeong" Description: You can set the trial count limit for GC urgent high mode with this value. If GC thread gets to the limit, the mode will turn back to GC normal mode. By default, the value is zero, which means there is no limit like before. + +What: /sys/fs/f2fs//max_roll_forward_node_blocks +Date: January 2022 +Contact: "Jaegeuk Kim" +Description: Controls max # of node block writes to be used for roll forward + recovery. This can limit the roll forward recovery time. diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a13b6b4af220..203a1577942d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1547,6 +1547,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* update user_block_counts */ sbi->last_valid_block_count = sbi->total_valid_block_count; percpu_counter_set(&sbi->alloc_valid_block_count, 0); + percpu_counter_set(&sbi->rf_node_block_count, 0); /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8c50518475a9..9a13902c7702 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -532,6 +532,9 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_meta, si->meta_pages); seq_printf(s, " - imeta: %4d\n", si->ndirty_imeta); + seq_printf(s, " - fsync mark: %4lld\n", + percpu_counter_sum_positive( + &si->sbi->rf_node_block_count)); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", si->dirty_nats, si->nats, si->dirty_sits, si->sits); seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3b4bf1c3f1ed..c9515c3c54fd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -917,6 +917,7 @@ struct f2fs_nm_info { nid_t max_nid; /* maximum possible node ids */ nid_t available_nids; /* # of available node ids */ nid_t next_scan_nid; /* the next nid to be scanned */ + nid_t max_rf_node_blocks; /* max # of nodes for recovery */ unsigned int ram_thresh; /* control the memory footprint */ unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */ unsigned int dirty_nats_ratio; /* control dirty nats ratio threshold */ @@ -1688,6 +1689,8 @@ struct f2fs_sb_info { atomic_t nr_pages[NR_COUNT_TYPE]; /* # of allocated blocks */ struct percpu_counter alloc_valid_block_count; + /* # of node block writes as roll forward recovery */ + struct percpu_counter rf_node_block_count; /* writeback control */ atomic_t wb_sync_req[META]; /* count # of WB_SYNC threads */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 93512f8859d5..0d9883457579 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1782,6 +1782,7 @@ continue_unlock: if (!atomic || page == last_page) { set_fsync_mark(page, 1); + percpu_counter_inc(&sbi->rf_node_block_count); if (IS_INODE(page)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) @@ -3218,6 +3219,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; + nm_i->max_rf_node_blocks = DEF_RF_NODE_BLOCKS; INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->free_nid_list); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 18b98cf0465b..4c1d34bfea78 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -31,6 +31,9 @@ /* control total # of nats */ #define DEF_NAT_CACHE_THRESHOLD 100000 +/* control total # of node writes used for roll-fowrad recovery */ +#define DEF_RF_NODE_BLOCKS 0 + /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 #define SETVEC_SIZE 32 diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2af503f75b4f..ab33e474af07 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -56,6 +56,10 @@ bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) if (sbi->last_valid_block_count + nalloc > sbi->user_block_count) return false; + if (NM_I(sbi)->max_rf_node_blocks && + percpu_counter_sum_positive(&sbi->rf_node_block_count) >= + NM_I(sbi)->max_rf_node_blocks) + return false; return true; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 806836184ebc..f816d7d1987d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1501,8 +1501,9 @@ static void f2fs_free_inode(struct inode *inode) static void destroy_percpu_info(struct f2fs_sb_info *sbi) { - percpu_counter_destroy(&sbi->alloc_valid_block_count); percpu_counter_destroy(&sbi->total_valid_inode_count); + percpu_counter_destroy(&sbi->rf_node_block_count); + percpu_counter_destroy(&sbi->alloc_valid_block_count); } static void destroy_device_list(struct f2fs_sb_info *sbi) @@ -3619,11 +3620,20 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) if (err) return err; + err = percpu_counter_init(&sbi->rf_node_block_count, 0, GFP_KERNEL); + if (err) + goto err_valid_block; + err = percpu_counter_init(&sbi->total_valid_inode_count, 0, GFP_KERNEL); if (err) - percpu_counter_destroy(&sbi->alloc_valid_block_count); + goto err_node_block; + return 0; +err_node_block: + percpu_counter_destroy(&sbi->rf_node_block_count); +err_valid_block: + percpu_counter_destroy(&sbi->alloc_valid_block_count); return err; } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 281bc0133ee6..47efcf233afd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -732,6 +732,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, max_roll_forward_node_blocks, max_rf_node_blocks); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); @@ -855,6 +856,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), ATTR_LIST(dirty_nats_ratio), + ATTR_LIST(max_roll_forward_node_blocks), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), ATTR_LIST(discard_idle_interval), -- cgit v1.2.3 From 984fc4e76d63345499f01c0c198a4b44860cf027 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 4 Feb 2022 13:24:56 +0800 Subject: f2fs: support idmapped mounts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables idmapped mounts for f2fs, since all dedicated helpers for this functionality existsm, so, in this patch we just pass down the user_namespace argument from the VFS methods to the relevant helpers. Simple idmap example on f2fs image: 1. truncate -s 128M f2fs.img 2. mkfs.f2fs f2fs.img 3. mount f2fs.img /mnt/f2fs/ 4. touch /mnt/f2fs/file 5. ls -ln /mnt/f2fs/ total 0 -rw-r--r-- 1 0 0 0 2月 4 13:17 file 6. ./mount-idmapped --map-mount b:0:1001:1 /mnt/f2fs/ /mnt/scratch_f2fs/ 7. ls -ln /mnt/scratch_f2fs/ total 0 -rw-r--r-- 1 1001 1001 0 2月 4 13:17 file Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 21 ++++++++++++--------- fs/f2fs/file.c | 23 ++++++++++++++--------- fs/f2fs/namei.c | 41 +++++++++++++++++++++++------------------ fs/f2fs/super.c | 2 +- 4 files changed, 50 insertions(+), 37 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 16e826e01f09..eaa240b21f07 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -204,8 +204,9 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type, bool rcu) return __f2fs_get_acl(inode, type, NULL); } -static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p, - struct posix_acl **acl) +static int f2fs_acl_update_mode(struct user_namespace *mnt_userns, + struct inode *inode, umode_t *mode_p, + struct posix_acl **acl) { umode_t mode = inode->i_mode; int error; @@ -218,14 +219,15 @@ static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p, return error; if (error == 0) *acl = NULL; - if (!in_group_p(i_gid_into_mnt(&init_user_ns, inode)) && - !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) + if (!in_group_p(i_gid_into_mnt(mnt_userns, inode)) && + !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID)) mode &= ~S_ISGID; *mode_p = mode; return 0; } -static int __f2fs_set_acl(struct inode *inode, int type, +static int __f2fs_set_acl(struct user_namespace *mnt_userns, + struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { int name_index; @@ -238,7 +240,8 @@ static int __f2fs_set_acl(struct inode *inode, int type, case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl && !ipage) { - error = f2fs_acl_update_mode(inode, &mode, &acl); + error = f2fs_acl_update_mode(mnt_userns, inode, + &mode, &acl); if (error) return error; set_acl_inode(inode, mode); @@ -279,7 +282,7 @@ int f2fs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; - return __f2fs_set_acl(inode, type, acl, NULL); + return __f2fs_set_acl(mnt_userns, inode, type, acl, NULL); } /* @@ -419,7 +422,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, f2fs_mark_inode_dirty_sync(inode, true); if (default_acl) { - error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, + error = __f2fs_set_acl(NULL, inode, ACL_TYPE_DEFAULT, default_acl, ipage); posix_acl_release(default_acl); } else { @@ -427,7 +430,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, } if (acl) { if (!error) - error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, + error = __f2fs_set_acl(NULL, inode, ACL_TYPE_ACCESS, acl, ipage); posix_acl_release(acl); } else { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 42fbdcf0ccc9..cfdc41f87f5d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -844,7 +844,7 @@ int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path, STATX_ATTR_NODUMP | STATX_ATTR_VERITY); - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(mnt_userns, inode, stat); /* we need to show initial sectors used for inline_data/dentries */ if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || @@ -904,7 +904,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, !f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; - err = setattr_prepare(&init_user_ns, dentry, attr); + err = setattr_prepare(mnt_userns, dentry, attr); if (err) return err; @@ -980,10 +980,10 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, spin_unlock(&F2FS_I(inode)->i_size_lock); } - __setattr_copy(&init_user_ns, inode, attr); + __setattr_copy(mnt_userns, inode, attr); if (attr->ia_valid & ATTR_MODE) { - err = posix_acl_chmod(&init_user_ns, inode, f2fs_get_inode_mode(inode)); + err = posix_acl_chmod(mnt_userns, inode, f2fs_get_inode_mode(inode)); if (is_inode_flag_set(inode, FI_ACL_MODE)) { if (!err) @@ -1989,11 +1989,12 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) static int f2fs_ioc_start_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; if (!S_ISREG(inode->i_mode)) @@ -2058,9 +2059,10 @@ out: static int f2fs_ioc_commit_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; ret = mnt_want_write_file(filp); @@ -2100,9 +2102,10 @@ err_out: static int f2fs_ioc_start_volatile_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; if (!S_ISREG(inode->i_mode)) @@ -2135,9 +2138,10 @@ out: static int f2fs_ioc_release_volatile_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; ret = mnt_want_write_file(filp); @@ -2164,9 +2168,10 @@ out: static int f2fs_ioc_abort_volatile_write(struct file *filp) { struct inode *inode = file_inode(filp); + struct user_namespace *mnt_userns = file_mnt_user_ns(filp); int ret; - if (!inode_owner_or_capable(&init_user_ns, inode)) + if (!inode_owner_or_capable(mnt_userns, inode)) return -EACCES; ret = mnt_want_write_file(filp); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0347c5780910..2b23a76bdae9 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -22,7 +22,8 @@ #include "acl.h" #include -static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) +static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, + struct inode *dir, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); nid_t ino; @@ -46,7 +47,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_free = true; - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(mnt_userns, inode, dir, mode); inode->i_ino = ino; inode->i_blocks = 0; @@ -67,7 +68,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; else - F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns, + F2FS_I(inode)->i_projid = make_kprojid(mnt_userns, F2FS_DEF_PROJID); err = fscrypt_prepare_new_inode(dir, inode, &encrypt); @@ -349,7 +350,7 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -679,7 +680,7 @@ static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); + inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -750,7 +751,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(dir, S_IFDIR | mode); + inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -807,7 +808,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -834,8 +835,9 @@ out: return err; } -static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, - umode_t mode, struct inode **whiteout) +static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode, + struct inode **whiteout) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; @@ -845,7 +847,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, if (err) return err; - inode = f2fs_new_inode(dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -909,20 +911,22 @@ static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - return __f2fs_tmpfile(dir, dentry, mode, NULL); + return __f2fs_tmpfile(mnt_userns, dir, dentry, mode, NULL); } -static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout) +static int f2fs_create_whiteout(struct user_namespace *mnt_userns, + struct inode *dir, struct inode **whiteout) { if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) return -EIO; - return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout); + return __f2fs_tmpfile(mnt_userns, dir, NULL, + S_IFCHR | WHITEOUT_MODE, whiteout); } -static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) +static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry, unsigned int flags) { struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); struct inode *old_inode = d_inode(old_dentry); @@ -960,7 +964,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } if (flags & RENAME_WHITEOUT) { - err = f2fs_create_whiteout(old_dir, &whiteout); + err = f2fs_create_whiteout(mnt_userns, old_dir, &whiteout); if (err) return err; } @@ -1300,7 +1304,8 @@ static int f2fs_rename2(struct user_namespace *mnt_userns, * VFS has already handled the new dentry existence case, * here, we just deal with "RENAME_NOREPLACE" as regular rename. */ - return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry, flags); + return f2fs_rename(mnt_userns, old_dir, old_dentry, + new_dir, new_dentry, flags); } static const char *f2fs_encrypted_get_link(struct dentry *dentry, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f816d7d1987d..22fb4d3b1170 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4539,7 +4539,7 @@ static struct file_system_type f2fs_fs_type = { .name = "f2fs", .mount = f2fs_mount, .kill_sb = kill_f2fs_super, - .fs_flags = FS_REQUIRES_DEV, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("f2fs"); -- cgit v1.2.3 From 680af5b824a52faa819167628665804a14f0e0df Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Tue, 15 Feb 2022 17:27:21 +0900 Subject: f2fs: quota: fix loop condition at f2fs_quota_sync() cnt should be passed to sb_has_quota_active() instead of type to check active quota properly. Moreover, when the type is -1, the compiler with enough inline knowledge can discard sb_has_quota_active() check altogether, causing a NULL pointer dereference at the following inode_lock(dqopt->files[cnt]): [ 2.796010] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a0 [ 2.796024] Mem abort info: [ 2.796025] ESR = 0x96000005 [ 2.796028] EC = 0x25: DABT (current EL), IL = 32 bits [ 2.796029] SET = 0, FnV = 0 [ 2.796031] EA = 0, S1PTW = 0 [ 2.796032] Data abort info: [ 2.796034] ISV = 0, ISS = 0x00000005 [ 2.796035] CM = 0, WnR = 0 [ 2.796046] user pgtable: 4k pages, 39-bit VAs, pgdp=00000003370d1000 [ 2.796048] [00000000000000a0] pgd=0000000000000000, pud=0000000000000000 [ 2.796051] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 2.796056] CPU: 7 PID: 640 Comm: f2fs_ckpt-259:7 Tainted: G S 5.4.179-arter97-r8-64666-g2f16e087f9d8 #1 [ 2.796057] Hardware name: Qualcomm Technologies, Inc. Lahaina MTP lemonadep (DT) [ 2.796059] pstate: 80c00005 (Nzcv daif +PAN +UAO) [ 2.796065] pc : down_write+0x28/0x70 [ 2.796070] lr : f2fs_quota_sync+0x100/0x294 [ 2.796071] sp : ffffffa3f48ffc30 [ 2.796073] x29: ffffffa3f48ffc30 x28: 0000000000000000 [ 2.796075] x27: ffffffa3f6d718b8 x26: ffffffa415fe9d80 [ 2.796077] x25: ffffffa3f7290048 x24: 0000000000000001 [ 2.796078] x23: 0000000000000000 x22: ffffffa3f7290000 [ 2.796080] x21: ffffffa3f72904a0 x20: ffffffa3f7290110 [ 2.796081] x19: ffffffa3f77a9800 x18: ffffffc020aae038 [ 2.796083] x17: ffffffa40e38e040 x16: ffffffa40e38e6d0 [ 2.796085] x15: ffffffa40e38e6cc x14: ffffffa40e38e6d0 [ 2.796086] x13: 00000000000004f6 x12: 00162c44ff493000 [ 2.796088] x11: 0000000000000400 x10: ffffffa40e38c948 [ 2.796090] x9 : 0000000000000000 x8 : 00000000000000a0 [ 2.796091] x7 : 0000000000000000 x6 : 0000d1060f00002a [ 2.796093] x5 : ffffffa3f48ff718 x4 : 000000000000000d [ 2.796094] x3 : 00000000060c0000 x2 : 0000000000000001 [ 2.796096] x1 : 0000000000000000 x0 : 00000000000000a0 [ 2.796098] Call trace: [ 2.796100] down_write+0x28/0x70 [ 2.796102] f2fs_quota_sync+0x100/0x294 [ 2.796104] block_operations+0x120/0x204 [ 2.796106] f2fs_write_checkpoint+0x11c/0x520 [ 2.796107] __checkpoint_and_complete_reqs+0x7c/0xd34 [ 2.796109] issue_checkpoint_thread+0x6c/0xb8 [ 2.796112] kthread+0x138/0x414 [ 2.796114] ret_from_fork+0x10/0x18 [ 2.796117] Code: aa0803e0 aa1f03e1 52800022 aa0103e9 (c8e97d02) [ 2.796120] ---[ end trace 96e942e8eb6a0b53 ]--- [ 2.800116] Kernel panic - not syncing: Fatal exception [ 2.800120] SMP: stopping secondary CPUs Fixes: 9de71ede81e6 ("f2fs: quota: fix potential deadlock") Cc: # v5.15+ Signed-off-by: Juhyung Park Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 22fb4d3b1170..8e3840973077 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2689,7 +2689,7 @@ int f2fs_quota_sync(struct super_block *sb, int type) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); int cnt; - int ret; + int ret = 0; /* * Now when everything is written we can discard the pagecache so @@ -2700,8 +2700,8 @@ int f2fs_quota_sync(struct super_block *sb, int type) if (type != -1 && cnt != type) continue; - if (!sb_has_quota_active(sb, type)) - return 0; + if (!sb_has_quota_active(sb, cnt)) + continue; inode_lock(dqopt->files[cnt]); -- cgit v1.2.3 From ba900534f807f0b327c92d5141c85d2313e2d55c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 4 Mar 2022 09:40:05 -0800 Subject: f2fs: don't get FREEZE lock in f2fs_evict_inode in frozen fs Let's purge inode cache in order to avoid the below deadlock. [freeze test] shrinkder freeze_super - pwercpu_down_write(SB_FREEZE_FS) - super_cache_scan - down_read(&sb->s_umount) - prune_icache_sb - dispose_list - evict - f2fs_evict_inode thaw_super - down_write(&sb->s_umount); - __percpu_down_read(SB_FREEZE_FS) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 1 + fs/f2fs/debug.c | 1 + fs/f2fs/f2fs.h | 1 + fs/f2fs/inode.c | 6 ++++-- fs/f2fs/super.c | 4 ++++ 5 files changed, 11 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 58bf0dc83712..5a5f3c5445f6 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -458,6 +458,7 @@ Description: Show status of f2fs superblock in real time. 0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP 0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted 0x2000 SBI_IS_RESIZEFS resizefs is in process + 0x4000 SBI_IS_FREEZING freefs is in process ====== ===================== ================================= What: /sys/fs/f2fs//ckpt_thread_ioprio diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 9a13902c7702..cba5eab24595 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -338,6 +338,7 @@ static char *s_flag[] = { [SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush", [SBI_QUOTA_NEED_REPAIR] = " quota_need_repair", [SBI_IS_RESIZEFS] = " resizefs", + [SBI_IS_FREEZING] = " freezefs", }; static int stat_show(struct seq_file *s, void *v) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 68d791ec8b27..da729f53daa8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1293,6 +1293,7 @@ enum { SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ SBI_IS_RESIZEFS, /* resizefs is in process */ + SBI_IS_FREEZING, /* freezefs is in process */ }; enum { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ab8e0c06c78c..71f232dcf3c2 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -778,7 +778,8 @@ void f2fs_evict_inode(struct inode *inode) f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); - sb_start_intwrite(inode->i_sb); + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + sb_start_intwrite(inode->i_sb); set_inode_flag(inode, FI_NO_ALLOC); i_size_write(inode, 0); retry: @@ -809,7 +810,8 @@ retry: if (dquot_initialize_needed(inode)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); } - sb_end_intwrite(inode->i_sb); + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8e3840973077..4b570b5c2674 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1663,11 +1663,15 @@ static int f2fs_freeze(struct super_block *sb) /* ensure no checkpoint required */ if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list)) return -EINVAL; + + /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */ + set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } static int f2fs_unfreeze(struct super_block *sb) { + clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } -- cgit v1.2.3 From c86868bbc22be9487d10d3c6336dd8ccb49e8a62 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 17 Mar 2022 16:33:15 +0800 Subject: f2fs: initialize sbi->gc_mode explicitly It needs to initialized sbi->gc_mode to GC_NORMAL explicitly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4b570b5c2674..9176597fdf94 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3579,6 +3579,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino); F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino); sbi->cur_victim_sec = NULL_SECNO; + sbi->gc_mode = GC_NORMAL; sbi->next_victim_seg[BG_GC] = NULL_SEGNO; sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; -- cgit v1.2.3 From 98e92867b99761979f6d4c155b7d5a5b523412a3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 18 Mar 2022 17:02:30 +0800 Subject: f2fs: use aggressive GC policy during f2fs_disable_checkpoint() Let's enable GC_URGENT_HIGH mode during f2fs_disable_checkpoint(), so that we can use SSR allocator for GCed data/node persistence, it can improve the performance due to it avoiding migration of data/node locates in selected target segment of SSR allocator. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9176597fdf94..caceb80b87ec 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2080,6 +2080,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) { unsigned int s_flags = sbi->sb->s_flags; struct cp_control cpc; + unsigned int gc_mode; int err = 0; int ret; block_t unusable; @@ -2092,6 +2093,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) f2fs_update_time(sbi, DISABLE_TIME); + gc_mode = sbi->gc_mode; + sbi->gc_mode = GC_URGENT_HIGH; + while (!f2fs_time_over(sbi, DISABLE_TIME)) { f2fs_down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); @@ -2129,6 +2133,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) out_unlock: f2fs_up_write(&sbi->gc_lock); restore_flag: + sbi->gc_mode = gc_mode; sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return err; } -- cgit v1.2.3