From e4544b63a7ee49e7fbebf35ece0a6acd3b9617ae Mon Sep 17 00:00:00 2001 From: Tim Murray Date: Fri, 7 Jan 2022 12:48:44 -0800 Subject: f2fs: move f2fs to use reader-unfair rwsems f2fs rw_semaphores work better if writers can starve readers, especially for the checkpoint thread, because writers are strictly more important than reader threads. This prevents significant priority inversion between low-priority readers that blocked while trying to acquire the read lock and a second acquisition of the write lock that might be blocking high priority work. Signed-off-by: Tim Murray Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 84 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 50b2874e758c..93512f8859d5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -382,14 +382,14 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool need = false; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) need = true; } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return need; } @@ -399,11 +399,11 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -413,13 +413,13 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return need_update; } @@ -431,14 +431,14 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, struct nat_entry *new, *e; /* Let's mitigate lock contention of nat_tree_lock during checkpoint */ - if (rwsem_is_locked(&sbi->cp_global_sem)) + if (f2fs_rwsem_is_locked(&sbi->cp_global_sem)) return; new = __alloc_nat_entry(sbi, nid, false); if (!new) return; - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) e = __init_nat_entry(nm_i, new, ne, false); @@ -447,7 +447,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, nat_get_blkaddr(e) != le32_to_cpu(ne->block_addr) || nat_get_version(e) != ne->version); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); if (e != new) __free_nat_entry(new); } @@ -459,7 +459,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct nat_entry *e; struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = __init_nat_entry(nm_i, new, NULL, true); @@ -508,7 +508,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); } int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -516,7 +516,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) struct f2fs_nm_info *nm_i = NM_I(sbi); int nr = nr_shrink; - if (!down_write_trylock(&nm_i->nat_tree_lock)) + if (!f2fs_down_write_trylock(&nm_i->nat_tree_lock)) return 0; spin_lock(&nm_i->nat_list_lock); @@ -538,7 +538,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) } spin_unlock(&nm_i->nat_list_lock); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } @@ -560,13 +560,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, ni->nid = nid; retry: /* Check nat cache */ - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return 0; } @@ -576,11 +576,11 @@ retry: * nat_tree_lock. Therefore, we should retry, if we failed to grab here * while not bothering checkpoint. */ - if (!rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) { + if (!f2fs_rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) { down_read(&curseg->journal_rwsem); - } else if (rwsem_is_contended(&nm_i->nat_tree_lock) || + } else if (f2fs_rwsem_is_contended(&nm_i->nat_tree_lock) || !down_read_trylock(&curseg->journal_rwsem)) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); goto retry; } @@ -589,15 +589,15 @@ retry: ne = nat_in_journal(journal, i); node_info_from_raw_nat(ni, &ne); } - up_read(&curseg->journal_rwsem); + up_read(&curseg->journal_rwsem); if (i >= 0) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); goto cache; } /* Fill node_info from nat page */ index = current_nat_addr(sbi, nid); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); page = f2fs_get_meta_page(sbi, index); if (IS_ERR(page)) @@ -1609,17 +1609,17 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, goto redirty_out; if (wbc->for_reclaim) { - if (!down_read_trylock(&sbi->node_write)) + if (!f2fs_down_read_trylock(&sbi->node_write)) goto redirty_out; } else { - down_read(&sbi->node_write); + f2fs_down_read(&sbi->node_write); } /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { ClearPageUptodate(page); dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); unlock_page(page); return 0; } @@ -1627,7 +1627,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (__is_valid_data_blkaddr(ni.blk_addr) && !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC_ENHANCE)) { - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); goto redirty_out; } @@ -1648,7 +1648,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, f2fs_do_write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); if (wbc->for_reclaim) { f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE); @@ -2225,14 +2225,14 @@ bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi) unsigned int i; bool ret = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (i = 0; i < nm_i->nat_blocks; i++) { if (!test_bit_le(i, nm_i->nat_block_bitmap)) { ret = false; break; } } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return ret; } @@ -2415,7 +2415,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) unsigned int i, idx; nid_t nid; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (i = 0; i < nm_i->nat_blocks; i++) { if (!test_bit_le(i, nm_i->nat_block_bitmap)) @@ -2438,7 +2438,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) out: scan_curseg_cache(sbi); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); } static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, @@ -2473,7 +2473,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); while (1) { if (!test_bit_le(NAT_BLOCK_OFFSET(nid), @@ -2488,7 +2488,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, } if (ret) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); return ret; } @@ -2508,7 +2508,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, /* find free nids from current sum_pages */ scan_curseg_cache(sbi); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -2953,7 +2953,7 @@ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) { unsigned int valid = 0, nid_ofs = 0; @@ -2973,7 +2973,7 @@ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi) __update_nat_bits(nm_i, nat_ofs, valid); } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); } static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, @@ -3071,15 +3071,15 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * nat_cnt[DIRTY_NAT]. */ if (cpc->reason & CP_UMOUNT) { - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); remove_nats_in_journal(sbi); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); } if (!nm_i->nat_cnt[DIRTY_NAT]) return 0; - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); /* * if there are no enough space in journal to store dirty nat @@ -3108,7 +3108,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) break; } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); /* Allow dirty nats by node block allocation in write_begin */ return err; @@ -3228,7 +3228,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->nid_list_lock); - init_rwsem(&nm_i->nat_tree_lock); + init_f2fs_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -3334,7 +3334,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->nid_list_lock); /* destroy nat cache */ - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -3364,7 +3364,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) kmem_cache_free(nat_entry_set_slab, setvec[idx]); } } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); kvfree(nm_i->nat_block_bitmap); if (nm_i->free_nid_bitmap) { -- cgit v1.2.3