summaryrefslogtreecommitdiff
path: root/fs/f2fs/segment.c
diff options
context:
space:
mode:
authorChao Yu <chao@kernel.org>2021-08-03 03:15:43 +0300
committerJaegeuk Kim <jaegeuk@kernel.org>2021-08-03 21:16:17 +0300
commit4f993264fe2965c344f223d854ccbb549b16ed71 (patch)
treee6ddde3c3c83f4551f75c2a0337e34cd3c95c91f /fs/f2fs/segment.c
parentdc675a97129c4d9d5af55a3d7f23d7e092b8e032 (diff)
downloadlinux-4f993264fe2965c344f223d854ccbb549b16ed71.tar.xz
f2fs: introduce discard_unit mount option
As James Z reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213877 [1.] One-line summary of the problem: Mount multiple SMR block devices exceed certain number cause system non-response [2.] Full description of the problem/report: Created some F2FS on SMR devices (mkfs.f2fs -m), then mounted in sequence. Each device is the same Model: HGST HSH721414AL (Size 14TB). Empirically, found that when the amount of SMR device * 1.5Gb > System RAM, the system ran out of memory and hung. No dmesg output. For example, 24 SMR Disk need 24*1.5GB = 36GB. A system with 32G RAM can only mount 21 devices, the 22nd device will be a reproducible cause of system hang. The number of SMR devices with other FS mounted on this system does not interfere with the result above. [3.] Keywords (i.e., modules, networking, kernel): F2FS, SMR, Memory [4.] Kernel information [4.1.] Kernel version (uname -a): Linux 5.13.4-200.fc34.x86_64 #1 SMP Tue Jul 20 20:27:29 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux [4.2.] Kernel .config file: Default Fedora 34 with f2fs-tools-1.14.0-2.fc34.x86_64 [5.] Most recent kernel version which did not have the bug: None [6.] Output of Oops.. message (if applicable) with symbolic information resolved (see Documentation/admin-guide/oops-tracing.rst) None [7.] A small shell script or example program which triggers the problem (if possible) mount /dev/sdX /mnt/0X [8.] Memory consumption With 24 * 14T SMR Block device with F2FS free -g total used free shared buff/cache available Mem: 46 36 0 0 10 10 Swap: 0 0 0 With 3 * 14T SMR Block device with F2FS free -g total used free shared buff/cache available Mem: 7 5 0 0 1 1 Swap: 7 0 7 The root cause is, there are three bitmaps: - cur_valid_map - ckpt_valid_map - discard_map and each of them will cost ~500MB memory, {cur, ckpt}_valid_map are necessary, but discard_map is optional, since this bitmap will only be useful in mountpoint that small discard is enabled. For a blkzoned device such as SMR or ZNS devices, f2fs will only issue discard for a section(zone) when all blocks of that section are invalid, so, for such device, we don't need small discard functionality at all. This patch introduces a new mountoption "discard_unit=block|segment| section" to support issuing discard with different basic unit which is aligned to block, segment or section, so that user can specify "discard_unit=segment" or "discard_unit=section" to disable small discard functionality. Note that this mount option can not be changed by remount() due to related metadata need to be initialized during mount(). In order to save memory, let's use "discard_unit=section" for blkzoned device by default. Signed-off-by: Chao Yu <chao@kernel.org> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Diffstat (limited to 'fs/f2fs/segment.c')
-rw-r--r--fs/f2fs/segment.c82
1 files changed, 53 insertions, 29 deletions
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0f976cefe425..80f26158e304 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1893,7 +1893,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
offset = GET_BLKOFF_FROM_SEG0(sbi, i);
- if (!f2fs_test_and_set_bit(offset, se->discard_map))
+ if (f2fs_block_unit_discard(sbi) &&
+ !f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
}
@@ -1918,7 +1919,8 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
int i;
- if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
+ if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) ||
+ !f2fs_block_unit_discard(sbi))
return false;
if (!force) {
@@ -2003,14 +2005,18 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
- bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
+ bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
+ DISCARD_UNIT_SECTION;
+
+ if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
+ section_alignment = true;
mutex_lock(&dirty_i->seglist_lock);
while (1) {
int i;
- if (need_align && end != -1)
+ if (section_alignment && end != -1)
end--;
start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
if (start >= MAIN_SEGS(sbi))
@@ -2018,7 +2024,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
start + 1);
- if (need_align) {
+ if (section_alignment) {
start = rounddown(start, sbi->segs_per_sec);
end = roundup(end, sbi->segs_per_sec);
}
@@ -2056,6 +2062,9 @@ next:
}
mutex_unlock(&dirty_i->seglist_lock);
+ if (!f2fs_block_unit_discard(sbi))
+ goto wakeup;
+
/* send small discards */
list_for_each_entry_safe(entry, this, head, list) {
unsigned int cur_pos = 0, next_pos, len, total_len = 0;
@@ -2089,6 +2098,7 @@ skip:
dcc->nr_discards -= total_len;
}
+wakeup:
wake_up_discard_thread(sbi, false);
}
@@ -2108,6 +2118,11 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
return -ENOMEM;
dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
+ if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
+ dcc->discard_granularity = sbi->blocks_per_seg;
+ else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
+ dcc->discard_granularity = BLKS_PER_SEC(sbi);
+
INIT_LIST_HEAD(&dcc->entry_list);
for (i = 0; i < MAX_PLIST_NUM; i++)
INIT_LIST_HEAD(&dcc->pend_list[i]);
@@ -2255,7 +2270,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
del = 0;
}
- if (!f2fs_test_and_set_bit(offset, se->discard_map))
+ if (f2fs_block_unit_discard(sbi) &&
+ !f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
/*
@@ -2297,7 +2313,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
}
}
- if (f2fs_test_and_clear_bit(offset, se->discard_map))
+ if (f2fs_block_unit_discard(sbi) &&
+ f2fs_test_and_clear_bit(offset, se->discard_map))
sbi->discard_blks++;
}
if (!f2fs_test_bit(offset, se->ckpt_valid_map))
@@ -4282,6 +4299,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
unsigned int sit_segs, start;
char *src_bitmap, *bitmap;
unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
+ unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
/* allocate memory for SIT information */
sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
@@ -4304,9 +4322,9 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
return -ENOMEM;
#ifdef CONFIG_F2FS_CHECK_FS
- bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4;
+ bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
#else
- bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3;
+ bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
#endif
sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
if (!sit_i->bitmap)
@@ -4326,8 +4344,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
bitmap += SIT_VBLOCK_MAP_SIZE;
#endif
- sit_i->sentries[start].discard_map = bitmap;
- bitmap += SIT_VBLOCK_MAP_SIZE;
+ if (discard_map) {
+ sit_i->sentries[start].discard_map = bitmap;
+ bitmap += SIT_VBLOCK_MAP_SIZE;
+ }
}
sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
@@ -4489,17 +4509,19 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
if (IS_NODESEG(se->type))
total_node_blocks += se->valid_blocks;
- /* build discard map only one time */
- if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
- memset(se->discard_map, 0xff,
- SIT_VBLOCK_MAP_SIZE);
- } else {
- memcpy(se->discard_map,
- se->cur_valid_map,
- SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks +=
- sbi->blocks_per_seg -
- se->valid_blocks;
+ if (f2fs_block_unit_discard(sbi)) {
+ /* build discard map only one time */
+ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+ memset(se->discard_map, 0xff,
+ SIT_VBLOCK_MAP_SIZE);
+ } else {
+ memcpy(se->discard_map,
+ se->cur_valid_map,
+ SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks +=
+ sbi->blocks_per_seg -
+ se->valid_blocks;
+ }
}
if (__is_large_section(sbi))
@@ -4535,13 +4557,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
if (IS_NODESEG(se->type))
total_node_blocks += se->valid_blocks;
- if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
- memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
- } else {
- memcpy(se->discard_map, se->cur_valid_map,
- SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks += old_valid_blocks;
- sbi->discard_blks -= se->valid_blocks;
+ if (f2fs_block_unit_discard(sbi)) {
+ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+ memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
+ } else {
+ memcpy(se->discard_map, se->cur_valid_map,
+ SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks += old_valid_blocks;
+ sbi->discard_blks -= se->valid_blocks;
+ }
}
if (__is_large_section(sbi)) {