summaryrefslogtreecommitdiff
path: root/fs/f2fs/data.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-04 20:48:47 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-04 20:48:47 +0300
commit6abaa83c7352b31450d7e8c173f674324c16b02b (patch)
treecea534b220e9635bb2af785a954292e416ebdca9 /fs/f2fs/data.c
parent0961f0c00e69672a8e4a2e591355567dbda44389 (diff)
parent9605f75cf36e0bcc0f4ada07b5be712d30107607 (diff)
downloadlinux-6abaa83c7352b31450d7e8c173f674324c16b02b.tar.xz
Merge tag 'f2fs-for-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "In this cycle, we've addressed some performance issues such as lock contention, misbehaving compress_cache, allowing extent_cache for compressed files, and new sysfs to adjust ra_size for fadvise. In order to diagnose the performance issues quickly, we also added an iostat which shows the IO latencies periodically. On the stability side, we've found two memory leakage cases in the error path in compression flow. And, we've also fixed various corner cases in fiemap, quota, checkpoint=disable, zstd, and so on. Enhancements: - avoid long checkpoint latency by releasing nat_tree_lock - collect and show iostats periodically - support extent_cache for compressed files - add a sysfs entry to manage ra_size given fadvise(POSIX_FADV_SEQUENTIAL) - report f2fs GC status via sysfs - add discard_unit=%s in mount option to handle zoned device Bug fixes: - fix two memory leakages when an error happens in the compressed IO flow - fix commpress_cache to get the right LBA - fix fiemap to deal with compressed case correctly - fix wrong EIO returns due to SBI_NEED_FSCK - fix missing writes when enabling checkpoint back - fix quota deadlock - fix zstd level mount option In addition to the above major updates, we've cleaned up several code paths such as dio, unnecessary operations, debugfs/f2fs/status, sanity check, and typos" * tag 'f2fs-for-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (46 commits) f2fs: should put a page beyond EOF when preparing a write f2fs: deallocate compressed pages when error happens f2fs: enable realtime discard iff device supports discard f2fs: guarantee to write dirty data when enabling checkpoint back f2fs: fix to unmap pages from userspace process in punch_hole() f2fs: fix unexpected ENOENT comes from f2fs_map_blocks() f2fs: fix to account missing .skipped_gc_rwsem f2fs: adjust unlock order for cleanup f2fs: Don't create discard thread when device doesn't support realtime discard f2fs: rebuild nat_bits during umount f2fs: introduce periodic iostat io latency traces f2fs: separate out iostat feature f2fs: compress: do sanity check on cluster f2fs: fix description about main_blkaddr node f2fs: convert S_IRUGO to 0444 f2fs: fix to keep compatibility of fault injection interface f2fs: support fault injection for f2fs_kmem_cache_alloc() f2fs: compress: allow write compress released file after truncate to zero f2fs: correct comment in segment.h f2fs: improve sbi status info in debugfs/f2fs/status ...
Diffstat (limited to 'fs/f2fs/data.c')
-rw-r--r--fs/f2fs/data.c202
1 files changed, 140 insertions, 62 deletions
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index eb222b35edef..f4fd6c246c9a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -25,6 +25,7 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
+#include "iostat.h"
#include <trace/events/f2fs.h>
#define NUM_PREALLOC_POST_READ_CTXS 128
@@ -116,6 +117,7 @@ struct bio_post_read_ctx {
struct f2fs_sb_info *sbi;
struct work_struct work;
unsigned int enabled_steps;
+ block_t fs_blkaddr;
};
static void f2fs_finish_read_bio(struct bio *bio)
@@ -228,7 +230,7 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx)
struct bio_vec *bv;
struct bvec_iter_all iter_all;
bool all_compressed = true;
- block_t blkaddr = SECTOR_TO_BLOCK(ctx->bio->bi_iter.bi_sector);
+ block_t blkaddr = ctx->fs_blkaddr;
bio_for_each_segment_all(bv, ctx->bio, iter_all) {
struct page *page = bv->bv_page;
@@ -269,7 +271,10 @@ static void f2fs_post_read_work(struct work_struct *work)
static void f2fs_read_end_io(struct bio *bio)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
- struct bio_post_read_ctx *ctx = bio->bi_private;
+ struct bio_post_read_ctx *ctx;
+
+ iostat_update_and_unbind_ctx(bio, 0);
+ ctx = bio->bi_private;
if (time_to_inject(sbi, FAULT_READ_IO)) {
f2fs_show_injection_info(sbi, FAULT_READ_IO);
@@ -291,10 +296,13 @@ static void f2fs_read_end_io(struct bio *bio)
static void f2fs_write_end_io(struct bio *bio)
{
- struct f2fs_sb_info *sbi = bio->bi_private;
+ struct f2fs_sb_info *sbi;
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
+ iostat_update_and_unbind_ctx(bio, 1);
+ sbi = bio->bi_private;
+
if (time_to_inject(sbi, FAULT_WRITE_IO)) {
f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
bio->bi_status = BLK_STS_IOERR;
@@ -398,6 +406,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
fio->type, fio->temp);
}
+ iostat_alloc_and_bind_ctx(sbi, bio, NULL);
+
if (fio->io_wbc)
wbc_init_bio(fio->io_wbc, bio);
@@ -479,6 +489,8 @@ submit_io:
trace_f2fs_submit_read_bio(sbi->sb, type, bio);
else
trace_f2fs_submit_write_bio(sbi->sb, type, bio);
+
+ iostat_update_submit_ctx(bio, type);
submit_bio(bio);
}
@@ -723,7 +735,7 @@ static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
struct bio_entry *be;
- be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
+ be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
be->bio = bio;
bio_get(bio);
@@ -970,7 +982,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
- struct bio_post_read_ctx *ctx;
+ struct bio_post_read_ctx *ctx = NULL;
unsigned int post_read_steps = 0;
bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
@@ -1003,8 +1015,10 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
ctx->bio = bio;
ctx->sbi = sbi;
ctx->enabled_steps = post_read_steps;
+ ctx->fs_blkaddr = blkaddr;
bio->bi_private = ctx;
}
+ iostat_alloc_and_bind_ctx(sbi, bio, ctx);
return bio;
}
@@ -1133,7 +1147,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
- struct extent_info ei = {0, 0, 0};
+ struct extent_info ei = {0, };
struct inode *inode = dn->inode;
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
@@ -1150,7 +1164,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
struct address_space *mapping = inode->i_mapping;
struct dnode_of_data dn;
struct page *page;
- struct extent_info ei = {0,0,0};
+ struct extent_info ei = {0, };
int err;
page = f2fs_grab_cache_page(mapping, index, for_write);
@@ -1448,7 +1462,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int err = 0, ofs = 1;
unsigned int ofs_in_node, last_ofs_in_node;
blkcnt_t prealloc;
- struct extent_info ei = {0,0,0};
+ struct extent_info ei = {0, };
block_t blkaddr;
unsigned int start_pgofs;
@@ -1490,7 +1504,21 @@ next_dnode:
if (err) {
if (flag == F2FS_GET_BLOCK_BMAP)
map->m_pblk = 0;
+
if (err == -ENOENT) {
+ /*
+ * There is one exceptional case that read_node_page()
+ * may return -ENOENT due to filesystem has been
+ * shutdown or cp_error, so force to convert error
+ * number to EIO for such case.
+ */
+ if (map->m_may_create &&
+ (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+ f2fs_cp_error(sbi))) {
+ err = -EIO;
+ goto unlock_out;
+ }
+
err = 0;
if (map->m_next_pgofs)
*map->m_next_pgofs =
@@ -1550,6 +1578,13 @@ next_block:
map->m_flags |= F2FS_MAP_NEW;
blkaddr = dn.data_blkaddr;
} else {
+ if (f2fs_compressed_file(inode) &&
+ f2fs_sanity_check_cluster(&dn) &&
+ (flag != F2FS_GET_BLOCK_FIEMAP ||
+ IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
+ err = -EFSCORRUPTED;
+ goto sync_out;
+ }
if (flag == F2FS_GET_BLOCK_BMAP) {
map->m_pblk = 0;
goto sync_out;
@@ -1843,8 +1878,9 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
int ret = 0;
- bool compr_cluster = false;
+ bool compr_cluster = false, compr_appended;
unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
+ unsigned int count_in_cluster = 0;
loff_t maxbytes;
if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
@@ -1892,15 +1928,17 @@ next:
map.m_next_pgofs = &next_pgofs;
map.m_seg_type = NO_CHECK_TYPE;
- if (compr_cluster)
- map.m_len = cluster_size - 1;
+ if (compr_cluster) {
+ map.m_lblk += 1;
+ map.m_len = cluster_size - count_in_cluster;
+ }
ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
if (ret)
goto out;
/* HOLE */
- if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+ if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
start_blk = next_pgofs;
if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
@@ -1910,6 +1948,14 @@ next:
flags |= FIEMAP_EXTENT_LAST;
}
+ compr_appended = false;
+ /* In a case of compressed cluster, append this to the last extent */
+ if (compr_cluster && ((map.m_flags & F2FS_MAP_UNWRITTEN) ||
+ !(map.m_flags & F2FS_MAP_FLAGS))) {
+ compr_appended = true;
+ goto skip_fill;
+ }
+
if (size) {
flags |= FIEMAP_EXTENT_MERGED;
if (IS_ENCRYPTED(inode))
@@ -1926,38 +1972,36 @@ next:
if (start_blk > last_blk)
goto out;
- if (compr_cluster) {
- compr_cluster = false;
-
-
- logical = blks_to_bytes(inode, start_blk - 1);
- phys = blks_to_bytes(inode, map.m_pblk);
- size = blks_to_bytes(inode, cluster_size);
-
- flags |= FIEMAP_EXTENT_ENCODED;
-
- start_blk += cluster_size - 1;
-
- if (start_blk > last_blk)
- goto out;
-
- goto prep_next;
- }
-
+skip_fill:
if (map.m_pblk == COMPRESS_ADDR) {
compr_cluster = true;
- start_blk++;
- goto prep_next;
- }
-
- logical = blks_to_bytes(inode, start_blk);
- phys = blks_to_bytes(inode, map.m_pblk);
- size = blks_to_bytes(inode, map.m_len);
- flags = 0;
- if (map.m_flags & F2FS_MAP_UNWRITTEN)
- flags = FIEMAP_EXTENT_UNWRITTEN;
+ count_in_cluster = 1;
+ } else if (compr_appended) {
+ unsigned int appended_blks = cluster_size -
+ count_in_cluster + 1;
+ size += blks_to_bytes(inode, appended_blks);
+ start_blk += appended_blks;
+ compr_cluster = false;
+ } else {
+ logical = blks_to_bytes(inode, start_blk);
+ phys = __is_valid_data_blkaddr(map.m_pblk) ?
+ blks_to_bytes(inode, map.m_pblk) : 0;
+ size = blks_to_bytes(inode, map.m_len);
+ flags = 0;
+
+ if (compr_cluster) {
+ flags = FIEMAP_EXTENT_ENCODED;
+ count_in_cluster += map.m_len;
+ if (count_in_cluster == cluster_size) {
+ compr_cluster = false;
+ size += blks_to_bytes(inode, 1);
+ }
+ } else if (map.m_flags & F2FS_MAP_UNWRITTEN) {
+ flags = FIEMAP_EXTENT_UNWRITTEN;
+ }
- start_blk += bytes_to_blks(inode, size);
+ start_blk += bytes_to_blks(inode, size);
+ }
prep_next:
cond_resched();
@@ -2115,6 +2159,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
sector_t last_block_in_file;
const unsigned blocksize = blks_to_bytes(inode, 1);
struct decompress_io_ctx *dic = NULL;
+ struct extent_info ei = {0, };
+ bool from_dnode = true;
int i;
int ret = 0;
@@ -2137,6 +2183,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
continue;
}
unlock_page(page);
+ if (for_write)
+ put_page(page);
cc->rpages[i] = NULL;
cc->nr_rpages--;
}
@@ -2145,6 +2193,12 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
if (f2fs_cluster_is_empty(cc))
goto out;
+ if (f2fs_lookup_extent_cache(inode, start_idx, &ei))
+ from_dnode = false;
+
+ if (!from_dnode)
+ goto skip_reading_dnode;
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
if (ret)
@@ -2152,11 +2206,13 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
+skip_reading_dnode:
for (i = 1; i < cc->cluster_size; i++) {
block_t blkaddr;
- blkaddr = data_blkaddr(dn.inode, dn.node_page,
- dn.ofs_in_node + i);
+ blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
+ dn.ofs_in_node + i) :
+ ei.blk + i - 1;
if (!__is_valid_data_blkaddr(blkaddr))
break;
@@ -2166,6 +2222,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
goto out_put_dnode;
}
cc->nr_cpages++;
+
+ if (!from_dnode && i >= ei.c_len)
+ break;
}
/* nothing to decompress */
@@ -2185,8 +2244,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
block_t blkaddr;
struct bio_post_read_ctx *ctx;
- blkaddr = data_blkaddr(dn.inode, dn.node_page,
- dn.ofs_in_node + i + 1);
+ blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
+ dn.ofs_in_node + i + 1) :
+ ei.blk + i;
f2fs_wait_on_block_writeback(inode, blkaddr);
@@ -2220,7 +2280,7 @@ submit_and_realloc:
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
goto submit_and_realloc;
- ctx = bio->bi_private;
+ ctx = get_post_read_ctx(bio);
ctx->enabled_steps |= STEP_DECOMPRESS;
refcount_inc(&dic->refcnt);
@@ -2231,13 +2291,15 @@ submit_and_realloc:
*last_block_in_bio = blkaddr;
}
- f2fs_put_dnode(&dn);
+ if (from_dnode)
+ f2fs_put_dnode(&dn);
*bio_ret = bio;
return 0;
out_put_dnode:
- f2fs_put_dnode(&dn);
+ if (from_dnode)
+ f2fs_put_dnode(&dn);
out:
for (i = 0; i < cc->cluster_size; i++) {
if (cc->rpages[i]) {
@@ -2272,6 +2334,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
.nr_rpages = 0,
.nr_cpages = 0,
};
+ pgoff_t nc_cluster_idx = NULL_CLUSTER;
#endif
unsigned nr_pages = rac ? readahead_count(rac) : 1;
unsigned max_nr_pages = nr_pages;
@@ -2304,12 +2367,23 @@ static int f2fs_mpage_readpages(struct inode *inode,
if (ret)
goto set_error_page;
}
- ret = f2fs_is_compressed_cluster(inode, page->index);
- if (ret < 0)
- goto set_error_page;
- else if (!ret)
- goto read_single_page;
+ if (cc.cluster_idx == NULL_CLUSTER) {
+ if (nc_cluster_idx ==
+ page->index >> cc.log_cluster_size) {
+ goto read_single_page;
+ }
+ ret = f2fs_is_compressed_cluster(inode, page->index);
+ if (ret < 0)
+ goto set_error_page;
+ else if (!ret) {
+ nc_cluster_idx =
+ page->index >> cc.log_cluster_size;
+ goto read_single_page;
+ }
+
+ nc_cluster_idx = NULL_CLUSTER;
+ }
ret = f2fs_init_compress_ctx(&cc);
if (ret)
goto set_error_page;
@@ -2498,6 +2572,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
return true;
if (f2fs_is_atomic_file(inode))
return true;
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ return true;
/* swap file is migrating in aligned write mode */
if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
@@ -2530,7 +2606,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
struct page *page = fio->page;
struct inode *inode = page->mapping->host;
struct dnode_of_data dn;
- struct extent_info ei = {0,0,0};
+ struct extent_info ei = {0, };
struct node_info ni;
bool ipu_force = false;
int err = 0;
@@ -3176,9 +3252,8 @@ static int f2fs_write_data_pages(struct address_space *mapping,
FS_CP_DATA_IO : FS_DATA_IO);
}
-static void f2fs_write_failed(struct address_space *mapping, loff_t to)
+static void f2fs_write_failed(struct inode *inode, loff_t to)
{
- struct inode *inode = mapping->host;
loff_t i_size = i_size_read(inode);
if (IS_NOQUOTA(inode))
@@ -3187,12 +3262,12 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
if (to > i_size && !f2fs_verity_in_progress(inode)) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- filemap_invalidate_lock(mapping);
+ filemap_invalidate_lock(inode->i_mapping);
truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true);
- filemap_invalidate_unlock(mapping);
+ filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -3206,7 +3281,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
struct dnode_of_data dn;
struct page *ipage;
bool locked = false;
- struct extent_info ei = {0,0,0};
+ struct extent_info ei = {0, };
int err = 0;
int flag;
@@ -3328,6 +3403,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
*fsdata = NULL;
+ if (len == PAGE_SIZE)
+ goto repeat;
+
ret = f2fs_prepare_compress_overwrite(inode, pagep,
index, fsdata);
if (ret < 0) {
@@ -3410,7 +3488,7 @@ repeat:
fail:
f2fs_put_page(page, 1);
- f2fs_write_failed(mapping, pos + len);
+ f2fs_write_failed(inode, pos + len);
if (drop_atomic)
f2fs_drop_inmem_pages_all(sbi, false);
return err;
@@ -3552,7 +3630,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (f2fs_force_buffered_io(inode, iocb, iter))
return 0;
- do_opu = allow_outplace_dio(inode, iocb, iter);
+ do_opu = rw == WRITE && f2fs_lfs_mode(sbi);
trace_f2fs_direct_IO_enter(inode, offset, count, rw);
@@ -3600,7 +3678,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
count - iov_iter_count(iter));
} else if (err < 0) {
- f2fs_write_failed(mapping, offset + count);
+ f2fs_write_failed(inode, offset + count);
}
} else {
if (err > 0)