summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorQu Wenruo <wqu@suse.com>2023-11-16 08:19:06 +0300
committerDavid Sterba <dsterba@suse.com>2023-12-15 22:27:03 +0300
commit397239ed6a6c88b002fbba0b25ed5a719c578c2f (patch)
treeac99eeb41e8b9cc942a88720bb35b69dd3f8806f /fs
parent3ba2d3648f9dcd6af6326352bb2775e8b31372e0 (diff)
downloadlinux-397239ed6a6c88b002fbba0b25ed5a719c578c2f.tar.xz
btrfs: allow extent buffer helpers to skip cross-page handling
Currently the btrfs extent buffer helpers do all the cross-page handling, as there is no guarantee that all the eb pages are contiguous. However, on systems with enough memory, there is a very high chance that the page cache for btree_inode is allocated with physically contiguous pages. In that case, we can skip all the complex cross-page handling, thus speeding up the code. This patch adds a new member, extent_buffer::addr, which is only set to non-NULL if all the extent buffer pages are physically contiguous. Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/disk-io.c17
-rw-r--r--fs/btrfs/extent_io.c54
-rw-r--r--fs/btrfs/extent_io.h7
3 files changed, 75 insertions, 3 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7b55b59115e7..125b749d2c6f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -74,15 +74,26 @@ static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
static void csum_tree_block(struct extent_buffer *buf, u8 *result)
{
struct btrfs_fs_info *fs_info = buf->fs_info;
- const int num_pages = num_extent_pages(buf);
- const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
+ int num_pages;
+ u32 first_page_part;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
char *kaddr;
int i;
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
- kaddr = page_address(buf->pages[0]) + offset_in_page(buf->start);
+
+ if (buf->addr) {
+ /* Pages are contiguous, handle them as a big one. */
+ kaddr = buf->addr;
+ first_page_part = fs_info->nodesize;
+ num_pages = 1;
+ } else {
+ kaddr = page_address(buf->pages[0]);
+ first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
+ num_pages = num_extent_pages(buf);
+ }
+
crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
first_page_part - BTRFS_CSUM_SIZE);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index cc9a454810d0..7f7ecee9e048 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3489,6 +3489,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
struct address_space *mapping = fs_info->btree_inode->i_mapping;
struct btrfs_subpage *prealloc = NULL;
u64 lockdep_owner = owner_root;
+ bool page_contig = true;
int uptodate = 1;
int ret;
@@ -3575,6 +3576,14 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
eb->pages[i] = p;
+
+ /*
+ * Check if the current page is physically contiguous with previous eb
+ * page.
+ */
+ if (i && eb->pages[i - 1] + 1 != p)
+ page_contig = false;
+
if (!btrfs_page_test_uptodate(fs_info, p, eb->start, eb->len))
uptodate = 0;
@@ -3588,6 +3597,9 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
}
if (uptodate)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+ /* All pages are physically contiguous, can skip cross page handling. */
+ if (page_contig)
+ eb->addr = page_address(eb->pages[0]) + offset_in_page(eb->start);
again:
ret = radix_tree_preload(GFP_NOFS);
if (ret) {
@@ -4036,6 +4048,11 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
return;
}
+ if (eb->addr) {
+ memcpy(dstv, eb->addr + start, len);
+ return;
+ }
+
offset = get_eb_offset_in_page(eb, start);
while (len > 0) {
@@ -4067,6 +4084,12 @@ int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
+ if (eb->addr) {
+ if (copy_to_user_nofault(dstv, eb->addr + start, len))
+ ret = -EFAULT;
+ return ret;
+ }
+
offset = get_eb_offset_in_page(eb, start);
while (len > 0) {
@@ -4102,6 +4125,9 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
if (check_eb_range(eb, start, len))
return -EINVAL;
+ if (eb->addr)
+ return memcmp(ptrv, eb->addr + start, len);
+
offset = get_eb_offset_in_page(eb, start);
while (len > 0) {
@@ -4169,6 +4195,14 @@ static void __write_extent_buffer(const struct extent_buffer *eb,
if (check_eb_range(eb, start, len))
return;
+ if (eb->addr) {
+ if (use_memmove)
+ memmove(eb->addr + start, srcv, len);
+ else
+ memcpy(eb->addr + start, srcv, len);
+ return;
+ }
+
offset = get_eb_offset_in_page(eb, start);
while (len > 0) {
@@ -4201,6 +4235,11 @@ static void memset_extent_buffer(const struct extent_buffer *eb, int c,
{
unsigned long cur = start;
+ if (eb->addr) {
+ memset(eb->addr + start, c, len);
+ return;
+ }
+
while (cur < start + len) {
unsigned long index = get_eb_page_index(cur);
unsigned int offset = get_eb_offset_in_page(eb, cur);
@@ -4428,6 +4467,16 @@ void memcpy_extent_buffer(const struct extent_buffer *dst,
check_eb_range(dst, src_offset, len))
return;
+ if (dst->addr) {
+ const bool use_memmove = areas_overlap(src_offset, dst_offset, len);
+
+ if (use_memmove)
+ memmove(dst->addr + dst_offset, dst->addr + src_offset, len);
+ else
+ memcpy(dst->addr + dst_offset, dst->addr + src_offset, len);
+ return;
+ }
+
while (cur_off < len) {
unsigned long cur_src = cur_off + src_offset;
unsigned long pg_index = get_eb_page_index(cur_src);
@@ -4460,6 +4509,11 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
return;
}
+ if (dst->addr) {
+ memmove(dst->addr + dst_offset, dst->addr + src_offset, len);
+ return;
+ }
+
while (len > 0) {
unsigned long src_i;
size_t cur;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 021040b3117e..c2c6bfba63c0 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -78,6 +78,13 @@ struct extent_buffer {
unsigned long len;
unsigned long bflags;
struct btrfs_fs_info *fs_info;
+
+ /*
+ * The address where the eb can be accessed without any cross-page handling.
+ * This can be NULL if not possible.
+ */
+ void *addr;
+
spinlock_t refs_lock;
atomic_t refs;
int read_mirror;