summary refs log tree commit diff
path: root/fs/ext4/extents.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- fs/ext4/extents.c 319
1 file changed, 222 insertions, 97 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7097b0f680e6..54d52afcdb19 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -407,7 +407,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
static int __ext4_ext_check(const char *function, unsigned int line,
struct inode *inode, struct ext4_extent_header *eh,
- int depth)
+ int depth, ext4_fsblk_t pblk)
{
const char *error_msg;
int max = 0;
@@ -447,42 +447,149 @@ static int __ext4_ext_check(const char *function, unsigned int line,
corrupted:
ext4_error_inode(inode, function, line, 0,
- "bad header/extent: %s - magic %x, "
- "entries %u, max %u(%u), depth %u(%u)",
- error_msg, le16_to_cpu(eh->eh_magic),
- le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
- max, le16_to_cpu(eh->eh_depth), depth);
-
+ "pblk %llu bad header/extent: %s - magic %x, "
+ "entries %u, max %u(%u), depth %u(%u)",
+ (unsigned long long) pblk, error_msg,
+ le16_to_cpu(eh->eh_magic),
+ le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
+ max, le16_to_cpu(eh->eh_depth), depth);
return -EIO;
}
-#define ext4_ext_check(inode, eh, depth) \
- __ext4_ext_check(__func__, __LINE__, inode, eh, depth)
+#define ext4_ext_check(inode, eh, depth, pblk) \
+ __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk))
int ext4_ext_check_inode(struct inode *inode)
{
- return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
+ return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
}
-static int __ext4_ext_check_block(const char *function, unsigned int line,
- struct inode *inode,
- struct ext4_extent_header *eh,
- int depth,
- struct buffer_head *bh)
+static struct buffer_head *
+__read_extent_tree_block(const char *function, unsigned int line,
+ struct inode *inode, ext4_fsblk_t pblk, int depth,
+ int flags)
{
- int ret;
+ struct buffer_head *bh;
+ int err;
- if (buffer_verified(bh))
- return 0;
- ret = ext4_ext_check(inode, eh, depth);
- if (ret)
- return ret;
+ bh = sb_getblk(inode->i_sb, pblk);
+ if (unlikely(!bh))
+ return ERR_PTR(-ENOMEM);
+
+ if (!bh_uptodate_or_lock(bh)) {
+ trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
+ err = bh_submit_read(bh);
+ if (err < 0)
+ goto errout;
+ }
+ if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
+ return bh;
+ err = __ext4_ext_check(function, line, inode,
+ ext_block_hdr(bh), depth, pblk);
+ if (err)
+ goto errout;
set_buffer_verified(bh);
- return ret;
+ /*
+ * If this is a leaf block, cache all of its entries
+ */
+ if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
+ struct ext4_extent_header *eh = ext_block_hdr(bh);
+ struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
+ ext4_lblk_t prev = 0;
+ int i;
+
+ for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
+ unsigned int status = EXTENT_STATUS_WRITTEN;
+ ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
+ int len = ext4_ext_get_actual_len(ex);
+
+ if (prev && (prev != lblk))
+ ext4_es_cache_extent(inode, prev,
+ lblk - prev, ~0,
+ EXTENT_STATUS_HOLE);
+
+ if (ext4_ext_is_uninitialized(ex))
+ status = EXTENT_STATUS_UNWRITTEN;
+ ext4_es_cache_extent(inode, lblk, len,
+ ext4_ext_pblock(ex), status);
+ prev = lblk + len;
+ }
+ }
+ return bh;
+errout:
+ put_bh(bh);
+ return ERR_PTR(err);
+
}
-#define ext4_ext_check_block(inode, eh, depth, bh) \
- __ext4_ext_check_block(__func__, __LINE__, inode, eh, depth, bh)
+#define read_extent_tree_block(inode, pblk, depth, flags) \
+ __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \
+ (depth), (flags))
+
+/*
+ * This function is called to cache a file's extent information in the
+ * extent status tree
+ */
+int ext4_ext_precache(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_ext_path *path = NULL;
+ struct buffer_head *bh;
+ int i = 0, depth, ret = 0;
+
+ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+ return 0; /* not an extent-mapped inode */
+
+ down_read(&ei->i_data_sem);
+ depth = ext_depth(inode);
+
+ path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1),
+ GFP_NOFS);
+ if (path == NULL) {
+ up_read(&ei->i_data_sem);
+ return -ENOMEM;
+ }
+
+ /* Don't cache anything if there are no external extent blocks */
+ if (depth == 0)
+ goto out;
+ path[0].p_hdr = ext_inode_hdr(inode);
+ ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0);
+ if (ret)
+ goto out;
+ path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr);
+ while (i >= 0) {
+ /*
+ * If this is a leaf block or we've reached the end of
+ * the index block, go up
+ */
+ if ((i == depth) ||
+ path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
+ brelse(path[i].p_bh);
+ path[i].p_bh = NULL;
+ i--;
+ continue;
+ }
+ bh = read_extent_tree_block(inode,
+ ext4_idx_pblock(path[i].p_idx++),
+ depth - i - 1,
+ EXT4_EX_FORCE_CACHE);
+ if (IS_ERR(bh)) {
+ ret = PTR_ERR(bh);
+ break;
+ }
+ i++;
+ path[i].p_bh = bh;
+ path[i].p_hdr = ext_block_hdr(bh);
+ path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr);
+ }
+ ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
+out:
+ up_read(&ei->i_data_sem);
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ return ret;
+}
#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -716,7 +823,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
struct ext4_ext_path *
ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
- struct ext4_ext_path *path)
+ struct ext4_ext_path *path, int flags)
{
struct ext4_extent_header *eh;
struct buffer_head *bh;
@@ -748,20 +855,13 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
path[ppos].p_depth = i;
path[ppos].p_ext = NULL;
- bh = sb_getblk(inode->i_sb, path[ppos].p_block);
- if (unlikely(!bh)) {
- ret = -ENOMEM;
+ bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
+ flags);
+ if (IS_ERR(bh)) {
+ ret = PTR_ERR(bh);
goto err;
}
- if (!bh_uptodate_or_lock(bh)) {
- trace_ext4_ext_load_extent(inode, block,
- path[ppos].p_block);
- ret = bh_submit_read(bh);
- if (ret < 0) {
- put_bh(bh);
- goto err;
- }
- }
+
eh = ext_block_hdr(bh);
ppos++;
if (unlikely(ppos > depth)) {
@@ -773,11 +873,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
}
path[ppos].p_bh = bh;
path[ppos].p_hdr = eh;
- i--;
-
- ret = ext4_ext_check_block(inode, eh, i, bh);
- if (ret < 0)
- goto err;
}
path[ppos].p_depth = i;
@@ -1198,7 +1293,8 @@ out:
* if no free index is found, then it requests in-depth growing.
*/
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
- unsigned int flags,
+ unsigned int mb_flags,
+ unsigned int gb_flags,
struct ext4_ext_path *path,
struct ext4_extent *newext)
{
@@ -1220,7 +1316,7 @@ repeat:
if (EXT_HAS_FREE_INDEX(curp)) {
/* if we found index with free entry, then use that
* entry: create all needed subtree and add new leaf */
- err = ext4_ext_split(handle, inode, flags, path, newext, i);
+ err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
if (err)
goto out;
@@ -1228,12 +1324,12 @@ repeat:
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode,
(ext4_lblk_t)le32_to_cpu(newext->ee_block),
- path);
+ path, gb_flags);
if (IS_ERR(path))
err = PTR_ERR(path);
} else {
/* tree is full, time to grow in depth */
- err = ext4_ext_grow_indepth(handle, inode, flags, newext);
+ err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext);
if (err)
goto out;
@@ -1241,7 +1337,7 @@ repeat:
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode,
(ext4_lblk_t)le32_to_cpu(newext->ee_block),
- path);
+ path, gb_flags);
if (IS_ERR(path)) {
err = PTR_ERR(path);
goto out;
@@ -1412,29 +1508,21 @@ got_index:
ix++;
block = ext4_idx_pblock(ix);
while (++depth < path->p_depth) {
- bh = sb_bread(inode->i_sb, block);
- if (bh == NULL)
- return -EIO;
- eh = ext_block_hdr(bh);
/* subtract from p_depth to get proper eh_depth */
- if (ext4_ext_check_block(inode, eh,
- path->p_depth - depth, bh)) {
- put_bh(bh);
- return -EIO;
- }
+ bh = read_extent_tree_block(inode, block,
+ path->p_depth - depth, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+ eh = ext_block_hdr(bh);
ix = EXT_FIRST_INDEX(eh);
block = ext4_idx_pblock(ix);
put_bh(bh);
}
- bh = sb_bread(inode->i_sb, block);
- if (bh == NULL)
- return -EIO;
+ bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
eh = ext_block_hdr(bh);
- if (ext4_ext_check_block(inode, eh, path->p_depth - depth, bh)) {
- put_bh(bh);
- return -EIO;
- }
ex = EXT_FIRST_EXTENT(eh);
found_extent:
*logical = le32_to_cpu(ex->ee_block);
@@ -1705,7 +1793,8 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
brelse(path[1].p_bh);
ext4_free_blocks(handle, inode, NULL, blk, 1,
- EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+ EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET |
+ EXT4_FREE_BLOCKS_RESERVE);
}
/*
@@ -1793,7 +1882,7 @@ out:
*/
int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
- struct ext4_extent *newext, int flag)
+ struct ext4_extent *newext, int gb_flags)
{
struct ext4_extent_header *eh;
struct ext4_extent *ex, *fex;
@@ -1802,7 +1891,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
int depth, len, err;
ext4_lblk_t next;
unsigned uninitialized = 0;
- int flags = 0;
+ int mb_flags = 0;
if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
@@ -1817,7 +1906,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
}
/* try to insert block into found extent and return */
- if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)) {
+ if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {
/*
* Try to see whether we should rather test the extent on
@@ -1920,7 +2009,7 @@ prepend:
if (next != EXT_MAX_BLOCKS) {
ext_debug("next leaf block - %u\n", next);
BUG_ON(npath != NULL);
- npath = ext4_ext_find_extent(inode, next, NULL);
+ npath = ext4_ext_find_extent(inode, next, NULL, 0);
if (IS_ERR(npath))
return PTR_ERR(npath);
BUG_ON(npath->p_depth != path->p_depth);
@@ -1939,9 +2028,10 @@ prepend:
* There is no free space in the found leaf.
* We're gonna add a new leaf in the tree.
*/
- if (flag & EXT4_GET_BLOCKS_METADATA_NOFAIL)
- flags = EXT4_MB_USE_RESERVED;
- err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);
+ if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
+ mb_flags = EXT4_MB_USE_RESERVED;
+ err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
+ path, newext);
if (err)
goto cleanup;
depth = ext_depth(inode);
@@ -2007,7 +2097,7 @@ has_space:
merge:
/* try to merge extents */
- if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
+ if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
ext4_ext_try_to_merge(handle, inode, path, nearex);
@@ -2050,7 +2140,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
path = NULL;
}
- path = ext4_ext_find_extent(inode, block, path);
+ path = ext4_ext_find_extent(inode, block, path, 0);
if (IS_ERR(path)) {
up_read(&EXT4_I(inode)->i_data_sem);
err = PTR_ERR(path);
@@ -2195,8 +2285,8 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
ext4_lblk_t block)
{
int depth = ext_depth(inode);
- unsigned long len;
- ext4_lblk_t lblock;
+ unsigned long len = 0;
+ ext4_lblk_t lblock = 0;
struct ext4_extent *ex;
ex = path[depth].p_ext;
@@ -2233,7 +2323,6 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
ext4_es_insert_extent(inode, lblock, len, ~0,
EXTENT_STATUS_HOLE);
} else {
- lblock = len = 0;
BUG();
}
@@ -2712,7 +2801,7 @@ again:
ext4_lblk_t ee_block;
/* find extent for this block */
- path = ext4_ext_find_extent(inode, end, NULL);
+ path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
if (IS_ERR(path)) {
ext4_journal_stop(handle);
return PTR_ERR(path);
@@ -2754,6 +2843,7 @@ again:
*/
err = ext4_split_extent_at(handle, inode, path,
end + 1, split_flag,
+ EXT4_EX_NOCACHE |
EXT4_GET_BLOCKS_PRE_IO |
EXT4_GET_BLOCKS_METADATA_NOFAIL);
@@ -2782,7 +2872,7 @@ again:
path[0].p_hdr = ext_inode_hdr(inode);
i = 0;
- if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
+ if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
err = -EIO;
goto out;
}
@@ -2829,21 +2919,21 @@ again:
ext_debug("move to level %d (block %llu)\n",
i + 1, ext4_idx_pblock(path[i].p_idx));
memset(path + i + 1, 0, sizeof(*path));
- bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx));
- if (!bh) {
+ bh = read_extent_tree_block(inode,
+ ext4_idx_pblock(path[i].p_idx), depth - i - 1,
+ EXT4_EX_NOCACHE);
+ if (IS_ERR(bh)) {
/* should we reset i_size? */
- err = -EIO;
+ err = PTR_ERR(bh);
break;
}
+ /* Yield here to deal with large extent trees.
+ * Should be a no-op if we did IO above. */
+ cond_resched();
if (WARN_ON(i + 1 > depth)) {
err = -EIO;
break;
}
- if (ext4_ext_check_block(inode, ext_block_hdr(bh),
- depth - i - 1, bh)) {
- err = -EIO;
- break;
- }
path[i + 1].p_bh = bh;
/* save actual number of indexes since this
@@ -2958,6 +3048,23 @@ void ext4_ext_release(struct super_block *sb)
#endif
}
+static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
+{
+ ext4_lblk_t ee_block;
+ ext4_fsblk_t ee_pblock;
+ unsigned int ee_len;
+
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
+ ee_pblock = ext4_ext_pblock(ex);
+
+ if (ee_len == 0)
+ return 0;
+
+ return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
+ EXTENT_STATUS_WRITTEN);
+}
+
/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
@@ -3110,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle,
goto fix_extent_len;
/* update extent status tree */
- err = ext4_es_zeroout(inode, &zero_ex);
+ err = ext4_zeroout_es(inode, &zero_ex);
goto out;
} else if (err)
@@ -3130,7 +3237,7 @@ fix_extent_len:
* ext4_split_extents() splits an extent and mark extent which is covered
* by @map as split_flags indicates
*
- * It may result in splitting the extent into multiple extents (upto three)
+ * It may result in splitting the extent into multiple extents (up to three)
* There are three possibilities:
* a> There is no split required
* b> Splits in two extents: Split is happening at either end of the extent
@@ -3178,7 +3285,7 @@ static int ext4_split_extent(handle_t *handle,
* result in split of original leaf or extent zeroout.
*/
ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk, path);
+ path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
if (IS_ERR(path))
return PTR_ERR(path);
depth = ext_depth(inode);
@@ -3461,7 +3568,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
out:
/* If we have gotten a failure, don't zero out status tree */
if (!err)
- err = ext4_es_zeroout(inode, &zero_ex);
+ err = ext4_zeroout_es(inode, &zero_ex);
return err ? err : allocated;
}
@@ -3562,7 +3669,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
if (err < 0)
goto out;
ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk, path);
+ path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
if (IS_ERR(path)) {
err = PTR_ERR(path);
goto out;
@@ -4049,7 +4156,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
/* find extent for this block */
- path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
+ path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0);
if (IS_ERR(path)) {
err = PTR_ERR(path);
path = NULL;
@@ -4261,8 +4368,8 @@ got_allocated_blocks:
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
- ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
- ext4_ext_get_actual_len(&newex), fb_flags);
+ ext4_free_blocks(handle, inode, NULL, newblock,
+ EXT4_C2B(sbi, allocated_clusters), fb_flags);
goto out2;
}
@@ -4382,8 +4489,9 @@ out2:
}
out3:
- trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated);
-
+ trace_ext4_ext_map_blocks_exit(inode, flags, map,
+ err ? err : allocated);
+ ext4_es_lru_add(inode);
return err ? err : allocated;
}
@@ -4405,9 +4513,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode)
last_block = (inode->i_size + sb->s_blocksize - 1)
>> EXT4_BLOCK_SIZE_BITS(sb);
+retry:
err = ext4_es_remove_extent(inode, last_block,
EXT_MAX_BLOCKS - last_block);
+ if (err == -ENOMEM) {
+ cond_resched();
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry;
+ }
+ if (err) {
+ ext4_std_error(inode->i_sb, err);
+ return;
+ }
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
+ ext4_std_error(inode->i_sb, err);
}
static void ext4_falloc_update_inode(struct inode *inode,
@@ -4729,6 +4848,12 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return error;
}
+ if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
+ error = ext4_ext_precache(inode);
+ if (error)
+ return error;
+ }
+
/* fallback to generic here if not in extents fmt */
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return generic_block_fiemap(inode, fieinfo, start, len,
@@ -4756,6 +4881,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
error = ext4_fill_fiemap_extents(inode, start_blk,
len_blks, fieinfo);
}
-
+ ext4_es_lru_add(inode);
return error;
}