summaryrefslogtreecommitdiff
path: root/fs/erofs/zdata.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/erofs/zdata.c')
-rw-r--r--fs/erofs/zdata.c283
1 files changed, 119 insertions, 164 deletions
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 5f1890e309c6..036f610e044b 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -143,22 +143,17 @@ static inline void z_erofs_onlinepage_split(struct page *page)
atomic_inc((atomic_t *)&page->private);
}
-static inline void z_erofs_page_mark_eio(struct page *page)
+static void z_erofs_onlinepage_endio(struct page *page, int err)
{
- int orig;
+ int orig, v;
+
+ DBG_BUGON(!PagePrivate(page));
do {
orig = atomic_read((atomic_t *)&page->private);
- } while (atomic_cmpxchg((atomic_t *)&page->private, orig,
- orig | Z_EROFS_PAGE_EIO) != orig);
-}
-
-static inline void z_erofs_onlinepage_endio(struct page *page)
-{
- unsigned int v;
+ v = (orig - 1) | (err ? Z_EROFS_PAGE_EIO : 0);
+ } while (atomic_cmpxchg((atomic_t *)&page->private, orig, v) != orig);
- DBG_BUGON(!PagePrivate(page));
- v = atomic_dec_return((atomic_t *)&page->private);
if (!(v & ~Z_EROFS_PAGE_EIO)) {
set_page_private(page, 0);
ClearPagePrivate(page);
@@ -507,19 +502,17 @@ enum z_erofs_pclustermode {
*/
Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE,
/*
- * The current collection has been linked with the owned chain, and
- * could also be linked with the remaining collections, which means
- * if the processing page is the tail page of the collection, thus
- * the current collection can safely use the whole page (since
- * the previous collection is under control) for in-place I/O, as
- * illustrated below:
- * ________________________________________________________________
- * | tail (partial) page | head (partial) page |
- * | (of the current cl) | (of the previous collection) |
- * | | |
- * |__PCLUSTER_FOLLOWED___|___________PCLUSTER_FOLLOWED____________|
+ * The pcluster was just linked to a decompression chain by us. It can
+ * also be linked with the remaining pclusters, which means if the
+ * processing page is the tail page of a pcluster, this pcluster can
+ * safely use the whole page (since the previous pcluster is within the
+ * same chain) for in-place I/O, as illustrated below:
+ * ___________________________________________________
+ * | tail (partial) page | head (partial) page |
+ * | (of the current pcl) | (of the previous pcl) |
+ * |___PCLUSTER_FOLLOWED___|_____PCLUSTER_FOLLOWED_____|
*
- * [ (*) the above page can be used as inplace I/O. ]
+ * [ (*) the page above can be used as inplace I/O. ]
*/
Z_EROFS_PCLUSTER_FOLLOWED,
};
@@ -535,8 +528,6 @@ struct z_erofs_decompress_frontend {
z_erofs_next_pcluster_t owned_head;
enum z_erofs_pclustermode mode;
- /* used for applying cache strategy on the fly */
- bool backmost;
erofs_off_t headoffset;
/* a pointer used to pick up inplace I/O pages */
@@ -545,7 +536,7 @@ struct z_erofs_decompress_frontend {
#define DECOMPRESS_FRONTEND_INIT(__i) { \
.inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
- .mode = Z_EROFS_PCLUSTER_FOLLOWED, .backmost = true }
+ .mode = Z_EROFS_PCLUSTER_FOLLOWED }
static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
{
@@ -554,7 +545,7 @@ static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED)
return false;
- if (fe->backmost)
+ if (!(fe->map.m_flags & EROFS_MAP_FULL_MAPPED))
return true;
if (cachestrategy >= EROFS_ZIP_CACHE_READAROUND &&
@@ -851,9 +842,11 @@ err_out:
return err;
}
-static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
+static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
{
struct erofs_map_blocks *map = &fe->map;
+ struct super_block *sb = fe->inode->i_sb;
+ erofs_blk_t blknr = erofs_blknr(sb, map->m_pa);
struct erofs_workgroup *grp = NULL;
int ret;
@@ -863,8 +856,7 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
if (!(map->m_flags & EROFS_MAP_META)) {
- grp = erofs_find_workgroup(fe->inode->i_sb,
- map->m_pa >> PAGE_SHIFT);
+ grp = erofs_find_workgroup(sb, blknr);
} else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
DBG_BUGON(1);
return -EFSCORRUPTED;
@@ -883,9 +875,26 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
} else if (ret) {
return ret;
}
+
z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
- /* since file-backed online pages are traversed in reverse order */
+ if (!z_erofs_is_inline_pcluster(fe->pcl)) {
+ /* bind cache first when cached decompression is preferred */
+ z_erofs_bind_cache(fe);
+ } else {
+ void *mptr;
+
+ mptr = erofs_read_metabuf(&map->buf, sb, blknr, EROFS_NO_KMAP);
+ if (IS_ERR(mptr)) {
+ ret = PTR_ERR(mptr);
+ erofs_err(sb, "failed to get inline data %d", ret);
+ return ret;
+ }
+ get_page(map->buf.page);
+ WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page);
+ fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
+ }
+ /* file-backed inplace I/O pages are traversed in reverse order */
fe->icur = z_erofs_pclusterpages(fe->pcl);
return 0;
}
@@ -908,12 +917,12 @@ void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
call_rcu(&pcl->rcu, z_erofs_rcu_callback);
}
-static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
+static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
{
struct z_erofs_pcluster *pcl = fe->pcl;
if (!pcl)
- return false;
+ return;
z_erofs_bvec_iter_end(&fe->biter);
mutex_unlock(&pcl->lock);
@@ -929,37 +938,29 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
erofs_workgroup_put(&pcl->obj);
fe->pcl = NULL;
- return true;
}
-static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos,
- struct page *page, unsigned int pageofs,
- unsigned int len)
+static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
+ unsigned int cur, unsigned int end, erofs_off_t pos)
{
- struct super_block *sb = inode->i_sb;
- struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode;
+ struct inode *packed_inode = EROFS_SB(sb)->packed_inode;
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
- u8 *src, *dst;
- unsigned int i, cnt;
+ unsigned int cnt;
+ u8 *src;
if (!packed_inode)
return -EFSCORRUPTED;
buf.inode = packed_inode;
- pos += EROFS_I(inode)->z_fragmentoff;
- for (i = 0; i < len; i += cnt) {
- cnt = min_t(unsigned int, len - i,
+ for (; cur < end; cur += cnt, pos += cnt) {
+ cnt = min_t(unsigned int, end - cur,
sb->s_blocksize - erofs_blkoff(sb, pos));
src = erofs_bread(&buf, erofs_blknr(sb, pos), EROFS_KMAP);
if (IS_ERR(src)) {
erofs_put_metabuf(&buf);
return PTR_ERR(src);
}
-
- dst = kmap_local_page(page);
- memcpy(dst + pageofs + i, src + erofs_blkoff(sb, pos), cnt);
- kunmap_local(dst);
- pos += cnt;
+ memcpy_to_page(page, cur, src + erofs_blkoff(sb, pos), cnt);
}
erofs_put_metabuf(&buf);
return 0;
@@ -972,94 +973,60 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct erofs_map_blocks *const map = &fe->map;
const loff_t offset = page_offset(page);
bool tight = true, exclusive;
- unsigned int cur, end, spiltted;
+ unsigned int cur, end, len, split;
int err = 0;
- /* register locked file pages as online pages in pack */
z_erofs_onlinepage_init(page);
- spiltted = 0;
+ split = 0;
end = PAGE_SIZE;
repeat:
- cur = end - 1;
-
- if (offset + cur < map->m_la ||
- offset + cur >= map->m_la + map->m_llen) {
- if (z_erofs_collector_end(fe))
- fe->backmost = false;
- map->m_la = offset + cur;
+ if (offset + end - 1 < map->m_la ||
+ offset + end - 1 >= map->m_la + map->m_llen) {
+ z_erofs_pcluster_end(fe);
+ map->m_la = offset + end - 1;
map->m_llen = 0;
err = z_erofs_map_blocks_iter(inode, map, 0);
if (err)
goto out;
- } else {
- if (fe->pcl)
- goto hitted;
- /* didn't get a valid pcluster previously (very rare) */
}
- if (!(map->m_flags & EROFS_MAP_MAPPED) ||
- map->m_flags & EROFS_MAP_FRAGMENT)
- goto hitted;
-
- err = z_erofs_collector_begin(fe);
- if (err)
- goto out;
+ cur = offset > map->m_la ? 0 : map->m_la - offset;
+ /* bump split parts first to avoid several separate cases */
+ ++split;
- if (z_erofs_is_inline_pcluster(fe->pcl)) {
- void *mp;
-
- mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb,
- erofs_blknr(inode->i_sb, map->m_pa),
- EROFS_NO_KMAP);
- if (IS_ERR(mp)) {
- err = PTR_ERR(mp);
- erofs_err(inode->i_sb,
- "failed to get inline page, err %d", err);
- goto out;
- }
- get_page(fe->map.buf.page);
- WRITE_ONCE(fe->pcl->compressed_bvecs[0].page,
- fe->map.buf.page);
- fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
- } else {
- /* bind cache first when cached decompression is preferred */
- z_erofs_bind_cache(fe);
- }
-hitted:
- /*
- * Ensure the current partial page belongs to this submit chain rather
- * than other concurrent submit chains or the noio(bypass) chain since
- * those chains are handled asynchronously thus the page cannot be used
- * for inplace I/O or bvpage (should be processed in a strict order.)
- */
- tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
-
- cur = end - min_t(unsigned int, offset + end - map->m_la, end);
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
zero_user_segment(page, cur, end);
+ tight = false;
goto next_part;
}
+
if (map->m_flags & EROFS_MAP_FRAGMENT) {
- unsigned int pageofs, skip, len;
+ erofs_off_t fpos = offset + cur - map->m_la;
- if (offset > map->m_la) {
- pageofs = 0;
- skip = offset - map->m_la;
- } else {
- pageofs = map->m_la & ~PAGE_MASK;
- skip = 0;
- }
- len = min_t(unsigned int, map->m_llen - skip, end - cur);
- err = z_erofs_read_fragment(inode, skip, page, pageofs, len);
+ len = min_t(unsigned int, map->m_llen - fpos, end - cur);
+ err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len,
+ EROFS_I(inode)->z_fragmentoff + fpos);
if (err)
goto out;
- ++spiltted;
tight = false;
goto next_part;
}
- exclusive = (!cur && (!spiltted || tight));
+ if (!fe->pcl) {
+ err = z_erofs_pcluster_begin(fe);
+ if (err)
+ goto out;
+ }
+
+ /*
+ * Ensure the current partial page belongs to this submit chain rather
+ * than other concurrent submit chains or the noio(bypass) chain since
+ * those chains are handled asynchronously thus the page cannot be used
+ * for inplace I/O or bvpage (should be processed in a strict order.)
+ */
+ tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
+ exclusive = (!cur && ((split <= 1) || tight));
if (cur)
tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
@@ -1072,8 +1039,6 @@ hitted:
goto out;
z_erofs_onlinepage_split(page);
- /* bump up the number of spiltted parts of a page */
- ++spiltted;
if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
fe->pcl->multibases = true;
if (fe->pcl->length < offset + end - map->m_la) {
@@ -1094,9 +1059,7 @@ next_part:
goto repeat;
out:
- if (err)
- z_erofs_page_mark_eio(page);
- z_erofs_onlinepage_endio(page);
+ z_erofs_onlinepage_endio(page, err);
return err;
}
@@ -1144,10 +1107,11 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
struct z_erofs_bvec *bvec)
{
struct z_erofs_bvec_item *item;
+ unsigned int pgnr;
- if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) {
- unsigned int pgnr;
-
+ if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) &&
+ (bvec->end == PAGE_SIZE ||
+ bvec->offset + bvec->end == be->pcl->length)) {
pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT;
DBG_BUGON(pgnr >= be->nr_pages);
if (!be->decompressed_pages[pgnr]) {
@@ -1198,9 +1162,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
cur += len;
}
kunmap_local(dst);
- if (err)
- z_erofs_page_mark_eio(bvi->bvec.page);
- z_erofs_onlinepage_endio(bvi->bvec.page);
+ z_erofs_onlinepage_endio(bvi->bvec.page, err);
list_del(p);
kfree(bvi);
}
@@ -1371,9 +1333,7 @@ out:
/* recycle all individual short-lived pages */
if (z_erofs_put_shortlivedpage(be->pagepool, page))
continue;
- if (err)
- z_erofs_page_mark_eio(page);
- z_erofs_onlinepage_endio(page);
+ z_erofs_onlinepage_endio(page, err);
}
if (be->decompressed_pages != be->onstack_pages)
@@ -1409,7 +1369,10 @@ static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
owned = READ_ONCE(be.pcl->next);
z_erofs_decompress_pcluster(&be, io->eio ? -EIO : 0);
- erofs_workgroup_put(&be.pcl->obj);
+ if (z_erofs_is_inline_pcluster(be.pcl))
+ z_erofs_free_pcluster(be.pcl);
+ else
+ erofs_workgroup_put(&be.pcl->obj);
}
}
@@ -1841,21 +1804,16 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
}
cur = map->m_la + map->m_llen - 1;
- while (cur >= end) {
+ while ((cur >= end) && (cur < i_size_read(inode))) {
pgoff_t index = cur >> PAGE_SHIFT;
struct page *page;
page = erofs_grab_cache_page_nowait(inode->i_mapping, index);
if (page) {
- if (PageUptodate(page)) {
+ if (PageUptodate(page))
unlock_page(page);
- } else {
- err = z_erofs_do_read_page(f, page);
- if (err)
- erofs_err(inode->i_sb,
- "readmore error at page %lu @ nid %llu",
- index, EROFS_I(inode)->nid);
- }
+ else
+ (void)z_erofs_do_read_page(f, page);
put_page(page);
}
@@ -1867,25 +1825,25 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
static int z_erofs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *const inode = page->mapping->host;
+ struct inode *const inode = folio->mapping->host;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
int err;
- trace_erofs_readpage(page, false);
- f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
+ trace_erofs_read_folio(folio, false);
+ f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
z_erofs_pcluster_readmore(&f, NULL, true);
- err = z_erofs_do_read_page(&f, page);
+ err = z_erofs_do_read_page(&f, &folio->page);
z_erofs_pcluster_readmore(&f, NULL, false);
- (void)z_erofs_collector_end(&f);
+ z_erofs_pcluster_end(&f);
/* if some compressed cluster ready, need submit them anyway */
z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0), false);
- if (err)
- erofs_err(inode->i_sb, "failed to read, err [%d]", err);
+ if (err && err != -EINTR)
+ erofs_err(inode->i_sb, "read error %d @ %lu of nid %llu",
+ err, folio->index, EROFS_I(inode)->nid);
erofs_put_metabuf(&f.map.buf);
erofs_release_pages(&f.pagepool);
@@ -1897,38 +1855,35 @@ static void z_erofs_readahead(struct readahead_control *rac)
struct inode *const inode = rac->mapping->host;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
- struct page *head = NULL, *page;
- unsigned int nr_pages;
+ struct folio *head = NULL, *folio;
+ unsigned int nr_folios;
+ int err;
f.headoffset = readahead_pos(rac);
z_erofs_pcluster_readmore(&f, rac, true);
- nr_pages = readahead_count(rac);
- trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
+ nr_folios = readahead_count(rac);
+ trace_erofs_readpages(inode, readahead_index(rac), nr_folios, false);
- while ((page = readahead_page(rac))) {
- set_page_private(page, (unsigned long)head);
- head = page;
+ while ((folio = readahead_folio(rac))) {
+ folio->private = head;
+ head = folio;
}
+ /* traverse in reverse order for best metadata I/O performance */
while (head) {
- struct page *page = head;
- int err;
-
- /* traversal in reverse order */
- head = (void *)page_private(page);
+ folio = head;
+ head = folio_get_private(folio);
- err = z_erofs_do_read_page(&f, page);
- if (err)
- erofs_err(inode->i_sb,
- "readahead error at page %lu @ nid %llu",
- page->index, EROFS_I(inode)->nid);
- put_page(page);
+ err = z_erofs_do_read_page(&f, &folio->page);
+ if (err && err != -EINTR)
+ erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
+ folio->index, EROFS_I(inode)->nid);
}
z_erofs_pcluster_readmore(&f, rac, false);
- (void)z_erofs_collector_end(&f);
+ z_erofs_pcluster_end(&f);
- z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_pages), true);
+ z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_folios), true);
erofs_put_metabuf(&f.map.buf);
erofs_release_pages(&f.pagepool);
}