summary | refs | log | tree | commit | diff
path: root/fs/buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/buffer.c')
-rw-r--r-- fs/buffer.c | 191
1 file changed, 109 insertions, 82 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 4f73d23c2c46..8c19e705b9c3 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -189,8 +189,8 @@ EXPORT_SYMBOL(end_buffer_write_sync);
static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
{
- struct inode *bd_inode = bdev->bd_inode;
- struct address_space *bd_mapping = bd_inode->i_mapping;
+ struct address_space *bd_mapping = bdev->bd_mapping;
+ const int blkbits = bd_mapping->host->i_blkbits;
struct buffer_head *ret = NULL;
pgoff_t index;
struct buffer_head *bh;
@@ -199,7 +199,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
int all_mapped = 1;
static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
- index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE;
+ index = ((loff_t)block << blkbits) / PAGE_SIZE;
folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0);
if (IS_ERR(folio))
goto out;
@@ -233,7 +233,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
(unsigned long long)block,
(unsigned long long)bh->b_blocknr,
bh->b_state, bh->b_size, bdev,
- 1 << bd_inode->i_blkbits);
+ 1 << blkbits);
}
out_unlock:
spin_unlock(&bd_mapping->i_private_lock);
@@ -687,30 +687,37 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);
-/*
- * Add a page to the dirty page list.
- *
- * It is a sad fact of life that this function is called from several places
- * deeply under spinlocking. It may not sleep.
- *
- * If the page has buffers, the uptodate buffers are set dirty, to preserve
- * dirty-state coherency between the page and the buffers. It the page does
- * not have buffers then when they are later attached they will all be set
- * dirty.
- *
- * The buffers are dirtied before the page is dirtied. There's a small race
- * window in which a writepage caller may see the page cleanness but not the
- * buffer dirtiness. That's fine. If this code were to set the page dirty
- * before the buffers, a concurrent writepage caller could clear the page dirty
- * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
- * page on the dirty page list.
- *
- * We use i_private_lock to lock against try_to_free_buffers while using the
- * page's buffer list. Also use this to protect against clean buffers being
- * added to the page after it was set dirty.
- *
- * FIXME: may need to call ->reservepage here as well. That's rather up to the
- * address_space though.
+/**
+ * block_dirty_folio - Mark a folio as dirty.
+ * @mapping: The address space containing this folio.
+ * @folio: The folio to mark dirty.
+ *
+ * Filesystems which use buffer_heads can use this function as their
+ * ->dirty_folio implementation. Some filesystems need to do a little
+ * work before calling this function. Filesystems which do not use
+ * buffer_heads should call filemap_dirty_folio() instead.
+ *
+ * If the folio has buffers, the uptodate buffers are set dirty, to
+ * preserve dirty-state coherency between the folio and the buffers.
+ * Buffers added to a dirty folio are created dirty.
+ *
+ * The buffers are dirtied before the folio is dirtied. There's a small
+ * race window in which writeback may see the folio cleanness but not the
+ * buffer dirtiness. That's fine. If this code were to set the folio
+ * dirty before the buffers, writeback could clear the folio dirty flag,
+ * see a bunch of clean buffers and we'd end up with dirty buffers/clean
+ * folio on the dirty folio list.
+ *
+ * We use i_private_lock to lock against try_to_free_buffers() while
+ * using the folio's buffer list. This also prevents clean buffers
+ * being added to the folio after it was set dirty.
+ *
+ * Context: May only be called from process context. Does not sleep.
+ * Caller must ensure that @folio cannot be truncated during this call,
+ * typically by holding the folio lock or having a page in the folio
+ * mapped and holding the page table lock.
+ *
+ * Return: True if the folio was dirtied; false if it was already dirtied.
*/
bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
{
@@ -1034,12 +1041,12 @@ static sector_t folio_init_buffers(struct folio *folio,
static bool grow_dev_folio(struct block_device *bdev, sector_t block,
pgoff_t index, unsigned size, gfp_t gfp)
{
- struct inode *inode = bdev->bd_inode;
+ struct address_space *mapping = bdev->bd_mapping;
struct folio *folio;
struct buffer_head *bh;
sector_t end_block = 0;
- folio = __filemap_get_folio(inode->i_mapping, index,
+ folio = __filemap_get_folio(mapping, index,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
if (IS_ERR(folio))
return false;
@@ -1073,10 +1080,10 @@ static bool grow_dev_folio(struct block_device *bdev, sector_t block,
* lock to be atomic wrt __find_get_block(), which does not
* run under the folio lock.
*/
- spin_lock(&inode->i_mapping->i_private_lock);
+ spin_lock(&mapping->i_private_lock);
link_dev_buffers(folio, bh);
end_block = folio_init_buffers(folio, bdev, size);
- spin_unlock(&inode->i_mapping->i_private_lock);
+ spin_unlock(&mapping->i_private_lock);
unlock:
folio_unlock(folio);
folio_put(folio);
@@ -1219,26 +1226,28 @@ void mark_buffer_write_io_error(struct buffer_head *bh)
}
EXPORT_SYMBOL(mark_buffer_write_io_error);
-/*
- * Decrement a buffer_head's reference count. If all buffers against a page
- * have zero reference count, are clean and unlocked, and if the page is clean
- * and unlocked then try_to_free_buffers() may strip the buffers from the page
- * in preparation for freeing it (sometimes, rarely, buffers are removed from
- * a page but it ends up not being freed, and buffers may later be reattached).
+/**
+ * __brelse - Release a buffer.
+ * @bh: The buffer to release.
+ *
+ * This variant of brelse() can be called if @bh is guaranteed to not be NULL.
*/
-void __brelse(struct buffer_head * buf)
+void __brelse(struct buffer_head *bh)
{
- if (atomic_read(&buf->b_count)) {
- put_bh(buf);
+ if (atomic_read(&bh->b_count)) {
+ put_bh(bh);
return;
}
WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
}
EXPORT_SYMBOL(__brelse);
-/*
- * bforget() is like brelse(), except it discards any
- * potentially dirty data.
+/**
+ * __bforget - Discard any dirty data in a buffer.
+ * @bh: The buffer to forget.
+ *
+ * This variant of bforget() can be called if @bh is guaranteed to not
+ * be NULL.
*/
void __bforget(struct buffer_head *bh)
{
@@ -1415,6 +1424,11 @@ EXPORT_SYMBOL(__find_get_block);
* @size: The size of buffer_heads for this @bdev.
* @gfp: The memory allocation flags to use.
*
+ * The returned buffer head has its reference count incremented, but is
+ * not locked. The caller should call brelse() when it has finished
+ * with the buffer. The buffer may not be uptodate. If needed, the
+ * caller can bring it uptodate either by reading it or overwriting it.
+ *
* Return: The buffer head, or NULL if memory could not be allocated.
*/
struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block,
@@ -1446,24 +1460,33 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
EXPORT_SYMBOL(__breadahead);
/**
- * __bread_gfp() - reads a specified block and returns the bh
- * @bdev: the block_device to read from
- * @block: number of block
- * @size: size (in bytes) to read
- * @gfp: page allocation flag
- *
- * Reads a specified block, and returns buffer head that contains it.
- * The page cache can be allocated from non-movable area
- * not to prevent page migration if you set gfp to zero.
- * It returns NULL if the block was unreadable.
+ * __bread_gfp() - Read a block.
+ * @bdev: The block device to read from.
+ * @block: Block number in units of block size.
+ * @size: The block size of this device in bytes.
+ * @gfp: Not page allocation flags; see below.
+ *
+ * You are not expected to call this function. You should use one of
+ * sb_bread(), sb_bread_unmovable() or __bread().
+ *
+ * Read a specified block, and return the buffer head that refers to it.
+ * If @gfp is 0, the memory will be allocated using the block device's
+ * default GFP flags. If @gfp is __GFP_MOVABLE, the memory may be
+ * allocated from a movable area. Do not pass in a complete set of
+ * GFP flags.
+ *
+ * The returned buffer head has its refcount increased. The caller should
+ * call brelse() when it has finished with the buffer.
+ *
+ * Context: May sleep waiting for I/O.
+ * Return: NULL if the block was unreadable.
*/
-struct buffer_head *
-__bread_gfp(struct block_device *bdev, sector_t block,
- unsigned size, gfp_t gfp)
+struct buffer_head *__bread_gfp(struct block_device *bdev, sector_t block,
+ unsigned size, gfp_t gfp)
{
struct buffer_head *bh;
- gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+ gfp |= mapping_gfp_constraint(bdev->bd_mapping, ~__GFP_FS);
/*
* Prefer looping in the allocator rather than here, at least that
@@ -1696,16 +1719,16 @@ EXPORT_SYMBOL(create_empty_buffers);
*/
void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
{
- struct inode *bd_inode = bdev->bd_inode;
- struct address_space *bd_mapping = bd_inode->i_mapping;
+ struct address_space *bd_mapping = bdev->bd_mapping;
+ const int blkbits = bd_mapping->host->i_blkbits;
struct folio_batch fbatch;
- pgoff_t index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE;
+ pgoff_t index = ((loff_t)block << blkbits) / PAGE_SIZE;
pgoff_t end;
int i, count;
struct buffer_head *bh;
struct buffer_head *head;
- end = ((loff_t)(block + len - 1) << bd_inode->i_blkbits) / PAGE_SIZE;
+ end = ((loff_t)(block + len - 1) << blkbits) / PAGE_SIZE;
folio_batch_init(&fbatch);
while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) {
count = folio_batch_count(&fbatch);
@@ -2861,26 +2884,6 @@ int sync_dirty_buffer(struct buffer_head *bh)
}
EXPORT_SYMBOL(sync_dirty_buffer);
-/*
- * try_to_free_buffers() checks if all the buffers on this particular folio
- * are unused, and releases them if so.
- *
- * Exclusion against try_to_free_buffers may be obtained by either
- * locking the folio or by holding its mapping's i_private_lock.
- *
- * If the folio is dirty but all the buffers are clean then we need to
- * be sure to mark the folio clean as well. This is because the folio
- * may be against a block device, and a later reattachment of buffers
- * to a dirty folio will set *all* buffers dirty. Which would corrupt
- * filesystem data on the same device.
- *
- * The same applies to regular filesystem folios: if all the buffers are
- * clean then we set the folio clean and proceed. To do that, we require
- * total exclusion from block_dirty_folio(). That is obtained with
- * i_private_lock.
- *
- * try_to_free_buffers() is non-blocking.
- */
static inline int buffer_busy(struct buffer_head *bh)
{
return atomic_read(&bh->b_count) |
@@ -2914,6 +2917,30 @@ failed:
return false;
}
+/**
+ * try_to_free_buffers - Release buffers attached to this folio.
+ * @folio: The folio.
+ *
+ * If any buffers are in use (dirty, under writeback, elevated refcount),
+ * no buffers will be freed.
+ *
+ * If the folio is dirty but all the buffers are clean then we need to
+ * be sure to mark the folio clean as well. This is because the folio
+ * may be against a block device, and a later reattachment of buffers
+ * to a dirty folio will set *all* buffers dirty. Which would corrupt
+ * filesystem data on the same device.
+ *
+ * The same applies to regular filesystem folios: if all the buffers are
+ * clean then we set the folio clean and proceed. To do that, we require
+ * total exclusion from block_dirty_folio(). That is obtained with
+ * i_private_lock.
+ *
+ * Exclusion against try_to_free_buffers may be obtained by either
+ * locking the folio or by holding its mapping's i_private_lock.
+ *
+ * Context: Process context. @folio must be locked. Will not sleep.
+ * Return: true if all buffers attached to this folio were freed.
+ */
bool try_to_free_buffers(struct folio *folio)
{
struct address_space * const mapping = folio->mapping;