From 62c230bc1790923a1b35da03596a68a6c9b5b100 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:55 -0700 Subject: mm: add support for a filesystem to activate swap files and use direct_IO for writing swap pages Currently swapfiles are managed entirely by the core VM by using ->bmap to allocate space and write to the blocks directly. This effectively ensures that the underlying blocks are allocated and avoids the need for the swap subsystem to locate what physical blocks store offsets within a file. If the swap subsystem is to use the filesystem information to locate the blocks, it is critical that information such as block groups, block bitmaps and the block descriptor table that map the swap file be resident in memory. This patch adds address_space_operations that the VM can call when activating or deactivating swap backed by a file. int swap_activate(struct file *); int swap_deactivate(struct file *); The ->swap_activate() method is used to communicate to the file that the VM relies on it, and the address_space should take adequate measures such as reserving space in the underlying device, reserving memory for mempools and pinning information such as the block descriptor table in memory. The ->swap_deactivate() method is called on sys_swapoff() if ->swap_activate() returned success. After a successful swapfile ->swap_activate, the swapfile is marked SWP_FILE and swapper_space.a_ops will proxy to sis->swap_file->f_mapping->a_ops using ->direct_IO to write swapcache pages and ->readpage to read. It is perfectly possible that direct_IO be used to read the swap pages but it is an unnecessary complication. Similarly, it is possible that ->writepage be used instead of direct_IO to write the pages but filesystem developers have stated that calling writepage from the VM is undesirable for a variety of reasons and using direct_IO opens up the possibility of writing back batches of swap pages in the future.
[a.p.zijlstra@chello.nl: Original patch] Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Christoph Hellwig Cc: David S. Miller Cc: Eric B Munson Cc: Eric Paris Cc: James Morris Cc: Mel Gorman Cc: Mike Christie Cc: Neil Brown Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Trond Myklebust Cc: Xiaotian Feng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_io.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ mm/swap_state.c | 2 +- mm/swapfile.c | 23 +++++++++++++++++++++-- 3 files changed, 74 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/page_io.c b/mm/page_io.c index 34f02923744c..307a3e795290 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -94,6 +95,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) { struct bio *bio; int ret = 0, rw = WRITE; + struct swap_info_struct *sis = page_swap_info(page); if (try_to_free_swap(page)) { unlock_page(page); @@ -105,6 +107,32 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) end_page_writeback(page); goto out; } + + if (sis->flags & SWP_FILE) { + struct kiocb kiocb; + struct file *swap_file = sis->swap_file; + struct address_space *mapping = swap_file->f_mapping; + struct iovec iov = { + .iov_base = page_address(page), + .iov_len = PAGE_SIZE, + }; + + init_sync_kiocb(&kiocb, swap_file); + kiocb.ki_pos = page_file_offset(page); + kiocb.ki_left = PAGE_SIZE; + kiocb.ki_nbytes = PAGE_SIZE; + + unlock_page(page); + ret = mapping->a_ops->direct_IO(KERNEL_WRITE, + &kiocb, &iov, + kiocb.ki_pos, 1); + if (ret == PAGE_SIZE) { + count_vm_event(PSWPOUT); + ret = 0; + } + return ret; + } + bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write); if (bio == NULL) { set_page_dirty(page); @@ -126,6 +154,7 @@ int swap_readpage(struct page *page) { struct bio *bio; int ret = 0; + struct swap_info_struct *sis = page_swap_info(page); VM_BUG_ON(!PageLocked(page)); 
VM_BUG_ON(PageUptodate(page)); @@ -134,6 +163,17 @@ int swap_readpage(struct page *page) unlock_page(page); goto out; } + + if (sis->flags & SWP_FILE) { + struct file *swap_file = sis->swap_file; + struct address_space *mapping = swap_file->f_mapping; + + ret = mapping->a_ops->readpage(swap_file, page); + if (!ret) + count_vm_event(PSWPIN); + return ret; + } + bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read); if (bio == NULL) { unlock_page(page); @@ -145,3 +185,15 @@ int swap_readpage(struct page *page) out: return ret; } + +int swap_set_page_dirty(struct page *page) +{ + struct swap_info_struct *sis = page_swap_info(page); + + if (sis->flags & SWP_FILE) { + struct address_space *mapping = sis->swap_file->f_mapping; + return mapping->a_ops->set_page_dirty(page); + } else { + return __set_page_dirty_no_writeback(page); + } +} diff --git a/mm/swap_state.c b/mm/swap_state.c index c85b5590cccd..0cb36fb1f61c 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -27,7 +27,7 @@ */ static const struct address_space_operations swap_aops = { .writepage = swap_writepage, - .set_page_dirty = __set_page_dirty_no_writeback, + .set_page_dirty = swap_set_page_dirty, .migratepage = migrate_page, }; diff --git a/mm/swapfile.c b/mm/swapfile.c index f89af5ba2eb2..6ffc87602f4a 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1329,6 +1329,14 @@ static void destroy_swap_extents(struct swap_info_struct *sis) list_del(&se->list); kfree(se); } + + if (sis->flags & SWP_FILE) { + struct file *swap_file = sis->swap_file; + struct address_space *mapping = swap_file->f_mapping; + + sis->flags &= ~SWP_FILE; + mapping->a_ops->swap_deactivate(swap_file); + } } /* @@ -1410,7 +1418,9 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, */ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) { - struct inode *inode; + struct file *swap_file = sis->swap_file; + struct address_space *mapping = swap_file->f_mapping; + struct inode *inode = 
mapping->host; unsigned blocks_per_page; unsigned long page_no; unsigned blkbits; @@ -1421,13 +1431,22 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) int nr_extents = 0; int ret; - inode = sis->swap_file->f_mapping->host; if (S_ISBLK(inode->i_mode)) { ret = add_swap_extent(sis, 0, sis->max, 0); *span = sis->pages; goto out; } + if (mapping->a_ops->swap_activate) { + ret = mapping->a_ops->swap_activate(swap_file); + if (!ret) { + sis->flags |= SWP_FILE; + ret = add_swap_extent(sis, 0, sis->max, 0); + *span = sis->pages; + } + goto out; + } + blkbits = inode->i_blkbits; blocks_per_page = PAGE_SIZE >> blkbits; -- cgit v1.2.3