summary refs log tree commit diff
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/Kconfig 2
-rw-r--r-- drivers/block/brd.c 6
-rw-r--r-- drivers/block/loop.c 42
-rw-r--r-- drivers/block/loop.h 1
-rw-r--r-- drivers/block/null_blk.c 2
-rw-r--r-- drivers/block/ps3vram.c 10
-rw-r--r-- drivers/block/sunvdc.c 61
-rw-r--r-- drivers/block/virtio_blk.c 16
-rw-r--r-- drivers/block/xen-blkback/xenbus.c 10
-rw-r--r-- drivers/block/xen-blkfront.c 6
-rw-r--r-- drivers/block/zram/Kconfig 12
-rw-r--r-- drivers/block/zram/zram_drv.c 544
-rw-r--r-- drivers/block/zram/zram_drv.h 11
13 files changed, 584 insertions, 139 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 5dd62a8c4d60..104180e3c55e 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -471,7 +471,7 @@ config VIRTIO_BLK
depends on VIRTIO
---help---
This is the virtual block driver for virtio. It can be used with
- lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
+ QEMU based VMMs (like KVM or Xen). Say Y or M.
config VIRTIO_BLK_SCSI
bool "SCSI passthrough request for the Virtio block driver"
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 006e1cb7e6f0..bbd0d186cfc0 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -325,7 +325,11 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
struct brd_device *brd = bdev->bd_disk->private_data;
- int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
+ int err;
+
+ if (PageTransHuge(page))
+ return -ENOTSUPP;
+ err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
page_endio(page, is_write, err);
return err;
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 2fbd4089c20e..407cb172d6e3 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -221,8 +221,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
}
static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
- loff_t logical_blocksize)
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
sector_t x = (sector_t)size;
@@ -234,12 +233,6 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
- if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
- lo->lo_logical_blocksize = logical_blocksize;
- blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
- blk_queue_logical_block_size(lo->lo_queue,
- lo->lo_logical_blocksize);
- }
set_capacity(lo->lo_disk, x);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
@@ -820,7 +813,6 @@ static void loop_config_discard(struct loop_device *lo)
struct file *file = lo->lo_backing_file;
struct inode *inode = file->f_mapping->host;
struct request_queue *q = lo->lo_queue;
- int lo_bits = 9;
/*
* We use punch hole to reclaim the free space used by the
@@ -840,11 +832,9 @@ static void loop_config_discard(struct loop_device *lo)
q->limits.discard_granularity = inode->i_sb->s_blocksize;
q->limits.discard_alignment = 0;
- if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
- lo_bits = blksize_bits(lo->lo_logical_blocksize);
- blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
- blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
+ blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
+ blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
@@ -938,7 +928,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
lo->use_dio = false;
lo->lo_blocksize = lo_blocksize;
- lo->lo_logical_blocksize = 512;
lo->lo_device = bdev;
lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
@@ -1104,7 +1093,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
- int lo_flags = lo->lo_flags;
if (lo->lo_encrypt_key_size &&
!uid_eq(lo->lo_key_owner, uid) &&
@@ -1137,26 +1125,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (err)
goto exit;
- if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
- if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
- lo->lo_logical_blocksize = 512;
- lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
- if (LO_INFO_BLOCKSIZE(info) != 512 &&
- LO_INFO_BLOCKSIZE(info) != 1024 &&
- LO_INFO_BLOCKSIZE(info) != 2048 &&
- LO_INFO_BLOCKSIZE(info) != 4096)
- return -EINVAL;
- if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
- return -EINVAL;
- }
-
if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit ||
- lo->lo_flags != lo_flags ||
- ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
- lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
- if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
- LO_INFO_BLOCKSIZE(info))) {
+ lo->lo_sizelimit != info->lo_sizelimit) {
+ if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
err = -EFBIG;
goto exit;
}
@@ -1348,8 +1319,7 @@ static int loop_set_capacity(struct loop_device *lo)
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
- lo->lo_logical_blocksize);
+ return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 2c096b9a17b8..fecd3f97ef8c 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -49,7 +49,6 @@ struct loop_device {
struct file * lo_backing_file;
struct block_device *lo_device;
unsigned lo_blocksize;
- unsigned lo_logical_blocksize;
void *key_data;
gfp_t old_gfp_mask;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index bd922868a861..8042c26ea9e6 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,7 +35,7 @@ static inline u64 mb_per_tick(int mbps)
struct nullb_cmd {
struct list_head list;
struct llist_node ll_list;
- struct call_single_data csd;
+ call_single_data_t csd;
struct request *rq;
struct bio *bio;
unsigned int tag;
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index e0e81cacd781..6a55959cbf78 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -409,10 +409,8 @@ static int ps3vram_cache_init(struct ps3_system_bus_device *dev)
priv->cache.page_size = CACHE_PAGE_SIZE;
priv->cache.tags = kzalloc(sizeof(struct ps3vram_tag) *
CACHE_PAGE_COUNT, GFP_KERNEL);
- if (priv->cache.tags == NULL) {
- dev_err(&dev->core, "Could not allocate cache tags\n");
+ if (!priv->cache.tags)
return -ENOMEM;
- }
dev_info(&dev->core, "Created ram cache: %d entries, %d KiB each\n",
CACHE_PAGE_COUNT, CACHE_PAGE_SIZE / 1024);
@@ -743,7 +741,11 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
goto out_unmap_reports;
}
- ps3vram_cache_init(dev);
+ error = ps3vram_cache_init(dev);
+ if (error < 0) {
+ goto out_unmap_reports;
+ }
+
ps3vram_proc_init(dev);
queue = blk_alloc_queue(GFP_KERNEL);
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 6b16ead1da58..ad9749463d4f 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -875,6 +875,56 @@ static void print_version(void)
printk(KERN_INFO "%s", version);
}
+struct vdc_check_port_data {
+ int dev_no;
+ char *type;
+};
+
+static int vdc_device_probed(struct device *dev, void *arg)
+{
+ struct vio_dev *vdev = to_vio_dev(dev);
+ struct vdc_check_port_data *port_data;
+
+ port_data = (struct vdc_check_port_data *)arg;
+
+ if ((vdev->dev_no == port_data->dev_no) &&
+ (!(strcmp((char *)&vdev->type, port_data->type))) &&
+ dev_get_drvdata(dev)) {
+ /* This device has already been configured
+ * by vdc_port_probe()
+ */
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Determine whether the VIO device is part of an mpgroup
+ * by locating all the virtual-device-port nodes associated
+ * with the parent virtual-device node for the VIO device
+ * and checking whether any of these nodes are vdc-ports
+ * which have already been configured.
+ *
+ * Returns true if this device is part of an mpgroup and has
+ * already been probed.
+ */
+static bool vdc_port_mpgroup_check(struct vio_dev *vdev)
+{
+ struct vdc_check_port_data port_data;
+ struct device *dev;
+
+ port_data.dev_no = vdev->dev_no;
+ port_data.type = (char *)&vdev->type;
+
+ dev = device_find_child(vdev->dev.parent, &port_data,
+ vdc_device_probed);
+
+ if (dev)
+ return true;
+
+ return false;
+}
+
static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
struct mdesc_handle *hp;
@@ -893,6 +943,14 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
goto err_out_release_mdesc;
}
+ /* Check if this device is part of an mpgroup */
+ if (vdc_port_mpgroup_check(vdev)) {
+ printk(KERN_WARNING
+ "VIO: Ignoring extra vdisk port %s",
+ dev_name(&vdev->dev));
+ goto err_out_release_mdesc;
+ }
+
port = kzalloc(sizeof(*port), GFP_KERNEL);
err = -ENOMEM;
if (!port) {
@@ -943,6 +1001,9 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
if (err)
goto err_out_free_tx_ring;
+ /* Note that the device driver_data is used to determine
+ * whether the port has been probed.
+ */
dev_set_drvdata(&vdev->dev, port);
mdesc_release(hp);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0ba1eb911a42..34e17ee799be 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -381,6 +381,7 @@ static void virtblk_config_changed_work(struct work_struct *work)
struct request_queue *q = vblk->disk->queue;
char cap_str_2[10], cap_str_10[10];
char *envp[] = { "RESIZE=1", NULL };
+ unsigned long long nblocks;
u64 capacity;
/* Host must always specify the capacity. */
@@ -393,16 +394,19 @@ static void virtblk_config_changed_work(struct work_struct *work)
capacity = (sector_t)-1;
}
- string_get_size(capacity, queue_logical_block_size(q),
+ nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);
+
+ string_get_size(nblocks, queue_logical_block_size(q),
STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
- string_get_size(capacity, queue_logical_block_size(q),
+ string_get_size(nblocks, queue_logical_block_size(q),
STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
dev_notice(&vdev->dev,
- "new size: %llu %d-byte logical blocks (%s/%s)\n",
- (unsigned long long)capacity,
- queue_logical_block_size(q),
- cap_str_10, cap_str_2);
+ "new size: %llu %d-byte logical blocks (%s/%s)\n",
+ nblocks,
+ queue_logical_block_size(q),
+ cap_str_10,
+ cap_str_2);
set_capacity(vblk->disk, capacity);
revalidate_disk(vblk->disk);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 88eaea6475d7..21c1be1eb226 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -244,6 +244,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
struct pending_req *req, *n;
unsigned int j, r;
+ bool busy = false;
for (r = 0; r < blkif->nr_rings; r++) {
struct xen_blkif_ring *ring = &blkif->rings[r];
@@ -261,8 +262,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
* don't have any discard_io or other_io requests. So, checking
* for inflight IO is enough.
*/
- if (atomic_read(&ring->inflight) > 0)
- return -EBUSY;
+ if (atomic_read(&ring->inflight) > 0) {
+ busy = true;
+ continue;
+ }
if (ring->irq) {
unbind_from_irqhandler(ring->irq, ring);
@@ -300,6 +303,9 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
ring->active = false;
}
+ if (busy)
+ return -EBUSY;
+
blkif->nr_ring_pages = 0;
/*
* blkif->rings was allocated in connect_ring, so we should free it in
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 270019e3e5d8..891265acb10e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2075,9 +2075,9 @@ static int blkfront_resume(struct xenbus_device *dev)
/*
* Get the bios in the request so we can re-queue them.
*/
- if (req_op(shadow[i].request) == REQ_OP_FLUSH ||
- req_op(shadow[i].request) == REQ_OP_DISCARD ||
- req_op(shadow[i].request) == REQ_OP_SECURE_ERASE ||
+ if (req_op(shadow[j].request) == REQ_OP_FLUSH ||
+ req_op(shadow[j].request) == REQ_OP_DISCARD ||
+ req_op(shadow[j].request) == REQ_OP_SECURE_ERASE ||
shadow[j].request->cmd_flags & REQ_FUA) {
/*
* Flush operations don't contain bios, so
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index b8ecba6dcd3b..7cd4a8ec3c8f 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -13,3 +13,15 @@ config ZRAM
disks and maybe many more.
See zram.txt for more information.
+
+config ZRAM_WRITEBACK
+ bool "Write back incompressible page to backing device"
+ depends on ZRAM
+ default n
+ help
+ An incompressible page saves no memory when it is kept in
+ zram, so write it out to the backing device instead.
+ To use this feature, the admin must set up a backing device via
+ /sys/block/zramX/backing_dev.
+
+ See zram.txt for more information.
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 1c3383b4a0cf..4063f3f59f4f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -270,6 +270,349 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}
+#ifdef CONFIG_ZRAM_WRITEBACK
+static bool zram_wb_enabled(struct zram *zram)
+{
+ return zram->backing_dev;
+}
+
+static void reset_bdev(struct zram *zram)
+{
+ struct block_device *bdev;
+
+ if (!zram_wb_enabled(zram))
+ return;
+
+ bdev = zram->bdev;
+ if (zram->old_block_size)
+ set_blocksize(bdev, zram->old_block_size);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+ /* hope filp_close flushes all of the IO */
+ filp_close(zram->backing_dev, NULL);
+ zram->backing_dev = NULL;
+ zram->old_block_size = 0;
+ zram->bdev = NULL;
+
+ kvfree(zram->bitmap);
+ zram->bitmap = NULL;
+}
+
+static ssize_t backing_dev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+ struct file *file = zram->backing_dev;
+ char *p;
+ ssize_t ret;
+
+ down_read(&zram->init_lock);
+ if (!zram_wb_enabled(zram)) {
+ memcpy(buf, "none\n", 5);
+ up_read(&zram->init_lock);
+ return 5;
+ }
+
+ p = file_path(file, buf, PAGE_SIZE - 1);
+ if (IS_ERR(p)) {
+ ret = PTR_ERR(p);
+ goto out;
+ }
+
+ ret = strlen(p);
+ memmove(buf, p, ret);
+ buf[ret++] = '\n';
+out:
+ up_read(&zram->init_lock);
+ return ret;
+}
+
+static ssize_t backing_dev_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ char *file_name;
+ struct file *backing_dev = NULL;
+ struct inode *inode;
+ struct address_space *mapping;
+ unsigned int bitmap_sz, old_block_size = 0;
+ unsigned long nr_pages, *bitmap = NULL;
+ struct block_device *bdev = NULL;
+ int err;
+ struct zram *zram = dev_to_zram(dev);
+
+ file_name = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!file_name)
+ return -ENOMEM;
+
+ down_write(&zram->init_lock);
+ if (init_done(zram)) {
+ pr_info("Can't setup backing device for initialized device\n");
+ err = -EBUSY;
+ goto out;
+ }
+
+ strlcpy(file_name, buf, len);
+
+ backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
+ if (IS_ERR(backing_dev)) {
+ err = PTR_ERR(backing_dev);
+ backing_dev = NULL;
+ goto out;
+ }
+
+ mapping = backing_dev->f_mapping;
+ inode = mapping->host;
+
+ /* Only block devices are supported at the moment */
+ if (!S_ISBLK(inode->i_mode)) {
+ err = -ENOTBLK;
+ goto out;
+ }
+
+ bdev = bdgrab(I_BDEV(inode));
+ err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
+ if (err < 0)
+ goto out;
+
+ nr_pages = i_size_read(inode) >> PAGE_SHIFT;
+ bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
+ bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
+ if (!bitmap) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ old_block_size = block_size(bdev);
+ err = set_blocksize(bdev, PAGE_SIZE);
+ if (err)
+ goto out;
+
+ reset_bdev(zram);
+ spin_lock_init(&zram->bitmap_lock);
+
+ zram->old_block_size = old_block_size;
+ zram->bdev = bdev;
+ zram->backing_dev = backing_dev;
+ zram->bitmap = bitmap;
+ zram->nr_pages = nr_pages;
+ up_write(&zram->init_lock);
+
+ pr_info("setup backing device %s\n", file_name);
+ kfree(file_name);
+
+ return len;
+out:
+ if (bitmap)
+ kvfree(bitmap);
+
+ if (bdev)
+ blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+
+ if (backing_dev)
+ filp_close(backing_dev, NULL);
+
+ up_write(&zram->init_lock);
+
+ kfree(file_name);
+
+ return err;
+}
+
+static unsigned long get_entry_bdev(struct zram *zram)
+{
+ unsigned long entry;
+
+ spin_lock(&zram->bitmap_lock);
+ /* skip bit 0 so a valid entry is never confused with zram.handle == 0 */
+ entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
+ if (entry == zram->nr_pages) {
+ spin_unlock(&zram->bitmap_lock);
+ return 0;
+ }
+
+ set_bit(entry, zram->bitmap);
+ spin_unlock(&zram->bitmap_lock);
+
+ return entry;
+}
+
+static void put_entry_bdev(struct zram *zram, unsigned long entry)
+{
+ int was_set;
+
+ spin_lock(&zram->bitmap_lock);
+ was_set = test_and_clear_bit(entry, zram->bitmap);
+ spin_unlock(&zram->bitmap_lock);
+ WARN_ON_ONCE(!was_set);
+}
+
+void zram_page_end_io(struct bio *bio)
+{
+ struct page *page = bio->bi_io_vec[0].bv_page;
+
+ page_endio(page, op_is_write(bio_op(bio)),
+ blk_status_to_errno(bio->bi_status));
+ bio_put(bio);
+}
+
+/*
+ * Returns 1 if the submission is successful.
+ */
+static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent)
+{
+ struct bio *bio;
+
+ bio = bio_alloc(GFP_ATOMIC, 1);
+ if (!bio)
+ return -ENOMEM;
+
+ bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
+ bio_set_dev(bio, zram->bdev);
+ if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
+ bio_put(bio);
+ return -EIO;
+ }
+
+ if (!parent) {
+ bio->bi_opf = REQ_OP_READ;
+ bio->bi_end_io = zram_page_end_io;
+ } else {
+ bio->bi_opf = parent->bi_opf;
+ bio_chain(bio, parent);
+ }
+
+ submit_bio(bio);
+ return 1;
+}
+
+struct zram_work {
+ struct work_struct work;
+ struct zram *zram;
+ unsigned long entry;
+ struct bio *bio;
+};
+
+#if PAGE_SIZE != 4096
+static void zram_sync_read(struct work_struct *work)
+{
+ struct bio_vec bvec;
+ struct zram_work *zw = container_of(work, struct zram_work, work);
+ struct zram *zram = zw->zram;
+ unsigned long entry = zw->entry;
+ struct bio *bio = zw->bio;
+
+ read_from_bdev_async(zram, &bvec, entry, bio);
+}
+
+/*
+ * The block layer wants only one ->make_request_fn to be active at a
+ * time, so chaining IO to the parent IO in the same context would
+ * deadlock. To avoid that, the read is issued from a worker thread.
+ */
+static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *bio)
+{
+ struct zram_work work;
+
+ work.zram = zram;
+ work.entry = entry;
+ work.bio = bio;
+
+ INIT_WORK_ONSTACK(&work.work, zram_sync_read);
+ queue_work(system_unbound_wq, &work.work);
+ flush_work(&work.work);
+ destroy_work_on_stack(&work.work);
+
+ return 1;
+}
+#else
+static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *bio)
+{
+ WARN_ON(1);
+ return -EIO;
+}
+#endif
+
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent, bool sync)
+{
+ if (sync)
+ return read_from_bdev_sync(zram, bvec, entry, parent);
+ else
+ return read_from_bdev_async(zram, bvec, entry, parent);
+}
+
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *parent,
+ unsigned long *pentry)
+{
+ struct bio *bio;
+ unsigned long entry;
+
+ bio = bio_alloc(GFP_ATOMIC, 1);
+ if (!bio)
+ return -ENOMEM;
+
+ entry = get_entry_bdev(zram);
+ if (!entry) {
+ bio_put(bio);
+ return -ENOSPC;
+ }
+
+ bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
+ bio_set_dev(bio, zram->bdev);
+ if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
+ bvec->bv_offset)) {
+ bio_put(bio);
+ put_entry_bdev(zram, entry);
+ return -EIO;
+ }
+
+ if (!parent) {
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
+ bio->bi_end_io = zram_page_end_io;
+ } else {
+ bio->bi_opf = parent->bi_opf;
+ bio_chain(bio, parent);
+ }
+
+ submit_bio(bio);
+ *pentry = entry;
+
+ return 0;
+}
+
+static void zram_wb_clear(struct zram *zram, u32 index)
+{
+ unsigned long entry;
+
+ zram_clear_flag(zram, index, ZRAM_WB);
+ entry = zram_get_element(zram, index);
+ zram_set_element(zram, index, 0);
+ put_entry_bdev(zram, entry);
+}
+
+#else
+static bool zram_wb_enabled(struct zram *zram) { return false; }
+static inline void reset_bdev(struct zram *zram) {};
+static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *parent,
+ unsigned long *pentry)
+
+{
+ return -EIO;
+}
+
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+ unsigned long entry, struct bio *parent, bool sync)
+{
+ return -EIO;
+}
+static void zram_wb_clear(struct zram *zram, u32 index) {}
+#endif
+
+
/*
* We switched to per-cpu streams and this attr is not needed anymore.
* However, we will keep it around for some time, because:
@@ -308,7 +651,7 @@ static ssize_t comp_algorithm_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
- char compressor[CRYPTO_MAX_ALG_NAME];
+ char compressor[ARRAY_SIZE(zram->compressor)];
size_t sz;
strlcpy(compressor, buf, sizeof(compressor));
@@ -327,7 +670,7 @@ static ssize_t comp_algorithm_store(struct device *dev,
return -EBUSY;
}
- strlcpy(zram->compressor, compressor, sizeof(compressor));
+ strcpy(zram->compressor, compressor);
up_write(&zram->init_lock);
return len;
}
@@ -453,30 +796,6 @@ static bool zram_same_page_read(struct zram *zram, u32 index,
return false;
}
-static bool zram_same_page_write(struct zram *zram, u32 index,
- struct page *page)
-{
- unsigned long element;
- void *mem = kmap_atomic(page);
-
- if (page_same_filled(mem, &element)) {
- kunmap_atomic(mem);
- /* Free memory associated with this sector now. */
- zram_slot_lock(zram, index);
- zram_free_page(zram, index);
- zram_set_flag(zram, index, ZRAM_SAME);
- zram_set_element(zram, index, element);
- zram_slot_unlock(zram, index);
-
- atomic64_inc(&zram->stats.same_pages);
- atomic64_inc(&zram->stats.pages_stored);
- return true;
- }
- kunmap_atomic(mem);
-
- return false;
-}
-
static void zram_meta_free(struct zram *zram, u64 disksize)
{
size_t num_pages = disksize >> PAGE_SHIFT;
@@ -515,7 +834,13 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
*/
static void zram_free_page(struct zram *zram, size_t index)
{
- unsigned long handle = zram_get_handle(zram, index);
+ unsigned long handle;
+
+ if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
+ zram_wb_clear(zram, index);
+ atomic64_dec(&zram->stats.pages_stored);
+ return;
+ }
/*
* No memory is allocated for same element filled pages.
@@ -529,6 +854,7 @@ static void zram_free_page(struct zram *zram, size_t index)
return;
}
+ handle = zram_get_handle(zram, index);
if (!handle)
return;
@@ -542,13 +868,31 @@ static void zram_free_page(struct zram *zram, size_t index)
zram_set_obj_size(zram, index, 0);
}
-static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
+static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
+ struct bio *bio, bool partial_io)
{
int ret;
unsigned long handle;
unsigned int size;
void *src, *dst;
+ if (zram_wb_enabled(zram)) {
+ zram_slot_lock(zram, index);
+ if (zram_test_flag(zram, index, ZRAM_WB)) {
+ struct bio_vec bvec;
+
+ zram_slot_unlock(zram, index);
+
+ bvec.bv_page = page;
+ bvec.bv_len = PAGE_SIZE;
+ bvec.bv_offset = 0;
+ return read_from_bdev(zram, &bvec,
+ zram_get_element(zram, index),
+ bio, partial_io);
+ }
+ zram_slot_unlock(zram, index);
+ }
+
if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE))
return 0;
@@ -581,7 +925,7 @@ static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset)
+ u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page;
@@ -594,7 +938,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
return -ENOMEM;
}
- ret = zram_decompress_page(zram, page, index);
+ ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
if (unlikely(ret))
goto out;
@@ -613,30 +957,57 @@ out:
return ret;
}
-static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
- struct page *page,
- unsigned long *out_handle, unsigned int *out_comp_len)
+static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *bio)
{
- int ret;
- unsigned int comp_len;
- void *src;
+ int ret = 0;
unsigned long alloced_pages;
unsigned long handle = 0;
+ unsigned int comp_len = 0;
+ void *src, *dst, *mem;
+ struct zcomp_strm *zstrm;
+ struct page *page = bvec->bv_page;
+ unsigned long element = 0;
+ enum zram_pageflags flags = 0;
+ bool allow_wb = true;
+
+ mem = kmap_atomic(page);
+ if (page_same_filled(mem, &element)) {
+ kunmap_atomic(mem);
+ /* Free memory associated with this sector now. */
+ flags = ZRAM_SAME;
+ atomic64_inc(&zram->stats.same_pages);
+ goto out;
+ }
+ kunmap_atomic(mem);
compress_again:
+ zstrm = zcomp_stream_get(zram->comp);
src = kmap_atomic(page);
- ret = zcomp_compress(*zstrm, src, &comp_len);
+ ret = zcomp_compress(zstrm, src, &comp_len);
kunmap_atomic(src);
if (unlikely(ret)) {
+ zcomp_stream_put(zram->comp);
pr_err("Compression failed! err=%d\n", ret);
- if (handle)
- zs_free(zram->mem_pool, handle);
+ zs_free(zram->mem_pool, handle);
return ret;
}
- if (unlikely(comp_len > max_zpage_size))
+ if (unlikely(comp_len > max_zpage_size)) {
+ if (zram_wb_enabled(zram) && allow_wb) {
+ zcomp_stream_put(zram->comp);
+ ret = write_to_bdev(zram, bvec, index, bio, &element);
+ if (!ret) {
+ flags = ZRAM_WB;
+ ret = 1;
+ goto out;
+ }
+ allow_wb = false;
+ goto compress_again;
+ }
comp_len = PAGE_SIZE;
+ }
/*
* handle allocation has 2 paths:
@@ -663,7 +1034,6 @@ compress_again:
handle = zs_malloc(zram->mem_pool, comp_len,
GFP_NOIO | __GFP_HIGHMEM |
__GFP_MOVABLE);
- *zstrm = zcomp_stream_get(zram->comp);
if (handle)
goto compress_again;
return -ENOMEM;
@@ -673,34 +1043,11 @@ compress_again:
update_used_max(zram, alloced_pages);
if (zram->limit_pages && alloced_pages > zram->limit_pages) {
+ zcomp_stream_put(zram->comp);
zs_free(zram->mem_pool, handle);
return -ENOMEM;
}
- *out_handle = handle;
- *out_comp_len = comp_len;
- return 0;
-}
-
-static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
-{
- int ret;
- unsigned long handle;
- unsigned int comp_len;
- void *src, *dst;
- struct zcomp_strm *zstrm;
- struct page *page = bvec->bv_page;
-
- if (zram_same_page_write(zram, index, page))
- return 0;
-
- zstrm = zcomp_stream_get(zram->comp);
- ret = zram_compress(zram, &zstrm, page, &handle, &comp_len);
- if (ret) {
- zcomp_stream_put(zram->comp);
- return ret;
- }
-
dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
src = zstrm->buffer;
@@ -712,25 +1059,31 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
zcomp_stream_put(zram->comp);
zs_unmap_object(zram->mem_pool, handle);
-
+ atomic64_add(comp_len, &zram->stats.compr_data_size);
+out:
/*
* Free memory associated with this sector
* before overwriting unused sectors.
*/
zram_slot_lock(zram, index);
zram_free_page(zram, index);
- zram_set_handle(zram, index, handle);
- zram_set_obj_size(zram, index, comp_len);
+
+ if (flags) {
+ zram_set_flag(zram, index, flags);
+ zram_set_element(zram, index, element);
+ } else {
+ zram_set_handle(zram, index, handle);
+ zram_set_obj_size(zram, index, comp_len);
+ }
zram_slot_unlock(zram, index);
/* Update stats */
- atomic64_add(comp_len, &zram->stats.compr_data_size);
atomic64_inc(&zram->stats.pages_stored);
- return 0;
+ return ret;
}
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset)
+ u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page = NULL;
@@ -748,7 +1101,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
if (!page)
return -ENOMEM;
- ret = zram_decompress_page(zram, page, index);
+ ret = __zram_bvec_read(zram, page, index, bio, true);
if (ret)
goto out;
@@ -763,7 +1116,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
vec.bv_offset = 0;
}
- ret = __zram_bvec_write(zram, &vec, index);
+ ret = __zram_bvec_write(zram, &vec, index, bio);
out:
if (is_partial_io(bvec))
__free_page(page);
@@ -808,8 +1161,13 @@ static void zram_bio_discard(struct zram *zram, u32 index,
}
}
+/*
+ * Returns a negative errno on failure. Otherwise returns 0 or 1:
+ * 0 if the IO request was completed synchronously,
+ * 1 if the IO request was successfully submitted.
+ */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset, bool is_write)
+ int offset, bool is_write, struct bio *bio)
{
unsigned long start_time = jiffies;
int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
@@ -821,16 +1179,16 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
if (!is_write) {
atomic64_inc(&zram->stats.num_reads);
- ret = zram_bvec_read(zram, bvec, index, offset);
+ ret = zram_bvec_read(zram, bvec, index, offset, bio);
flush_dcache_page(bvec->bv_page);
} else {
atomic64_inc(&zram->stats.num_writes);
- ret = zram_bvec_write(zram, bvec, index, offset);
+ ret = zram_bvec_write(zram, bvec, index, offset, bio);
}
generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);
- if (unlikely(ret)) {
+ if (unlikely(ret < 0)) {
if (!is_write)
atomic64_inc(&zram->stats.failed_reads);
else
@@ -869,7 +1227,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
unwritten);
if (zram_bvec_rw(zram, &bv, index, offset,
- op_is_write(bio_op(bio))) < 0)
+ op_is_write(bio_op(bio)), bio) < 0)
goto out;
bv.bv_offset += bv.bv_len;
@@ -923,16 +1281,18 @@ static void zram_slot_free_notify(struct block_device *bdev,
static int zram_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
- int offset, err = -EIO;
+ int offset, ret;
u32 index;
struct zram *zram;
struct bio_vec bv;
+ if (PageTransHuge(page))
+ return -ENOTSUPP;
zram = bdev->bd_disk->private_data;
if (!valid_io_request(zram, sector, PAGE_SIZE)) {
atomic64_inc(&zram->stats.invalid_io);
- err = -EINVAL;
+ ret = -EINVAL;
goto out;
}
@@ -943,7 +1303,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;
- err = zram_bvec_rw(zram, &bv, index, offset, is_write);
+ ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
out:
/*
* If I/O fails, just return error(ie, non-zero) without
@@ -953,9 +1313,20 @@ out:
* bio->bi_end_io does things to handle the error
* (e.g., SetPageError, set_page_dirty and extra works).
*/
- if (err == 0)
+ if (unlikely(ret < 0))
+ return ret;
+
+ switch (ret) {
+ case 0:
page_endio(page, is_write, 0);
- return err;
+ break;
+ case 1:
+ ret = 0;
+ break;
+ default:
+ WARN_ON(1);
+ }
+ return ret;
}
static void zram_reset_device(struct zram *zram)
@@ -984,6 +1355,7 @@ static void zram_reset_device(struct zram *zram)
zram_meta_free(zram, disksize);
memset(&zram->stats, 0, sizeof(zram->stats));
zcomp_destroy(comp);
+ reset_bdev(zram);
}
static ssize_t disksize_store(struct device *dev,
@@ -1109,6 +1481,9 @@ static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
+#ifdef CONFIG_ZRAM_WRITEBACK
+static DEVICE_ATTR_RW(backing_dev);
+#endif
static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
@@ -1119,6 +1494,9 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_mem_used_max.attr,
&dev_attr_max_comp_streams.attr,
&dev_attr_comp_algorithm.attr,
+#ifdef CONFIG_ZRAM_WRITEBACK
+ &dev_attr_backing_dev.attr,
+#endif
&dev_attr_io_stat.attr,
&dev_attr_mm_stat.attr,
&dev_attr_debug_stat.attr,
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index e34e44d02e3e..31762db861e3 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -60,9 +60,10 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
/* Flags for zram pages (table[page_no].value) */
enum zram_pageflags {
- /* Page consists entirely of zeros */
+ /* Page is filled entirely with the same element */
ZRAM_SAME = ZRAM_FLAG_SHIFT,
ZRAM_ACCESS, /* page is now accessed */
+ ZRAM_WB, /* page is stored on backing_device */
__NR_ZRAM_PAGEFLAGS,
};
@@ -115,5 +116,13 @@ struct zram {
* zram is claimed so open request will be failed
*/
bool claim; /* Protected by bdev->bd_mutex */
+#ifdef CONFIG_ZRAM_WRITEBACK
+ struct file *backing_dev;
+ struct block_device *bdev;
+ unsigned int old_block_size;
+ unsigned long *bitmap;
+ unsigned long nr_pages;
+ spinlock_t bitmap_lock;
+#endif
};
#endif