summaryrefslogtreecommitdiff
path: root/drivers/md/dm-writecache.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-writecache.c')
-rw-r--r--drivers/md/dm-writecache.c138
1 files changed, 129 insertions, 9 deletions
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index a09bdc000e64..114927da9cc9 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -26,6 +26,8 @@
#define AUTOCOMMIT_BLOCKS_SSD 65536
#define AUTOCOMMIT_BLOCKS_PMEM 64
#define AUTOCOMMIT_MSEC 1000
+#define MAX_AGE_DIV 16 /* wc->max_age is divided by this to get the max_age_timer period */
+#define MAX_AGE_UNSPECIFIED -1UL /* initial wc->max_age set by the constructor; the timer is not armed on resume while it holds */
#define BITMAP_GRANULARITY 65536
#if BITMAP_GRANULARITY < PAGE_SIZE
@@ -88,6 +90,7 @@ struct wc_entry {
:47
#endif
;
+ unsigned long age;
#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
uint64_t original_sector;
uint64_t seq_count;
@@ -119,6 +122,7 @@ struct dm_writecache {
size_t writeback_size;
size_t freelist_high_watermark;
size_t freelist_low_watermark;
+ unsigned long max_age;
unsigned uncommitted_blocks;
unsigned autocommit_blocks;
@@ -130,6 +134,8 @@ struct dm_writecache {
struct timer_list autocommit_timer;
struct wait_queue_head freelist_wait;
+ struct timer_list max_age_timer;
+
atomic_t bio_in_progress[2];
struct wait_queue_head bio_in_progress_wait[2];
@@ -160,6 +166,7 @@ struct dm_writecache {
bool autocommit_time_set:1;
bool writeback_fua_set:1;
bool flush_on_suspend:1;
+ bool cleaner:1;
unsigned writeback_all;
struct workqueue_struct *writeback_wq;
@@ -502,6 +509,34 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size);
}
+/*
+ * Write the first page of the in-memory metadata (wc->memory_map, where the
+ * superblock lives) back to the start of the metadata area on the SSD cache
+ * device, using a REQ_SYNC | REQ_FUA write.
+ *
+ * No completion callback is supplied, so the result of dm_io() is checked
+ * right here; a failure is recorded through writecache_error().
+ */
+static void ssd_commit_superblock(struct dm_writecache *wc)
+{
+	int r;
+	struct dm_io_region region;
+	struct dm_io_request req;
+
+	region.bdev = wc->ssd_dev->bdev;
+	region.sector = 0;
+	/*
+	 * region.count is a number of 512-byte sectors, not bytes: convert
+	 * the page size, otherwise PAGE_SIZE sectors would be written on
+	 * every commit.
+	 */
+	region.count = PAGE_SIZE >> SECTOR_SHIFT;
+
+	/* Clamp the write so it never runs past the metadata area. */
+	if (unlikely(region.sector + region.count > wc->metadata_sectors))
+		region.count = wc->metadata_sectors - region.sector;
+
+	/* Translate from a metadata-relative to a device-relative sector. */
+	region.sector += wc->start_sector;
+
+	req.bi_op = REQ_OP_WRITE;
+	req.bi_op_flags = REQ_SYNC | REQ_FUA;
+	req.mem.type = DM_IO_VMA;
+	req.mem.ptr.vma = (char *)wc->memory_map;
+	req.client = wc->dm_io;
+	req.notify.fn = NULL;
+	req.notify.context = NULL;
+
+	r = dm_io(&req, 1, &region, NULL);
+	if (unlikely(r))
+		writecache_error(wc, r, "error writing superblock");
+}
+
static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
{
if (WC_MODE_PMEM(wc))
@@ -596,6 +631,7 @@ static void writecache_insert_entry(struct dm_writecache *wc, struct wc_entry *i
rb_link_node(&ins->rb_node, parent, node);
rb_insert_color(&ins->rb_node, &wc->tree);
list_add(&ins->lru, &wc->lru);
+ ins->age = jiffies;
}
static void writecache_unlink(struct dm_writecache *wc, struct wc_entry *e)
@@ -631,6 +667,16 @@ static inline void writecache_verify_watermark(struct dm_writecache *wc)
queue_work(wc->writeback_wq, &wc->writeback_work);
}
+/*
+ * Callback of wc->max_age_timer.
+ *
+ * Queues the writeback work item and re-arms the timer to fire again after
+ * max_age / MAX_AGE_DIV jiffies.  When the target is suspended or an error
+ * has been recorded, nothing is queued and the timer is not re-armed.
+ */
+static void writecache_max_age_timer(struct timer_list *t)
+{
+	struct dm_writecache *wc = from_timer(wc, t, max_age_timer);
+	unsigned long next_expiry;
+
+	if (dm_suspended(wc->ti) || writecache_has_error(wc))
+		return;
+
+	queue_work(wc->writeback_wq, &wc->writeback_work);
+
+	next_expiry = jiffies + wc->max_age / MAX_AGE_DIV;
+	mod_timer(&wc->max_age_timer, next_expiry);
+}
+
static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector)
{
struct wc_entry *e;
@@ -741,8 +787,10 @@ static void writecache_flush(struct dm_writecache *wc)
wc->seq_count++;
pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
- writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count);
- writecache_commit_flushed(wc, false);
+ if (WC_MODE_PMEM(wc))
+ writecache_commit_flushed(wc, false);
+ else
+ ssd_commit_superblock(wc);
wc->overwrote_committed = false;
@@ -837,6 +885,7 @@ static void writecache_suspend(struct dm_target *ti)
bool flush_on_suspend;
del_timer_sync(&wc->autocommit_timer);
+ del_timer_sync(&wc->max_age_timer);
wc_lock(wc);
writecache_flush(wc);
@@ -876,6 +925,7 @@ static int writecache_alloc_entries(struct dm_writecache *wc)
struct wc_entry *e = &wc->entries[b];
e->index = b;
e->write_in_progress = false;
+ cond_resched();
}
return 0;
@@ -930,6 +980,7 @@ static void writecache_resume(struct dm_target *ti)
e->original_sector = le64_to_cpu(wme.original_sector);
e->seq_count = le64_to_cpu(wme.seq_count);
}
+ cond_resched();
}
#endif
for (b = 0; b < wc->n_blocks; b++) {
@@ -973,6 +1024,9 @@ erase_this:
writecache_verify_watermark(wc);
+ if (wc->max_age != MAX_AGE_UNSPECIFIED)
+ mod_timer(&wc->max_age_timer, jiffies + wc->max_age / MAX_AGE_DIV);
+
wc_unlock(wc);
}
@@ -1021,6 +1075,28 @@ static int process_flush_on_suspend_mesg(unsigned argc, char **argv, struct dm_w
return 0;
}
+/*
+ * Switch the cache into "cleaner" mode.
+ *
+ * Sets the cleaner and flush_on_suspend flags and raises both freelist
+ * watermarks to the total number of cache blocks.
+ * NOTE(review): presumably this makes the writeback watermark test hold
+ * until the cache is empty - confirm against the writeback loop.
+ */
+static void activate_cleaner(struct dm_writecache *wc)
+{
+	const size_t all_blocks = wc->n_blocks;
+
+	wc->freelist_low_watermark = all_blocks;
+	wc->freelist_high_watermark = all_blocks;
+
+	wc->cleaner = true;
+	wc->flush_on_suspend = true;
+}
+
+/*
+ * Handler for the "cleaner" target message.
+ *
+ * The message takes no arguments, so argc (which includes the message name
+ * in argv[0]) must be exactly 1.  Under the writecache lock, cleaner mode is
+ * activated and, unless the target is suspended, the watermarks are
+ * re-verified.
+ *
+ * Returns 0 on success, -EINVAL on a wrong argument count.
+ */
+static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache *wc)
+{
+	if (argc == 1) {
+		wc_lock(wc);
+		activate_cleaner(wc);
+		if (!dm_suspended(wc->ti))
+			writecache_verify_watermark(wc);
+		wc_unlock(wc);
+
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
static int writecache_message(struct dm_target *ti, unsigned argc, char **argv,
char *result, unsigned maxlen)
{
@@ -1031,6 +1107,8 @@ static int writecache_message(struct dm_target *ti, unsigned argc, char **argv,
r = process_flush_mesg(argc, argv, wc);
else if (!strcasecmp(argv[0], "flush_on_suspend"))
r = process_flush_on_suspend_mesg(argc, argv, wc);
+ else if (!strcasecmp(argv[0], "cleaner"))
+ r = process_cleaner_mesg(argc, argv, wc);
else
DMERR("unrecognised message received: %s", argv[0]);
@@ -1194,6 +1272,7 @@ read_next_block:
}
} else {
do {
+ bool found_entry = false;
if (writecache_has_error(wc))
goto unlock_error;
e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
@@ -1204,9 +1283,25 @@ read_next_block:
wc->overwrote_committed = true;
goto bio_copy;
}
+ found_entry = true;
+ } else {
+ if (unlikely(wc->cleaner))
+ goto direct_write;
}
e = writecache_pop_from_freelist(wc, (sector_t)-1);
if (unlikely(!e)) {
+ if (!found_entry) {
+direct_write:
+ e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
+ if (e) {
+ sector_t next_boundary = read_original_sector(wc, e) - bio->bi_iter.bi_sector;
+ BUG_ON(!next_boundary);
+ if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) {
+ dm_accept_partial_bio(bio, next_boundary);
+ }
+ }
+ goto unlock_remap_origin;
+ }
writecache_wait_on_freelist(wc);
continue;
}
@@ -1619,7 +1714,9 @@ restart:
wbl.size = 0;
while (!list_empty(&wc->lru) &&
(wc->writeback_all ||
- wc->freelist_size + wc->writeback_size <= wc->freelist_low_watermark)) {
+ wc->freelist_size + wc->writeback_size <= wc->freelist_low_watermark ||
+ (jiffies - container_of(wc->lru.prev, struct wc_entry, lru)->age >=
+ wc->max_age - wc->max_age / MAX_AGE_DIV))) {
n_walked++;
if (unlikely(n_walked > WRITEBACK_LATENCY) &&
@@ -1791,8 +1888,10 @@ static int init_memory(struct dm_writecache *wc)
pmem_assign(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks));
pmem_assign(sb(wc)->seq_count, cpu_to_le64(0));
- for (b = 0; b < wc->n_blocks; b++)
+ for (b = 0; b < wc->n_blocks; b++) {
write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);
+ cond_resched();
+ }
writecache_flush_all_metadata(wc);
writecache_commit_flushed(wc, false);
@@ -1882,9 +1981,11 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
wc->ti = ti;
mutex_init(&wc->lock);
+ wc->max_age = MAX_AGE_UNSPECIFIED;
writecache_poison_lists(wc);
init_waitqueue_head(&wc->freelist_wait);
timer_setup(&wc->autocommit_timer, writecache_autocommit_timer, 0);
+ timer_setup(&wc->max_age_timer, writecache_max_age_timer, 0);
for (i = 0; i < 2; i++) {
atomic_set(&wc->bio_in_progress[i], 0);
@@ -2058,6 +2159,16 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto invalid_optional;
wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs);
wc->autocommit_time_set = true;
+ } else if (!strcasecmp(string, "max_age") && opt_params >= 1) {
+ unsigned max_age_msecs;
+ string = dm_shift_arg(&as), opt_params--;
+ if (sscanf(string, "%u%c", &max_age_msecs, &dummy) != 1)
+ goto invalid_optional;
+ if (max_age_msecs > 86400000)
+ goto invalid_optional;
+ wc->max_age = msecs_to_jiffies(max_age_msecs);
+ } else if (!strcasecmp(string, "cleaner")) {
+ wc->cleaner = true;
} else if (!strcasecmp(string, "fua")) {
if (WC_MODE_PMEM(wc)) {
wc->writeback_fua = true;
@@ -2235,6 +2346,9 @@ overflow:
do_div(x, 100);
wc->freelist_low_watermark = x;
+ if (wc->cleaner)
+ activate_cleaner(wc);
+
r = writecache_alloc_entries(wc);
if (r) {
ti->error = "Cannot allocate memory";
@@ -2278,9 +2392,9 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
extra_args = 0;
if (wc->start_sector)
extra_args += 2;
- if (wc->high_wm_percent_set)
+ if (wc->high_wm_percent_set && !wc->cleaner)
extra_args += 2;
- if (wc->low_wm_percent_set)
+ if (wc->low_wm_percent_set && !wc->cleaner)
extra_args += 2;
if (wc->max_writeback_jobs_set)
extra_args += 2;
@@ -2288,19 +2402,21 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
extra_args += 2;
if (wc->autocommit_time_set)
extra_args += 2;
+ if (wc->cleaner)
+ extra_args++;
if (wc->writeback_fua_set)
extra_args++;
DMEMIT("%u", extra_args);
if (wc->start_sector)
DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector);
- if (wc->high_wm_percent_set) {
+ if (wc->high_wm_percent_set && !wc->cleaner) {
x = (uint64_t)wc->freelist_high_watermark * 100;
x += wc->n_blocks / 2;
do_div(x, (size_t)wc->n_blocks);
DMEMIT(" high_watermark %u", 100 - (unsigned)x);
}
- if (wc->low_wm_percent_set) {
+ if (wc->low_wm_percent_set && !wc->cleaner) {
x = (uint64_t)wc->freelist_low_watermark * 100;
x += wc->n_blocks / 2;
do_div(x, (size_t)wc->n_blocks);
@@ -2312,6 +2428,10 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks);
if (wc->autocommit_time_set)
DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies));
+ if (wc->max_age != MAX_AGE_UNSPECIFIED)
+ DMEMIT(" max_age %u", jiffies_to_msecs(wc->max_age));
+ if (wc->cleaner)
+ DMEMIT(" cleaner");
if (wc->writeback_fua_set)
DMEMIT(" %sfua", wc->writeback_fua ? "" : "no");
break;
@@ -2320,7 +2440,7 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
static struct target_type writecache_target = {
.name = "writecache",
- .version = {1, 2, 0},
+ .version = {1, 3, 0},
.module = THIS_MODULE,
.ctr = writecache_ctr,
.dtr = writecache_dtr,