Diffstat (limited to 'drivers/md/dm-writecache.c')
-rw-r--r--  drivers/md/dm-writecache.c | 171
1 file changed, 98 insertions, 73 deletions
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 96a003eb7323..3aa5874f0aef 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2018 Red Hat. All rights reserved.
*
@@ -83,16 +83,13 @@ struct wc_entry {
struct rb_node rb_node;
struct list_head lru;
unsigned short wc_list_contiguous;
- bool write_in_progress
#if BITS_PER_LONG == 64
- :1
-#endif
- ;
- unsigned long index
-#if BITS_PER_LONG == 64
- :47
+ bool write_in_progress : 1;
+ unsigned long index : 47;
+#else
+ bool write_in_progress;
+ unsigned long index;
#endif
- ;
unsigned long age;
#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
uint64_t original_sector;
@@ -128,9 +125,9 @@ struct dm_writecache {
unsigned long max_age;
unsigned long pause;
- unsigned uncommitted_blocks;
- unsigned autocommit_blocks;
- unsigned max_writeback_jobs;
+ unsigned int uncommitted_blocks;
+ unsigned int autocommit_blocks;
+ unsigned int max_writeback_jobs;
int error;
@@ -155,7 +152,7 @@ struct dm_writecache {
sector_t data_device_sectors;
void *block_start;
struct wc_entry *entries;
- unsigned block_size;
+ unsigned int block_size;
unsigned char block_size_bits;
bool pmem_mode:1;
@@ -178,13 +175,13 @@ struct dm_writecache {
bool metadata_only:1;
bool pause_set:1;
- unsigned high_wm_percent_value;
- unsigned low_wm_percent_value;
- unsigned autocommit_time_value;
- unsigned max_age_value;
- unsigned pause_value;
+ unsigned int high_wm_percent_value;
+ unsigned int low_wm_percent_value;
+ unsigned int autocommit_time_value;
+ unsigned int max_age_value;
+ unsigned int pause_value;
- unsigned writeback_all;
+ unsigned int writeback_all;
struct workqueue_struct *writeback_wq;
struct work_struct writeback_work;
struct work_struct flush_work;
@@ -202,7 +199,7 @@ struct dm_writecache {
struct dm_kcopyd_client *dm_kcopyd;
unsigned long *dirty_bitmap;
- unsigned dirty_bitmap_size;
+ unsigned int dirty_bitmap_size;
struct bio_set bio_set;
mempool_t copy_pool;
@@ -227,7 +224,7 @@ struct writeback_struct {
struct list_head endio_entry;
struct dm_writecache *wc;
struct wc_entry **wc_list;
- unsigned wc_list_n;
+ unsigned int wc_list_n;
struct wc_entry *wc_list_inline[WB_LIST_INLINE];
struct bio bio;
};
@@ -236,7 +233,7 @@ struct copy_struct {
struct list_head endio_entry;
struct dm_writecache *wc;
struct wc_entry *e;
- unsigned n_entries;
+ unsigned int n_entries;
int error;
};
@@ -300,6 +297,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
}
if (da != p) {
long i;
+
wc->memory_map = NULL;
pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL);
if (!pages) {
@@ -309,6 +307,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
i = 0;
do {
long daa;
+
daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i,
p - i, DAX_ACCESS, NULL, &pfn);
if (daa <= 0) {
@@ -369,7 +368,7 @@ static struct page *persistent_memory_page(void *addr)
return virt_to_page(addr);
}
-static unsigned persistent_memory_page_offset(void *addr)
+static unsigned int persistent_memory_page_offset(void *addr)
{
return (unsigned long)addr & (PAGE_SIZE - 1);
}
@@ -502,11 +501,12 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
COMPLETION_INITIALIZER_ONSTACK(endio.c),
ATOMIC_INIT(1),
};
- unsigned bitmap_bits = wc->dirty_bitmap_size * 8;
- unsigned i = 0;
+ unsigned int bitmap_bits = wc->dirty_bitmap_size * 8;
+ unsigned int i = 0;
while (1) {
- unsigned j;
+ unsigned int j;
+
i = find_next_bit(wc->dirty_bitmap, bitmap_bits, i);
if (unlikely(i == bitmap_bits))
break;
@@ -531,7 +531,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
req.notify.context = &endio;
/* writing via async dm-io (implied by notify.fn above) won't return an error */
- (void) dm_io(&req, 1, &region, NULL);
+ (void) dm_io(&req, 1, &region, NULL);
i = j;
}
@@ -623,20 +623,21 @@ static struct wc_entry *writecache_find_entry(struct dm_writecache *wc,
if (unlikely(!node)) {
if (!(flags & WFE_RETURN_FOLLOWING))
return NULL;
- if (read_original_sector(wc, e) >= block) {
+ if (read_original_sector(wc, e) >= block)
return e;
- } else {
- node = rb_next(&e->rb_node);
- if (unlikely(!node))
- return NULL;
- e = container_of(node, struct wc_entry, rb_node);
- return e;
- }
+
+ node = rb_next(&e->rb_node);
+ if (unlikely(!node))
+ return NULL;
+
+ e = container_of(node, struct wc_entry, rb_node);
+ return e;
}
}
while (1) {
struct wc_entry *e2;
+
if (flags & WFE_LOWEST_SEQ)
node = rb_prev(&e->rb_node);
else
@@ -679,6 +680,7 @@ static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry
{
if (WC_MODE_SORT_FREELIST(wc)) {
struct rb_node **node = &wc->freetree.rb_node, *parent = NULL;
+
if (unlikely(!*node))
wc->current_free = e;
while (*node) {
@@ -718,6 +720,7 @@ static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, s
if (WC_MODE_SORT_FREELIST(wc)) {
struct rb_node *next;
+
if (unlikely(!wc->current_free))
return NULL;
e = wc->current_free;
@@ -769,7 +772,7 @@ static void writecache_poison_lists(struct dm_writecache *wc)
/*
* Catch incorrect access to these values while the device is suspended.
*/
- memset(&wc->tree, -1, sizeof wc->tree);
+ memset(&wc->tree, -1, sizeof(wc->tree));
wc->lru.next = LIST_POISON1;
wc->lru.prev = LIST_POISON2;
wc->freelist.next = LIST_POISON1;
@@ -864,6 +867,7 @@ static void writecache_flush_work(struct work_struct *work)
static void writecache_autocommit_timer(struct timer_list *t)
{
struct dm_writecache *wc = from_timer(wc, t, autocommit_timer);
+
if (!writecache_has_error(wc))
queue_work(wc->writeback_wq, &wc->flush_work);
}
@@ -941,7 +945,8 @@ static void writecache_suspend(struct dm_target *ti)
wc_lock(wc);
if (flush_on_suspend)
wc->writeback_all--;
- while (writecache_wait_for_writeback(wc));
+ while (writecache_wait_for_writeback(wc))
+ ;
if (WC_MODE_PMEM(wc))
persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size);
@@ -962,6 +967,7 @@ static int writecache_alloc_entries(struct dm_writecache *wc)
return -ENOMEM;
for (b = 0; b < wc->n_blocks; b++) {
struct wc_entry *e = &wc->entries[b];
+
e->index = b;
e->write_in_progress = false;
cond_resched();
@@ -1005,6 +1011,7 @@ static void writecache_resume(struct dm_target *ti)
r = writecache_read_metadata(wc, wc->metadata_sectors);
if (r) {
size_t sb_entries_offset;
+
writecache_error(wc, r, "unable to read metadata: %d", r);
sb_entries_offset = offsetof(struct wc_memory_superblock, entries);
memset((char *)wc->memory_map + sb_entries_offset, -1,
@@ -1034,6 +1041,7 @@ static void writecache_resume(struct dm_target *ti)
for (b = 0; b < wc->n_blocks; b++) {
struct wc_entry *e = &wc->entries[b];
struct wc_memory_entry wme;
+
if (writecache_has_error(wc)) {
e->original_sector = -1;
e->seq_count = -1;
@@ -1055,6 +1063,7 @@ static void writecache_resume(struct dm_target *ti)
#endif
for (b = 0; b < wc->n_blocks; b++) {
struct wc_entry *e = &wc->entries[b];
+
if (!writecache_entry_is_committed(wc, e)) {
if (read_seq_count(wc, e) != -1) {
erase_this:
@@ -1100,7 +1109,7 @@ erase_this:
wc_unlock(wc);
}
-static int process_flush_mesg(unsigned argc, char **argv, struct dm_writecache *wc)
+static int process_flush_mesg(unsigned int argc, char **argv, struct dm_writecache *wc)
{
if (argc != 1)
return -EINVAL;
@@ -1133,7 +1142,7 @@ static int process_flush_mesg(unsigned argc, char **argv, struct dm_writecache *
return 0;
}
-static int process_flush_on_suspend_mesg(unsigned argc, char **argv, struct dm_writecache *wc)
+static int process_flush_on_suspend_mesg(unsigned int argc, char **argv, struct dm_writecache *wc)
{
if (argc != 1)
return -EINVAL;
@@ -1153,7 +1162,7 @@ static void activate_cleaner(struct dm_writecache *wc)
wc->freelist_low_watermark = wc->n_blocks;
}
-static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache *wc)
+static int process_cleaner_mesg(unsigned int argc, char **argv, struct dm_writecache *wc)
{
if (argc != 1)
return -EINVAL;
@@ -1167,20 +1176,20 @@ static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache
return 0;
}
-static int process_clear_stats_mesg(unsigned argc, char **argv, struct dm_writecache *wc)
+static int process_clear_stats_mesg(unsigned int argc, char **argv, struct dm_writecache *wc)
{
if (argc != 1)
return -EINVAL;
wc_lock(wc);
- memset(&wc->stats, 0, sizeof wc->stats);
+ memset(&wc->stats, 0, sizeof(wc->stats));
wc_unlock(wc);
return 0;
}
-static int writecache_message(struct dm_target *ti, unsigned argc, char **argv,
- char *result, unsigned maxlen)
+static int writecache_message(struct dm_target *ti, unsigned int argc, char **argv,
+ char *result, unsigned int maxlen)
{
int r = -EINVAL;
struct dm_writecache *wc = ti->private;
@@ -1238,12 +1247,13 @@ static void memcpy_flushcache_optimized(void *dest, void *source, size_t size)
static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data)
{
void *buf;
- unsigned size;
+ unsigned int size;
int rw = bio_data_dir(bio);
- unsigned remaining_size = wc->block_size;
+ unsigned int remaining_size = wc->block_size;
do {
struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter);
+
buf = bvec_kmap_local(&bv);
size = bv.bv_len;
if (unlikely(size > remaining_size))
@@ -1251,6 +1261,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data
if (rw == READ) {
int r;
+
r = copy_mc_to_kernel(buf, data, size);
flush_dcache_page(bio_page(bio));
if (unlikely(r)) {
@@ -1371,13 +1382,14 @@ read_next_block:
static void writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio,
struct wc_entry *e, bool search_used)
{
- unsigned bio_size = wc->block_size;
+ unsigned int bio_size = wc->block_size;
sector_t start_cache_sec = cache_sector(wc, e);
sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT);
while (bio_size < bio->bi_iter.bi_size) {
if (!search_used) {
struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
+
if (!f)
break;
write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector +
@@ -1387,6 +1399,7 @@ static void writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio,
} else {
struct wc_entry *f;
struct rb_node *next = rb_next(&e->rb_node);
+
if (!next)
break;
f = container_of(next, struct wc_entry, rb_node);
@@ -1427,6 +1440,7 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio
do {
bool found_entry = false;
bool search_used = false;
+
if (writecache_has_error(wc)) {
wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits;
return WC_MAP_ERROR;
@@ -1540,7 +1554,7 @@ static int writecache_map(struct dm_target *ti, struct bio *bio)
bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
- if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
+ if (unlikely((((unsigned int)bio->bi_iter.bi_sector | bio_sectors(bio)) &
(wc->block_size / 512 - 1)) != 0)) {
DMERR("I/O is not aligned, sector %llu, size %u, block size %u",
(unsigned long long)bio->bi_iter.bi_sector,
@@ -1605,6 +1619,7 @@ static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t
if (bio->bi_private == (void *)1) {
int dir = bio_data_dir(bio);
+
if (atomic_dec_and_test(&wc->bio_in_progress[dir]))
if (unlikely(waitqueue_active(&wc->bio_in_progress_wait[dir])))
wake_up(&wc->bio_in_progress_wait[dir]);
@@ -1666,7 +1681,7 @@ static void writecache_copy_endio(int read_err, unsigned long write_err, void *p
static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list)
{
- unsigned i;
+ unsigned int i;
struct writeback_struct *wb;
struct wc_entry *e;
unsigned long n_walked = 0;
@@ -1782,7 +1797,7 @@ pop_from_list:
static bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e)
{
struct dm_writecache *wc = wb->wc;
- unsigned block_size = wc->block_size;
+ unsigned int block_size = wc->block_size;
void *address = memory_data(wc, e);
persistent_memory_flush_cache(address, block_size);
@@ -1817,7 +1832,7 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba
struct wc_entry *e, *f;
struct bio *bio;
struct writeback_struct *wb;
- unsigned max_pages;
+ unsigned int max_pages;
while (wbl->size) {
wbl->size--;
@@ -1832,10 +1847,13 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba
wb->wc = wc;
bio->bi_end_io = writecache_writeback_endio;
bio->bi_iter.bi_sector = read_original_sector(wc, e);
- if (max_pages <= WB_LIST_INLINE ||
- unlikely(!(wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *),
- GFP_NOIO | __GFP_NORETRY |
- __GFP_NOMEMALLOC | __GFP_NOWARN)))) {
+
+ if (unlikely(max_pages > WB_LIST_INLINE))
+ wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *),
+ GFP_NOIO | __GFP_NORETRY |
+ __GFP_NOMEMALLOC | __GFP_NOWARN);
+
+ if (likely(max_pages <= WB_LIST_INLINE) || unlikely(!wb->wc_list)) {
wb->wc_list = wb->wc_list_inline;
max_pages = WB_LIST_INLINE;
}
@@ -1880,7 +1898,7 @@ static void __writecache_writeback_ssd(struct dm_writecache *wc, struct writebac
struct copy_struct *c;
while (wbl->size) {
- unsigned n_sectors;
+ unsigned int n_sectors;
wbl->size--;
e = container_of(wbl->list.prev, struct wc_entry, lru);
@@ -1940,6 +1958,7 @@ static void writecache_writeback(struct work_struct *work)
if (likely(wc->pause != 0)) {
while (1) {
unsigned long idle;
+
if (unlikely(wc->cleaner) || unlikely(wc->writeback_all) ||
unlikely(dm_suspended(wc->ti)))
break;
@@ -1965,9 +1984,8 @@ restart:
goto restart;
}
- if (wc->overwrote_committed) {
+ if (wc->overwrote_committed)
writecache_wait_for_ios(wc, WRITE);
- }
n_walked = 0;
INIT_LIST_HEAD(&skipped);
@@ -1996,9 +2014,9 @@ restart:
} else
e = container_of(wc->lru.prev, struct wc_entry, lru);
BUG_ON(e->write_in_progress);
- if (unlikely(!writecache_entry_is_committed(wc, e))) {
+ if (unlikely(!writecache_entry_is_committed(wc, e)))
writecache_flush(wc);
- }
+
node = rb_prev(&e->rb_node);
if (node) {
f = container_of(node, struct wc_entry, rb_node);
@@ -2087,12 +2105,13 @@ restart:
if (unlikely(wc->writeback_all)) {
wc_lock(wc);
- while (writecache_wait_for_writeback(wc));
+ while (writecache_wait_for_writeback(wc))
+ ;
wc_unlock(wc);
}
}
-static int calculate_memory_size(uint64_t device_size, unsigned block_size,
+static int calculate_memory_size(uint64_t device_size, unsigned int block_size,
size_t *n_blocks_p, size_t *n_metadata_blocks_p)
{
uint64_t n_blocks, offset;
@@ -2155,7 +2174,7 @@ static int init_memory(struct dm_writecache *wc)
writecache_flush_all_metadata(wc);
writecache_commit_flushed(wc, false);
pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
- writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic);
+ writecache_flush_region(wc, &sb(wc)->magic, sizeof(sb(wc)->magic));
writecache_commit_flushed(wc, false);
return 0;
@@ -2207,12 +2226,12 @@ static void writecache_dtr(struct dm_target *ti)
kfree(wc);
}
-static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
+static int writecache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct dm_writecache *wc;
struct dm_arg_set as;
const char *string;
- unsigned opt_params;
+ unsigned int opt_params;
size_t offset, data_size;
int i, r;
char dummy;
@@ -2384,6 +2403,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
string = dm_shift_arg(&as), opt_params--;
if (!strcasecmp(string, "start_sector") && opt_params >= 1) {
unsigned long long start_sector;
+
string = dm_shift_arg(&as), opt_params--;
if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1)
goto invalid_optional;
@@ -2419,7 +2439,8 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto invalid_optional;
wc->autocommit_blocks_set = true;
} else if (!strcasecmp(string, "autocommit_time") && opt_params >= 1) {
- unsigned autocommit_msecs;
+ unsigned int autocommit_msecs;
+
string = dm_shift_arg(&as), opt_params--;
if (sscanf(string, "%u%c", &autocommit_msecs, &dummy) != 1)
goto invalid_optional;
@@ -2429,7 +2450,8 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
wc->autocommit_time_value = autocommit_msecs;
wc->autocommit_time_set = true;
} else if (!strcasecmp(string, "max_age") && opt_params >= 1) {
- unsigned max_age_msecs;
+ unsigned int max_age_msecs;
+
string = dm_shift_arg(&as), opt_params--;
if (sscanf(string, "%u%c", &max_age_msecs, &dummy) != 1)
goto invalid_optional;
@@ -2445,16 +2467,19 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (WC_MODE_PMEM(wc)) {
wc->writeback_fua = true;
wc->writeback_fua_set = true;
- } else goto invalid_optional;
+ } else
+ goto invalid_optional;
} else if (!strcasecmp(string, "nofua")) {
if (WC_MODE_PMEM(wc)) {
wc->writeback_fua = false;
wc->writeback_fua_set = true;
- } else goto invalid_optional;
+ } else
+ goto invalid_optional;
} else if (!strcasecmp(string, "metadata_only")) {
wc->metadata_only = true;
} else if (!strcasecmp(string, "pause_writeback") && opt_params >= 1) {
- unsigned pause_msecs;
+ unsigned int pause_msecs;
+
if (WC_MODE_PMEM(wc))
goto invalid_optional;
string = dm_shift_arg(&as), opt_params--;
@@ -2653,11 +2678,11 @@ bad:
}
static void writecache_status(struct dm_target *ti, status_type_t type,
- unsigned status_flags, char *result, unsigned maxlen)
+ unsigned int status_flags, char *result, unsigned int maxlen)
{
struct dm_writecache *wc = ti->private;
- unsigned extra_args;
- unsigned sz = 0;
+ unsigned int extra_args;
+ unsigned int sz = 0;
switch (type) {
case STATUSTYPE_INFO: