summaryrefslogtreecommitdiff
path: root/drivers/md/dm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--drivers/md/dm.c99
1 files changed, 63 insertions, 36 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b75ff6b2b952..36449422e7e0 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -484,33 +484,48 @@ u64 dm_start_time_ns_from_clone(struct bio *bio)
}
EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
-static void start_io_acct(struct dm_io *io)
+static bool bio_is_flush_with_data(struct bio *bio)
{
- struct mapped_device *md = io->md;
- struct bio *bio = io->orig_bio;
-
- bio_start_io_acct_time(bio, io->start_time);
- if (unlikely(dm_stats_used(&md->stats)))
- dm_stats_account_io(&md->stats, bio_data_dir(bio),
- bio->bi_iter.bi_sector, bio_sectors(bio),
- false, 0, &io->stats_aux);
+ return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size);
}
-static void end_io_acct(struct mapped_device *md, struct bio *bio,
- unsigned long start_time, struct dm_stats_aux *stats_aux)
+static void dm_io_acct(bool end, struct mapped_device *md, struct bio *bio,
+ unsigned long start_time, struct dm_stats_aux *stats_aux)
{
- unsigned long duration = jiffies - start_time;
+ bool is_flush_with_data;
+ unsigned int bi_size;
- bio_end_io_acct(bio, start_time);
+ /* If REQ_PREFLUSH set save any payload but do not account it */
+ is_flush_with_data = bio_is_flush_with_data(bio);
+ if (is_flush_with_data) {
+ bi_size = bio->bi_iter.bi_size;
+ bio->bi_iter.bi_size = 0;
+ }
+
+ if (!end)
+ bio_start_io_acct_time(bio, start_time);
+ else
+ bio_end_io_acct(bio, start_time);
if (unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio_data_dir(bio),
bio->bi_iter.bi_sector, bio_sectors(bio),
- true, duration, stats_aux);
+ end, start_time, stats_aux);
+
+ /* Restore bio's payload so it does get accounted upon requeue */
+ if (is_flush_with_data)
+ bio->bi_iter.bi_size = bi_size;
+}
+
+static void start_io_acct(struct dm_io *io)
+{
+ dm_io_acct(false, io->md, io->orig_bio, io->start_time, &io->stats_aux);
+}
- /* nudge anyone waiting on suspend queue */
- if (unlikely(wq_has_sleeper(&md->wait)))
- wake_up(&md->wait);
+static void end_io_acct(struct mapped_device *md, struct bio *bio,
+ unsigned long start_time, struct dm_stats_aux *stats_aux)
+{
+ dm_io_acct(true, md, bio, start_time, stats_aux);
}
static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
@@ -531,12 +546,15 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
io->magic = DM_IO_MAGIC;
io->status = 0;
atomic_set(&io->io_count, 1);
+ this_cpu_inc(*md->pending_io);
io->orig_bio = bio;
io->md = md;
spin_lock_init(&io->endio_lock);
io->start_time = jiffies;
+ dm_stats_record_start(&md->stats, &io->stats_aux);
+
return io;
}
@@ -826,11 +844,17 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
stats_aux = io->stats_aux;
free_io(md, io);
end_io_acct(md, bio, start_time, &stats_aux);
+ smp_wmb();
+ this_cpu_dec(*md->pending_io);
+
+ /* nudge anyone waiting on suspend queue */
+ if (unlikely(wq_has_sleeper(&md->wait)))
+ wake_up(&md->wait);
if (io_error == BLK_STS_DM_REQUEUE)
return;
- if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
+ if (bio_is_flush_with_data(bio)) {
/*
* Preflush done for flush with data, reissue
* without REQ_PREFLUSH.
@@ -1570,15 +1594,10 @@ static blk_qc_t dm_submit_bio(struct bio *bio)
struct dm_table *map;
map = dm_get_live_table(md, &srcu_idx);
- if (unlikely(!map)) {
- DMERR_LIMIT("%s: mapping table unavailable, erroring io",
- dm_device_name(md));
- bio_io_error(bio);
- goto out;
- }
- /* If suspended, queue this IO for later */
- if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
+ /* If suspended, or map not yet available, queue this IO for later */
+ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
+ unlikely(!map)) {
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
else if (bio->bi_opf & REQ_RAHEAD)
@@ -1682,6 +1701,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
md->dax_dev = NULL;
}
+ dm_cleanup_zoned_dev(md);
if (md->disk) {
spin_lock(&_minor_lock);
md->disk->private_data = NULL;
@@ -1694,6 +1714,11 @@ static void cleanup_mapped_device(struct mapped_device *md)
blk_cleanup_disk(md->disk);
}
+ if (md->pending_io) {
+ free_percpu(md->pending_io);
+ md->pending_io = NULL;
+ }
+
cleanup_srcu_struct(&md->io_barrier);
mutex_destroy(&md->suspend_lock);
@@ -1702,7 +1727,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
mutex_destroy(&md->swap_bios_lock);
dm_mq_cleanup_mapped_device(md);
- dm_cleanup_zoned_dev(md);
}
/*
@@ -1792,6 +1816,10 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->wq)
goto bad;
+ md->pending_io = alloc_percpu(unsigned long);
+ if (!md->pending_io)
+ goto bad;
+
dm_stats_init(&md->stats);
/* Populate the mapping, nobody knows we exist yet */
@@ -2156,7 +2184,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
set_bit(DMF_FREEING, &md->flags);
spin_unlock(&_minor_lock);
- blk_set_queue_dying(md->queue);
+ blk_mark_disk_dead(md->disk);
/*
* Take suspend_lock so that presuspend and postsuspend methods
@@ -2207,16 +2235,13 @@ void dm_put(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_put);
-static bool md_in_flight_bios(struct mapped_device *md)
+static bool dm_in_flight_bios(struct mapped_device *md)
{
int cpu;
- struct block_device *part = dm_disk(md)->part0;
- long sum = 0;
+ unsigned long sum = 0;
- for_each_possible_cpu(cpu) {
- sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
- sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
- }
+ for_each_possible_cpu(cpu)
+ sum += *per_cpu_ptr(md->pending_io, cpu);
return sum != 0;
}
@@ -2229,7 +2254,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta
while (true) {
prepare_to_wait(&md->wait, &wait, task_state);
- if (!md_in_flight_bios(md))
+ if (!dm_in_flight_bios(md))
break;
if (signal_pending_state(task_state, current)) {
@@ -2241,6 +2266,8 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta
}
finish_wait(&md->wait, &wait);
+ smp_rmb();
+
return r;
}