diff options
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 99 |
1 files changed, 63 insertions, 36 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b75ff6b2b952..36449422e7e0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -484,33 +484,48 @@ u64 dm_start_time_ns_from_clone(struct bio *bio) } EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone); -static void start_io_acct(struct dm_io *io) +static bool bio_is_flush_with_data(struct bio *bio) { - struct mapped_device *md = io->md; - struct bio *bio = io->orig_bio; - - bio_start_io_acct_time(bio, io->start_time); - if (unlikely(dm_stats_used(&md->stats))) - dm_stats_account_io(&md->stats, bio_data_dir(bio), - bio->bi_iter.bi_sector, bio_sectors(bio), - false, 0, &io->stats_aux); + return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size); } -static void end_io_acct(struct mapped_device *md, struct bio *bio, - unsigned long start_time, struct dm_stats_aux *stats_aux) +static void dm_io_acct(bool end, struct mapped_device *md, struct bio *bio, + unsigned long start_time, struct dm_stats_aux *stats_aux) { - unsigned long duration = jiffies - start_time; + bool is_flush_with_data; + unsigned int bi_size; - bio_end_io_acct(bio, start_time); + /* If REQ_PREFLUSH set save any payload but do not account it */ + is_flush_with_data = bio_is_flush_with_data(bio); + if (is_flush_with_data) { + bi_size = bio->bi_iter.bi_size; + bio->bi_iter.bi_size = 0; + } + + if (!end) + bio_start_io_acct_time(bio, start_time); + else + bio_end_io_acct(bio, start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), - true, duration, stats_aux); + end, start_time, stats_aux); + + /* Restore bio's payload so it does get accounted upon requeue */ + if (is_flush_with_data) + bio->bi_iter.bi_size = bi_size; +} + +static void start_io_acct(struct dm_io *io) +{ + dm_io_acct(false, io->md, io->orig_bio, io->start_time, &io->stats_aux); +} - /* nudge anyone waiting on suspend queue */ - if (unlikely(wq_has_sleeper(&md->wait))) - wake_up(&md->wait); +static void end_io_acct(struct mapped_device *md, struct bio *bio, + unsigned long start_time, struct dm_stats_aux *stats_aux) +{ + dm_io_acct(true, md, bio, start_time, stats_aux); } static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) @@ -531,12 +546,15 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) io->magic = DM_IO_MAGIC; io->status = 0; atomic_set(&io->io_count, 1); + this_cpu_inc(*md->pending_io); io->orig_bio = bio; io->md = md; spin_lock_init(&io->endio_lock); io->start_time = jiffies; + dm_stats_record_start(&md->stats, &io->stats_aux); + return io; } @@ -826,11 +844,17 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error) stats_aux = io->stats_aux; free_io(md, io); end_io_acct(md, bio, start_time, &stats_aux); + smp_wmb(); + this_cpu_dec(*md->pending_io); + + /* nudge anyone waiting on suspend queue */ + if (unlikely(wq_has_sleeper(&md->wait))) + wake_up(&md->wait); if (io_error == BLK_STS_DM_REQUEUE) return; - if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) { + if (bio_is_flush_with_data(bio)) { /* * Preflush done for flush with data, reissue * without REQ_PREFLUSH. @@ -1570,15 +1594,10 @@ static blk_qc_t dm_submit_bio(struct bio *bio) struct dm_table *map; map = dm_get_live_table(md, &srcu_idx); - if (unlikely(!map)) { - DMERR_LIMIT("%s: mapping table unavailable, erroring io", - dm_device_name(md)); - bio_io_error(bio); - goto out; - } - /* If suspended, queue this IO for later */ - if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { + /* If suspended, or map not yet available, queue this IO for later */ + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || + unlikely(!map)) { if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); else if (bio->bi_opf & REQ_RAHEAD) @@ -1682,6 +1701,7 @@ static void cleanup_mapped_device(struct mapped_device *md) md->dax_dev = NULL; } + dm_cleanup_zoned_dev(md); if (md->disk) { spin_lock(&_minor_lock); md->disk->private_data = NULL; @@ -1694,6 +1714,11 @@ static void cleanup_mapped_device(struct mapped_device *md) blk_cleanup_disk(md->disk); } + if (md->pending_io) { + free_percpu(md->pending_io); + md->pending_io = NULL; + } + cleanup_srcu_struct(&md->io_barrier); mutex_destroy(&md->suspend_lock); @@ -1702,7 +1727,6 @@ static void cleanup_mapped_device(struct mapped_device *md) mutex_destroy(&md->swap_bios_lock); dm_mq_cleanup_mapped_device(md); - dm_cleanup_zoned_dev(md); } /* @@ -1792,6 +1816,10 @@ static struct mapped_device *alloc_dev(int minor) if (!md->wq) goto bad; + md->pending_io = alloc_percpu(unsigned long); + if (!md->pending_io) + goto bad; + dm_stats_init(&md->stats); /* Populate the mapping, nobody knows we exist yet */ @@ -2156,7 +2184,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait) set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); - blk_set_queue_dying(md->queue); + blk_mark_disk_dead(md->disk); /* * Take suspend_lock so that presuspend and postsuspend methods @@ -2207,16 +2235,13 @@ void dm_put(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_put); -static bool md_in_flight_bios(struct mapped_device *md) +static bool dm_in_flight_bios(struct mapped_device *md) { int cpu; - struct block_device *part = dm_disk(md)->part0; - long sum = 0; + unsigned long sum = 0; - for_each_possible_cpu(cpu) { - sum += part_stat_local_read_cpu(part, in_flight[0], cpu); - sum += part_stat_local_read_cpu(part, in_flight[1], cpu); - } + for_each_possible_cpu(cpu) + sum += *per_cpu_ptr(md->pending_io, cpu); return sum != 0; } @@ -2229,7 +2254,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta while (true) { prepare_to_wait(&md->wait, &wait, task_state); - if (!md_in_flight_bios(md)) + if (!dm_in_flight_bios(md)) break; if (signal_pending_state(task_state, current)) { @@ -2241,6 +2266,8 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta } finish_wait(&md->wait, &wait); + smp_rmb(); + return r; } |