From 1926d2da825f2ffc3f0626f4df63474ed85032a1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 2 Jul 2020 12:35:02 +0100 Subject: md: raid0/linear: fix dereference before null check on pointer mddev [ Upstream commit 9a5a85972c073f720d81a7ebd08bfe278e6e16db ] Pointer mddev is being dereferenced with a test_bit call before mddev is being null checked, this may cause a null pointer dereference. Fix this by moving the null pointer checks to sanity check mddev before it is dereferenced. Addresses-Coverity: ("Dereference before null check") Fixes: 62f7b1989c02 ("md raid0/linear: Mark array as 'broken' and fail BIOs if a member is gone") Signed-off-by: Colin Ian King Reviewed-by: Guilherme G. Piccoli Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/md.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index f567f536b529..90756450b958 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -470,17 +470,18 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) struct mddev *mddev = bio->bi_disk->private_data; unsigned int sectors; - if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) { + if (mddev == NULL || mddev->pers == NULL) { bio_io_error(bio); return BLK_QC_T_NONE; } - blk_queue_split(q, &bio); - - if (mddev == NULL || mddev->pers == NULL) { + if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) { bio_io_error(bio); return BLK_QC_T_NONE; } + + blk_queue_split(q, &bio); + if (mddev->ro == 1 && unlikely(rw == WRITE)) { if (bio_sectors(bio) != 0) bio->bi_status = BLK_STS_IOERR; -- cgit v1.2.3 From 679d3baca18fcae7c51e2eedab784ae276ed78fb Mon Sep 17 00:00:00 2001 From: Zhao Heming Date: Thu, 9 Jul 2020 11:29:29 +0800 Subject: md-cluster: fix wild pointer of unlock_all_bitmaps() [ Upstream commit 60f80d6f2d07a6d8aee485a1d1252327eeee0c81 ] reproduction steps: ``` node1 # mdadm -C /dev/md0 -b clustered -e 1.2 -n 2 -l mirror /dev/sda /dev/sdb node2 # mdadm -A /dev/md0 /dev/sda /dev/sdb node1 # mdadm -G /dev/md0 -b none mdadm: failed to remove clustered bitmap. node1 # mdadm -S --scan ^C <==== mdadm hung & kernel crash ``` kernel stack: ``` [ 335.230657] general protection fault: 0000 [#1] SMP NOPTI [...] [ 335.230848] Call Trace: [ 335.230873] ? unlock_all_bitmaps+0x5/0x70 [md_cluster] [ 335.230886] unlock_all_bitmaps+0x3d/0x70 [md_cluster] [ 335.230899] leave+0x10f/0x190 [md_cluster] [ 335.230932] ? md_super_wait+0x93/0xa0 [md_mod] [ 335.230947] ? leave+0x5/0x190 [md_cluster] [ 335.230973] md_cluster_stop+0x1a/0x30 [md_mod] [ 335.230999] md_bitmap_free+0x142/0x150 [md_mod] [ 335.231013] ? _cond_resched+0x15/0x40 [ 335.231025] ? mutex_lock+0xe/0x30 [ 335.231056] __md_stop+0x1c/0xa0 [md_mod] [ 335.231083] do_md_stop+0x160/0x580 [md_mod] [ 335.231119] ? 0xffffffffc05fb078 [ 335.231148] md_ioctl+0xa04/0x1930 [md_mod] [ 335.231165] ? filename_lookup+0xf2/0x190 [ 335.231179] blkdev_ioctl+0x93c/0xa10 [ 335.231205] ? _cond_resched+0x15/0x40 [ 335.231214] ? __check_object_size+0xd4/0x1a0 [ 335.231224] block_ioctl+0x39/0x40 [ 335.231243] do_vfs_ioctl+0xa0/0x680 [ 335.231253] ksys_ioctl+0x70/0x80 [ 335.231261] __x64_sys_ioctl+0x16/0x20 [ 335.231271] do_syscall_64+0x65/0x1f0 [ 335.231278] entry_SYSCALL_64_after_hwframe+0x44/0xa9 ``` Signed-off-by: Zhao Heming Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/md-cluster.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md') diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 813a99ffa86f..73fd50e77975 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -1518,6 +1518,7 @@ static void unlock_all_bitmaps(struct mddev *mddev) } } kfree(cinfo->other_bitmap_lockres); + cinfo->other_bitmap_lockres = NULL; } } -- cgit v1.2.3 From 11bf4638158aff0d18ce898af37a48a486326394 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 25 Jul 2020 20:00:26 +0800 Subject: bcache: fix super block seq numbers comparision in register_cache_set() [ Upstream commit 117f636ea695270fe492d0c0c9dfadc7a662af47 ] In register_cache_set(), c is pointer to struct cache_set, and ca is pointer to struct cache, if ca->sb.seq > c->sb.seq, it means this registering cache has up to date version and other members, the in- memory version and other members should be updated to the newer value. But current implementation makes a cache set only has a single cache device, so the above assumption works well except for a special case. The execption is when a cache device new created and both ca->sb.seq and c->sb.seq are 0, because the super block is never flushed out yet. In the location for the following if() check, 2156 if (ca->sb.seq > c->sb.seq) { 2157 c->sb.version = ca->sb.version; 2158 memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16); 2159 c->sb.flags = ca->sb.flags; 2160 c->sb.seq = ca->sb.seq; 2161 pr_debug("set version = %llu\n", c->sb.version); 2162 } c->sb.version is not initialized yet and valued 0. When ca->sb.seq is 0, the if() check will fail (because both values are 0), and the cache set version, set_uuid, flags and seq won't be updated. The above problem is hiden for current code, because the bucket size is compatible among different super block version. And the next time when running cache set again, ca->sb.seq will be larger than 0 and cache set super block version will be updated properly. But if the large bucket feature is enabled, sb->bucket_size is the low 16bits of the bucket size. For a power of 2 value, when the actual bucket size exceeds 16bit width, sb->bucket_size will always be 0. Then read_super_common() will fail because the if() check to is_power_of_2(sb->bucket_size) is false. This is how the long time hidden bug is triggered. This patch modifies the if() check to the following way, 2156 if (ca->sb.seq > c->sb.seq || c->sb.seq == 0) { Then cache set's version, set_uuid, flags and seq will always be updated corectly including for a new created cache device. Signed-off-by: Coly Li Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/super.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 2014016f9a60..445bb84ee27f 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2100,7 +2100,14 @@ found: sysfs_create_link(&c->kobj, &ca->kobj, buf)) goto err; - if (ca->sb.seq > c->sb.seq) { + /* + * A special case is both ca->sb.seq and c->sb.seq are 0, + * such condition happens on a new created cache device whose + * super block is never flushed yet. In this case c->sb.version + * and other members should be updated too, otherwise we will + * have a mistaken super block version in cache set. + */ + if (ca->sb.seq > c->sb.seq || c->sb.seq == 0) { c->sb.version = ca->sb.version; memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16); c->sb.flags = ca->sb.flags; -- cgit v1.2.3 From 6c38defccf6142b11e0ca2ad590a06f6dae40c00 Mon Sep 17 00:00:00 2001 From: ChangSyun Peng Date: Fri, 31 Jul 2020 17:50:17 +0800 Subject: md/raid5: Fix Force reconstruct-write io stuck in degraded raid5 commit a1c6ae3d9f3dd6aa5981a332a6f700cf1c25edef upstream. In degraded raid5, we need to read parity to do reconstruct-write when data disks fail. However, we can not read parity from handle_stripe_dirtying() in force reconstruct-write mode. Reproducible Steps: 1. Create degraded raid5 mdadm -C /dev/md2 --assume-clean -l5 -n3 /dev/sda2 /dev/sdb2 missing 2. Set rmw_level to 0 echo 0 > /sys/block/md2/md/rmw_level 3. IO to raid5 Now some io may be stuck in raid5. We can use handle_stripe_fill() to read the parity in this situation. Cc: # v4.4+ Reviewed-by: Alex Wu Reviewed-by: BingJing Chang Reviewed-by: Danny Shih Signed-off-by: ChangSyun Peng Signed-off-by: Song Liu Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ab8067f9ce8c..43eedf7adc79 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3607,6 +3607,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, * is missing/faulty, then we need to read everything we can. */ if (sh->raid_conf->level != 6 && + sh->raid_conf->rmw_level != PARITY_DISABLE_RMW && sh->sector < sh->raid_conf->mddev->recovery_cp) /* reconstruct-write isn't being forced */ return 0; @@ -4842,7 +4843,7 @@ static void handle_stripe(struct stripe_head *sh) * or to load a block that is being partially written. */ if (s.to_read || s.non_overwrite - || (conf->level == 6 && s.to_write && s.failed) + || (s.to_write && s.failed) || (s.syncing && (s.uptodate + s.compute < disks)) || s.replacing || s.expanding) -- cgit v1.2.3 From d024010903677831608d7a0c166c3de7921de848 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 25 Jul 2020 20:00:16 +0800 Subject: bcache: allocate meta data pages as compound pages commit 5fe48867856367142d91a82f2cbf7a57a24cbb70 upstream. There are some meta data of bcache are allocated by multiple pages, and they are used as bio bv_page for I/Os to the cache device. for example cache_set->uuids, cache->disk_buckets, journal_write->data, bset_tree->data. For such meta data memory, all the allocated pages should be treated as a single memory block. Then the memory management and underlying I/O code can treat them more clearly. This patch adds __GFP_COMP flag to all the location allocating >0 order pages for the above mentioned meta data. Then their pages are treated as compound pages now. Signed-off-by: Coly Li Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/bset.c | 2 +- drivers/md/bcache/btree.c | 2 +- drivers/md/bcache/journal.c | 4 ++-- drivers/md/bcache/super.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 4995fcaefe29..67a2c47f4201 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -322,7 +322,7 @@ int bch_btree_keys_alloc(struct btree_keys *b, b->page_order = page_order; - t->data = (void *) __get_free_pages(gfp, b->page_order); + t->data = (void *) __get_free_pages(__GFP_COMP|gfp, b->page_order); if (!t->data) goto err; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 6548a601edf0..dd116c83de80 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -785,7 +785,7 @@ int bch_btree_cache_alloc(struct cache_set *c) mutex_init(&c->verify_lock); c->verify_ondisk = (void *) - __get_free_pages(GFP_KERNEL, ilog2(bucket_pages(c))); + __get_free_pages(GFP_KERNEL|__GFP_COMP, ilog2(bucket_pages(c))); c->verify_data = mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL); diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 90aac4e2333f..d8586b6ccb76 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -999,8 +999,8 @@ int bch_journal_alloc(struct cache_set *c) j->w[1].c = c; if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || - !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) || - !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS))) + !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP, JSET_BITS)) || + !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP, JSET_BITS))) return -ENOMEM; return 0; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 445bb84ee27f..99810b622bec 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1776,7 +1776,7 @@ void bch_cache_set_unregister(struct cache_set *c) } #define alloc_bucket_pages(gfp, c) \ - ((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(c)))) + ((void *) __get_free_pages(__GFP_ZERO|__GFP_COMP|gfp, ilog2(bucket_pages(c)))) struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) { -- cgit v1.2.3 From 9c8656abb350a834b4b265831d2f028ad980d1f0 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 25 Jul 2020 20:00:22 +0800 Subject: bcache: fix overflow in offset_to_stripe() commit 7a1481267999c02abf4a624515c1b5c7c1fccbd6 upstream. offset_to_stripe() returns the stripe number (in type unsigned int) from an offset (in type uint64_t) by the following calculation, do_div(offset, d->stripe_size); For large capacity backing device (e.g. 18TB) with small stripe size (e.g. 4KB), the result is 4831838208 and exceeds UINT_MAX. The actual returned value which caller receives is 536870912, due to the overflow. Indeed in bcache_device_init(), bcache_device->nr_stripes is limited in range [1, INT_MAX]. Therefore all valid stripe numbers in bcache are in range [0, bcache_dev->nr_stripes - 1]. This patch adds a upper limition check in offset_to_stripe(): the max valid stripe number should be less than bcache_device->nr_stripes. If the calculated stripe number from do_div() is equal to or larger than bcache_device->nr_stripe, -EINVAL will be returned. (Normally nr_stripes is less than INT_MAX, exceeding upper limitation doesn't mean overflow, therefore -EOVERFLOW is not used as error code.) This patch also changes nr_stripes' type of struct bcache_device from 'unsigned int' to 'int', and return value type of offset_to_stripe() from 'unsigned int' to 'int', to match their exact data ranges. All locations where bcache_device->nr_stripes and offset_to_stripe() are referenced also get updated for the above type change. Reported-and-tested-by: Ken Raeburn Signed-off-by: Coly Li Cc: stable@vger.kernel.org Link: https://bugzilla.redhat.com/show_bug.cgi?id=1783075 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/bcache.h | 2 +- drivers/md/bcache/writeback.c | 14 +++++++++----- drivers/md/bcache/writeback.h | 19 +++++++++++++++++-- 3 files changed, 27 insertions(+), 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 221e0191b687..80e3c4813fb0 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -264,7 +264,7 @@ struct bcache_device { #define BCACHE_DEV_UNLINK_DONE 2 #define BCACHE_DEV_WB_RUNNING 3 #define BCACHE_DEV_RATE_DW_RUNNING 4 - unsigned int nr_stripes; + int nr_stripes; unsigned int stripe_size; atomic_t *stripe_sectors_dirty; unsigned long *full_dirty_stripes; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 1cf1e5016cb9..ab101ad55459 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -523,15 +523,19 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode, uint64_t offset, int nr_sectors) { struct bcache_device *d = c->devices[inode]; - unsigned int stripe_offset, stripe, sectors_dirty; + unsigned int stripe_offset, sectors_dirty; + int stripe; if (!d) return; + stripe = offset_to_stripe(d, offset); + if (stripe < 0) + return; + if (UUID_FLASH_ONLY(&c->uuids[inode])) atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors); - stripe = offset_to_stripe(d, offset); stripe_offset = offset & (d->stripe_size - 1); while (nr_sectors) { @@ -571,12 +575,12 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k) static void refill_full_stripes(struct cached_dev *dc) { struct keybuf *buf = &dc->writeback_keys; - unsigned int start_stripe, stripe, next_stripe; + unsigned int start_stripe, next_stripe; + int stripe; bool wrapped = false; stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned)); - - if (stripe >= dc->disk.nr_stripes) + if (stripe < 0) stripe = 0; start_stripe = stripe; diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index b029843ce5b6..3f1230e22de0 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -52,10 +52,22 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) return ret; } -static inline unsigned int offset_to_stripe(struct bcache_device *d, +static inline int offset_to_stripe(struct bcache_device *d, uint64_t offset) { do_div(offset, d->stripe_size); + + /* d->nr_stripes is in range [1, INT_MAX] */ + if (unlikely(offset >= d->nr_stripes)) { + pr_err("Invalid stripe %llu (>= nr_stripes %d).\n", + offset, d->nr_stripes); + return -EINVAL; + } + + /* + * Here offset is definitly smaller than INT_MAX, + * return it as int will never overflow. + */ return offset; } @@ -63,7 +75,10 @@ static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc, uint64_t offset, unsigned int nr_sectors) { - unsigned int stripe = offset_to_stripe(&dc->disk, offset); + int stripe = offset_to_stripe(&dc->disk, offset); + + if (stripe < 0) + return false; while (1) { if (atomic_read(dc->disk.stripe_sectors_dirty + stripe)) -- cgit v1.2.3 From add47ff1e48227b9d64efc18951bd15897f78221 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 25 Jul 2020 20:00:21 +0800 Subject: bcache: avoid nr_stripes overflow in bcache_device_init() commit 65f0f017e7be8c70330372df23bcb2a407ecf02d upstream. For some block devices which large capacity (e.g. 8TB) but small io_opt size (e.g. 8 sectors), in bcache_device_init() the stripes number calcu- lated by, DIV_ROUND_UP_ULL(sectors, d->stripe_size); might be overflow to the unsigned int bcache_device->nr_stripes. This patch uses the uint64_t variable to store DIV_ROUND_UP_ULL() and after the value is checked to be available in unsigned int range, sets it to bache_device->nr_stripes. Then the overflow is avoided. Reported-and-tested-by: Ken Raeburn Signed-off-by: Coly Li Cc: stable@vger.kernel.org Link: https://bugzilla.redhat.com/show_bug.cgi?id=1783075 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 99810b622bec..e15d07823031 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -826,19 +826,19 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, struct request_queue *q; const size_t max_stripes = min_t(size_t, INT_MAX, SIZE_MAX / sizeof(atomic_t)); - size_t n; + uint64_t n; int idx; if (!d->stripe_size) d->stripe_size = 1 << 31; - d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size); - - if (!d->nr_stripes || d->nr_stripes > max_stripes) { - pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)\n", - (unsigned int)d->nr_stripes); + n = DIV_ROUND_UP_ULL(sectors, d->stripe_size); + if (!n || n > max_stripes) { + pr_err("nr_stripes too large or invalid: %llu (start sector beyond end of disk?)\n", + n); return -ENOMEM; } + d->nr_stripes = n; n = d->nr_stripes * sizeof(atomic_t); d->stripe_sectors_dirty = kvzalloc(n, GFP_KERNEL); -- cgit v1.2.3 From 99ea492e8a1ac3170fbca22643173393f5f3bd03 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 25 Jul 2020 20:00:39 +0800 Subject: bcache: fix bio_{start,end}_io_acct with proper device commit a2f32ee8fd853cec8860f883d98afc3a339546de upstream. Commit 85750aeb748f ("bcache: use bio_{start,end}_io_acct") moves the io account code to the location after bio_set_dev(bio, dc->bdev) in cached_dev_make_request(). Then the account is performed incorrectly on backing device, indeed the I/O should be counted to bcache device like /dev/bcache0. With the mistaken I/O account, iostat does not display I/O counts for bcache device and all the numbers go to backing device. In writeback mode, the hard drive may have 340K+ IOPS which is impossible and wrong for spinning disk. This patch introduces bch_bio_start_io_acct() and bch_bio_end_io_acct(), which switches bio->bi_disk to bcache device before calling bio_start_io_acct() or bio_end_io_acct(). Now the I/Os are counted to bcache device, and bcache device, cache device and backing device have their correct I/O count information back. Fixes: 85750aeb748f ("bcache: use bio_{start,end}_io_acct") Signed-off-by: Coly Li Cc: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/request.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 7acf024e99f3..8ea0f079c1d0 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -617,6 +617,28 @@ static void cache_lookup(struct closure *cl) /* Common code for the make_request functions */ +static inline void bch_bio_start_io_acct(struct gendisk *acct_bi_disk, + struct bio *bio, + unsigned long *start_time) +{ + struct gendisk *saved_bi_disk = bio->bi_disk; + + bio->bi_disk = acct_bi_disk; + *start_time = bio_start_io_acct(bio); + bio->bi_disk = saved_bi_disk; +} + +static inline void bch_bio_end_io_acct(struct gendisk *acct_bi_disk, + struct bio *bio, + unsigned long start_time) +{ + struct gendisk *saved_bi_disk = bio->bi_disk; + + bio->bi_disk = acct_bi_disk; + bio_end_io_acct(bio, start_time); + bio->bi_disk = saved_bi_disk; +} + static void request_endio(struct bio *bio) { struct closure *cl = bio->bi_private; @@ -668,7 +690,7 @@ static void backing_request_endio(struct bio *bio) static void bio_complete(struct search *s) { if (s->orig_bio) { - bio_end_io_acct(s->orig_bio, s->start_time); + bch_bio_end_io_acct(s->d->disk, s->orig_bio, s->start_time); trace_bcache_request_end(s->d, s->orig_bio); s->orig_bio->bi_status = s->iop.status; bio_endio(s->orig_bio); @@ -728,7 +750,7 @@ static inline struct search *search_alloc(struct bio *bio, s->recoverable = 1; s->write = op_is_write(bio_op(bio)); s->read_dirty_data = 0; - s->start_time = bio_start_io_acct(bio); + bch_bio_start_io_acct(d->disk, bio, &s->start_time); s->iop.c = d->c; s->iop.bio = NULL; @@ -1080,7 +1102,7 @@ static void detached_dev_end_io(struct bio *bio) bio->bi_end_io = ddip->bi_end_io; bio->bi_private = ddip->bi_private; - bio_end_io_acct(bio, ddip->start_time); + bch_bio_end_io_acct(ddip->d->disk, bio, ddip->start_time); if (bio->bi_status) { struct cached_dev *dc = container_of(ddip->d, @@ -1105,7 +1127,8 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio) */ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO); ddip->d = d; - ddip->start_time = bio_start_io_acct(bio); + bch_bio_start_io_acct(d->disk, bio, &ddip->start_time); + ddip->bi_end_io = bio->bi_end_io; ddip->bi_private = bio->bi_private; bio->bi_end_io = detached_dev_end_io; -- cgit v1.2.3 From 9f05adac97ed19371e1e80405f7c28b49024fb46 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Tue, 28 Jul 2020 21:59:20 +0800 Subject: bcache: use disk_{start,end}_io_acct() to count I/O for bcache device commit c5be1f2c5bab1538aa29cd42e226d6b80391e3ff upstream. This patch is a fix to patch "bcache: fix bio_{start,end}_io_acct with proper device". The previous patch uses a hack to temporarily set bi_disk to bcache device, which is mistaken too. As Christoph suggests, this patch uses disk_{start,end}_io_acct() to count I/O for bcache device in the correct way. Fixes: 85750aeb748f ("bcache: use bio_{start,end}_io_acct") Signed-off-by: Coly Li Cc: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/request.c | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 8ea0f079c1d0..9cc044293acd 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -617,28 +617,6 @@ static void cache_lookup(struct closure *cl) /* Common code for the make_request functions */ -static inline void bch_bio_start_io_acct(struct gendisk *acct_bi_disk, - struct bio *bio, - unsigned long *start_time) -{ - struct gendisk *saved_bi_disk = bio->bi_disk; - - bio->bi_disk = acct_bi_disk; - *start_time = bio_start_io_acct(bio); - bio->bi_disk = saved_bi_disk; -} - -static inline void bch_bio_end_io_acct(struct gendisk *acct_bi_disk, - struct bio *bio, - unsigned long start_time) -{ - struct gendisk *saved_bi_disk = bio->bi_disk; - - bio->bi_disk = acct_bi_disk; - bio_end_io_acct(bio, start_time); - bio->bi_disk = saved_bi_disk; -} - static void request_endio(struct bio *bio) { struct closure *cl = bio->bi_private; @@ -690,7 +668,9 @@ static void backing_request_endio(struct bio *bio) static void bio_complete(struct search *s) { if (s->orig_bio) { - bch_bio_end_io_acct(s->d->disk, s->orig_bio, s->start_time); + /* Count on bcache device */ + disk_end_io_acct(s->d->disk, bio_op(s->orig_bio), s->start_time); + trace_bcache_request_end(s->d, s->orig_bio); s->orig_bio->bi_status = s->iop.status; bio_endio(s->orig_bio); @@ -750,8 +730,8 @@ static inline struct search *search_alloc(struct bio *bio, s->recoverable = 1; s->write = op_is_write(bio_op(bio)); s->read_dirty_data = 0; - bch_bio_start_io_acct(d->disk, bio, &s->start_time); - + /* Count on the bcache device */ + s->start_time = disk_start_io_acct(d->disk, bio_sectors(bio), bio_op(bio)); s->iop.c = d->c; s->iop.bio = NULL; s->iop.inode = d->id; @@ -1102,7 +1082,8 @@ static void detached_dev_end_io(struct bio *bio) bio->bi_end_io = ddip->bi_end_io; bio->bi_private = ddip->bi_private; - bch_bio_end_io_acct(ddip->d->disk, bio, ddip->start_time); + /* Count on the bcache device */ + disk_end_io_acct(ddip->d->disk, bio_op(bio), ddip->start_time); if (bio->bi_status) { struct cached_dev *dc = container_of(ddip->d, @@ -1127,8 +1108,8 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio) */ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO); ddip->d = d; - bch_bio_start_io_acct(d->disk, bio, &ddip->start_time); - + /* Count on the bcache device */ + ddip->start_time = disk_start_io_acct(d->disk, bio_sectors(bio), bio_op(bio)); ddip->bi_end_io = bio->bi_end_io; ddip->bi_private = bio->bi_private; bio->bi_end_io = detached_dev_end_io; -- cgit v1.2.3 From b1735d353e615eb1ab2d24b3bbe41a482c349c5c Mon Sep 17 00:00:00 2001 From: John Dorminy Date: Fri, 31 Jul 2020 18:46:45 -0400 Subject: dm ebs: Fix incorrect checking for REQ_OP_FLUSH commit 4cb6f22612511ff2aba4c33fb0f281cae7c23772 upstream. REQ_OP_FLUSH was being treated as a flag, but the operation part of bio->bi_opf must be treated as a whole. Change to accessing the operation part via bio_op(bio) and checking for equality. Signed-off-by: John Dorminy Acked-by: Heinz Mauelshagen Fixes: d3c7b35c20d60 ("dm: add emulated block size target") Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-ebs-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index 44451276f128..cb85610527c2 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -363,7 +363,7 @@ static int ebs_map(struct dm_target *ti, struct bio *bio) bio_set_dev(bio, ec->dev->bdev); bio->bi_iter.bi_sector = ec->start + dm_target_offset(ti, bio->bi_iter.bi_sector); - if (unlikely(bio->bi_opf & REQ_OP_FLUSH)) + if (unlikely(bio_op(bio) == REQ_OP_FLUSH)) return DM_MAPIO_REMAPPED; /* * Only queue for bufio processing in case of partial or overlapping buffers -- cgit v1.2.3 From 20d175c70e601e0630395c0b9fa0c25c720cd599 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 4 Aug 2020 18:25:01 +0900 Subject: dm: don't call report zones for more than the user requested commit a9cb9f4148ef6bb8fabbdaa85c42b2171fbd5a0d upstream. Don't call report zones for more zones than the user actually requested, otherwise this can lead to out-of-bounds accesses in the callback functions. Such a situation can happen if the target's ->report_zones() callback function returns 0 because we've reached the end of the target and then restart the report zones on the second target. We're again calling into ->report_zones() and ultimately into the user supplied callback function but when we're not subtracting the number of zones already processed this may lead to out-of-bounds accesses in the user callbacks. Signed-off-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Fixes: d41003513e61 ("block: rework zone reporting") Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5b9de2f71bb0..88b391ff9bea 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -504,7 +504,8 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector, } args.tgt = tgt; - ret = tgt->type->report_zones(tgt, &args, nr_zones); + ret = tgt->type->report_zones(tgt, &args, + nr_zones - args.zone_idx); if (ret < 0) goto out; } while (args.zone_idx < nr_zones && -- cgit v1.2.3 From e5703d615a7c601e2421e687321fc2e315573c3e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 19 Jun 2020 16:42:14 +0800 Subject: dm rq: don't call blk_mq_queue_stopped() in dm_stop_queue() [ Upstream commit e766668c6cd49d741cfb49eaeb38998ba34d27bc ] dm_stop_queue() only uses blk_mq_quiesce_queue() so it doesn't formally stop the blk-mq queue; therefore there is no point making the blk_mq_queue_stopped() check -- it will never be stopped. In addition, even though dm_stop_queue() actually tries to quiesce hw queues via blk_mq_quiesce_queue(), checking with blk_queue_quiesced() to avoid unnecessary queue quiesce isn't reliable because: the QUEUE_FLAG_QUIESCED flag is set before synchronize_rcu() and dm_stop_queue() may be called when synchronize_rcu() from another blk_mq_quiesce_queue() is in-progress. Fixes: 7b17c2f7292ba ("dm: Fix a race condition related to stopping and starting queues") Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-rq.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 85e0daabad49..20745e2e34b9 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -70,9 +70,6 @@ void dm_start_queue(struct request_queue *q) void dm_stop_queue(struct request_queue *q) { - if (blk_mq_queue_stopped(q)) - return; - blk_mq_quiesce_queue(q); } -- cgit v1.2.3 From 0b702cd4141800dfa3e75caa1ca9a79ce752453e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 4 Aug 2020 13:16:45 +0300 Subject: md-cluster: Fix potential error pointer dereference in resize_bitmaps() [ Upstream commit e8abe1de43dac658dacbd04a4543e0c988a8d386 ] The error handling calls md_bitmap_free(bitmap) which checks for NULL but will Oops if we pass an error pointer. Let's set "bitmap" to NULL on this error path. Fixes: afd756286083 ("md-cluster/raid10: resize all the bitmaps before start reshape") Signed-off-by: Dan Carpenter Reviewed-by: Guoqing Jiang Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/md-cluster.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md') diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 73fd50e77975..d50737ec4039 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -1139,6 +1139,7 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz bitmap = get_bitmap_from_slot(mddev, i); if (IS_ERR(bitmap)) { pr_err("can't get bitmap from slot %d\n", i); + bitmap = NULL; goto out; } counts = &bitmap->counts; -- cgit v1.2.3