summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-02-21 22:02:48 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-02-21 22:02:48 +0300
commit582cd91f69de8e44857cb610ebca661dac8656b7 (patch)
tree0d680db02a5c236ee87b408b3f13ce33ebaca907 /include
parentbd018bbaa58640da786d4289563e71c5ef3938c7 (diff)
parentf885056a48ccf4ad4332def91e973f3993fa8695 (diff)
downloadlinux-582cd91f69de8e44857cb610ebca661dac8656b7.tar.xz
Merge tag 'for-5.12/block-2021-02-17' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe: "Another nice round of removing more code than what is added, mostly due to Christoph's relentless pursuit of tech debt removal/cleanups. This pull request contains: - Two series of BFQ improvements (Paolo, Jan, Jia) - Block iov_iter improvements (Pavel) - bsg error path fix (Pan) - blk-mq scheduler improvements (Jan) - -EBUSY discard fix (Jan) - bvec allocation improvements (Ming, Christoph) - bio allocation and init improvements (Christoph) - Store bdev pointer in bio instead of gendisk + partno (Christoph) - Block trace point cleanups (Christoph) - hard read-only vs read-only split (Christoph) - Block based swap cleanups (Christoph) - Zoned write granularity support (Damien) - Various fixes/tweaks (Chunguang, Guoqing, Lei, Lukas, Huhai)" * tag 'for-5.12/block-2021-02-17' of git://git.kernel.dk/linux-block: (104 commits) mm: simplify swapdev_block sd_zbc: clear zone resources for non-zoned case block: introduce blk_queue_clear_zone_settings() zonefs: use zone write granularity as block size block: introduce zone_write_granularity limit block: use blk_queue_set_zoned in add_partition() nullb: use blk_queue_set_zoned() to setup zoned devices nvme: cleanup zone information initialization block: document zone_append_max_bytes attribute block: use bi_max_vecs to find the bvec pool md/raid10: remove dead code in reshape_request block: mark the bio as cloned in bio_iov_bvec_set block: set BIO_NO_PAGE_REF in bio_iov_bvec_set block: remove a layer of indentation in bio_iov_iter_get_pages block: turn the nr_iovecs argument to bio_alloc* into an unsigned short block: remove the 1 and 4 vec bvec_slabs entries block: streamline bvec_alloc block: factor out a bvec_alloc_gfp helper block: move struct biovec_slab to bio.c block: reuse BIO_INLINE_VECS for integrity bvecs ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/bio.h55
-rw-r--r--include/linux/blk-mq.h8
-rw-r--r--include/linux/blk_types.h33
-rw-r--r--include/linux/blkdev.h53
-rw-r--r--include/linux/elevator.h2
-rw-r--r--include/linux/genhd.h27
-rw-r--r--include/linux/swap.h1
7 files changed, 69 insertions, 110 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index de62911473bb..5b468f2242ff 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -10,6 +10,7 @@
#include <linux/ioprio.h>
/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
#include <linux/blk_types.h>
+#include <linux/uio.h>
#define BIO_DEBUG
@@ -328,7 +329,6 @@ struct bio_integrity_payload {
struct bvec_iter bip_iter;
- unsigned short bip_slab; /* slab the bip came from */
unsigned short bip_vcnt; /* # of integrity bio_vecs */
unsigned short bip_max_vcnt; /* integrity bio_vec slots */
unsigned short bip_flags; /* control flags */
@@ -406,7 +406,9 @@ extern void bioset_exit(struct bio_set *);
extern int biovec_init_pool(mempool_t *pool, int pool_entries);
extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
-extern struct bio *bio_alloc_bioset(gfp_t, unsigned int, struct bio_set *);
+struct bio *bio_alloc_bioset(gfp_t gfp, unsigned short nr_iovecs,
+ struct bio_set *bs);
+struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs);
extern void bio_put(struct bio *);
extern void __bio_clone_fast(struct bio *, struct bio *);
@@ -414,16 +416,11 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
extern struct bio_set fs_bio_set;
-static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs)
{
return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set);
}
-static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
-{
- return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
-}
-
extern blk_qc_t submit_bio(struct bio *);
extern void bio_endio(struct bio *);
@@ -441,6 +438,18 @@ static inline void bio_wouldblock_error(struct bio *bio)
bio_endio(bio);
}
+/*
+ * Calculate number of bvec segments that should be allocated to fit data
+ * pointed by @iter. If @iter is backed by bvec it's going to be reused
+ * instead of allocating a new one.
+ */
+static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
+{
+ if (iov_iter_is_bvec(iter))
+ return 0;
+ return iov_iter_npages(iter, max_segs);
+}
+
struct request_queue;
extern int submit_bio_wait(struct bio *bio);
@@ -480,29 +489,26 @@ static inline void zero_fill_bio(struct bio *bio)
zero_fill_bio_iter(bio, bio->bi_iter);
}
-extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
-extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
-extern unsigned int bvec_nr_vecs(unsigned short idx);
extern const char *bio_devname(struct bio *bio, char *buffer);
-#define bio_set_dev(bio, bdev) \
-do { \
- if ((bio)->bi_disk != (bdev)->bd_disk) \
- bio_clear_flag(bio, BIO_THROTTLED);\
- (bio)->bi_disk = (bdev)->bd_disk; \
- (bio)->bi_partno = (bdev)->bd_partno; \
- bio_associate_blkg(bio); \
+#define bio_set_dev(bio, bdev) \
+do { \
+ bio_clear_flag(bio, BIO_REMAPPED); \
+ if ((bio)->bi_bdev != (bdev)) \
+ bio_clear_flag(bio, BIO_THROTTLED); \
+ (bio)->bi_bdev = (bdev); \
+ bio_associate_blkg(bio); \
} while (0)
#define bio_copy_dev(dst, src) \
do { \
- (dst)->bi_disk = (src)->bi_disk; \
- (dst)->bi_partno = (src)->bi_partno; \
+ bio_clear_flag(dst, BIO_REMAPPED); \
+ (dst)->bi_bdev = (src)->bi_bdev; \
bio_clone_blkg_association(dst, src); \
} while (0)
#define bio_dev(bio) \
- disk_devt((bio)->bi_disk)
+ disk_devt((bio)->bi_bdev->bd_disk)
#ifdef CONFIG_BLK_CGROUP
void bio_associate_blkg(struct bio *bio);
@@ -705,6 +711,7 @@ struct bio_set {
mempool_t bvec_integrity_pool;
#endif
+ unsigned int back_pad;
/*
* Deadlock avoidance for stacking block drivers: see comments in
* bio_alloc_bioset() for details
@@ -715,12 +722,6 @@ struct bio_set {
struct workqueue_struct *rescue_workqueue;
};
-struct biovec_slab {
- int nr_vecs;
- char *name;
- struct kmem_cache *slab;
-};
-
static inline bool bioset_initialized(struct bio_set *bs)
{
return bs->bio_slab != NULL;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d705b174d346..aabbf6830ffc 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -140,10 +140,6 @@ struct blk_mq_hw_ctx {
* shared across request queues.
*/
atomic_t nr_active;
- /**
- * @elevator_queued: Number of queued requests on hctx.
- */
- atomic_t elevator_queued;
/** @cpuhp_online: List to store request if CPU is going to die */
struct hlist_node cpuhp_online;
@@ -602,8 +598,8 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
rq->bio = rq->biotail = bio;
rq->ioprio = bio_prio(bio);
- if (bio->bi_disk)
- rq->rq_disk = bio->bi_disk;
+ if (bio->bi_bdev)
+ rq->rq_disk = bio->bi_bdev->bd_disk;
}
blk_qc_t blk_mq_submit_bio(struct bio *bio);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 866f74261b3b..db026b6ec15a 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -222,16 +222,15 @@ static inline void bio_issue_init(struct bio_issue *issue,
*/
struct bio {
struct bio *bi_next; /* request queue link */
- struct gendisk *bi_disk;
+ struct block_device *bi_bdev;
unsigned int bi_opf; /* bottom bits req flags,
* top bits REQ_OP. Use
* accessors.
*/
- unsigned short bi_flags; /* status, etc and bvec pool number */
+ unsigned short bi_flags; /* BIO_* below */
unsigned short bi_ioprio;
unsigned short bi_write_hint;
blk_status_t bi_status;
- u8 bi_partno;
atomic_t __bi_remaining;
struct bvec_iter bi_iter;
@@ -304,36 +303,10 @@ enum {
* of this bio. */
BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
BIO_TRACKED, /* set if bio goes through the rq_qos path */
+ BIO_REMAPPED,
BIO_FLAG_LAST
};
-/* See BVEC_POOL_OFFSET below before adding new flags */
-
-/*
- * We support 6 different bvec pools, the last one is magic in that it
- * is backed by a mempool.
- */
-#define BVEC_POOL_NR 6
-#define BVEC_POOL_MAX (BVEC_POOL_NR - 1)
-
-/*
- * Top 3 bits of bio flags indicate the pool the bvecs came from. We add
- * 1 to the actual index so that 0 indicates that there are no bvecs to be
- * freed.
- */
-#define BVEC_POOL_BITS (3)
-#define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS)
-#define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET)
-#if (1<< BVEC_POOL_BITS) < (BVEC_POOL_NR+1)
-# error "BVEC_POOL_BITS is too small"
-#endif
-
-/*
- * Flags starting here get preserved by bio_reset() - this includes
- * only BVEC_POOL_IDX()
- */
-#define BIO_RESET_BITS BVEC_POOL_OFFSET
-
typedef __u32 __bitwise blk_mq_req_flags_t;
/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f94ee3089e01..9149f4a5adb3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -337,6 +337,7 @@ struct queue_limits {
unsigned int max_zone_append_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
+ unsigned int zone_write_granularity;
unsigned short max_segments;
unsigned short max_integrity_segments;
@@ -948,9 +949,8 @@ extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, uns
extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
struct rq_map_data *, const struct iov_iter *,
gfp_t);
-extern void blk_execute_rq(struct request_queue *, struct gendisk *,
- struct request *, int);
-extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
+extern void blk_execute_rq(struct gendisk *, struct request *, int);
+extern void blk_execute_rq_nowait(struct gendisk *,
struct request *, int, rq_end_io_fn *);
/* Helper to convert REQ_OP_XXX to its string format XXX */
@@ -1161,6 +1161,8 @@ extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
unsigned int max_zone_append_sectors);
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
+void blk_queue_zone_write_granularity(struct request_queue *q,
+ unsigned int size);
extern void blk_queue_alignment_offset(struct request_queue *q,
unsigned int alignment);
void blk_queue_update_readahead(struct request_queue *q);
@@ -1289,7 +1291,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
!list_empty(&plug->cb_list));
}
-int blkdev_issue_flush(struct block_device *, gfp_t);
+int blkdev_issue_flush(struct block_device *bdev);
long nr_blockdev_pages(void);
#else /* CONFIG_BLOCK */
struct blk_plug {
@@ -1317,7 +1319,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
return false;
}
-static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
+static inline int blkdev_issue_flush(struct block_device *bdev)
{
return 0;
}
@@ -1474,6 +1476,18 @@ static inline int bdev_io_opt(struct block_device *bdev)
return queue_io_opt(bdev_get_queue(bdev));
}
+static inline unsigned int
+queue_zone_write_granularity(const struct request_queue *q)
+{
+ return q->limits.zone_write_granularity;
+}
+
+static inline unsigned int
+bdev_zone_write_granularity(struct block_device *bdev)
+{
+ return queue_zone_write_granularity(bdev_get_queue(bdev));
+}
+
static inline int queue_alignment_offset(const struct request_queue *q)
{
if (q->limits.misaligned)
@@ -1954,21 +1968,9 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
void disk_end_io_acct(struct gendisk *disk, unsigned int op,
unsigned long start_time);
-unsigned long part_start_io_acct(struct gendisk *disk,
- struct block_device **part, struct bio *bio);
-void part_end_io_acct(struct block_device *part, struct bio *bio,
- unsigned long start_time);
-
-/**
- * bio_start_io_acct - start I/O accounting for bio based drivers
- * @bio: bio to start account for
- *
- * Returns the start time that should be passed back to bio_end_io_acct().
- */
-static inline unsigned long bio_start_io_acct(struct bio *bio)
-{
- return disk_start_io_acct(bio->bi_disk, bio_sectors(bio), bio_op(bio));
-}
+unsigned long bio_start_io_acct(struct bio *bio);
+void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
+ struct block_device *orig_bdev);
/**
* bio_end_io_acct - end I/O accounting for bio based drivers
@@ -1977,7 +1979,7 @@ static inline unsigned long bio_start_io_acct(struct bio *bio)
*/
static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
{
- return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time);
+ return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev);
}
int bdev_read_only(struct block_device *bdev);
@@ -2012,21 +2014,16 @@ void bdev_add(struct block_device *bdev, dev_t dev);
struct block_device *I_BDEV(struct inode *inode);
struct block_device *bdgrab(struct block_device *bdev);
void bdput(struct block_device *);
+int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart,
+ loff_t lend);
#ifdef CONFIG_BLOCK
void invalidate_bdev(struct block_device *bdev);
-int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart,
- loff_t lend);
int sync_blockdev(struct block_device *bdev);
#else
static inline void invalidate_bdev(struct block_device *bdev)
{
}
-static inline int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
- loff_t lstart, loff_t lend)
-{
- return 0;
-}
static inline int sync_blockdev(struct block_device *bdev)
{
return 0;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index bacc40a0bdf3..1fe8e105b83b 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -172,6 +172,8 @@ extern struct request *elv_rb_find(struct rb_root *, sector_t);
/* Supports zoned block devices sequential write constraint */
#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0)
+/* Supports scheduling on multiple hardware queues */
+#define ELEVATOR_F_MQ_AWARE (1U << 1)
#endif /* CONFIG_BLOCK */
#endif
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 809aaa32d53c..f364619092cc 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -32,6 +32,7 @@ extern struct class block_class;
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/workqueue.h>
+#include <linux/xarray.h>
#define PARTITION_META_INFO_VOLNAMELTH 64
/*
@@ -116,13 +117,6 @@ enum {
DISK_EVENT_FLAG_UEVENT = 1 << 1,
};
-struct disk_part_tbl {
- struct rcu_head rcu_head;
- int len;
- struct block_device __rcu *last_lookup;
- struct block_device __rcu *part[];
-};
-
struct disk_events;
struct badblocks;
@@ -148,12 +142,7 @@ struct gendisk {
unsigned short events; /* supported events */
unsigned short event_flags; /* flags related to event processing */
- /* Array of pointers to partitions indexed by partno.
- * Protected with matching bdev lock but stat and other
- * non-critical accesses use RCU. Always access through
- * helpers.
- */
- struct disk_part_tbl __rcu *part_tbl;
+ struct xarray part_tbl;
struct block_device *part0;
const struct block_device_operations *fops;
@@ -163,6 +152,7 @@ struct gendisk {
int flags;
unsigned long state;
#define GD_NEED_PART_SCAN 0
+#define GD_READ_ONLY 1
struct kobject *slave_dir;
struct timer_rand_state *random;
@@ -212,10 +202,11 @@ static inline dev_t disk_devt(struct gendisk *disk)
return MKDEV(disk->major, disk->first_minor);
}
+void disk_uevent(struct gendisk *disk, enum kobject_action action);
+
/*
* Smarter partition iterator without context limits.
*/
-#define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */
#define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */
#define DISK_PITER_INCL_PART0 (1 << 2) /* include partition 0 */
#define DISK_PITER_INCL_EMPTY_PART0 (1 << 3) /* include empty partition 0 */
@@ -223,7 +214,7 @@ static inline dev_t disk_devt(struct gendisk *disk)
struct disk_part_iter {
struct gendisk *disk;
struct block_device *part;
- int idx;
+ unsigned long idx;
unsigned int flags;
};
@@ -231,7 +222,6 @@ extern void disk_part_iter_init(struct disk_part_iter *piter,
struct gendisk *disk, unsigned int flags);
struct block_device *disk_part_iter_next(struct disk_part_iter *piter);
extern void disk_part_iter_exit(struct disk_part_iter *piter);
-extern bool disk_has_partitions(struct gendisk *disk);
/* block/genhd.c */
extern void device_add_disk(struct device *parent, struct gendisk *disk,
@@ -249,11 +239,12 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk)
extern void del_gendisk(struct gendisk *gp);
extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
-extern void set_disk_ro(struct gendisk *disk, int flag);
+void set_disk_ro(struct gendisk *disk, bool read_only);
static inline int get_disk_ro(struct gendisk *disk)
{
- return disk->part0->bd_read_only;
+ return disk->part0->bd_read_only ||
+ test_bit(GD_READ_ONLY, &disk->state);
}
extern void disk_block_events(struct gendisk *disk);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 596bc2f4d9b0..3f1f7ae0fbe9 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -468,7 +468,6 @@ extern int free_swap_and_cache(swp_entry_t);
int swap_type_of(dev_t device, sector_t offset);
int find_first_swap(dev_t *device);
extern unsigned int count_swap_pages(int, int);
-extern sector_t map_swap_page(struct page *, struct block_device **);
extern sector_t swapdev_block(int, pgoff_t);
extern int page_swapcount(struct page *);
extern int __swap_count(swp_entry_t entry);