summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-02 23:10:25 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-02 23:10:25 +0300
commit1081230b748de8f03f37f80c53dfa89feda9b8de (patch)
tree7238d60e01f0843bad8f03b5d84e4220fbba5e76 /include
parentdf910390e2db07a76c87f258475f6c96253cee6c (diff)
parent2ca495ac27d245513c11fed70591b1838250e240 (diff)
downloadlinux-1081230b748de8f03f37f80c53dfa89feda9b8de.tar.xz
Merge branch 'for-4.3/core' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe: "This first core part of the block IO changes contains: - Cleanup of the bio IO error signaling from Christoph. We used to rely on the uptodate bit and passing around of an error, now we store the error in the bio itself. - Improvement of the above from myself, by shrinking the bio size down again to fit in two cachelines on x86-64. - Revert of the max_hw_sectors cap removal from a revision again, from Jeff Moyer. This caused performance regressions in various tests. Reinstate the limit, bump it to a more reasonable size instead. - Make /sys/block/<dev>/queue/discard_max_bytes writeable, by me. Most devices have huge trim limits, which can cause nasty latencies when deleting files. Enable the admin to configure the size down. We will look into having a more sane default instead of UINT_MAX sectors. - Improvement of the SGP gaps logic from Keith Busch. - Enable the block core to handle arbitrarily sized bios, which enables a nice simplification of bio_add_page() (which is an IO hot path). From Kent. - Improvements to the partition io stats accounting, making it faster. From Ming Lei. - Also from Ming Lei, a basic fixup for overflow of the sysfs pending file in blk-mq, as well as a fix for a blk-mq timeout race condition. - Ming Lin has been carrying Kents above mentioned patches forward for a while, and testing them. Ming also did a few fixes around that. - Sasha Levin found and fixed a use-after-free problem introduced by the bio->bi_error changes from Christoph. - Small blk cgroup cleanup from Viresh Kumar" * 'for-4.3/core' of git://git.kernel.dk/linux-block: (26 commits) blk: Fix bio_io_vec index when checking bvec gaps block: Replace SG_GAPS with new queue limits mask block: bump BLK_DEF_MAX_SECTORS to 2560 Revert "block: remove artifical max_hw_sectors cap" blk-mq: fix race between timeout and freeing request blk-mq: fix buffer overflow when reading sysfs file of 'pending' Documentation: update notes in biovecs about arbitrarily sized bios block: remove bio_get_nr_vecs() fs: use helper bio_add_page() instead of open coding on bi_io_vec block: kill merge_bvec_fn() completely md/raid5: get rid of bio_fits_rdev() md/raid5: split bio for chunk_aligned_read block: remove split code in blkdev_issue_{discard,write_same} btrfs: remove bio splitting and merge_bvec_fn() calls bcache: remove driver private bio splitting code block: simplify bio_add_page() block: make generic_make_request handle arbitrarily sized bios blk-cgroup: Drop unlikely before IS_ERR(_OR_NULL) block: don't access bio->bi_error after bio_put() block: shrink struct bio down to 2 cache lines again ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/bio.h38
-rw-r--r--include/linux/blk-cgroup.h2
-rw-r--r--include/linux/blk_types.h10
-rw-r--r--include/linux/blkdev.h36
-rw-r--r--include/linux/device-mapper.h4
-rw-r--r--include/linux/genhd.h33
-rw-r--r--include/linux/swap.h4
7 files changed, 79 insertions, 48 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5e963a6d7c14..b9b6e046b52e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -187,17 +187,6 @@ static inline void *bio_data(struct bio *bio)
__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q)))
/*
- * Check if adding a bio_vec after bprv with offset would create a gap in
- * the SG list. Most drivers don't care about this, but some do.
- */
-static inline bool bvec_gap_to_prev(struct bio_vec *bprv, unsigned int offset)
-{
- return offset || ((bprv->bv_offset + bprv->bv_len) & (PAGE_SIZE - 1));
-}
-
-#define bio_io_error(bio) bio_endio((bio), -EIO)
-
-/*
* drivers should _never_ use the all version - the bio may have been split
* before it got to the driver and the driver won't own all of it
*/
@@ -306,6 +295,21 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count)
atomic_set(&bio->__bi_cnt, count);
}
+static inline bool bio_flagged(struct bio *bio, unsigned int bit)
+{
+ return (bio->bi_flags & (1U << bit)) != 0;
+}
+
+static inline void bio_set_flag(struct bio *bio, unsigned int bit)
+{
+ bio->bi_flags |= (1U << bit);
+}
+
+static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
+{
+ bio->bi_flags &= ~(1U << bit);
+}
+
enum bip_flags {
BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */
BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */
@@ -426,7 +430,14 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
}
-extern void bio_endio(struct bio *, int);
+extern void bio_endio(struct bio *);
+
+static inline void bio_io_error(struct bio *bio)
+{
+ bio->bi_error = -EIO;
+ bio_endio(bio);
+}
+
struct request_queue;
extern int bio_phys_segments(struct request_queue *, struct bio *);
@@ -440,7 +451,6 @@ void bio_chain(struct bio *, struct bio *);
extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
unsigned int, unsigned int);
-extern int bio_get_nr_vecs(struct block_device *);
struct rq_map_data;
extern struct bio *bio_map_user_iov(struct request_queue *,
const struct iov_iter *, gfp_t);
@@ -717,7 +727,7 @@ extern void bio_integrity_free(struct bio *);
extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
extern bool bio_integrity_enabled(struct bio *bio);
extern int bio_integrity_prep(struct bio *);
-extern void bio_integrity_endio(struct bio *, int);
+extern void bio_integrity_endio(struct bio *);
extern void bio_integrity_advance(struct bio *, unsigned int);
extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1b62d768c7df..a4cd1641e9e2 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -374,7 +374,7 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
* root_rl in such cases.
*/
blkg = blkg_lookup_create(blkcg, q);
- if (unlikely(IS_ERR(blkg)))
+ if (IS_ERR(blkg))
goto root_rl;
blkg_get(blkg);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 7303b3405520..4b7b4ebaa633 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -14,7 +14,7 @@ struct page;
struct block_device;
struct io_context;
struct cgroup_subsys_state;
-typedef void (bio_end_io_t) (struct bio *, int);
+typedef void (bio_end_io_t) (struct bio *);
typedef void (bio_destructor_t) (struct bio *);
/*
@@ -46,7 +46,8 @@ struct bvec_iter {
struct bio {
struct bio *bi_next; /* request queue link */
struct block_device *bi_bdev;
- unsigned long bi_flags; /* status, command, etc */
+ unsigned int bi_flags; /* status, command, etc */
+ int bi_error;
unsigned long bi_rw; /* bottom bits READ/WRITE,
* top bits priority
*/
@@ -111,7 +112,6 @@ struct bio {
/*
* bio flags
*/
-#define BIO_UPTODATE 0 /* ok after I/O completion */
#define BIO_SEG_VALID 1 /* bi_phys_segments valid */
#define BIO_CLONED 2 /* doesn't own data */
#define BIO_BOUNCED 3 /* bio is a bounce bio */
@@ -129,14 +129,12 @@ struct bio {
#define BIO_RESET_BITS 13
#define BIO_OWNS_VEC 13 /* bio_free() should free bvec */
-#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
-
/*
* top 4 bits of bio flags indicate the pool this bio came from
*/
#define BIO_POOL_BITS (4)
#define BIO_POOL_NONE ((1UL << BIO_POOL_BITS) - 1)
-#define BIO_POOL_OFFSET (BITS_PER_LONG - BIO_POOL_BITS)
+#define BIO_POOL_OFFSET (32 - BIO_POOL_BITS)
#define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET)
#define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d4068c17d0df..a622f270f09e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -213,14 +213,6 @@ typedef int (prep_rq_fn) (struct request_queue *, struct request *);
typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
struct bio_vec;
-struct bvec_merge_data {
- struct block_device *bi_bdev;
- sector_t bi_sector;
- unsigned bi_size;
- unsigned long bi_rw;
-};
-typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
- struct bio_vec *);
typedef void (softirq_done_fn)(struct request *);
typedef int (dma_drain_needed_fn)(struct request *);
typedef int (lld_busy_fn) (struct request_queue *q);
@@ -258,6 +250,7 @@ struct blk_queue_tag {
struct queue_limits {
unsigned long bounce_pfn;
unsigned long seg_boundary_mask;
+ unsigned long virt_boundary_mask;
unsigned int max_hw_sectors;
unsigned int chunk_sectors;
@@ -268,6 +261,7 @@ struct queue_limits {
unsigned int io_min;
unsigned int io_opt;
unsigned int max_discard_sectors;
+ unsigned int max_hw_discard_sectors;
unsigned int max_write_same_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
@@ -305,7 +299,6 @@ struct request_queue {
make_request_fn *make_request_fn;
prep_rq_fn *prep_rq_fn;
unprep_rq_fn *unprep_rq_fn;
- merge_bvec_fn *merge_bvec_fn;
softirq_done_fn *softirq_done_fn;
rq_timed_out_fn *rq_timed_out_fn;
dma_drain_needed_fn *dma_drain_needed;
@@ -462,6 +455,7 @@ struct request_queue {
struct blk_mq_tag_set *tag_set;
struct list_head tag_set_list;
+ struct bio_set *bio_split;
};
#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
@@ -486,7 +480,6 @@ struct request_queue {
#define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */
#define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */
#define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/
-#define QUEUE_FLAG_SG_GAPS 22 /* queue doesn't support SG gaps */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
@@ -782,6 +775,8 @@ extern void blk_rq_unprep_clone(struct request *rq);
extern int blk_insert_cloned_request(struct request_queue *q,
struct request *rq);
extern void blk_delay_queue(struct request_queue *, unsigned long);
+extern void blk_queue_split(struct request_queue *, struct bio **,
+ struct bio_set *);
extern void blk_recount_segments(struct request_queue *, struct bio *);
extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
@@ -986,9 +981,9 @@ extern int blk_queue_dma_drain(struct request_queue *q,
void *buf, unsigned int size);
extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
+extern void blk_queue_virt_boundary(struct request_queue *, unsigned long);
extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn);
-extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
extern void blk_queue_dma_alignment(struct request_queue *, int);
extern void blk_queue_update_dma_alignment(struct request_queue *, int);
extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
@@ -1138,6 +1133,7 @@ extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
enum blk_default_limits {
BLK_MAX_SEGMENTS = 128,
BLK_SAFE_MAX_SECTORS = 255,
+ BLK_DEF_MAX_SECTORS = 2560,
BLK_MAX_SEGMENT_SIZE = 65536,
BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL,
};
@@ -1154,6 +1150,11 @@ static inline unsigned long queue_segment_boundary(struct request_queue *q)
return q->limits.seg_boundary_mask;
}
+static inline unsigned long queue_virt_boundary(struct request_queue *q)
+{
+ return q->limits.virt_boundary_mask;
+}
+
static inline unsigned int queue_max_sectors(struct request_queue *q)
{
return q->limits.max_sectors;
@@ -1354,6 +1355,19 @@ static inline void put_dev_sector(Sector p)
page_cache_release(p.v);
}
+/*
+ * Check if adding a bio_vec after bprv with offset would create a gap in
+ * the SG list. Most drivers don't care about this, but some do.
+ */
+static inline bool bvec_gap_to_prev(struct request_queue *q,
+ struct bio_vec *bprv, unsigned int offset)
+{
+ if (!queue_virt_boundary(q))
+ return false;
+ return offset ||
+ ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+}
+
struct work_struct;
int kblockd_schedule_work(struct work_struct *work);
int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 51cc1deb7af3..76d23fa8c7d3 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -82,9 +82,6 @@ typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv);
typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
unsigned long arg);
-typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
- struct bio_vec *biovec, int max_size);
-
/*
* These iteration functions are typically used to check (and combine)
* properties of underlying devices.
@@ -160,7 +157,6 @@ struct target_type {
dm_status_fn status;
dm_message_fn message;
dm_ioctl_fn ioctl;
- dm_merge_fn merge;
dm_busy_fn busy;
dm_iterate_devices_fn iterate_devices;
dm_io_hints_fn io_hints;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index ec274e0f4ed2..2adbfa6d02bc 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -13,6 +13,7 @@
#include <linux/kdev_t.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
+#include <linux/percpu-refcount.h>
#ifdef CONFIG_BLOCK
@@ -124,7 +125,7 @@ struct hd_struct {
#else
struct disk_stats dkstats;
#endif
- atomic_t ref;
+ struct percpu_ref ref;
struct rcu_head rcu_head;
};
@@ -611,7 +612,7 @@ extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
sector_t len, int flags,
struct partition_meta_info
*info);
-extern void __delete_partition(struct hd_struct *);
+extern void __delete_partition(struct percpu_ref *);
extern void delete_partition(struct gendisk *, int);
extern void printk_all_partitions(void);
@@ -640,27 +641,39 @@ extern ssize_t part_fail_store(struct device *dev,
const char *buf, size_t count);
#endif /* CONFIG_FAIL_MAKE_REQUEST */
-static inline void hd_ref_init(struct hd_struct *part)
+static inline int hd_ref_init(struct hd_struct *part)
{
- atomic_set(&part->ref, 1);
- smp_mb();
+ if (percpu_ref_init(&part->ref, __delete_partition, 0,
+ GFP_KERNEL))
+ return -ENOMEM;
+ return 0;
}
static inline void hd_struct_get(struct hd_struct *part)
{
- atomic_inc(&part->ref);
- smp_mb__after_atomic();
+ percpu_ref_get(&part->ref);
}
static inline int hd_struct_try_get(struct hd_struct *part)
{
- return atomic_inc_not_zero(&part->ref);
+ return percpu_ref_tryget_live(&part->ref);
}
static inline void hd_struct_put(struct hd_struct *part)
{
- if (atomic_dec_and_test(&part->ref))
- __delete_partition(part);
+ percpu_ref_put(&part->ref);
+}
+
+static inline void hd_struct_kill(struct hd_struct *part)
+{
+ percpu_ref_kill(&part->ref);
+}
+
+static inline void hd_free_part(struct hd_struct *part)
+{
+ free_part_stats(part);
+ free_part_info(part);
+ percpu_ref_exit(&part->ref);
}
/*
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 38874729dc5f..31496d201fdc 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -373,9 +373,9 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry)
/* linux/mm/page_io.c */
extern int swap_readpage(struct page *);
extern int swap_writepage(struct page *page, struct writeback_control *wbc);
-extern void end_swap_bio_write(struct bio *bio, int err);
+extern void end_swap_bio_write(struct bio *bio);
extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
- void (*end_write_func)(struct bio *, int));
+ bio_end_io_t end_write_func);
extern int swap_set_page_dirty(struct page *page);
int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,