summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 20:23:25 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 20:23:25 +0300
commit73ba2fb33c492916853dfe63e3b3163da0be661d (patch)
treec2fda8ca1273744d2e884d24189a15ac1a7d63c2 /include/linux
parent958f338e96f874a0d29442396d6adf9c1e17aa2d (diff)
parentb86d865cb1cae1e61527ea0b8977078bbf694328 (diff)
downloadlinux-73ba2fb33c492916853dfe63e3b3163da0be661d.tar.xz
Merge tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "First pull request for this merge window, there will also be a followup request with some stragglers. This pull request contains: - Fix for a thundering heard issue in the wbt block code (Anchal Agarwal) - A few NVMe pull requests: * Improved tracepoints (Keith) * Larger inline data support for RDMA (Steve Wise) * RDMA setup/teardown fixes (Sagi) * Effects log suppor for NVMe target (Chaitanya Kulkarni) * Buffered IO suppor for NVMe target (Chaitanya Kulkarni) * TP4004 (ANA) support (Christoph) * Various NVMe fixes - Block io-latency controller support. Much needed support for properly containing block devices. (Josef) - Series improving how we handle sense information on the stack (Kees) - Lightnvm fixes and updates/improvements (Mathias/Javier et al) - Zoned device support for null_blk (Matias) - AIX partition fixes (Mauricio Faria de Oliveira) - DIF checksum code made generic (Max Gurtovoy) - Add support for discard in iostats (Michael Callahan / Tejun) - Set of updates for BFQ (Paolo) - Removal of async write support for bsg (Christoph) - Bio page dirtying and clone fixups (Christoph) - Set of bcache fix/changes (via Coly) - Series improving blk-mq queue setup/teardown speed (Ming) - Series improving merging performance on blk-mq (Ming) - Lots of other fixes and cleanups from a slew of folks" * tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block: (190 commits) blkcg: Make blkg_root_lookup() work for queues in bypass mode bcache: fix error setting writeback_rate through sysfs interface null_blk: add lock drop/acquire annotation Blk-throttle: reduce tail io latency when iops limit is enforced block: paride: pd: mark expected switch fall-throughs block: Ensure that a request queue is dissociated from the cgroup controller block: Introduce blk_exit_queue() blkcg: Introduce blkg_root_lookup() block: Remove two superfluous #include directives blk-mq: count the hctx as active before allocating tag block: bvec_nr_vecs() returns value for wrong slab bcache: trivial - remove tailing backslash in macro BTREE_FLAG bcache: make the pr_err statement used for ENOENT only in sysfs_attatch section bcache: set max writeback rate when I/O request is idle bcache: add code comments for bset.c bcache: fix mistaken comments in request.c bcache: fix mistaken code comments in bcache.h bcache: add a comment in super.c bcache: avoid unncessary cache prefetch bch_btree_node_get() bcache: display rate debug parameters to 0 when writeback is not running ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/bio.h19
-rw-r--r--include/linux/blk-cgroup.h146
-rw-r--r--include/linux/blk-mq.h4
-rw-r--r--include/linux/blk_types.h27
-rw-r--r--include/linux/blkdev.h66
-rw-r--r--include/linux/cdrom.h3
-rw-r--r--include/linux/cgroup-defs.h3
-rw-r--r--include/linux/genhd.h14
-rw-r--r--include/linux/memcontrol.h13
-rw-r--r--include/linux/nvme.h72
-rw-r--r--include/linux/sched.h8
-rw-r--r--include/linux/swap.h11
-rw-r--r--include/linux/t10-pi.h24
-rw-r--r--include/linux/tracehook.h2
14 files changed, 361 insertions, 51 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index f08f5fe7bd08..51371740d2a8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -429,7 +429,6 @@ extern void bio_put(struct bio *);
extern void __bio_clone_fast(struct bio *, struct bio *);
extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
-extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
extern struct bio_set fs_bio_set;
@@ -443,12 +442,6 @@ static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
}
-static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
-{
- return bio_clone_bioset(bio, gfp_mask, NULL);
-
-}
-
extern blk_qc_t submit_bio(struct bio *);
extern void bio_endio(struct bio *);
@@ -496,9 +489,9 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
-void generic_start_io_acct(struct request_queue *q, int rw,
+void generic_start_io_acct(struct request_queue *q, int op,
unsigned long sectors, struct hd_struct *part);
-void generic_end_io_acct(struct request_queue *q, int rw,
+void generic_end_io_acct(struct request_queue *q, int op,
struct hd_struct *part,
unsigned long start_time);
@@ -553,8 +546,16 @@ do { \
#define bio_dev(bio) \
disk_devt((bio)->bi_disk)
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
+#else
+static inline int bio_associate_blkcg_from_page(struct bio *bio,
+ struct page *page) { return 0; }
+#endif
+
#ifdef CONFIG_BLK_CGROUP
int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
+int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
void bio_disassociate_task(struct bio *bio);
void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
#else /* CONFIG_BLK_CGROUP */
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 6c666fd7de3c..34aec30e06c7 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -35,6 +35,7 @@ enum blkg_rwstat_type {
BLKG_RWSTAT_WRITE,
BLKG_RWSTAT_SYNC,
BLKG_RWSTAT_ASYNC,
+ BLKG_RWSTAT_DISCARD,
BLKG_RWSTAT_NR,
BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
@@ -136,6 +137,12 @@ struct blkcg_gq {
struct blkg_policy_data *pd[BLKCG_MAX_POLS];
struct rcu_head rcu_head;
+
+ atomic_t use_delay;
+ atomic64_t delay_nsec;
+ atomic64_t delay_start;
+ u64 last_delay;
+ int last_use;
};
typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
@@ -148,6 +155,8 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
+typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
+ size_t size);
struct blkcg_policy {
int plid;
@@ -167,6 +176,7 @@ struct blkcg_policy {
blkcg_pol_offline_pd_fn *pd_offline_fn;
blkcg_pol_free_pd_fn *pd_free_fn;
blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
+ blkcg_pol_stat_pd_fn *pd_stat_fn;
};
extern struct blkcg blkcg_root;
@@ -238,6 +248,42 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
return css_to_blkcg(task_css(current, io_cgrp_id));
}
+static inline bool blk_cgroup_congested(void)
+{
+ struct cgroup_subsys_state *css;
+ bool ret = false;
+
+ rcu_read_lock();
+ css = kthread_blkcg();
+ if (!css)
+ css = task_css(current, io_cgrp_id);
+ while (css) {
+ if (atomic_read(&css->cgroup->congestion_count)) {
+ ret = true;
+ break;
+ }
+ css = css->parent;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+/**
+ * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
+ * @return: true if this bio needs to be submitted with the root blkg context.
+ *
+ * In order to avoid priority inversions we sometimes need to issue a bio as if
+ * it were attached to the root blkg, and then backcharge to the actual owning
+ * blkg. The idea is we do bio_blkcg() to look up the actual context for the
+ * bio and attach the appropriate blkg to the bio. Then we call this helper and
+ * if it is true run with the root blkg for that queue and then do any
+ * backcharging to the originating cgroup once the io is complete.
+ */
+static inline bool bio_issue_as_root_blkg(struct bio *bio)
+{
+ return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
+}
+
/**
* blkcg_parent - get the parent of a blkcg
* @blkcg: blkcg of interest
@@ -296,6 +342,17 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
}
/**
+ * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for @q at the root level. See also blkg_lookup().
+ */
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
+{
+ return q->root_blkg;
+}
+
+/**
* blkg_to_pdata - get policy private data
* @blkg: blkg of interest
* @pol: policy of interest
@@ -355,6 +412,21 @@ static inline void blkg_get(struct blkcg_gq *blkg)
atomic_inc(&blkg->refcnt);
}
+/**
+ * blkg_try_get - try and get a blkg reference
+ * @blkg: blkg to get
+ *
+ * This is for use when doing an RCU lookup of the blkg. We may be in the midst
+ * of freeing this blkg, so we can only use it if the refcnt is not zero.
+ */
+static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
+{
+ if (atomic_inc_not_zero(&blkg->refcnt))
+ return blkg;
+ return NULL;
+}
+
+
void __blkg_release_rcu(struct rcu_head *rcu);
/**
@@ -589,7 +661,9 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
{
struct percpu_counter *cnt;
- if (op_is_write(op))
+ if (op_is_discard(op))
+ cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
+ else if (op_is_write(op))
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
else
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
@@ -706,8 +780,14 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
if (!throtl) {
blkg = blkg ?: q->root_blkg;
- blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
- bio->bi_iter.bi_size);
+ /*
+ * If the bio is flagged with BIO_QUEUE_ENTERED it means this
+ * is a split bio and we would have already accounted for the
+ * size of the bio.
+ */
+ if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
+ blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
+ bio->bi_iter.bi_size);
blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
}
@@ -715,6 +795,59 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
return !throtl;
}
+static inline void blkcg_use_delay(struct blkcg_gq *blkg)
+{
+ if (atomic_add_return(1, &blkg->use_delay) == 1)
+ atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+}
+
+static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
+{
+ int old = atomic_read(&blkg->use_delay);
+
+ if (old == 0)
+ return 0;
+
+ /*
+ * We do this song and dance because we can race with somebody else
+ * adding or removing delay. If we just did an atomic_dec we'd end up
+ * negative and we'd already be in trouble. We need to subtract 1 and
+ * then check to see if we were the last delay so we can drop the
+ * congestion count on the cgroup.
+ */
+ while (old) {
+ int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
+ if (cur == old)
+ break;
+ old = cur;
+ }
+
+ if (old == 0)
+ return 0;
+ if (old == 1)
+ atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+ return 1;
+}
+
+static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
+{
+ int old = atomic_read(&blkg->use_delay);
+ if (!old)
+ return;
+ /* We only want 1 person clearing the congestion count for this blkg. */
+ while (old) {
+ int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
+ if (cur == old) {
+ atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+ break;
+ }
+ old = cur;
+ }
+}
+
+void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
+void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
+void blkcg_maybe_throttle_current(void);
#else /* CONFIG_BLK_CGROUP */
struct blkcg {
@@ -734,9 +867,16 @@ struct blkcg_policy {
#define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
+static inline void blkcg_maybe_throttle_current(void) { }
+static inline bool blk_cgroup_congested(void) { return false; }
+
#ifdef CONFIG_BLOCK
+static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
+
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
+{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ca3f2c2edd85..1da59c16f637 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -35,10 +35,12 @@ struct blk_mq_hw_ctx {
struct sbitmap ctx_map;
struct blk_mq_ctx *dispatch_from;
+ unsigned int dispatch_busy;
- struct blk_mq_ctx **ctxs;
unsigned int nr_ctx;
+ struct blk_mq_ctx **ctxs;
+ spinlock_t dispatch_wait_lock;
wait_queue_entry_t dispatch_wait;
atomic_t wait_index;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3c4f390aea4b..f6dfb30737d8 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -179,11 +179,9 @@ struct bio {
*/
struct io_context *bi_ioc;
struct cgroup_subsys_state *bi_css;
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
- void *bi_cg_private;
+ struct blkcg_gq *bi_blkg;
struct bio_issue bi_issue;
#endif
-#endif
union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
struct bio_integrity_payload *bi_integrity; /* data integrity */
@@ -329,7 +327,7 @@ enum req_flag_bits {
/* for driver use */
__REQ_DRV,
-
+ __REQ_SWAP, /* swapping request. */
__REQ_NR_BITS, /* stops here */
};
@@ -351,6 +349,7 @@ enum req_flag_bits {
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
#define REQ_DRV (1ULL << __REQ_DRV)
+#define REQ_SWAP (1ULL << __REQ_SWAP)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
@@ -358,6 +357,14 @@ enum req_flag_bits {
#define REQ_NOMERGE_FLAGS \
(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)
+enum stat_group {
+ STAT_READ,
+ STAT_WRITE,
+ STAT_DISCARD,
+
+ NR_STAT_GROUPS
+};
+
#define bio_op(bio) \
((bio)->bi_opf & REQ_OP_MASK)
#define req_op(req) \
@@ -395,6 +402,18 @@ static inline bool op_is_sync(unsigned int op)
(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
}
+static inline bool op_is_discard(unsigned int op)
+{
+ return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
+}
+
+static inline int op_stat_group(unsigned int op)
+{
+ if (op_is_discard(op))
+ return STAT_DISCARD;
+ return op_is_write(op);
+}
+
typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE -1U
#define BLK_QC_T_SHIFT 16
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 79226ca8f80f..d6869e0e2b64 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,8 +27,6 @@
#include <linux/percpu-refcount.h>
#include <linux/scatterlist.h>
#include <linux/blkzoned.h>
-#include <linux/seqlock.h>
-#include <linux/u64_stats_sync.h>
struct module;
struct scsi_ioctl_command;
@@ -42,7 +40,7 @@ struct bsg_job;
struct blkcg_gq;
struct blk_flush_queue;
struct pr_ops;
-struct rq_wb;
+struct rq_qos;
struct blk_queue_stats;
struct blk_stat_callback;
@@ -442,10 +440,8 @@ struct request_queue {
int nr_rqs[2]; /* # allocated [a]sync rqs */
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
- atomic_t shared_hctx_restart;
-
struct blk_queue_stats *stats;
- struct rq_wb *rq_wb;
+ struct rq_qos *rq_qos;
/*
* If blkcg is not used, @q->root_rl serves all requests. If blkcg
@@ -592,6 +588,7 @@ struct request_queue {
struct queue_limits limits;
+#ifdef CONFIG_BLK_DEV_ZONED
/*
* Zoned block device information for request dispatch control.
* nr_zones is the total number of zones of the device. This is always
@@ -612,6 +609,7 @@ struct request_queue {
unsigned int nr_zones;
unsigned long *seq_zones_bitmap;
unsigned long *seq_zones_wlock;
+#endif /* CONFIG_BLK_DEV_ZONED */
/*
* sg stuff
@@ -800,11 +798,7 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
}
-static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
-{
- return q->nr_zones;
-}
-
+#ifdef CONFIG_BLK_DEV_ZONED
static inline unsigned int blk_queue_zone_no(struct request_queue *q,
sector_t sector)
{
@@ -820,6 +814,7 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
return false;
return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
}
+#endif /* CONFIG_BLK_DEV_ZONED */
static inline bool rq_is_sync(struct request *rq)
{
@@ -1070,6 +1065,7 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
}
+#ifdef CONFIG_BLK_DEV_ZONED
static inline unsigned int blk_rq_zone_no(struct request *rq)
{
return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
@@ -1079,6 +1075,7 @@ static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
{
return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
}
+#endif /* CONFIG_BLK_DEV_ZONED */
/*
* Some commands like WRITE SAME have a payload or data transfer size which
@@ -1437,8 +1434,6 @@ enum blk_default_limits {
BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL,
};
-#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
-
static inline unsigned long queue_segment_boundary(struct request_queue *q)
{
return q->limits.seg_boundary_mask;
@@ -1639,15 +1634,6 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
return 0;
}
-static inline unsigned int bdev_nr_zones(struct block_device *bdev)
-{
- struct request_queue *q = bdev_get_queue(bdev);
-
- if (q)
- return blk_queue_nr_zones(q);
- return 0;
-}
-
static inline int queue_dma_alignment(struct request_queue *q)
{
return q ? q->dma_alignment : 511;
@@ -1877,6 +1863,28 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
bip_next->bip_vec[0].bv_offset);
}
+/**
+ * bio_integrity_intervals - Return number of integrity intervals for a bio
+ * @bi: blk_integrity profile for device
+ * @sectors: Size of the bio in 512-byte sectors
+ *
+ * Description: The block layer calculates everything in 512 byte
+ * sectors but integrity metadata is done in terms of the data integrity
+ * interval size of the storage device. Convert the block layer sectors
+ * to the appropriate number of integrity intervals.
+ */
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+ unsigned int sectors)
+{
+ return sectors >> (bi->interval_exp - 9);
+}
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+ unsigned int sectors)
+{
+ return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
+}
+
#else /* CONFIG_BLK_DEV_INTEGRITY */
struct bio;
@@ -1950,12 +1958,24 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
return false;
}
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+ unsigned int sectors)
+{
+ return 0;
+}
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+ unsigned int sectors)
+{
+ return 0;
+}
+
#endif /* CONFIG_BLK_DEV_INTEGRITY */
struct block_device_operations {
int (*open) (struct block_device *, fmode_t);
void (*release) (struct gendisk *, fmode_t);
- int (*rw_page)(struct block_device *, sector_t, struct page *, bool);
+ int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
unsigned int (*check_events) (struct gendisk *disk,
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index e75dfd1f1dec..528271c60018 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -13,6 +13,7 @@
#include <linux/fs.h> /* not really needed, later.. */
#include <linux/list.h>
+#include <scsi/scsi_common.h>
#include <uapi/linux/cdrom.h>
struct packet_command
@@ -21,7 +22,7 @@ struct packet_command
unsigned char *buffer;
unsigned int buflen;
int stat;
- struct request_sense *sense;
+ struct scsi_sense_hdr *sshdr;
unsigned char data_direction;
int quiet;
int timeout;
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index c0e68f903011..ff20b677fb9f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -438,6 +438,9 @@ struct cgroup {
/* used to store eBPF programs */
struct cgroup_bpf bpf;
+ /* If there is block congestion on this cgroup. */
+ atomic_t congestion_count;
+
/* ids of the ancestors at each level including self */
int ancestor_ids[];
};
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6cb8a5789668..57864422a2c8 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
#include <linux/uuid.h>
+#include <linux/blk_types.h>
#ifdef CONFIG_BLOCK
@@ -82,10 +83,10 @@ struct partition {
} __attribute__((packed));
struct disk_stats {
- unsigned long sectors[2]; /* READs and WRITEs */
- unsigned long ios[2];
- unsigned long merges[2];
- unsigned long ticks[2];
+ unsigned long sectors[NR_STAT_GROUPS];
+ unsigned long ios[NR_STAT_GROUPS];
+ unsigned long merges[NR_STAT_GROUPS];
+ unsigned long ticks[NR_STAT_GROUPS];
unsigned long io_ticks;
unsigned long time_in_queue;
};
@@ -353,6 +354,11 @@ static inline void free_part_stats(struct hd_struct *part)
#endif /* CONFIG_SMP */
+#define part_stat_read_accum(part, field) \
+ (part_stat_read(part, field[STAT_READ]) + \
+ part_stat_read(part, field[STAT_WRITE]) + \
+ part_stat_read(part, field[STAT_DISCARD]))
+
#define part_stat_add(cpu, part, field, addnd) do { \
__part_stat_add((cpu), (part), field, addnd); \
if ((part)->partno) \
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6c6fb116e925..680d3395fc83 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -317,6 +317,9 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask, struct mem_cgroup **memcgp,
bool compound);
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask, struct mem_cgroup **memcgp,
+ bool compound);
void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
bool lrucare, bool compound);
void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
@@ -789,6 +792,16 @@ static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
return 0;
}
+static inline int mem_cgroup_try_charge_delay(struct page *page,
+ struct mm_struct *mm,
+ gfp_t gfp_mask,
+ struct mem_cgroup **memcgp,
+ bool compound)
+{
+ *memcgp = NULL;
+ return 0;
+}
+
static inline void mem_cgroup_commit_charge(struct page *page,
struct mem_cgroup *memcg,
bool lrucare, bool compound)
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2950ce957656..68e91ef5494c 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -242,7 +242,12 @@ struct nvme_id_ctrl {
__le32 sanicap;
__le32 hmminds;
__le16 hmmaxd;
- __u8 rsvd338[174];
+ __u8 rsvd338[4];
+ __u8 anatt;
+ __u8 anacap;
+ __le32 anagrpmax;
+ __le32 nanagrpid;
+ __u8 rsvd352[160];
__u8 sqes;
__u8 cqes;
__le16 maxcmd;
@@ -254,11 +259,12 @@ struct nvme_id_ctrl {
__le16 awun;
__le16 awupf;
__u8 nvscc;
- __u8 rsvd531;
+ __u8 nwpc;
__le16 acwu;
__u8 rsvd534[2];
__le32 sgls;
- __u8 rsvd540[228];
+ __le32 mnan;
+ __u8 rsvd544[224];
char subnqn[256];
__u8 rsvd1024[768];
__le32 ioccsz;
@@ -312,7 +318,11 @@ struct nvme_id_ns {
__le16 nabspf;
__le16 noiob;
__u8 nvmcap[16];
- __u8 rsvd64[40];
+ __u8 rsvd64[28];
+ __le32 anagrpid;
+ __u8 rsvd96[3];
+ __u8 nsattr;
+ __u8 rsvd100[4];
__u8 nguid[16];
__u8 eui64[8];
struct nvme_lbaf lbaf[16];
@@ -425,6 +435,32 @@ struct nvme_effects_log {
__u8 resv[2048];
};
+enum nvme_ana_state {
+ NVME_ANA_OPTIMIZED = 0x01,
+ NVME_ANA_NONOPTIMIZED = 0x02,
+ NVME_ANA_INACCESSIBLE = 0x03,
+ NVME_ANA_PERSISTENT_LOSS = 0x04,
+ NVME_ANA_CHANGE = 0x0f,
+};
+
+struct nvme_ana_group_desc {
+ __le32 grpid;
+ __le32 nnsids;
+ __le64 chgcnt;
+ __u8 state;
+ __u8 rsvd17[15];
+ __le32 nsids[];
+};
+
+/* flag for the log specific field of the ANA log */
+#define NVME_ANA_LOG_RGO (1 << 0)
+
+struct nvme_ana_rsp_hdr {
+ __le64 chgcnt;
+ __le16 ngrps;
+ __le16 rsvd10[3];
+};
+
enum {
NVME_SMART_CRIT_SPARE = 1 << 0,
NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
@@ -444,11 +480,13 @@ enum {
enum {
NVME_AER_NOTICE_NS_CHANGED = 0x00,
NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
+ NVME_AER_NOTICE_ANA = 0x03,
};
enum {
NVME_AEN_CFG_NS_ATTR = 1 << 8,
NVME_AEN_CFG_FW_ACT = 1 << 9,
+ NVME_AEN_CFG_ANA_CHANGE = 1 << 11,
};
struct nvme_lba_range_type {
@@ -749,15 +787,22 @@ enum {
NVME_FEAT_HOST_MEM_BUF = 0x0d,
NVME_FEAT_TIMESTAMP = 0x0e,
NVME_FEAT_KATO = 0x0f,
+ NVME_FEAT_HCTM = 0x10,
+ NVME_FEAT_NOPSC = 0x11,
+ NVME_FEAT_RRL = 0x12,
+ NVME_FEAT_PLM_CONFIG = 0x13,
+ NVME_FEAT_PLM_WINDOW = 0x14,
NVME_FEAT_SW_PROGRESS = 0x80,
NVME_FEAT_HOST_ID = 0x81,
NVME_FEAT_RESV_MASK = 0x82,
NVME_FEAT_RESV_PERSIST = 0x83,
+ NVME_FEAT_WRITE_PROTECT = 0x84,
NVME_LOG_ERROR = 0x01,
NVME_LOG_SMART = 0x02,
NVME_LOG_FW_SLOT = 0x03,
NVME_LOG_CHANGED_NS = 0x04,
NVME_LOG_CMD_EFFECTS = 0x05,
+ NVME_LOG_ANA = 0x0c,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
@@ -765,6 +810,14 @@ enum {
NVME_FWACT_ACTV = (2 << 3),
};
+/* NVMe Namespace Write Protect State */
+enum {
+ NVME_NS_NO_WRITE_PROTECT = 0,
+ NVME_NS_WRITE_PROTECT,
+ NVME_NS_WRITE_PROTECT_POWER_CYCLE,
+ NVME_NS_WRITE_PROTECT_PERMANENT,
+};
+
#define NVME_MAX_CHANGED_NAMESPACES 1024
struct nvme_identify {
@@ -880,7 +933,7 @@ struct nvme_get_log_page_command {
__u64 rsvd2[2];
union nvme_data_ptr dptr;
__u8 lid;
- __u8 rsvd10;
+ __u8 lsp; /* upper 4 bits reserved */
__le16 numdl;
__le16 numdu;
__u16 rsvd11;
@@ -1111,6 +1164,8 @@ enum {
NVME_SC_SGL_INVALID_OFFSET = 0x16,
NVME_SC_SGL_INVALID_SUBTYPE = 0x17,
+ NVME_SC_NS_WRITE_PROTECTED = 0x20,
+
NVME_SC_LBA_RANGE = 0x80,
NVME_SC_CAP_EXCEEDED = 0x81,
NVME_SC_NS_NOT_READY = 0x82,
@@ -1180,6 +1235,13 @@ enum {
NVME_SC_ACCESS_DENIED = 0x286,
NVME_SC_UNWRITTEN_BLOCK = 0x287,
+ /*
+ * Path-related Errors:
+ */
+ NVME_SC_ANA_PERSISTENT_LOSS = 0x301,
+ NVME_SC_ANA_INACCESSIBLE = 0x302,
+ NVME_SC_ANA_TRANSITION = 0x303,
+
NVME_SC_DNR = 0x4000,
};
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dac5086e3815..95a5018c338e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,6 +734,10 @@ struct task_struct {
/* disallow userland-initiated cgroup migration */
unsigned no_cgroup_migration:1;
#endif
+#ifdef CONFIG_BLK_CGROUP
+ /* to be used once the psi infrastructure lands upstream. */
+ unsigned use_memdelay:1;
+#endif
unsigned long atomic_flags; /* Flags requiring atomic access. */
@@ -1150,6 +1154,10 @@ struct task_struct {
unsigned int memcg_nr_pages_over_high;
#endif
+#ifdef CONFIG_BLK_CGROUP
+ struct request_queue *throttle_queue;
+#endif
+
#ifdef CONFIG_UPROBES
struct uprobe_task *utask;
#endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index c063443d8638..1a8bd05a335e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -629,7 +629,6 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
return memcg->swappiness;
}
-
#else
static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
{
@@ -637,6 +636,16 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
}
#endif
+#if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+extern void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+ gfp_t gfp_mask);
+#else
+static inline void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg,
+ int node, gfp_t gfp_mask)
+{
+}
+#endif
+
#ifdef CONFIG_MEMCG_SWAP
extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index c6aa8a3c42ed..b9626aa7e90c 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -37,9 +37,33 @@ struct t10_pi_tuple {
#define T10_PI_APP_ESCAPE cpu_to_be16(0xffff)
#define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff)
+static inline u32 t10_pi_ref_tag(struct request *rq)
+{
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ return blk_rq_pos(rq) >>
+ (rq->q->integrity.interval_exp - 9) & 0xffffffff;
+#else
+ return -1U;
+#endif
+}
+
extern const struct blk_integrity_profile t10_pi_type1_crc;
extern const struct blk_integrity_profile t10_pi_type1_ip;
extern const struct blk_integrity_profile t10_pi_type3_crc;
extern const struct blk_integrity_profile t10_pi_type3_ip;
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+extern void t10_pi_prepare(struct request *rq, u8 protection_type);
+extern void t10_pi_complete(struct request *rq, u8 protection_type,
+ unsigned int intervals);
+#else
+static inline void t10_pi_complete(struct request *rq, u8 protection_type,
+ unsigned int intervals)
+{
+}
+static inline void t10_pi_prepare(struct request *rq, u8 protection_type)
+{
+}
+#endif
+
#endif
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 4a8841963c2e..05589a3e37f4 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -51,6 +51,7 @@
#include <linux/security.h>
#include <linux/task_work.h>
#include <linux/memcontrol.h>
+#include <linux/blk-cgroup.h>
struct linux_binprm;
/*
@@ -192,6 +193,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
task_work_run();
mem_cgroup_handle_over_high();
+ blkcg_maybe_throttle_current();
}
#endif /* <linux/tracehook.h> */