summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-06 00:27:02 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-06 00:27:02 +0300
commit3526dd0c7832f1011a0477cc6d903662bae05ea8 (patch)
tree22fbac64eb40a0b29bfa4c029695f39b2f591e62 /drivers
parentdd972f924df6bdbc0ab185a38d5d2361dbc26311 (diff)
parentbc6d65e6dc89c3b7ff78e4ad797117c122ffde8e (diff)
downloadlinux-3526dd0c7832f1011a0477cc6d903662bae05ea8.tar.xz
Merge tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: "It's a pretty quiet round this time, which is nice. This contains: - series from Bart, cleaning up the way we set/test/clear atomic queue flags. - series from Bart, fixing races between gendisk and queue registration and removal. - set of bcache fixes and improvements from various folks, by way of Michael Lyle. - set of lightnvm updates from Matias, most of it being the 1.2 to 2.0 transition. - removal of unused DIO flags from Nikolay. - blk-mq/sbitmap memory ordering fixes from Omar. - divide-by-zero fix for BFQ from Paolo. - minor documentation patches from Randy. - timeout fix from Tejun. - Alpha "can't write a char atomically" fix from Mikulas. - set of NVMe fixes by way of Keith. - bsg and bsg-lib improvements from Christoph. - a few sed-opal fixes from Jonas. - cdrom check-disk-change deadlock fix from Maurizio. - various little fixes, comment fixes, etc from various folks" * tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block: (139 commits) blk-mq: Directly schedule q->timeout_work when aborting a request blktrace: fix comment in blktrace_api.h lightnvm: remove function name in strings lightnvm: pblk: remove some unnecessary NULL checks lightnvm: pblk: don't recover unwritten lines lightnvm: pblk: implement 2.0 support lightnvm: pblk: implement get log report chunk lightnvm: pblk: rename ppaf* to addrf* lightnvm: pblk: check for supported version lightnvm: implement get log report chunk helpers lightnvm: make address conversions depend on generic device lightnvm: add support for 2.0 address format lightnvm: normalize geometry nomenclature lightnvm: complete geo structure with maxoc* lightnvm: add shorten OCSSD version in geo lightnvm: add minor version to generic geometry lightnvm: simplify geometry structure lightnvm: pblk: refactor init/exit sequences lightnvm: Avoid validation of default op value lightnvm: centralize permission check for lightnvm ioctl ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/brd.c1
-rw-r--r--drivers/block/drbd/drbd_main.c3
-rw-r--r--drivers/block/drbd/drbd_nl.c4
-rw-r--r--drivers/block/loop.c77
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c8
-rw-r--r--drivers/block/nbd.c8
-rw-r--r--drivers/block/null_blk.c124
-rw-r--r--drivers/block/paride/pcd.c2
-rw-r--r--drivers/block/rbd.c13
-rw-r--r--drivers/block/rsxx/dev.c6
-rw-r--r--drivers/block/skd_main.c4
-rw-r--r--drivers/block/umem.c7
-rw-r--r--drivers/block/xen-blkfront.c10
-rw-r--r--drivers/block/zram/zram_drv.c8
-rw-r--r--drivers/block/zram/zram_drv.h1
-rw-r--r--drivers/cdrom/cdrom.c3
-rw-r--r--drivers/cdrom/gdrom.c3
-rw-r--r--drivers/ide/ide-cd.c10
-rw-r--r--drivers/ide/ide-cd.h6
-rw-r--r--drivers/ide/ide-disk.c4
-rw-r--r--drivers/ide/ide-probe.c4
-rw-r--r--drivers/lightnvm/core.c240
-rw-r--r--drivers/lightnvm/pblk-cache.c4
-rw-r--r--drivers/lightnvm/pblk-core.c202
-rw-r--r--drivers/lightnvm/pblk-gc.c12
-rw-r--r--drivers/lightnvm/pblk-init.c820
-rw-r--r--drivers/lightnvm/pblk-map.c6
-rw-r--r--drivers/lightnvm/pblk-rb.c21
-rw-r--r--drivers/lightnvm/pblk-read.c2
-rw-r--r--drivers/lightnvm/pblk-recovery.c91
-rw-r--r--drivers/lightnvm/pblk-rl.c2
-rw-r--r--drivers/lightnvm/pblk-sysfs.c235
-rw-r--r--drivers/lightnvm/pblk-write.c2
-rw-r--r--drivers/lightnvm/pblk.h304
-rw-r--r--drivers/md/bcache/alloc.c3
-rw-r--r--drivers/md/bcache/bcache.h57
-rw-r--r--drivers/md/bcache/bset.c4
-rw-r--r--drivers/md/bcache/bset.h5
-rw-r--r--drivers/md/bcache/btree.c26
-rw-r--r--drivers/md/bcache/closure.c17
-rw-r--r--drivers/md/bcache/closure.h5
-rw-r--r--drivers/md/bcache/debug.c14
-rw-r--r--drivers/md/bcache/extents.c2
-rw-r--r--drivers/md/bcache/io.c16
-rw-r--r--drivers/md/bcache/journal.c8
-rw-r--r--drivers/md/bcache/request.c186
-rw-r--r--drivers/md/bcache/super.c160
-rw-r--r--drivers/md/bcache/sysfs.c55
-rw-r--r--drivers/md/bcache/util.c25
-rw-r--r--drivers/md/bcache/util.h6
-rw-r--r--drivers/md/bcache/writeback.c92
-rw-r--r--drivers/md/bcache/writeback.h4
-rw-r--r--drivers/md/dm-table.c16
-rw-r--r--drivers/md/dm.c2
-rw-r--r--drivers/md/md-linear.c4
-rw-r--r--drivers/md/md.c10
-rw-r--r--drivers/md/raid0.c4
-rw-r--r--drivers/md/raid1.c6
-rw-r--r--drivers/md/raid10.c6
-rw-r--r--drivers/md/raid5.c4
-rw-r--r--drivers/misc/cardreader/rtsx_pcr.c4
-rw-r--r--drivers/mmc/core/block.c2
-rw-r--r--drivers/mmc/core/queue.c8
-rw-r--r--drivers/mtd/mtd_blkdevs.c6
-rw-r--r--drivers/nvdimm/blk.c2
-rw-r--r--drivers/nvdimm/btt.c2
-rw-r--r--drivers/nvdimm/nd.h1
-rw-r--r--drivers/nvdimm/pmem.c6
-rw-r--r--drivers/nvme/host/Makefile1
-rw-r--r--drivers/nvme/host/core.c123
-rw-r--r--drivers/nvme/host/fault_inject.c79
-rw-r--r--drivers/nvme/host/fc.c36
-rw-r--r--drivers/nvme/host/lightnvm.c757
-rw-r--r--drivers/nvme/host/multipath.c8
-rw-r--r--drivers/nvme/host/nvme.h35
-rw-r--r--drivers/nvme/host/pci.c26
-rw-r--r--drivers/nvme/host/rdma.c34
-rw-r--r--drivers/nvme/target/configfs.c65
-rw-r--r--drivers/nvme/target/core.c12
-rw-r--r--drivers/nvme/target/discovery.c30
-rw-r--r--drivers/nvme/target/fc.c23
-rw-r--r--drivers/nvme/target/loop.c4
-rw-r--r--drivers/nvme/target/nvmet.h12
-rw-r--r--drivers/nvme/target/rdma.c72
-rw-r--r--drivers/s390/block/dasd.c4
-rw-r--r--drivers/s390/block/dcssblk.c2
-rw-r--r--drivers/s390/block/scm_blk.c4
-rw-r--r--drivers/s390/block/xpram.c4
-rw-r--r--drivers/s390/scsi/zfcp_fc.c4
-rw-r--r--drivers/scsi/gdth.h3
-rw-r--r--drivers/scsi/iscsi_tcp.c2
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c2
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_fusion.c2
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_scsih.c2
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c2
-rw-r--r--drivers/scsi/scsi_debug.c2
-rw-r--r--drivers/scsi/scsi_lib.c6
-rw-r--r--drivers/scsi/scsi_sysfs.c3
-rw-r--r--drivers/scsi/scsi_transport_sas.c3
-rw-r--r--drivers/scsi/sd.c8
-rw-r--r--drivers/scsi/smartpqi/smartpqi_init.c2
-rw-r--r--drivers/scsi/sr.c2
-rw-r--r--drivers/staging/rts5208/rtsx_chip.h12
-rw-r--r--drivers/staging/rts5208/rtsx_transport.c10
-rw-r--r--drivers/target/iscsi/iscsi_target.c28
-rw-r--r--drivers/target/loopback/tcm_loop.c2
106 files changed, 3072 insertions, 1369 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index deea78e485da..66cb0f857f64 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -24,7 +24,6 @@
#include <linux/uaccess.h>
-#define SECTOR_SHIFT 9
#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 0a0394aa1b9c..185f1ef00a7c 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2816,7 +2816,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_init_set_defaults(device);
- q = blk_alloc_queue(GFP_KERNEL);
+ q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock);
if (!q)
goto out_no_q;
device->rq_queue = q;
@@ -2848,7 +2848,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
- q->queue_lock = &resource->req_lock;
device->md_io.page = alloc_page(GFP_KERNEL);
if (!device->md_io.page)
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index a12f77e6891e..b4f02768ba47 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1212,10 +1212,10 @@ static void decide_on_discard_support(struct drbd_device *device,
* topology on all peers. */
blk_queue_discard_granularity(q, 512);
q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
} else {
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
blk_queue_discard_granularity(q, 0);
q->limits.max_discard_sectors = 0;
q->limits.max_write_zeroes_sectors = 0;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ee62d2d517bf..264abaaff662 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -214,10 +214,10 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
blk_mq_freeze_queue(lo->lo_queue);
lo->use_dio = use_dio;
if (use_dio) {
- queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
+ blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue);
lo->lo_flags |= LO_FLAGS_DIRECT_IO;
} else {
- queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
+ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
}
blk_mq_unfreeze_queue(lo->lo_queue);
@@ -817,7 +817,7 @@ static void loop_config_discard(struct loop_device *lo)
q->limits.discard_alignment = 0;
blk_queue_max_discard_sectors(q, 0);
blk_queue_max_write_zeroes_sectors(q, 0);
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
return;
}
@@ -826,7 +826,7 @@ static void loop_config_discard(struct loop_device *lo)
blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
}
static void loop_unprepare_queue(struct loop_device *lo)
@@ -1167,21 +1167,17 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
static int
loop_get_status(struct loop_device *lo, struct loop_info64 *info)
{
- struct file *file = lo->lo_backing_file;
+ struct file *file;
struct kstat stat;
- int error;
+ int ret;
- if (lo->lo_state != Lo_bound)
+ if (lo->lo_state != Lo_bound) {
+ mutex_unlock(&lo->lo_ctl_mutex);
return -ENXIO;
- error = vfs_getattr(&file->f_path, &stat,
- STATX_INO, AT_STATX_SYNC_AS_STAT);
- if (error)
- return error;
+ }
+
memset(info, 0, sizeof(*info));
info->lo_number = lo->lo_number;
- info->lo_device = huge_encode_dev(stat.dev);
- info->lo_inode = stat.ino;
- info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
info->lo_offset = lo->lo_offset;
info->lo_sizelimit = lo->lo_sizelimit;
info->lo_flags = lo->lo_flags;
@@ -1194,7 +1190,19 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
lo->lo_encrypt_key_size);
}
- return 0;
+
+ /* Drop lo_ctl_mutex while we call into the filesystem. */
+ file = get_file(lo->lo_backing_file);
+ mutex_unlock(&lo->lo_ctl_mutex);
+ ret = vfs_getattr(&file->f_path, &stat, STATX_INO,
+ AT_STATX_SYNC_AS_STAT);
+ if (!ret) {
+ info->lo_device = huge_encode_dev(stat.dev);
+ info->lo_inode = stat.ino;
+ info->lo_rdevice = huge_encode_dev(stat.rdev);
+ }
+ fput(file);
+ return ret;
}
static void
@@ -1352,7 +1360,10 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
struct loop_device *lo = bdev->bd_disk->private_data;
int err;
- mutex_lock_nested(&lo->lo_ctl_mutex, 1);
+ err = mutex_lock_killable_nested(&lo->lo_ctl_mutex, 1);
+ if (err)
+ goto out_unlocked;
+
switch (cmd) {
case LOOP_SET_FD:
err = loop_set_fd(lo, mode, bdev, arg);
@@ -1374,7 +1385,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
break;
case LOOP_GET_STATUS:
err = loop_get_status_old(lo, (struct loop_info __user *) arg);
- break;
+ /* loop_get_status() unlocks lo_ctl_mutex */
+ goto out_unlocked;
case LOOP_SET_STATUS64:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
@@ -1383,7 +1395,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
break;
case LOOP_GET_STATUS64:
err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
- break;
+ /* loop_get_status() unlocks lo_ctl_mutex */
+ goto out_unlocked;
case LOOP_SET_CAPACITY:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
@@ -1535,16 +1548,20 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
switch(cmd) {
case LOOP_SET_STATUS:
- mutex_lock(&lo->lo_ctl_mutex);
- err = loop_set_status_compat(
- lo, (const struct compat_loop_info __user *) arg);
- mutex_unlock(&lo->lo_ctl_mutex);
+ err = mutex_lock_killable(&lo->lo_ctl_mutex);
+ if (!err) {
+ err = loop_set_status_compat(lo,
+ (const struct compat_loop_info __user *)arg);
+ mutex_unlock(&lo->lo_ctl_mutex);
+ }
break;
case LOOP_GET_STATUS:
- mutex_lock(&lo->lo_ctl_mutex);
- err = loop_get_status_compat(
- lo, (struct compat_loop_info __user *) arg);
- mutex_unlock(&lo->lo_ctl_mutex);
+ err = mutex_lock_killable(&lo->lo_ctl_mutex);
+ if (!err) {
+ err = loop_get_status_compat(lo,
+ (struct compat_loop_info __user *)arg);
+ /* loop_get_status() unlocks lo_ctl_mutex */
+ }
break;
case LOOP_SET_CAPACITY:
case LOOP_CLR_FD:
@@ -1808,7 +1825,7 @@ static int loop_add(struct loop_device **l, int i)
* page. For directio mode, merge does help to dispatch bigger request
* to underlayer disk. We will enable merge once directio is enabled.
*/
- queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
+ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
err = -ENOMEM;
disk = lo->lo_disk = alloc_disk(1 << part_shift);
@@ -1864,8 +1881,8 @@ out:
static void loop_remove(struct loop_device *lo)
{
- blk_cleanup_queue(lo->lo_queue);
del_gendisk(lo->lo_disk);
+ blk_cleanup_queue(lo->lo_queue);
blk_mq_free_tag_set(&lo->tag_set);
put_disk(lo->lo_disk);
kfree(lo);
@@ -1949,7 +1966,9 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
ret = loop_lookup(&lo, parm);
if (ret < 0)
break;
- mutex_lock(&lo->lo_ctl_mutex);
+ ret = mutex_lock_killable(&lo->lo_ctl_mutex);
+ if (ret)
+ break;
if (lo->lo_state != Lo_unbound) {
ret = -EBUSY;
mutex_unlock(&lo->lo_ctl_mutex);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index b8af7352a18f..769c551e3d71 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -159,7 +159,7 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
if (vendor_id == 0xFFFF) {
dd->sr = true;
if (dd->queue)
- set_bit(QUEUE_FLAG_DEAD, &dd->queue->queue_flags);
+ blk_queue_flag_set(QUEUE_FLAG_DEAD, dd->queue);
else
dev_warn(&dd->pdev->dev,
"%s: dd->queue is NULL\n", __func__);
@@ -3855,8 +3855,8 @@ skip_create_disk:
goto start_service_thread;
/* Set device limits. */
- set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags);
- clear_bit(QUEUE_FLAG_ADD_RANDOM, &dd->queue->queue_flags);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue);
blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
blk_queue_physical_block_size(dd->queue, 4096);
blk_queue_max_hw_sectors(dd->queue, 0xffff);
@@ -3866,7 +3866,7 @@ skip_create_disk:
/* Signal trim support */
if (dd->trim_supp == true) {
- set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, dd->queue);
dd->queue->limits.discard_granularity = 4096;
blk_queue_max_discard_sectors(dd->queue,
MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 86258b00a1d4..afbc202ca6fd 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -964,7 +964,7 @@ static void nbd_parse_flags(struct nbd_device *nbd)
else
set_disk_ro(nbd->disk, false);
if (config->flags & NBD_FLAG_SEND_TRIM)
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
if (config->flags & NBD_FLAG_SEND_FLUSH) {
if (config->flags & NBD_FLAG_SEND_FUA)
blk_queue_write_cache(nbd->disk->queue, true, true);
@@ -1040,7 +1040,7 @@ static void nbd_config_put(struct nbd_device *nbd)
nbd->config = NULL;
nbd->tag_set.timeout = 0;
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
mutex_unlock(&nbd->config_lock);
nbd_put(nbd);
@@ -1488,8 +1488,8 @@ static int nbd_dev_add(int index)
/*
* Tell the block layer that we are not a rotational device
*/
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
disk->queue->limits.discard_granularity = 512;
blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
blk_queue_max_segment_size(disk->queue, UINT_MAX);
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 287a09611c0f..a76553293a31 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -16,10 +16,8 @@
#include <linux/badblocks.h>
#include <linux/fault-inject.h>
-#define SECTOR_SHIFT 9
#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
-#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#define SECTOR_MASK (PAGE_SECTORS - 1)
#define FREE_BATCH 16
@@ -29,6 +27,7 @@
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
static DECLARE_FAULT_ATTR(null_timeout_attr);
+static DECLARE_FAULT_ATTR(null_requeue_attr);
#endif
static inline u64 mb_per_tick(int mbps)
@@ -53,6 +52,7 @@ struct nullb_queue {
wait_queue_head_t wait;
unsigned int queue_depth;
struct nullb_device *dev;
+ unsigned int requeue_selection;
struct nullb_cmd *cmds;
};
@@ -72,6 +72,7 @@ enum nullb_device_flags {
NULLB_DEV_FL_CACHE = 3,
};
+#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2)
/*
* nullb_page is a page in memory for nullb devices.
*
@@ -86,10 +87,10 @@ enum nullb_device_flags {
*/
struct nullb_page {
struct page *page;
- unsigned long bitmap;
+ DECLARE_BITMAP(bitmap, MAP_SZ);
};
-#define NULLB_PAGE_LOCK (sizeof(unsigned long) * 8 - 1)
-#define NULLB_PAGE_FREE (sizeof(unsigned long) * 8 - 2)
+#define NULLB_PAGE_LOCK (MAP_SZ - 1)
+#define NULLB_PAGE_FREE (MAP_SZ - 2)
struct nullb_device {
struct nullb *nullb;
@@ -170,6 +171,9 @@ MODULE_PARM_DESC(home_node, "Home node for the device");
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
static char g_timeout_str[80];
module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO);
+
+static char g_requeue_str[80];
+module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), S_IRUGO);
#endif
static int g_queue_mode = NULL_Q_MQ;
@@ -728,7 +732,7 @@ static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
if (!t_page->page)
goto out_freepage;
- t_page->bitmap = 0;
+ memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
return t_page;
out_freepage:
kfree(t_page);
@@ -738,13 +742,20 @@ out:
static void null_free_page(struct nullb_page *t_page)
{
- __set_bit(NULLB_PAGE_FREE, &t_page->bitmap);
- if (test_bit(NULLB_PAGE_LOCK, &t_page->bitmap))
+ __set_bit(NULLB_PAGE_FREE, t_page->bitmap);
+ if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap))
return;
__free_page(t_page->page);
kfree(t_page);
}
+static bool null_page_empty(struct nullb_page *page)
+{
+ int size = MAP_SZ - 2;
+
+ return find_first_bit(page->bitmap, size) == size;
+}
+
static void null_free_sector(struct nullb *nullb, sector_t sector,
bool is_cache)
{
@@ -759,9 +770,9 @@ static void null_free_sector(struct nullb *nullb, sector_t sector,
t_page = radix_tree_lookup(root, idx);
if (t_page) {
- __clear_bit(sector_bit, &t_page->bitmap);
+ __clear_bit(sector_bit, t_page->bitmap);
- if (!t_page->bitmap) {
+ if (null_page_empty(t_page)) {
ret = radix_tree_delete_item(root, idx, t_page);
WARN_ON(ret != t_page);
null_free_page(ret);
@@ -832,7 +843,7 @@ static struct nullb_page *__null_lookup_page(struct nullb *nullb,
t_page = radix_tree_lookup(root, idx);
WARN_ON(t_page && t_page->page->index != idx);
- if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap)))
+ if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
return t_page;
return NULL;
@@ -895,10 +906,10 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
- __clear_bit(NULLB_PAGE_LOCK, &c_page->bitmap);
- if (test_bit(NULLB_PAGE_FREE, &c_page->bitmap)) {
+ __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap);
+ if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) {
null_free_page(c_page);
- if (t_page && t_page->bitmap == 0) {
+ if (t_page && null_page_empty(t_page)) {
ret = radix_tree_delete_item(&nullb->dev->data,
idx, t_page);
null_free_page(t_page);
@@ -914,11 +925,11 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
for (i = 0; i < PAGE_SECTORS;
i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
- if (test_bit(i, &c_page->bitmap)) {
+ if (test_bit(i, c_page->bitmap)) {
offset = (i << SECTOR_SHIFT);
memcpy(dst + offset, src + offset,
nullb->dev->blocksize);
- __set_bit(i, &t_page->bitmap);
+ __set_bit(i, t_page->bitmap);
}
}
@@ -955,10 +966,10 @@ again:
* We found the page which is being flushed to disk by other
* threads
*/
- if (test_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap))
+ if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap))
c_pages[i] = NULL;
else
- __set_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap);
+ __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap);
}
one_round = 0;
@@ -1011,7 +1022,7 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source,
kunmap_atomic(dst);
kunmap_atomic(src);
- __set_bit(sector & SECTOR_MASK, &t_page->bitmap);
+ __set_bit(sector & SECTOR_MASK, t_page->bitmap);
if (is_fua)
null_free_sector(nullb, sector, true);
@@ -1380,7 +1391,15 @@ static bool should_timeout_request(struct request *rq)
if (g_timeout_str[0])
return should_fail(&null_timeout_attr, 1);
#endif
+ return false;
+}
+static bool should_requeue_request(struct request *rq)
+{
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+ if (g_requeue_str[0])
+ return should_fail(&null_requeue_attr, 1);
+#endif
return false;
}
@@ -1391,11 +1410,17 @@ static void null_request_fn(struct request_queue *q)
while ((rq = blk_fetch_request(q)) != NULL) {
struct nullb_cmd *cmd = rq->special;
- if (!should_timeout_request(rq)) {
- spin_unlock_irq(q->queue_lock);
- null_handle_cmd(cmd);
- spin_lock_irq(q->queue_lock);
+ /* just ignore the request */
+ if (should_timeout_request(rq))
+ continue;
+ if (should_requeue_request(rq)) {
+ blk_requeue_request(q, rq);
+ continue;
}
+
+ spin_unlock_irq(q->queue_lock);
+ null_handle_cmd(cmd);
+ spin_lock_irq(q->queue_lock);
}
}
@@ -1422,10 +1447,23 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(bd->rq);
- if (!should_timeout_request(bd->rq))
- return null_handle_cmd(cmd);
+ if (should_requeue_request(bd->rq)) {
+ /*
+ * Alternate between hitting the core BUSY path, and the
+ * driver driven requeue path
+ */
+ nq->requeue_selection++;
+ if (nq->requeue_selection & 1)
+ return BLK_STS_RESOURCE;
+ else {
+ blk_mq_requeue_request(bd->rq, true);
+ return BLK_STS_OK;
+ }
+ }
+ if (should_timeout_request(bd->rq))
+ return BLK_STS_OK;
- return BLK_STS_OK;
+ return null_handle_cmd(cmd);
}
static const struct blk_mq_ops null_mq_ops = {
@@ -1485,7 +1523,7 @@ static void null_config_discard(struct nullb *nullb)
nullb->q->limits.discard_granularity = nullb->dev->blocksize;
nullb->q->limits.discard_alignment = nullb->dev->blocksize;
blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nullb->q);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
}
static int null_open(struct block_device *bdev, fmode_t mode)
@@ -1659,16 +1697,27 @@ static void null_validate_conf(struct nullb_device *dev)
dev->mbps = 0;
}
-static bool null_setup_fault(void)
-{
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
- if (!g_timeout_str[0])
+static bool __null_setup_fault(struct fault_attr *attr, char *str)
+{
+ if (!str[0])
return true;
- if (!setup_fault_attr(&null_timeout_attr, g_timeout_str))
+ if (!setup_fault_attr(attr, str))
return false;
- null_timeout_attr.verbose = 0;
+ attr->verbose = 0;
+ return true;
+}
+#endif
+
+static bool null_setup_fault(void)
+{
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+ if (!__null_setup_fault(&null_timeout_attr, g_timeout_str))
+ return false;
+ if (!__null_setup_fault(&null_requeue_attr, g_requeue_str))
+ return false;
#endif
return true;
}
@@ -1717,7 +1766,8 @@ static int null_add_dev(struct nullb_device *dev)
}
null_init_queues(nullb);
} else if (dev->queue_mode == NULL_Q_BIO) {
- nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node);
+ nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node,
+ NULL);
if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues;
@@ -1758,8 +1808,8 @@ static int null_add_dev(struct nullb_device *dev)
}
nullb->q->queuedata = nullb;
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
mutex_lock(&lock);
nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
@@ -1802,10 +1852,6 @@ static int __init null_init(void)
struct nullb *nullb;
struct nullb_device *dev;
- /* check for nullb_page.bitmap */
- if (sizeof(unsigned long) * 8 - 2 < (PAGE_SIZE >> SECTOR_SHIFT))
- return -EINVAL;
-
if (g_bs > PAGE_SIZE) {
pr_warn("null_blk: invalid block size\n");
pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 7b8c6368beb7..a026211afb51 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -230,6 +230,8 @@ static int pcd_block_open(struct block_device *bdev, fmode_t mode)
struct pcd_unit *cd = bdev->bd_disk->private_data;
int ret;
+ check_disk_change(bdev);
+
mutex_lock(&pcd_mutex);
ret = cdrom_open(&cd->info, bdev, mode);
mutex_unlock(&pcd_mutex);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8e40da093766..1e03b04819c8 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -51,15 +51,6 @@
#define RBD_DEBUG /* Activate rbd_assert() calls */
/*
- * The basic unit of block I/O is a sector. It is interpreted in a
- * number of contexts in Linux (blk, bio, genhd), but the default is
- * universally 512 bytes. These symbols are just slightly more
- * meaningful than the bare numbers they represent.
- */
-#define SECTOR_SHIFT 9
-#define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
-
-/*
* Increment the given counter and return its updated value.
* If the counter is already 0 it will not be incremented.
* If the counter is already at its maximum value returns
@@ -4370,7 +4361,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
goto out_tag_set;
}
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
/* set io sizes to object size */
@@ -4383,7 +4374,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
blk_queue_io_opt(q, segment_size);
/* enable the discard support */
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
q->limits.discard_granularity = segment_size;
blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE);
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index e397d3ee7308..dddb3f2490b6 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -287,10 +287,10 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue);
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, card->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, card->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->queue);
if (rsxx_discard_supported(card)) {
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->queue);
blk_queue_max_discard_sectors(card->queue,
RSXX_HW_BLK_SIZE >> 9);
card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE;
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index e41935ab41ef..bc7aea6d7b7c 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -2858,8 +2858,8 @@ static int skd_cons_disk(struct skd_device *skdev)
/* set optimal I/O size to 8KB */
blk_queue_io_opt(q, 8192);
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
blk_queue_rq_timeout(q, 8 * HZ);
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 8077123678ad..5c7fb8cc4149 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -888,13 +888,14 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
card->Active = -1; /* no page is active */
card->bio = NULL;
card->biotail = &card->bio;
+ spin_lock_init(&card->lock);
- card->queue = blk_alloc_queue(GFP_KERNEL);
+ card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE,
+ &card->lock);
if (!card->queue)
goto failed_alloc;
blk_queue_make_request(card->queue, mm_make_request);
- card->queue->queue_lock = &card->lock;
card->queue->queuedata = card;
tasklet_init(&card->tasklet, process_page, (unsigned long)card);
@@ -968,8 +969,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
dev_printk(KERN_INFO, &card->dev->dev,
"Window size %d bytes, IRQ %d\n", data, dev->irq);
- spin_lock_init(&card->lock);
-
pci_set_drvdata(dev, card);
if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 92ec1bbece51..2a8e7813bd1a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -932,15 +932,15 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
unsigned int segments = info->max_indirect_segments ? :
BLKIF_MAX_SEGMENTS_PER_REQUEST;
- queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
+ blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
if (info->feature_discard) {
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq);
blk_queue_max_discard_sectors(rq, get_capacity(gd));
rq->limits.discard_granularity = info->discard_granularity;
rq->limits.discard_alignment = info->discard_alignment;
if (info->feature_secdiscard)
- queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq);
+ blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq);
}
/* Hard sector size and max sectors impersonate the equiv. hardware. */
@@ -1611,8 +1611,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
blkif_req(req)->error = BLK_STS_NOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
- queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
- queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
+ blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
}
break;
case BLKIF_OP_FLUSH_DISKCACHE:
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 0afa6c8c3857..71b449613cfa 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1530,8 +1530,8 @@ static int zram_add(void)
/* Actual capacity set using syfs (/sys/block/zram<id>/disksize */
set_capacity(zram->disk, 0);
/* zram devices sort of resembles non-rotational disks */
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
/*
* To ensure that we always get PAGE_SIZE aligned
@@ -1544,7 +1544,7 @@ static int zram_add(void)
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
/*
* zram_bio_discard() will clear all logical blocks if logical block
@@ -1620,8 +1620,8 @@ static int zram_remove(struct zram *zram)
pr_info("Removed device: %s\n", zram->disk->disk_name);
- blk_cleanup_queue(zram->disk->queue);
del_gendisk(zram->disk);
+ blk_cleanup_queue(zram->disk->queue);
put_disk(zram->disk);
kfree(zram);
return 0;
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 31762db861e3..1e9bf65c0bfb 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -37,7 +37,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
/*-- End of configurable params */
-#define SECTOR_SHIFT 9
#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
#define ZRAM_LOGICAL_BLOCK_SHIFT 12
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index e36d160c458f..8327478effd0 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1152,9 +1152,6 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
cd_dbg(CD_OPEN, "entering cdrom_open\n");
- /* open is event synchronization point, check events first */
- check_disk_change(bdev);
-
/* if this was a O_NONBLOCK open and we should honor the flags,
* do a quick open without drive/disc integrity checks. */
cdi->use_count++;
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 6495b03f576c..ae3a7537cf0f 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -497,6 +497,9 @@ static const struct cdrom_device_ops gdrom_ops = {
static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode)
{
int ret;
+
+ check_disk_change(bdev);
+
mutex_lock(&gdrom_mutex);
ret = cdrom_open(gd.cd_info, bdev, mode);
mutex_unlock(&gdrom_mutex);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 7c3ed7c9af77..5a8e8e3c22cd 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -712,7 +712,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
struct request_queue *q = drive->queue;
int write = rq_data_dir(rq) == WRITE;
unsigned short sectors_per_frame =
- queue_logical_block_size(q) >> SECTOR_BITS;
+ queue_logical_block_size(q) >> SECTOR_SHIFT;
ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, rq->cmd_flags: 0x%x, "
"secs_per_frame: %u",
@@ -919,7 +919,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
* end up being bogus.
*/
blocklen = be32_to_cpu(capbuf.blocklen);
- blocklen = (blocklen >> SECTOR_BITS) << SECTOR_BITS;
+ blocklen = (blocklen >> SECTOR_SHIFT) << SECTOR_SHIFT;
switch (blocklen) {
case 512:
case 1024:
@@ -935,7 +935,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
}
*capacity = 1 + be32_to_cpu(capbuf.lba);
- *sectors_per_frame = blocklen >> SECTOR_BITS;
+ *sectors_per_frame = blocklen >> SECTOR_SHIFT;
ide_debug_log(IDE_DBG_PROBE, "cap: %lu, sectors_per_frame: %lu",
*capacity, *sectors_per_frame);
@@ -1012,7 +1012,7 @@ int ide_cd_read_toc(ide_drive_t *drive, struct request_sense *sense)
drive->probed_capacity = toc->capacity * sectors_per_frame;
blk_queue_logical_block_size(drive->queue,
- sectors_per_frame << SECTOR_BITS);
+ sectors_per_frame << SECTOR_SHIFT);
/* first read just the header, so we know how long the TOC is */
stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr,
@@ -1613,6 +1613,8 @@ static int idecd_open(struct block_device *bdev, fmode_t mode)
struct cdrom_info *info;
int rc = -ENXIO;
+ check_disk_change(bdev);
+
mutex_lock(&ide_cd_mutex);
info = ide_cd_get(bdev->bd_disk);
if (!info)
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 264e822eba58..04f0f310a856 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -21,11 +21,7 @@
/************************************************************************/
-#define SECTOR_BITS 9
-#ifndef SECTOR_SIZE
-#define SECTOR_SIZE (1 << SECTOR_BITS)
-#endif
-#define SECTORS_PER_FRAME (CD_FRAMESIZE >> SECTOR_BITS)
+#define SECTORS_PER_FRAME (CD_FRAMESIZE >> SECTOR_SHIFT)
#define SECTOR_BUFFER_SIZE (CD_FRAMESIZE * 32)
/* Capabilities Page size including 8 bytes of Mode Page Header */
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 67bc72d78fbf..f1a7c58fe418 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -687,8 +687,8 @@ static void ide_disk_setup(ide_drive_t *drive)
queue_max_sectors(q) / 2);
if (ata_id_is_ssd(id)) {
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
}
/* calculate drive capacity, and select LBA if possible */
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index caa20eb5f26b..2019e66eada7 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -766,14 +766,14 @@ static int ide_init_queue(ide_drive_t *drive)
* limits and LBA48 we could raise it but as yet
* do not.
*/
- q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif));
+ q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif), NULL);
if (!q)
return 1;
q->request_fn = do_ide_request;
q->initialize_rq_fn = ide_initialize_rq;
q->cmd_size = sizeof(struct ide_request);
- queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
+ blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
if (blk_init_allocated_queue(q) < 0) {
blk_cleanup_queue(q);
return 1;
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index dcc9e621e651..63171cdce270 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -36,13 +36,13 @@ static DECLARE_RWSEM(nvm_lock);
/* Map between virtual and physical channel and lun */
struct nvm_ch_map {
int ch_off;
- int nr_luns;
+ int num_lun;
int *lun_offs;
};
struct nvm_dev_map {
struct nvm_ch_map *chnls;
- int nr_chnls;
+ int num_ch;
};
static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name)
@@ -114,15 +114,15 @@ static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear)
struct nvm_dev_map *dev_map = tgt_dev->map;
int i, j;
- for (i = 0; i < dev_map->nr_chnls; i++) {
+ for (i = 0; i < dev_map->num_ch; i++) {
struct nvm_ch_map *ch_map = &dev_map->chnls[i];
int *lun_offs = ch_map->lun_offs;
int ch = i + ch_map->ch_off;
if (clear) {
- for (j = 0; j < ch_map->nr_luns; j++) {
+ for (j = 0; j < ch_map->num_lun; j++) {
int lun = j + lun_offs[j];
- int lunid = (ch * dev->geo.nr_luns) + lun;
+ int lunid = (ch * dev->geo.num_lun) + lun;
WARN_ON(!test_and_clear_bit(lunid,
dev->lun_map));
@@ -147,47 +147,46 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
struct nvm_dev_map *dev_rmap = dev->rmap;
struct nvm_dev_map *dev_map;
struct ppa_addr *luns;
- int nr_luns = lun_end - lun_begin + 1;
- int luns_left = nr_luns;
- int nr_chnls = nr_luns / dev->geo.nr_luns;
- int nr_chnls_mod = nr_luns % dev->geo.nr_luns;
- int bch = lun_begin / dev->geo.nr_luns;
- int blun = lun_begin % dev->geo.nr_luns;
+ int num_lun = lun_end - lun_begin + 1;
+ int luns_left = num_lun;
+ int num_ch = num_lun / dev->geo.num_lun;
+ int num_ch_mod = num_lun % dev->geo.num_lun;
+ int bch = lun_begin / dev->geo.num_lun;
+ int blun = lun_begin % dev->geo.num_lun;
int lunid = 0;
int lun_balanced = 1;
- int prev_nr_luns;
+ int sec_per_lun, prev_num_lun;
int i, j;
- nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1;
+ num_ch = (num_ch_mod == 0) ? num_ch : num_ch + 1;
dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL);
if (!dev_map)
goto err_dev;
- dev_map->chnls = kcalloc(nr_chnls, sizeof(struct nvm_ch_map),
- GFP_KERNEL);
+ dev_map->chnls = kcalloc(num_ch, sizeof(struct nvm_ch_map), GFP_KERNEL);
if (!dev_map->chnls)
goto err_chnls;
- luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL);
+ luns = kcalloc(num_lun, sizeof(struct ppa_addr), GFP_KERNEL);
if (!luns)
goto err_luns;
- prev_nr_luns = (luns_left > dev->geo.nr_luns) ?
- dev->geo.nr_luns : luns_left;
- for (i = 0; i < nr_chnls; i++) {
+ prev_num_lun = (luns_left > dev->geo.num_lun) ?
+ dev->geo.num_lun : luns_left;
+ for (i = 0; i < num_ch; i++) {
struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch];
int *lun_roffs = ch_rmap->lun_offs;
struct nvm_ch_map *ch_map = &dev_map->chnls[i];
int *lun_offs;
- int luns_in_chnl = (luns_left > dev->geo.nr_luns) ?
- dev->geo.nr_luns : luns_left;
+ int luns_in_chnl = (luns_left > dev->geo.num_lun) ?
+ dev->geo.num_lun : luns_left;
- if (lun_balanced && prev_nr_luns != luns_in_chnl)
+ if (lun_balanced && prev_num_lun != luns_in_chnl)
lun_balanced = 0;
ch_map->ch_off = ch_rmap->ch_off = bch;
- ch_map->nr_luns = luns_in_chnl;
+ ch_map->num_lun = luns_in_chnl;
lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
if (!lun_offs)
@@ -195,8 +194,8 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
for (j = 0; j < luns_in_chnl; j++) {
luns[lunid].ppa = 0;
- luns[lunid].g.ch = i;
- luns[lunid++].g.lun = j;
+ luns[lunid].a.ch = i;
+ luns[lunid++].a.lun = j;
lun_offs[j] = blun;
lun_roffs[j + blun] = blun;
@@ -209,24 +208,29 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
luns_left -= luns_in_chnl;
}
- dev_map->nr_chnls = nr_chnls;
+ dev_map->num_ch = num_ch;
tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL);
if (!tgt_dev)
goto err_ch;
+ /* Inherit device geometry from parent */
memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo));
+
/* Target device only owns a portion of the physical device */
- tgt_dev->geo.nr_chnls = nr_chnls;
- tgt_dev->geo.all_luns = nr_luns;
- tgt_dev->geo.nr_luns = (lun_balanced) ? prev_nr_luns : -1;
+ tgt_dev->geo.num_ch = num_ch;
+ tgt_dev->geo.num_lun = (lun_balanced) ? prev_num_lun : -1;
+ tgt_dev->geo.all_luns = num_lun;
+ tgt_dev->geo.all_chunks = num_lun * dev->geo.num_chk;
+
tgt_dev->geo.op = op;
- tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun;
+
+ sec_per_lun = dev->geo.clba * dev->geo.num_chk;
+ tgt_dev->geo.total_secs = num_lun * sec_per_lun;
+
tgt_dev->q = dev->q;
tgt_dev->map = dev_map;
tgt_dev->luns = luns;
- memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id));
-
tgt_dev->parent = dev;
return tgt_dev;
@@ -296,24 +300,20 @@ static int __nvm_config_simple(struct nvm_dev *dev,
static int __nvm_config_extended(struct nvm_dev *dev,
struct nvm_ioctl_create_extended *e)
{
- struct nvm_geo *geo = &dev->geo;
-
if (e->lun_begin == 0xFFFF && e->lun_end == 0xFFFF) {
e->lun_begin = 0;
e->lun_end = dev->geo.all_luns - 1;
}
/* op not set falls into target's default */
- if (e->op == 0xFFFF)
+ if (e->op == 0xFFFF) {
e->op = NVM_TARGET_DEFAULT_OP;
-
- if (e->op < NVM_TARGET_MIN_OP ||
- e->op > NVM_TARGET_MAX_OP) {
+ } else if (e->op < NVM_TARGET_MIN_OP || e->op > NVM_TARGET_MAX_OP) {
pr_err("nvm: invalid over provisioning value\n");
return -EINVAL;
}
- return nvm_config_check_luns(geo, e->lun_begin, e->lun_end);
+ return nvm_config_check_luns(&dev->geo, e->lun_begin, e->lun_end);
}
static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
@@ -384,7 +384,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
goto err_dev;
}
- tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
+ tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node, NULL);
if (!tqueue) {
ret = -ENOMEM;
goto err_disk;
@@ -407,7 +407,8 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
tdisk->private_data = targetdata;
tqueue->queuedata = targetdata;
- blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect);
+ blk_queue_max_hw_sectors(tqueue,
+ (dev->geo.csecs >> 9) * NVM_MAX_VLBA);
set_capacity(tdisk, tt->capacity(targetdata));
add_disk(tdisk);
@@ -503,20 +504,20 @@ static int nvm_register_map(struct nvm_dev *dev)
if (!rmap)
goto err_rmap;
- rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct nvm_ch_map),
+ rmap->chnls = kcalloc(dev->geo.num_ch, sizeof(struct nvm_ch_map),
GFP_KERNEL);
if (!rmap->chnls)
goto err_chnls;
- for (i = 0; i < dev->geo.nr_chnls; i++) {
+ for (i = 0; i < dev->geo.num_ch; i++) {
struct nvm_ch_map *ch_rmap;
int *lun_roffs;
- int luns_in_chnl = dev->geo.nr_luns;
+ int luns_in_chnl = dev->geo.num_lun;
ch_rmap = &rmap->chnls[i];
ch_rmap->ch_off = -1;
- ch_rmap->nr_luns = luns_in_chnl;
+ ch_rmap->num_lun = luns_in_chnl;
lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
if (!lun_roffs)
@@ -545,7 +546,7 @@ static void nvm_unregister_map(struct nvm_dev *dev)
struct nvm_dev_map *rmap = dev->rmap;
int i;
- for (i = 0; i < dev->geo.nr_chnls; i++)
+ for (i = 0; i < dev->geo.num_ch; i++)
kfree(rmap->chnls[i].lun_offs);
kfree(rmap->chnls);
@@ -555,22 +556,22 @@ static void nvm_unregister_map(struct nvm_dev *dev)
static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
{
struct nvm_dev_map *dev_map = tgt_dev->map;
- struct nvm_ch_map *ch_map = &dev_map->chnls[p->g.ch];
- int lun_off = ch_map->lun_offs[p->g.lun];
+ struct nvm_ch_map *ch_map = &dev_map->chnls[p->a.ch];
+ int lun_off = ch_map->lun_offs[p->a.lun];
- p->g.ch += ch_map->ch_off;
- p->g.lun += lun_off;
+ p->a.ch += ch_map->ch_off;
+ p->a.lun += lun_off;
}
static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
{
struct nvm_dev *dev = tgt_dev->parent;
struct nvm_dev_map *dev_rmap = dev->rmap;
- struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch];
- int lun_roff = ch_rmap->lun_offs[p->g.lun];
+ struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->a.ch];
+ int lun_roff = ch_rmap->lun_offs[p->a.lun];
- p->g.ch -= ch_rmap->ch_off;
- p->g.lun -= lun_roff;
+ p->a.ch -= ch_rmap->ch_off;
+ p->a.lun -= lun_roff;
}
static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev,
@@ -580,7 +581,7 @@ static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev,
for (i = 0; i < nr_ppas; i++) {
nvm_map_to_dev(tgt_dev, &ppa_list[i]);
- ppa_list[i] = generic_to_dev_addr(tgt_dev, ppa_list[i]);
+ ppa_list[i] = generic_to_dev_addr(tgt_dev->parent, ppa_list[i]);
}
}
@@ -590,7 +591,7 @@ static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev,
int i;
for (i = 0; i < nr_ppas; i++) {
- ppa_list[i] = dev_to_generic_addr(tgt_dev, ppa_list[i]);
+ ppa_list[i] = dev_to_generic_addr(tgt_dev->parent, ppa_list[i]);
nvm_map_to_tgt(tgt_dev, &ppa_list[i]);
}
}
@@ -674,7 +675,7 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
int i, plane_cnt, pl_idx;
struct ppa_addr ppa;
- if (geo->plane_mode == NVM_PLANE_SINGLE && nr_ppas == 1) {
+ if (geo->pln_mode == NVM_PLANE_SINGLE && nr_ppas == 1) {
rqd->nr_ppas = nr_ppas;
rqd->ppa_addr = ppas[0];
@@ -688,7 +689,7 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
return -ENOMEM;
}
- plane_cnt = geo->plane_mode;
+ plane_cnt = geo->pln_mode;
rqd->nr_ppas *= plane_cnt;
for (i = 0; i < nr_ppas; i++) {
@@ -711,6 +712,17 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
}
+int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct nvm_chk_meta *meta,
+ struct ppa_addr ppa, int nchks)
+{
+ struct nvm_dev *dev = tgt_dev->parent;
+
+ nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
+
+ return dev->ops->get_chk_meta(tgt_dev->parent, meta,
+ (sector_t)ppa.ppa, nchks);
+}
+EXPORT_SYMBOL(nvm_get_chunk_meta);
int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
int nr_ppas, int type)
@@ -719,7 +731,7 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
struct nvm_rq rqd;
int ret;
- if (nr_ppas > dev->ops->max_phys_sect) {
+ if (nr_ppas > NVM_MAX_VLBA) {
pr_err("nvm: unable to update all blocks atomically\n");
return -EINVAL;
}
@@ -740,14 +752,6 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
}
EXPORT_SYMBOL(nvm_set_tgt_bb_tbl);
-int nvm_max_phys_sects(struct nvm_tgt_dev *tgt_dev)
-{
- struct nvm_dev *dev = tgt_dev->parent;
-
- return dev->ops->max_phys_sect;
-}
-EXPORT_SYMBOL(nvm_max_phys_sects);
-
int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
{
struct nvm_dev *dev = tgt_dev->parent;
@@ -814,15 +818,15 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
struct nvm_geo *geo = &dev->geo;
int blk, offset, pl, blktype;
- if (nr_blks != geo->nr_chks * geo->plane_mode)
+ if (nr_blks != geo->num_chk * geo->pln_mode)
return -EINVAL;
- for (blk = 0; blk < geo->nr_chks; blk++) {
- offset = blk * geo->plane_mode;
+ for (blk = 0; blk < geo->num_chk; blk++) {
+ offset = blk * geo->pln_mode;
blktype = blks[offset];
/* Bad blocks on any planes take precedence over other types */
- for (pl = 0; pl < geo->plane_mode; pl++) {
+ for (pl = 0; pl < geo->pln_mode; pl++) {
if (blks[offset + pl] &
(NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) {
blktype = blks[offset + pl];
@@ -833,7 +837,7 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
blks[blk] = blktype;
}
- return geo->nr_chks;
+ return geo->num_chk;
}
EXPORT_SYMBOL(nvm_bb_tbl_fold);
@@ -850,44 +854,9 @@ EXPORT_SYMBOL(nvm_get_tgt_bb_tbl);
static int nvm_core_init(struct nvm_dev *dev)
{
- struct nvm_id *id = &dev->identity;
- struct nvm_id_group *grp = &id->grp;
struct nvm_geo *geo = &dev->geo;
int ret;
- memcpy(&geo->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
-
- if (grp->mtype != 0) {
- pr_err("nvm: memory type not supported\n");
- return -EINVAL;
- }
-
- /* Whole device values */
- geo->nr_chnls = grp->num_ch;
- geo->nr_luns = grp->num_lun;
-
- /* Generic device geometry values */
- geo->ws_min = grp->ws_min;
- geo->ws_opt = grp->ws_opt;
- geo->ws_seq = grp->ws_seq;
- geo->ws_per_chk = grp->ws_per_chk;
- geo->nr_chks = grp->num_chk;
- geo->sec_size = grp->csecs;
- geo->oob_size = grp->sos;
- geo->mccap = grp->mccap;
- geo->max_rq_size = dev->ops->max_phys_sect * geo->sec_size;
-
- geo->sec_per_chk = grp->clba;
- geo->sec_per_lun = geo->sec_per_chk * geo->nr_chks;
- geo->all_luns = geo->nr_luns * geo->nr_chnls;
-
- /* 1.2 spec device geometry values */
- geo->plane_mode = 1 << geo->ws_seq;
- geo->nr_planes = geo->ws_opt / geo->ws_min;
- geo->sec_per_pg = geo->ws_min;
- geo->sec_per_pl = geo->sec_per_pg * geo->nr_planes;
-
- dev->total_secs = geo->all_luns * geo->sec_per_lun;
dev->lun_map = kcalloc(BITS_TO_LONGS(geo->all_luns),
sizeof(unsigned long), GFP_KERNEL);
if (!dev->lun_map)
@@ -902,7 +871,6 @@ static int nvm_core_init(struct nvm_dev *dev)
if (ret)
goto err_fmtype;
- blk_queue_logical_block_size(dev->q, geo->sec_size);
return 0;
err_fmtype:
kfree(dev->lun_map);
@@ -927,18 +895,14 @@ static int nvm_init(struct nvm_dev *dev)
struct nvm_geo *geo = &dev->geo;
int ret = -EINVAL;
- if (dev->ops->identity(dev, &dev->identity)) {
+ if (dev->ops->identity(dev)) {
pr_err("nvm: device could not be identified\n");
goto err;
}
- pr_debug("nvm: ver:%x nvm_vendor:%x\n",
- dev->identity.ver_id, dev->identity.vmnt);
-
- if (dev->identity.ver_id != 1) {
- pr_err("nvm: device not supported by kernel.");
- goto err;
- }
+ pr_debug("nvm: ver:%u.%u nvm_vendor:%x\n",
+ geo->major_ver_id, geo->minor_ver_id,
+ geo->vmnt);
ret = nvm_core_init(dev);
if (ret) {
@@ -946,10 +910,10 @@ static int nvm_init(struct nvm_dev *dev)
goto err;
}
- pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n",
- dev->name, geo->sec_per_pg, geo->nr_planes,
- geo->ws_per_chk, geo->nr_chks,
- geo->all_luns, geo->nr_chnls);
+ pr_info("nvm: registered %s [%u/%u/%u/%u/%u]\n",
+ dev->name, dev->geo.ws_min, dev->geo.ws_opt,
+ dev->geo.num_chk, dev->geo.all_luns,
+ dev->geo.num_ch);
return 0;
err:
pr_err("nvm: failed to initialize nvm\n");
@@ -969,17 +933,10 @@ int nvm_register(struct nvm_dev *dev)
if (!dev->q || !dev->ops)
return -EINVAL;
- if (dev->ops->max_phys_sect > 256) {
- pr_info("nvm: max sectors supported is 256.\n");
- return -EINVAL;
- }
-
- if (dev->ops->max_phys_sect > 1) {
- dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
- if (!dev->dma_pool) {
- pr_err("nvm: could not create dma pool\n");
- return -ENOMEM;
- }
+ dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
+ if (!dev->dma_pool) {
+ pr_err("nvm: could not create dma pool\n");
+ return -ENOMEM;
}
ret = nvm_init(dev);
@@ -1040,9 +997,6 @@ static long nvm_ioctl_info(struct file *file, void __user *arg)
struct nvm_tgt_type *tt;
int tgt_iter = 0;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
info = memdup_user(arg, sizeof(struct nvm_ioctl_info));
if (IS_ERR(info))
return -EFAULT;
@@ -1081,9 +1035,6 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
struct nvm_dev *dev;
int i = 0;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL);
if (!devices)
return -ENOMEM;
@@ -1124,9 +1075,6 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
{
struct nvm_ioctl_create create;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create)))
return -EFAULT;
@@ -1162,9 +1110,6 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
struct nvm_dev *dev;
int ret = 0;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove)))
return -EFAULT;
@@ -1189,9 +1134,6 @@ static long nvm_ioctl_dev_init(struct file *file, void __user *arg)
{
struct nvm_ioctl_dev_init init;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
if (copy_from_user(&init, arg, sizeof(struct nvm_ioctl_dev_init)))
return -EFAULT;
@@ -1208,9 +1150,6 @@ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
{
struct nvm_ioctl_dev_factory fact;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
if (copy_from_user(&fact, arg, sizeof(struct nvm_ioctl_dev_factory)))
return -EFAULT;
@@ -1226,6 +1165,9 @@ static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
switch (cmd) {
case NVM_INFO:
return nvm_ioctl_info(file, argp);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 000fcad38136..29a23111b31c 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -63,6 +63,8 @@ retry:
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
+ atomic64_add(nr_entries, &pblk->user_wa);
+
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(nr_entries, &pblk->inflight_writes);
atomic_long_add(nr_entries, &pblk->req_writes);
@@ -117,6 +119,8 @@ retry:
WARN_ONCE(gc_rq->secs_to_gc != valid_entries,
"pblk: inconsistent GC write\n");
+ atomic64_add(valid_entries, &pblk->gc_wa);
+
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(valid_entries, &pblk->inflight_writes);
atomic_long_add(valid_entries, &pblk->recov_gc_writes);
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 0487b9340c1d..94d5d97c9d8a 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -44,11 +44,12 @@ static void pblk_line_mark_bb(struct work_struct *work)
}
static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
- struct ppa_addr *ppa)
+ struct ppa_addr ppa_addr)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- int pos = pblk_ppa_to_pos(geo, *ppa);
+ struct ppa_addr *ppa;
+ int pos = pblk_ppa_to_pos(geo, ppa_addr);
pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos);
atomic_long_inc(&pblk->erase_failed);
@@ -58,26 +59,38 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
line->id, pos);
+ /* Not necessary to mark bad blocks on 2.0 spec. */
+ if (geo->version == NVM_OCSSD_SPEC_20)
+ return;
+
+ ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
+ if (!ppa)
+ return;
+
+ *ppa = ppa_addr;
pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb,
GFP_ATOMIC, pblk->bb_wq);
}
static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct nvm_chk_meta *chunk;
struct pblk_line *line;
+ int pos;
line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)];
+ pos = pblk_ppa_to_pos(geo, rqd->ppa_addr);
+ chunk = &line->chks[pos];
+
atomic_dec(&line->left_seblks);
if (rqd->error) {
- struct ppa_addr *ppa;
-
- ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
- if (!ppa)
- return;
-
- *ppa = rqd->ppa_addr;
- pblk_mark_bb(pblk, line, ppa);
+ chunk->state = NVM_CHK_ST_OFFLINE;
+ pblk_mark_bb(pblk, line, rqd->ppa_addr);
+ } else {
+ chunk->state = NVM_CHK_ST_FREE;
}
atomic_dec(&pblk->inflight_io);
@@ -92,6 +105,49 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
mempool_free(rqd, pblk->e_rq_pool);
}
+/*
+ * Get information for all chunks from the device.
+ *
+ * The caller is responsible for freeing the returned structure
+ */
+struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct nvm_chk_meta *meta;
+ struct ppa_addr ppa;
+ unsigned long len;
+ int ret;
+
+ ppa.ppa = 0;
+
+ len = geo->all_chunks * sizeof(*meta);
+ meta = kzalloc(len, GFP_KERNEL);
+ if (!meta)
+ return ERR_PTR(-ENOMEM);
+
+ ret = nvm_get_chunk_meta(dev, meta, ppa, geo->all_chunks);
+ if (ret) {
+ kfree(meta);
+ return ERR_PTR(-EIO);
+ }
+
+ return meta;
+}
+
+struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
+ struct nvm_chk_meta *meta,
+ struct ppa_addr ppa)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int ch_off = ppa.m.grp * geo->num_chk * geo->num_lun;
+ int lun_off = ppa.m.pu * geo->num_chk;
+ int chk_off = ppa.m.chk;
+
+ return meta + ch_off + lun_off + chk_off;
+}
+
void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
u64 paddr)
{
@@ -613,7 +669,7 @@ next_rq:
memset(&rqd, 0, sizeof(struct nvm_rq));
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
- rq_len = rq_ppas * geo->sec_size;
+ rq_len = rq_ppas * geo->csecs;
bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
l_mg->emeta_alloc_type, GFP_KERNEL);
@@ -722,7 +778,7 @@ u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
if (bit >= lm->blk_per_line)
return -1;
- return bit * geo->sec_per_pl;
+ return bit * geo->ws_opt;
}
static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
@@ -885,7 +941,7 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
}
ppa = pblk->luns[bit].bppa; /* set ch and lun */
- ppa.g.blk = line->id;
+ ppa.a.blk = line->id;
atomic_dec(&line->left_eblks);
WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
@@ -975,7 +1031,8 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
smeta_buf->header.id = cpu_to_le32(line->id);
smeta_buf->header.type = cpu_to_le16(line->type);
- smeta_buf->header.version = SMETA_VERSION;
+ smeta_buf->header.version_major = SMETA_VERSION_MAJOR;
+ smeta_buf->header.version_minor = SMETA_VERSION_MINOR;
/* Start metadata */
smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
@@ -998,6 +1055,12 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
/* End metadata */
memcpy(&emeta_buf->header, &smeta_buf->header,
sizeof(struct line_header));
+
+ emeta_buf->header.version_major = EMETA_VERSION_MAJOR;
+ emeta_buf->header.version_minor = EMETA_VERSION_MINOR;
+ emeta_buf->header.crc = cpu_to_le32(
+ pblk_calc_meta_header_crc(pblk, &emeta_buf->header));
+
emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
emeta_buf->nr_valid_lbas = cpu_to_le64(0);
@@ -1018,28 +1081,26 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- int nr_bb = 0;
u64 off;
int bit = -1;
+ int emeta_secs;
line->sec_in_line = lm->sec_per_line;
/* Capture bad block information on line mapping bitmaps */
while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
bit + 1)) < lm->blk_per_line) {
- off = bit * geo->sec_per_pl;
+ off = bit * geo->ws_opt;
bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
lm->sec_per_line);
bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
lm->sec_per_line);
- line->sec_in_line -= geo->sec_per_chk;
- if (bit >= lm->emeta_bb)
- nr_bb++;
+ line->sec_in_line -= geo->clba;
}
/* Mark smeta metadata sectors as bad sectors */
bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
- off = bit * geo->sec_per_pl;
+ off = bit * geo->ws_opt;
bitmap_set(line->map_bitmap, off, lm->smeta_sec);
line->sec_in_line -= lm->smeta_sec;
line->smeta_ssec = off;
@@ -1055,18 +1116,18 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
/* Mark emeta metadata sectors as bad sectors. We need to consider bad
* blocks to make sure that there are enough sectors to store emeta
*/
- off = lm->sec_per_line - lm->emeta_sec[0];
- bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
- while (nr_bb) {
- off -= geo->sec_per_pl;
+ emeta_secs = lm->emeta_sec[0];
+ off = lm->sec_per_line;
+ while (emeta_secs) {
+ off -= geo->ws_opt;
if (!test_bit(off, line->invalid_bitmap)) {
- bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl);
- nr_bb--;
+ bitmap_set(line->invalid_bitmap, off, geo->ws_opt);
+ emeta_secs -= geo->ws_opt;
}
}
- line->sec_in_line -= lm->emeta_sec[0];
line->emeta_ssec = off;
+ line->sec_in_line -= lm->emeta_sec[0];
line->nr_valid_lbas = 0;
line->left_msecs = line->sec_in_line;
*line->vsc = cpu_to_le32(line->sec_in_line);
@@ -1086,10 +1147,34 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
return 1;
}
+static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int blk_to_erase = atomic_read(&line->blk_in_line);
+ int i;
+
+ for (i = 0; i < lm->blk_per_line; i++) {
+ struct pblk_lun *rlun = &pblk->luns[i];
+ int pos = pblk_ppa_to_pos(geo, rlun->bppa);
+ int state = line->chks[pos].state;
+
+ /* Free chunks should not be erased */
+ if (state & NVM_CHK_ST_FREE) {
+ set_bit(pblk_ppa_to_pos(geo, rlun->bppa),
+ line->erase_bitmap);
+ blk_to_erase--;
+ }
+ }
+
+ return blk_to_erase;
+}
+
static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_meta *lm = &pblk->lm;
- int blk_in_line = atomic_read(&line->blk_in_line);
+ int blk_to_erase;
line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC);
if (!line->map_bitmap)
@@ -1102,7 +1187,21 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
return -ENOMEM;
}
+ /* Bad blocks do not need to be erased */
+ bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
+
spin_lock(&line->lock);
+
+ /* If we have not written to this line, we need to mark up free chunks
+ * as already erased
+ */
+ if (line->state == PBLK_LINESTATE_NEW) {
+ blk_to_erase = pblk_prepare_new_line(pblk, line);
+ line->state = PBLK_LINESTATE_FREE;
+ } else {
+ blk_to_erase = atomic_read(&line->blk_in_line);
+ }
+
if (line->state != PBLK_LINESTATE_FREE) {
kfree(line->map_bitmap);
kfree(line->invalid_bitmap);
@@ -1114,15 +1213,12 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
line->state = PBLK_LINESTATE_OPEN;
- atomic_set(&line->left_eblks, blk_in_line);
- atomic_set(&line->left_seblks, blk_in_line);
+ atomic_set(&line->left_eblks, blk_to_erase);
+ atomic_set(&line->left_seblks, blk_to_erase);
line->meta_distance = lm->meta_distance;
spin_unlock(&line->lock);
- /* Bad blocks do not need to be erased */
- bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
-
kref_init(&line->ref);
return 0;
@@ -1399,13 +1495,6 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
l_mg->data_line = new;
spin_lock(&l_mg->free_lock);
- if (pblk->state != PBLK_STATE_RUNNING) {
- l_mg->data_line = NULL;
- l_mg->data_next = NULL;
- spin_unlock(&l_mg->free_lock);
- goto out;
- }
-
pblk_line_setup_metadata(new, l_mg, &pblk->lm);
spin_unlock(&l_mg->free_lock);
@@ -1585,12 +1674,14 @@ static void pblk_line_should_sync_meta(struct pblk *pblk)
void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list;
+ int i;
#ifdef CONFIG_NVM_DEBUG
- struct pblk_line_meta *lm = &pblk->lm;
-
WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
"pblk: corrupt closed line %d\n", line->id);
#endif
@@ -1612,6 +1703,15 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
line->smeta = NULL;
line->emeta = NULL;
+ for (i = 0; i < lm->blk_per_line; i++) {
+ struct pblk_lun *rlun = &pblk->luns[i];
+ int pos = pblk_ppa_to_pos(geo, rlun->bppa);
+ int state = line->chks[pos].state;
+
+ if (!(state & NVM_CHK_ST_OFFLINE))
+ state = NVM_CHK_ST_CLOSED;
+ }
+
spin_unlock(&line->lock);
spin_unlock(&l_mg->gc_lock);
}
@@ -1622,11 +1722,16 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_emeta *emeta = line->emeta;
struct line_emeta *emeta_buf = emeta->buf;
+ struct wa_counters *wa = emeta_to_wa(lm, emeta_buf);
/* No need for exact vsc value; avoid a big line lock and take aprox. */
memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
+ wa->user = cpu_to_le64(atomic64_read(&pblk->user_wa));
+ wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa));
+ wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa));
+
emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
@@ -1680,8 +1785,8 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
int i;
for (i = 1; i < nr_ppas; i++)
- WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
- ppa_list[0].g.ch != ppa_list[i].g.ch);
+ WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun ||
+ ppa_list[0].a.ch != ppa_list[i].a.ch);
#endif
ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
@@ -1725,8 +1830,8 @@ void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
int i;
for (i = 1; i < nr_ppas; i++)
- WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
- ppa_list[0].g.ch != ppa_list[i].g.ch);
+ WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun ||
+ ppa_list[0].a.ch != ppa_list[i].a.ch);
#endif
rlun = &pblk->luns[pos];
@@ -1739,10 +1844,10 @@ void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_lun *rlun;
- int nr_luns = geo->all_luns;
+ int num_lun = geo->all_luns;
int bit = -1;
- while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) {
+ while ((bit = find_next_bit(lun_bitmap, num_lun, bit + 1)) < num_lun) {
rlun = &pblk->luns[bit];
up(&rlun->wr_sem);
}
@@ -1829,6 +1934,7 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
#endif
/* Invalidate and discard padded entries */
if (lba == ADDR_EMPTY) {
+ atomic64_inc(&pblk->pad_wa);
#ifdef CONFIG_NVM_DEBUG
atomic_long_inc(&pblk->padded_wb);
#endif
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index 3d899383666e..6851a5c67189 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -88,7 +88,7 @@ static void pblk_gc_line_ws(struct work_struct *work)
up(&gc->gc_sem);
- gc_rq->data = vmalloc(gc_rq->nr_secs * geo->sec_size);
+ gc_rq->data = vmalloc(gc_rq->nr_secs * geo->csecs);
if (!gc_rq->data) {
pr_err("pblk: could not GC line:%d (%d/%d)\n",
line->id, *line->vsc, gc_rq->nr_secs);
@@ -147,10 +147,8 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
int ret;
invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
- if (!invalid_bitmap) {
- pr_err("pblk: could not allocate GC invalid bitmap\n");
+ if (!invalid_bitmap)
goto fail_free_ws;
- }
emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
GFP_KERNEL);
@@ -666,12 +664,10 @@ void pblk_gc_exit(struct pblk *pblk)
kthread_stop(gc->gc_reader_ts);
flush_workqueue(gc->gc_reader_wq);
- if (gc->gc_reader_wq)
- destroy_workqueue(gc->gc_reader_wq);
+ destroy_workqueue(gc->gc_reader_wq);
flush_workqueue(gc->gc_line_reader_wq);
- if (gc->gc_line_reader_wq)
- destroy_workqueue(gc->gc_line_reader_wq);
+ destroy_workqueue(gc->gc_line_reader_wq);
if (gc->gc_writer_ts)
kthread_stop(gc->gc_writer_ts);
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 93d671ca518e..91a5bc2556a3 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -80,7 +80,7 @@ static size_t pblk_trans_map_size(struct pblk *pblk)
{
int entry_size = 8;
- if (pblk->ppaf_bitsize < 32)
+ if (pblk->addrf_len < 32)
entry_size = 4;
return entry_size * pblk->rl.nr_secs;
@@ -103,7 +103,40 @@ static void pblk_l2p_free(struct pblk *pblk)
vfree(pblk->trans_map);
}
-static int pblk_l2p_init(struct pblk *pblk)
+static int pblk_l2p_recover(struct pblk *pblk, bool factory_init)
+{
+ struct pblk_line *line = NULL;
+
+ if (factory_init) {
+ pblk_setup_uuid(pblk);
+ } else {
+ line = pblk_recov_l2p(pblk);
+ if (IS_ERR(line)) {
+ pr_err("pblk: could not recover l2p table\n");
+ return -EFAULT;
+ }
+ }
+
+#ifdef CONFIG_NVM_DEBUG
+ pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk));
+#endif
+
+ /* Free full lines directly as GC has not been started yet */
+ pblk_gc_free_full_lines(pblk);
+
+ if (!line) {
+ /* Configure next line for user data */
+ line = pblk_line_get_first_data(pblk);
+ if (!line) {
+ pr_err("pblk: line list corrupted\n");
+ return -EFAULT;
+ }
+ }
+
+ return 0;
+}
+
+static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
{
sector_t i;
struct ppa_addr ppa;
@@ -119,7 +152,7 @@ static int pblk_l2p_init(struct pblk *pblk)
for (i = 0; i < pblk->rl.nr_secs; i++)
pblk_trans_map_set(pblk, i, ppa);
- return 0;
+ return pblk_l2p_recover(pblk, factory_init);
}
static void pblk_rwb_free(struct pblk *pblk)
@@ -146,7 +179,7 @@ static int pblk_rwb_init(struct pblk *pblk)
return -ENOMEM;
power_size = get_count_order(nr_entries);
- power_seg_sz = get_count_order(geo->sec_size);
+ power_seg_sz = get_count_order(geo->csecs);
return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
}
@@ -154,47 +187,103 @@ static int pblk_rwb_init(struct pblk *pblk)
/* Minimum pages needed within a lun */
#define ADDR_POOL_SIZE 64
-static int pblk_set_ppaf(struct pblk *pblk)
+static int pblk_set_addrf_12(struct nvm_geo *geo, struct nvm_addrf_12 *dst)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct nvm_addr_format ppaf = geo->ppaf;
+ struct nvm_addrf_12 *src = (struct nvm_addrf_12 *)&geo->addrf;
int power_len;
/* Re-calculate channel and lun format to adapt to configuration */
- power_len = get_count_order(geo->nr_chnls);
- if (1 << power_len != geo->nr_chnls) {
+ power_len = get_count_order(geo->num_ch);
+ if (1 << power_len != geo->num_ch) {
pr_err("pblk: supports only power-of-two channel config.\n");
return -EINVAL;
}
- ppaf.ch_len = power_len;
+ dst->ch_len = power_len;
- power_len = get_count_order(geo->nr_luns);
- if (1 << power_len != geo->nr_luns) {
+ power_len = get_count_order(geo->num_lun);
+ if (1 << power_len != geo->num_lun) {
pr_err("pblk: supports only power-of-two LUN config.\n");
return -EINVAL;
}
- ppaf.lun_len = power_len;
-
- pblk->ppaf.sec_offset = 0;
- pblk->ppaf.pln_offset = ppaf.sect_len;
- pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len;
- pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len;
- pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len;
- pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len;
- pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1;
- pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) <<
- pblk->ppaf.pln_offset;
- pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) <<
- pblk->ppaf.ch_offset;
- pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) <<
- pblk->ppaf.lun_offset;
- pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) <<
- pblk->ppaf.pg_offset;
- pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) <<
- pblk->ppaf.blk_offset;
-
- pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len;
+ dst->lun_len = power_len;
+
+ dst->blk_len = src->blk_len;
+ dst->pg_len = src->pg_len;
+ dst->pln_len = src->pln_len;
+ dst->sec_len = src->sec_len;
+
+ dst->sec_offset = 0;
+ dst->pln_offset = dst->sec_len;
+ dst->ch_offset = dst->pln_offset + dst->pln_len;
+ dst->lun_offset = dst->ch_offset + dst->ch_len;
+ dst->pg_offset = dst->lun_offset + dst->lun_len;
+ dst->blk_offset = dst->pg_offset + dst->pg_len;
+
+ dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
+ dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
+ dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
+ dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
+ dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
+ dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
+
+ return dst->blk_offset + src->blk_len;
+}
+
+static int pblk_set_addrf_20(struct nvm_geo *geo, struct nvm_addrf *adst,
+ struct pblk_addrf *udst)
+{
+ struct nvm_addrf *src = &geo->addrf;
+
+ adst->ch_len = get_count_order(geo->num_ch);
+ adst->lun_len = get_count_order(geo->num_lun);
+ adst->chk_len = src->chk_len;
+ adst->sec_len = src->sec_len;
+
+ adst->sec_offset = 0;
+ adst->ch_offset = adst->sec_len;
+ adst->lun_offset = adst->ch_offset + adst->ch_len;
+ adst->chk_offset = adst->lun_offset + adst->lun_len;
+
+ adst->sec_mask = ((1ULL << adst->sec_len) - 1) << adst->sec_offset;
+ adst->chk_mask = ((1ULL << adst->chk_len) - 1) << adst->chk_offset;
+ adst->lun_mask = ((1ULL << adst->lun_len) - 1) << adst->lun_offset;
+ adst->ch_mask = ((1ULL << adst->ch_len) - 1) << adst->ch_offset;
+
+ udst->sec_stripe = geo->ws_opt;
+ udst->ch_stripe = geo->num_ch;
+ udst->lun_stripe = geo->num_lun;
+
+ udst->sec_lun_stripe = udst->sec_stripe * udst->ch_stripe;
+ udst->sec_ws_stripe = udst->sec_lun_stripe * udst->lun_stripe;
+
+ return adst->chk_offset + adst->chk_len;
+}
+
+static int pblk_set_addrf(struct pblk *pblk)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int mod;
+
+ switch (geo->version) {
+ case NVM_OCSSD_SPEC_12:
+ div_u64_rem(geo->clba, pblk->min_write_pgs, &mod);
+ if (mod) {
+ pr_err("pblk: bad configuration of sectors/pages\n");
+ return -EINVAL;
+ }
+
+ pblk->addrf_len = pblk_set_addrf_12(geo, (void *)&pblk->addrf);
+ break;
+ case NVM_OCSSD_SPEC_20:
+ pblk->addrf_len = pblk_set_addrf_20(geo, (void *)&pblk->addrf,
+ &pblk->uaddrf);
+ break;
+ default:
+ pr_err("pblk: OCSSD revision not supported (%d)\n",
+ geo->version);
+ return -EINVAL;
+ }
return 0;
}
@@ -252,16 +341,41 @@ static int pblk_core_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
+ int max_write_ppas;
- pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
- geo->nr_planes * geo->all_luns;
+ atomic64_set(&pblk->user_wa, 0);
+ atomic64_set(&pblk->pad_wa, 0);
+ atomic64_set(&pblk->gc_wa, 0);
+ pblk->user_rst_wa = 0;
+ pblk->pad_rst_wa = 0;
+ pblk->gc_rst_wa = 0;
- if (pblk_init_global_caches(pblk))
+ atomic64_set(&pblk->nr_flush, 0);
+ pblk->nr_flush_rst = 0;
+
+ pblk->pgs_in_buffer = geo->mw_cunits * geo->all_luns;
+
+ pblk->min_write_pgs = geo->ws_opt * (geo->csecs / PAGE_SIZE);
+ max_write_ppas = pblk->min_write_pgs * geo->all_luns;
+ pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
+ pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
+
+ if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
+ pr_err("pblk: vector list too big(%u > %u)\n",
+ pblk->max_write_pgs, PBLK_MAX_REQ_ADDRS);
+ return -EINVAL;
+ }
+
+ pblk->pad_dist = kzalloc((pblk->min_write_pgs - 1) * sizeof(atomic64_t),
+ GFP_KERNEL);
+ if (!pblk->pad_dist)
return -ENOMEM;
+ if (pblk_init_global_caches(pblk))
+ goto fail_free_pad_dist;
+
/* Internal bios can be at most the sectors signaled by the device. */
- pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev),
- 0);
+ pblk->page_bio_pool = mempool_create_page_pool(NVM_MAX_VLBA, 0);
if (!pblk->page_bio_pool)
goto free_global_caches;
@@ -305,13 +419,11 @@ static int pblk_core_init(struct pblk *pblk)
if (!pblk->r_end_wq)
goto free_bb_wq;
- if (pblk_set_ppaf(pblk))
- goto free_r_end_wq;
-
- if (pblk_rwb_init(pblk))
+ if (pblk_set_addrf(pblk))
goto free_r_end_wq;
INIT_LIST_HEAD(&pblk->compl_list);
+
return 0;
free_r_end_wq:
@@ -334,6 +446,8 @@ free_page_bio_pool:
mempool_destroy(pblk->page_bio_pool);
free_global_caches:
pblk_free_global_caches(pblk);
+fail_free_pad_dist:
+ kfree(pblk->pad_dist);
return -ENOMEM;
}
@@ -355,20 +469,31 @@ static void pblk_core_free(struct pblk *pblk)
mempool_destroy(pblk->e_rq_pool);
mempool_destroy(pblk->w_rq_pool);
- pblk_rwb_free(pblk);
-
pblk_free_global_caches(pblk);
+ kfree(pblk->pad_dist);
}
-static void pblk_luns_free(struct pblk *pblk)
+static void pblk_line_mg_free(struct pblk *pblk)
{
- kfree(pblk->luns);
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ int i;
+
+ kfree(l_mg->bb_template);
+ kfree(l_mg->bb_aux);
+ kfree(l_mg->vsc_list);
+
+ for (i = 0; i < PBLK_DATA_LINES; i++) {
+ kfree(l_mg->sline_meta[i]);
+ pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
+ kfree(l_mg->eline_meta[i]);
+ }
}
-static void pblk_free_line_bitmaps(struct pblk_line *line)
+static void pblk_line_meta_free(struct pblk_line *line)
{
kfree(line->blk_bitmap);
kfree(line->erase_bitmap);
+ kfree(line->chks);
}
static void pblk_lines_free(struct pblk *pblk)
@@ -382,40 +507,21 @@ static void pblk_lines_free(struct pblk *pblk)
line = &pblk->lines[i];
pblk_line_free(pblk, line);
- pblk_free_line_bitmaps(line);
+ pblk_line_meta_free(line);
}
spin_unlock(&l_mg->free_lock);
-}
-
-static void pblk_line_meta_free(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- int i;
-
- kfree(l_mg->bb_template);
- kfree(l_mg->bb_aux);
- kfree(l_mg->vsc_list);
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- kfree(l_mg->sline_meta[i]);
- pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
- kfree(l_mg->eline_meta[i]);
- }
+ pblk_line_mg_free(pblk);
+ kfree(pblk->luns);
kfree(pblk->lines);
}
-static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun)
+static int pblk_bb_get_tbl(struct nvm_tgt_dev *dev, struct pblk_lun *rlun,
+ u8 *blks, int nr_blks)
{
- struct nvm_geo *geo = &dev->geo;
struct ppa_addr ppa;
- u8 *blks;
- int nr_blks, ret;
-
- nr_blks = geo->nr_chks * geo->plane_mode;
- blks = kmalloc(nr_blks, GFP_KERNEL);
- if (!blks)
- return -ENOMEM;
+ int ret;
ppa.ppa = 0;
ppa.g.ch = rlun->bppa.g.ch;
@@ -423,69 +529,64 @@ static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun)
ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
if (ret)
- goto out;
+ return ret;
nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
- if (nr_blks < 0) {
- ret = nr_blks;
- goto out;
- }
-
- rlun->bb_list = blks;
+ if (nr_blks < 0)
+ return -EIO;
return 0;
-out:
- kfree(blks);
- return ret;
}
-static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
- int blk_per_line)
+static void *pblk_bb_get_meta(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct pblk_lun *rlun;
- int bb_cnt = 0;
- int i;
+ u8 *meta;
+ int i, nr_blks, blk_per_lun;
+ int ret;
- for (i = 0; i < blk_per_line; i++) {
- rlun = &pblk->luns[i];
- if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
- continue;
+ blk_per_lun = geo->num_chk * geo->pln_mode;
+ nr_blks = blk_per_lun * geo->all_luns;
+
+ meta = kmalloc(nr_blks, GFP_KERNEL);
+ if (!meta)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < geo->all_luns; i++) {
+ struct pblk_lun *rlun = &pblk->luns[i];
+ u8 *meta_pos = meta + i * blk_per_lun;
- set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
- bb_cnt++;
+ ret = pblk_bb_get_tbl(dev, rlun, meta_pos, blk_per_lun);
+ if (ret) {
+ kfree(meta);
+ return ERR_PTR(-EIO);
+ }
}
- return bb_cnt;
+ return meta;
}
-static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
+static void *pblk_chunk_get_meta(struct pblk *pblk)
{
- struct pblk_line_meta *lm = &pblk->lm;
-
- line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
- if (!line->blk_bitmap)
- return -ENOMEM;
-
- line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
- if (!line->erase_bitmap) {
- kfree(line->blk_bitmap);
- return -ENOMEM;
- }
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
- return 0;
+ if (geo->version == NVM_OCSSD_SPEC_12)
+ return pblk_bb_get_meta(pblk);
+ else
+ return pblk_chunk_get_info(pblk);
}
-static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
+static int pblk_luns_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_lun *rlun;
- int i, ret;
+ int i;
/* TODO: Implement unbalanced LUN support */
- if (geo->nr_luns < 0) {
+ if (geo->num_lun < 0) {
pr_err("pblk: unbalanced LUN config.\n");
return -EINVAL;
}
@@ -497,58 +598,19 @@ static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
for (i = 0; i < geo->all_luns; i++) {
/* Stripe across channels */
- int ch = i % geo->nr_chnls;
- int lun_raw = i / geo->nr_chnls;
- int lunid = lun_raw + ch * geo->nr_luns;
+ int ch = i % geo->num_ch;
+ int lun_raw = i / geo->num_ch;
+ int lunid = lun_raw + ch * geo->num_lun;
rlun = &pblk->luns[i];
- rlun->bppa = luns[lunid];
+ rlun->bppa = dev->luns[lunid];
sema_init(&rlun->wr_sem, 1);
-
- ret = pblk_bb_discovery(dev, rlun);
- if (ret) {
- while (--i >= 0)
- kfree(pblk->luns[i].bb_list);
- return ret;
- }
}
return 0;
}
-static int pblk_lines_configure(struct pblk *pblk, int flags)
-{
- struct pblk_line *line = NULL;
- int ret = 0;
-
- if (!(flags & NVM_TARGET_FACTORY)) {
- line = pblk_recov_l2p(pblk);
- if (IS_ERR(line)) {
- pr_err("pblk: could not recover l2p table\n");
- ret = -EFAULT;
- }
- }
-
-#ifdef CONFIG_NVM_DEBUG
- pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk));
-#endif
-
- /* Free full lines directly as GC has not been started yet */
- pblk_gc_free_full_lines(pblk);
-
- if (!line) {
- /* Configure next line for user data */
- line = pblk_line_get_first_data(pblk);
- if (!line) {
- pr_err("pblk: line list corrupted\n");
- ret = -EFAULT;
- }
- }
-
- return ret;
-}
-
/* See comment over struct line_emeta definition */
static unsigned int calc_emeta_len(struct pblk *pblk)
{
@@ -559,19 +621,19 @@ static unsigned int calc_emeta_len(struct pblk *pblk)
/* Round to sector size so that lba_list starts on its own sector */
lm->emeta_sec[1] = DIV_ROUND_UP(
- sizeof(struct line_emeta) + lm->blk_bitmap_len,
- geo->sec_size);
- lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
+ sizeof(struct line_emeta) + lm->blk_bitmap_len +
+ sizeof(struct wa_counters), geo->csecs);
+ lm->emeta_len[1] = lm->emeta_sec[1] * geo->csecs;
/* Round to sector size so that vsc_list starts on its own sector */
lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
- geo->sec_size);
- lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;
+ geo->csecs);
+ lm->emeta_len[2] = lm->emeta_sec[2] * geo->csecs;
lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
- geo->sec_size);
- lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;
+ geo->csecs);
+ lm->emeta_len[3] = lm->emeta_sec[3] * geo->csecs;
lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
@@ -602,23 +664,211 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
* on user capacity consider only provisioned blocks
*/
pblk->rl.total_blocks = nr_free_blks;
- pblk->rl.nr_secs = nr_free_blks * geo->sec_per_chk;
+ pblk->rl.nr_secs = nr_free_blks * geo->clba;
/* Consider sectors used for metadata */
sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
- blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk);
+ blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
- pblk->capacity = (provisioned - blk_meta) * geo->sec_per_chk;
+ pblk->capacity = (provisioned - blk_meta) * geo->clba;
atomic_set(&pblk->rl.free_blocks, nr_free_blks);
atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
}
-static int pblk_lines_alloc_metadata(struct pblk *pblk)
+static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line,
+ void *chunk_meta)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_meta *lm = &pblk->lm;
+ int i, chk_per_lun, nr_bad_chks = 0;
+
+ chk_per_lun = geo->num_chk * geo->pln_mode;
+
+ for (i = 0; i < lm->blk_per_line; i++) {
+ struct pblk_lun *rlun = &pblk->luns[i];
+ struct nvm_chk_meta *chunk;
+ int pos = pblk_ppa_to_pos(geo, rlun->bppa);
+ u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun;
+
+ chunk = &line->chks[pos];
+
+ /*
+ * In 1.2 spec. chunk state is not persisted by the device. Thus
+ * some of the values are reset each time pblk is instantiated.
+ */
+ if (lun_bb_meta[line->id] == NVM_BLK_T_FREE)
+ chunk->state = NVM_CHK_ST_FREE;
+ else
+ chunk->state = NVM_CHK_ST_OFFLINE;
+
+ chunk->type = NVM_CHK_TP_W_SEQ;
+ chunk->wi = 0;
+ chunk->slba = -1;
+ chunk->cnlb = geo->clba;
+ chunk->wp = 0;
+
+ if (!(chunk->state & NVM_CHK_ST_OFFLINE))
+ continue;
+
+ set_bit(pos, line->blk_bitmap);
+ nr_bad_chks++;
+ }
+
+ return nr_bad_chks;
+}
+
+static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line,
+ struct nvm_chk_meta *meta)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_meta *lm = &pblk->lm;
+ int i, nr_bad_chks = 0;
+
+ for (i = 0; i < lm->blk_per_line; i++) {
+ struct pblk_lun *rlun = &pblk->luns[i];
+ struct nvm_chk_meta *chunk;
+ struct nvm_chk_meta *chunk_meta;
+ struct ppa_addr ppa;
+ int pos;
+
+ ppa = rlun->bppa;
+ pos = pblk_ppa_to_pos(geo, ppa);
+ chunk = &line->chks[pos];
+
+ ppa.m.chk = line->id;
+ chunk_meta = pblk_chunk_get_off(pblk, meta, ppa);
+
+ chunk->state = chunk_meta->state;
+ chunk->type = chunk_meta->type;
+ chunk->wi = chunk_meta->wi;
+ chunk->slba = chunk_meta->slba;
+ chunk->cnlb = chunk_meta->cnlb;
+ chunk->wp = chunk_meta->wp;
+
+ if (!(chunk->state & NVM_CHK_ST_OFFLINE))
+ continue;
+
+ if (chunk->type & NVM_CHK_TP_SZ_SPEC) {
+ WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n");
+ continue;
+ }
+
+ set_bit(pos, line->blk_bitmap);
+ nr_bad_chks++;
+ }
+
+ return nr_bad_chks;
+}
+
+static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line,
+ void *chunk_meta, int line_id)
{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- int i;
+ long nr_bad_chks, chk_in_line;
+
+ line->pblk = pblk;
+ line->id = line_id;
+ line->type = PBLK_LINETYPE_FREE;
+ line->state = PBLK_LINESTATE_NEW;
+ line->gc_group = PBLK_LINEGC_NONE;
+ line->vsc = &l_mg->vsc_list[line_id];
+ spin_lock_init(&line->lock);
+
+ if (geo->version == NVM_OCSSD_SPEC_12)
+ nr_bad_chks = pblk_setup_line_meta_12(pblk, line, chunk_meta);
+ else
+ nr_bad_chks = pblk_setup_line_meta_20(pblk, line, chunk_meta);
+
+ chk_in_line = lm->blk_per_line - nr_bad_chks;
+ if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line ||
+ chk_in_line < lm->min_blk_line) {
+ line->state = PBLK_LINESTATE_BAD;
+ list_add_tail(&line->list, &l_mg->bad_list);
+ return 0;
+ }
+
+ atomic_set(&line->blk_in_line, chk_in_line);
+ list_add_tail(&line->list, &l_mg->free_list);
+ l_mg->nr_free_lines++;
+
+ return chk_in_line;
+}
+
+static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+
+ line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
+ if (!line->blk_bitmap)
+ return -ENOMEM;
+
+ line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
+ if (!line->erase_bitmap) {
+ kfree(line->blk_bitmap);
+ return -ENOMEM;
+ }
+
+ line->chks = kmalloc(lm->blk_per_line * sizeof(struct nvm_chk_meta),
+ GFP_KERNEL);
+ if (!line->chks) {
+ kfree(line->erase_bitmap);
+ kfree(line->blk_bitmap);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int pblk_line_mg_init(struct pblk *pblk)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ int i, bb_distance;
+
+ l_mg->nr_lines = geo->num_chk;
+ l_mg->log_line = l_mg->data_line = NULL;
+ l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
+ l_mg->nr_free_lines = 0;
+ bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+
+ INIT_LIST_HEAD(&l_mg->free_list);
+ INIT_LIST_HEAD(&l_mg->corrupt_list);
+ INIT_LIST_HEAD(&l_mg->bad_list);
+ INIT_LIST_HEAD(&l_mg->gc_full_list);
+ INIT_LIST_HEAD(&l_mg->gc_high_list);
+ INIT_LIST_HEAD(&l_mg->gc_mid_list);
+ INIT_LIST_HEAD(&l_mg->gc_low_list);
+ INIT_LIST_HEAD(&l_mg->gc_empty_list);
+
+ INIT_LIST_HEAD(&l_mg->emeta_list);
+
+ l_mg->gc_lists[0] = &l_mg->gc_high_list;
+ l_mg->gc_lists[1] = &l_mg->gc_mid_list;
+ l_mg->gc_lists[2] = &l_mg->gc_low_list;
+
+ spin_lock_init(&l_mg->free_lock);
+ spin_lock_init(&l_mg->close_lock);
+ spin_lock_init(&l_mg->gc_lock);
+
+ l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
+ if (!l_mg->vsc_list)
+ goto fail;
+
+ l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
+ if (!l_mg->bb_template)
+ goto fail_free_vsc_list;
+
+ l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
+ if (!l_mg->bb_aux)
+ goto fail_free_bb_template;
/* smeta is always small enough to fit on a kmalloc memory allocation,
* emeta depends on the number of LUNs allocated to the pblk instance
@@ -664,13 +914,13 @@ static int pblk_lines_alloc_metadata(struct pblk *pblk)
}
}
- l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
- if (!l_mg->vsc_list)
- goto fail_free_emeta;
-
for (i = 0; i < l_mg->nr_lines; i++)
l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
+ bb_distance = (geo->all_luns) * geo->ws_opt;
+ for (i = 0; i < lm->sec_per_line; i += bb_distance)
+ bitmap_set(l_mg->bb_template, i, geo->ws_opt);
+
return 0;
fail_free_emeta:
@@ -681,50 +931,27 @@ fail_free_emeta:
kfree(l_mg->eline_meta[i]->buf);
kfree(l_mg->eline_meta[i]);
}
-
fail_free_smeta:
for (i = 0; i < PBLK_DATA_LINES; i++)
kfree(l_mg->sline_meta[i]);
-
+ kfree(l_mg->bb_aux);
+fail_free_bb_template:
+ kfree(l_mg->bb_template);
+fail_free_vsc_list:
+ kfree(l_mg->vsc_list);
+fail:
return -ENOMEM;
}
-static int pblk_lines_init(struct pblk *pblk)
+static int pblk_line_meta_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *line;
unsigned int smeta_len, emeta_len;
- long nr_bad_blks, nr_free_blks;
- int bb_distance, max_write_ppas, mod;
- int i, ret;
-
- pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
- max_write_ppas = pblk->min_write_pgs * geo->all_luns;
- pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
- max_write_ppas : nvm_max_phys_sects(dev);
- pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
-
- if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
- pr_err("pblk: cannot support device max_phys_sect\n");
- return -EINVAL;
- }
-
- div_u64_rem(geo->sec_per_chk, pblk->min_write_pgs, &mod);
- if (mod) {
- pr_err("pblk: bad configuration of sectors/pages\n");
- return -EINVAL;
- }
-
- l_mg->nr_lines = geo->nr_chks;
- l_mg->log_line = l_mg->data_line = NULL;
- l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
- l_mg->nr_free_lines = 0;
- bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+ int i;
- lm->sec_per_line = geo->sec_per_chk * geo->all_luns;
+ lm->sec_per_line = geo->clba * geo->all_luns;
lm->blk_per_line = geo->all_luns;
lm->blk_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long);
lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
@@ -738,8 +965,8 @@ static int pblk_lines_init(struct pblk *pblk)
*/
i = 1;
add_smeta_page:
- lm->smeta_sec = i * geo->sec_per_pl;
- lm->smeta_len = lm->smeta_sec * geo->sec_size;
+ lm->smeta_sec = i * geo->ws_opt;
+ lm->smeta_len = lm->smeta_sec * geo->csecs;
smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
if (smeta_len > lm->smeta_len) {
@@ -752,8 +979,8 @@ add_smeta_page:
*/
i = 1;
add_emeta_page:
- lm->emeta_sec[0] = i * geo->sec_per_pl;
- lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;
+ lm->emeta_sec[0] = i * geo->ws_opt;
+ lm->emeta_len[0] = lm->emeta_sec[0] * geo->csecs;
emeta_len = calc_emeta_len(pblk);
if (emeta_len > lm->emeta_len[0]) {
@@ -766,119 +993,75 @@ add_emeta_page:
lm->min_blk_line = 1;
if (geo->all_luns > 1)
lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec +
- lm->emeta_sec[0], geo->sec_per_chk);
+ lm->emeta_sec[0], geo->clba);
if (lm->min_blk_line > lm->blk_per_line) {
pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
lm->blk_per_line);
- ret = -EINVAL;
- goto fail;
- }
-
- ret = pblk_lines_alloc_metadata(pblk);
- if (ret)
- goto fail;
-
- l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
- if (!l_mg->bb_template) {
- ret = -ENOMEM;
- goto fail_free_meta;
+ return -EINVAL;
}
- l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
- if (!l_mg->bb_aux) {
- ret = -ENOMEM;
- goto fail_free_bb_template;
- }
+ return 0;
+}
- bb_distance = (geo->all_luns) * geo->sec_per_pl;
- for (i = 0; i < lm->sec_per_line; i += bb_distance)
- bitmap_set(l_mg->bb_template, i, geo->sec_per_pl);
+static int pblk_lines_init(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line *line;
+ void *chunk_meta;
+ long nr_free_chks = 0;
+ int i, ret;
- INIT_LIST_HEAD(&l_mg->free_list);
- INIT_LIST_HEAD(&l_mg->corrupt_list);
- INIT_LIST_HEAD(&l_mg->bad_list);
- INIT_LIST_HEAD(&l_mg->gc_full_list);
- INIT_LIST_HEAD(&l_mg->gc_high_list);
- INIT_LIST_HEAD(&l_mg->gc_mid_list);
- INIT_LIST_HEAD(&l_mg->gc_low_list);
- INIT_LIST_HEAD(&l_mg->gc_empty_list);
+ ret = pblk_line_meta_init(pblk);
+ if (ret)
+ return ret;
- INIT_LIST_HEAD(&l_mg->emeta_list);
+ ret = pblk_line_mg_init(pblk);
+ if (ret)
+ return ret;
- l_mg->gc_lists[0] = &l_mg->gc_high_list;
- l_mg->gc_lists[1] = &l_mg->gc_mid_list;
- l_mg->gc_lists[2] = &l_mg->gc_low_list;
+ ret = pblk_luns_init(pblk);
+ if (ret)
+ goto fail_free_meta;
- spin_lock_init(&l_mg->free_lock);
- spin_lock_init(&l_mg->close_lock);
- spin_lock_init(&l_mg->gc_lock);
+ chunk_meta = pblk_chunk_get_meta(pblk);
+ if (IS_ERR(chunk_meta)) {
+ ret = PTR_ERR(chunk_meta);
+ goto fail_free_luns;
+ }
pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
GFP_KERNEL);
if (!pblk->lines) {
ret = -ENOMEM;
- goto fail_free_bb_aux;
+ goto fail_free_chunk_meta;
}
- nr_free_blks = 0;
for (i = 0; i < l_mg->nr_lines; i++) {
- int blk_in_line;
-
line = &pblk->lines[i];
- line->pblk = pblk;
- line->id = i;
- line->type = PBLK_LINETYPE_FREE;
- line->state = PBLK_LINESTATE_FREE;
- line->gc_group = PBLK_LINEGC_NONE;
- line->vsc = &l_mg->vsc_list[i];
- spin_lock_init(&line->lock);
-
- ret = pblk_alloc_line_bitmaps(pblk, line);
+ ret = pblk_alloc_line_meta(pblk, line);
if (ret)
goto fail_free_lines;
- nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
- if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
- pblk_free_line_bitmaps(line);
- ret = -EINVAL;
- goto fail_free_lines;
- }
-
- blk_in_line = lm->blk_per_line - nr_bad_blks;
- if (blk_in_line < lm->min_blk_line) {
- line->state = PBLK_LINESTATE_BAD;
- list_add_tail(&line->list, &l_mg->bad_list);
- continue;
- }
-
- nr_free_blks += blk_in_line;
- atomic_set(&line->blk_in_line, blk_in_line);
-
- l_mg->nr_free_lines++;
- list_add_tail(&line->list, &l_mg->free_list);
+ nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);
}
- pblk_set_provision(pblk, nr_free_blks);
-
- /* Cleanup per-LUN bad block lists - managed within lines on run-time */
- for (i = 0; i < geo->all_luns; i++)
- kfree(pblk->luns[i].bb_list);
+ pblk_set_provision(pblk, nr_free_chks);
+ kfree(chunk_meta);
return 0;
+
fail_free_lines:
while (--i >= 0)
- pblk_free_line_bitmaps(&pblk->lines[i]);
-fail_free_bb_aux:
- kfree(l_mg->bb_aux);
-fail_free_bb_template:
- kfree(l_mg->bb_template);
+ pblk_line_meta_free(&pblk->lines[i]);
+ kfree(pblk->lines);
+fail_free_chunk_meta:
+ kfree(chunk_meta);
+fail_free_luns:
+ kfree(pblk->luns);
fail_free_meta:
- pblk_line_meta_free(pblk);
-fail:
- for (i = 0; i < geo->all_luns; i++)
- kfree(pblk->luns[i].bb_list);
+ pblk_line_mg_free(pblk);
return ret;
}
@@ -912,18 +1095,17 @@ static void pblk_writer_stop(struct pblk *pblk)
WARN(pblk_rb_sync_count(&pblk->rwb),
"Stopping not fully synced write buffer\n");
+ del_timer_sync(&pblk->wtimer);
if (pblk->writer_ts)
kthread_stop(pblk->writer_ts);
- del_timer(&pblk->wtimer);
}
static void pblk_free(struct pblk *pblk)
{
- pblk_luns_free(pblk);
pblk_lines_free(pblk);
- pblk_line_meta_free(pblk);
- pblk_core_free(pblk);
pblk_l2p_free(pblk);
+ pblk_rwb_free(pblk);
+ pblk_core_free(pblk);
kfree(pblk);
}
@@ -970,9 +1152,17 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
struct pblk *pblk;
int ret;
- if (dev->identity.dom & NVM_RSP_L2P) {
+ /* pblk supports 1.2 and 2.0 versions */
+ if (!(geo->version == NVM_OCSSD_SPEC_12 ||
+ geo->version == NVM_OCSSD_SPEC_20)) {
+ pr_err("pblk: OCSSD version not supported (%u)\n",
+ geo->version);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (geo->version == NVM_OCSSD_SPEC_12 && geo->dom & NVM_RSP_L2P) {
pr_err("pblk: host-side L2P table not supported. (%x)\n",
- dev->identity.dom);
+ geo->dom);
return ERR_PTR(-EINVAL);
}
@@ -988,14 +1178,10 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
spin_lock_init(&pblk->trans_lock);
spin_lock_init(&pblk->lock);
- if (flags & NVM_TARGET_FACTORY)
- pblk_setup_uuid(pblk);
-
#ifdef CONFIG_NVM_DEBUG
atomic_long_set(&pblk->inflight_writes, 0);
atomic_long_set(&pblk->padded_writes, 0);
atomic_long_set(&pblk->padded_wb, 0);
- atomic_long_set(&pblk->nr_flush, 0);
atomic_long_set(&pblk->req_writes, 0);
atomic_long_set(&pblk->sub_writes, 0);
atomic_long_set(&pblk->sync_writes, 0);
@@ -1015,41 +1201,35 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
atomic_long_set(&pblk->write_failed, 0);
atomic_long_set(&pblk->erase_failed, 0);
- ret = pblk_luns_init(pblk, dev->luns);
+ ret = pblk_core_init(pblk);
if (ret) {
- pr_err("pblk: could not initialize luns\n");
+ pr_err("pblk: could not initialize core\n");
goto fail;
}
ret = pblk_lines_init(pblk);
if (ret) {
pr_err("pblk: could not initialize lines\n");
- goto fail_free_luns;
+ goto fail_free_core;
}
- ret = pblk_core_init(pblk);
+ ret = pblk_rwb_init(pblk);
if (ret) {
- pr_err("pblk: could not initialize core\n");
- goto fail_free_line_meta;
+ pr_err("pblk: could not initialize write buffer\n");
+ goto fail_free_lines;
}
- ret = pblk_l2p_init(pblk);
+ ret = pblk_l2p_init(pblk, flags & NVM_TARGET_FACTORY);
if (ret) {
pr_err("pblk: could not initialize maps\n");
- goto fail_free_core;
- }
-
- ret = pblk_lines_configure(pblk, flags);
- if (ret) {
- pr_err("pblk: could not configure lines\n");
- goto fail_free_l2p;
+ goto fail_free_rwb;
}
ret = pblk_writer_init(pblk);
if (ret) {
if (ret != -EINTR)
pr_err("pblk: could not initialize write thread\n");
- goto fail_free_lines;
+ goto fail_free_l2p;
}
ret = pblk_gc_init(pblk);
@@ -1064,10 +1244,10 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
blk_queue_write_cache(tqueue, true, false);
- tqueue->limits.discard_granularity = geo->sec_per_chk * geo->sec_size;
+ tqueue->limits.discard_granularity = geo->clba * geo->csecs;
tqueue->limits.discard_alignment = 0;
blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, tqueue);
pr_info("pblk(%s): luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
tdisk->disk_name,
@@ -1084,16 +1264,14 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
fail_stop_writer:
pblk_writer_stop(pblk);
-fail_free_lines:
- pblk_lines_free(pblk);
fail_free_l2p:
pblk_l2p_free(pblk);
+fail_free_rwb:
+ pblk_rwb_free(pblk);
+fail_free_lines:
+ pblk_lines_free(pblk);
fail_free_core:
pblk_core_free(pblk);
-fail_free_line_meta:
- pblk_line_meta_free(pblk);
-fail_free_luns:
- pblk_luns_free(pblk);
fail:
kfree(pblk);
return ERR_PTR(ret);
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 7445e6430c52..20dbaa89c9df 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -65,6 +65,8 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
if (lba_list[paddr] != addr_empty)
line->nr_valid_lbas++;
+ else
+ atomic64_inc(&pblk->pad_wa);
} else {
lba_list[paddr] = meta_list[i].lba = addr_empty;
__pblk_map_invalidate(pblk, line, paddr);
@@ -125,7 +127,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
atomic_dec(&e_line->left_eblks);
*erase_ppa = rqd->ppa_list[i];
- erase_ppa->g.blk = e_line->id;
+ erase_ppa->a.blk = e_line->id;
spin_unlock(&e_line->lock);
@@ -166,6 +168,6 @@ retry:
set_bit(bit, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
*erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
- erase_ppa->g.blk = e_line->id;
+ erase_ppa->a.blk = e_line->id;
}
}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index ec8fc314646b..52fdd85dbc97 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -355,10 +355,13 @@ static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
struct pblk_rb_entry *entry;
unsigned int sync, flush_point;
+ pblk_rb_sync_init(rb, NULL);
sync = READ_ONCE(rb->sync);
- if (pos == sync)
+ if (pos == sync) {
+ pblk_rb_sync_end(rb, NULL);
return 0;
+ }
#ifdef CONFIG_NVM_DEBUG
atomic_inc(&rb->inflight_flush_point);
@@ -367,8 +370,6 @@ static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
entry = &rb->entries[flush_point];
- pblk_rb_sync_init(rb, NULL);
-
/* Protect flush points */
smp_store_release(&rb->flush_point, flush_point);
@@ -437,9 +438,7 @@ static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
if (bio->bi_opf & REQ_PREFLUSH) {
struct pblk *pblk = container_of(rb, struct pblk, rwb);
-#ifdef CONFIG_NVM_DEBUG
- atomic_long_inc(&pblk->nr_flush);
-#endif
+ atomic64_inc(&pblk->nr_flush);
if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem))
*io_ret = NVM_IO_OK;
}
@@ -620,11 +619,17 @@ try:
pr_err("pblk: could not pad page in write bio\n");
return NVM_IO_ERR;
}
+
+ if (pad < pblk->min_write_pgs)
+ atomic64_inc(&pblk->pad_dist[pad - 1]);
+ else
+ pr_warn("pblk: padding more than min. sectors\n");
+
+ atomic64_add(pad, &pblk->pad_wa);
}
#ifdef CONFIG_NVM_DEBUG
- atomic_long_add(pad, &((struct pblk *)
- (container_of(rb, struct pblk, rwb)))->padded_writes);
+ atomic_long_add(pad, &pblk->padded_writes);
#endif
return NVM_IO_OK;
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 2f761283f43e..9eee10f69df0 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -563,7 +563,7 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
if (!(gc_rq->secs_to_gc))
goto out;
- data_len = (gc_rq->secs_to_gc) * geo->sec_size;
+ data_len = (gc_rq->secs_to_gc) * geo->csecs;
bio = pblk_bio_map_addr(pblk, gc_rq->data, gc_rq->secs_to_gc, data_len,
PBLK_VMALLOC_META, GFP_KERNEL);
if (IS_ERR(bio)) {
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 1d5e961bf5e0..3e079c2afa6e 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -21,17 +21,15 @@ void pblk_submit_rec(struct work_struct *work)
struct pblk_rec_ctx *recovery =
container_of(work, struct pblk_rec_ctx, ws_rec);
struct pblk *pblk = recovery->pblk;
- struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_rq *rqd = recovery->rqd;
struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
- int max_secs = nvm_max_phys_sects(dev);
struct bio *bio;
unsigned int nr_rec_secs;
unsigned int pgs_read;
int ret;
nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
- max_secs);
+ NVM_MAX_VLBA);
bio = bio_alloc(GFP_KERNEL, nr_rec_secs);
@@ -74,8 +72,6 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
struct pblk_rec_ctx *recovery, u64 *comp_bits,
unsigned int comp)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- int max_secs = nvm_max_phys_sects(dev);
struct nvm_rq *rec_rqd;
struct pblk_c_ctx *rec_ctx;
int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;
@@ -86,7 +82,7 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
/* Copy completion bitmap, but exclude the first X completed entries */
bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
(unsigned long int *)comp_bits,
- comp, max_secs);
+ comp, NVM_MAX_VLBA);
/* Save the context for the entries that need to be re-written and
* update current context with the completed entries.
@@ -188,7 +184,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
- nr_bb * geo->sec_per_chk;
+ nr_bb * geo->clba;
}
struct pblk_recov_alloc {
@@ -236,7 +232,7 @@ next_read_rq:
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
rq_ppas = pblk->min_write_pgs;
- rq_len = rq_ppas * geo->sec_size;
+ rq_len = rq_ppas * geo->csecs;
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
if (IS_ERR(bio))
@@ -355,7 +351,7 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
if (!pad_rq)
return -ENOMEM;
- data = vzalloc(pblk->max_write_pgs * geo->sec_size);
+ data = vzalloc(pblk->max_write_pgs * geo->csecs);
if (!data) {
ret = -ENOMEM;
goto free_rq;
@@ -372,7 +368,7 @@ next_pad_rq:
goto fail_free_pad;
}
- rq_len = rq_ppas * geo->sec_size;
+ rq_len = rq_ppas * geo->csecs;
meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
if (!meta_list) {
@@ -513,7 +509,7 @@ next_rq:
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
rq_ppas = pblk->min_write_pgs;
- rq_len = rq_ppas * geo->sec_size;
+ rq_len = rq_ppas * geo->csecs;
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
if (IS_ERR(bio))
@@ -644,7 +640,7 @@ next_rq:
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
rq_ppas = pblk->min_write_pgs;
- rq_len = rq_ppas * geo->sec_size;
+ rq_len = rq_ppas * geo->csecs;
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
if (IS_ERR(bio))
@@ -749,7 +745,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
- data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
+ data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
if (!data) {
ret = -ENOMEM;
goto free_meta_list;
@@ -826,6 +822,63 @@ static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line)
return emeta_start;
}
+static int pblk_recov_check_line_version(struct pblk *pblk,
+ struct line_emeta *emeta)
+{
+ struct line_header *header = &emeta->header;
+
+ if (header->version_major != EMETA_VERSION_MAJOR) {
+ pr_err("pblk: line major version mismatch: %d, expected: %d\n",
+ header->version_major, EMETA_VERSION_MAJOR);
+ return 1;
+ }
+
+#ifdef NVM_DEBUG
+ if (header->version_minor > EMETA_VERSION_MINOR)
+ pr_info("pblk: newer line minor version found: %d\n", line_v);
+#endif
+
+ return 0;
+}
+
+static void pblk_recov_wa_counters(struct pblk *pblk,
+ struct line_emeta *emeta)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct line_header *header = &emeta->header;
+ struct wa_counters *wa = emeta_to_wa(lm, emeta);
+
+ /* WA counters were introduced in emeta version 0.2 */
+ if (header->version_major > 0 || header->version_minor >= 2) {
+ u64 user = le64_to_cpu(wa->user);
+ u64 pad = le64_to_cpu(wa->pad);
+ u64 gc = le64_to_cpu(wa->gc);
+
+ atomic64_set(&pblk->user_wa, user);
+ atomic64_set(&pblk->pad_wa, pad);
+ atomic64_set(&pblk->gc_wa, gc);
+
+ pblk->user_rst_wa = user;
+ pblk->pad_rst_wa = pad;
+ pblk->gc_rst_wa = gc;
+ }
+}
+
+static int pblk_line_was_written(struct pblk_line *line,
+ struct pblk_line_meta *lm)
+{
+
+ int i;
+ int state_mask = NVM_CHK_ST_OFFLINE | NVM_CHK_ST_FREE;
+
+ for (i = 0; i < lm->blk_per_line; i++) {
+ if (!(line->chks[i].state & state_mask))
+ return 1;
+ }
+
+ return 0;
+}
+
struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
{
struct pblk_line_meta *lm = &pblk->lm;
@@ -862,6 +915,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
line->lun_bitmap = ((void *)(smeta_buf)) +
sizeof(struct line_smeta);
+ if (!pblk_line_was_written(line, lm))
+ continue;
+
/* Lines that cannot be read are assumed as not written here */
if (pblk_line_read_smeta(pblk, line))
continue;
@@ -873,9 +929,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
continue;
- if (smeta_buf->header.version != SMETA_VERSION) {
+ if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) {
pr_err("pblk: found incompatible line version %u\n",
- le16_to_cpu(smeta_buf->header.version));
+ smeta_buf->header.version_major);
return ERR_PTR(-EINVAL);
}
@@ -943,6 +999,11 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
goto next;
}
+ if (pblk_recov_check_line_version(pblk, line->emeta->buf))
+ return ERR_PTR(-EINVAL);
+
+ pblk_recov_wa_counters(pblk, line->emeta->buf);
+
if (pblk_recov_l2p_from_emeta(pblk, line))
pblk_recov_l2p_from_oob(pblk, line);
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index 0d457b162f23..883a7113b19d 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -200,7 +200,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
/* Consider sectors used for metadata */
sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
- blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk);
+ blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
rl->high_pw = get_count_order(rl->high);
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index 620bab853579..e61909af23a5 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -39,8 +39,8 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
sz += snprintf(page + sz, PAGE_SIZE - sz,
"pblk: pos:%d, ch:%d, lun:%d - %d\n",
i,
- rlun->bppa.g.ch,
- rlun->bppa.g.lun,
+ rlun->bppa.a.ch,
+ rlun->bppa.a.lun,
active);
}
@@ -115,24 +115,47 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
struct nvm_geo *geo = &dev->geo;
ssize_t sz = 0;
- sz = snprintf(page, PAGE_SIZE - sz,
- "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
- pblk->ppaf_bitsize,
- pblk->ppaf.blk_offset, geo->ppaf.blk_len,
- pblk->ppaf.pg_offset, geo->ppaf.pg_len,
- pblk->ppaf.lun_offset, geo->ppaf.lun_len,
- pblk->ppaf.ch_offset, geo->ppaf.ch_len,
- pblk->ppaf.pln_offset, geo->ppaf.pln_len,
- pblk->ppaf.sec_offset, geo->ppaf.sect_len);
+ if (geo->version == NVM_OCSSD_SPEC_12) {
+ struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
+ struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf;
- sz += snprintf(page + sz, PAGE_SIZE - sz,
- "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
- geo->ppaf.blk_offset, geo->ppaf.blk_len,
- geo->ppaf.pg_offset, geo->ppaf.pg_len,
- geo->ppaf.lun_offset, geo->ppaf.lun_len,
- geo->ppaf.ch_offset, geo->ppaf.ch_len,
- geo->ppaf.pln_offset, geo->ppaf.pln_len,
- geo->ppaf.sect_offset, geo->ppaf.sect_len);
+ sz = snprintf(page, PAGE_SIZE,
+ "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
+ pblk->addrf_len,
+ ppaf->blk_offset, ppaf->blk_len,
+ ppaf->pg_offset, ppaf->pg_len,
+ ppaf->lun_offset, ppaf->lun_len,
+ ppaf->ch_offset, ppaf->ch_len,
+ ppaf->pln_offset, ppaf->pln_len,
+ ppaf->sec_offset, ppaf->sec_len);
+
+ sz += snprintf(page + sz, PAGE_SIZE - sz,
+ "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
+ gppaf->blk_offset, gppaf->blk_len,
+ gppaf->pg_offset, gppaf->pg_len,
+ gppaf->lun_offset, gppaf->lun_len,
+ gppaf->ch_offset, gppaf->ch_len,
+ gppaf->pln_offset, gppaf->pln_len,
+ gppaf->sec_offset, gppaf->sec_len);
+ } else {
+ struct nvm_addrf *ppaf = &pblk->addrf;
+ struct nvm_addrf *gppaf = &geo->addrf;
+
+ sz = snprintf(page, PAGE_SIZE,
+ "pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n",
+ pblk->addrf_len,
+ ppaf->ch_offset, ppaf->ch_len,
+ ppaf->lun_offset, ppaf->lun_len,
+ ppaf->chk_offset, ppaf->chk_len,
+ ppaf->sec_offset, ppaf->sec_len);
+
+ sz += snprintf(page + sz, PAGE_SIZE - sz,
+ "device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n",
+ gppaf->ch_offset, gppaf->ch_len,
+ gppaf->lun_offset, gppaf->lun_len,
+ gppaf->chk_offset, gppaf->chk_len,
+ gppaf->sec_offset, gppaf->sec_len);
+ }
return sz;
}
@@ -288,7 +311,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
"blk_line:%d, sec_line:%d, sec_blk:%d\n",
lm->blk_per_line,
lm->sec_per_line,
- geo->sec_per_chk);
+ geo->clba);
return sz;
}
@@ -298,15 +321,104 @@ static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
}
+static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
+ char *page)
+{
+ int sz;
+
+
+ sz = snprintf(page, PAGE_SIZE,
+ "user:%lld gc:%lld pad:%lld WA:",
+ user, gc, pad);
+
+ if (!user) {
+ sz += snprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
+ } else {
+ u64 wa_int;
+ u32 wa_frac;
+
+ wa_int = (user + gc + pad) * 100000;
+ wa_int = div_u64(wa_int, user);
+ wa_int = div_u64_rem(wa_int, 100000, &wa_frac);
+
+ sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n",
+ wa_int, wa_frac);
+ }
+
+ return sz;
+}
+
+static ssize_t pblk_sysfs_get_write_amp_mileage(struct pblk *pblk, char *page)
+{
+ return pblk_get_write_amp(atomic64_read(&pblk->user_wa),
+ atomic64_read(&pblk->gc_wa), atomic64_read(&pblk->pad_wa),
+ page);
+}
+
+static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page)
+{
+ return pblk_get_write_amp(
+ atomic64_read(&pblk->user_wa) - pblk->user_rst_wa,
+ atomic64_read(&pblk->gc_wa) - pblk->gc_rst_wa,
+ atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa, page);
+}
+
+static long long bucket_percentage(unsigned long long bucket,
+ unsigned long long total)
+{
+ int p = bucket * 100;
+
+ p = div_u64(p, total);
+
+ return p;
+}
+
+static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
+{
+ int sz = 0;
+ unsigned long long total;
+ unsigned long long total_buckets = 0;
+ int buckets = pblk->min_write_pgs - 1;
+ int i;
+
+ total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst;
+ if (!total) {
+ for (i = 0; i < (buckets + 1); i++)
+ sz += snprintf(page + sz, PAGE_SIZE - sz,
+ "%d:0 ", i);
+ sz += snprintf(page + sz, PAGE_SIZE - sz, "\n");
+
+ return sz;
+ }
+
+ for (i = 0; i < buckets; i++)
+ total_buckets += atomic64_read(&pblk->pad_dist[i]);
+
+ sz += snprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ",
+ bucket_percentage(total - total_buckets, total));
+
+ for (i = 0; i < buckets; i++) {
+ unsigned long long p;
+
+ p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]),
+ total);
+ sz += snprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ",
+ i + 1, p);
+ }
+ sz += snprintf(page + sz, PAGE_SIZE - sz, "\n");
+
+ return sz;
+}
+
#ifdef CONFIG_NVM_DEBUG
static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
{
return snprintf(page, PAGE_SIZE,
- "%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
+ "%lu\t%lu\t%ld\t%llu\t%ld\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
atomic_long_read(&pblk->inflight_writes),
atomic_long_read(&pblk->inflight_reads),
atomic_long_read(&pblk->req_writes),
- atomic_long_read(&pblk->nr_flush),
+ (u64)atomic64_read(&pblk->nr_flush),
atomic_long_read(&pblk->padded_writes),
atomic_long_read(&pblk->padded_wb),
atomic_long_read(&pblk->sub_writes),
@@ -360,6 +472,56 @@ static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
return len;
}
+static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk,
+ const char *page, size_t len)
+{
+ size_t c_len;
+ int reset_value;
+
+ c_len = strcspn(page, "\n");
+ if (c_len >= len)
+ return -EINVAL;
+
+ if (kstrtouint(page, 0, &reset_value))
+ return -EINVAL;
+
+ if (reset_value != 0)
+ return -EINVAL;
+
+ pblk->user_rst_wa = atomic64_read(&pblk->user_wa);
+ pblk->pad_rst_wa = atomic64_read(&pblk->pad_wa);
+ pblk->gc_rst_wa = atomic64_read(&pblk->gc_wa);
+
+ return len;
+}
+
+
+static ssize_t pblk_sysfs_set_padding_dist(struct pblk *pblk,
+ const char *page, size_t len)
+{
+ size_t c_len;
+ int reset_value;
+ int buckets = pblk->min_write_pgs - 1;
+ int i;
+
+ c_len = strcspn(page, "\n");
+ if (c_len >= len)
+ return -EINVAL;
+
+ if (kstrtouint(page, 0, &reset_value))
+ return -EINVAL;
+
+ if (reset_value != 0)
+ return -EINVAL;
+
+ for (i = 0; i < buckets; i++)
+ atomic64_set(&pblk->pad_dist[i], 0);
+
+ pblk->nr_flush_rst = atomic64_read(&pblk->nr_flush);
+
+ return len;
+}
+
static struct attribute sys_write_luns = {
.name = "write_luns",
.mode = 0444,
@@ -410,6 +572,21 @@ static struct attribute sys_max_sec_per_write = {
.mode = 0644,
};
+static struct attribute sys_write_amp_mileage = {
+ .name = "write_amp_mileage",
+ .mode = 0444,
+};
+
+static struct attribute sys_write_amp_trip = {
+ .name = "write_amp_trip",
+ .mode = 0644,
+};
+
+static struct attribute sys_padding_dist = {
+ .name = "padding_dist",
+ .mode = 0644,
+};
+
#ifdef CONFIG_NVM_DEBUG
static struct attribute sys_stats_debug_attr = {
.name = "stats",
@@ -428,6 +605,9 @@ static struct attribute *pblk_attrs[] = {
&sys_stats_ppaf_attr,
&sys_lines_attr,
&sys_lines_info_attr,
+ &sys_write_amp_mileage,
+ &sys_write_amp_trip,
+ &sys_padding_dist,
#ifdef CONFIG_NVM_DEBUG
&sys_stats_debug_attr,
#endif
@@ -457,6 +637,12 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
return pblk_sysfs_lines_info(pblk, buf);
else if (strcmp(attr->name, "max_sec_per_write") == 0)
return pblk_sysfs_get_sec_per_write(pblk, buf);
+ else if (strcmp(attr->name, "write_amp_mileage") == 0)
+ return pblk_sysfs_get_write_amp_mileage(pblk, buf);
+ else if (strcmp(attr->name, "write_amp_trip") == 0)
+ return pblk_sysfs_get_write_amp_trip(pblk, buf);
+ else if (strcmp(attr->name, "padding_dist") == 0)
+ return pblk_sysfs_get_padding_dist(pblk, buf);
#ifdef CONFIG_NVM_DEBUG
else if (strcmp(attr->name, "stats") == 0)
return pblk_sysfs_stats_debug(pblk, buf);
@@ -473,7 +659,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
return pblk_sysfs_gc_force(pblk, buf, len);
else if (strcmp(attr->name, "max_sec_per_write") == 0)
return pblk_sysfs_set_sec_per_write(pblk, buf, len);
-
+ else if (strcmp(attr->name, "write_amp_trip") == 0)
+ return pblk_sysfs_set_write_amp_trip(pblk, buf, len);
+ else if (strcmp(attr->name, "padding_dist") == 0)
+ return pblk_sysfs_set_padding_dist(pblk, buf, len);
return 0;
}
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index aae86ed60b98..3e6f1ebd743a 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -333,7 +333,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
m_ctx = nvm_rq_to_pdu(rqd);
m_ctx->private = meta_line;
- rq_len = rq_ppas * geo->sec_size;
+ rq_len = rq_ppas * geo->csecs;
data = ((void *)emeta->buf) + emeta->mem;
bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 8c357fb6538e..9c682acfc5d1 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -201,12 +201,6 @@ struct pblk_rb {
struct pblk_lun {
struct ppa_addr bppa;
-
- u8 *bb_list; /* Bad block list for LUN. Only used on
- * bring up. Bad blocks are managed
- * within lines on run-time.
- */
-
struct semaphore wr_sem;
};
@@ -303,6 +297,7 @@ enum {
PBLK_LINETYPE_DATA = 2,
/* Line state */
+ PBLK_LINESTATE_NEW = 9,
PBLK_LINESTATE_FREE = 10,
PBLK_LINESTATE_OPEN = 11,
PBLK_LINESTATE_CLOSED = 12,
@@ -320,14 +315,26 @@ enum {
};
#define PBLK_MAGIC 0x70626c6b /*pblk*/
-#define SMETA_VERSION cpu_to_le16(1)
+
+/* emeta/smeta persistent storage format versions:
+ * Changes in major version requires offline migration.
+ * Changes in minor version are handled automatically during
+ * recovery.
+ */
+
+#define SMETA_VERSION_MAJOR (0)
+#define SMETA_VERSION_MINOR (1)
+
+#define EMETA_VERSION_MAJOR (0)
+#define EMETA_VERSION_MINOR (2)
struct line_header {
__le32 crc;
__le32 identifier; /* pblk identifier */
__u8 uuid[16]; /* instance uuid */
__le16 type; /* line type */
- __le16 version; /* type version */
+ __u8 version_major; /* version major */
+ __u8 version_minor; /* version minor */
__le32 id; /* line id for current line */
};
@@ -349,11 +356,13 @@ struct line_smeta {
__le64 lun_bitmap[];
};
+
/*
* Metadata layout in media:
* First sector:
* 1. struct line_emeta
* 2. bad block bitmap (u64 * window_wr_lun)
+ * 3. write amplification counters
* Mid sectors (start at lbas_sector):
* 3. nr_lbas (u64) forming lba list
* Last sectors (start at vsc_sector):
@@ -377,7 +386,15 @@ struct line_emeta {
__le32 next_id; /* Line id for next line */
__le64 nr_lbas; /* Number of lbas mapped in line */
__le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
- __le64 bb_bitmap[]; /* Updated bad block bitmap for line */
+ __le64 bb_bitmap[]; /* Updated bad block bitmap for line */
+};
+
+
+/* Write amplification counters stored on media */
+struct wa_counters {
+ __le64 user; /* Number of user written sectors */
+ __le64 gc; /* Number of sectors written by GC*/
+ __le64 pad; /* Number of padded sectors */
};
struct pblk_emeta {
@@ -410,6 +427,8 @@ struct pblk_line {
unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
+ struct nvm_chk_meta *chks; /* Chunks forming line */
+
struct pblk_smeta *smeta; /* Start metadata */
struct pblk_emeta *emeta; /* End medatada */
@@ -507,10 +526,11 @@ struct pblk_line_meta {
unsigned int smeta_sec; /* Sectors needed for smeta */
unsigned int emeta_len[4]; /* Lengths for emeta:
- * [0]: Total length
- * [1]: struct line_emeta length
- * [2]: L2P portion length
- * [3]: vsc list length
+ * [0]: Total
+ * [1]: struct line_emeta +
+ * bb_bitmap + struct wa_counters
+ * [2]: L2P portion
+ * [3]: vsc
*/
unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout
* as emeta_len
@@ -534,21 +554,6 @@ struct pblk_line_meta {
unsigned int meta_distance; /* Distance between data and metadata */
};
-struct pblk_addr_format {
- u64 ch_mask;
- u64 lun_mask;
- u64 pln_mask;
- u64 blk_mask;
- u64 pg_mask;
- u64 sec_mask;
- u8 ch_offset;
- u8 lun_offset;
- u8 pln_offset;
- u8 blk_offset;
- u8 pg_offset;
- u8 sec_offset;
-};
-
enum {
PBLK_STATE_RUNNING = 0,
PBLK_STATE_STOPPING = 1,
@@ -556,6 +561,18 @@ enum {
PBLK_STATE_STOPPED = 3,
};
+/* Internal format to support not power-of-2 device formats */
+struct pblk_addrf {
+ /* gen to dev */
+ int sec_stripe;
+ int ch_stripe;
+ int lun_stripe;
+
+ /* dev to gen */
+ int sec_lun_stripe;
+ int sec_ws_stripe;
+};
+
struct pblk {
struct nvm_tgt_dev *dev;
struct gendisk *disk;
@@ -568,8 +585,9 @@ struct pblk {
struct pblk_line_mgmt l_mg; /* Line management */
struct pblk_line_meta lm; /* Line metadata */
- int ppaf_bitsize;
- struct pblk_addr_format ppaf;
+ struct nvm_addrf addrf; /* Aligned address format */
+ struct pblk_addrf uaddrf; /* Unaligned address format */
+ int addrf_len;
struct pblk_rb rwb;
@@ -592,12 +610,27 @@ struct pblk {
int sec_per_write;
unsigned char instance_uuid[16];
+
+ /* Persistent write amplification counters, 4kb sector I/Os */
+ atomic64_t user_wa; /* Sectors written by user */
+ atomic64_t gc_wa; /* Sectors written by GC */
+ atomic64_t pad_wa; /* Padded sectors written */
+
+ /* Reset values for delta write amplification measurements */
+ u64 user_rst_wa;
+ u64 gc_rst_wa;
+ u64 pad_rst_wa;
+
+ /* Counters used for calculating padding distribution */
+ atomic64_t *pad_dist; /* Padding distribution buckets */
+ u64 nr_flush_rst; /* Flushes reset value for pad dist.*/
+ atomic64_t nr_flush; /* Number of flush/fua I/O */
+
#ifdef CONFIG_NVM_DEBUG
- /* All debug counters apply to 4kb sector I/Os */
+ /* Non-persistent debug counters, 4kb sector I/Os */
atomic_long_t inflight_writes; /* Inflight writes (user and gc) */
atomic_long_t padded_writes; /* Sectors padded due to flush/fua */
atomic_long_t padded_wb; /* Sectors padded in write buffer */
- atomic_long_t nr_flush; /* Number of flush/fua I/O */
atomic_long_t req_writes; /* Sectors stored on write buffer */
atomic_long_t sub_writes; /* Sectors submitted from buffer */
atomic_long_t sync_writes; /* Sectors synced to media */
@@ -712,6 +745,10 @@ void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx);
void pblk_discard(struct pblk *pblk, struct bio *bio);
+struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk);
+struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
+ struct nvm_chk_meta *lp,
+ struct ppa_addr ppa);
void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
@@ -888,6 +925,12 @@ static inline void *emeta_to_bb(struct line_emeta *emeta)
return emeta->bb_bitmap;
}
+static inline void *emeta_to_wa(struct pblk_line_meta *lm,
+ struct line_emeta *emeta)
+{
+ return emeta->bb_bitmap + lm->blk_bitmap_len;
+}
+
static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
{
return ((void *)emeta + pblk->lm.emeta_len[1]);
@@ -903,38 +946,60 @@ static inline int pblk_line_vsc(struct pblk_line *line)
return le32_to_cpu(*line->vsc);
}
-#define NVM_MEM_PAGE_WRITE (8)
-
static inline int pblk_pad_distance(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- return NVM_MEM_PAGE_WRITE * geo->all_luns * geo->sec_per_pl;
+ return geo->mw_cunits * geo->all_luns * geo->ws_opt;
}
static inline int pblk_ppa_to_line(struct ppa_addr p)
{
- return p.g.blk;
+ return p.a.blk;
}
static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p)
{
- return p.g.lun * geo->nr_chnls + p.g.ch;
+ return p.a.lun * geo->num_ch + p.a.ch;
}
static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr,
u64 line_id)
{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
struct ppa_addr ppa;
- ppa.ppa = 0;
- ppa.g.blk = line_id;
- ppa.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset;
- ppa.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset;
- ppa.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset;
- ppa.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset;
- ppa.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset;
+ if (geo->version == NVM_OCSSD_SPEC_12) {
+ struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
+
+ ppa.ppa = 0;
+ ppa.g.blk = line_id;
+ ppa.g.pg = (paddr & ppaf->pg_mask) >> ppaf->pg_offset;
+ ppa.g.lun = (paddr & ppaf->lun_mask) >> ppaf->lun_offset;
+ ppa.g.ch = (paddr & ppaf->ch_mask) >> ppaf->ch_offset;
+ ppa.g.pl = (paddr & ppaf->pln_mask) >> ppaf->pln_offset;
+ ppa.g.sec = (paddr & ppaf->sec_mask) >> ppaf->sec_offset;
+ } else {
+ struct pblk_addrf *uaddrf = &pblk->uaddrf;
+ int secs, chnls, luns;
+
+ ppa.ppa = 0;
+
+ ppa.m.chk = line_id;
+
+ paddr = div_u64_rem(paddr, uaddrf->sec_stripe, &secs);
+ ppa.m.sec = secs;
+
+ paddr = div_u64_rem(paddr, uaddrf->ch_stripe, &chnls);
+ ppa.m.grp = chnls;
+
+ paddr = div_u64_rem(paddr, uaddrf->lun_stripe, &luns);
+ ppa.m.pu = luns;
+
+ ppa.m.sec += uaddrf->sec_stripe * paddr;
+ }
return ppa;
}
@@ -942,13 +1007,30 @@ static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr,
static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk,
struct ppa_addr p)
{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
u64 paddr;
- paddr = (u64)p.g.pg << pblk->ppaf.pg_offset;
- paddr |= (u64)p.g.lun << pblk->ppaf.lun_offset;
- paddr |= (u64)p.g.ch << pblk->ppaf.ch_offset;
- paddr |= (u64)p.g.pl << pblk->ppaf.pln_offset;
- paddr |= (u64)p.g.sec << pblk->ppaf.sec_offset;
+ if (geo->version == NVM_OCSSD_SPEC_12) {
+ struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
+
+ paddr = (u64)p.g.ch << ppaf->ch_offset;
+ paddr |= (u64)p.g.lun << ppaf->lun_offset;
+ paddr |= (u64)p.g.pg << ppaf->pg_offset;
+ paddr |= (u64)p.g.pl << ppaf->pln_offset;
+ paddr |= (u64)p.g.sec << ppaf->sec_offset;
+ } else {
+ struct pblk_addrf *uaddrf = &pblk->uaddrf;
+ u64 secs = p.m.sec;
+ int sec_stripe;
+
+ paddr = (u64)p.m.grp * uaddrf->sec_stripe;
+ paddr += (u64)p.m.pu * uaddrf->sec_lun_stripe;
+
+ secs = div_u64_rem(secs, uaddrf->sec_stripe, &sec_stripe);
+ paddr += secs * uaddrf->sec_ws_stripe;
+ paddr += sec_stripe;
+ }
return paddr;
}
@@ -965,18 +1047,37 @@ static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32)
ppa64.c.line = ppa32 & ((~0U) >> 1);
ppa64.c.is_cached = 1;
} else {
- ppa64.g.blk = (ppa32 & pblk->ppaf.blk_mask) >>
- pblk->ppaf.blk_offset;
- ppa64.g.pg = (ppa32 & pblk->ppaf.pg_mask) >>
- pblk->ppaf.pg_offset;
- ppa64.g.lun = (ppa32 & pblk->ppaf.lun_mask) >>
- pblk->ppaf.lun_offset;
- ppa64.g.ch = (ppa32 & pblk->ppaf.ch_mask) >>
- pblk->ppaf.ch_offset;
- ppa64.g.pl = (ppa32 & pblk->ppaf.pln_mask) >>
- pblk->ppaf.pln_offset;
- ppa64.g.sec = (ppa32 & pblk->ppaf.sec_mask) >>
- pblk->ppaf.sec_offset;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+
+ if (geo->version == NVM_OCSSD_SPEC_12) {
+ struct nvm_addrf_12 *ppaf =
+ (struct nvm_addrf_12 *)&pblk->addrf;
+
+ ppa64.g.ch = (ppa32 & ppaf->ch_mask) >>
+ ppaf->ch_offset;
+ ppa64.g.lun = (ppa32 & ppaf->lun_mask) >>
+ ppaf->lun_offset;
+ ppa64.g.blk = (ppa32 & ppaf->blk_mask) >>
+ ppaf->blk_offset;
+ ppa64.g.pg = (ppa32 & ppaf->pg_mask) >>
+ ppaf->pg_offset;
+ ppa64.g.pl = (ppa32 & ppaf->pln_mask) >>
+ ppaf->pln_offset;
+ ppa64.g.sec = (ppa32 & ppaf->sec_mask) >>
+ ppaf->sec_offset;
+ } else {
+ struct nvm_addrf *lbaf = &pblk->addrf;
+
+ ppa64.m.grp = (ppa32 & lbaf->ch_mask) >>
+ lbaf->ch_offset;
+ ppa64.m.pu = (ppa32 & lbaf->lun_mask) >>
+ lbaf->lun_offset;
+ ppa64.m.chk = (ppa32 & lbaf->chk_mask) >>
+ lbaf->chk_offset;
+ ppa64.m.sec = (ppa32 & lbaf->sec_mask) >>
+ lbaf->sec_offset;
+ }
}
return ppa64;
@@ -992,12 +1093,27 @@ static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64)
ppa32 |= ppa64.c.line;
ppa32 |= 1U << 31;
} else {
- ppa32 |= ppa64.g.blk << pblk->ppaf.blk_offset;
- ppa32 |= ppa64.g.pg << pblk->ppaf.pg_offset;
- ppa32 |= ppa64.g.lun << pblk->ppaf.lun_offset;
- ppa32 |= ppa64.g.ch << pblk->ppaf.ch_offset;
- ppa32 |= ppa64.g.pl << pblk->ppaf.pln_offset;
- ppa32 |= ppa64.g.sec << pblk->ppaf.sec_offset;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+
+ if (geo->version == NVM_OCSSD_SPEC_12) {
+ struct nvm_addrf_12 *ppaf =
+ (struct nvm_addrf_12 *)&pblk->addrf;
+
+ ppa32 |= ppa64.g.ch << ppaf->ch_offset;
+ ppa32 |= ppa64.g.lun << ppaf->lun_offset;
+ ppa32 |= ppa64.g.blk << ppaf->blk_offset;
+ ppa32 |= ppa64.g.pg << ppaf->pg_offset;
+ ppa32 |= ppa64.g.pl << ppaf->pln_offset;
+ ppa32 |= ppa64.g.sec << ppaf->sec_offset;
+ } else {
+ struct nvm_addrf *lbaf = &pblk->addrf;
+
+ ppa32 |= ppa64.m.grp << lbaf->ch_offset;
+ ppa32 |= ppa64.m.pu << lbaf->lun_offset;
+ ppa32 |= ppa64.m.chk << lbaf->chk_offset;
+ ppa32 |= ppa64.m.sec << lbaf->sec_offset;
+ }
}
return ppa32;
@@ -1008,7 +1124,7 @@ static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk,
{
struct ppa_addr ppa;
- if (pblk->ppaf_bitsize < 32) {
+ if (pblk->addrf_len < 32) {
u32 *map = (u32 *)pblk->trans_map;
ppa = pblk_ppa32_to_ppa64(pblk, map[lba]);
@@ -1024,7 +1140,7 @@ static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk,
static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba,
struct ppa_addr ppa)
{
- if (pblk->ppaf_bitsize < 32) {
+ if (pblk->addrf_len < 32) {
u32 *map = (u32 *)pblk->trans_map;
map[lba] = pblk_ppa64_to_ppa32(pblk, ppa);
@@ -1115,7 +1231,10 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
struct nvm_geo *geo = &dev->geo;
int flags;
- flags = geo->plane_mode >> 1;
+ if (geo->version == NVM_OCSSD_SPEC_20)
+ return 0;
+
+ flags = geo->pln_mode >> 1;
if (type == PBLK_WRITE)
flags |= NVM_IO_SCRAMBLE_ENABLE;
@@ -1134,9 +1253,12 @@ static inline int pblk_set_read_mode(struct pblk *pblk, int type)
struct nvm_geo *geo = &dev->geo;
int flags;
+ if (geo->version == NVM_OCSSD_SPEC_20)
+ return 0;
+
flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
if (type == PBLK_READ_SEQUENTIAL)
- flags |= geo->plane_mode >> 1;
+ flags |= geo->pln_mode >> 1;
return flags;
}
@@ -1147,16 +1269,21 @@ static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
}
#ifdef CONFIG_NVM_DEBUG
-static inline void print_ppa(struct ppa_addr *p, char *msg, int error)
+static inline void print_ppa(struct nvm_geo *geo, struct ppa_addr *p,
+ char *msg, int error)
{
if (p->c.is_cached) {
pr_err("ppa: (%s: %x) cache line: %llu\n",
msg, error, (u64)p->c.line);
- } else {
+ } else if (geo->version == NVM_OCSSD_SPEC_12) {
pr_err("ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n",
msg, error,
p->g.ch, p->g.lun, p->g.blk,
p->g.pg, p->g.pl, p->g.sec);
+ } else {
+ pr_err("ppa: (%s: %x):ch:%d,lun:%d,chk:%d,sec:%d\n",
+ msg, error,
+ p->m.grp, p->m.pu, p->m.chk, p->m.sec);
}
}
@@ -1166,13 +1293,13 @@ static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd,
int bit = -1;
if (rqd->nr_ppas == 1) {
- print_ppa(&rqd->ppa_addr, "rqd", error);
+ print_ppa(&pblk->dev->geo, &rqd->ppa_addr, "rqd", error);
return;
}
while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas,
bit + 1)) < rqd->nr_ppas) {
- print_ppa(&rqd->ppa_list[bit], "rqd", error);
+ print_ppa(&pblk->dev->geo, &rqd->ppa_list[bit], "rqd", error);
}
pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status);
@@ -1188,16 +1315,25 @@ static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev,
for (i = 0; i < nr_ppas; i++) {
ppa = &ppas[i];
- if (!ppa->c.is_cached &&
- ppa->g.ch < geo->nr_chnls &&
- ppa->g.lun < geo->nr_luns &&
- ppa->g.pl < geo->nr_planes &&
- ppa->g.blk < geo->nr_chks &&
- ppa->g.pg < geo->ws_per_chk &&
- ppa->g.sec < geo->sec_per_pg)
- continue;
+ if (geo->version == NVM_OCSSD_SPEC_12) {
+ if (!ppa->c.is_cached &&
+ ppa->g.ch < geo->num_ch &&
+ ppa->g.lun < geo->num_lun &&
+ ppa->g.pl < geo->num_pln &&
+ ppa->g.blk < geo->num_chk &&
+ ppa->g.pg < geo->num_pg &&
+ ppa->g.sec < geo->ws_min)
+ continue;
+ } else {
+ if (!ppa->c.is_cached &&
+ ppa->m.grp < geo->num_ch &&
+ ppa->m.pu < geo->num_lun &&
+ ppa->m.chk < geo->num_chk &&
+ ppa->m.sec < geo->clba)
+ continue;
+ }
- print_ppa(ppa, "boundary", i);
+ print_ppa(geo, ppa, "boundary", i);
return 1;
}
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 458e1d38577d..004cc3cc6123 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -287,7 +287,8 @@ do { \
break; \
\
mutex_unlock(&(ca)->set->bucket_lock); \
- if (kthread_should_stop()) { \
+ if (kthread_should_stop() || \
+ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
set_current_state(TASK_RUNNING); \
return 0; \
} \
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 12e5197f186c..d338b7086013 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -188,6 +188,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/workqueue.h>
+#include <linux/kthread.h>
#include "bset.h"
#include "util.h"
@@ -258,10 +259,11 @@ struct bcache_device {
struct gendisk *disk;
unsigned long flags;
-#define BCACHE_DEV_CLOSING 0
-#define BCACHE_DEV_DETACHING 1
-#define BCACHE_DEV_UNLINK_DONE 2
-
+#define BCACHE_DEV_CLOSING 0
+#define BCACHE_DEV_DETACHING 1
+#define BCACHE_DEV_UNLINK_DONE 2
+#define BCACHE_DEV_WB_RUNNING 3
+#define BCACHE_DEV_RATE_DW_RUNNING 4
unsigned nr_stripes;
unsigned stripe_size;
atomic_t *stripe_sectors_dirty;
@@ -286,6 +288,12 @@ struct io {
sector_t last;
};
+enum stop_on_failure {
+ BCH_CACHED_DEV_STOP_AUTO = 0,
+ BCH_CACHED_DEV_STOP_ALWAYS,
+ BCH_CACHED_DEV_STOP_MODE_MAX,
+};
+
struct cached_dev {
struct list_head list;
struct bcache_device disk;
@@ -359,6 +367,7 @@ struct cached_dev {
unsigned sequential_cutoff;
unsigned readahead;
+ unsigned io_disable:1;
unsigned verify:1;
unsigned bypass_torture_test:1;
@@ -378,6 +387,11 @@ struct cached_dev {
unsigned writeback_rate_i_term_inverse;
unsigned writeback_rate_p_term_inverse;
unsigned writeback_rate_minimum;
+
+ enum stop_on_failure stop_when_cache_set_failed;
+#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
+ atomic_t io_errors;
+ unsigned error_limit;
};
enum alloc_reserve {
@@ -474,10 +488,15 @@ struct gc_stat {
*
* CACHE_SET_RUNNING means all cache devices have been registered and journal
* replay is complete.
+ *
+ * CACHE_SET_IO_DISABLE is set when bcache is stopping the whold cache set, all
+ * external and internal I/O should be denied when this flag is set.
+ *
*/
#define CACHE_SET_UNREGISTERING 0
#define CACHE_SET_STOPPING 1
#define CACHE_SET_RUNNING 2
+#define CACHE_SET_IO_DISABLE 3
struct cache_set {
struct closure cl;
@@ -867,8 +886,36 @@ static inline void wake_up_allocators(struct cache_set *c)
wake_up_process(ca->alloc_thread);
}
+static inline void closure_bio_submit(struct cache_set *c,
+ struct bio *bio,
+ struct closure *cl)
+{
+ closure_get(cl);
+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return;
+ }
+ generic_make_request(bio);
+}
+
+/*
+ * Prevent the kthread exits directly, and make sure when kthread_stop()
+ * is called to stop a kthread, it is still alive. If a kthread might be
+ * stopped by CACHE_SET_IO_DISABLE bit set, wait_for_kthread_stop() is
+ * necessary before the kthread returns.
+ */
+static inline void wait_for_kthread_stop(void)
+{
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ }
+}
+
/* Forward declarations */
+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
blk_status_t, const char *);
@@ -896,6 +943,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
unsigned, unsigned, bool);
+bool bch_cached_dev_error(struct cached_dev *dc);
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *, const char *, ...);
@@ -905,6 +953,7 @@ void bch_write_bdev_super(struct cached_dev *, struct closure *);
extern struct workqueue_struct *bcache_wq;
extern const char * const bch_cache_modes[];
+extern const char * const bch_stop_on_failure_modes[];
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index e56d3ecdbfcb..579c696a5fe0 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1072,7 +1072,7 @@ EXPORT_SYMBOL(bch_btree_iter_init);
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
btree_iter_cmp_fn *cmp)
{
- struct btree_iter_set unused;
+ struct btree_iter_set b __maybe_unused;
struct bkey *ret = NULL;
if (!btree_iter_end(iter)) {
@@ -1087,7 +1087,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
}
if (iter->data->k == iter->data->end)
- heap_pop(iter, unused, cmp);
+ heap_pop(iter, b, cmp);
else
heap_sift(iter, 0, cmp);
}
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index fa506c1aa524..0c24280f3b98 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -531,14 +531,15 @@ int __bch_keylist_realloc(struct keylist *, unsigned);
#ifdef CONFIG_BCACHE_DEBUG
int __bch_count_data(struct btree_keys *);
-void __bch_check_keys(struct btree_keys *, const char *, ...);
+void __printf(2, 3) __bch_check_keys(struct btree_keys *, const char *, ...);
void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
void bch_dump_bucket(struct btree_keys *);
#else
static inline int __bch_count_data(struct btree_keys *b) { return -1; }
-static inline void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
+static inline void __printf(2, 3)
+ __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
static inline void bch_dump_bucket(struct btree_keys *b) {}
void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fad9fe8817eb..17936b2dc7d6 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -665,6 +665,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
struct btree *b, *t;
unsigned long i, nr = sc->nr_to_scan;
unsigned long freed = 0;
+ unsigned int btree_cache_used;
if (c->shrinker_disabled)
return SHRINK_STOP;
@@ -689,9 +690,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
nr = min_t(unsigned long, nr, mca_can_free(c));
i = 0;
+ btree_cache_used = c->btree_cache_used;
list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
- if (freed >= nr)
- break;
+ if (nr <= 0)
+ goto out;
if (++i > 3 &&
!mca_reap(b, 0, false)) {
@@ -699,9 +701,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
rw_unlock(true, b);
freed++;
}
+ nr--;
}
- for (i = 0; (nr--) && i < c->btree_cache_used; i++) {
+ for (; (nr--) && i < btree_cache_used; i++) {
if (list_empty(&c->btree_cache))
goto out;
@@ -719,7 +722,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
}
out:
mutex_unlock(&c->bucket_lock);
- return freed;
+ return freed * c->btree_pages;
}
static unsigned long bch_mca_count(struct shrinker *shrink,
@@ -959,7 +962,7 @@ err:
return b;
}
-/**
+/*
* bch_btree_node_get - find a btree node in the cache and lock it, reading it
* in from disk if necessary.
*
@@ -1744,6 +1747,7 @@ static void bch_btree_gc(struct cache_set *c)
btree_gc_start(c);
+ /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
do {
ret = btree_root(gc_root, c, &op, &writes, &stats);
closure_sync(&writes);
@@ -1751,7 +1755,7 @@ static void bch_btree_gc(struct cache_set *c)
if (ret && ret != -EAGAIN)
pr_warn("gc failed!");
- } while (ret);
+ } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
bch_btree_gc_finish(c);
wake_up_allocators(c);
@@ -1789,15 +1793,19 @@ static int bch_gc_thread(void *arg)
while (1) {
wait_event_interruptible(c->gc_wait,
- kthread_should_stop() || gc_should_run(c));
+ kthread_should_stop() ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
+ gc_should_run(c));
- if (kthread_should_stop())
+ if (kthread_should_stop() ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags))
break;
set_gc_sectors(c);
bch_btree_gc(c);
}
+ wait_for_kthread_stop();
return 0;
}
@@ -2170,7 +2178,7 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
if (b->key.ptr[0] != btree_ptr ||
b->seq != seq + 1) {
- op->lock = b->level;
+ op->lock = b->level;
goto out;
}
}
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 7f12920c14f7..0e14969182c6 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -46,7 +46,7 @@ void closure_sub(struct closure *cl, int v)
}
EXPORT_SYMBOL(closure_sub);
-/**
+/*
* closure_put - decrement a closure's refcount
*/
void closure_put(struct closure *cl)
@@ -55,7 +55,7 @@ void closure_put(struct closure *cl)
}
EXPORT_SYMBOL(closure_put);
-/**
+/*
* closure_wake_up - wake up all closures on a wait list, without memory barrier
*/
void __closure_wake_up(struct closure_waitlist *wait_list)
@@ -79,9 +79,9 @@ EXPORT_SYMBOL(__closure_wake_up);
/**
* closure_wait - add a closure to a waitlist
- *
- * @waitlist will own a ref on @cl, which will be released when
+ * @waitlist: will own a ref on @cl, which will be released when
* closure_wake_up() is called on @waitlist.
+ * @cl: closure pointer.
*
*/
bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
@@ -157,7 +157,7 @@ void closure_debug_destroy(struct closure *cl)
}
EXPORT_SYMBOL(closure_debug_destroy);
-static struct dentry *debug;
+static struct dentry *closure_debug;
static int debug_seq_show(struct seq_file *f, void *data)
{
@@ -199,11 +199,12 @@ static const struct file_operations debug_ops = {
.release = single_release
};
-void __init closure_debug_init(void)
+int __init closure_debug_init(void)
{
- debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops);
+ closure_debug = debugfs_create_file("closures",
+ 0400, bcache_debug, NULL, &debug_ops);
+ return IS_ERR_OR_NULL(closure_debug);
}
-
#endif
MODULE_AUTHOR("Kent Overstreet <koverstreet@google.com>");
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 3b9dfc9962ad..71427eb5fdae 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -105,6 +105,7 @@
struct closure;
struct closure_syncer;
typedef void (closure_fn) (struct closure *);
+extern struct dentry *bcache_debug;
struct closure_waitlist {
struct llist_head list;
@@ -185,13 +186,13 @@ static inline void closure_sync(struct closure *cl)
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-void closure_debug_init(void);
+int closure_debug_init(void);
void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);
#else
-static inline void closure_debug_init(void) {}
+static inline int closure_debug_init(void) { return 0; }
static inline void closure_debug_create(struct closure *cl) {}
static inline void closure_debug_destroy(struct closure *cl) {}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index af89408befe8..028f7b386e01 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -17,7 +17,7 @@
#include <linux/random.h>
#include <linux/seq_file.h>
-static struct dentry *debug;
+struct dentry *bcache_debug;
#ifdef CONFIG_BCACHE_DEBUG
@@ -232,11 +232,11 @@ static const struct file_operations cache_set_debug_ops = {
void bch_debug_init_cache_set(struct cache_set *c)
{
- if (!IS_ERR_OR_NULL(debug)) {
+ if (!IS_ERR_OR_NULL(bcache_debug)) {
char name[50];
snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
- c->debug = debugfs_create_file(name, 0400, debug, c,
+ c->debug = debugfs_create_file(name, 0400, bcache_debug, c,
&cache_set_debug_ops);
}
}
@@ -245,13 +245,13 @@ void bch_debug_init_cache_set(struct cache_set *c)
void bch_debug_exit(void)
{
- if (!IS_ERR_OR_NULL(debug))
- debugfs_remove_recursive(debug);
+ if (!IS_ERR_OR_NULL(bcache_debug))
+ debugfs_remove_recursive(bcache_debug);
}
int __init bch_debug_init(struct kobject *kobj)
{
- debug = debugfs_create_dir("bcache", NULL);
+ bcache_debug = debugfs_create_dir("bcache", NULL);
- return IS_ERR_OR_NULL(debug);
+ return IS_ERR_OR_NULL(bcache_debug);
}
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index f9d391711595..c334e6666461 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -534,7 +534,6 @@ err:
static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
- struct bucket *g;
unsigned i, stale;
if (!KEY_PTRS(k) ||
@@ -549,7 +548,6 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
return false;
for (i = 0; i < KEY_PTRS(k); i++) {
- g = PTR_BUCKET(b->c, k, i);
stale = ptr_stale(b->c, k, i);
btree_bug_on(stale > 96, b,
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index a783c5a41ff1..7fac97ae036e 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
b->submit_time_us = local_clock_us();
- closure_bio_submit(bio, bio->bi_private);
+ closure_bio_submit(c, bio, bio->bi_private);
}
void bch_submit_bbio(struct bio *bio, struct cache_set *c,
@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
}
/* IO errors */
+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
+{
+ char buf[BDEVNAME_SIZE];
+ unsigned errors;
+
+ WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
+
+ errors = atomic_add_return(1, &dc->io_errors);
+ if (errors < dc->error_limit)
+ pr_err("%s: IO error on backing device, unrecoverable",
+ bio_devname(bio, buf));
+ else
+ bch_cached_dev_error(dc);
+}
void bch_count_io_errors(struct cache *ca,
blk_status_t error,
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1b736b860739..18f1b5239620 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
bch_bio_map(bio, data);
- closure_bio_submit(bio, &cl);
+ closure_bio_submit(ca->set, bio, &cl);
closure_sync(&cl);
/* This function could be simpler now since we no longer write
@@ -493,7 +493,7 @@ static void journal_reclaim(struct cache_set *c)
struct cache *ca;
uint64_t last_seq;
unsigned iter, n = 0;
- atomic_t p;
+ atomic_t p __maybe_unused;
atomic_long_inc(&c->reclaim);
@@ -594,6 +594,7 @@ static void journal_write_done(struct closure *cl)
}
static void journal_write_unlock(struct closure *cl)
+ __releases(&c->journal.lock)
{
struct cache_set *c = container_of(cl, struct cache_set, journal.io);
@@ -674,7 +675,7 @@ static void journal_write_unlocked(struct closure *cl)
spin_unlock(&c->journal.lock);
while ((bio = bio_list_pop(&list)))
- closure_bio_submit(bio, cl);
+ closure_bio_submit(c, bio, cl);
continue_at(cl, journal_write_done, NULL);
}
@@ -705,6 +706,7 @@ static void journal_try_write(struct cache_set *c)
static struct journal_write *journal_wait_for_write(struct cache_set *c,
unsigned nkeys)
+ __acquires(&c->journal.lock)
{
size_t sectors;
struct closure cl;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6422846b546e..a65e3365eeb9 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
}
op->insert_data_done = true;
+ /* get in bch_data_insert() */
bio_put(bio);
out:
continue_at(cl, bch_data_insert_keys, op->wq);
@@ -295,6 +296,7 @@ err:
/**
* bch_data_insert - stick some data in the cache
+ * @cl: closure pointer.
*
* This is the starting point for any data to end up in a cache device; it could
* be from a normal write, or a writeback write, or a write to a flash only
@@ -630,6 +632,41 @@ static void request_endio(struct bio *bio)
closure_put(cl);
}
+static void backing_request_endio(struct bio *bio)
+{
+ struct closure *cl = bio->bi_private;
+
+ if (bio->bi_status) {
+ struct search *s = container_of(cl, struct search, cl);
+ struct cached_dev *dc = container_of(s->d,
+ struct cached_dev, disk);
+ /*
+ * If a bio has REQ_PREFLUSH for writeback mode, it is
+ * speically assembled in cached_dev_write() for a non-zero
+ * write request which has REQ_PREFLUSH. we don't set
+ * s->iop.status by this failure, the status will be decided
+ * by result of bch_data_insert() operation.
+ */
+ if (unlikely(s->iop.writeback &&
+ bio->bi_opf & REQ_PREFLUSH)) {
+ char buf[BDEVNAME_SIZE];
+
+ bio_devname(bio, buf);
+ pr_err("Can't flush %s: returned bi_status %i",
+ buf, bio->bi_status);
+ } else {
+ /* set to orig_bio->bi_status in bio_complete() */
+ s->iop.status = bio->bi_status;
+ }
+ s->recoverable = false;
+ /* should count I/O error for backing device here */
+ bch_count_backing_io_errors(dc, bio);
+ }
+
+ bio_put(bio);
+ closure_put(cl);
+}
+
static void bio_complete(struct search *s)
{
if (s->orig_bio) {
@@ -644,13 +681,21 @@ static void bio_complete(struct search *s)
}
}
-static void do_bio_hook(struct search *s, struct bio *orig_bio)
+static void do_bio_hook(struct search *s,
+ struct bio *orig_bio,
+ bio_end_io_t *end_io_fn)
{
struct bio *bio = &s->bio.bio;
bio_init(bio, NULL, 0);
__bio_clone_fast(bio, orig_bio);
- bio->bi_end_io = request_endio;
+ /*
+ * bi_end_io can be set separately somewhere else, e.g. the
+ * variants in,
+ * - cache_bio->bi_end_io from cached_dev_cache_miss()
+ * - n->bi_end_io from cache_lookup_fn()
+ */
+ bio->bi_end_io = end_io_fn;
bio->bi_private = &s->cl;
bio_cnt_set(bio, 3);
@@ -676,7 +721,7 @@ static inline struct search *search_alloc(struct bio *bio,
s = mempool_alloc(d->c->search, GFP_NOIO);
closure_init(&s->cl, NULL);
- do_bio_hook(s, bio);
+ do_bio_hook(s, bio, request_endio);
s->orig_bio = bio;
s->cache_miss = NULL;
@@ -743,11 +788,12 @@ static void cached_dev_read_error(struct closure *cl)
trace_bcache_read_retry(s->orig_bio);
s->iop.status = 0;
- do_bio_hook(s, s->orig_bio);
+ do_bio_hook(s, s->orig_bio, backing_request_endio);
/* XXX: invalidate cache */
- closure_bio_submit(bio, cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(s->iop.c, bio, cl);
}
continue_at(cl, cached_dev_cache_miss_done, NULL);
@@ -859,7 +905,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
bio_copy_dev(cache_bio, miss);
cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
- cache_bio->bi_end_io = request_endio;
+ cache_bio->bi_end_io = backing_request_endio;
cache_bio->bi_private = &s->cl;
bch_bio_map(cache_bio, NULL);
@@ -872,15 +918,17 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->cache_miss = miss;
s->iop.bio = cache_bio;
bio_get(cache_bio);
- closure_bio_submit(cache_bio, &s->cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(s->iop.c, cache_bio, &s->cl);
return ret;
out_put:
bio_put(cache_bio);
out_submit:
- miss->bi_end_io = request_endio;
+ miss->bi_end_io = backing_request_endio;
miss->bi_private = &s->cl;
- closure_bio_submit(miss, &s->cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(s->iop.c, miss, &s->cl);
return ret;
}
@@ -943,31 +991,46 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
s->iop.bio = s->orig_bio;
bio_get(s->iop.bio);
- if ((bio_op(bio) != REQ_OP_DISCARD) ||
- blk_queue_discard(bdev_get_queue(dc->bdev)))
- closure_bio_submit(bio, cl);
+ if (bio_op(bio) == REQ_OP_DISCARD &&
+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
+ goto insert_data;
+
+ /* I/O request sent to backing device */
+ bio->bi_end_io = backing_request_endio;
+ closure_bio_submit(s->iop.c, bio, cl);
+
} else if (s->iop.writeback) {
bch_writeback_add(dc);
s->iop.bio = bio;
if (bio->bi_opf & REQ_PREFLUSH) {
- /* Also need to send a flush to the backing device */
- struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
- dc->disk.bio_split);
-
+ /*
+ * Also need to send a flush to the backing
+ * device.
+ */
+ struct bio *flush;
+
+ flush = bio_alloc_bioset(GFP_NOIO, 0,
+ dc->disk.bio_split);
+ if (!flush) {
+ s->iop.status = BLK_STS_RESOURCE;
+ goto insert_data;
+ }
bio_copy_dev(flush, bio);
- flush->bi_end_io = request_endio;
+ flush->bi_end_io = backing_request_endio;
flush->bi_private = cl;
flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
- closure_bio_submit(flush, cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(s->iop.c, flush, cl);
}
} else {
s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
-
- closure_bio_submit(bio, cl);
+ /* I/O request sent to backing device */
+ bio->bi_end_io = backing_request_endio;
+ closure_bio_submit(s->iop.c, bio, cl);
}
+insert_data:
closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
continue_at(cl, cached_dev_write_complete, NULL);
}
@@ -981,11 +1044,67 @@ static void cached_dev_nodata(struct closure *cl)
bch_journal_meta(s->iop.c, cl);
/* If it's a flush, we send the flush to the backing device too */
- closure_bio_submit(bio, cl);
+ bio->bi_end_io = backing_request_endio;
+ closure_bio_submit(s->iop.c, bio, cl);
continue_at(cl, cached_dev_bio_complete, NULL);
}
+struct detached_dev_io_private {
+ struct bcache_device *d;
+ unsigned long start_time;
+ bio_end_io_t *bi_end_io;
+ void *bi_private;
+};
+
+static void detached_dev_end_io(struct bio *bio)
+{
+ struct detached_dev_io_private *ddip;
+
+ ddip = bio->bi_private;
+ bio->bi_end_io = ddip->bi_end_io;
+ bio->bi_private = ddip->bi_private;
+
+ generic_end_io_acct(ddip->d->disk->queue,
+ bio_data_dir(bio),
+ &ddip->d->disk->part0, ddip->start_time);
+
+ if (bio->bi_status) {
+ struct cached_dev *dc = container_of(ddip->d,
+ struct cached_dev, disk);
+ /* should count I/O error for backing device here */
+ bch_count_backing_io_errors(dc, bio);
+ }
+
+ kfree(ddip);
+ bio->bi_end_io(bio);
+}
+
+static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
+{
+ struct detached_dev_io_private *ddip;
+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
+ /*
+ * no need to call closure_get(&dc->disk.cl),
+ * because upper layer had already opened bcache device,
+ * which would call closure_get(&dc->disk.cl)
+ */
+ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
+ ddip->d = d;
+ ddip->start_time = jiffies;
+ ddip->bi_end_io = bio->bi_end_io;
+ ddip->bi_private = bio->bi_private;
+ bio->bi_end_io = detached_dev_end_io;
+ bio->bi_private = ddip;
+
+ if ((bio_op(bio) == REQ_OP_DISCARD) &&
+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
+ bio->bi_end_io(bio);
+ else
+ generic_make_request(bio);
+}
+
/* Cached devices - read & write stuff */
static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -996,6 +1115,13 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
int rw = bio_data_dir(bio);
+ if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
+ dc->io_disable)) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return BLK_QC_T_NONE;
+ }
+
atomic_set(&dc->backing_idle, 0);
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
@@ -1022,13 +1148,9 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
else
cached_dev_read(dc, s);
}
- } else {
- if ((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bdev_get_queue(dc->bdev)))
- bio_endio(bio);
- else
- generic_make_request(bio);
- }
+ } else
+ /* I/O request sent to backing device */
+ detached_dev_do_request(d, bio);
return BLK_QC_T_NONE;
}
@@ -1112,6 +1234,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
struct bcache_device *d = bio->bi_disk->private_data;
int rw = bio_data_dir(bio);
+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return BLK_QC_T_NONE;
+ }
+
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
s = search_alloc(bio, d);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f2273143b3cb..d90d9e59ca00 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -47,6 +47,14 @@ const char * const bch_cache_modes[] = {
NULL
};
+/* Default is -1; we skip past it for stop_when_cache_set_failed */
+const char * const bch_stop_on_failure_modes[] = {
+ "default",
+ "auto",
+ "always",
+ NULL
+};
+
static struct kobject *bcache_kobj;
struct mutex bch_register_lock;
LIST_HEAD(bch_cache_sets);
@@ -265,6 +273,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
bio->bi_private = dc;
closure_get(cl);
+ /* I/O request sent to backing device */
__write_super(&dc->sb, bio);
closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
@@ -521,7 +530,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
bch_bio_map(bio, ca->disk_buckets);
- closure_bio_submit(bio, &ca->prio);
+ closure_bio_submit(ca->set, bio, &ca->prio);
closure_sync(cl);
}
@@ -769,6 +778,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
sector_t sectors)
{
struct request_queue *q;
+ const size_t max_stripes = min_t(size_t, INT_MAX,
+ SIZE_MAX / sizeof(atomic_t));
size_t n;
int idx;
@@ -777,9 +788,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
- if (!d->nr_stripes ||
- d->nr_stripes > INT_MAX ||
- d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) {
+ if (!d->nr_stripes || d->nr_stripes > max_stripes) {
pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
(unsigned)d->nr_stripes);
return -ENOMEM;
@@ -833,9 +842,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
q->limits.io_min = block_size;
q->limits.logical_block_size = block_size;
q->limits.physical_block_size = block_size;
- set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags);
- clear_bit(QUEUE_FLAG_ADD_RANDOM, &d->disk->queue->queue_flags);
- set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
blk_queue_write_cache(q, true, true);
@@ -899,6 +908,31 @@ void bch_cached_dev_run(struct cached_dev *dc)
pr_debug("error creating sysfs link");
}
+/*
+ * If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed
+ * work dc->writeback_rate_update is running. Wait until the routine
+ * quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
+ * cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out
+ * seconds, give up waiting here and continue to cancel it too.
+ */
+static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
+{
+ int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
+
+ do {
+ if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
+ &dc->disk.flags))
+ break;
+ time_out--;
+ schedule_timeout_interruptible(1);
+ } while (time_out > 0);
+
+ if (time_out == 0)
+ pr_warn("give up waiting for dc->writeback_write_update to quit");
+
+ cancel_delayed_work_sync(&dc->writeback_rate_update);
+}
+
static void cached_dev_detach_finish(struct work_struct *w)
{
struct cached_dev *dc = container_of(w, struct cached_dev, detach);
@@ -911,7 +945,9 @@ static void cached_dev_detach_finish(struct work_struct *w)
mutex_lock(&bch_register_lock);
- cancel_delayed_work_sync(&dc->writeback_rate_update);
+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
+ cancel_writeback_rate_update_dwork(dc);
+
if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
kthread_stop(dc->writeback_thread);
dc->writeback_thread = NULL;
@@ -954,6 +990,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
closure_get(&dc->disk.cl);
bch_writeback_queue(dc);
+
cached_dev_put(dc);
}
@@ -1065,7 +1102,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
bch_sectors_dirty_init(&dc->disk);
atomic_set(&dc->has_dirty, 1);
- refcount_inc(&dc->count);
bch_writeback_queue(dc);
}
@@ -1093,14 +1129,16 @@ static void cached_dev_free(struct closure *cl)
{
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
- cancel_delayed_work_sync(&dc->writeback_rate_update);
+ mutex_lock(&bch_register_lock);
+
+ if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
+ cancel_writeback_rate_update_dwork(dc);
+
if (!IS_ERR_OR_NULL(dc->writeback_thread))
kthread_stop(dc->writeback_thread);
if (dc->writeback_write_wq)
destroy_workqueue(dc->writeback_write_wq);
- mutex_lock(&bch_register_lock);
-
if (atomic_read(&dc->running))
bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
bcache_device_free(&dc->disk);
@@ -1170,6 +1208,12 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
max(dc->disk.disk->queue->backing_dev_info->ra_pages,
q->backing_dev_info->ra_pages);
+ atomic_set(&dc->io_errors, 0);
+ dc->io_disable = false;
+ dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
+ /* default to auto */
+ dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
+
bch_cached_dev_request_init(dc);
bch_cached_dev_writeback_init(dc);
return 0;
@@ -1321,6 +1365,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
return flash_dev_run(c, u);
}
+bool bch_cached_dev_error(struct cached_dev *dc)
+{
+ char name[BDEVNAME_SIZE];
+
+ if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
+ return false;
+
+ dc->io_disable = true;
+ /* make others know io_disable is true earlier */
+ smp_mb();
+
+ pr_err("stop %s: too many IO errors on backing device %s\n",
+ dc->disk.disk->disk_name, bdevname(dc->bdev, name));
+
+ bcache_device_stop(&dc->disk);
+ return true;
+}
+
/* Cache set */
__printf(2, 3)
@@ -1332,6 +1394,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
test_bit(CACHE_SET_STOPPING, &c->flags))
return false;
+ if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
+ pr_warn("CACHE_SET_IO_DISABLE already set");
+
/* XXX: we can be called from atomic context
acquire_console_sem();
*/
@@ -1443,25 +1508,72 @@ static void cache_set_flush(struct closure *cl)
closure_return(cl);
}
+/*
+ * This function is only called when CACHE_SET_IO_DISABLE is set, which means
+ * cache set is unregistering due to too many I/O errors. In this condition,
+ * the bcache device might be stopped, it depends on stop_when_cache_set_failed
+ * value and whether the broken cache has dirty data:
+ *
+ * dc->stop_when_cache_set_failed dc->has_dirty stop bcache device
+ * BCH_CACHED_STOP_AUTO 0 NO
+ * BCH_CACHED_STOP_AUTO 1 YES
+ * BCH_CACHED_DEV_STOP_ALWAYS 0 YES
+ * BCH_CACHED_DEV_STOP_ALWAYS 1 YES
+ *
+ * The expected behavior is, if stop_when_cache_set_failed is configured to
+ * "auto" via sysfs interface, the bcache device will not be stopped if the
+ * backing device is clean on the broken cache device.
+ */
+static void conditional_stop_bcache_device(struct cache_set *c,
+ struct bcache_device *d,
+ struct cached_dev *dc)
+{
+ if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
+ pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.",
+ d->disk->disk_name, c->sb.set_uuid);
+ bcache_device_stop(d);
+ } else if (atomic_read(&dc->has_dirty)) {
+ /*
+ * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
+ * and dc->has_dirty == 1
+ */
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
+ d->disk->disk_name);
+ bcache_device_stop(d);
+ } else {
+ /*
+ * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
+ * and dc->has_dirty == 0
+ */
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.",
+ d->disk->disk_name);
+ }
+}
+
static void __cache_set_unregister(struct closure *cl)
{
struct cache_set *c = container_of(cl, struct cache_set, caching);
struct cached_dev *dc;
+ struct bcache_device *d;
size_t i;
mutex_lock(&bch_register_lock);
- for (i = 0; i < c->devices_max_used; i++)
- if (c->devices[i]) {
- if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
- test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
- dc = container_of(c->devices[i],
- struct cached_dev, disk);
- bch_cached_dev_detach(dc);
- } else {
- bcache_device_stop(c->devices[i]);
- }
+ for (i = 0; i < c->devices_max_used; i++) {
+ d = c->devices[i];
+ if (!d)
+ continue;
+
+ if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
+ test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
+ dc = container_of(d, struct cached_dev, disk);
+ bch_cached_dev_detach(dc);
+ if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
+ conditional_stop_bcache_device(c, d, dc);
+ } else {
+ bcache_device_stop(d);
}
+ }
mutex_unlock(&bch_register_lock);
@@ -1567,6 +1679,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
c->error_limit = DEFAULT_IO_ERROR_LIMIT;
+ WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
return c;
err:
@@ -2148,7 +2261,6 @@ static int __init bcache_init(void)
mutex_init(&bch_register_lock);
init_waitqueue_head(&unregister_wait);
register_reboot_notifier(&reboot);
- closure_debug_init();
bcache_major = register_blkdev(0, "bcache");
if (bcache_major < 0) {
@@ -2160,7 +2272,7 @@ static int __init bcache_init(void)
if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
bch_request_init() ||
- bch_debug_init(bcache_kobj) ||
+ bch_debug_init(bcache_kobj) || closure_debug_init() ||
sysfs_create_files(bcache_kobj, files))
goto err;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 78cd7bd50fdd..dfeef583ee50 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -78,6 +78,7 @@ rw_attribute(congested_write_threshold_us);
rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
+rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
rw_attribute(writeback_percent);
@@ -95,6 +96,7 @@ read_attribute(partial_stripes_expensive);
rw_attribute(synchronous);
rw_attribute(journal_delay_ms);
+rw_attribute(io_disable);
rw_attribute(discard);
rw_attribute(running);
rw_attribute(label);
@@ -125,6 +127,12 @@ SHOW(__bch_cached_dev)
bch_cache_modes + 1,
BDEV_CACHE_MODE(&dc->sb));
+ if (attr == &sysfs_stop_when_cache_set_failed)
+ return bch_snprint_string_list(buf, PAGE_SIZE,
+ bch_stop_on_failure_modes + 1,
+ dc->stop_when_cache_set_failed);
+
+
sysfs_printf(data_csum, "%i", dc->disk.data_csum);
var_printf(verify, "%i");
var_printf(bypass_torture_test, "%i");
@@ -133,7 +141,9 @@ SHOW(__bch_cached_dev)
var_print(writeback_delay);
var_print(writeback_percent);
sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
-
+ sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
+ sysfs_printf(io_error_limit, "%i", dc->error_limit);
+ sysfs_printf(io_disable, "%i", dc->io_disable);
var_print(writeback_rate_update_seconds);
var_print(writeback_rate_i_term_inverse);
var_print(writeback_rate_p_term_inverse);
@@ -173,7 +183,7 @@ SHOW(__bch_cached_dev)
sysfs_hprint(dirty_data,
bcache_dev_sectors_dirty(&dc->disk) << 9);
- sysfs_hprint(stripe_size, dc->disk.stripe_size << 9);
+ sysfs_hprint(stripe_size, ((uint64_t)dc->disk.stripe_size) << 9);
var_printf(partial_stripes_expensive, "%u");
var_hprint(sequential_cutoff);
@@ -224,6 +234,14 @@ STORE(__cached_dev)
d_strtoul(writeback_rate_i_term_inverse);
d_strtoul_nonzero(writeback_rate_p_term_inverse);
+ sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
+
+ if (attr == &sysfs_io_disable) {
+ int v = strtoul_or_return(buf);
+
+ dc->io_disable = v ? 1 : 0;
+ }
+
d_strtoi_h(sequential_cutoff);
d_strtoi_h(readahead);
@@ -246,6 +264,15 @@ STORE(__cached_dev)
}
}
+ if (attr == &sysfs_stop_when_cache_set_failed) {
+ v = bch_read_string_list(buf, bch_stop_on_failure_modes + 1);
+
+ if (v < 0)
+ return v;
+
+ dc->stop_when_cache_set_failed = v;
+ }
+
if (attr == &sysfs_label) {
if (size > SB_LABEL_SIZE)
return -EINVAL;
@@ -309,7 +336,8 @@ STORE(bch_cached_dev)
bch_writeback_queue(dc);
if (attr == &sysfs_writeback_percent)
- schedule_delayed_work(&dc->writeback_rate_update,
+ if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
+ schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
mutex_unlock(&bch_register_lock);
@@ -324,6 +352,7 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_data_csum,
#endif
&sysfs_cache_mode,
+ &sysfs_stop_when_cache_set_failed,
&sysfs_writeback_metadata,
&sysfs_writeback_running,
&sysfs_writeback_delay,
@@ -333,6 +362,9 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_writeback_rate_i_term_inverse,
&sysfs_writeback_rate_p_term_inverse,
&sysfs_writeback_rate_debug,
+ &sysfs_errors,
+ &sysfs_io_error_limit,
+ &sysfs_io_disable,
&sysfs_dirty_data,
&sysfs_stripe_size,
&sysfs_partial_stripes_expensive,
@@ -590,6 +622,8 @@ SHOW(__bch_cache_set)
sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
+ sysfs_printf(io_disable, "%i",
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
if (attr == &sysfs_bset_tree_stats)
return bch_bset_print_stats(c, buf);
@@ -679,6 +713,20 @@ STORE(__bch_cache_set)
if (attr == &sysfs_io_error_halflife)
c->error_decay = strtoul_or_return(buf) / 88;
+ if (attr == &sysfs_io_disable) {
+ int v = strtoul_or_return(buf);
+
+ if (v) {
+ if (test_and_set_bit(CACHE_SET_IO_DISABLE,
+ &c->flags))
+ pr_warn("CACHE_SET_IO_DISABLE already set");
+ } else {
+ if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
+ &c->flags))
+ pr_warn("CACHE_SET_IO_DISABLE already cleared");
+ }
+ }
+
sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
sysfs_strtoul(verify, c->verify);
sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
@@ -764,6 +812,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
&sysfs_gc_always_rewrite,
&sysfs_btree_shrinker_disabled,
&sysfs_copy_gc_enabled,
+ &sysfs_io_disable,
NULL
};
KTYPE(bch_cache_set_internal);
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index a23cd6a14b74..74febd5230df 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -32,20 +32,27 @@ int bch_ ## name ## _h(const char *cp, type *res) \
case 'y': \
case 'z': \
u++; \
+ /* fall through */ \
case 'e': \
u++; \
+ /* fall through */ \
case 'p': \
u++; \
+ /* fall through */ \
case 't': \
u++; \
+ /* fall through */ \
case 'g': \
u++; \
+ /* fall through */ \
case 'm': \
u++; \
+ /* fall through */ \
case 'k': \
u++; \
if (e++ == cp) \
return -EINVAL; \
+ /* fall through */ \
case '\n': \
case '\0': \
if (*e == '\n') \
@@ -75,10 +82,9 @@ STRTO_H(strtoll, long long)
STRTO_H(strtoull, unsigned long long)
/**
- * bch_hprint() - formats @v to human readable string for sysfs.
- *
- * @v - signed 64 bit integer
- * @buf - the (at least 8 byte) buffer to format the result into.
+ * bch_hprint - formats @v to human readable string for sysfs.
+ * @buf: the (at least 8 byte) buffer to format the result into.
+ * @v: signed 64 bit integer
*
* Returns the number of bytes used by format.
*/
@@ -218,13 +224,12 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
}
/**
- * bch_next_delay() - increment @d by the amount of work done, and return how
- * long to delay until the next time to do some work.
- *
- * @d - the struct bch_ratelimit to update
- * @done - the amount of work done, in arbitrary units
+ * bch_next_delay() - update ratelimiting statistics and calculate next delay
+ * @d: the struct bch_ratelimit to update
+ * @done: the amount of work done, in arbitrary units
*
- * Returns the amount of time to delay by, in jiffies
+ * Increment @d by the amount of work done, and return how long to delay in
+ * jiffies until the next time to do some work.
*/
uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index a6763db7f061..268024529edd 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -567,12 +567,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
return bdev->bd_inode->i_size >> 9;
}
-#define closure_bio_submit(bio, cl) \
-do { \
- closure_get(cl); \
- generic_make_request(bio); \
-} while (0)
-
uint64_t bch_crc64_update(uint64_t, const void *, size_t);
uint64_t bch_crc64(const void *, size_t);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index f1d2fc15abcc..4a9547cdcdc5 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -114,6 +114,27 @@ static void update_writeback_rate(struct work_struct *work)
struct cached_dev *dc = container_of(to_delayed_work(work),
struct cached_dev,
writeback_rate_update);
+ struct cache_set *c = dc->disk.c;
+
+ /*
+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
+ * cancel_delayed_work_sync().
+ */
+ set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
+ smp_mb();
+
+ /*
+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
+ * check it here too.
+ */
+ if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
+ smp_mb();
+ return;
+ }
down_read(&dc->writeback_lock);
@@ -123,8 +144,23 @@ static void update_writeback_rate(struct work_struct *work)
up_read(&dc->writeback_lock);
- schedule_delayed_work(&dc->writeback_rate_update,
+ /*
+ * CACHE_SET_IO_DISABLE might be set via sysfs interface,
+ * check it here too.
+ */
+ if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
+ schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
+ }
+
+ /*
+ * should check BCACHE_DEV_RATE_DW_RUNNING before calling
+ * cancel_delayed_work_sync().
+ */
+ clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
+ /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
+ smp_mb();
}
static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
@@ -253,7 +289,8 @@ static void write_dirty(struct closure *cl)
bio_set_dev(&io->bio, io->dc->bdev);
io->bio.bi_end_io = dirty_endio;
- closure_bio_submit(&io->bio, cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
}
atomic_set(&dc->writeback_sequence_next, next_sequence);
@@ -279,7 +316,7 @@ static void read_dirty_submit(struct closure *cl)
{
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
- closure_bio_submit(&io->bio, cl);
+ closure_bio_submit(io->dc->disk.c, &io->bio, cl);
continue_at(cl, write_dirty, io->dc->writeback_write_wq);
}
@@ -305,7 +342,9 @@ static void read_dirty(struct cached_dev *dc)
next = bch_keybuf_next(&dc->writeback_keys);
- while (!kthread_should_stop() && next) {
+ while (!kthread_should_stop() &&
+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
+ next) {
size = 0;
nk = 0;
@@ -402,7 +441,9 @@ static void read_dirty(struct cached_dev *dc)
}
}
- while (!kthread_should_stop() && delay) {
+ while (!kthread_should_stop() &&
+ !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
+ delay) {
schedule_timeout_interruptible(delay);
delay = writeback_delay(dc, 0);
}
@@ -558,21 +599,30 @@ static bool refill_dirty(struct cached_dev *dc)
static int bch_writeback_thread(void *arg)
{
struct cached_dev *dc = arg;
+ struct cache_set *c = dc->disk.c;
bool searched_full_index;
bch_ratelimit_reset(&dc->writeback_rate);
- while (!kthread_should_stop()) {
+ while (!kthread_should_stop() &&
+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
down_write(&dc->writeback_lock);
set_current_state(TASK_INTERRUPTIBLE);
- if (!atomic_read(&dc->has_dirty) ||
- (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
- !dc->writeback_running)) {
+ /*
+ * If the bache device is detaching, skip here and continue
+ * to perform writeback. Otherwise, if no dirty data on cache,
+ * or there is dirty data on cache but writeback is disabled,
+ * the writeback thread should sleep here and wait for others
+ * to wake up it.
+ */
+ if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
+ (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
up_write(&dc->writeback_lock);
- if (kthread_should_stop()) {
+ if (kthread_should_stop() ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
set_current_state(TASK_RUNNING);
- return 0;
+ break;
}
schedule();
@@ -585,9 +635,16 @@ static int bch_writeback_thread(void *arg)
if (searched_full_index &&
RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
atomic_set(&dc->has_dirty, 0);
- cached_dev_put(dc);
SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
bch_write_bdev_super(dc, NULL);
+ /*
+ * If bcache device is detaching via sysfs interface,
+ * writeback thread should stop after there is no dirty
+ * data on cache. BCACHE_DEV_DETACHING flag is set in
+ * bch_cached_dev_detach().
+ */
+ if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
+ break;
}
up_write(&dc->writeback_lock);
@@ -599,6 +656,7 @@ static int bch_writeback_thread(void *arg)
while (delay &&
!kthread_should_stop() &&
+ !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
delay = schedule_timeout_interruptible(delay);
@@ -606,6 +664,9 @@ static int bch_writeback_thread(void *arg)
}
}
+ cached_dev_put(dc);
+ wait_for_kthread_stop();
+
return 0;
}
@@ -659,6 +720,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_rate_p_term_inverse = 40;
dc->writeback_rate_i_term_inverse = 10000;
+ WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
}
@@ -669,11 +731,15 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
if (!dc->writeback_write_wq)
return -ENOMEM;
+ cached_dev_get(dc);
dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
"bcache_writeback");
- if (IS_ERR(dc->writeback_thread))
+ if (IS_ERR(dc->writeback_thread)) {
+ cached_dev_put(dc);
return PTR_ERR(dc->writeback_thread);
+ }
+ WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 587b25599856..610fb01de629 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -39,7 +39,7 @@ static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c)
if (!d || !UUID_FLASH_ONLY(&c->uuids[i]))
continue;
- ret += bcache_dev_sectors_dirty(d);
+ ret += bcache_dev_sectors_dirty(d);
}
mutex_unlock(&bch_register_lock);
@@ -105,8 +105,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
{
if (!atomic_read(&dc->has_dirty) &&
!atomic_xchg(&dc->has_dirty, 1)) {
- refcount_inc(&dc->count);
-
if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
/* XXX: should do this synchronously */
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 7eb3e2a3c07d..954f4e3b68ac 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1857,7 +1857,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
q->limits = *limits;
if (!dm_table_supports_discards(t)) {
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
/* Must also clear discard limits... */
q->limits.max_discard_sectors = 0;
q->limits.max_hw_discard_sectors = 0;
@@ -1865,7 +1865,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
q->limits.discard_alignment = 0;
q->limits.discard_misaligned = 0;
} else
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
wc = true;
@@ -1875,15 +1875,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
blk_queue_write_cache(q, wc, fua);
if (dm_table_supports_dax(t))
- queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
+ blk_queue_flag_set(QUEUE_FLAG_DAX, q);
if (dm_table_supports_dax_write_cache(t))
dax_write_cache(t->md->dax_dev, true);
/* Ensure that all underlying devices are non-rotational. */
if (dm_table_all_devices_attribute(t, device_is_nonrot))
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
else
- queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
if (!dm_table_supports_write_same(t))
q->limits.max_write_same_sectors = 0;
@@ -1891,9 +1891,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
q->limits.max_write_zeroes_sectors = 0;
if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
- queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
+ blk_queue_flag_clear(QUEUE_FLAG_NO_SG_MERGE, q);
else
- queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
+ blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q);
dm_table_verify_integrity(t);
@@ -1904,7 +1904,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
* have it set.
*/
if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
}
unsigned int dm_table_get_num_targets(struct dm_table *t)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 353ea0ede091..ded74e1eb0d1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1848,7 +1848,7 @@ static struct mapped_device *alloc_dev(int minor)
INIT_LIST_HEAD(&md->table_devices);
spin_lock_init(&md->uevent_lock);
- md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
+ md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL);
if (!md->queue)
goto bad;
md->queue->queuedata = md;
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 773fc70dced7..4964323d936b 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -138,9 +138,9 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
}
if (!discard_supported)
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
else
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
/*
* Here we calculate the device offsets.
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 254e44e44668..3bea45e8ccff 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5206,12 +5206,12 @@ static void md_free(struct kobject *ko)
if (mddev->sysfs_state)
sysfs_put(mddev->sysfs_state);
+ if (mddev->gendisk)
+ del_gendisk(mddev->gendisk);
if (mddev->queue)
blk_cleanup_queue(mddev->queue);
- if (mddev->gendisk) {
- del_gendisk(mddev->gendisk);
+ if (mddev->gendisk)
put_disk(mddev->gendisk);
- }
percpu_ref_exit(&mddev->writes_pending);
kfree(mddev);
@@ -5619,9 +5619,9 @@ int md_run(struct mddev *mddev)
if (mddev->degraded)
nonrot = false;
if (nonrot)
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
else
- queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
mddev->queue->backing_dev_info->congested_data = mddev;
mddev->queue->backing_dev_info->congested_fn = md_congested;
}
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 5ecba9eef441..584c10347267 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -399,9 +399,9 @@ static int raid0_run(struct mddev *mddev)
discard_supported = true;
}
if (!discard_supported)
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
else
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
}
/* calculate array device size */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fe872dc6712e..e2943fb74056 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1760,7 +1760,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
}
}
if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
print_conf(conf);
return err;
}
@@ -3110,10 +3110,10 @@ static int raid1_run(struct mddev *mddev)
if (mddev->queue) {
if (discard_supported)
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD,
mddev->queue);
else
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
mddev->queue);
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index c5e6c60fc0d4..3c60774c8430 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1845,7 +1845,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
break;
}
if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
print_conf(conf);
return err;
@@ -3846,10 +3846,10 @@ static int raid10_run(struct mddev *mddev)
if (mddev->queue) {
if (discard_supported)
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD,
mddev->queue);
else
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
mddev->queue);
}
/* need to check that every block has at least one working mirror */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b5d2601483e3..be117d0a65a8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7443,10 +7443,10 @@ static int raid5_run(struct mddev *mddev)
if (devices_handle_discard_safely &&
mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
mddev->queue->limits.discard_granularity >= stripe)
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD,
mddev->queue);
else
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
mddev->queue);
blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
index fd09b0960097..e8f1d4bb806a 100644
--- a/drivers/misc/cardreader/rtsx_pcr.c
+++ b/drivers/misc/cardreader/rtsx_pcr.c
@@ -444,12 +444,12 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr,
{
u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi;
u64 val;
- u8 option = SG_VALID | SG_TRANS_DATA;
+ u8 option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA;
pcr_dbg(pcr, "DMA addr: 0x%x, Len: 0x%x\n", (unsigned int)addr, len);
if (end)
- option |= SG_END;
+ option |= RTSX_SG_END;
val = ((u64)addr << 32) | ((u64)len << 12) | option;
put_unaligned_le64(val, ptr);
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index a2b9c2500c4c..02485e310c81 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2659,7 +2659,6 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md)
* from being accepted.
*/
card = md->queue.card;
- mmc_cleanup_queue(&md->queue);
if (md->disk->flags & GENHD_FL_UP) {
device_remove_file(disk_to_dev(md->disk), &md->force_ro);
if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
@@ -2669,6 +2668,7 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md)
del_gendisk(md->disk);
}
+ mmc_cleanup_queue(&md->queue);
mmc_blk_put(md);
}
}
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 421fab7250ac..56e9a803db21 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -185,14 +185,14 @@ static void mmc_queue_setup_discard(struct request_queue *q,
if (!max_discard)
return;
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
blk_queue_max_discard_sectors(q, max_discard);
q->limits.discard_granularity = card->pref_erase << 9;
/* granularity must not be greater than max. discard */
if (card->pref_erase > max_discard)
q->limits.discard_granularity = 0;
if (mmc_can_secure_erase_trim(card))
- queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, q);
+ blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
}
/**
@@ -356,8 +356,8 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT;
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, mq->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
if (mmc_can_erase(card))
mmc_queue_setup_discard(mq->queue, card);
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 9ec8f033ac5f..16ae4ae8e8f9 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -419,11 +419,11 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
blk_queue_logical_block_size(new->rq, tr->blksize);
blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
if (tr->discard) {
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq);
blk_queue_max_discard_sectors(new->rq, UINT_MAX);
}
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 1bd7b3734751..62e9cb167aad 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -266,7 +266,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
blk_queue_make_request(q, nd_blk_make_request);
blk_queue_max_hw_sectors(q, UINT_MAX);
blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
q->queuedata = nsblk;
disk = alloc_disk(0);
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 4b95ac513de2..85de8053aa34 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1542,7 +1542,7 @@ static int btt_blk_init(struct btt *btt)
blk_queue_make_request(btt->btt_queue, btt_make_request);
blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue);
btt->btt_queue->queuedata = btt;
if (btt_meta_size(btt)) {
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 8d6375ee0fda..184e070d50a2 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -29,7 +29,6 @@ enum {
* BTT instance
*/
ND_MAX_LANES = 256,
- SECTOR_SHIFT = 9,
INT_LBASIZE_ALIGNMENT = 64,
NVDIMM_IO_ATOMIC = 1,
};
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 06f8dcc52ca6..5a96d30c294a 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -343,7 +343,7 @@ static int pmem_attach_disk(struct device *dev,
return -EBUSY;
}
- q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
+ q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL);
if (!q)
return -ENOMEM;
@@ -387,8 +387,8 @@ static int pmem_attach_disk(struct device *dev,
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_logical_block_size(q, pmem_sector_size(ndns));
blk_queue_max_hw_sectors(q, UINT_MAX);
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
- queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_set(QUEUE_FLAG_DAX, q);
q->queuedata = pmem;
disk = alloc_disk_node(0, nid);
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 441e67e3a9d7..aea459c65ae1 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -12,6 +12,7 @@ nvme-core-y := core.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
+nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
nvme-y += pci.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7aeca5db7916..197a6ba9700f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -100,11 +100,6 @@ static struct class *nvme_subsys_class;
static void nvme_ns_remove(struct nvme_ns *ns);
static int nvme_revalidate_disk(struct gendisk *disk);
-static __le32 nvme_get_log_dw10(u8 lid, size_t size)
-{
- return cpu_to_le32((((size / 4) - 1) << 16) | lid);
-}
-
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -135,6 +130,9 @@ static void nvme_delete_ctrl_work(struct work_struct *work)
struct nvme_ctrl *ctrl =
container_of(work, struct nvme_ctrl, delete_work);
+ dev_info(ctrl->device,
+ "Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn);
+
flush_work(&ctrl->reset_work);
nvme_stop_ctrl(ctrl);
nvme_remove_namespaces(ctrl);
@@ -948,7 +946,8 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
c.identify.opcode = nvme_admin_identify;
c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST;
c.identify.nsid = cpu_to_le32(nsid);
- return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
+ return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list,
+ NVME_IDENTIFY_DATA_SIZE);
}
static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
@@ -1124,13 +1123,13 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl)
struct nvme_ns *ns, *next;
LIST_HEAD(rm_list);
- mutex_lock(&ctrl->namespaces_mutex);
+ down_write(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list) {
if (ns->disk && nvme_revalidate_disk(ns->disk)) {
list_move_tail(&ns->list, &rm_list);
}
}
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_write(&ctrl->namespaces_rwsem);
list_for_each_entry_safe(ns, next, &rm_list, list)
nvme_ns_remove(ns);
@@ -1358,7 +1357,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
blk_queue_max_discard_sectors(queue, UINT_MAX);
blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, queue);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue);
if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
@@ -1449,6 +1448,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->noiob)
nvme_set_chunk_size(ns);
nvme_update_disk_info(disk, ns, id);
+ if (ns->ndev)
+ nvme_nvm_update_nvm_info(ns);
#ifdef CONFIG_NVME_MULTIPATH
if (ns->head->disk)
nvme_update_disk_info(ns->head->disk, ns, id);
@@ -2217,18 +2218,35 @@ out_unlock:
return ret;
}
-static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
- size_t size)
+int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ u8 log_page, void *log,
+ size_t size, size_t offset)
{
struct nvme_command c = { };
+ unsigned long dwlen = size / 4 - 1;
+
+ c.get_log_page.opcode = nvme_admin_get_log_page;
+
+ if (ns)
+ c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id);
+ else
+ c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL);
- c.common.opcode = nvme_admin_get_log_page;
- c.common.nsid = cpu_to_le32(NVME_NSID_ALL);
- c.common.cdw10[0] = nvme_get_log_dw10(log_page, size);
+ c.get_log_page.lid = log_page;
+ c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
+ c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
+ c.get_log_page.lpol = cpu_to_le32(offset & ((1ULL << 32) - 1));
+ c.get_log_page.lpou = cpu_to_le32(offset >> 32ULL);
return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
}
+static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
+ size_t size)
+{
+ return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0);
+}
+
static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
{
int ret;
@@ -2440,7 +2458,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
struct nvme_ns *ns;
int ret;
- mutex_lock(&ctrl->namespaces_mutex);
+ down_read(&ctrl->namespaces_rwsem);
if (list_empty(&ctrl->namespaces)) {
ret = -ENOTTY;
goto out_unlock;
@@ -2457,14 +2475,14 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
dev_warn(ctrl->device,
"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
kref_get(&ns->kref);
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_read(&ctrl->namespaces_rwsem);
ret = nvme_user_cmd(ctrl, ns, argp);
nvme_put_ns(ns);
return ret;
out_unlock:
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_read(&ctrl->namespaces_rwsem);
return ret;
}
@@ -2793,6 +2811,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
list_for_each_entry(h, &subsys->nsheads, entry) {
if (nvme_ns_ids_valid(&new->ids) &&
+ !list_empty(&h->list) &&
nvme_ns_ids_equal(&new->ids, &h->ids))
return -EINVAL;
}
@@ -2893,7 +2912,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns, *ret = NULL;
- mutex_lock(&ctrl->namespaces_mutex);
+ down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list) {
if (ns->head->ns_id == nsid) {
if (!kref_get_unless_zero(&ns->kref))
@@ -2904,7 +2923,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
if (ns->head->ns_id > nsid)
break;
}
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_read(&ctrl->namespaces_rwsem);
return ret;
}
@@ -2949,7 +2968,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ns->queue = blk_mq_init_queue(ctrl->tagset);
if (IS_ERR(ns->queue))
goto out_free_ns;
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
ns->queue->queuedata = ns;
ns->ctrl = ctrl;
@@ -3015,9 +3034,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
__nvme_revalidate_disk(disk, id);
- mutex_lock(&ctrl->namespaces_mutex);
+ down_write(&ctrl->namespaces_rwsem);
list_add_tail(&ns->list, &ctrl->namespaces);
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_write(&ctrl->namespaces_rwsem);
nvme_get_ctrl(ctrl);
@@ -3033,6 +3052,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ns->disk->disk_name);
nvme_mpath_add_disk(ns->head);
+ nvme_fault_inject_init(ns);
return;
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
@@ -3051,6 +3071,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
return;
+ nvme_fault_inject_fini(ns);
if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
&nvme_ns_id_attr_group);
@@ -3067,9 +3088,9 @@ static void nvme_ns_remove(struct nvme_ns *ns)
list_del_rcu(&ns->siblings);
mutex_unlock(&ns->ctrl->subsys->lock);
- mutex_lock(&ns->ctrl->namespaces_mutex);
+ down_write(&ns->ctrl->namespaces_rwsem);
list_del_init(&ns->list);
- mutex_unlock(&ns->ctrl->namespaces_mutex);
+ up_write(&ns->ctrl->namespaces_rwsem);
synchronize_srcu(&ns->head->srcu);
nvme_mpath_check_last_path(ns);
@@ -3093,11 +3114,18 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
unsigned nsid)
{
struct nvme_ns *ns, *next;
+ LIST_HEAD(rm_list);
+ down_write(&ctrl->namespaces_rwsem);
list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
if (ns->head->ns_id > nsid)
- nvme_ns_remove(ns);
+ list_move_tail(&ns->list, &rm_list);
}
+ up_write(&ctrl->namespaces_rwsem);
+
+ list_for_each_entry_safe(ns, next, &rm_list, list)
+ nvme_ns_remove(ns);
+
}
static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
@@ -3107,7 +3135,7 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
int ret = 0;
- ns_list = kzalloc(0x1000, GFP_KERNEL);
+ ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
if (!ns_list)
return -ENOMEM;
@@ -3173,9 +3201,9 @@ static void nvme_scan_work(struct work_struct *work)
}
nvme_scan_ns_sequential(ctrl, nn);
done:
- mutex_lock(&ctrl->namespaces_mutex);
+ down_write(&ctrl->namespaces_rwsem);
list_sort(NULL, &ctrl->namespaces, ns_cmp);
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_write(&ctrl->namespaces_rwsem);
kfree(id);
}
@@ -3197,6 +3225,7 @@ EXPORT_SYMBOL_GPL(nvme_queue_scan);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns, *next;
+ LIST_HEAD(ns_list);
/*
* The dead states indicates the controller was not gracefully
@@ -3207,7 +3236,11 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
if (ctrl->state == NVME_CTRL_DEAD)
nvme_kill_queues(ctrl);
- list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
+ down_write(&ctrl->namespaces_rwsem);
+ list_splice_init(&ctrl->namespaces, &ns_list);
+ up_write(&ctrl->namespaces_rwsem);
+
+ list_for_each_entry_safe(ns, next, &ns_list, list)
nvme_ns_remove(ns);
}
EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
@@ -3337,6 +3370,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
flush_work(&ctrl->async_event_work);
flush_work(&ctrl->scan_work);
cancel_work_sync(&ctrl->fw_act_work);
+ if (ctrl->ops->stop_ctrl)
+ ctrl->ops->stop_ctrl(ctrl);
}
EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
@@ -3394,7 +3429,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->state = NVME_CTRL_NEW;
spin_lock_init(&ctrl->lock);
INIT_LIST_HEAD(&ctrl->namespaces);
- mutex_init(&ctrl->namespaces_mutex);
+ init_rwsem(&ctrl->namespaces_rwsem);
ctrl->dev = dev;
ctrl->ops = ops;
ctrl->quirks = quirks;
@@ -3455,7 +3490,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
- mutex_lock(&ctrl->namespaces_mutex);
+ down_read(&ctrl->namespaces_rwsem);
/* Forcibly unquiesce queues to avoid blocking dispatch */
if (ctrl->admin_q)
@@ -3474,7 +3509,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
/* Forcibly unquiesce queues to avoid blocking dispatch */
blk_mq_unquiesce_queue(ns->queue);
}
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_kill_queues);
@@ -3482,10 +3517,10 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
- mutex_lock(&ctrl->namespaces_mutex);
+ down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
blk_mq_unfreeze_queue(ns->queue);
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_unfreeze);
@@ -3493,13 +3528,13 @@ void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
{
struct nvme_ns *ns;
- mutex_lock(&ctrl->namespaces_mutex);
+ down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list) {
timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
if (timeout <= 0)
break;
}
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
@@ -3507,10 +3542,10 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
- mutex_lock(&ctrl->namespaces_mutex);
+ down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
blk_mq_freeze_queue_wait(ns->queue);
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_wait_freeze);
@@ -3518,10 +3553,10 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
- mutex_lock(&ctrl->namespaces_mutex);
+ down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
blk_freeze_queue_start(ns->queue);
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_start_freeze);
@@ -3529,10 +3564,10 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
- mutex_lock(&ctrl->namespaces_mutex);
+ down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
blk_mq_quiesce_queue(ns->queue);
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_stop_queues);
@@ -3540,10 +3575,10 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
- mutex_lock(&ctrl->namespaces_mutex);
+ down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
blk_mq_unquiesce_queue(ns->queue);
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c
new file mode 100644
index 000000000000..02632266ac06
--- /dev/null
+++ b/drivers/nvme/host/fault_inject.c
@@ -0,0 +1,79 @@
+/*
+ * fault injection support for nvme.
+ *
+ * Copyright (c) 2018, Oracle and/or its affiliates
+ *
+ */
+
+#include <linux/moduleparam.h>
+#include "nvme.h"
+
+static DECLARE_FAULT_ATTR(fail_default_attr);
+/* optional fault injection attributes boot time option:
+ * nvme_core.fail_request=<interval>,<probability>,<space>,<times>
+ */
+static char *fail_request;
+module_param(fail_request, charp, 0000);
+
+void nvme_fault_inject_init(struct nvme_ns *ns)
+{
+ struct dentry *dir, *parent;
+ char *name = ns->disk->disk_name;
+ struct nvme_fault_inject *fault_inj = &ns->fault_inject;
+ struct fault_attr *attr = &fault_inj->attr;
+
+ /* set default fault injection attribute */
+ if (fail_request)
+ setup_fault_attr(&fail_default_attr, fail_request);
+
+ /* create debugfs directory and attribute */
+ parent = debugfs_create_dir(name, NULL);
+ if (!parent) {
+ pr_warn("%s: failed to create debugfs directory\n", name);
+ return;
+ }
+
+ *attr = fail_default_attr;
+ dir = fault_create_debugfs_attr("fault_inject", parent, attr);
+ if (IS_ERR(dir)) {
+ pr_warn("%s: failed to create debugfs attr\n", name);
+ debugfs_remove_recursive(parent);
+ return;
+ }
+ ns->fault_inject.parent = parent;
+
+ /* create debugfs for status code and dont_retry */
+ fault_inj->status = NVME_SC_INVALID_OPCODE;
+ fault_inj->dont_retry = true;
+ debugfs_create_x16("status", 0600, dir, &fault_inj->status);
+ debugfs_create_bool("dont_retry", 0600, dir, &fault_inj->dont_retry);
+}
+
+void nvme_fault_inject_fini(struct nvme_ns *ns)
+{
+ /* remove debugfs directories */
+ debugfs_remove_recursive(ns->fault_inject.parent);
+}
+
+void nvme_should_fail(struct request *req)
+{
+ struct gendisk *disk = req->rq_disk;
+ struct nvme_ns *ns = NULL;
+ u16 status;
+
+ /*
+ * make sure this request is coming from a valid namespace
+ */
+ if (!disk)
+ return;
+
+ ns = disk->private_data;
+ if (ns && should_fail(&ns->fault_inject.attr, 1)) {
+ /* inject status code and DNR bit */
+ status = ns->fault_inject.status;
+ if (ns->fault_inject.dont_retry)
+ status |= NVME_SC_DNR;
+ nvme_req(req)->status = status;
+ }
+}
+EXPORT_SYMBOL_GPL(nvme_should_fail);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 1dc1387b7134..c6e719b2f3ca 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -588,6 +588,8 @@ nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
return ERR_PTR(-ESTALE);
}
+ rport->remoteport.port_role = pinfo->port_role;
+ rport->remoteport.port_id = pinfo->port_id;
rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
rport->dev_loss_end = 0;
@@ -768,8 +770,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
*/
if (nvme_reset_ctrl(&ctrl->ctrl)) {
dev_warn(ctrl->ctrl.device,
- "NVME-FC{%d}: Couldn't schedule reset. "
- "Deleting controller.\n",
+ "NVME-FC{%d}: Couldn't schedule reset.\n",
ctrl->cnum);
nvme_delete_ctrl(&ctrl->ctrl);
}
@@ -836,8 +837,7 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
/* if dev_loss_tmo==0, dev loss is immediate */
if (!portptr->dev_loss_tmo) {
dev_warn(ctrl->ctrl.device,
- "NVME-FC{%d}: controller connectivity lost. "
- "Deleting controller.\n",
+ "NVME-FC{%d}: controller connectivity lost.\n",
ctrl->cnum);
nvme_delete_ctrl(&ctrl->ctrl);
} else
@@ -2076,20 +2076,10 @@ nvme_fc_timeout(struct request *rq, bool reserved)
{
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
struct nvme_fc_ctrl *ctrl = op->ctrl;
- int ret;
-
- if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
- atomic_read(&op->state) == FCPOP_STATE_ABORTED)
- return BLK_EH_RESET_TIMER;
-
- ret = __nvme_fc_abort_op(ctrl, op);
- if (ret)
- /* io wasn't active to abort */
- return BLK_EH_NOT_HANDLED;
/*
* we can't individually ABTS an io without affecting the queue,
- * thus killing the queue, adn thus the association.
+ * thus killing the queue, and thus the association.
* So resolve by performing a controller reset, which will stop
* the host/io stack, terminate the association on the link,
* and recreate an association on the link.
@@ -2191,7 +2181,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
struct nvme_command *sqe = &cmdiu->sqe;
u32 csn;
- int ret;
+ int ret, opstate;
/*
* before attempting to send the io, check to see if we believe
@@ -2269,6 +2259,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
queue->lldd_handle, &op->fcp_req);
if (ret) {
+ opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
+ __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
+
if (!(op->flags & FCOP_FLAGS_AEN))
nvme_fc_unmap_data(ctrl, op->rq, op);
@@ -2889,14 +2882,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
if (portptr->port_state == FC_OBJSTATE_ONLINE)
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Max reconnect attempts (%d) "
- "reached. Removing controller\n",
+ "reached.\n",
ctrl->cnum, ctrl->ctrl.nr_reconnects);
else
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: dev_loss_tmo (%d) expired "
- "while waiting for remoteport connectivity. "
- "Removing controller\n", ctrl->cnum,
- portptr->dev_loss_tmo);
+ "while waiting for remoteport connectivity.\n",
+ ctrl->cnum, portptr->dev_loss_tmo);
WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
}
}
@@ -3133,6 +3125,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
}
if (ret) {
+ nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
+ cancel_work_sync(&ctrl->ctrl.reset_work);
+ cancel_delayed_work_sync(&ctrl->connect_work);
+
/* couldn't schedule retry - fail out */
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 50ef71ee3d86..41279da799ed 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -35,6 +35,10 @@ enum nvme_nvm_admin_opcode {
nvme_nvm_admin_set_bb_tbl = 0xf1,
};
+enum nvme_nvm_log_page {
+ NVME_NVM_LOG_REPORT_CHUNK = 0xca,
+};
+
struct nvme_nvm_ph_rw {
__u8 opcode;
__u8 flags;
@@ -51,6 +55,21 @@ struct nvme_nvm_ph_rw {
__le64 resv;
};
+struct nvme_nvm_erase_blk {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd[2];
+ __le64 prp1;
+ __le64 prp2;
+ __le64 spba;
+ __le16 length;
+ __le16 control;
+ __le32 dsmgmt;
+ __le64 resv;
+};
+
struct nvme_nvm_identity {
__u8 opcode;
__u8 flags;
@@ -59,8 +78,7 @@ struct nvme_nvm_identity {
__u64 rsvd[2];
__le64 prp1;
__le64 prp2;
- __le32 chnl_off;
- __u32 rsvd11[5];
+ __u32 rsvd11[6];
};
struct nvme_nvm_getbbtbl {
@@ -90,44 +108,18 @@ struct nvme_nvm_setbbtbl {
__u32 rsvd4[3];
};
-struct nvme_nvm_erase_blk {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __u64 rsvd[2];
- __le64 prp1;
- __le64 prp2;
- __le64 spba;
- __le16 length;
- __le16 control;
- __le32 dsmgmt;
- __le64 resv;
-};
-
struct nvme_nvm_command {
union {
struct nvme_common_command common;
- struct nvme_nvm_identity identity;
struct nvme_nvm_ph_rw ph_rw;
+ struct nvme_nvm_erase_blk erase;
+ struct nvme_nvm_identity identity;
struct nvme_nvm_getbbtbl get_bb;
struct nvme_nvm_setbbtbl set_bb;
- struct nvme_nvm_erase_blk erase;
};
};
-#define NVME_NVM_LP_MLC_PAIRS 886
-struct nvme_nvm_lp_mlc {
- __le16 num_pairs;
- __u8 pairs[NVME_NVM_LP_MLC_PAIRS];
-};
-
-struct nvme_nvm_lp_tbl {
- __u8 id[8];
- struct nvme_nvm_lp_mlc mlc;
-};
-
-struct nvme_nvm_id_group {
+struct nvme_nvm_id12_grp {
__u8 mtype;
__u8 fmtype;
__le16 res16;
@@ -150,11 +142,10 @@ struct nvme_nvm_id_group {
__le32 mpos;
__le32 mccap;
__le16 cpar;
- __u8 reserved[10];
- struct nvme_nvm_lp_tbl lptbl;
+ __u8 reserved[906];
} __packed;
-struct nvme_nvm_addr_format {
+struct nvme_nvm_id12_addrf {
__u8 ch_offset;
__u8 ch_len;
__u8 lun_offset;
@@ -165,21 +156,22 @@ struct nvme_nvm_addr_format {
__u8 blk_len;
__u8 pg_offset;
__u8 pg_len;
- __u8 sect_offset;
- __u8 sect_len;
+ __u8 sec_offset;
+ __u8 sec_len;
__u8 res[4];
} __packed;
-struct nvme_nvm_id {
+struct nvme_nvm_id12 {
__u8 ver_id;
__u8 vmnt;
__u8 cgrps;
__u8 res;
__le32 cap;
__le32 dom;
- struct nvme_nvm_addr_format ppaf;
+ struct nvme_nvm_id12_addrf ppaf;
__u8 resv[228];
- struct nvme_nvm_id_group groups[4];
+ struct nvme_nvm_id12_grp grp;
+ __u8 resv2[2880];
} __packed;
struct nvme_nvm_bb_tbl {
@@ -196,6 +188,68 @@ struct nvme_nvm_bb_tbl {
__u8 blk[0];
};
+struct nvme_nvm_id20_addrf {
+ __u8 grp_len;
+ __u8 pu_len;
+ __u8 chk_len;
+ __u8 lba_len;
+ __u8 resv[4];
+};
+
+struct nvme_nvm_id20 {
+ __u8 mjr;
+ __u8 mnr;
+ __u8 resv[6];
+
+ struct nvme_nvm_id20_addrf lbaf;
+
+ __le32 mccap;
+ __u8 resv2[12];
+
+ __u8 wit;
+ __u8 resv3[31];
+
+ /* Geometry */
+ __le16 num_grp;
+ __le16 num_pu;
+ __le32 num_chk;
+ __le32 clba;
+ __u8 resv4[52];
+
+ /* Write data requirements */
+ __le32 ws_min;
+ __le32 ws_opt;
+ __le32 mw_cunits;
+ __le32 maxoc;
+ __le32 maxocpu;
+ __u8 resv5[44];
+
+ /* Performance related metrics */
+ __le32 trdt;
+ __le32 trdm;
+ __le32 twrt;
+ __le32 twrm;
+ __le32 tcrst;
+ __le32 tcrsm;
+ __u8 resv6[40];
+
+ /* Reserved area */
+ __u8 resv7[2816];
+
+ /* Vendor specific */
+ __u8 vs[1024];
+};
+
+struct nvme_nvm_chk_meta {
+ __u8 state;
+ __u8 type;
+ __u8 wi;
+ __u8 rsvd[5];
+ __le64 slba;
+ __le64 cnlb;
+ __le64 wp;
+};
+
/*
* Check we didn't inadvertently grow the command struct
*/
@@ -203,105 +257,238 @@ static inline void _nvme_nvm_check_size(void)
{
BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) !=
+ sizeof(struct nvm_chk_meta));
+}
+
+static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst,
+ struct nvme_nvm_id12_addrf *src)
+{
+ dst->ch_len = src->ch_len;
+ dst->lun_len = src->lun_len;
+ dst->blk_len = src->blk_len;
+ dst->pg_len = src->pg_len;
+ dst->pln_len = src->pln_len;
+ dst->sec_len = src->sec_len;
+
+ dst->ch_offset = src->ch_offset;
+ dst->lun_offset = src->lun_offset;
+ dst->blk_offset = src->blk_offset;
+ dst->pg_offset = src->pg_offset;
+ dst->pln_offset = src->pln_offset;
+ dst->sec_offset = src->sec_offset;
+
+ dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
+ dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
+ dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
+ dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
+ dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
+ dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
}
-static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
+static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id,
+ struct nvm_geo *geo)
{
- struct nvme_nvm_id_group *src;
- struct nvm_id_group *grp;
+ struct nvme_nvm_id12_grp *src;
int sec_per_pg, sec_per_pl, pg_per_blk;
- if (nvme_nvm_id->cgrps != 1)
+ if (id->cgrps != 1)
+ return -EINVAL;
+
+ src = &id->grp;
+
+ if (src->mtype != 0) {
+ pr_err("nvm: memory type not supported\n");
return -EINVAL;
+ }
+
+ /* 1.2 spec. only reports a single version id - unfold */
+ geo->major_ver_id = id->ver_id;
+ geo->minor_ver_id = 2;
- src = &nvme_nvm_id->groups[0];
- grp = &nvm_id->grp;
+ /* Set compacted version for upper layers */
+ geo->version = NVM_OCSSD_SPEC_12;
- grp->mtype = src->mtype;
- grp->fmtype = src->fmtype;
+ geo->num_ch = src->num_ch;
+ geo->num_lun = src->num_lun;
+ geo->all_luns = geo->num_ch * geo->num_lun;
- grp->num_ch = src->num_ch;
- grp->num_lun = src->num_lun;
+ geo->num_chk = le16_to_cpu(src->num_chk);
- grp->num_chk = le16_to_cpu(src->num_chk);
- grp->csecs = le16_to_cpu(src->csecs);
- grp->sos = le16_to_cpu(src->sos);
+ geo->csecs = le16_to_cpu(src->csecs);
+ geo->sos = le16_to_cpu(src->sos);
pg_per_blk = le16_to_cpu(src->num_pg);
- sec_per_pg = le16_to_cpu(src->fpg_sz) / grp->csecs;
+ sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs;
sec_per_pl = sec_per_pg * src->num_pln;
- grp->clba = sec_per_pl * pg_per_blk;
- grp->ws_per_chk = pg_per_blk;
-
- grp->mpos = le32_to_cpu(src->mpos);
- grp->cpar = le16_to_cpu(src->cpar);
- grp->mccap = le32_to_cpu(src->mccap);
-
- grp->ws_opt = grp->ws_min = sec_per_pg;
- grp->ws_seq = NVM_IO_SNGL_ACCESS;
-
- if (grp->mpos & 0x020202) {
- grp->ws_seq = NVM_IO_DUAL_ACCESS;
- grp->ws_opt <<= 1;
- } else if (grp->mpos & 0x040404) {
- grp->ws_seq = NVM_IO_QUAD_ACCESS;
- grp->ws_opt <<= 2;
- }
+ geo->clba = sec_per_pl * pg_per_blk;
+
+ geo->all_chunks = geo->all_luns * geo->num_chk;
+ geo->total_secs = geo->clba * geo->all_chunks;
+
+ geo->ws_min = sec_per_pg;
+ geo->ws_opt = sec_per_pg;
+ geo->mw_cunits = geo->ws_opt << 3; /* default to MLC safe values */
- grp->trdt = le32_to_cpu(src->trdt);
- grp->trdm = le32_to_cpu(src->trdm);
- grp->tprt = le32_to_cpu(src->tprt);
- grp->tprm = le32_to_cpu(src->tprm);
- grp->tbet = le32_to_cpu(src->tbet);
- grp->tbem = le32_to_cpu(src->tbem);
+ /* Do not impose values for maximum number of open blocks as it is
+ * unspecified in 1.2. Users of 1.2 must be aware of this and eventually
+ * specify these values through a quirk if restrictions apply.
+ */
+ geo->maxoc = geo->all_luns * geo->num_chk;
+ geo->maxocpu = geo->num_chk;
+
+ geo->mccap = le32_to_cpu(src->mccap);
+
+ geo->trdt = le32_to_cpu(src->trdt);
+ geo->trdm = le32_to_cpu(src->trdm);
+ geo->tprt = le32_to_cpu(src->tprt);
+ geo->tprm = le32_to_cpu(src->tprm);
+ geo->tbet = le32_to_cpu(src->tbet);
+ geo->tbem = le32_to_cpu(src->tbem);
/* 1.2 compatibility */
- grp->num_pln = src->num_pln;
- grp->num_pg = le16_to_cpu(src->num_pg);
- grp->fpg_sz = le16_to_cpu(src->fpg_sz);
+ geo->vmnt = id->vmnt;
+ geo->cap = le32_to_cpu(id->cap);
+ geo->dom = le32_to_cpu(id->dom);
+
+ geo->mtype = src->mtype;
+ geo->fmtype = src->fmtype;
+
+ geo->cpar = le16_to_cpu(src->cpar);
+ geo->mpos = le32_to_cpu(src->mpos);
+
+ geo->pln_mode = NVM_PLANE_SINGLE;
+
+ if (geo->mpos & 0x020202) {
+ geo->pln_mode = NVM_PLANE_DOUBLE;
+ geo->ws_opt <<= 1;
+ } else if (geo->mpos & 0x040404) {
+ geo->pln_mode = NVM_PLANE_QUAD;
+ geo->ws_opt <<= 2;
+ }
+
+ geo->num_pln = src->num_pln;
+ geo->num_pg = le16_to_cpu(src->num_pg);
+ geo->fpg_sz = le16_to_cpu(src->fpg_sz);
+
+ nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf);
return 0;
}
-static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
+static void nvme_nvm_set_addr_20(struct nvm_addrf *dst,
+ struct nvme_nvm_id20_addrf *src)
+{
+ dst->ch_len = src->grp_len;
+ dst->lun_len = src->pu_len;
+ dst->chk_len = src->chk_len;
+ dst->sec_len = src->lba_len;
+
+ dst->sec_offset = 0;
+ dst->chk_offset = dst->sec_len;
+ dst->lun_offset = dst->chk_offset + dst->chk_len;
+ dst->ch_offset = dst->lun_offset + dst->lun_len;
+
+ dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
+ dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
+ dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset;
+ dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
+}
+
+static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id,
+ struct nvm_geo *geo)
+{
+ geo->major_ver_id = id->mjr;
+ geo->minor_ver_id = id->mnr;
+
+ /* Set compacted version for upper layers */
+ geo->version = NVM_OCSSD_SPEC_20;
+
+ if (!(geo->major_ver_id == 2 && geo->minor_ver_id == 0)) {
+ pr_err("nvm: OCSSD version not supported (v%d.%d)\n",
+ geo->major_ver_id, geo->minor_ver_id);
+ return -EINVAL;
+ }
+
+ geo->num_ch = le16_to_cpu(id->num_grp);
+ geo->num_lun = le16_to_cpu(id->num_pu);
+ geo->all_luns = geo->num_ch * geo->num_lun;
+
+ geo->num_chk = le32_to_cpu(id->num_chk);
+ geo->clba = le32_to_cpu(id->clba);
+
+ geo->all_chunks = geo->all_luns * geo->num_chk;
+ geo->total_secs = geo->clba * geo->all_chunks;
+
+ geo->ws_min = le32_to_cpu(id->ws_min);
+ geo->ws_opt = le32_to_cpu(id->ws_opt);
+ geo->mw_cunits = le32_to_cpu(id->mw_cunits);
+ geo->maxoc = le32_to_cpu(id->maxoc);
+ geo->maxocpu = le32_to_cpu(id->maxocpu);
+
+ geo->trdt = le32_to_cpu(id->trdt);
+ geo->trdm = le32_to_cpu(id->trdm);
+ geo->tprt = le32_to_cpu(id->twrt);
+ geo->tprm = le32_to_cpu(id->twrm);
+ geo->tbet = le32_to_cpu(id->tcrst);
+ geo->tbem = le32_to_cpu(id->tcrsm);
+
+ nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf);
+
+ return 0;
+}
+
+static int nvme_nvm_identity(struct nvm_dev *nvmdev)
{
struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_nvm_id *nvme_nvm_id;
+ struct nvme_nvm_id12 *id;
struct nvme_nvm_command c = {};
int ret;
c.identity.opcode = nvme_nvm_admin_identity;
c.identity.nsid = cpu_to_le32(ns->head->ns_id);
- c.identity.chnl_off = 0;
- nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL);
- if (!nvme_nvm_id)
+ id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL);
+ if (!id)
return -ENOMEM;
ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
- nvme_nvm_id, sizeof(struct nvme_nvm_id));
+ id, sizeof(struct nvme_nvm_id12));
if (ret) {
ret = -EIO;
goto out;
}
- nvm_id->ver_id = nvme_nvm_id->ver_id;
- nvm_id->vmnt = nvme_nvm_id->vmnt;
- nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
- nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
- memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
- sizeof(struct nvm_addr_format));
+ /*
+ * The 1.2 and 2.0 specifications share the first byte in their geometry
+ * command to make it possible to know what version a device implements.
+ */
+ switch (id->ver_id) {
+ case 1:
+ ret = nvme_nvm_setup_12(id, &nvmdev->geo);
+ break;
+ case 2:
+ ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id,
+ &nvmdev->geo);
+ break;
+ default:
+ dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n",
+ id->ver_id);
+ ret = -EINVAL;
+ }
- ret = init_grps(nvm_id, nvme_nvm_id);
out:
- kfree(nvme_nvm_id);
+ kfree(id);
return ret;
}
@@ -314,7 +501,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_nvm_command c = {};
struct nvme_nvm_bb_tbl *bb_tbl;
- int nr_blks = geo->nr_chks * geo->plane_mode;
+ int nr_blks = geo->num_chk * geo->num_pln;
int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
int ret = 0;
@@ -355,7 +542,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
goto out;
}
- memcpy(blks, bb_tbl->blk, geo->nr_chks * geo->plane_mode);
+ memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln);
out:
kfree(bb_tbl);
return ret;
@@ -382,6 +569,61 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
return ret;
}
+/*
+ * Expect the lba in device format
+ */
+static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
+ struct nvm_chk_meta *meta,
+ sector_t slba, int nchks)
+{
+ struct nvm_geo *geo = &ndev->geo;
+ struct nvme_ns *ns = ndev->q->queuedata;
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta;
+ struct ppa_addr ppa;
+ size_t left = nchks * sizeof(struct nvme_nvm_chk_meta);
+ size_t log_pos, offset, len;
+ int ret, i;
+
+ /* Normalize lba address space to obtain log offset */
+ ppa.ppa = slba;
+ ppa = dev_to_generic_addr(ndev, ppa);
+
+ log_pos = ppa.m.chk;
+ log_pos += ppa.m.pu * geo->num_chk;
+ log_pos += ppa.m.grp * geo->num_lun * geo->num_chk;
+
+ offset = log_pos * sizeof(struct nvme_nvm_chk_meta);
+
+ while (left) {
+ len = min_t(unsigned int, left, ctrl->max_hw_sectors << 9);
+
+ ret = nvme_get_log_ext(ctrl, ns, NVME_NVM_LOG_REPORT_CHUNK,
+ dev_meta, len, offset);
+ if (ret) {
+ dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
+ break;
+ }
+
+ for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) {
+ meta->state = dev_meta->state;
+ meta->type = dev_meta->type;
+ meta->wi = dev_meta->wi;
+ meta->slba = le64_to_cpu(dev_meta->slba);
+ meta->cnlb = le64_to_cpu(dev_meta->cnlb);
+ meta->wp = le64_to_cpu(dev_meta->wp);
+
+ meta++;
+ dev_meta++;
+ }
+
+ offset += len;
+ left -= len;
+ }
+
+ return ret;
+}
+
static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
struct nvme_nvm_command *c)
{
@@ -513,6 +755,8 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
.get_bb_tbl = nvme_nvm_get_bb_tbl,
.set_bb_tbl = nvme_nvm_set_bb_tbl,
+ .get_chk_meta = nvme_nvm_get_chk_meta,
+
.submit_io = nvme_nvm_submit_io,
.submit_io_sync = nvme_nvm_submit_io_sync,
@@ -520,8 +764,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
.destroy_dma_pool = nvme_nvm_destroy_dma_pool,
.dev_dma_alloc = nvme_nvm_dev_dma_alloc,
.dev_dma_free = nvme_nvm_dev_dma_free,
-
- .max_phys_sect = 64,
};
static int nvme_nvm_submit_user_cmd(struct request_queue *q,
@@ -722,6 +964,15 @@ int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
}
}
+void nvme_nvm_update_nvm_info(struct nvme_ns *ns)
+{
+ struct nvm_dev *ndev = ns->ndev;
+ struct nvm_geo *geo = &ndev->geo;
+
+ geo->csecs = 1 << ns->lba_shift;
+ geo->sos = ns->ms;
+}
+
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
{
struct request_queue *q = ns->queue;
@@ -748,125 +999,205 @@ void nvme_nvm_unregister(struct nvme_ns *ns)
}
static ssize_t nvm_dev_attr_show(struct device *dev,
- struct device_attribute *dattr, char *page)
+ struct device_attribute *dattr, char *page)
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
struct nvm_dev *ndev = ns->ndev;
- struct nvm_id *id;
- struct nvm_id_group *grp;
+ struct nvm_geo *geo = &ndev->geo;
struct attribute *attr;
if (!ndev)
return 0;
- id = &ndev->identity;
- grp = &id->grp;
attr = &dattr->attr;
if (strcmp(attr->name, "version") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", id->ver_id);
- } else if (strcmp(attr->name, "vendor_opcode") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", id->vmnt);
+ if (geo->major_ver_id == 1)
+ return scnprintf(page, PAGE_SIZE, "%u\n",
+ geo->major_ver_id);
+ else
+ return scnprintf(page, PAGE_SIZE, "%u.%u\n",
+ geo->major_ver_id,
+ geo->minor_ver_id);
} else if (strcmp(attr->name, "capabilities") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", id->cap);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap);
+ } else if (strcmp(attr->name, "read_typ") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt);
+ } else if (strcmp(attr->name, "read_max") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm);
+ } else {
+ return scnprintf(page,
+ PAGE_SIZE,
+ "Unhandled attr(%s) in `%s`\n",
+ attr->name, __func__);
+ }
+}
+
+static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page)
+{
+ return scnprintf(page, PAGE_SIZE,
+ "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+ ppaf->ch_offset, ppaf->ch_len,
+ ppaf->lun_offset, ppaf->lun_len,
+ ppaf->pln_offset, ppaf->pln_len,
+ ppaf->blk_offset, ppaf->blk_len,
+ ppaf->pg_offset, ppaf->pg_len,
+ ppaf->sec_offset, ppaf->sec_len);
+}
+
+static ssize_t nvm_dev_attr_show_12(struct device *dev,
+ struct device_attribute *dattr, char *page)
+{
+ struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+ struct nvm_dev *ndev = ns->ndev;
+ struct nvm_geo *geo = &ndev->geo;
+ struct attribute *attr;
+
+ if (!ndev)
+ return 0;
+
+ attr = &dattr->attr;
+
+ if (strcmp(attr->name, "vendor_opcode") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt);
} else if (strcmp(attr->name, "device_mode") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", id->dom);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom);
/* kept for compatibility */
} else if (strcmp(attr->name, "media_manager") == 0) {
return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
} else if (strcmp(attr->name, "ppa_format") == 0) {
- return scnprintf(page, PAGE_SIZE,
- "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
- id->ppaf.ch_offset, id->ppaf.ch_len,
- id->ppaf.lun_offset, id->ppaf.lun_len,
- id->ppaf.pln_offset, id->ppaf.pln_len,
- id->ppaf.blk_offset, id->ppaf.blk_len,
- id->ppaf.pg_offset, id->ppaf.pg_len,
- id->ppaf.sect_offset, id->ppaf.sect_len);
+ return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page);
} else if (strcmp(attr->name, "media_type") == 0) { /* u8 */
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->mtype);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype);
} else if (strcmp(attr->name, "flash_media_type") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->fmtype);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype);
} else if (strcmp(attr->name, "num_channels") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_ch);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
} else if (strcmp(attr->name, "num_luns") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_lun);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
} else if (strcmp(attr->name, "num_planes") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln);
} else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_chk);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
} else if (strcmp(attr->name, "num_pages") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg);
} else if (strcmp(attr->name, "page_size") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->fpg_sz);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz);
} else if (strcmp(attr->name, "hw_sector_size") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->csecs);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs);
} else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->sos);
- } else if (strcmp(attr->name, "read_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdt);
- } else if (strcmp(attr->name, "read_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdm);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos);
} else if (strcmp(attr->name, "prog_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprt);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
} else if (strcmp(attr->name, "prog_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprm);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
} else if (strcmp(attr->name, "erase_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbet);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
} else if (strcmp(attr->name, "erase_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbem);
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
} else if (strcmp(attr->name, "multiplane_modes") == 0) {
- return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mpos);
+ return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos);
} else if (strcmp(attr->name, "media_capabilities") == 0) {
- return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mccap);
+ return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap);
} else if (strcmp(attr->name, "max_phys_secs") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n",
- ndev->ops->max_phys_sect);
+ return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA);
} else {
- return scnprintf(page,
- PAGE_SIZE,
- "Unhandled attr(%s) in `nvm_dev_attr_show`\n",
- attr->name);
+ return scnprintf(page, PAGE_SIZE,
+ "Unhandled attr(%s) in `%s`\n",
+ attr->name, __func__);
+ }
+}
+
+static ssize_t nvm_dev_attr_show_20(struct device *dev,
+ struct device_attribute *dattr, char *page)
+{
+ struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+ struct nvm_dev *ndev = ns->ndev;
+ struct nvm_geo *geo = &ndev->geo;
+ struct attribute *attr;
+
+ if (!ndev)
+ return 0;
+
+ attr = &dattr->attr;
+
+ if (strcmp(attr->name, "groups") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
+ } else if (strcmp(attr->name, "punits") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
+ } else if (strcmp(attr->name, "chunks") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
+ } else if (strcmp(attr->name, "clba") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba);
+ } else if (strcmp(attr->name, "ws_min") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min);
+ } else if (strcmp(attr->name, "ws_opt") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt);
+ } else if (strcmp(attr->name, "maxoc") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc);
+ } else if (strcmp(attr->name, "maxocpu") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu);
+ } else if (strcmp(attr->name, "mw_cunits") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits);
+ } else if (strcmp(attr->name, "write_typ") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
+ } else if (strcmp(attr->name, "write_max") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
+ } else if (strcmp(attr->name, "reset_typ") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
+ } else if (strcmp(attr->name, "reset_max") == 0) {
+ return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
+ } else {
+ return scnprintf(page, PAGE_SIZE,
+ "Unhandled attr(%s) in `%s`\n",
+ attr->name, __func__);
}
}
-#define NVM_DEV_ATTR_RO(_name) \
+#define NVM_DEV_ATTR_RO(_name) \
DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL)
+#define NVM_DEV_ATTR_12_RO(_name) \
+ DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL)
+#define NVM_DEV_ATTR_20_RO(_name) \
+ DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL)
+/* general attributes */
static NVM_DEV_ATTR_RO(version);
-static NVM_DEV_ATTR_RO(vendor_opcode);
static NVM_DEV_ATTR_RO(capabilities);
-static NVM_DEV_ATTR_RO(device_mode);
-static NVM_DEV_ATTR_RO(ppa_format);
-static NVM_DEV_ATTR_RO(media_manager);
-
-static NVM_DEV_ATTR_RO(media_type);
-static NVM_DEV_ATTR_RO(flash_media_type);
-static NVM_DEV_ATTR_RO(num_channels);
-static NVM_DEV_ATTR_RO(num_luns);
-static NVM_DEV_ATTR_RO(num_planes);
-static NVM_DEV_ATTR_RO(num_blocks);
-static NVM_DEV_ATTR_RO(num_pages);
-static NVM_DEV_ATTR_RO(page_size);
-static NVM_DEV_ATTR_RO(hw_sector_size);
-static NVM_DEV_ATTR_RO(oob_sector_size);
+
static NVM_DEV_ATTR_RO(read_typ);
static NVM_DEV_ATTR_RO(read_max);
-static NVM_DEV_ATTR_RO(prog_typ);
-static NVM_DEV_ATTR_RO(prog_max);
-static NVM_DEV_ATTR_RO(erase_typ);
-static NVM_DEV_ATTR_RO(erase_max);
-static NVM_DEV_ATTR_RO(multiplane_modes);
-static NVM_DEV_ATTR_RO(media_capabilities);
-static NVM_DEV_ATTR_RO(max_phys_secs);
-
-static struct attribute *nvm_dev_attrs[] = {
+
+/* 1.2 values */
+static NVM_DEV_ATTR_12_RO(vendor_opcode);
+static NVM_DEV_ATTR_12_RO(device_mode);
+static NVM_DEV_ATTR_12_RO(ppa_format);
+static NVM_DEV_ATTR_12_RO(media_manager);
+static NVM_DEV_ATTR_12_RO(media_type);
+static NVM_DEV_ATTR_12_RO(flash_media_type);
+static NVM_DEV_ATTR_12_RO(num_channels);
+static NVM_DEV_ATTR_12_RO(num_luns);
+static NVM_DEV_ATTR_12_RO(num_planes);
+static NVM_DEV_ATTR_12_RO(num_blocks);
+static NVM_DEV_ATTR_12_RO(num_pages);
+static NVM_DEV_ATTR_12_RO(page_size);
+static NVM_DEV_ATTR_12_RO(hw_sector_size);
+static NVM_DEV_ATTR_12_RO(oob_sector_size);
+static NVM_DEV_ATTR_12_RO(prog_typ);
+static NVM_DEV_ATTR_12_RO(prog_max);
+static NVM_DEV_ATTR_12_RO(erase_typ);
+static NVM_DEV_ATTR_12_RO(erase_max);
+static NVM_DEV_ATTR_12_RO(multiplane_modes);
+static NVM_DEV_ATTR_12_RO(media_capabilities);
+static NVM_DEV_ATTR_12_RO(max_phys_secs);
+
+static struct attribute *nvm_dev_attrs_12[] = {
&dev_attr_version.attr,
- &dev_attr_vendor_opcode.attr,
&dev_attr_capabilities.attr,
+
+ &dev_attr_vendor_opcode.attr,
&dev_attr_device_mode.attr,
&dev_attr_media_manager.attr,
-
&dev_attr_ppa_format.attr,
&dev_attr_media_type.attr,
&dev_attr_flash_media_type.attr,
@@ -887,22 +1218,92 @@ static struct attribute *nvm_dev_attrs[] = {
&dev_attr_multiplane_modes.attr,
&dev_attr_media_capabilities.attr,
&dev_attr_max_phys_secs.attr,
+
NULL,
};
-static const struct attribute_group nvm_dev_attr_group = {
+static const struct attribute_group nvm_dev_attr_group_12 = {
.name = "lightnvm",
- .attrs = nvm_dev_attrs,
+ .attrs = nvm_dev_attrs_12,
+};
+
+/* 2.0 values */
+static NVM_DEV_ATTR_20_RO(groups);
+static NVM_DEV_ATTR_20_RO(punits);
+static NVM_DEV_ATTR_20_RO(chunks);
+static NVM_DEV_ATTR_20_RO(clba);
+static NVM_DEV_ATTR_20_RO(ws_min);
+static NVM_DEV_ATTR_20_RO(ws_opt);
+static NVM_DEV_ATTR_20_RO(maxoc);
+static NVM_DEV_ATTR_20_RO(maxocpu);
+static NVM_DEV_ATTR_20_RO(mw_cunits);
+static NVM_DEV_ATTR_20_RO(write_typ);
+static NVM_DEV_ATTR_20_RO(write_max);
+static NVM_DEV_ATTR_20_RO(reset_typ);
+static NVM_DEV_ATTR_20_RO(reset_max);
+
+static struct attribute *nvm_dev_attrs_20[] = {
+ &dev_attr_version.attr,
+ &dev_attr_capabilities.attr,
+
+ &dev_attr_groups.attr,
+ &dev_attr_punits.attr,
+ &dev_attr_chunks.attr,
+ &dev_attr_clba.attr,
+ &dev_attr_ws_min.attr,
+ &dev_attr_ws_opt.attr,
+ &dev_attr_maxoc.attr,
+ &dev_attr_maxocpu.attr,
+ &dev_attr_mw_cunits.attr,
+
+ &dev_attr_read_typ.attr,
+ &dev_attr_read_max.attr,
+ &dev_attr_write_typ.attr,
+ &dev_attr_write_max.attr,
+ &dev_attr_reset_typ.attr,
+ &dev_attr_reset_max.attr,
+
+ NULL,
+};
+
+static const struct attribute_group nvm_dev_attr_group_20 = {
+ .name = "lightnvm",
+ .attrs = nvm_dev_attrs_20,
};
int nvme_nvm_register_sysfs(struct nvme_ns *ns)
{
- return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
- &nvm_dev_attr_group);
+ struct nvm_dev *ndev = ns->ndev;
+ struct nvm_geo *geo = &ndev->geo;
+
+ if (!ndev)
+ return -EINVAL;
+
+ switch (geo->major_ver_id) {
+ case 1:
+ return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
+ &nvm_dev_attr_group_12);
+ case 2:
+ return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
+ &nvm_dev_attr_group_20);
+ }
+
+ return -EINVAL;
}
void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
{
- sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
- &nvm_dev_attr_group);
+ struct nvm_dev *ndev = ns->ndev;
+ struct nvm_geo *geo = &ndev->geo;
+
+ switch (geo->major_ver_id) {
+ case 1:
+ sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
+ &nvm_dev_attr_group_12);
+ break;
+ case 2:
+ sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
+ &nvm_dev_attr_group_20);
+ break;
+ }
}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 060f69e03427..956e0b8e9c4d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -44,12 +44,12 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
- mutex_lock(&ctrl->namespaces_mutex);
+ down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list) {
if (ns->head->disk)
kblockd_schedule_work(&ns->head->requeue_work);
}
- mutex_unlock(&ctrl->namespaces_mutex);
+ up_read(&ctrl->namespaces_rwsem);
}
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
@@ -162,13 +162,13 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
return 0;
- q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
+ q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
if (!q)
goto out;
q->queuedata = head;
blk_queue_make_request(q, nvme_ns_head_make_request);
q->poll_fn = nvme_ns_head_poll;
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* set to a default value for 512 until disk is validated */
blk_queue_logical_block_size(q, 512);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d733b14ede9d..cf93690b3ffc 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -21,6 +21,7 @@
#include <linux/blk-mq.h>
#include <linux/lightnvm.h>
#include <linux/sed-opal.h>
+#include <linux/fault-inject.h>
extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
@@ -140,7 +141,7 @@ struct nvme_ctrl {
struct blk_mq_tag_set *tagset;
struct blk_mq_tag_set *admin_tagset;
struct list_head namespaces;
- struct mutex namespaces_mutex;
+ struct rw_semaphore namespaces_rwsem;
struct device ctrl_device;
struct device *device; /* char device */
struct cdev cdev;
@@ -261,6 +262,15 @@ struct nvme_ns_head {
int instance;
};
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+struct nvme_fault_inject {
+ struct fault_attr attr;
+ struct dentry *parent;
+ bool dont_retry; /* DNR, do not retry */
+ u16 status; /* status code */
+};
+#endif
+
struct nvme_ns {
struct list_head list;
@@ -282,6 +292,11 @@ struct nvme_ns {
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
u16 noiob;
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+ struct nvme_fault_inject fault_inject;
+#endif
+
};
struct nvme_ctrl_ops {
@@ -298,8 +313,19 @@ struct nvme_ctrl_ops {
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
int (*reinit_request)(void *data, struct request *rq);
+ void (*stop_ctrl)(struct nvme_ctrl *ctrl);
};
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+void nvme_fault_inject_init(struct nvme_ns *ns);
+void nvme_fault_inject_fini(struct nvme_ns *ns);
+void nvme_should_fail(struct request *req);
+#else
+static inline void nvme_fault_inject_init(struct nvme_ns *ns) {}
+static inline void nvme_fault_inject_fini(struct nvme_ns *ns) {}
+static inline void nvme_should_fail(struct request *req) {}
+#endif
+
static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
{
u32 val = 0;
@@ -336,6 +362,8 @@ static inline void nvme_end_request(struct request *req, __le16 status,
rq->status = le16_to_cpu(status) >> 1;
rq->result = result;
+ /* inject error when permitted by fault injection framework */
+ nvme_should_fail(req);
blk_mq_complete_request(req);
}
@@ -401,6 +429,9 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
+int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ u8 log_page, void *log, size_t size, size_t offset);
+
extern const struct attribute_group nvme_ns_id_attr_group;
extern const struct block_device_operations nvme_ns_head_ops;
@@ -461,12 +492,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
#endif /* CONFIG_NVME_MULTIPATH */
#ifdef CONFIG_NVM
+void nvme_nvm_update_nvm_info(struct nvme_ns *ns);
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
int nvme_nvm_register_sysfs(struct nvme_ns *ns);
void nvme_nvm_unregister_sysfs(struct nvme_ns *ns);
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
#else
+static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {};
static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
int node)
{
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b6f43b738f03..295fbec1e5f2 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -414,7 +414,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
{
struct nvme_dev *dev = set->driver_data;
- return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev));
+ return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), 0);
}
/**
@@ -2197,7 +2197,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
if (!dead) {
if (shutdown)
nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+ }
+
+ nvme_stop_queues(&dev->ctrl);
+ if (!dead) {
/*
* If the controller is still alive tell it to stop using the
* host memory buffer. In theory the shutdown / reset should
@@ -2206,11 +2210,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
*/
if (dev->host_mem_descs)
nvme_set_host_mem(dev, 0);
-
- }
- nvme_stop_queues(&dev->ctrl);
-
- if (!dead) {
nvme_disable_io_queues(dev);
nvme_disable_admin_queue(dev, shutdown);
}
@@ -2416,6 +2415,13 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
return 0;
}
+static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
+{
+ struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);
+
+ return snprintf(buf, size, "%s", dev_name(&pdev->dev));
+}
+
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
@@ -2425,6 +2431,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.reg_read64 = nvme_pci_reg_read64,
.free_ctrl = nvme_pci_free_ctrl,
.submit_async_event = nvme_pci_submit_async_event,
+ .get_address = nvme_pci_get_address,
};
static int nvme_dev_map(struct nvme_dev *dev)
@@ -2461,10 +2468,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
} else if (pdev->vendor == 0x144d && pdev->device == 0xa804) {
/*
* Samsung SSD 960 EVO drops off the PCIe bus after system
- * suspend on a Ryzen board, ASUS PRIME B350M-A.
+ * suspend on a Ryzen board, ASUS PRIME B350M-A, as well as
+ * within few minutes after bootup on a Coffee Lake board -
+ * ASUS PRIME Z370-A
*/
if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") &&
- dmi_match(DMI_BOARD_NAME, "PRIME B350M-A"))
+ (dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") ||
+ dmi_match(DMI_BOARD_NAME, "PRIME Z370-A")))
return NVME_QUIRK_NO_APST;
}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 4d84a73ee12d..758537e9ba07 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -867,6 +867,14 @@ out_free_io_queues:
return ret;
}
+static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
+{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+
+ cancel_work_sync(&ctrl->err_work);
+ cancel_delayed_work_sync(&ctrl->reconnect_work);
+}
+
static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
@@ -899,7 +907,6 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
- dev_info(ctrl->ctrl.device, "Removing controller...\n");
nvme_delete_ctrl(&ctrl->ctrl);
}
}
@@ -974,8 +981,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
nvme_start_queues(&ctrl->ctrl);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
- /* state change failure should never happen */
- WARN_ON_ONCE(1);
+ /* state change failure is ok if we're in DELETING state */
+ WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
return;
}
@@ -1719,9 +1726,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
- cancel_work_sync(&ctrl->err_work);
- cancel_delayed_work_sync(&ctrl->reconnect_work);
-
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
@@ -1799,6 +1803,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_delete_ctrl,
.get_address = nvmf_get_address,
+ .stop_ctrl = nvme_rdma_stop_ctrl,
};
static inline bool
@@ -2025,15 +2030,26 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
{
struct nvme_rdma_ctrl *ctrl;
+ struct nvme_rdma_device *ndev;
+ bool found = false;
+
+ mutex_lock(&device_list_mutex);
+ list_for_each_entry(ndev, &device_list, entry) {
+ if (ndev->dev == ib_device) {
+ found = true;
+ break;
+ }
+ }
+ mutex_unlock(&device_list_mutex);
+
+ if (!found)
+ return;
/* Delete all controllers using this device */
mutex_lock(&nvme_rdma_ctrl_mutex);
list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
if (ctrl->device->dev != ib_device)
continue;
- dev_info(ctrl->ctrl.device,
- "Removing ctrl: NQN \"%s\", addr %pISp\n",
- ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
nvme_delete_ctrl(&ctrl->ctrl);
}
mutex_unlock(&nvme_rdma_ctrl_mutex);
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index e6b2d2af81b6..ad9ff27234b5 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -23,6 +23,15 @@
static const struct config_item_type nvmet_host_type;
static const struct config_item_type nvmet_subsys_type;
+static const struct nvmet_transport_name {
+ u8 type;
+ const char *name;
+} nvmet_transport_names[] = {
+ { NVMF_TRTYPE_RDMA, "rdma" },
+ { NVMF_TRTYPE_FC, "fc" },
+ { NVMF_TRTYPE_LOOP, "loop" },
+};
+
/*
* nvmet_port Generic ConfigFS definitions.
* Used in any place in the ConfigFS tree that refers to an address.
@@ -208,43 +217,30 @@ CONFIGFS_ATTR(nvmet_, addr_trsvcid);
static ssize_t nvmet_addr_trtype_show(struct config_item *item,
char *page)
{
- switch (to_nvmet_port(item)->disc_addr.trtype) {
- case NVMF_TRTYPE_RDMA:
- return sprintf(page, "rdma\n");
- case NVMF_TRTYPE_LOOP:
- return sprintf(page, "loop\n");
- case NVMF_TRTYPE_FC:
- return sprintf(page, "fc\n");
- default:
- return sprintf(page, "\n");
+ struct nvmet_port *port = to_nvmet_port(item);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
+ if (port->disc_addr.trtype != nvmet_transport_names[i].type)
+ continue;
+ return sprintf(page, "%s\n", nvmet_transport_names[i].name);
}
+
+ return sprintf(page, "\n");
}
static void nvmet_port_init_tsas_rdma(struct nvmet_port *port)
{
- port->disc_addr.trtype = NVMF_TRTYPE_RDMA;
- memset(&port->disc_addr.tsas.rdma, 0, NVMF_TSAS_SIZE);
port->disc_addr.tsas.rdma.qptype = NVMF_RDMA_QPTYPE_CONNECTED;
port->disc_addr.tsas.rdma.prtype = NVMF_RDMA_PRTYPE_NOT_SPECIFIED;
port->disc_addr.tsas.rdma.cms = NVMF_RDMA_CMS_RDMA_CM;
}
-static void nvmet_port_init_tsas_loop(struct nvmet_port *port)
-{
- port->disc_addr.trtype = NVMF_TRTYPE_LOOP;
- memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
-}
-
-static void nvmet_port_init_tsas_fc(struct nvmet_port *port)
-{
- port->disc_addr.trtype = NVMF_TRTYPE_FC;
- memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
-}
-
static ssize_t nvmet_addr_trtype_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
+ int i;
if (port->enabled) {
pr_err("Cannot modify address while enabled\n");
@@ -252,17 +248,18 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item,
return -EACCES;
}
- if (sysfs_streq(page, "rdma")) {
- nvmet_port_init_tsas_rdma(port);
- } else if (sysfs_streq(page, "loop")) {
- nvmet_port_init_tsas_loop(port);
- } else if (sysfs_streq(page, "fc")) {
- nvmet_port_init_tsas_fc(port);
- } else {
- pr_err("Invalid value '%s' for trtype\n", page);
- return -EINVAL;
+ for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
+ if (sysfs_streq(page, nvmet_transport_names[i].name))
+ goto found;
}
+ pr_err("Invalid value '%s' for trtype\n", page);
+ return -EINVAL;
+found:
+ memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
+ port->disc_addr.trtype = nvmet_transport_names[i].type;
+ if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA)
+ nvmet_port_init_tsas_rdma(port);
return count;
}
@@ -333,13 +330,13 @@ out_unlock:
return ret ? ret : count;
}
+CONFIGFS_ATTR(nvmet_ns_, device_uuid);
+
static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page)
{
return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid);
}
-CONFIGFS_ATTR(nvmet_ns_, device_uuid);
-
static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
const char *page, size_t count)
{
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index a78029e4e5f4..e95424f172fd 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -18,7 +18,7 @@
#include "nvmet.h"
-static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
+static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);
/*
@@ -137,7 +137,7 @@ static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
schedule_work(&ctrl->async_event_work);
}
-int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
+int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
int ret = 0;
@@ -152,7 +152,7 @@ int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);
-void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops)
+void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
down_write(&nvmet_config_sem);
nvmet_transports[ops->type] = NULL;
@@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
int nvmet_enable_port(struct nvmet_port *port)
{
- struct nvmet_fabrics_ops *ops;
+ const struct nvmet_fabrics_ops *ops;
int ret;
lockdep_assert_held(&nvmet_config_sem);
@@ -195,7 +195,7 @@ int nvmet_enable_port(struct nvmet_port *port)
void nvmet_disable_port(struct nvmet_port *port)
{
- struct nvmet_fabrics_ops *ops;
+ const struct nvmet_fabrics_ops *ops;
lockdep_assert_held(&nvmet_config_sem);
@@ -500,7 +500,7 @@ int nvmet_sq_init(struct nvmet_sq *sq)
EXPORT_SYMBOL_GPL(nvmet_sq_init);
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
- struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
+ struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
u8 flags = req->cmd->common.flags;
u16 status;
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 8f3b57b4c97b..a72425d8bce0 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -43,7 +43,8 @@ void nvmet_referral_disable(struct nvmet_port *port)
}
static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
- struct nvmet_port *port, char *subsys_nqn, u8 type, u32 numrec)
+ struct nvmet_port *port, char *subsys_nqn, char *traddr,
+ u8 type, u32 numrec)
{
struct nvmf_disc_rsp_page_entry *e = &hdr->entries[numrec];
@@ -56,11 +57,30 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
e->asqsz = cpu_to_le16(NVME_AQ_DEPTH);
e->subtype = type;
memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
- memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
+ memcpy(e->traddr, traddr, NVMF_TRADDR_SIZE);
memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE);
memcpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE);
}
+/*
+ * nvmet_set_disc_traddr - set a correct discovery log entry traddr
+ *
+ * IP based transports (e.g RDMA) can listen on "any" ipv4/ipv6 addresses
+ * (INADDR_ANY or IN6ADDR_ANY_INIT). The discovery log page traddr reply
+ * must not contain that "any" IP address. If the transport implements
+ * .disc_traddr, use it. this callback will set the discovery traddr
+ * from the req->port address in case the port in question listens
+ * "any" IP address.
+ */
+static void nvmet_set_disc_traddr(struct nvmet_req *req, struct nvmet_port *port,
+ char *traddr)
+{
+ if (req->ops->disc_traddr)
+ req->ops->disc_traddr(req, port, traddr);
+ else
+ memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
+}
+
static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
{
const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
@@ -90,8 +110,11 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
if (!nvmet_host_allowed(req, p->subsys, ctrl->hostnqn))
continue;
if (residual_len >= entry_size) {
+ char traddr[NVMF_TRADDR_SIZE];
+
+ nvmet_set_disc_traddr(req, req->port, traddr);
nvmet_format_discovery_entry(hdr, req->port,
- p->subsys->subsysnqn,
+ p->subsys->subsysnqn, traddr,
NVME_NQN_NVME, numrec);
residual_len -= entry_size;
}
@@ -102,6 +125,7 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
if (residual_len >= entry_size) {
nvmet_format_discovery_entry(hdr, r,
NVME_DISC_SUBSYS_NAME,
+ r->disc_addr.traddr,
NVME_NQN_DISC, numrec);
residual_len -= entry_size;
}
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 9b39a6cb1935..33ee8d3145f8 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -87,6 +87,7 @@ struct nvmet_fc_fcp_iod {
struct nvmet_req req;
struct work_struct work;
struct work_struct done_work;
+ struct work_struct defer_work;
struct nvmet_fc_tgtport *tgtport;
struct nvmet_fc_tgt_queue *queue;
@@ -224,6 +225,7 @@ static DEFINE_IDA(nvmet_fc_tgtport_cnt);
static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work);
static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work);
+static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work);
static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc);
static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc);
static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
@@ -429,6 +431,7 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport,
for (i = 0; i < queue->sqsize; fod++, i++) {
INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work);
INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work);
+ INIT_WORK(&fod->defer_work, nvmet_fc_fcp_rqst_op_defer_work);
fod->tgtport = tgtport;
fod->queue = queue;
fod->active = false;
@@ -512,6 +515,17 @@ nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport,
}
static void
+nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work)
+{
+ struct nvmet_fc_fcp_iod *fod =
+ container_of(work, struct nvmet_fc_fcp_iod, defer_work);
+
+ /* Submit deferred IO for processing */
+ nvmet_fc_queue_fcp_req(fod->tgtport, fod->queue, fod->fcpreq);
+
+}
+
+static void
nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
struct nvmet_fc_fcp_iod *fod)
{
@@ -568,13 +582,12 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
/* inform LLDD IO is now being processed */
tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq);
- /* Submit deferred IO for processing */
- nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
-
/*
* Leave the queue lookup get reference taken when
* fod was originally allocated.
*/
+
+ queue_work(queue->work_q, &fod->defer_work);
}
static int
@@ -1550,7 +1563,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
static void nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req);
-static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops;
+static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops;
static void
nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq)
@@ -2505,7 +2518,7 @@ nvmet_fc_remove_port(struct nvmet_port *port)
/* nothing to do */
}
-static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
+static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_FC,
.msdbd = 1,
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 861d1509b22b..a350765d2d5c 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -71,7 +71,7 @@ static DEFINE_MUTEX(nvme_loop_ctrl_mutex);
static void nvme_loop_queue_response(struct nvmet_req *nvme_req);
static void nvme_loop_delete_ctrl(struct nvmet_ctrl *ctrl);
-static struct nvmet_fabrics_ops nvme_loop_ops;
+static const struct nvmet_fabrics_ops nvme_loop_ops;
static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue)
{
@@ -675,7 +675,7 @@ static void nvme_loop_remove_port(struct nvmet_port *port)
nvmet_loop_port = NULL;
}
-static struct nvmet_fabrics_ops nvme_loop_ops = {
+static const struct nvmet_fabrics_ops nvme_loop_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_LOOP,
.add_port = nvme_loop_add_port,
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 417f6c0331cc..15fd84ab21f8 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -130,7 +130,7 @@ struct nvmet_ctrl {
struct delayed_work ka_work;
struct work_struct fatal_err_work;
- struct nvmet_fabrics_ops *ops;
+ const struct nvmet_fabrics_ops *ops;
char subsysnqn[NVMF_NQN_FIELD_LEN];
char hostnqn[NVMF_NQN_FIELD_LEN];
@@ -209,6 +209,8 @@ struct nvmet_fabrics_ops {
int (*add_port)(struct nvmet_port *port);
void (*remove_port)(struct nvmet_port *port);
void (*delete_ctrl)(struct nvmet_ctrl *ctrl);
+ void (*disc_traddr)(struct nvmet_req *req,
+ struct nvmet_port *port, char *traddr);
};
#define NVMET_MAX_INLINE_BIOVEC 8
@@ -231,7 +233,7 @@ struct nvmet_req {
struct nvmet_port *port;
void (*execute)(struct nvmet_req *req);
- struct nvmet_fabrics_ops *ops;
+ const struct nvmet_fabrics_ops *ops;
};
static inline void nvmet_set_status(struct nvmet_req *req, u16 status)
@@ -267,7 +269,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
- struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops);
+ struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
void nvmet_req_execute(struct nvmet_req *req);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
@@ -301,8 +303,8 @@ void nvmet_ns_disable(struct nvmet_ns *ns);
struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
void nvmet_ns_free(struct nvmet_ns *ns);
-int nvmet_register_transport(struct nvmet_fabrics_ops *ops);
-void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops);
+int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
+void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
int nvmet_enable_port(struct nvmet_port *port);
void nvmet_disable_port(struct nvmet_port *port);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 978e169c11bf..52e0c5d579a7 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -77,7 +77,6 @@ enum nvmet_rdma_queue_state {
NVMET_RDMA_Q_CONNECTING,
NVMET_RDMA_Q_LIVE,
NVMET_RDMA_Q_DISCONNECTING,
- NVMET_RDMA_IN_DEVICE_REMOVAL,
};
struct nvmet_rdma_queue {
@@ -137,7 +136,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
-static struct nvmet_fabrics_ops nvmet_rdma_ops;
+static const struct nvmet_fabrics_ops nvmet_rdma_ops;
/* XXX: really should move to a generic header sooner or later.. */
static inline u32 get_unaligned_le24(const u8 *p)
@@ -914,8 +913,11 @@ err_destroy_cq:
static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
{
- ib_drain_qp(queue->cm_id->qp);
- rdma_destroy_qp(queue->cm_id);
+ struct ib_qp *qp = queue->cm_id->qp;
+
+ ib_drain_qp(qp);
+ rdma_destroy_id(queue->cm_id);
+ ib_destroy_qp(qp);
ib_free_cq(queue->cq);
}
@@ -940,15 +942,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
{
struct nvmet_rdma_queue *queue =
container_of(w, struct nvmet_rdma_queue, release_work);
- struct rdma_cm_id *cm_id = queue->cm_id;
struct nvmet_rdma_device *dev = queue->dev;
- enum nvmet_rdma_queue_state state = queue->state;
nvmet_rdma_free_queue(queue);
- if (state != NVMET_RDMA_IN_DEVICE_REMOVAL)
- rdma_destroy_id(cm_id);
-
kref_put(&dev->ref, nvmet_rdma_free_dev);
}
@@ -1153,8 +1150,11 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
}
ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
- if (ret)
- goto release_queue;
+ if (ret) {
+ schedule_work(&queue->release_work);
+ /* Destroying rdma_cm id is not needed here */
+ return 0;
+ }
mutex_lock(&nvmet_rdma_queue_mutex);
list_add_tail(&queue->queue_list, &nvmet_rdma_queue_list);
@@ -1162,8 +1162,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
return 0;
-release_queue:
- nvmet_rdma_free_queue(queue);
put_device:
kref_put(&ndev->ref, nvmet_rdma_free_dev);
@@ -1209,7 +1207,6 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
case NVMET_RDMA_Q_CONNECTING:
case NVMET_RDMA_Q_LIVE:
queue->state = NVMET_RDMA_Q_DISCONNECTING;
- case NVMET_RDMA_IN_DEVICE_REMOVAL:
disconnect = true;
break;
case NVMET_RDMA_Q_DISCONNECTING:
@@ -1322,13 +1319,7 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
case RDMA_CM_EVENT_ADDR_CHANGE:
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
- /*
- * We might end up here when we already freed the qp
- * which means queue release sequence is in progress,
- * so don't get in the way...
- */
- if (queue)
- nvmet_rdma_queue_disconnect(queue);
+ nvmet_rdma_queue_disconnect(queue);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
ret = nvmet_rdma_device_removal(cm_id, queue);
@@ -1445,7 +1436,24 @@ static void nvmet_rdma_remove_port(struct nvmet_port *port)
rdma_destroy_id(cm_id);
}
-static struct nvmet_fabrics_ops nvmet_rdma_ops = {
+static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
+ struct nvmet_port *port, char *traddr)
+{
+ struct rdma_cm_id *cm_id = port->priv;
+
+ if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) {
+ struct nvmet_rdma_rsp *rsp =
+ container_of(req, struct nvmet_rdma_rsp, req);
+ struct rdma_cm_id *req_cm_id = rsp->queue->cm_id;
+ struct sockaddr *addr = (void *)&req_cm_id->route.addr.src_addr;
+
+ sprintf(traddr, "%pISc", addr);
+ } else {
+ memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
+ }
+}
+
+static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_RDMA,
.sqe_inline_size = NVMET_RDMA_INLINE_DATA_SIZE,
@@ -1455,13 +1463,31 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = {
.remove_port = nvmet_rdma_remove_port,
.queue_response = nvmet_rdma_queue_response,
.delete_ctrl = nvmet_rdma_delete_ctrl,
+ .disc_traddr = nvmet_rdma_disc_port_addr,
};
static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
{
struct nvmet_rdma_queue *queue, *tmp;
+ struct nvmet_rdma_device *ndev;
+ bool found = false;
- /* Device is being removed, delete all queues using this device */
+ mutex_lock(&device_list_mutex);
+ list_for_each_entry(ndev, &device_list, entry) {
+ if (ndev->device == ib_device) {
+ found = true;
+ break;
+ }
+ }
+ mutex_unlock(&device_list_mutex);
+
+ if (!found)
+ return;
+
+ /*
+ * IB Device that is used by nvmet controllers is being removed,
+ * delete all queues using this device.
+ */
mutex_lock(&nvmet_rdma_queue_mutex);
list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list,
queue_list) {
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index ecef8e73d40b..b5692a284bd8 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3208,7 +3208,7 @@ static void dasd_setup_queue(struct dasd_block *block)
} else {
max = block->base->discipline->max_blocks << block->s2b_shift;
}
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
q->limits.max_dev_sectors = max;
blk_queue_logical_block_size(q, logical_block_size);
blk_queue_max_hw_sectors(q, max);
@@ -3231,7 +3231,7 @@ static void dasd_setup_queue(struct dasd_block *block)
blk_queue_max_discard_sectors(q, max_discard_sectors);
blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
}
}
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 9cae08b36b80..0a312e450207 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -633,7 +633,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
dev_info->gd->private_data = dev_info;
blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request);
blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096);
- queue_flag_set_unlocked(QUEUE_FLAG_DAX, dev_info->dcssblk_queue);
+ blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->dcssblk_queue);
seg_byte_size = (dev_info->end - dev_info->start + 1);
set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index b4130c7880d8..b1fcb76dd272 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -472,8 +472,8 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
blk_queue_logical_block_size(rq, 1 << 12);
blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */
blk_queue_max_segments(rq, nr_max_blk);
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rq);
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, rq);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, rq);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq);
bdev->gendisk = alloc_disk(SCM_NR_PARTS);
if (!bdev->gendisk) {
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 2a6334ca750e..3df5d68d09f0 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -348,8 +348,8 @@ static int __init xpram_setup_blkdev(void)
put_disk(xpram_disks[i]);
goto out;
}
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xpram_queues[i]);
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_queues[i]);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]);
blk_queue_make_request(xpram_queues[i], xpram_make_request);
blk_queue_logical_block_size(xpram_queues[i], 4096);
}
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index ca218c82321f..6162cf57a20a 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -961,7 +961,7 @@ static int zfcp_fc_exec_els_job(struct bsg_job *job,
d_id = ntoh24(bsg_request->rqst_data.h_els.port_id);
els->handler = zfcp_fc_ct_els_job_handler;
- return zfcp_fsf_send_els(adapter, d_id, els, job->req->timeout / HZ);
+ return zfcp_fsf_send_els(adapter, d_id, els, job->timeout / HZ);
}
static int zfcp_fc_exec_ct_job(struct bsg_job *job,
@@ -980,7 +980,7 @@ static int zfcp_fc_exec_ct_job(struct bsg_job *job,
return ret;
ct->handler = zfcp_fc_ct_job_handler;
- ret = zfcp_fsf_send_ct(wka_port, ct, NULL, job->req->timeout / HZ);
+ ret = zfcp_fsf_send_ct(wka_port, ct, NULL, job->timeout / HZ);
if (ret)
zfcp_fc_wka_port_put(wka_port);
diff --git a/drivers/scsi/gdth.h b/drivers/scsi/gdth.h
index 95fc720c1b30..e6e5ccb1e0f3 100644
--- a/drivers/scsi/gdth.h
+++ b/drivers/scsi/gdth.h
@@ -178,9 +178,6 @@
#define MSG_SIZE 34 /* size of message structure */
#define MSG_REQUEST 0 /* async. event: message */
-/* cacheservice defines */
-#define SECTOR_SIZE 0x200 /* always 512 bytes per sec. */
-
/* DPMEM constants */
#define DPMEM_MAGIC 0xC0FFEE11
#define IC_HEADER_BYTES 48
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 6d886b13dbe9..2ba4b68fdb73 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -949,7 +949,7 @@ static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param)
static int iscsi_sw_tcp_slave_alloc(struct scsi_device *sdev)
{
- set_bit(QUEUE_FLAG_BIDI, &sdev->request_queue->queue_flags);
+ blk_queue_flag_set(QUEUE_FLAG_BIDI, sdev->request_queue);
return 0;
}
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index dde0798b8a91..7a37ace4239b 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -1864,7 +1864,7 @@ megasas_set_nvme_device_properties(struct scsi_device *sdev, u32 max_io_size)
blk_queue_max_hw_sectors(sdev->request_queue, (max_io_size / 512));
- queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, sdev->request_queue);
+ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue);
blk_queue_virt_boundary(sdev->request_queue, mr_nvme_pg_size - 1);
}
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 5ec3b74e8aed..ce97cde3b41c 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -1894,7 +1894,7 @@ megasas_is_prp_possible(struct megasas_instance *instance,
* then sending IOs with holes.
*
* Though driver can request block layer to disable IO merging by calling-
- * queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, sdev->request_queue) but
+ * blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue) but
* user may tune sysfs parameter- nomerges again to 0 or 1.
*
* If in future IO scheduling is enabled with SCSI BLK MQ,
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index a1cb0236c550..aee1a0e1c600 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2352,7 +2352,7 @@ scsih_slave_configure(struct scsi_device *sdev)
** merged and can eliminate holes created during merging
** operation.
**/
- queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES,
+ blk_queue_flag_set(QUEUE_FLAG_NOMERGES,
sdev->request_queue);
blk_queue_virt_boundary(sdev->request_queue,
ioc->page_size - 1);
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 5c5dcca4d1da..822d22336e15 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -6830,7 +6830,7 @@ static int qla2xxx_map_queues(struct Scsi_Host *shost)
if (USER_CTRL_IRQ(vha->hw))
rc = blk_mq_map_queues(&shost->tag_set);
else
- rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev);
+ rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev, 0);
return rc;
}
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index a5986dae9020..1cb353f18d08 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -3897,7 +3897,7 @@ static int scsi_debug_slave_alloc(struct scsi_device *sdp)
if (sdebug_verbose)
pr_info("slave_alloc <%u %u %u %llu>\n",
sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
- queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue);
+ blk_queue_flag_set(QUEUE_FLAG_BIDI, sdp->request_queue);
return 0;
}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c84f931388f2..ed79d3925860 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2144,8 +2144,6 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
{
struct device *dev = shost->dma_dev;
- queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
-
/*
* this limit is imposed by hardware restrictions
*/
@@ -2227,7 +2225,7 @@ struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev)
struct Scsi_Host *shost = sdev->host;
struct request_queue *q;
- q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
+ q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
if (!q)
return NULL;
q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
@@ -2243,6 +2241,7 @@ struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev)
}
__scsi_init_queue(shost, q);
+ blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
blk_queue_prep_rq(q, scsi_prep_fn);
blk_queue_unprep_rq(q, scsi_unprep_fn);
blk_queue_softirq_done(q, scsi_softirq_done);
@@ -2274,6 +2273,7 @@ struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev)
sdev->request_queue->queuedata = sdev;
__scsi_init_queue(sdev->host, sdev->request_queue);
+ blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, sdev->request_queue);
return sdev->request_queue;
}
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 91b90f672d23..7142c8be1099 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1292,8 +1292,7 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
transport_add_device(&sdev->sdev_gendev);
sdev->is_visible = 1;
- error = bsg_register_queue(rq, &sdev->sdev_gendev, NULL, NULL);
-
+ error = bsg_scsi_register_queue(rq, &sdev->sdev_gendev);
if (error)
/* we're treating error on bsg register as non-fatal,
* so pretend nothing went wrong */
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 736a1f4f9676..08acbabfae07 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -227,8 +227,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
* by default assume old behaviour and bounce for any highmem page
*/
blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
- queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
- queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
+ blk_queue_flag_set(QUEUE_FLAG_BIDI, q);
return 0;
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 1fa84d6a0f8b..a6201e696ab9 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -714,7 +714,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
case SD_LBP_FULL:
case SD_LBP_DISABLE:
blk_queue_max_discard_sectors(q, 0);
- queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
return;
case SD_LBP_UNMAP:
@@ -747,7 +747,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
}
blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
}
static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
@@ -2955,8 +2955,8 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp)
rot = get_unaligned_be16(&buffer[4]);
if (rot == 1) {
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
- queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
}
if (sdkp->device->type == TYPE_ZBC) {
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index b2880c7709e6..10c94011c8a8 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -5348,7 +5348,7 @@ static int pqi_map_queues(struct Scsi_Host *shost)
{
struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost);
- return blk_mq_pci_map_queues(&shost->tag_set, ctrl_info->pci_dev);
+ return blk_mq_pci_map_queues(&shost->tag_set, ctrl_info->pci_dev, 0);
}
static int pqi_getpciinfo_ioctl(struct pqi_ctrl_info *ctrl_info,
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 9be34d37c356..0cf25d789d05 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -525,6 +525,8 @@ static int sr_block_open(struct block_device *bdev, fmode_t mode)
struct scsi_cd *cd;
int ret = -ENXIO;
+ check_disk_change(bdev);
+
mutex_lock(&sr_mutex);
cd = scsi_cd_get(bdev->bd_disk);
if (cd) {
diff --git a/drivers/staging/rts5208/rtsx_chip.h b/drivers/staging/rts5208/rtsx_chip.h
index 4f6e3c1c4621..8a8cd5d3cf7e 100644
--- a/drivers/staging/rts5208/rtsx_chip.h
+++ b/drivers/staging/rts5208/rtsx_chip.h
@@ -339,13 +339,13 @@ struct sense_data_t {
#define CHK_BIT(data, idx) ((data) & (1 << (idx)))
/* SG descriptor */
-#define SG_INT 0x04
-#define SG_END 0x02
-#define SG_VALID 0x01
+#define RTSX_SG_INT 0x04
+#define RTSX_SG_END 0x02
+#define RTSX_SG_VALID 0x01
-#define SG_NO_OP 0x00
-#define SG_TRANS_DATA (0x02 << 4)
-#define SG_LINK_DESC (0x03 << 4)
+#define RTSX_SG_NO_OP 0x00
+#define RTSX_SG_TRANS_DATA (0x02 << 4)
+#define RTSX_SG_LINK_DESC (0x03 << 4)
struct rtsx_chip;
diff --git a/drivers/staging/rts5208/rtsx_transport.c b/drivers/staging/rts5208/rtsx_transport.c
index 8b57e17ee6d3..716cce2bd7f0 100644
--- a/drivers/staging/rts5208/rtsx_transport.c
+++ b/drivers/staging/rts5208/rtsx_transport.c
@@ -308,7 +308,7 @@ static inline void rtsx_add_sg_tbl(
do {
if (len > 0x80000) {
temp_len = 0x80000;
- temp_opt = option & (~SG_END);
+ temp_opt = option & (~RTSX_SG_END);
} else {
temp_len = len;
temp_opt = option;
@@ -407,9 +407,9 @@ static int rtsx_transfer_sglist_adma_partial(struct rtsx_chip *chip, u8 card,
*index = *index + 1;
}
if ((i == (sg_cnt - 1)) || !resid)
- option = SG_VALID | SG_END | SG_TRANS_DATA;
+ option = RTSX_SG_VALID | RTSX_SG_END | RTSX_SG_TRANS_DATA;
else
- option = SG_VALID | SG_TRANS_DATA;
+ option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA;
rtsx_add_sg_tbl(chip, (u32)addr, (u32)len, option);
@@ -555,9 +555,9 @@ static int rtsx_transfer_sglist_adma(struct rtsx_chip *chip, u8 card,
(unsigned int)addr, len);
if (j == (sg_cnt - 1))
- option = SG_VALID | SG_END | SG_TRANS_DATA;
+ option = RTSX_SG_VALID | RTSX_SG_END | RTSX_SG_TRANS_DATA;
else
- option = SG_VALID | SG_TRANS_DATA;
+ option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA;
rtsx_add_sg_tbl(chip, (u32)addr, (u32)len, option);
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 9eb10d34682c..8e223799347a 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -26,6 +26,7 @@
#include <linux/delay.h>
#include <linux/sched/signal.h>
#include <asm/unaligned.h>
+#include <linux/inet.h>
#include <net/ipv6.h>
#include <scsi/scsi_proto.h>
#include <scsi/iscsi_proto.h>
@@ -3291,30 +3292,6 @@ iscsit_send_task_mgt_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
return conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, NULL, 0);
}
-static bool iscsit_check_inaddr_any(struct iscsi_np *np)
-{
- bool ret = false;
-
- if (np->np_sockaddr.ss_family == AF_INET6) {
- const struct sockaddr_in6 sin6 = {
- .sin6_addr = IN6ADDR_ANY_INIT };
- struct sockaddr_in6 *sock_in6 =
- (struct sockaddr_in6 *)&np->np_sockaddr;
-
- if (!memcmp(sock_in6->sin6_addr.s6_addr,
- sin6.sin6_addr.s6_addr, 16))
- ret = true;
- } else {
- struct sockaddr_in * sock_in =
- (struct sockaddr_in *)&np->np_sockaddr;
-
- if (sock_in->sin_addr.s_addr == htonl(INADDR_ANY))
- ret = true;
- }
-
- return ret;
-}
-
#define SENDTARGETS_BUF_LIMIT 32768U
static int
@@ -3393,7 +3370,6 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
list_for_each_entry(tpg_np, &tpg->tpg_gnp_list,
tpg_np_list) {
struct iscsi_np *np = tpg_np->tpg_np;
- bool inaddr_any = iscsit_check_inaddr_any(np);
struct sockaddr_storage *sockaddr;
if (np->np_network_transport != network_transport)
@@ -3422,7 +3398,7 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
}
}
- if (inaddr_any)
+ if (inet_addr_is_any((struct sockaddr *)&np->np_sockaddr))
sockaddr = &conn->local_sockaddr;
else
sockaddr = &np->np_sockaddr;
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 9cd4ffe76c07..60d5b918c4ac 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -309,7 +309,7 @@ static int tcm_loop_target_reset(struct scsi_cmnd *sc)
static int tcm_loop_slave_alloc(struct scsi_device *sd)
{
- set_bit(QUEUE_FLAG_BIDI, &sd->request_queue->queue_flags);
+ blk_queue_flag_set(QUEUE_FLAG_BIDI, sd->request_queue);
return 0;
}