summaryrefslogtreecommitdiff
path: root/drivers/md/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c230
1 files changed, 150 insertions, 80 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f567f536b529..153bc766bc5a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -101,6 +101,8 @@ static void mddev_detach(struct mddev *mddev);
* count by 2 for every hour elapsed between read errors.
*/
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
+/* Default safemode delay: 200 msec */
+#define DEFAULT_SAFEMODE_DELAY ((200 * HZ)/1000 +1)
/*
* Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
* is 1000 KB/sec, so the extra system load does not show up that much.
@@ -199,7 +201,7 @@ static int rdevs_init_serial(struct mddev *mddev)
static int rdev_need_serial(struct md_rdev *rdev)
{
return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
- rdev->bdev->bd_queue->nr_hw_queues != 1 &&
+ rdev->bdev->bd_disk->queue->nr_hw_queues != 1 &&
test_bit(WriteMostly, &rdev->flags));
}
@@ -463,24 +465,46 @@ check_suspended:
}
EXPORT_SYMBOL(md_handle_request);
-static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
+struct md_io {
+ struct mddev *mddev;
+ bio_end_io_t *orig_bi_end_io;
+ void *orig_bi_private;
+ unsigned long start_time;
+};
+
+static void md_end_io(struct bio *bio)
+{
+ struct md_io *md_io = bio->bi_private;
+ struct mddev *mddev = md_io->mddev;
+
+ disk_end_io_acct(mddev->gendisk, bio_op(bio), md_io->start_time);
+
+ bio->bi_end_io = md_io->orig_bi_end_io;
+ bio->bi_private = md_io->orig_bi_private;
+
+ mempool_free(md_io, &mddev->md_io_pool);
+
+ if (bio->bi_end_io)
+ bio->bi_end_io(bio);
+}
+
+static blk_qc_t md_submit_bio(struct bio *bio)
{
const int rw = bio_data_dir(bio);
- const int sgrp = op_stat_group(bio_op(bio));
struct mddev *mddev = bio->bi_disk->private_data;
- unsigned int sectors;
- if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
+ if (mddev == NULL || mddev->pers == NULL) {
bio_io_error(bio);
return BLK_QC_T_NONE;
}
- blk_queue_split(q, &bio);
-
- if (mddev == NULL || mddev->pers == NULL) {
+ if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
bio_io_error(bio);
return BLK_QC_T_NONE;
}
+
+ blk_queue_split(&bio);
+
if (mddev->ro == 1 && unlikely(rw == WRITE)) {
if (bio_sectors(bio) != 0)
bio->bi_status = BLK_STS_IOERR;
@@ -488,21 +512,27 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE;
}
- /*
- * save the sectors now since our bio can
- * go away inside make_request
- */
- sectors = bio_sectors(bio);
+ if (bio->bi_end_io != md_end_io) {
+ struct md_io *md_io;
+
+ md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO);
+ md_io->mddev = mddev;
+ md_io->orig_bi_end_io = bio->bi_end_io;
+ md_io->orig_bi_private = bio->bi_private;
+
+ bio->bi_end_io = md_end_io;
+ bio->bi_private = md_io;
+
+ md_io->start_time = disk_start_io_acct(mddev->gendisk,
+ bio_sectors(bio),
+ bio_op(bio));
+ }
+
/* bio could be mergeable after passing to underlayer */
bio->bi_opf &= ~REQ_NOMERGE;
md_handle_request(mddev, bio);
- part_stat_lock();
- part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
- part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
- part_stat_unlock();
-
return BLK_QC_T_NONE;
}
@@ -549,26 +579,6 @@ void mddev_resume(struct mddev *mddev)
}
EXPORT_SYMBOL_GPL(mddev_resume);
-int mddev_congested(struct mddev *mddev, int bits)
-{
- struct md_personality *pers = mddev->pers;
- int ret = 0;
-
- rcu_read_lock();
- if (mddev->suspended)
- ret = 1;
- else if (pers && pers->congested)
- ret = pers->congested(mddev, bits);
- rcu_read_unlock();
- return ret;
-}
-EXPORT_SYMBOL_GPL(mddev_congested);
-static int md_congested(void *data, int bits)
-{
- struct mddev *mddev = data;
- return mddev_congested(mddev, bits);
-}
-
/*
* Generic flush handling for md
*/
@@ -948,7 +958,8 @@ static void super_written(struct bio *bio)
struct mddev *mddev = rdev->mddev;
if (bio->bi_status) {
- pr_err("md: super_written gets error=%d\n", bio->bi_status);
+ pr_err("md: %s gets error=%d\n", __func__,
+ blk_status_to_errno(bio->bi_status));
md_error(mddev, rdev);
if (!test_bit(Faulty, &rdev->flags)
&& (bio->bi_opf & MD_FAILFAST)) {
@@ -2163,6 +2174,24 @@ retry:
sb->sb_csum = calc_sb_1_csum(sb);
}
+static sector_t super_1_choose_bm_space(sector_t dev_size)
+{
+ sector_t bm_space;
+
+ /* if the device is bigger than 8Gig, save 64k for bitmap
+ * usage, if bigger than 200Gig, save 128k
+ */
+ if (dev_size < 64*2)
+ bm_space = 0;
+ else if (dev_size - 64*2 >= 200*1024*1024*2)
+ bm_space = 128*2;
+ else if (dev_size - 4*2 > 8*1024*1024*2)
+ bm_space = 64*2;
+ else
+ bm_space = 4*2;
+ return bm_space;
+}
+
static unsigned long long
super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
@@ -2183,13 +2212,22 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
return 0;
} else {
/* minor version 0; superblock after data */
- sector_t sb_start;
- sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
+ sector_t sb_start, bm_space;
+ sector_t dev_size = i_size_read(rdev->bdev->bd_inode) >> 9;
+
+ /* 8K is for superblock */
+ sb_start = dev_size - 8*2;
sb_start &= ~(sector_t)(4*2 - 1);
- max_sectors = rdev->sectors + sb_start - rdev->sb_start;
+
+ bm_space = super_1_choose_bm_space(dev_size);
+
+ /* Space that can be used to store date needs to decrease
+ * superblock bitmap space and bad block space(4K)
+ */
+ max_sectors = sb_start - bm_space - 4*2;
+
if (!num_sectors || num_sectors > max_sectors)
num_sectors = max_sectors;
- rdev->sb_start = sb_start;
}
sb = page_address(rdev->sb_page);
sb->data_size = cpu_to_le64(num_sectors);
@@ -2441,9 +2479,13 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
goto fail;
ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
- if (sysfs_create_link(&rdev->kobj, ko, "block"))
- /* failure here is OK */;
+ /* failure here is OK */
+ err = sysfs_create_link(&rdev->kobj, ko, "block");
rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
+ rdev->sysfs_unack_badblocks =
+ sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
+ rdev->sysfs_badblocks =
+ sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
list_add_rcu(&rdev->same_set, &mddev->disks);
bd_link_disk_holder(rdev->bdev, mddev->gendisk);
@@ -2477,7 +2519,11 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
rdev->mddev = NULL;
sysfs_remove_link(&rdev->kobj, "block");
sysfs_put(rdev->sysfs_state);
+ sysfs_put(rdev->sysfs_unack_badblocks);
+ sysfs_put(rdev->sysfs_badblocks);
rdev->sysfs_state = NULL;
+ rdev->sysfs_unack_badblocks = NULL;
+ rdev->sysfs_badblocks = NULL;
rdev->badblocks.count = 0;
/* We need to delay this, otherwise we can deadlock when
* writing to 'remove' to "dev/state". We also need
@@ -2822,7 +2868,7 @@ rewrite:
goto repeat;
wake_up(&mddev->sb_wait);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
- sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ sysfs_notify_dirent_safe(mddev->sysfs_completed);
rdev_for_each(rdev, mddev) {
if (test_and_clear_bit(FaultRecorded, &rdev->flags))
@@ -3202,8 +3248,8 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
return err;
} else
sysfs_notify_dirent_safe(rdev->sysfs_state);
- if (sysfs_link_rdev(rdev->mddev, rdev))
- /* failure here is OK */;
+ /* failure here is OK */;
+ sysfs_link_rdev(rdev->mddev, rdev);
/* don't wakeup anyone, leave that to userspace. */
} else {
if (slot >= rdev->mddev->raid_disks &&
@@ -4075,7 +4121,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev_resume(mddev);
if (!mddev->thread)
md_update_sb(mddev, 1);
- sysfs_notify(&mddev->kobj, NULL, "level");
+ sysfs_notify_dirent_safe(mddev->sysfs_level);
md_new_event(mddev);
rv = len;
out_unlock:
@@ -4188,6 +4234,14 @@ static struct md_sysfs_entry md_raid_disks =
__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
static ssize_t
+uuid_show(struct mddev *mddev, char *page)
+{
+ return sprintf(page, "%pU\n", mddev->uuid);
+}
+static struct md_sysfs_entry md_uuid =
+__ATTR(uuid, S_IRUGO, uuid_show, NULL);
+
+static ssize_t
chunk_size_show(struct mddev *mddev, char *page)
{
if (mddev->reshape_position != MaxSector &&
@@ -4828,7 +4882,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
}
if (err)
return err;
- sysfs_notify(&mddev->kobj, NULL, "degraded");
+ sysfs_notify_dirent_safe(mddev->sysfs_degraded);
} else {
if (cmd_match(page, "check"))
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
@@ -5443,6 +5497,7 @@ static struct attribute *md_default_attrs[] = {
&md_level.attr,
&md_layout.attr,
&md_raid_disks.attr,
+ &md_uuid.attr,
&md_chunk_size.attr,
&md_size.attr,
&md_resync_start.attr,
@@ -5534,6 +5589,13 @@ static void md_free(struct kobject *ko)
if (mddev->sysfs_state)
sysfs_put(mddev->sysfs_state);
+ if (mddev->sysfs_completed)
+ sysfs_put(mddev->sysfs_completed);
+ if (mddev->sysfs_degraded)
+ sysfs_put(mddev->sysfs_degraded);
+ if (mddev->sysfs_level)
+ sysfs_put(mddev->sysfs_level);
+
if (mddev->gendisk)
del_gendisk(mddev->gendisk);
@@ -5545,6 +5607,7 @@ static void md_free(struct kobject *ko)
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
+ mempool_exit(&mddev->md_io_pool);
kfree(mddev);
}
@@ -5640,8 +5703,13 @@ static int md_alloc(dev_t dev, char *name)
*/
mddev->hold_active = UNTIL_STOP;
+ error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE,
+ sizeof(struct md_io));
+ if (error)
+ goto abort;
+
error = -ENOMEM;
- mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
+ mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
if (!mddev->queue)
goto abort;
@@ -5670,6 +5738,7 @@ static int md_alloc(dev_t dev, char *name)
* remove it now.
*/
disk->flags |= GENHD_FL_EXT_DEVT;
+ disk->events |= DISK_EVENT_MEDIA_CHANGE;
mddev->gendisk = disk;
/* As soon as we call add_disk(), another thread could get
* through to md_open, so make sure it doesn't get too far
@@ -5695,6 +5764,9 @@ static int md_alloc(dev_t dev, char *name)
if (!error && mddev->kobj.sd) {
kobject_uevent(&mddev->kobj, KOBJ_ADD);
mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
+ mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
+ mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
+ mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
}
mddev_put(mddev);
return error;
@@ -5964,8 +6036,6 @@ int md_run(struct mddev *mddev)
blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
else
blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
- mddev->queue->backing_dev_info->congested_data = mddev;
- mddev->queue->backing_dev_info->congested_fn = md_congested;
}
if (pers->sync_request) {
if (mddev->kobj.sd &&
@@ -5982,7 +6052,7 @@ int md_run(struct mddev *mddev)
if (mddev_is_clustered(mddev))
mddev->safemode_delay = 0;
else
- mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
+ mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
mddev->in_sync = 1;
smp_wmb();
spin_lock(&mddev->lock);
@@ -6049,7 +6119,7 @@ static int do_md_run(struct mddev *mddev)
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
sysfs_notify_dirent_safe(mddev->sysfs_state);
sysfs_notify_dirent_safe(mddev->sysfs_action);
- sysfs_notify(&mddev->kobj, NULL, "degraded");
+ sysfs_notify_dirent_safe(mddev->sysfs_degraded);
out:
clear_bit(MD_NOT_READY, &mddev->flags);
return err;
@@ -6350,7 +6420,6 @@ static int do_md_stop(struct mddev *mddev, int mode,
__md_stop_writes(mddev);
__md_stop(mddev);
- mddev->queue->backing_dev_info->congested_fn = NULL;
/* tell userspace to handle 'inactive' */
sysfs_notify_dirent_safe(mddev->sysfs_state);
@@ -7361,6 +7430,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->bitmap_info.nodes = 0;
md_cluster_ops->leave(mddev);
+ module_put(md_cluster_mod);
+ mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
}
mddev_suspend(mddev);
md_bitmap_destroy(mddev);
@@ -7806,23 +7877,21 @@ static void md_release(struct gendisk *disk, fmode_t mode)
mddev_put(mddev);
}
-static int md_media_changed(struct gendisk *disk)
-{
- struct mddev *mddev = disk->private_data;
-
- return mddev->changed;
-}
-
-static int md_revalidate(struct gendisk *disk)
+static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing)
{
struct mddev *mddev = disk->private_data;
+ unsigned int ret = 0;
+ if (mddev->changed)
+ ret = DISK_EVENT_MEDIA_CHANGE;
mddev->changed = 0;
- return 0;
+ return ret;
}
+
static const struct block_device_operations md_fops =
{
.owner = THIS_MODULE,
+ .submit_bio = md_submit_bio,
.open = md_open,
.release = md_release,
.ioctl = md_ioctl,
@@ -7830,8 +7899,7 @@ static const struct block_device_operations md_fops =
.compat_ioctl = md_compat_ioctl,
#endif
.getgeo = md_getgeo,
- .media_changed = md_media_changed,
- .revalidate_disk= md_revalidate,
+ .check_events = md_check_events,
};
static int md_thread(void *arg)
@@ -8355,6 +8423,7 @@ EXPORT_SYMBOL(unregister_md_cluster_operations);
int md_setup_cluster(struct mddev *mddev, int nodes)
{
+ int ret;
if (!md_cluster_ops)
request_module("md-cluster");
spin_lock(&pers_lock);
@@ -8366,7 +8435,10 @@ int md_setup_cluster(struct mddev *mddev, int nodes)
}
spin_unlock(&pers_lock);
- return md_cluster_ops->join(mddev, nodes);
+ ret = md_cluster_ops->join(mddev, nodes);
+ if (!ret)
+ mddev->safemode_delay = 0;
+ return ret;
}
void md_cluster_stop(struct mddev *mddev)
@@ -8767,7 +8839,7 @@ void md_do_sync(struct md_thread *thread)
} else
mddev->curr_resync = 3; /* no longer delayed */
mddev->curr_resync_completed = j;
- sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ sysfs_notify_dirent_safe(mddev->sysfs_completed);
md_new_event(mddev);
update_time = jiffies;
@@ -8795,7 +8867,7 @@ void md_do_sync(struct md_thread *thread)
mddev->recovery_cp = j;
update_time = jiffies;
set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
- sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ sysfs_notify_dirent_safe(mddev->sysfs_completed);
}
while (j >= mddev->resync_max &&
@@ -8902,7 +8974,7 @@ void md_do_sync(struct md_thread *thread)
!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
mddev->curr_resync > 3) {
mddev->curr_resync_completed = mddev->curr_resync;
- sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ sysfs_notify_dirent_safe(mddev->sysfs_completed);
}
mddev->pers->sync_request(mddev, max_sectors, &skipped);
@@ -9032,7 +9104,7 @@ static int remove_and_add_spares(struct mddev *mddev,
}
if (removed && mddev->kobj.sd)
- sysfs_notify(&mddev->kobj, NULL, "degraded");
+ sysfs_notify_dirent_safe(mddev->sysfs_degraded);
if (this && removed)
goto no_add;
@@ -9060,8 +9132,8 @@ static int remove_and_add_spares(struct mddev *mddev,
rdev->recovery_offset = 0;
}
if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
- if (sysfs_link_rdev(mddev, rdev))
- /* failure here is OK */;
+ /* failure here is OK */
+ sysfs_link_rdev(mddev, rdev);
if (!test_bit(Journal, &rdev->flags))
spares++;
md_new_event(mddev);
@@ -9315,8 +9387,7 @@ void md_reap_sync_thread(struct mddev *mddev)
/* success...*/
/* activate any spares */
if (mddev->pers->spare_active(mddev)) {
- sysfs_notify(&mddev->kobj, NULL,
- "degraded");
+ sysfs_notify_dirent_safe(mddev->sysfs_degraded);
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
}
}
@@ -9406,8 +9477,7 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
if (rv == 0) {
/* Make sure they get written out promptly */
if (test_bit(ExternalBbl, &rdev->flags))
- sysfs_notify(&rdev->kobj, NULL,
- "unacknowledged_bad_blocks");
+ sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
sysfs_notify_dirent_safe(rdev->sysfs_state);
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
@@ -9428,7 +9498,7 @@ int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
s += rdev->data_offset;
rv = badblocks_clear(&rdev->badblocks, s, sectors);
if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
- sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
+ sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
return rv;
}
EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
@@ -9658,7 +9728,7 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->recovery_offset == MaxSector &&
!test_bit(In_sync, &rdev->flags) &&
mddev->pers->spare_active(mddev))
- sysfs_notify(&mddev->kobj, NULL, "degraded");
+ sysfs_notify_dirent_safe(mddev->sysfs_degraded);
put_page(swapout);
return 0;