From 841ce241fa355048f66172a47e356bb6e9159c9d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 15 Dec 2010 19:31:20 +0100 Subject: drbd: Replace the ERR_IF macro with an assert-like macro Remove the file name and line number from the syslog messages generated: we have no duplicate function names, and no function contains the same assertion more than once. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 91 ++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 32 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 7b976296b564..c756b4dbd135 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -440,7 +440,8 @@ int drbd_bm_init(struct drbd_conf *mdev) sector_t drbd_bm_capacity(struct drbd_conf *mdev) { - ERR_IF(!mdev->bitmap) return 0; + if (!expect(mdev->bitmap)) + return 0; return mdev->bitmap->bm_dev_capacity; } @@ -448,7 +449,8 @@ sector_t drbd_bm_capacity(struct drbd_conf *mdev) */ void drbd_bm_cleanup(struct drbd_conf *mdev) { - ERR_IF (!mdev->bitmap) return; + if (!expect(mdev->bitmap)) + return; bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags)); kfree(mdev->bitmap); @@ -611,7 +613,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) int err = 0, growing; int opages_vmalloced; - ERR_IF(!b) return -ENOMEM; + if (!expect(b)) + return -ENOMEM; drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK); @@ -733,8 +736,10 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev) unsigned long s; unsigned long flags; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); s = b->bm_set; @@ -757,8 +762,10 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev) size_t drbd_bm_words(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; return b->bm_words; } @@ -766,7 +773,8 @@ size_t drbd_bm_words(struct drbd_conf *mdev) unsigned long drbd_bm_bits(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return 0; + if (!expect(b)) + return 0; return b->bm_bits; } @@ -787,8 +795,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, end = offset + number; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; if (number == 0) return; WARN_ON(offset >= b->bm_words); @@ -832,8 +842,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, end = offset + number; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; spin_lock_irq(&b->bm_lock); if ((offset >= b->bm_words) || @@ -861,8 +873,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, void drbd_bm_set_all(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; spin_lock_irq(&b->bm_lock); bm_memset(b, 0, 0xff, b->bm_words); @@ -875,8 +889,10 @@ void drbd_bm_set_all(struct drbd_conf *mdev) void drbd_bm_clear_all(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; spin_lock_irq(&b->bm_lock); bm_memset(b, 0, 0, b->bm_words); @@ -1209,8 +1225,10 @@ static unsigned long bm_find_next(struct drbd_conf *mdev, struct drbd_bitmap *b = mdev->bitmap; unsigned long i = DRBD_END_OF_BITMAP; - ERR_IF(!b) return i; - ERR_IF(!b->bm_pages) return i; + if (!expect(b)) + return i; + if (!expect(b->bm_pages)) + return i; spin_lock_irq(&b->bm_lock); if (BM_DONT_TEST & b->bm_flags) @@ -1311,8 +1329,10 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, struct drbd_bitmap *b = mdev->bitmap; int c = 0; - ERR_IF(!b) return 1; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 1; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags) @@ -1437,8 +1457,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) unsigned long *p_addr; int i; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); if (BM_DONT_TEST & b->bm_flags) @@ -1472,8 +1494,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi * robust in case we screwed up elsewhere, in that case pretend there * was one dirty bit in the requested area, so we won't try to do a * local read there (no bitmap probably implies no disk) */ - ERR_IF(!b) return 1; - ERR_IF(!b->bm_pages) return 1; + if (!expect(b)) + return 1; + if (!expect(b->bm_pages)) + return 1; spin_lock_irqsave(&b->bm_lock, flags); if (BM_DONT_TEST & b->bm_flags) @@ -1486,11 +1510,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi bm_unmap(p_addr); p_addr = bm_map_pidx(b, idx); } - ERR_IF (bitnr >= b->bm_bits) { - dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); - } else { + if (expect(bitnr < b->bm_bits)) c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); - } + else + dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); } if (p_addr) bm_unmap(p_addr); @@ -1520,8 +1543,10 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) unsigned long flags; unsigned long *p_addr, *bm; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); if (BM_DONT_TEST & b->bm_flags) @@ -1553,8 +1578,10 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) unsigned long weight; unsigned long s, e; int count, i, do_now; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irq(&b->bm_lock); if (BM_DONT_SET & b->bm_flags) -- cgit v1.2.3 From e6b3ea83bc72e126247b241c1164794a644d6fdc Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 14:02:01 +0100 Subject: drbd: moved receiver, worker and asender from mdev to tconn Patch mostly: sed -i -e 's/mdev->receiver/mdev->tconn->receiver/g' \ -e 's/mdev->worker/mdev->tconn->worker/g' \ -e 's/mdev->asender/mdev->tconn->asender/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 24 ++++++++++----------- drivers/block/drbd/drbd_int.h | 9 ++++---- drivers/block/drbd/drbd_main.c | 44 +++++++++++++++++++------------------- drivers/block/drbd/drbd_nl.c | 10 ++++----- drivers/block/drbd/drbd_receiver.c | 14 ++++++------ drivers/block/drbd/drbd_worker.c | 4 ++-- 6 files changed, 53 insertions(+), 52 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index c756b4dbd135..4da4c322fa56 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -119,13 +119,13 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) if (!__ratelimit(&drbd_ratelimit_state)) return; dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", - current == mdev->receiver.task ? "receiver" : - current == mdev->asender.task ? "asender" : - current == mdev->worker.task ? "worker" : current->comm, + current == mdev->tconn->receiver.task ? "receiver" : + current == mdev->tconn->asender.task ? "asender" : + current == mdev->tconn->worker.task ? "worker" : current->comm, func, b->bm_why ?: "?", - b->bm_task == mdev->receiver.task ? "receiver" : - b->bm_task == mdev->asender.task ? "asender" : - b->bm_task == mdev->worker.task ? "worker" : "?"); + b->bm_task == mdev->tconn->receiver.task ? "receiver" : + b->bm_task == mdev->tconn->asender.task ? "asender" : + b->bm_task == mdev->tconn->worker.task ? "worker" : "?"); } void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) @@ -142,13 +142,13 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) if (trylock_failed) { dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", - current == mdev->receiver.task ? "receiver" : - current == mdev->asender.task ? "asender" : - current == mdev->worker.task ? "worker" : current->comm, + current == mdev->tconn->receiver.task ? "receiver" : + current == mdev->tconn->asender.task ? "asender" : + current == mdev->tconn->worker.task ? "worker" : current->comm, why, b->bm_why ?: "?", - b->bm_task == mdev->receiver.task ? "receiver" : - b->bm_task == mdev->asender.task ? "asender" : - b->bm_task == mdev->worker.task ? "worker" : "?"); + b->bm_task == mdev->tconn->receiver.task ? "receiver" : + b->bm_task == mdev->tconn->asender.task ? "asender" : + b->bm_task == mdev->tconn->worker.task ? "worker" : "?"); mutex_lock(&b->bm_change); } if (BM_LOCKED_MASK & b->bm_flags) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8de17b5bd42a..c5b1167aab50 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -972,6 +972,10 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ + + struct drbd_thread receiver; + struct drbd_thread worker; + struct drbd_thread asender; }; struct drbd_conf { @@ -1068,9 +1072,6 @@ struct drbd_conf { struct crypto_hash *csums_tfm; struct crypto_hash *verify_tfm; - struct drbd_thread receiver; - struct drbd_thread worker; - struct drbd_thread asender; struct drbd_bitmap *bitmap; unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ @@ -2005,7 +2006,7 @@ drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) static inline void wake_asender(struct drbd_conf *mdev) { if (test_bit(SIGNAL_ASENDER, &mdev->flags)) - force_sig(DRBD_SIG, mdev->asender.task); + force_sig(DRBD_SIG, mdev->tconn->asender.task); } static inline void request_ping(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 84e40fbfd3e9..5d8a6e94a4a5 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -613,7 +613,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, spin_unlock_irqrestore(&mdev->req_lock, flags); if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { - D_ASSERT(current != mdev->worker.task); + D_ASSERT(current != mdev->tconn->worker.task); wait_for_completion(&done); } @@ -1229,16 +1229,16 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, /* Receiver should clean up itself */ if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) - drbd_thread_stop_nowait(&mdev->receiver); + drbd_thread_stop_nowait(&mdev->tconn->receiver); /* Now the receiver finished cleaning up itself, it should die */ if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) - drbd_thread_stop_nowait(&mdev->receiver); + drbd_thread_stop_nowait(&mdev->tconn->receiver); /* Upon network failure, we need to restart the receiver. */ if (os.conn > C_TEAR_DOWN && ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) - drbd_thread_restart_nowait(&mdev->receiver); + drbd_thread_restart_nowait(&mdev->tconn->receiver); /* Resume AL writing if we get a connection */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) @@ -1297,7 +1297,7 @@ int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, { int rv; - D_ASSERT(current == mdev->worker.task); + D_ASSERT(current == mdev->tconn->worker.task); /* open coded non-blocking drbd_suspend_io(mdev); */ set_bit(SUSPEND_IO, &mdev->flags); @@ -1598,7 +1598,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Upon network connection, we need to start the receiver */ if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) - drbd_thread_start(&mdev->receiver); + drbd_thread_start(&mdev->tconn->receiver); /* Terminate worker thread if we are unconfigured - it will be restarted as needed... */ @@ -1609,7 +1609,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, resume_next_sg(mdev); /* set in __drbd_set_state, unless CONFIG_PENDING was set */ if (test_bit(DEVICE_DYING, &mdev->flags)) - drbd_thread_stop_nowait(&mdev->worker); + drbd_thread_stop_nowait(&mdev->tconn->worker); } drbd_md_sync(mdev); @@ -1675,9 +1675,9 @@ int drbd_thread_start(struct drbd_thread *thi) unsigned long flags; const char *me = - thi == &mdev->receiver ? "receiver" : - thi == &mdev->asender ? "asender" : - thi == &mdev->worker ? "worker" : "NONSENSE"; + thi == &mdev->tconn->receiver ? "receiver" : + thi == &mdev->tconn->asender ? "asender" : + thi == &mdev->tconn->worker ? "worker" : "NONSENSE"; /* is used from state engine doing drbd_thread_stop_nowait, * while holding the req lock irqsave */ @@ -1807,9 +1807,9 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) { struct task_struct *p = current; struct drbd_thread *thi = - p == mdev->asender.task ? &mdev->asender : - p == mdev->receiver.task ? &mdev->receiver : - p == mdev->worker.task ? &mdev->worker : + p == mdev->tconn->asender.task ? &mdev->tconn->asender : + p == mdev->tconn->receiver.task ? &mdev->tconn->receiver : + p == mdev->tconn->worker.task ? &mdev->tconn->worker : NULL; if (!expect(thi != NULL)) return; @@ -2507,8 +2507,8 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * /* long elapsed = (long)(jiffies - mdev->last_received); */ drop_it = mdev->tconn->meta.socket == sock - || !mdev->asender.task - || get_t_state(&mdev->asender) != RUNNING + || !mdev->tconn->asender.task + || get_t_state(&mdev->tconn->asender) != RUNNING || mdev->state.conn < C_CONNECTED; if (drop_it) @@ -3034,9 +3034,9 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); - drbd_thread_init(mdev, &mdev->receiver, drbdd_init); - drbd_thread_init(mdev, &mdev->worker, drbd_worker); - drbd_thread_init(mdev, &mdev->asender, drbd_asender); + drbd_thread_init(mdev, &mdev->tconn->receiver, drbdd_init); + drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); + drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); mdev->agreed_pro_version = PRO_VERSION_MAX; mdev->write_ordering = WO_bdev_flush; @@ -3048,9 +3048,9 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) void drbd_mdev_cleanup(struct drbd_conf *mdev) { int i; - if (mdev->receiver.t_state != NONE) + if (mdev->tconn->receiver.t_state != NONE) dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n", - mdev->receiver.t_state); + mdev->tconn->receiver.t_state); /* no need to lock it, I'm the only thread alive */ if (atomic_read(&mdev->current_epoch->epoch_size) != 0) @@ -4032,7 +4032,7 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, void (*done)(struct drbd_conf *, int), char *why, enum bm_flag flags) { - D_ASSERT(current == mdev->worker.task); + D_ASSERT(current == mdev->tconn->worker.task); D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags)); D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags)); @@ -4069,7 +4069,7 @@ int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), { int rv; - D_ASSERT(current != mdev->worker.task); + D_ASSERT(current != mdev->tconn->worker.task); if ((flags & BM_LOCKED_SET_ALLOWED) == 0) drbd_suspend_io(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a936d61a90cd..59bb58c9b22f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -875,7 +875,7 @@ static void drbd_reconfig_start(struct drbd_conf *mdev) { wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags)); wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); - drbd_thread_start(&mdev->worker); + drbd_thread_start(&mdev->tconn->worker); drbd_flush_workqueue(mdev); } @@ -889,7 +889,7 @@ static void drbd_reconfig_done(struct drbd_conf *mdev) mdev->state.conn == C_STANDALONE && mdev->state.role == R_SECONDARY) { set_bit(DEVICE_DYING, &mdev->flags); - drbd_thread_stop_nowait(&mdev->worker); + drbd_thread_stop_nowait(&mdev->tconn->worker); } else clear_bit(CONFIG_PENDING, &mdev->flags); spin_unlock_irq(&mdev->req_lock); @@ -1887,9 +1887,9 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) { cpumask_copy(mdev->cpu_mask, new_cpu_mask); drbd_calc_cpu_mask(mdev); - mdev->receiver.reset_cpu_mask = 1; - mdev->asender.reset_cpu_mask = 1; - mdev->worker.reset_cpu_mask = 1; + mdev->tconn->receiver.reset_cpu_mask = 1; + mdev->tconn->asender.reset_cpu_mask = 1; + mdev->tconn->worker.reset_cpu_mask = 1; } kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2636bcc173a6..e9f670cd5542 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -833,7 +833,7 @@ retry: if (signal_pending(current)) { flush_signals(current); smp_rmb(); - if (get_t_state(&mdev->receiver) == EXITING) + if (get_t_state(&mdev->tconn->receiver) == EXITING) goto out_release_sockets; } @@ -874,7 +874,7 @@ retry: mdev->tconn->meta.socket = msock; mdev->last_received = jiffies; - D_ASSERT(mdev->asender.task == NULL); + D_ASSERT(mdev->tconn->asender.task == NULL); h = drbd_do_handshake(mdev); if (h <= 0) @@ -901,7 +901,7 @@ retry: atomic_set(&mdev->packet_seq, 0); mdev->peer_seq = 0; - drbd_thread_start(&mdev->asender); + drbd_thread_start(&mdev->tconn->asender); if (drbd_send_protocol(mdev) == -1) return -1; @@ -3704,7 +3704,7 @@ static void drbdd(struct drbd_conf *mdev) size_t shs; /* sub header size */ int rv; - while (get_t_state(&mdev->receiver) == RUNNING) { + while (get_t_state(&mdev->tconn->receiver) == RUNNING) { drbd_thread_current_set_cpu(mdev); if (!drbd_recv_header(mdev, &cmd, &packet_size)) goto err_out; @@ -3768,7 +3768,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) return; /* asender does not clean up anything. it must not interfere, either */ - drbd_thread_stop(&mdev->asender); + drbd_thread_stop(&mdev->tconn->asender); drbd_free_sock(mdev); /* wait for current activity to cease. */ @@ -3891,7 +3891,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) */ static int drbd_send_handshake(struct drbd_conf *mdev) { - /* ASSERT current == mdev->receiver ... */ + /* ASSERT current == mdev->tconn->receiver ... */ struct p_handshake *p = &mdev->tconn->data.sbuf.handshake; int ok; @@ -3923,7 +3923,7 @@ static int drbd_send_handshake(struct drbd_conf *mdev) */ static int drbd_do_handshake(struct drbd_conf *mdev) { - /* ASSERT current == mdev->receiver ... */ + /* ASSERT current == mdev->tconn->receiver ... */ struct p_handshake *p = &mdev->tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); unsigned int length; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9b1e2bad5fbd..1ca7856f8136 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1705,8 +1705,8 @@ int drbd_worker(struct drbd_thread *thi) D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. - * wait here for the EXITING receiver. */ - drbd_thread_stop(&mdev->receiver); + * wait here for the exiting receiver. */ + drbd_thread_stop(&mdev->tconn->receiver); drbd_mdev_cleanup(mdev); dev_info(DEV, "worker terminated\n"); -- cgit v1.2.3 From bed879ae905190028a90d53493c4f75dcd78f44d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 4 Feb 2011 14:00:37 +0100 Subject: drbd: Moved the thread name into the data structure Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 20 ++++++----------- drivers/block/drbd/drbd_int.h | 2 ++ drivers/block/drbd/drbd_main.c | 46 ++++++++++++++++++++++++---------------- 3 files changed, 36 insertions(+), 32 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 4da4c322fa56..e85221f22adc 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -119,13 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) if (!__ratelimit(&drbd_ratelimit_state)) return; dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", - current == mdev->tconn->receiver.task ? "receiver" : - current == mdev->tconn->asender.task ? "asender" : - current == mdev->tconn->worker.task ? "worker" : current->comm, - func, b->bm_why ?: "?", - b->bm_task == mdev->tconn->receiver.task ? "receiver" : - b->bm_task == mdev->tconn->asender.task ? "asender" : - b->bm_task == mdev->tconn->worker.task ? "worker" : "?"); + drbd_task_to_thread_name(mdev, current), + func, b->bm_why ?: "?", + drbd_task_to_thread_name(mdev, b->bm_task)); } void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) @@ -142,13 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) if (trylock_failed) { dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", - current == mdev->tconn->receiver.task ? "receiver" : - current == mdev->tconn->asender.task ? "asender" : - current == mdev->tconn->worker.task ? "worker" : current->comm, - why, b->bm_why ?: "?", - b->bm_task == mdev->tconn->receiver.task ? "receiver" : - b->bm_task == mdev->tconn->asender.task ? "asender" : - b->bm_task == mdev->tconn->worker.task ? "worker" : "?"); + drbd_task_to_thread_name(mdev, current), + why, b->bm_why ?: "?", + drbd_task_to_thread_name(mdev, b->bm_task)); mutex_lock(&b->bm_change); } if (BM_LOCKED_MASK & b->bm_flags) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 98addab2c928..7beb374451b3 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -616,6 +616,7 @@ struct drbd_thread { int (*function) (struct drbd_thread *); struct drbd_conf *mdev; int reset_cpu_mask; + char name[9]; }; static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) @@ -1130,6 +1131,7 @@ enum dds_flags { extern void drbd_init_set_defaults(struct drbd_conf *mdev); extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); +extern char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task); #ifdef CONFIG_SMP extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); extern void drbd_calc_cpu_mask(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4b39b3d0dd55..852a3e3fbb76 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -462,7 +462,7 @@ restart: */ if (thi->t_state == RESTARTING) { - dev_info(DEV, "Restarting %s\n", current->comm); + dev_info(DEV, "Restarting %s thread\n", thi->name); thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); goto restart; @@ -482,13 +482,14 @@ restart: } static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, - int (*func) (struct drbd_thread *)) + int (*func) (struct drbd_thread *), char *name) { spin_lock_init(&thi->t_lock); thi->task = NULL; thi->t_state = NONE; thi->function = func; thi->mdev = mdev; + strncpy(thi->name, name, ARRAY_SIZE(thi->name)); } int drbd_thread_start(struct drbd_thread *thi) @@ -497,11 +498,6 @@ int drbd_thread_start(struct drbd_thread *thi) struct task_struct *nt; unsigned long flags; - const char *me = - thi == &mdev->tconn->receiver ? "receiver" : - thi == &mdev->tconn->asender ? "asender" : - thi == &mdev->tconn->worker ? "worker" : "NONSENSE"; - /* is used from state engine doing drbd_thread_stop_nowait, * while holding the req lock irqsave */ spin_lock_irqsave(&thi->t_lock, flags); @@ -509,7 +505,7 @@ int drbd_thread_start(struct drbd_thread *thi) switch (thi->t_state) { case NONE: dev_info(DEV, "Starting %s thread (from %s [%d])\n", - me, current->comm, current->pid); + thi->name, current->comm, current->pid); /* Get ref on module for thread - this is released when thread exits */ if (!try_module_get(THIS_MODULE)) { @@ -526,7 +522,7 @@ int drbd_thread_start(struct drbd_thread *thi) flush_signals(current); /* otherw. may get -ERESTARTNOINTR */ nt = kthread_create(drbd_thread_setup, (void *) thi, - "drbd%d_%s", mdev_to_minor(mdev), me); + "drbd%d_%s", mdev_to_minor(mdev), thi->name); if (IS_ERR(nt)) { dev_err(DEV, "Couldn't start thread\n"); @@ -543,7 +539,7 @@ int drbd_thread_start(struct drbd_thread *thi) case EXITING: thi->t_state = RESTARTING; dev_info(DEV, "Restarting %s thread (from %s [%d])\n", - me, current->comm, current->pid); + thi->name, current->comm, current->pid); /* fall through */ case RUNNING: case RESTARTING: @@ -592,6 +588,23 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) wait_for_completion(&thi->stop); } +static struct drbd_thread *drbd_task_to_thread(struct drbd_conf *mdev, struct task_struct *task) +{ + struct drbd_tconn *tconn = mdev->tconn; + struct drbd_thread *thi = + task == tconn->receiver.task ? &tconn->receiver : + task == tconn->asender.task ? &tconn->asender : + task == tconn->worker.task ? &tconn->worker : NULL; + + return thi; +} + +char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task) +{ + struct drbd_thread *thi = drbd_task_to_thread(mdev, task); + return thi ? thi->name : task->comm; +} + #ifdef CONFIG_SMP /** * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs @@ -629,11 +642,8 @@ void drbd_calc_cpu_mask(struct drbd_conf *mdev) void drbd_thread_current_set_cpu(struct drbd_conf *mdev) { struct task_struct *p = current; - struct drbd_thread *thi = - p == mdev->tconn->asender.task ? &mdev->tconn->asender : - p == mdev->tconn->receiver.task ? &mdev->tconn->receiver : - p == mdev->tconn->worker.task ? &mdev->tconn->worker : - NULL; + struct drbd_thread *thi = drbd_task_to_thread(mdev, p); + if (!expect(thi != NULL)) return; if (!thi->reset_cpu_mask) @@ -1848,9 +1858,9 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); - drbd_thread_init(mdev, &mdev->tconn->receiver, drbdd_init); - drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); - drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); + drbd_thread_init(mdev, &mdev->tconn->receiver, drbdd_init, "receiver"); + drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker, "worker"); + drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender, "asender"); /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */ mdev->write_ordering = WO_bdev_flush; -- cgit v1.2.3 From 392c8801922f51466045ece2f1f2884b8c9cd9a2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 10:33:31 +0100 Subject: drbd: drbd_thread has now a pointer to a tconn instead of to a mdev Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 8 +++---- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 43 ++++++++++++++++++-------------------- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 2 +- 5 files changed, 29 insertions(+), 32 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e85221f22adc..e8d652f197c3 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -119,9 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) if (!__ratelimit(&drbd_ratelimit_state)) return; dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", - drbd_task_to_thread_name(mdev, current), + drbd_task_to_thread_name(mdev->tconn, current), func, b->bm_why ?: "?", - drbd_task_to_thread_name(mdev, b->bm_task)); + drbd_task_to_thread_name(mdev->tconn, b->bm_task)); } void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) @@ -138,9 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) if (trylock_failed) { dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", - drbd_task_to_thread_name(mdev, current), + drbd_task_to_thread_name(mdev->tconn, current), why, b->bm_why ?: "?", - drbd_task_to_thread_name(mdev, b->bm_task)); + drbd_task_to_thread_name(mdev->tconn, b->bm_task)); mutex_lock(&b->bm_change); } if (BM_LOCKED_MASK & b->bm_flags) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index be067bfbace2..91054e4d0b2f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -625,7 +625,7 @@ struct drbd_thread { struct completion stop; enum drbd_thread_state t_state; int (*function) (struct drbd_thread *); - struct drbd_conf *mdev; + struct drbd_tconn *tconn; int reset_cpu_mask; char name[9]; }; @@ -1151,7 +1151,7 @@ enum dds_flags { extern void drbd_init_set_defaults(struct drbd_conf *mdev); extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); -extern char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task); +extern char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task); #ifdef CONFIG_SMP extern void drbd_thread_current_set_cpu(struct drbd_thread *thi); extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0861746a7475..2c44cc36deed 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -442,12 +442,12 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) static int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->tconn; unsigned long flags; int retval; snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s", - thi->name[0], thi->mdev->tconn->name); + thi->name[0], thi->tconn->name); restart: retval = thi->function(thi); @@ -465,7 +465,7 @@ restart: */ if (thi->t_state == RESTARTING) { - dev_info(DEV, "Restarting %s thread\n", thi->name); + conn_info(tconn, "Restarting %s thread\n", thi->name); thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); goto restart; @@ -477,27 +477,27 @@ restart: complete(&thi->stop); spin_unlock_irqrestore(&thi->t_lock, flags); - dev_info(DEV, "Terminating %s\n", current->comm); + conn_info(tconn, "Terminating %s\n", current->comm); /* Release mod reference taken when thread was started */ module_put(THIS_MODULE); return retval; } -static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, +static void drbd_thread_init(struct drbd_tconn *tconn, struct drbd_thread *thi, int (*func) (struct drbd_thread *), char *name) { spin_lock_init(&thi->t_lock); thi->task = NULL; thi->t_state = NONE; thi->function = func; - thi->mdev = mdev; + thi->tconn = tconn; strncpy(thi->name, name, ARRAY_SIZE(thi->name)); } int drbd_thread_start(struct drbd_thread *thi) { - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->tconn; struct task_struct *nt; unsigned long flags; @@ -507,28 +507,27 @@ int drbd_thread_start(struct drbd_thread *thi) switch (thi->t_state) { case NONE: - dev_info(DEV, "Starting %s thread (from %s [%d])\n", + conn_info(tconn, "Starting %s thread (from %s [%d])\n", thi->name, current->comm, current->pid); /* Get ref on module for thread - this is released when thread exits */ if (!try_module_get(THIS_MODULE)) { - dev_err(DEV, "Failed to get module reference in drbd_thread_start\n"); + conn_err(tconn, "Failed to get module reference in drbd_thread_start\n"); spin_unlock_irqrestore(&thi->t_lock, flags); return false; } init_completion(&thi->stop); - D_ASSERT(thi->task == NULL); thi->reset_cpu_mask = 1; thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); flush_signals(current); /* otherw. may get -ERESTARTNOINTR */ nt = kthread_create(drbd_thread_setup, (void *) thi, - "drbd%d_%s", mdev_to_minor(mdev), thi->name); + "drbd_%c_%s", thi->name[0], thi->tconn->name); if (IS_ERR(nt)) { - dev_err(DEV, "Couldn't start thread\n"); + conn_err(tconn, "Couldn't start thread\n"); module_put(THIS_MODULE); return false; @@ -541,7 +540,7 @@ int drbd_thread_start(struct drbd_thread *thi) break; case EXITING: thi->t_state = RESTARTING; - dev_info(DEV, "Restarting %s thread (from %s [%d])\n", + conn_info(tconn, "Restarting %s thread (from %s [%d])\n", thi->name, current->comm, current->pid); /* fall through */ case RUNNING: @@ -582,7 +581,6 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) init_completion(&thi->stop); if (thi->task != current) force_sig(DRBD_SIGKILL, thi->task); - } spin_unlock_irqrestore(&thi->t_lock, flags); @@ -591,9 +589,8 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) wait_for_completion(&thi->stop); } -static struct drbd_thread *drbd_task_to_thread(struct drbd_conf *mdev, struct task_struct *task) +static struct drbd_thread *drbd_task_to_thread(struct drbd_tconn *tconn, struct task_struct *task) { - struct drbd_tconn *tconn = mdev->tconn; struct drbd_thread *thi = task == tconn->receiver.task ? &tconn->receiver : task == tconn->asender.task ? &tconn->asender : @@ -602,9 +599,9 @@ static struct drbd_thread *drbd_task_to_thread(struct drbd_conf *mdev, struct ta return thi; } -char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task) +char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task) { - struct drbd_thread *thi = drbd_task_to_thread(mdev, task); + struct drbd_thread *thi = drbd_task_to_thread(tconn, task); return thi ? thi->name : task->comm; } @@ -656,7 +653,7 @@ void drbd_thread_current_set_cpu(struct drbd_thread *thi) if (!thi->reset_cpu_mask) return; thi->reset_cpu_mask = 0; - set_cpus_allowed_ptr(p, thi->mdev->tconn->cpu_mask); + set_cpus_allowed_ptr(p, thi->tconn->cpu_mask); } #endif @@ -1866,10 +1863,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); - drbd_thread_init(mdev, &mdev->tconn->receiver, drbdd_init, "receiver"); - drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker, "worker"); - drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender, "asender"); - /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */ mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; @@ -2202,6 +2195,10 @@ struct drbd_tconn *drbd_new_tconn(char *name) init_waitqueue_head(&tconn->net_cnt_wait); idr_init(&tconn->volumes); + drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver"); + drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); + drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); + write_lock_irq(&global_state_lock); list_add(&tconn->all_tconn, &drbd_tconns); write_unlock_irq(&global_state_lock); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index f0cd3819fffa..f21b0efff6d8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4224,7 +4224,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) int drbdd_init(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->mdev->tconn; + struct drbd_tconn *tconn = thi->tconn; int h; conn_info(tconn, "receiver (re)started\n"); @@ -4591,7 +4591,7 @@ static int tconn_process_done_ee(struct drbd_tconn *tconn) int drbd_asender(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->mdev->tconn; + struct drbd_tconn *tconn = thi->tconn; struct p_header *h = &tconn->meta.rbuf.header; struct asender_cmd *cmd = NULL; struct packet_info pi; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c9b10d6eb88a..3f0f84a56ee6 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1643,7 +1643,7 @@ static int _worker_dying(int vnr, void *p, void *data) int drbd_worker(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->mdev->tconn; + struct drbd_tconn *tconn = thi->tconn; struct drbd_work *w = NULL; LIST_HEAD(work_list); int intr = 0; -- cgit v1.2.3 From 4738fa16907a933d72bbcae1b8922dc9330fde92 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:55 +0100 Subject: drbd: use clear_bit_unlock() where appropriate Some open-coded clear_bit(); smp_mb__after_clear_bit(); should in fact have been smp_mb__before_clear_bit(); clear_bit(); Instead, use clear_bit_unlock() to annotate the intention, and have it do the right thing. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 3 +-- drivers/block/drbd/drbd_main.c | 3 +-- include/linux/lru_cache.h | 3 +-- lib/lru_cache.c | 10 ++++------ 4 files changed, 7 insertions(+), 12 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e8d652f197c3..4be737055718 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -219,8 +219,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) { struct drbd_bitmap *b = mdev->bitmap; void *addr = &page_private(b->bm_pages[page_nr]); - clear_bit(BM_PAGE_IO_LOCK, addr); - smp_mb__after_clear_bit(); + clear_bit_unlock(BM_PAGE_IO_LOCK, addr); wake_up(&mdev->bitmap->bm_io_wait); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 592f0c949fd0..c77e51a40926 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2818,8 +2818,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused) put_ldev(mdev); } - clear_bit(BITMAP_IO, &mdev->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(BITMAP_IO, &mdev->flags); wake_up(&mdev->misc_wait); if (work->done) diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 7a71ffad037c..4cceafb0732d 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -275,8 +275,7 @@ static inline int lc_try_lock(struct lru_cache *lc) */ static inline void lc_unlock(struct lru_cache *lc) { - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_DIRTY, &lc->flags); } static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index a07e7268d7ed..9f353f7f41ca 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -44,8 +44,8 @@ MODULE_LICENSE("GPL"); } while (0) #define RETURN(x...) do { \ - clear_bit(__LC_PARANOIA, &lc->flags); \ - smp_mb__after_clear_bit(); return x ; } while (0) + clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ + return x ; } while (0) /* BUG() if e is not one of the elements tracked by lc */ #define PARANOIA_LC_ELEMENT(lc, e) do { \ @@ -438,8 +438,7 @@ void lc_changed(struct lru_cache *lc, struct lc_element *e) hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); lc->changing_element = NULL; lc->new_number = LC_FREE; - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_DIRTY, &lc->flags); RETURN(); } @@ -463,8 +462,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) /* move it to the front of LRU. */ list_move(&e->list, &lc->lru); lc->used--; - clear_bit(__LC_STARVING, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_STARVING, &lc->flags); } RETURN(e->refcnt); } -- cgit v1.2.3 From 45dfffebd08c1445493bfa8f0ec05b38714b9b2d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:21:00 +0100 Subject: drbd: allow to select specific bitmap pages for writeout We are about to allow several changes to the active set in one activity log transaction. We have to write out the corresponding bitmap pages as well, if changed. Introduce drbd_bm_mark_for_writeout(), then re-use the existing bitmap writeout path to submit all marked pages in one go. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 49 +++++++++++++++++++++++++++++++++++----- drivers/block/drbd/drbd_int.h | 13 +++++++---- 2 files changed, 52 insertions(+), 10 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 4be737055718..bc89c4a30cbc 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -188,6 +188,9 @@ void drbd_bm_unlock(struct drbd_conf *mdev) /* to mark for lazy writeout once syncer cleared all clearable bits, * we if bits have been cleared since last IO. */ #define BM_PAGE_LAZY_WRITEOUT 28 +/* pages marked with this "HINT" will be considered for writeout + * on activity log transactions */ +#define BM_PAGE_HINT_WRITEOUT 27 /* store_page_idx uses non-atomic assignment. It is only used directly after * allocating the page. All other bm_set_page_* and bm_clear_page_* need to @@ -237,6 +240,27 @@ static void bm_set_page_need_writeout(struct page *page) set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); } +/** + * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout + * @mdev: DRBD device. + * @page_nr: the bitmap page to mark with the "hint" flag + * + * From within an activity log transaction, we mark a few pages with these + * hints, then call drbd_bm_write_hinted(), which will only write out changed + * pages which are flagged with this mark. + */ +void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr) +{ + struct page *page; + if (page_nr >= mdev->bitmap->bm_number_of_pages) { + dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n", + page_nr, (int)mdev->bitmap->bm_number_of_pages); + return; + } + page = mdev->bitmap->bm_pages[page_nr]; + set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)); +} + static int bm_test_page_unchanged(struct page *page) { volatile const unsigned long *addr = &page_private(page); @@ -897,6 +921,7 @@ struct bm_aio_ctx { struct completion done; unsigned flags; #define BM_AIO_COPY_PAGES 1 +#define BM_AIO_WRITE_HINTED 2 int error; }; @@ -1007,13 +1032,13 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must /* * bm_rw: read/write the whole bitmap from/to its on disk location. */ -static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local) +static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) { struct bm_aio_ctx ctx = { .mdev = mdev, .in_flight = ATOMIC_INIT(1), .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), - .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0, + .flags = flags, }; struct drbd_bitmap *b = mdev->bitmap; int num_pages, i, count = 0; @@ -1042,6 +1067,10 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) break; if (rw & WRITE) { + if ((flags & BM_AIO_WRITE_HINTED) && + !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, + &page_private(b->bm_pages[i]))) + continue; if (bm_test_page_unchanged(b->bm_pages[i])) { dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); continue; @@ -1099,7 +1128,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id */ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) { - return bm_rw(mdev, READ, 0); + return bm_rw(mdev, READ, 0, 0); } /** @@ -1110,7 +1139,7 @@ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) { - return bm_rw(mdev, WRITE, 0); + return bm_rw(mdev, WRITE, 0, 0); } /** @@ -1120,12 +1149,20 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) { - return bm_rw(mdev, WRITE, upper_idx); + return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx); } +/** + * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed. + * @mdev: DRBD device. + */ +int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local) +{ + return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); +} /** - * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap + * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap * @mdev: DRBD device. * @idx: bitmap page index * diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 93eb3a7ac711..edfdeb62c18f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1310,11 +1310,14 @@ struct bm_extent { #define SLEEP_TIME (HZ/10) -#define BM_BLOCK_SHIFT 12 /* 4k per bit */ +/* We do bitmap IO in units of 4k blocks. + * We also still have a hardcoded 4k per bit relation. */ +#define BM_BLOCK_SHIFT 12 /* 4k per bit */ #define BM_BLOCK_SIZE (1< Date: Mon, 21 Feb 2011 15:10:23 +0100 Subject: drbd: silence some log messages on bitmap IO Summary log messages meant for global bitmap IO should not be printed for bitmap IO caused by activity log transactions. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index bc89c4a30cbc..3791082979e1 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1096,9 +1096,12 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w */ if (!atomic_dec_and_test(&ctx.in_flight)) wait_for_completion(&ctx.done); - dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", - rw == WRITE ? "WRITE" : "READ", - count, jiffies - now); + + /* summary for global bitmap IO */ + if (flags == 0) + dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", + rw == WRITE ? "WRITE" : "READ", + count, jiffies - now); if (ctx.error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); @@ -1116,8 +1119,9 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w } now = b->bm_set; - dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", - ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); + if (flags == 0) + dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", + ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); return err; } -- cgit v1.2.3 From 9db4e77f8cbbeeb32a4d2aea022c80333c445984 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 15:38:47 +0100 Subject: drbd: use the newly introduced page pool for bitmap IO Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 3791082979e1..0009e40744ab 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -963,9 +963,8 @@ static void bm_async_io_complete(struct bio *bio, int error) bm_page_unlock_io(mdev, idx); - /* FIXME give back to page pool */ if (ctx->flags & BM_AIO_COPY_PAGES) - put_page(bio->bi_io_vec[0].bv_page); + mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool); bio_put(bio); @@ -999,10 +998,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must bm_set_page_unchanged(b->bm_pages[page_nr]); if (ctx->flags & BM_AIO_COPY_PAGES) { - /* FIXME alloc_page is good enough for now, but actually needs - * to use pre-allocated page pool */ void *src, *dest; - page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); + page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT); dest = kmap_atomic(page, KM_USER0); src = kmap_atomic(b->bm_pages[page_nr], KM_USER1); memcpy(dest, src, PAGE_SIZE); @@ -1014,6 +1011,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must bio->bi_bdev = mdev->ldev->md_bdev; bio->bi_sector = on_disk_sector; + /* bio_add_page of a single page to an empty bio will always succeed, + * according to api. Do we want to assert that? */ bio_add_page(bio, page, len, 0); bio->bi_private = ctx; bio->bi_end_io = bm_async_io_complete; -- cgit v1.2.3 From da4a75d2ef064501f6756986af6ea330ba0585d7 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 17:02:01 +0100 Subject: drbd: introduce a bio_set to allocate housekeeping bios from Don't rely on availability of bios from the global fs_bio_set, we should use our own bio_set for meta data IO. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_bitmap.c | 3 +-- drivers/block/drbd/drbd_int.h | 6 ++++++ drivers/block/drbd/drbd_main.c | 28 ++++++++++++++++++++++++++++ drivers/block/drbd/drbd_receiver.c | 6 +++++- 5 files changed, 41 insertions(+), 4 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index ea3895de4e6d..7cd78617669b 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -125,7 +125,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, rw |= REQ_FUA | REQ_FLUSH; rw |= REQ_SYNC; - bio = bio_alloc(GFP_NOIO, 1); + bio = bio_alloc_drbd(GFP_NOIO); bio->bi_bdev = bdev->md_bdev; bio->bi_sector = sector; ok = (bio_add_page(bio, page, size, 0) == size); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 0009e40744ab..52c48143b22a 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -974,8 +974,7 @@ static void bm_async_io_complete(struct bio *bio, int error) static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) { - /* we are process context. we always get a bio */ - struct bio *bio = bio_alloc(GFP_KERNEL, 1); + struct bio *bio = bio_alloc_drbd(GFP_KERNEL); struct drbd_conf *mdev = ctx->mdev; struct drbd_bitmap *b = mdev->bitmap; struct page *page; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2444a1683475..e68758344647 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1489,6 +1489,12 @@ extern wait_queue_head_t drbd_pp_wait; #define DRBD_MIN_POOL_PAGES 128 extern mempool_t *drbd_md_io_page_pool; +/* We also need to make sure we get a bio + * when we need it for housekeeping purposes */ +extern struct bio_set *drbd_md_io_bio_set; +/* to allocate from that set */ +extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); + extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5f4c95905d5e..997b2e214670 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -130,6 +130,7 @@ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t *drbd_request_mempool; mempool_t *drbd_ee_mempool; mempool_t *drbd_md_io_page_pool; +struct bio_set *drbd_md_io_bio_set; /* I do not use a standard mempool, because: 1) I want to hand out the pre-allocated objects first. @@ -150,6 +151,25 @@ static const struct block_device_operations drbd_ops = { .release = drbd_release, }; +static void bio_destructor_drbd(struct bio *bio) +{ + bio_free(bio, drbd_md_io_bio_set); +} + +struct bio *bio_alloc_drbd(gfp_t gfp_mask) +{ + struct bio *bio; + + if (!drbd_md_io_bio_set) + return bio_alloc(gfp_mask, 1); + + bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); + if (!bio) + return NULL; + bio->bi_destructor = bio_destructor_drbd; + return bio; +} + #ifdef __CHECKER__ /* When checking with sparse, and this is an inline function, sparse will give tons of false positives. When this is a real functions sparse works. @@ -1953,6 +1973,8 @@ static void drbd_destroy_mempools(void) /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ + if (drbd_md_io_bio_set) + bioset_free(drbd_md_io_bio_set); if (drbd_md_io_page_pool) mempool_destroy(drbd_md_io_page_pool); if (drbd_ee_mempool) @@ -1968,6 +1990,7 @@ static void drbd_destroy_mempools(void) if (drbd_al_ext_cache) kmem_cache_destroy(drbd_al_ext_cache); + drbd_md_io_bio_set = NULL; drbd_md_io_page_pool = NULL; drbd_ee_mempool = NULL; drbd_request_mempool = NULL; @@ -1993,6 +2016,7 @@ static int drbd_create_mempools(void) drbd_al_ext_cache = NULL; drbd_pp_pool = NULL; drbd_md_io_page_pool = NULL; + drbd_md_io_bio_set = NULL; /* caches */ drbd_request_cache = kmem_cache_create( @@ -2016,6 +2040,10 @@ static int drbd_create_mempools(void) goto Enomem; /* mempools */ + drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0); + if (drbd_md_io_bio_set == NULL) + goto Enomem; + drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0); if (drbd_md_io_page_pool == NULL) goto Enomem; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cd78ebfefe52..6dcf65484c26 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1124,7 +1124,11 @@ int drbd_submit_peer_request(struct drbd_conf *mdev, /* In most cases, we will only need one bio. But in case the lower * level restrictions happen to be different at this offset on this * side than those of the sending peer, we may need to submit the - * request in more than one bio. */ + * request in more than one bio. + * + * Plain bio_alloc is good enough here, this is no DRBD internally + * generated bio, but a bio allocated on behalf of the peer. + */ next_bio: bio = bio_alloc(GFP_NOIO, nr_pages); if (!bio) { -- cgit v1.2.3 From cdfda633d235028e9b27381dedb65416409e8729 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 5 Jul 2011 15:38:59 +0200 Subject: drbd: detach from frozen backing device * drbd-8.3: documentation: Documented detach's --force and disk's --disk-timeout drbd: Implemented the disk-timeout option drbd: Force flag for the detach operation drbd: Allow new IOs while the local disk in in FAILED state drbd: Bitmap IO functions can not return prematurely if the disk breaks drbd: Added a kref to bm_aio_ctx drbd: Hold a reference to ldev while doing meta-data IO drbd: Keep a reference to the bio until the completion handler finished drbd: Implemented wait_until_done_or_disk_failure() drbd: Replaced md_io_mutex by an atomic: md_io_in_use drbd: moved md_io into mdev drbd: Immediately allow completion of IOs, that wait for IO completions on a failed disk drbd: Keep a reference to barrier acked requests Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 75 ++++++++++++++++++------ drivers/block/drbd/drbd_bitmap.c | 115 +++++++++++++++++++++++++++---------- drivers/block/drbd/drbd_int.h | 12 ++-- drivers/block/drbd/drbd_main.c | 77 ++++++++++++++++++++++--- drivers/block/drbd/drbd_nl.c | 28 ++++++++- drivers/block/drbd/drbd_receiver.c | 2 - drivers/block/drbd/drbd_req.c | 52 +++++++++++------ drivers/block/drbd/drbd_req.h | 19 +++--- drivers/block/drbd/drbd_state.c | 7 +++ drivers/block/drbd/drbd_worker.c | 9 ++- include/linux/drbd_genl.h | 9 ++- include/linux/drbd_limits.h | 6 ++ 12 files changed, 321 insertions(+), 90 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index aeb483daea06..58b5b61628fc 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -114,18 +114,44 @@ struct drbd_atodb_wait { static int w_al_write_transaction(struct drbd_work *, int); +void *drbd_md_get_buffer(struct drbd_conf *mdev) +{ + int r; + + wait_event(mdev->misc_wait, + (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 || + mdev->state.disk <= D_FAILED); + + return r ? NULL : page_address(mdev->md_io_page); +} + +void drbd_md_put_buffer(struct drbd_conf *mdev) +{ + if (atomic_dec_and_test(&mdev->md_io_in_use)) + wake_up(&mdev->misc_wait); +} + +static bool md_io_allowed(struct drbd_conf *mdev) +{ + enum drbd_disk_state ds = mdev->state.disk; + return ds >= D_NEGOTIATING || ds == D_ATTACHING; +} + +void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done) +{ + wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev)); +} + static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, struct page *page, sector_t sector, int rw, int size) { struct bio *bio; - struct drbd_md_io md_io; int err; - md_io.mdev = mdev; - init_completion(&md_io.event); - md_io.error = 0; + mdev->md_io.done = 0; + mdev->md_io.error = -ENODEV; if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) rw |= REQ_FUA | REQ_FLUSH; @@ -137,17 +163,25 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, err = -EIO; if (bio_add_page(bio, page, size, 0) != size) goto out; - bio->bi_private = &md_io; + bio->bi_private = &mdev->md_io; bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); + err = -ENODEV; + goto out; + } + + bio_get(bio); /* one bio_put() is in the completion handler */ + atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_for_completion(&md_io.event); + wait_until_done_or_disk_failure(mdev, &mdev->md_io.done); if (bio_flagged(bio, BIO_UPTODATE)) - err = md_io.error; + err = mdev->md_io.error; out: bio_put(bio); @@ -160,7 +194,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int err; struct page *iop = mdev->md_io_page; - D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); + D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1); BUG_ON(!bdev->md_bdev); @@ -344,8 +378,14 @@ w_al_write_transaction(struct drbd_work *w, int unused) return 0; } - mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ - buffer = page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */ + if (!buffer) { + dev_err(DEV, "disk failed while waiting for md_io buffer\n"); + aw->err = -EIO; + complete(&((struct update_al_work *)w)->event); + put_ldev(mdev); + return 1; + } memset(buffer, 0, sizeof(*buffer)); buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); @@ -415,7 +455,7 @@ w_al_write_transaction(struct drbd_work *w, int unused) mdev->al_tr_number++; } - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); complete(&((struct update_al_work *)w)->event); put_ldev(mdev); @@ -506,8 +546,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* lock out all other meta data io for now, * and make sure the page is mapped. */ - mutex_lock(&mdev->md_io_mutex); - b = page_address(mdev->md_io_page); + b = drbd_md_get_buffer(mdev); + if (!b) + return 0; /* Always use the full ringbuffer space for now. * possible optimization: read in all of it, @@ -528,7 +569,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* IO error */ if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 0; } @@ -558,7 +599,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!found_valid) { if (found_initialized != mx) dev_warn(DEV, "No usable activity log found.\n"); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 1; } @@ -573,7 +614,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!expect(rv != 0)) goto cancel; if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 0; } @@ -643,7 +684,7 @@ cancel: mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* ok, we are done with it */ - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", transactions, active_extents); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 52c48143b22a..706e5220dd4a 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -918,13 +918,22 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) struct bm_aio_ctx { struct drbd_conf *mdev; atomic_t in_flight; - struct completion done; + unsigned int done; unsigned flags; #define BM_AIO_COPY_PAGES 1 #define BM_AIO_WRITE_HINTED 2 int error; + struct kref kref; }; +static void bm_aio_ctx_destroy(struct kref *kref) +{ + struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); + + put_ldev(ctx->mdev); + kfree(ctx); +} + /* bv_page may be a copy, or may be the original */ static void bm_async_io_complete(struct bio *bio, int error) { @@ -968,13 +977,16 @@ static void bm_async_io_complete(struct bio *bio, int error) bio_put(bio); - if (atomic_dec_and_test(&ctx->in_flight)) - complete(&ctx->done); + if (atomic_dec_and_test(&ctx->in_flight)) { + ctx->done = 1; + wake_up(&mdev->misc_wait); + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + } } static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) { - struct bio *bio = bio_alloc_drbd(GFP_KERNEL); + struct bio *bio = bio_alloc_drbd(GFP_NOIO); struct drbd_conf *mdev = ctx->mdev; struct drbd_bitmap *b = mdev->bitmap; struct page *page; @@ -1032,12 +1044,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must */ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) { - struct bm_aio_ctx ctx = { - .mdev = mdev, - .in_flight = ATOMIC_INIT(1), - .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), - .flags = flags, - }; + struct bm_aio_ctx *ctx; struct drbd_bitmap *b = mdev->bitmap; int num_pages, i, count = 0; unsigned long now; @@ -1052,7 +1059,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w * For lazy writeout, we don't care for ongoing changes to the bitmap, * as we submit copies of pages anyways. */ - if (!ctx.flags) + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; + + *ctx = (struct bm_aio_ctx) { + .mdev = mdev, + .in_flight = ATOMIC_INIT(1), + .done = 0, + .flags = flags, + .error = 0, + .kref = { ATOMIC_INIT(2) }, + }; + + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); + err = -ENODEV; + goto out; + } + + if (!ctx->flags) WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); num_pages = b->bm_number_of_pages; @@ -1081,32 +1108,40 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w continue; } } - atomic_inc(&ctx.in_flight); - bm_page_io_async(&ctx, i, rw); + atomic_inc(&ctx->in_flight); + bm_page_io_async(ctx, i, rw); ++count; cond_resched(); } /* - * We initialize ctx.in_flight to one to make sure bm_async_io_complete - * will not complete() early, and decrement / test it here. If there + * We initialize ctx->in_flight to one to make sure bm_async_io_complete + * will not set ctx->done early, and decrement / test it here. If there * are still some bios in flight, we need to wait for them here. + * If all IO is done already (or nothing had been submitted), there is + * no need to wait. Still, we need to put the kref associated with the + * "in_flight reached zero, all done" event. */ - if (!atomic_dec_and_test(&ctx.in_flight)) - wait_for_completion(&ctx.done); + if (!atomic_dec_and_test(&ctx->in_flight)) + wait_until_done_or_disk_failure(mdev, &ctx->done); + else + kref_put(&ctx->kref, &bm_aio_ctx_destroy); /* summary for global bitmap IO */ if (flags == 0) dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", - rw == WRITE ? "WRITE" : "READ", - count, jiffies - now); + rw == WRITE ? "WRITE" : "READ", + count, jiffies - now); - if (ctx.error) { + if (ctx->error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); drbd_chk_io_error(mdev, 1, true); - err = -EIO; /* ctx.error ? */ + err = -EIO; /* ctx->error ? */ } + if (atomic_read(&ctx->in_flight)) + err = -EIO; /* Disk failed during IO... */ + now = jiffies; if (rw == WRITE) { drbd_md_flush(mdev); @@ -1121,6 +1156,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); +out: + kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; } @@ -1177,28 +1214,46 @@ int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) { - struct bm_aio_ctx ctx = { + struct bm_aio_ctx *ctx; + int err; + + if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { + dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); + return 0; + } + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; + + *ctx = (struct bm_aio_ctx) { .mdev = mdev, .in_flight = ATOMIC_INIT(1), - .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), + .done = 0, .flags = BM_AIO_COPY_PAGES, + .error = 0, + .kref = { ATOMIC_INIT(2) }, }; - if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { - dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); - return 0; + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); + err = -ENODEV; + goto out; } - bm_page_io_async(&ctx, idx, WRITE_SYNC); - wait_for_completion(&ctx.done); + bm_page_io_async(ctx, idx, WRITE_SYNC); + wait_until_done_or_disk_failure(mdev, &ctx->done); - if (ctx.error) + if (ctx->error) drbd_chk_io_error(mdev, 1, true); /* that should force detach, so the in memory bitmap will be * gone in a moment as well. */ mdev->bm_writ_cnt++; - return ctx.error; + err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; + out: + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + return err; } /* NOTE diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6035784f0de3..4e582058a7c9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -780,8 +780,7 @@ struct drbd_backing_dev { }; struct drbd_md_io { - struct drbd_conf *mdev; - struct completion event; + unsigned int done; int error; }; @@ -852,6 +851,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_tl_epoch *newest_tle; struct drbd_tl_epoch *oldest_tle; struct list_head out_of_sequence_requests; + struct list_head barrier_acked_requests; struct crypto_hash *cram_hmac_tfm; struct crypto_hash *integrity_tfm; /* checksums we compute, updates protected by tconn->data->mutex */ @@ -978,7 +978,8 @@ struct drbd_conf { atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct mutex md_io_mutex; /* protects the md_io_buffer */ + struct drbd_md_io md_io; + atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */ spinlock_t al_lock; wait_queue_head_t al_wait; struct lru_cache *act_log; /* activity log */ @@ -1424,9 +1425,12 @@ extern void resume_next_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev); extern int drbd_resync_finished(struct drbd_conf *mdev); /* maybe rather drbd_main.c ? */ +extern void *drbd_md_get_buffer(struct drbd_conf *mdev); +extern void drbd_md_put_buffer(struct drbd_conf *mdev); extern int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw); extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int); +extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done); extern void drbd_rs_controller_reset(struct drbd_conf *mdev); static inline void ov_out_of_sync_print(struct drbd_conf *mdev) @@ -2151,12 +2155,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) case D_OUTDATED: case D_CONSISTENT: case D_UP_TO_DATE: + case D_FAILED: /* disk state is stable as well. */ break; /* no new io accepted during transitional states */ case D_ATTACHING: - case D_FAILED: case D_NEGOTIATING: case D_UNKNOWN: case D_MASK: diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 448de7bf8223..15384986e4a4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -215,6 +215,7 @@ static int tl_init(struct drbd_tconn *tconn) tconn->oldest_tle = b; tconn->newest_tle = b; INIT_LIST_HEAD(&tconn->out_of_sequence_requests); + INIT_LIST_HEAD(&tconn->barrier_acked_requests); return 1; } @@ -315,7 +316,7 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, These have been list_move'd to the out_of_sequence_requests list in _req_mod(, BARRIER_ACKED) above. */ - list_del_init(&b->requests); + list_splice_init(&b->requests, &tconn->barrier_acked_requests); mdev = b->w.mdev; nob = b->next; @@ -417,8 +418,23 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) b = tmp; list_splice(&carry_reads, &b->requests); } -} + /* Actions operating on the disk state, also want to work on + requests that got barrier acked. */ + switch (what) { + case FAIL_FROZEN_DISK_IO: + case RESTART_FROZEN_DISK_IO: + list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + _req_mod(req, what); + } + case CONNECTION_LOST_WHILE_PENDING: + case RESEND: + break; + default: + conn_err(tconn, "what = %d in _tl_restart()\n", what); + } +} /** * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL @@ -467,6 +483,42 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) spin_unlock_irq(&tconn->req_lock); } +/** + * tl_apply() - Applies an event to all requests for a certain mdev in the TL + * @mdev: DRBD device. + * @what: The action/event to perform with all request objects + * + * @what might ony be ABORT_DISK_IO. + */ +void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what) +{ + struct drbd_tconn *tconn = mdev->tconn; + struct drbd_tl_epoch *b; + struct list_head *le, *tle; + struct drbd_request *req; + + D_ASSERT(what == ABORT_DISK_IO); + + spin_lock_irq(&tconn->req_lock); + b = tconn->oldest_tle; + while (b) { + list_for_each_safe(le, tle, &b->requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (req->w.mdev == mdev) + _req_mod(req, what); + } + b = b->next; + } + + list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (req->w.mdev == mdev) + _req_mod(req, what); + } + + spin_unlock_irq(&tconn->req_lock); +} + static int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; @@ -2003,8 +2055,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->rs_sect_in, 0); atomic_set(&mdev->rs_sect_ev, 0); atomic_set(&mdev->ap_in_flight, 0); + atomic_set(&mdev->md_io_in_use, 0); - mutex_init(&mdev->md_io_mutex); mutex_init(&mdev->own_state_mutex); mdev->state_mutex = &mdev->own_state_mutex; @@ -2282,6 +2334,8 @@ void drbd_minor_destroy(struct kref *kref) struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref); struct drbd_tconn *tconn = mdev->tconn; + del_timer_sync(&mdev->request_timer); + /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); D_ASSERT(list_empty(&mdev->tconn->data.work.q)); @@ -2868,8 +2922,10 @@ void drbd_md_sync(struct drbd_conf *mdev) if (!get_ldev_if_state(mdev, D_FAILED)) return; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; + memset(buffer, 0, 512); buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); @@ -2900,7 +2956,8 @@ void drbd_md_sync(struct drbd_conf *mdev) * since we updated it on metadata. */ mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); +out: put_ldev(mdev); } @@ -2920,8 +2977,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!get_ldev_if_state(mdev, D_ATTACHING)) return ERR_IO_MD_DISK; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { /* NOTE: can't do normal error processing here as this is @@ -2983,7 +3041,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF; err: - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); + out: put_ldev(mdev); return rv; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 97d1dab045d2..bf8d0b077624 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1236,6 +1236,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) synchronize_rcu(); kfree(old_disk_conf); kfree(old_plan); + mod_timer(&mdev->request_timer, jiffies + HZ); goto success; fail_unlock: @@ -1628,6 +1629,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) if (rv < SS_SUCCESS) goto force_diskless_dec; + mod_timer(&mdev->request_timer, jiffies + HZ); + if (mdev->state.role == R_PRIMARY) mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; else @@ -1667,10 +1670,17 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) return 0; } -static int adm_detach(struct drbd_conf *mdev) +static int adm_detach(struct drbd_conf *mdev, int force) { enum drbd_state_rv retcode; int ret; + + if (force) { + drbd_force_state(mdev, NS(disk, D_FAILED)); + retcode = SS_SUCCESS; + goto out; + } + drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); /* D_FAILED will transition to DISKLESS. */ @@ -1681,6 +1691,7 @@ static int adm_detach(struct drbd_conf *mdev) retcode = SS_NOTHING_TO_DO; if (ret) retcode = ERR_INTR; +out: return retcode; } @@ -1692,6 +1703,8 @@ static int adm_detach(struct drbd_conf *mdev) int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) { enum drbd_ret_code retcode; + struct detach_parms parms = { }; + int err; retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); if (!adm_ctx.reply_skb) @@ -1699,7 +1712,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - retcode = adm_detach(adm_ctx.mdev); + if (info->attrs[DRBD_NLA_DETACH_PARMS]) { + err = detach_parms_from_attrs(&parms, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + } + + retcode = adm_detach(adm_ctx.mdev, parms.force_detach); out: drbd_adm_finish(info, retcode); return 0; @@ -3116,7 +3138,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) /* detach */ idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { - retcode = adm_detach(mdev); + retcode = adm_detach(mdev, 0); if (retcode < SS_SUCCESS) { drbd_msg_put_info("failed to detach"); goto out; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7218750d2937..3a7e54b8f418 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4366,8 +4366,6 @@ static int drbd_disconnected(struct drbd_conf *mdev) atomic_set(&mdev->rs_pending_cnt, 0); wake_up(&mdev->misc_wait); - del_timer(&mdev->request_timer); - del_timer_sync(&mdev->resync_timer); resync_timer_fn((unsigned long)mdev); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c4e4553f5c2c..8fa51cda3b7e 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -213,8 +213,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) { const unsigned long s = req->rq_state; struct drbd_conf *mdev = req->w.mdev; - /* only WRITES may end up here without a master bio (on barrier ack) */ - int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; + int rw = req->rq_state & RQ_WRITE ? WRITE : READ; /* we must not complete the master bio, while it is * still being processed by _drbd_send_zc_bio (drbd_send_dblock) @@ -225,7 +224,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) * the receiver, * the bio_endio completion callbacks. */ - if (s & RQ_LOCAL_PENDING) + if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) return; if (req->i.waiting) { /* Retry all conflicting peer requests. */ @@ -288,6 +287,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) req->master_bio = NULL; } + if (s & RQ_LOCAL_PENDING) + return; + if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { /* this is disconnected (local only) operation, * or protocol C P_WRITE_ACK, @@ -362,7 +364,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case COMPLETED_OK: - if (bio_data_dir(req->master_bio) == WRITE) + if (req->rq_state & RQ_WRITE) mdev->writ_cnt += req->i.size >> 9; else mdev->read_cnt += req->i.size >> 9; @@ -374,6 +376,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, put_ldev(mdev); break; + case ABORT_DISK_IO: + req->rq_state |= RQ_LOCAL_ABORTED; + if (req->rq_state & RQ_WRITE) + _req_may_be_done_not_susp(req, m); + else + goto goto_queue_for_net_read; + break; + case WRITE_COMPLETED_WITH_ERROR: req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; @@ -402,6 +412,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, __drbd_chk_io_error(mdev, false); put_ldev(mdev); + goto_queue_for_net_read: + /* no point in retrying if there is no good remote data, * or we have no connection. */ if (mdev->state.pdsk != D_UP_TO_DATE) { @@ -1071,14 +1083,21 @@ void request_timer_fn(unsigned long data) struct drbd_request *req; /* oldest request */ struct list_head *le; struct net_conf *nc; - unsigned long et; /* effective timeout = ko_count * timeout */ + unsigned long ent = 0, dt = 0, et; /* effective timeout = ko_count * timeout */ rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - et = nc ? nc->timeout * HZ/10 * nc->ko_count : 0; + ent = nc ? nc->timeout * HZ/10 * nc->ko_count : 0; + + if (get_ldev(mdev)) { + dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10; + put_ldev(mdev); + } rcu_read_unlock(); - if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) + et = min_not_zero(dt, ent); + + if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED)) return; /* Recurring timer stopped */ spin_lock_irq(&tconn->req_lock); @@ -1091,17 +1110,18 @@ void request_timer_fn(unsigned long data) le = le->prev; req = list_entry(le, struct drbd_request, tl_requests); - if (time_is_before_eq_jiffies(req->start_time + et)) { - if (req->rq_state & RQ_NET_PENDING) { + if (ent && req->rq_state & RQ_NET_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + ent)) { dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); - _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); - } else { - dev_warn(DEV, "Local backing block device frozen?\n"); - mod_timer(&mdev->request_timer, jiffies + et); + _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); + } + } + if (dt && req->rq_state & RQ_LOCAL_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + dt)) { + dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); + __drbd_chk_io_error(mdev, 1); } - } else { - mod_timer(&mdev->request_timer, req->start_time + et); } - spin_unlock_irq(&tconn->req_lock); + mod_timer(&mdev->request_timer, req->start_time + et); } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 5135c95fbf85..f6aff150addb 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -106,6 +106,7 @@ enum drbd_req_event { READ_COMPLETED_WITH_ERROR, READ_AHEAD_COMPLETED_WITH_ERROR, WRITE_COMPLETED_WITH_ERROR, + ABORT_DISK_IO, COMPLETED_OK, RESEND, FAIL_FROZEN_DISK_IO, @@ -119,18 +120,21 @@ enum drbd_req_event { * same time, so we should hold the request lock anyways. */ enum drbd_req_state_bits { - /* 210 - * 000: no local possible - * 001: to be submitted + /* 3210 + * 0000: no local possible + * 0001: to be submitted * UNUSED, we could map: 011: submitted, completion still pending - * 110: completed ok - * 010: completed with error + * 0110: completed ok + * 0010: completed with error + * 1001: Aborted (before completion) + * 1x10: Aborted and completed -> free */ __RQ_LOCAL_PENDING, __RQ_LOCAL_COMPLETED, __RQ_LOCAL_OK, + __RQ_LOCAL_ABORTED, - /* 76543 + /* 87654 * 00000: no network possible * 00001: to be send * 00011: to be send, on worker queue @@ -209,8 +213,9 @@ enum drbd_req_state_bits { #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) #define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) +#define RQ_LOCAL_ABORTED (1UL << __RQ_LOCAL_ABORTED) -#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ +#define RQ_LOCAL_MASK ((RQ_LOCAL_ABORTED << 1)-1) #define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) #define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 4c13a6f4f184..f51cefdbeff3 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -29,6 +29,9 @@ #include "drbd_int.h" #include "drbd_req.h" +/* in drbd_main.c */ +extern void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what); + struct after_state_chg_work { struct drbd_work w; union drbd_state os; @@ -1315,6 +1318,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, rcu_read_unlock(); was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + /* Immediately allow completion of all application IO, that waits + for completion from the local disk. */ + tl_apply(mdev, ABORT_DISK_IO); + /* current state still has to be D_FAILED, * there is only one way out: to D_DISKLESS, * and that may only happen after our put_ldev below. */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 6410c55831e0..dac8d9bc4bec 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -67,11 +67,18 @@ rwlock_t global_state_lock; void drbd_md_io_complete(struct bio *bio, int error) { struct drbd_md_io *md_io; + struct drbd_conf *mdev; md_io = (struct drbd_md_io *)bio->bi_private; + mdev = container_of(md_io, struct drbd_conf, md_io); + md_io->error = error; - complete(&md_io->event); + md_io->done = 1; + wake_up(&mdev->misc_wait); + bio_put(bio); + drbd_md_put_buffer(mdev); + put_ldev(mdev); } /* reads on behalf of the partner, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index e879a9324380..2e6cefefe5e5 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -128,6 +128,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, @@ -224,6 +225,10 @@ GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, __flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect) ) +GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach) +) + /* * Notifications and commands (genlmsghdr->cmd) */ @@ -335,7 +340,9 @@ GENL_op( ) GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), - GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY)) + GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index f1046b13d9f6..ddd332db2a5d 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -50,6 +50,12 @@ #define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ + /* If backing disk takes longer than disk_timeout, mark the disk as failed */ +#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ +#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */ +#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */ +#define DRBD_DISK_TIMEOUT_SCALE '1' + /* active connection retries when C_WF_CONNECTION */ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 -- cgit v1.2.3 From a209b4aec31d4b672b7a70f5de272ebf6ce40e1b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 17 Aug 2011 12:43:25 +0200 Subject: drbd: Update some outdated comments to match the code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 4 ++-- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_req.c | 7 +++---- drivers/block/drbd/drbd_worker.c | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 706e5220dd4a..092f8273e6bd 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -389,8 +389,8 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) /* Trying kmalloc first, falling back to vmalloc. * GFP_KERNEL is ok, as this is done when a lower level disk is - * "attached" to the drbd. Context is receiver thread or cqueue - * thread. As we have no disk yet, we are not in the IO path, + * "attached" to the drbd. Context is receiver thread or drbdsetup / + * netlink process. As we have no disk yet, we are not in the IO path, * not even the IO path of the peer. */ bytes = sizeof(struct page *)*want; new_pages = kmalloc(bytes, GFP_KERNEL); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 92adb3dc82c2..684e2e4d48e1 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2038,7 +2038,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) goto fail; } - /* allocation not in the IO path, cqueue thread context */ + /* allocation not in the IO path, drbdsetup / netlink process context */ new_conf = kzalloc(sizeof(*new_conf), GFP_KERNEL); if (!new_conf) { retcode = ERR_NOMEM; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 60fc186d0a3d..d61309db14a2 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -292,13 +292,12 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { /* this is disconnected (local only) operation, - * or protocol C P_WRITE_ACK, - * or protocol A or B P_BARRIER_ACK, + * or protocol A, B, or C P_BARRIER_ACK, * or killed from the transfer log due to connection loss. */ _req_is_done(mdev, req, rw); } /* else: network part and not DONE yet. that is - * protocol A or B, barrier ack still pending... */ + * protocol A, B, or C, barrier ack still pending... */ } static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m) @@ -668,7 +667,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; if (req->rq_state & RQ_NET_PENDING) { - /* barrier came in before all requests have been acked. + /* barrier came in before all requests were acked. * this is bad, because if the connection is lost now, * we won't be able to clean them up... */ dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n"); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c57e47c0a1f5..34a6065d95e6 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -969,7 +969,7 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) } /** - * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUESTRS + * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST * @mdev: DRBD device. * @w: work object. * @cancel: The connection will be closed anyways -- cgit v1.2.3 From 22d81140aea85f9ac388fa12768dc502ef00eaae Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Sep 2011 09:58:04 +0200 Subject: drbd: fix bitmap writeout after aborted resync Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 092f8273e6bd..fcbc5e1ca50d 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1448,13 +1448,21 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, { int i; int bits; + int changed = 0; unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1); for (i = first_word; i < last_word; i++) { bits = hweight_long(paddr[i]); paddr[i] = ~0UL; - b->bm_set += BITS_PER_LONG - bits; + changed += BITS_PER_LONG - bits; } kunmap_atomic(paddr, KM_IRQ1); + if (changed) { + /* We only need lazy writeout, the information is still in the + * remote bitmap as well, and is reconstructed during the next + * bitmap exchange, if lost locally due to a crash. */ + bm_set_page_lazy_writeout(b->bm_pages[page_nr]); + b->bm_set += changed; + } } /* Same thing as drbd_bm_set_bits, -- cgit v1.2.3 From 1a3cde440615b0be304b3f92486c5c69ede4666b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 30 Dec 2011 00:28:23 +0100 Subject: drbd: drbd_bm_ALe_set_all(): Remove unused function Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 46 ---------------------------------------- drivers/block/drbd/drbd_int.h | 2 -- 2 files changed, 48 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index fcbc5e1ca50d..e63dcd9aada6 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1661,49 +1661,3 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) spin_unlock_irqrestore(&b->bm_lock, flags); return count; } - -/* Set all bits covered by the AL-extent al_enr. - * Returns number of bits changed. */ -unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) -{ - struct drbd_bitmap *b = mdev->bitmap; - unsigned long *p_addr, *bm; - unsigned long weight; - unsigned long s, e; - int count, i, do_now; - if (!expect(b)) - return 0; - if (!expect(b->bm_pages)) - return 0; - - spin_lock_irq(&b->bm_lock); - if (BM_DONT_SET & b->bm_flags) - bm_print_lock_info(mdev); - weight = b->bm_set; - - s = al_enr * BM_WORDS_PER_AL_EXT; - e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words); - /* assert that s and e are on the same page */ - D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3) - == s >> (PAGE_SHIFT - LN2_BPL + 3)); - count = 0; - if (s < b->bm_words) { - i = do_now = e-s; - p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); - bm = p_addr + MLPP(s); - while (i--) { - count += hweight_long(*bm); - *bm = -1UL; - bm++; - } - bm_unmap(p_addr); - b->bm_set += do_now*BITS_PER_LONG - count; - if (e == b->bm_words) - b->bm_set -= bm_clear_surplus(b); - } else { - dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s); - } - weight = b->bm_set - weight; - spin_unlock_irq(&b->bm_lock); - return weight; -} diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e2cccb40f5af..8001b7a2063b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1310,8 +1310,6 @@ extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); extern void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr); extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local); -extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, - unsigned long al_enr); extern size_t drbd_bm_words(struct drbd_conf *mdev); extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); -- cgit v1.2.3 From 32db80f6f6326617ed40b2d157709226af4f062b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 22 Feb 2012 11:51:57 +0100 Subject: drbd: Consider the disk-timeout also for meta-data IO operations If the backing device is already frozen during attach, we failed to recognize that. The current disk-timeout code works on top of the drbd_request objects. During attach we do not allow IO and therefore never generate a drbd_request object but block before that in drbd_make_request(). This patch adds the timeout to all drbd_md_sync_page_io(). Before this patch we used to go from D_ATTACHING directly to D_DISKLESS if IO failed during attach. We can no longer do this since we have to stay in D_FAILED until all IO ops issued to the backing device returned. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 18 +++++++++++--- drivers/block/drbd/drbd_bitmap.c | 4 +-- drivers/block/drbd/drbd_int.h | 3 ++- drivers/block/drbd/drbd_state.c | 54 +++++++++++++++++++++------------------- 4 files changed, 48 insertions(+), 31 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index d69fb7d61817..b2355994a4d6 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -129,9 +129,21 @@ static bool md_io_allowed(struct drbd_conf *mdev) return ds >= D_NEGOTIATING || ds == D_ATTACHING; } -void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done) +void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, + unsigned int *done) { - wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev)); + long dt; + + rcu_read_lock(); + dt = rcu_dereference(bdev->disk_conf)->disk_timeout; + rcu_read_unlock(); + dt = dt * HZ / 10; + if (dt == 0) + dt = MAX_SCHEDULE_TIMEOUT; + + dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt); + if (dt == 0) + dev_err(DEV, "meta-data IO operation timed out\n"); } static int _drbd_md_sync_page_io(struct drbd_conf *mdev, @@ -171,7 +183,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_until_done_or_disk_failure(mdev, &mdev->md_io.done); + wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done); if (bio_flagged(bio, BIO_UPTODATE)) err = mdev->md_io.error; diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e63dcd9aada6..ef6a79b46dfa 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1123,7 +1123,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w * "in_flight reached zero, all done" event. */ if (!atomic_dec_and_test(&ctx->in_flight)) - wait_until_done_or_disk_failure(mdev, &ctx->done); + wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); else kref_put(&ctx->kref, &bm_aio_ctx_destroy); @@ -1242,7 +1242,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc } bm_page_io_async(ctx, idx, WRITE_SYNC); - wait_until_done_or_disk_failure(mdev, &ctx->done); + wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); if (ctx->error) drbd_chk_io_error(mdev, 1, true); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e8461f8cb046..b914f566ef36 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1428,7 +1428,8 @@ extern void drbd_md_put_buffer(struct drbd_conf *mdev); extern int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw); extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int); -extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done); +extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, + unsigned int *done); extern void drbd_rs_controller_reset(struct drbd_conf *mdev); static inline void ov_out_of_sync_print(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c0563a1aac4d..96fdbfa79a6c 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1308,37 +1308,41 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* first half of local IO error, failure to attach, * or administrative detach */ if (os.disk != D_FAILED && ns.disk == D_FAILED) { - enum drbd_io_error_p eh; - int was_io_error; + enum drbd_io_error_p eh = EP_PASS_ON; + int was_io_error = 0; /* corresponding get_ldev was in __drbd_set_state, to serialize - * our cleanup here with the transition to D_DISKLESS, - * so it is safe to dreference ldev here. */ - rcu_read_lock(); - eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error; - rcu_read_unlock(); - was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + * our cleanup here with the transition to D_DISKLESS. + * But is is still not save to dreference ldev here, since + * we might come from an failed Attach before ldev was set. */ + if (mdev->ldev) { + rcu_read_lock(); + eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error; + rcu_read_unlock(); - /* Immediately allow completion of all application IO, that waits - for completion from the local disk. */ - tl_abort_disk_io(mdev); + was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); - /* current state still has to be D_FAILED, - * there is only one way out: to D_DISKLESS, - * and that may only happen after our put_ldev below. */ - if (mdev->state.disk != D_FAILED) - dev_err(DEV, - "ASSERT FAILED: disk is %s during detach\n", - drbd_disk_str(mdev->state.disk)); + /* Immediately allow completion of all application IO, that waits + for completion from the local disk. */ + tl_abort_disk_io(mdev); - if (ns.conn >= C_CONNECTED) - drbd_send_state(mdev, ns); + /* current state still has to be D_FAILED, + * there is only one way out: to D_DISKLESS, + * and that may only happen after our put_ldev below. */ + if (mdev->state.disk != D_FAILED) + dev_err(DEV, + "ASSERT FAILED: disk is %s during detach\n", + drbd_disk_str(mdev->state.disk)); - drbd_rs_cancel_all(mdev); + if (ns.conn >= C_CONNECTED) + drbd_send_state(mdev, ns); - /* In case we want to get something to stable storage still, - * this may be the last chance. - * Following put_ldev may transition to D_DISKLESS. */ - drbd_md_sync(mdev); + drbd_rs_cancel_all(mdev); + + /* In case we want to get something to stable storage still, + * this may be the last chance. + * Following put_ldev may transition to D_DISKLESS. */ + drbd_md_sync(mdev); + } put_ldev(mdev); if (was_io_error && eh == EP_CALL_HELPER) -- cgit v1.2.3 From 9dab3842b5bfffc20135ea56f147e5fe2857be40 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 26 Mar 2012 16:22:00 +0200 Subject: drbd: fix memleak in error path in bm_rw and drbd_bm_write_range Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index ef6a79b46dfa..e343817bc69e 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1075,8 +1075,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); - err = -ENODEV; - goto out; + kfree(ctx); + return -ENODEV; } if (!ctx->flags) @@ -1156,7 +1156,6 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); -out: kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; } @@ -1237,8 +1236,8 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); - err = -ENODEV; - goto out; + kfree(ctx); + return -ENODEV; } bm_page_io_async(ctx, idx, WRITE_SYNC); @@ -1251,7 +1250,6 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc mdev->bm_writ_cnt++; err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; - out: kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; } -- cgit v1.2.3 From a220d291804233e3a5e3425abf79fa1e62e7bd35 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 May 2012 12:07:18 +0200 Subject: drbd: allow bitmap to change during writeout from resync_finished Symptom: messages similar to "FIXME asender in bm_change_bits_to, bitmap locked for 'write from resync_finished' by worker" If a resync or verify is finished (or aborted), a full bitmap writeout is triggered. If we have ongoing local IO, the bitmap may still change during that writeout, pending and not yet processed acks may cause bits to be cleared, while new writes may cause bits to be to be set. To fix this, introduce the drbd_bm_write_copy_pages() variant. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 16 ++++++++++++++++ drivers/block/drbd/drbd_int.h | 15 +++++++++++---- drivers/block/drbd/drbd_state.c | 4 ++-- 3 files changed, 29 insertions(+), 6 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e343817bc69e..ddd297708194 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1190,6 +1190,22 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx); } +/** + * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location. + * @mdev: DRBD device. + * + * Will only write pages that have changed since last IO. + * In contrast to drbd_bm_write(), this will copy the bitmap pages + * to temporary writeout pages. It is intended to trigger a full write-out + * while still allowing the bitmap to change, for example if a resync or online + * verify is aborted due to a failed peer disk, while local IO continues, or + * pending resync acks are still being processed. + */ +int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local) +{ + return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0); +} + /** * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed. * @mdev: DRBD device. diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 976e78cadd3e..5b1789af6cdf 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -709,22 +709,28 @@ enum bm_flag { BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */ /* currently locked for bulk operation */ - BM_LOCKED_MASK = 0x7, + BM_LOCKED_MASK = 0xf, /* in detail, that is: */ BM_DONT_CLEAR = 0x1, BM_DONT_SET = 0x2, BM_DONT_TEST = 0x4, + /* so we can mark it locked for bulk operation, + * and still allow all non-bulk operations */ + BM_IS_LOCKED = 0x8, + /* (test bit, count bit) allowed (common case) */ - BM_LOCKED_TEST_ALLOWED = 0x3, + BM_LOCKED_TEST_ALLOWED = BM_DONT_CLEAR | BM_DONT_SET | BM_IS_LOCKED, /* testing bits, as well as setting new bits allowed, but clearing bits * would be unexpected. Used during bitmap receive. Setting new bits * requires sending of "out-of-sync" information, though. */ - BM_LOCKED_SET_ALLOWED = 0x1, + BM_LOCKED_SET_ALLOWED = BM_DONT_CLEAR | BM_IS_LOCKED, - /* clear is not expected while bitmap is locked for bulk operation */ + /* for drbd_bm_write_copy_pages, everything is allowed, + * only concurrent bulk operations are locked out. */ + BM_LOCKED_CHANGE_ALLOWED = BM_IS_LOCKED, }; struct drbd_work_queue { @@ -1306,6 +1312,7 @@ extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); extern void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr); extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local); +extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local); extern size_t drbd_bm_words(struct drbd_conf *mdev); extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 2673049df34c..dd618b5346f2 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1421,8 +1421,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * No harm done if some bits change during this phase. */ if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { - drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, - "write from resync_finished", BM_LOCKED_SET_ALLOWED); + drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL, + "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED); put_ldev(mdev); } -- cgit v1.2.3 From f66ee69746f6413cae41bdc8b26260e653f62402 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 May 2012 13:04:03 +0200 Subject: drbd: bm_page_async_io: properly initialize page->private If bm_page_async_io is advised to use a new page for I/O (BM_AIO_COPY_PAGES is set), it will get it from a mempool. Once the mempool has to dip into its reserves the page is not reinitialized, i.e. page->private contains garbage, which will lead to various problems once the I/O completes (dereferences of NULL pointers, the submitting thread getting stuck in D-state, ...). Signed-off-by: Arne Redlich Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index ddd297708194..65c55ecfeaef 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -200,7 +200,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) static void bm_store_page_idx(struct page *page, unsigned long idx) { BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); - page_private(page) |= idx; + set_page_private(page, idx); } static unsigned long bm_page_to_idx(struct page *page) -- cgit v1.2.3 From 1b6f19740da8e7ed2d1216dc69a972d10de4f0e9 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 8 Jun 2012 15:06:39 +0200 Subject: drbd: fix access of unallocated pages and kernel panic BUG: unable to handle kernel NULL pointer dereference at (null) ... [] ? _drbd_bm_set_bits+0x151/0x240 [drbd] [] ? receive_bitmap+0x4f8/0xbc0 [drbd] This fixes an off-by-one error in the receive_bitmap() path, if run-length encoded bitmap transfer is enabled. If the bitmap is an exact multiple of PAGE_SIZE, which means the visible capacity of the drbd device is an exact multiple of 128 MiB (for 4k page size), and bitmap compression (use-rle) is enabled (which became default with 8.4), and the very last bit is dirty and reported in an rle comressed bitmap packet, we ended up trying to kmap_atomic a page pointer that does not exist (bitmap->bm_pages[last index + 1]). bug introduced by: Date: Fri Jul 24 15:33:24 2009 +0200 set bits: optimize for complete last word, fix off-by-one-word corner case made effective by: Date: Thu Dec 16 00:32:38 2010 +0100 drbd: get rid of unused debug code Long time ago, we had paranoia code in the bitmap that allocated one extra word, assigned a magic value, and checked on every occasion that the magic value was still unchanged. That debug code is unused, the extra long word complicates code a bit. Get rid of it. No-one triggered this bug in the last few years, because a large subset of our userbase is unaffected: * typically the last few blocks of a device are not modified frequently, and remain unset * use-rle was disabled by default in drbd < 8.4 * those with slightly "odd" device sizes, or * drbd internal meta data (which will skew the device size slightly, thus makes it harder to have a bug relevant device size) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 65c55ecfeaef..b3d55d4b6937 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1535,10 +1535,17 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi first_word = 0; spin_lock_irq(&b->bm_lock); } - /* last page (respectively only page, for first page == last page) */ last_word = MLPP(el >> LN2_BPL); - bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); + + /* consider bitmap->bm_bits = 32768, bitmap->bm_number_of_pages = 1. (or multiples). + * ==> e = 32767, el = 32768, last_page = 2, + * and now last_word = 0. + * We do not want to touch last_page in this case, + * as we did not allocate it, it is not present in bitmap->bm_pages. + */ + if (last_word) + bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); /* possibly trailing bits. * example: (e & 63) == 63, el will be e+1. -- cgit v1.2.3 From 0c849666016cbf541c1030eec55f5f8dd1fba513 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 30 Jul 2012 09:07:28 +0200 Subject: drbd: differentiate between normal and forced detach Aborting local requests (not waiting for completion from the lower level disk) is dangerous: if the master bio has been completed to upper layers, data pages may be re-used for other things already. If local IO is still pending and later completes, this may cause crashes or corrupt unrelated data. Only abort local IO if explicitly requested. Intended use case is a lower level device that turned into a tarpit, not completing io requests, not even doing error completion. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_bitmap.c | 4 ++-- drivers/block/drbd/drbd_int.h | 17 ++++++++++++++--- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 4 ++++ drivers/block/drbd/drbd_req.c | 4 ++-- drivers/block/drbd/drbd_state.c | 18 +++++++++++++++--- drivers/block/drbd/drbd_worker.c | 4 ++-- 8 files changed, 41 insertions(+), 14 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index f500dc5cdf52..209b2e063b92 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -445,7 +445,7 @@ _al_write_transaction(struct drbd_conf *mdev) /* drbd_chk_io_error done already */ else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { err = -EIO; - drbd_chk_io_error(mdev, 1, true); + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); } else { /* advance ringbuffer position and transaction counter */ mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index b3d55d4b6937..33626e34c92a 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1135,7 +1135,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w if (ctx->error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); - drbd_chk_io_error(mdev, 1, true); + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); err = -EIO; /* ctx->error ? */ } @@ -1260,7 +1260,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); if (ctx->error) - drbd_chk_io_error(mdev, 1, true); + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); /* that should force detach, so the in memory bitmap will be * gone in a moment as well. */ diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b343875c9dee..963766bafab4 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -689,6 +689,7 @@ enum { BITMAP_IO_QUEUED, /* Started bitmap IO */ GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ WAS_IO_ERROR, /* Local disk failed returned IO error */ + FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ RESIZE_PENDING, /* Size change detected locally, waiting for the response from * the peer, if it changed there as well. */ @@ -1653,8 +1654,16 @@ static inline union drbd_state drbd_read_state(struct drbd_conf *mdev) return rv; } +enum drbd_force_detach_flags { + DRBD_IO_ERROR, + DRBD_META_IO_ERROR, + DRBD_FORCE_DETACH, +}; + #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) -static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) +static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, + enum drbd_force_detach_flags forcedetach, + const char *where) { enum drbd_io_error_p ep; @@ -1663,7 +1672,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, rcu_read_unlock(); switch (ep) { case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */ - if (!forcedetach) { + if (forcedetach == DRBD_IO_ERROR) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Local IO failed in %s.\n", where); if (mdev->state.disk > D_INCONSISTENT) @@ -1674,6 +1683,8 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, case EP_DETACH: case EP_CALL_HELPER: set_bit(WAS_IO_ERROR, &mdev->flags); + if (forcedetach == DRBD_FORCE_DETACH) + set_bit(FORCE_DETACH, &mdev->flags); if (mdev->state.disk > D_FAILED) { _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); dev_err(DEV, @@ -1693,7 +1704,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, */ #define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__) static inline void drbd_chk_io_error_(struct drbd_conf *mdev, - int error, int forcedetach, const char *where) + int error, enum drbd_force_detach_flags forcedetach, const char *where) { if (error) { unsigned long flags; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c0acd86c8415..849e5de9ea8f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2866,7 +2866,7 @@ void drbd_md_sync(struct drbd_conf *mdev) if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { /* this was a try anyways ... */ dev_err(DEV, "meta data update failed!\n"); - drbd_chk_io_error(mdev, 1, true); + drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); } /* Update mdev->ldev->md.la_size_sect, diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 323293e88878..d4c05e26a13a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1299,6 +1299,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * to realize a "hot spare" feature (not that I'd recommend that) */ wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); + /* make sure there is no leftover from previous force-detach attempts */ + clear_bit(FORCE_DETACH, &mdev->flags); + /* allocation not in the IO path, drbdsetup context */ nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); if (!nbc) { @@ -1683,6 +1686,7 @@ static int adm_detach(struct drbd_conf *mdev, int force) int ret; if (force) { + set_bit(FORCE_DETACH, &mdev->flags); drbd_force_state(mdev, NS(disk, D_FAILED)); retcode = SS_SUCCESS; goto out; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 891c3d41a277..e215dce4c694 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -490,7 +490,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); /* fall through. */ case WRITE_COMPLETED_WITH_ERROR: - __drbd_chk_io_error(mdev, false); + __drbd_chk_io_error(mdev, DRBD_IO_ERROR); /* fall through. */ case READ_AHEAD_COMPLETED_WITH_ERROR: /* it is legal to fail READA, no __drbd_chk_io_error in that case. */ @@ -1210,7 +1210,7 @@ void request_timer_fn(unsigned long data) time_after(now, req->start_time + dt) && !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) { dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); - __drbd_chk_io_error(mdev, 1); + __drbd_chk_io_error(mdev, DRBD_FORCE_DETACH); } nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et; spin_unlock_irq(&tconn->req_lock); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 84a5072d7370..c9ec7d37632c 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1335,9 +1335,21 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); - /* Immediately allow completion of all application IO, that waits - for completion from the local disk. */ - tl_abort_disk_io(mdev); + /* Immediately allow completion of all application IO, + * that waits for completion from the local disk, + * if this was a force-detach due to disk_timeout + * or administrator request (drbdsetup detach --force). + * Do NOT abort otherwise. + * Aborting local requests may cause serious problems, + * if requests are completed to upper layers already, + * and then later the already submitted local bio completes. + * This can cause DMA into former bio pages that meanwhile + * have been re-used for other things. + * So aborting local requests may cause crashes, + * or even worse, silent data corruption. + */ + if (test_and_clear_bit(FORCE_DETACH, &mdev->flags)) + tl_abort_disk_io(mdev); /* current state still has to be D_FAILED, * there is only one way out: to D_DISKLESS, diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 66be3910e8d2..07a4046dd8c3 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -106,7 +106,7 @@ void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(lo if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) - __drbd_chk_io_error(mdev, false); + __drbd_chk_io_error(mdev, DRBD_IO_ERROR); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); drbd_queue_work(&mdev->tconn->sender_work, &peer_req->w); @@ -148,7 +148,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) - __drbd_chk_io_error(mdev, false); + __drbd_chk_io_error(mdev, DRBD_IO_ERROR); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (block_id == ID_SYNCER) -- cgit v1.2.3 From fef45d297e447d710abcf0cd0bdbf8738ff469eb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 14 Aug 2012 11:46:59 +0200 Subject: drbd: Write all pages of the bitmap after an online resize We need to write the whole bitmap after we moved the meta data due to an online resize operation. With the support for one peta byte devices bitmap IO was optimized to only write out touched pages. This optimization must be turned off when writing the bitmap after an online resize. This issue was introduced with drbd-8.3.10. The impact of this bug is that after an online resize, the next resync could become larger than expected. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 16 +++++++++++++++- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_nl.c | 4 ++-- 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 33626e34c92a..4a076b2553e6 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -922,6 +922,7 @@ struct bm_aio_ctx { unsigned flags; #define BM_AIO_COPY_PAGES 1 #define BM_AIO_WRITE_HINTED 2 +#define BM_WRITE_ALL_PAGES 4 int error; struct kref kref; }; @@ -1096,7 +1097,9 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, &page_private(b->bm_pages[i]))) continue; - if (bm_test_page_unchanged(b->bm_pages[i])) { + + if (!(flags & BM_WRITE_ALL_PAGES) && + bm_test_page_unchanged(b->bm_pages[i])) { dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); continue; } @@ -1180,6 +1183,17 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) return bm_rw(mdev, WRITE, 0, 0); } +/** + * drbd_bm_write_all() - Write the whole bitmap to its on disk location. + * @mdev: DRBD device. + * + * Will write all pages. + */ +int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local) +{ + return bm_rw(mdev, WRITE, BM_WRITE_ALL_PAGES, 0); +} + /** * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed. * @mdev: DRBD device. diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 813f50dbe5ca..d8b3c88d6f1c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1329,6 +1329,7 @@ extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); extern void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr); extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local); +extern int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local); extern size_t drbd_bm_words(struct drbd_conf *mdev); extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d1073705bf1f..c02d5265c397 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -894,8 +894,8 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds la_size_changed && md_moved ? "size changed and md moved" : la_size_changed ? "size changed" : "md moved"); /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ - err = drbd_bitmap_io(mdev, &drbd_bm_write, - "size changed", BM_LOCKED_MASK); + err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write, + "size changed", BM_LOCKED_MASK); if (err) { rv = dev_size_error; goto out; -- cgit v1.2.3 From bc891c9ae3fb2848922e0f0da22fd7de0d58dc1b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 26 Sep 2012 14:18:51 +0200 Subject: drbd: fix potential deadlock during bitmap (re-)allocation The former comment arguing that GFP_KERNEL was good enough was wrong: it did not take resize into account at all, and assumed the only path leading here was the normal attach on a still secondary device, so no deadlock would be possible. Both resize on a Primary, or attach on a diskless Primary, could potentially deadlock. drbd_bm_resize() is called while IO to the respective device is suspended, so we must use GFP_NOIO to avoid potential deadlock. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 4a076b2553e6..e502535d2c4d 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -388,14 +388,16 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) return old_pages; /* Trying kmalloc first, falling back to vmalloc. - * GFP_KERNEL is ok, as this is done when a lower level disk is - * "attached" to the drbd. Context is receiver thread or drbdsetup / - * netlink process. As we have no disk yet, we are not in the IO path, - * not even the IO path of the peer. */ + * GFP_NOIO, as this is called while drbd IO is "suspended", + * and during resize or attach on diskless Primary, + * we must not block on IO to ourselves. + * Context is receiver thread or dmsetup. */ bytes = sizeof(struct page *)*want; - new_pages = kmalloc(bytes, GFP_KERNEL); + new_pages = kmalloc(bytes, GFP_NOIO); if (!new_pages) { - new_pages = vmalloc(bytes); + new_pages = __vmalloc(bytes, + GFP_NOIO | __GFP_HIGHMEM, + PAGE_KERNEL); if (!new_pages) return NULL; vmalloced = 1; @@ -406,7 +408,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) for (i = 0; i < have; i++) new_pages[i] = old_pages[i]; for (; i < want; i++) { - page = alloc_page(GFP_HIGHUSER); + page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); if (!page) { bm_free_pages(new_pages + have, i - have); bm_vk_free(new_pages, vmalloced); -- cgit v1.2.3 From e34b677d09ce375a87acd0360537cbed33881b0c Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 27 Sep 2012 15:07:11 +0200 Subject: drbd: wait for meta data IO completion even with failed disk, unless force-detached The intention of force-detach is to be able to deal with a completely unresponsive lower level IO stack, which does not even deliver error completions anymore, but no completion at all. In all other cases, we must still wait for the meta data IO completion. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 17 +++++++---------- drivers/block/drbd/drbd_bitmap.c | 8 ++++---- drivers/block/drbd/drbd_int.h | 4 ++-- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'drivers/block/drbd/drbd_bitmap.c') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index e81085795ec4..bc6284ef21d5 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -123,13 +123,7 @@ void drbd_md_put_buffer(struct drbd_conf *mdev) wake_up(&mdev->misc_wait); } -static bool md_io_allowed(struct drbd_conf *mdev) -{ - enum drbd_disk_state ds = mdev->state.disk; - return ds >= D_NEGOTIATING || ds == D_ATTACHING; -} - -void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, +void wait_until_done_or_force_detached(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, unsigned int *done) { long dt; @@ -141,9 +135,12 @@ void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing if (dt == 0) dt = MAX_SCHEDULE_TIMEOUT; - dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt); - if (dt == 0) + dt = wait_event_timeout(mdev->misc_wait, + *done || test_bit(FORCE_DETACH, &mdev->flags), dt); + if (dt == 0) { dev_err(DEV, "meta-data IO operation timed out\n"); + drbd_chk_io_error(mdev, 1, DRBD_FORCE_DETACH); + } } static int _drbd_md_sync_page_io(struct drbd_conf *mdev, @@ -183,7 +180,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done); + wait_until_done_or_force_detached(mdev, bdev, &mdev->md_io.done); if (bio_flagged(bio, BIO_UPTODATE)) err = mdev->md_io.error; diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e502535d2c4d..e30ff720894f 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1128,7 +1128,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w * "in_flight reached zero, all done" event. */ if (!atomic_dec_and_test(&ctx->in_flight)) - wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); + wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done); else kref_put(&ctx->kref, &bm_aio_ctx_destroy); @@ -1145,7 +1145,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w } if (atomic_read(&ctx->in_flight)) - err = -EIO; /* Disk failed during IO... */ + err = -EIO; /* Disk timeout/force-detach during IO... */ now = jiffies; if (rw == WRITE) { @@ -1273,11 +1273,11 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc } bm_page_io_async(ctx, idx, WRITE_SYNC); - wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); + wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done); if (ctx->error) drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR); - /* that should force detach, so the in memory bitmap will be + /* that causes us to detach, so the in memory bitmap will be * gone in a moment as well. */ mdev->bm_writ_cnt++; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4aadd0818179..eeab868f056b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1452,8 +1452,8 @@ extern void drbd_md_put_buffer(struct drbd_conf *mdev); extern int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw); extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int); -extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, - unsigned int *done); +extern void wait_until_done_or_force_detached(struct drbd_conf *mdev, + struct drbd_backing_dev *bdev, unsigned int *done); extern void drbd_rs_controller_reset(struct drbd_conf *mdev); static inline void ov_out_of_sync_print(struct drbd_conf *mdev) -- cgit v1.2.3