summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/raid56.c190
-rw-r--r--fs/btrfs/raid56.h148
-rw-r--r--fs/btrfs/super.c1
-rw-r--r--include/trace/events/btrfs.h94
4 files changed, 300 insertions, 133 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index c48b7a0992f6..baba435692d2 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -63,138 +63,6 @@ struct sector_ptr {
unsigned int uptodate:8;
};
-enum btrfs_rbio_ops {
- BTRFS_RBIO_WRITE,
- BTRFS_RBIO_READ_REBUILD,
- BTRFS_RBIO_PARITY_SCRUB,
- BTRFS_RBIO_REBUILD_MISSING,
-};
-
-struct btrfs_raid_bio {
- struct btrfs_io_context *bioc;
-
- /* while we're doing rmw on a stripe
- * we put it into a hash table so we can
- * lock the stripe and merge more rbios
- * into it.
- */
- struct list_head hash_list;
-
- /*
- * LRU list for the stripe cache
- */
- struct list_head stripe_cache;
-
- /*
- * for scheduling work in the helper threads
- */
- struct work_struct work;
-
- /*
- * bio list and bio_list_lock are used
- * to add more bios into the stripe
- * in hopes of avoiding the full rmw
- */
- struct bio_list bio_list;
- spinlock_t bio_list_lock;
-
- /* also protected by the bio_list_lock, the
- * plug list is used by the plugging code
- * to collect partial bios while plugged. The
- * stripe locking code also uses it to hand off
- * the stripe lock to the next pending IO
- */
- struct list_head plug_list;
-
- /*
- * flags that tell us if it is safe to
- * merge with this bio
- */
- unsigned long flags;
-
- /*
- * set if we're doing a parity rebuild
- * for a read from higher up, which is handled
- * differently from a parity rebuild as part of
- * rmw
- */
- enum btrfs_rbio_ops operation;
-
- /* Size of each individual stripe on disk */
- u32 stripe_len;
-
- /* How many pages there are for the full stripe including P/Q */
- u16 nr_pages;
-
- /* How many sectors there are for the full stripe including P/Q */
- u16 nr_sectors;
-
- /* Number of data stripes (no p/q) */
- u8 nr_data;
-
- /* Number of all stripes (including P/Q) */
- u8 real_stripes;
-
- /* How many pages there are for each stripe */
- u8 stripe_npages;
-
- /* How many sectors there are for each stripe */
- u8 stripe_nsectors;
-
- /* First bad stripe, -1 means no corruption */
- s8 faila;
-
- /* Second bad stripe (for RAID6 use) */
- s8 failb;
-
- /* Stripe number that we're scrubbing */
- u8 scrubp;
-
- /*
- * size of all the bios in the bio_list. This
- * helps us decide if the rbio maps to a full
- * stripe or not
- */
- int bio_list_bytes;
-
- int generic_bio_cnt;
-
- refcount_t refs;
-
- atomic_t stripes_pending;
-
- atomic_t error;
-
- /* Bitmap to record which horizontal stripe has data */
- unsigned long dbitmap;
-
- /* Allocated with stripe_nsectors-many bits for finish_*() calls */
- unsigned long finish_pbitmap;
-
- /*
- * these are two arrays of pointers. We allocate the
- * rbio big enough to hold them both and setup their
- * locations when the rbio is allocated
- */
-
- /* pointers to pages that we allocated for
- * reading/writing stripes directly from the disk (including P/Q)
- */
- struct page **stripe_pages;
-
- /* Pointers to the sectors in the bio_list, for faster lookup */
- struct sector_ptr *bio_sectors;
-
- /*
- * For subpage support, we need to map each sector to above
- * stripe_pages.
- */
- struct sector_ptr *stripe_sectors;
-
- /* allocated with real_stripes-many pointers for finish_*() calls */
- void **finish_pointers;
-};
-
static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
static void rmw_work(struct work_struct *work);
@@ -1275,6 +1143,34 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
spin_unlock_irq(&rbio->bio_list_lock);
}
+static void bio_get_trace_info(struct btrfs_raid_bio *rbio, struct bio *bio,
+ struct raid56_bio_trace_info *trace_info)
+{
+ const struct btrfs_io_context *bioc = rbio->bioc;
+ int i;
+
+ ASSERT(bioc);
+
+ /* We rely on bio->bi_bdev to find the stripe number. */
+ if (!bio->bi_bdev)
+ goto not_found;
+
+ for (i = 0; i < bioc->num_stripes; i++) {
+ if (bio->bi_bdev != bioc->stripes[i].dev->bdev)
+ continue;
+ trace_info->stripe_nr = i;
+ trace_info->devid = bioc->stripes[i].dev->devid;
+ trace_info->offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
+ bioc->stripes[i].physical;
+ return;
+ }
+
+not_found:
+ trace_info->devid = -1;
+ trace_info->offset = -1;
+ trace_info->stripe_nr = -1;
+}
+
/*
* this is called from one of two situations. We either
* have a full stripe from the higher layers, or we've read all
@@ -1440,6 +1336,12 @@ write_data:
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_end_io = raid_write_end_io;
+ if (trace_raid56_write_stripe_enabled()) {
+ struct raid56_bio_trace_info trace_info = { 0 };
+
+ bio_get_trace_info(rbio, bio, &trace_info);
+ trace_raid56_write_stripe(rbio, bio, &trace_info);
+ }
submit_bio(bio);
}
return;
@@ -1701,6 +1603,12 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
+ if (trace_raid56_read_partial_enabled()) {
+ struct raid56_bio_trace_info trace_info = { 0 };
+
+ bio_get_trace_info(rbio, bio, &trace_info);
+ trace_raid56_read_partial(rbio, bio, &trace_info);
+ }
submit_bio(bio);
}
/* the actual write will happen once the reads are done */
@@ -2274,6 +2182,12 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
+ if (trace_raid56_scrub_read_recover_enabled()) {
+ struct raid56_bio_trace_info trace_info = { 0 };
+
+ bio_get_trace_info(rbio, bio, &trace_info);
+ trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
+ }
submit_bio(bio);
}
@@ -2643,6 +2557,12 @@ submit_write:
while ((bio = bio_list_pop(&bio_list))) {
bio->bi_end_io = raid_write_end_io;
+ if (trace_raid56_scrub_write_stripe_enabled()) {
+ struct raid56_bio_trace_info trace_info = { 0 };
+
+ bio_get_trace_info(rbio, bio, &trace_info);
+ trace_raid56_scrub_write_stripe(rbio, bio, &trace_info);
+ }
submit_bio(bio);
}
return;
@@ -2822,6 +2742,12 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
+ if (trace_raid56_scrub_read_enabled()) {
+ struct raid56_bio_trace_info trace_info = { 0 };
+
+ bio_get_trace_info(rbio, bio, &trace_info);
+ trace_raid56_scrub_read(rbio, bio, &trace_info);
+ }
submit_bio(bio);
}
/* the actual write will happen once the reads are done */
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index aaad08aefd7d..3badde24dcbf 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -7,6 +7,152 @@
#ifndef BTRFS_RAID56_H
#define BTRFS_RAID56_H
+#include <linux/workqueue.h>
+#include "volumes.h"
+
+enum btrfs_rbio_ops {
+ BTRFS_RBIO_WRITE,
+ BTRFS_RBIO_READ_REBUILD,
+ BTRFS_RBIO_PARITY_SCRUB,
+ BTRFS_RBIO_REBUILD_MISSING,
+};
+
+struct btrfs_raid_bio {
+ struct btrfs_io_context *bioc;
+
+ /*
+ * While we're doing RMW on a stripe we put it into a hash table so we
+ * can lock the stripe and merge more rbios into it.
+ */
+ struct list_head hash_list;
+
+ /* LRU list for the stripe cache */
+ struct list_head stripe_cache;
+
+ /* For scheduling work in the helper threads */
+ struct work_struct work;
+
+ /*
+ * bio_list and bio_list_lock are used to add more bios into the stripe
+ * in hopes of avoiding the full RMW
+ */
+ struct bio_list bio_list;
+ spinlock_t bio_list_lock;
+
+ /*
+ * Also protected by the bio_list_lock, the plug list is used by the
+ * plugging code to collect partial bios while plugged. The stripe
+ * locking code also uses it to hand off the stripe lock to the next
+ * pending IO.
+ */
+ struct list_head plug_list;
+
+ /* Flags that tell us if it is safe to merge with this bio. */
+ unsigned long flags;
+
+ /*
+ * Set if we're doing a parity rebuild for a read from higher up, which
+ * is handled differently from a parity rebuild as part of RMW.
+ */
+ enum btrfs_rbio_ops operation;
+
+ /* Size of each individual stripe on disk */
+ u32 stripe_len;
+
+ /* How many pages there are for the full stripe including P/Q */
+ u16 nr_pages;
+
+ /* How many sectors there are for the full stripe including P/Q */
+ u16 nr_sectors;
+
+ /* Number of data stripes (no p/q) */
+ u8 nr_data;
+
+ /* Number of all stripes (including P/Q) */
+ u8 real_stripes;
+
+ /* How many pages there are for each stripe */
+ u8 stripe_npages;
+
+ /* How many sectors there are for each stripe */
+ u8 stripe_nsectors;
+
+ /* First bad stripe, -1 means no corruption */
+ s8 faila;
+
+ /* Second bad stripe (for RAID6 use) */
+ s8 failb;
+
+ /* Stripe number that we're scrubbing */
+ u8 scrubp;
+
+ /*
+ * Size of all the bios in the bio_list. This helps us decide if the
+ * rbio maps to a full stripe or not.
+ */
+ int bio_list_bytes;
+
+ int generic_bio_cnt;
+
+ refcount_t refs;
+
+ atomic_t stripes_pending;
+
+ atomic_t error;
+
+ /* Bitmap to record which horizontal stripe has data */
+ unsigned long dbitmap;
+
+ /* Allocated with stripe_nsectors-many bits for finish_*() calls */
+ unsigned long finish_pbitmap;
+
+ /*
+ * These are two arrays of pointers. We allocate the rbio big enough
+ * to hold them both and setup their locations when the rbio is
+ * allocated.
+ */
+
+ /*
+ * Pointers to pages that we allocated for reading/writing stripes
+ * directly from the disk (including P/Q).
+ */
+ struct page **stripe_pages;
+
+ /* Pointers to the sectors in the bio_list, for faster lookup */
+ struct sector_ptr *bio_sectors;
+
+ /*
+ * For subpage support, we need to map each sector to above
+ * stripe_pages.
+ */
+ struct sector_ptr *stripe_sectors;
+
+ /* Allocated with real_stripes-many pointers for finish_*() calls */
+ void **finish_pointers;
+};
+
+/*
+ * For trace event usage only. Records useful debug info for each bio submitted
+ * by RAID56 to each physical device.
+ *
+ * Whether a field is signed or not, (-1) always indicates that we could not
+ * find the proper stripe number.
+ */
+struct raid56_bio_trace_info {
+ u64 devid;
+
+ /* The offset inside the stripe. (<= STRIPE_LEN) */
+ u32 offset;
+
+ /*
+ * Stripe number.
+ * 0 is the first data stripe, and nr_data for P stripe,
+ * nr_data + 1 for Q stripe.
+ * >= real_stripes for the replace target device.
+ */
+ u8 stripe_nr;
+};
+
static inline int nr_parity_stripes(const struct map_lookup *map)
{
if (map->type & BTRFS_BLOCK_GROUP_RAID5)
@@ -21,13 +167,13 @@ static inline int nr_data_stripes(const struct map_lookup *map)
{
return map->num_stripes - nr_parity_stripes(map);
}
+
#define RAID5_P_STRIPE ((u64)-2)
#define RAID6_Q_STRIPE ((u64)-1)
#define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) || \
((x) == RAID6_Q_STRIPE))
-struct btrfs_raid_bio;
struct btrfs_device;
int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 24b86061c5df..8539ee2dc79f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -48,6 +48,7 @@
#include "block-group.h"
#include "discard.h"
#include "qgroup.h"
+#include "raid56.h"
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 9ae94ef3e270..29fa8ea2cc0f 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -30,6 +30,8 @@ struct btrfs_qgroup;
struct extent_io_tree;
struct prelim_ref;
struct btrfs_space_info;
+struct btrfs_raid_bio;
+struct raid56_bio_trace_info;
#define show_ref_type(type) \
__print_symbolic(type, \
@@ -2258,6 +2260,98 @@ DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned,
TP_ARGS(fs_info, sinfo, old, diff)
);
+DECLARE_EVENT_CLASS(btrfs_raid56_bio,
+
+ TP_PROTO(const struct btrfs_raid_bio *rbio,
+ const struct bio *bio,
+ const struct raid56_bio_trace_info *trace_info),
+
+ TP_ARGS(rbio, bio, trace_info),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, full_stripe )
+ __field( u64, physical )
+ __field( u64, devid )
+ __field( u32, offset )
+ __field( u32, len )
+ __field( u8, opf )
+ __field( u8, total_stripes )
+ __field( u8, real_stripes )
+ __field( u8, nr_data )
+ __field( u8, stripe_nr )
+ ),
+
+ TP_fast_assign_btrfs(rbio->bioc->fs_info,
+ __entry->full_stripe = rbio->bioc->raid_map[0];
+ __entry->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+ __entry->len = bio->bi_iter.bi_size;
+ __entry->opf = bio_op(bio);
+ __entry->devid = trace_info->devid;
+ __entry->offset = trace_info->offset;
+ __entry->stripe_nr = trace_info->stripe_nr;
+ __entry->total_stripes = rbio->bioc->num_stripes;
+ __entry->real_stripes = rbio->real_stripes;
+ __entry->nr_data = rbio->nr_data;
+ ),
+ /*
+ * For type output, we need to output things like "DATA1"
+ * (the first data stripe), "DATA2" (the second data stripe),
+ * "PQ1" (P stripe), "PQ2" (Q stripe), "REPLACE0" (replace target device).
+ */
+ TP_printk_btrfs(
+"full_stripe=%llu devid=%lld type=%s%d offset=%d opf=0x%x physical=%llu len=%u",
+ __entry->full_stripe, __entry->devid,
+ (__entry->stripe_nr < __entry->nr_data) ? "DATA" :
+ ((__entry->stripe_nr < __entry->real_stripes) ? "PQ" :
+ "REPLACE"),
+ (__entry->stripe_nr < __entry->nr_data) ?
+ (__entry->stripe_nr + 1) :
+ ((__entry->stripe_nr < __entry->real_stripes) ?
+ (__entry->stripe_nr - __entry->nr_data + 1) : 0),
+ __entry->offset, __entry->opf, __entry->physical, __entry->len)
+);
+
+DEFINE_EVENT(btrfs_raid56_bio, raid56_read_partial,
+ TP_PROTO(const struct btrfs_raid_bio *rbio,
+ const struct bio *bio,
+ const struct raid56_bio_trace_info *trace_info),
+
+ TP_ARGS(rbio, bio, trace_info)
+);
+
+DEFINE_EVENT(btrfs_raid56_bio, raid56_write_stripe,
+ TP_PROTO(const struct btrfs_raid_bio *rbio,
+ const struct bio *bio,
+ const struct raid56_bio_trace_info *trace_info),
+
+ TP_ARGS(rbio, bio, trace_info)
+);
+
+
+DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_write_stripe,
+ TP_PROTO(const struct btrfs_raid_bio *rbio,
+ const struct bio *bio,
+ const struct raid56_bio_trace_info *trace_info),
+
+ TP_ARGS(rbio, bio, trace_info)
+);
+
+DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read,
+ TP_PROTO(const struct btrfs_raid_bio *rbio,
+ const struct bio *bio,
+ const struct raid56_bio_trace_info *trace_info),
+
+ TP_ARGS(rbio, bio, trace_info)
+);
+
+DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read_recover,
+ TP_PROTO(const struct btrfs_raid_bio *rbio,
+ const struct bio *bio,
+ const struct raid56_bio_trace_info *trace_info),
+
+ TP_ARGS(rbio, bio, trace_info)
+);
+
#endif /* _TRACE_BTRFS_H */
/* This part must be outside protection */