summary | refs | log | tree | commit | diff
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- fs/btrfs/volumes.c 249
1 files changed, 149 insertions, 100 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 50c5a8762aed..8222f6f74147 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1310,6 +1310,8 @@ again:
if (ret) {
btrfs_error(root->fs_info, ret,
"Failed to remove dev extent item");
+ } else {
+ trans->transaction->have_free_bgs = 1;
}
out:
btrfs_free_path(path);
@@ -4196,7 +4198,7 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
{
- if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)))
+ if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
return;
btrfs_set_fs_incompat(info, RAID56);
@@ -4803,10 +4805,8 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
BUG_ON(em->start > logical || em->start + em->len < logical);
map = (struct map_lookup *)em->bdev;
- if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6)) {
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
len = map->stripe_len * nr_data_stripes(map);
- }
free_extent_map(em);
return len;
}
@@ -4826,8 +4826,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
BUG_ON(em->start > logical || em->start + em->len < logical);
map = (struct map_lookup *)em->bdev;
- if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6))
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
ret = 1;
free_extent_map(em);
return ret;
@@ -4876,32 +4875,24 @@ static inline int parity_smaller(u64 a, u64 b)
}
/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
-static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map)
+static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
{
struct btrfs_bio_stripe s;
- int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
int i;
u64 l;
int again = 1;
- int m;
while (again) {
again = 0;
- for (i = 0; i < real_stripes - 1; i++) {
- if (parity_smaller(raid_map[i], raid_map[i+1])) {
+ for (i = 0; i < num_stripes - 1; i++) {
+ if (parity_smaller(bbio->raid_map[i],
+ bbio->raid_map[i+1])) {
s = bbio->stripes[i];
- l = raid_map[i];
+ l = bbio->raid_map[i];
bbio->stripes[i] = bbio->stripes[i+1];
- raid_map[i] = raid_map[i+1];
+ bbio->raid_map[i] = bbio->raid_map[i+1];
bbio->stripes[i+1] = s;
- raid_map[i+1] = l;
-
- if (bbio->tgtdev_map) {
- m = bbio->tgtdev_map[i];
- bbio->tgtdev_map[i] =
- bbio->tgtdev_map[i + 1];
- bbio->tgtdev_map[i + 1] = m;
- }
+ bbio->raid_map[i+1] = l;
again = 1;
}
@@ -4909,10 +4900,48 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map)
}
}
+/*
+ * Allocate a btrfs_bio together with the space for its trailing
+ * variable-length arrays in one kzalloc(GFP_NOFS) call.
+ *
+ * @total_stripes: number of btrfs_bio_stripe slots and of u64 raid_map
+ *                 entries to reserve
+ * @real_stripes:  number of int slots to reserve for the tgt dev map
+ *
+ * Only the room is reserved here; the tgtdev_map and raid_map pointers are
+ * not set by this function. __btrfs_map_block() points them into the
+ * trailing space (after the stripes, then after the tgt dev ints) when it
+ * needs them.
+ *
+ * Returns the new bbio with its error counter at 0 and its reference count
+ * at 1, or NULL if the allocation failed. The reference is dropped with
+ * btrfs_put_bbio().
+ */
+static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
+{
+ struct btrfs_bio *bbio = kzalloc(
+ /* the size of the btrfs_bio */
+ sizeof(struct btrfs_bio) +
+ /* plus the variable array for the stripes */
+ sizeof(struct btrfs_bio_stripe) * (total_stripes) +
+ /* plus the variable array for the tgt dev */
+ sizeof(int) * (real_stripes) +
+ /*
+ * plus the raid_map, which includes both the tgt dev
+ * and the stripes (one u64 per entry of total_stripes)
+ */
+ sizeof(u64) * (total_stripes),
+ GFP_NOFS);
+ if (!bbio)
+ return NULL;
+
+ /* no errors recorded yet, and a single reference for the caller */
+ atomic_set(&bbio->error, 0);
+ atomic_set(&bbio->refs, 1);
+
+ return bbio;
+}
+
+/*
+ * Take an additional reference on @bbio.
+ *
+ * Warns (WARN_ON) if the reference count reads as zero at the time of the
+ * call, then increments it regardless. @bbio is dereferenced
+ * unconditionally, so it must not be NULL.
+ */
+void btrfs_get_bbio(struct btrfs_bio *bbio)
+{
+ WARN_ON(!atomic_read(&bbio->refs));
+ atomic_inc(&bbio->refs);
+}
+
+/*
+ * Drop one reference on @bbio and kfree() it when the count reaches zero.
+ *
+ * A NULL @bbio is accepted and ignored, so callers need not guard the call.
+ */
+void btrfs_put_bbio(struct btrfs_bio *bbio)
+{
+ if (!bbio)
+ return;
+ /* only the holder that drops the final reference frees the bbio */
+ if (atomic_dec_and_test(&bbio->refs))
+ kfree(bbio);
+}
+
static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret,
- int mirror_num, u64 **raid_map_ret)
+ int mirror_num, int need_raid_map)
{
struct extent_map *em;
struct map_lookup *map;
@@ -4925,7 +4954,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
u64 stripe_nr_orig;
u64 stripe_nr_end;
u64 stripe_len;
- u64 *raid_map = NULL;
int stripe_index;
int i;
int ret = 0;
@@ -4976,7 +5004,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
stripe_offset = offset - stripe_offset;
/* if we're here for raid56, we need to know the stripe aligned start */
- if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
raid56_full_stripe_start = offset;
@@ -4989,8 +5017,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
if (rw & REQ_DISCARD) {
/* we don't discard raid56 yet */
- if (map->type &
- (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) {
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -5000,7 +5027,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
/* For writes to RAID[56], allow a full stripeset across all disks.
For other RAID types and for RAID[56] reads, just allow a single
stripe (on a single disk). */
- if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6) &&
+ if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
(rw & REQ_WRITE)) {
max_len = stripe_len * nr_data_stripes(map) -
(offset - raid56_full_stripe_start);
@@ -5047,7 +5074,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
u64 physical_of_found = 0;
ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS,
- logical, &tmp_length, &tmp_bbio, 0, NULL);
+ logical, &tmp_length, &tmp_bbio, 0, 0);
if (ret) {
WARN_ON(tmp_bbio != NULL);
goto out;
@@ -5061,7 +5088,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
* is not left of the left cursor
*/
ret = -EIO;
- kfree(tmp_bbio);
+ btrfs_put_bbio(tmp_bbio);
goto out;
}
@@ -5096,11 +5123,11 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
} else {
WARN_ON(1);
ret = -EIO;
- kfree(tmp_bbio);
+ btrfs_put_bbio(tmp_bbio);
goto out;
}
- kfree(tmp_bbio);
+ btrfs_put_bbio(tmp_bbio);
} else if (mirror_num > map->num_stripes) {
mirror_num = 0;
}
@@ -5166,15 +5193,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
mirror_num = stripe_index - old_stripe_index + 1;
}
- } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6)) {
- u64 tmp;
-
- if (raid_map_ret &&
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ if (need_raid_map &&
((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
mirror_num > 1)) {
- int i, rot;
-
/* push stripe_nr back to the start of the full stripe */
stripe_nr = raid56_full_stripe_start;
do_div(stripe_nr, stripe_len * nr_data_stripes(map));
@@ -5183,32 +5205,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
num_stripes = map->num_stripes;
max_errors = nr_parity_stripes(map);
- raid_map = kmalloc_array(num_stripes, sizeof(u64),
- GFP_NOFS);
- if (!raid_map) {
- ret = -ENOMEM;
- goto out;
- }
-
- /* Work out the disk rotation on this stripe-set */
- tmp = stripe_nr;
- rot = do_div(tmp, num_stripes);
-
- /* Fill in the logical address of each stripe */
- tmp = stripe_nr * nr_data_stripes(map);
- for (i = 0; i < nr_data_stripes(map); i++)
- raid_map[(i+rot) % num_stripes] =
- em->start + (tmp + i) * map->stripe_len;
-
- raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
- if (map->type & BTRFS_BLOCK_GROUP_RAID6)
- raid_map[(i+rot+1) % num_stripes] =
- RAID6_Q_STRIPE;
-
*length = map->stripe_len;
stripe_index = 0;
stripe_offset = 0;
} else {
+ u64 tmp;
+
/*
* Mirror #0 or #1 means the original data block.
* Mirror #2 is RAID5 parity block.
@@ -5246,17 +5248,42 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
tgtdev_indexes = num_stripes;
}
- bbio = kzalloc(btrfs_bio_size(num_alloc_stripes, tgtdev_indexes),
- GFP_NOFS);
+ bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
if (!bbio) {
- kfree(raid_map);
ret = -ENOMEM;
goto out;
}
- atomic_set(&bbio->error, 0);
if (dev_replace_is_ongoing)
bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
+ /* build raid_map */
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
+ need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
+ mirror_num > 1)) {
+ u64 tmp;
+ int i, rot;
+
+ bbio->raid_map = (u64 *)((void *)bbio->stripes +
+ sizeof(struct btrfs_bio_stripe) *
+ num_alloc_stripes +
+ sizeof(int) * tgtdev_indexes);
+
+ /* Work out the disk rotation on this stripe-set */
+ tmp = stripe_nr;
+ rot = do_div(tmp, num_stripes);
+
+ /* Fill in the logical address of each stripe */
+ tmp = stripe_nr * nr_data_stripes(map);
+ for (i = 0; i < nr_data_stripes(map); i++)
+ bbio->raid_map[(i+rot) % num_stripes] =
+ em->start + (tmp + i) * map->stripe_len;
+
+ bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
+ if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+ bbio->raid_map[(i+rot+1) % num_stripes] =
+ RAID6_Q_STRIPE;
+ }
+
if (rw & REQ_DISCARD) {
int factor = 0;
int sub_stripes = 0;
@@ -5340,6 +5367,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
max_errors = btrfs_chunk_max_errors(map);
+ if (bbio->raid_map)
+ sort_parity_stripes(bbio, num_stripes);
+
tgtdev_indexes = 0;
if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) &&
dev_replace->tgtdev != NULL) {
@@ -5427,6 +5457,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
*bbio_ret = bbio;
+ bbio->map_type = map->type;
bbio->num_stripes = num_stripes;
bbio->max_errors = max_errors;
bbio->mirror_num = mirror_num;
@@ -5443,10 +5474,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
bbio->mirror_num = map->num_stripes + 1;
}
- if (raid_map) {
- sort_parity_stripes(bbio, raid_map);
- *raid_map_ret = raid_map;
- }
out:
if (dev_replace_is_ongoing)
btrfs_dev_replace_unlock(dev_replace);
@@ -5459,17 +5486,17 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
struct btrfs_bio **bbio_ret, int mirror_num)
{
return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
- mirror_num, NULL);
+ mirror_num, 0);
}
/* For Scrub/replace */
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num,
- u64 **raid_map_ret)
+ int need_raid_map)
{
return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
- mirror_num, raid_map_ret);
+ mirror_num, need_raid_map);
}
int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -5511,8 +5538,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
do_div(length, map->num_stripes / map->sub_stripes);
else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
do_div(length, map->num_stripes);
- else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6)) {
+ else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
do_div(length, nr_data_stripes(map));
rmap_len = map->stripe_len * nr_data_stripes(map);
}
@@ -5565,7 +5591,7 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e
bio_endio_nodec(bio, err);
else
bio_endio(bio, err);
- kfree(bbio);
+ btrfs_put_bbio(bbio);
}
static void btrfs_end_bio(struct bio *bio, int err)
@@ -5808,7 +5834,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
u64 logical = (u64)bio->bi_iter.bi_sector << 9;
u64 length = 0;
u64 map_length;
- u64 *raid_map = NULL;
int ret;
int dev_nr = 0;
int total_devs = 1;
@@ -5819,7 +5844,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
btrfs_bio_counter_inc_blocked(root->fs_info);
ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
- mirror_num, &raid_map);
+ mirror_num, 1);
if (ret) {
btrfs_bio_counter_dec(root->fs_info);
return ret;
@@ -5832,15 +5857,13 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
bbio->fs_info = root->fs_info;
atomic_set(&bbio->stripes_pending, bbio->num_stripes);
- if (raid_map) {
+ if (bbio->raid_map) {
/* In this case, map_length has been set to the length of
a single stripe; not the whole write */
if (rw & WRITE) {
- ret = raid56_parity_write(root, bio, bbio,
- raid_map, map_length);
+ ret = raid56_parity_write(root, bio, bbio, map_length);
} else {
- ret = raid56_parity_recover(root, bio, bbio,
- raid_map, map_length,
+ ret = raid56_parity_recover(root, bio, bbio, map_length,
mirror_num, 1);
}
@@ -6238,17 +6261,22 @@ int btrfs_read_sys_array(struct btrfs_root *root)
struct extent_buffer *sb;
struct btrfs_disk_key *disk_key;
struct btrfs_chunk *chunk;
- u8 *ptr;
- unsigned long sb_ptr;
+ u8 *array_ptr;
+ unsigned long sb_array_offset;
int ret = 0;
u32 num_stripes;
u32 array_size;
u32 len = 0;
- u32 cur;
+ u32 cur_offset;
struct btrfs_key key;
- sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
- BTRFS_SUPER_INFO_SIZE);
+ ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
+ /*
+ * This will create extent buffer of nodesize, superblock size is
+ * fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will
+ * overallocate but we can keep it as-is, only the first page is used.
+ */
+ sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
if (!sb)
return -ENOMEM;
btrfs_set_buffer_uptodate(sb);
@@ -6271,35 +6299,56 @@ int btrfs_read_sys_array(struct btrfs_root *root)
write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
array_size = btrfs_super_sys_array_size(super_copy);
- ptr = super_copy->sys_chunk_array;
- sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
- cur = 0;
+ array_ptr = super_copy->sys_chunk_array;
+ sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
+ cur_offset = 0;
+
+ while (cur_offset < array_size) {
+ disk_key = (struct btrfs_disk_key *)array_ptr;
+ len = sizeof(*disk_key);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
- while (cur < array_size) {
- disk_key = (struct btrfs_disk_key *)ptr;
btrfs_disk_key_to_cpu(&key, disk_key);
- len = sizeof(*disk_key); ptr += len;
- sb_ptr += len;
- cur += len;
+ array_ptr += len;
+ sb_array_offset += len;
+ cur_offset += len;
if (key.type == BTRFS_CHUNK_ITEM_KEY) {
- chunk = (struct btrfs_chunk *)sb_ptr;
+ chunk = (struct btrfs_chunk *)sb_array_offset;
+ /*
+ * At least one btrfs_chunk with one stripe must be
+ * present, exact stripe count check comes afterwards
+ */
+ len = btrfs_chunk_item_size(1);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
+
+ num_stripes = btrfs_chunk_num_stripes(sb, chunk);
+ len = btrfs_chunk_item_size(num_stripes);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
+
ret = read_one_chunk(root, &key, sb, chunk);
if (ret)
break;
- num_stripes = btrfs_chunk_num_stripes(sb, chunk);
- len = btrfs_chunk_item_size(num_stripes);
} else {
ret = -EIO;
break;
}
- ptr += len;
- sb_ptr += len;
- cur += len;
+ array_ptr += len;
+ sb_array_offset += len;
+ cur_offset += len;
}
free_extent_buffer(sb);
return ret;
+
+out_short_read:
+ printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n",
+ len, cur_offset);
+ free_extent_buffer(sb);
+ return -EIO;
}
int btrfs_read_chunk_tree(struct btrfs_root *root)