summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com> 2021-10-14 16:54:47 +0300
committer: Kent Overstreet <kent.overstreet@linux.dev> 2023-10-23 00:09:14 +0300
commit: 6d76aefea1902a11c47e20fec5495d30a39891f3 (patch)
tree: 3492b62e7853384faad5e0b97a4c5f7c60f7d945 /fs
parent: bfe88863cf3063204fc49a04307fa6635554d6e3 (diff)
download: linux-6d76aefea1902a11c47e20fec5495d30a39891f3.tar.xz
bcachefs: Fix for leaking of reflinked extents
When a reflink pointer points to only part of an indirect extent, and then that indirect extent is fragmented (e.g. by copygc), if the reflink pointer only points to one of the fragments, we leak a reference. Fix this by storing front/back pad values in reflink pointers - when inserting reflink pointers, we initialize them to cover the full range of the indirect extents we reference. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/bcachefs/bcachefs_format.h 15
-rw-r--r-- fs/bcachefs/buckets.c 45
-rw-r--r-- fs/bcachefs/fsck.c 5
-rw-r--r-- fs/bcachefs/reflink.c 4
4 files changed, 58 insertions, 11 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 579acb69115d..4b2bf8f7b28a 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -917,15 +917,24 @@ struct bch_stripe {
struct bch_reflink_p {
struct bch_val v;
__le64 idx;
- __le64 v2;
-};
+ /*
+ * A reflink pointer might point to an indirect extent which is then
+ * later split (by copygc or rebalance). If we only pointed to part of
+ * the original indirect extent, and then one of the fragments is
+ * outside the range we point to, we'd leak a refcount: so when creating
+ * reflink pointers, we need to store pad values to remember the full
+ * range we were taking a reference on.
+ */
+ __le32 front_pad;
+ __le32 back_pad;
+} __attribute__((packed, aligned(8)));
struct bch_reflink_v {
struct bch_val v;
__le64 refcount;
union bch_extent_entry start[0];
__u64 _data[0];
-};
+} __attribute__((packed, aligned(8)));
struct bch_indirect_inline_data {
struct bch_val v;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 9c5d18b4efaa..ee1c71e011c7 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -1180,8 +1180,10 @@ static int bch2_mark_reflink_p(struct bch_fs *c,
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
struct reflink_gc *ref;
size_t l, r, m;
- u64 idx = le64_to_cpu(p.v->idx);
- unsigned sectors = p.k->size;
+ u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+ u64 sectors = (u64) le32_to_cpu(p.v->front_pad) +
+ le32_to_cpu(p.v->back_pad) +
+ p.k->size;
s64 ret = 0;
BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) ==
@@ -1758,12 +1760,33 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
bch2_fs_inconsistent(c,
"%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx);
- bch2_inconsistent_error(c);
ret = -EIO;
goto err;
}
- BUG_ON(!*refcount && (flags & BTREE_TRIGGER_OVERWRITE));
+ if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
+ bch2_fs_inconsistent(c,
+ "%llu:%llu len %u idx %llu indirect extent refcount underflow",
+ p.k->p.inode, p.k->p.offset, p.k->size, idx);
+ ret = -EIO;
+ goto err;
+ }
+
+ if (flags & BTREE_TRIGGER_INSERT) {
+ struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
+ u64 pad;
+
+ pad = max_t(s64, le32_to_cpu(v->front_pad),
+ le64_to_cpu(v->idx) - bkey_start_offset(k.k));
+ BUG_ON(pad > U32_MAX);
+ v->front_pad = cpu_to_le32(pad);
+
+ pad = max_t(s64, le32_to_cpu(v->back_pad),
+ k.k->p.offset - p.k->size - le64_to_cpu(v->idx));
+ BUG_ON(pad > U32_MAX);
+ v->back_pad = cpu_to_le32(pad);
+ }
+
le64_add_cpu(refcount, add);
if (!*refcount) {
@@ -1786,10 +1809,20 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
struct bkey_s_c k, unsigned flags)
{
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
- u64 idx = le64_to_cpu(p.v->idx);
- unsigned sectors = p.k->size;
+ u64 idx, sectors;
s64 ret = 0;
+ if (flags & BTREE_TRIGGER_INSERT) {
+ struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
+
+ v->front_pad = v->back_pad = 0;
+ }
+
+ idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+ sectors = (u64) le32_to_cpu(p.v->front_pad) +
+ le32_to_cpu(p.v->back_pad) +
+ p.k->size;
+
while (sectors) {
ret = __bch2_trans_mark_reflink_p(trans, p, idx, flags);
if (ret < 0)
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index b43c31b95dff..c99e1514fd4f 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -2174,7 +2174,7 @@ static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter)
p = bkey_s_c_to_reflink_p(k);
- if (!p.v->v2)
+ if (!p.v->front_pad && !p.v->back_pad)
return 0;
u = bch2_trans_kmalloc(trans, sizeof(*u));
@@ -2183,7 +2183,8 @@ static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter)
return ret;
bkey_reassemble(&u->k_i, k);
- u->v.v2 = 0;
+ u->v.front_pad = 0;
+ u->v.back_pad = 0;
return bch2_trans_update(trans, iter, &u->k_i, 0);
}
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 9bcf4216a286..2827d0ef1019 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -32,6 +32,10 @@ const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k)
if (bkey_val_bytes(p.k) != sizeof(*p.v))
return "incorrect value size";
+ if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix &&
+ le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad))
+ return "idx < front_pad";
+
return NULL;
}