From c9a9dbf2cbd1641af49bf081ca3bbe4101df3991 Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Thu, 28 Mar 2013 11:34:41 +0000
Subject: Btrfs: fix a warning when disabling quota

Steps to reproduce:
	mkfs.btrfs
	mount
	btrfs quota enable
	btrfs sub create /subv
	i=1
	while [ $i -le 10000 ]
	do
		dd if=/dev/zero of=/subv/data_$i bs=1K count=1
		i=$(($i+1))
		if [ $i -eq 500 ]
		then
			btrfs quota disable $mnt
		fi
	done
	dmesg

Obviously, this WARN_ON() is unnecessary, and it is easily triggered.
Just remove it.

Signed-off-by: Wang Shilong
Signed-off-by: Josef Bacik
---
 fs/btrfs/qgroup.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs/btrfs/qgroup.c')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b44124dd2370..33b0bea50b45 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -420,8 +420,6 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
 		rb_erase(n, &fs_info->qgroup_tree);

-		WARN_ON(!list_empty(&qgroup->dirty));
-
 		while (!list_empty(&qgroup->groups)) {
 			list = list_first_entry(&qgroup->groups,
 						struct btrfs_qgroup_list,
--
cgit v1.2.3


From 7708f029dca5f1b9e9d6ea01ab10cd83e4c74ff2 Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Sun, 7 Apr 2013 10:24:57 +0000
Subject: Btrfs: creating the subvolume qgroup automatically when enabling quota

Create the subvolume/snapshot qgroups (including the root subvolume)
automatically when enabling quota.

Signed-off-by: Wang Shilong
Reviewed-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/ioctl.c  |  2 ++
 fs/btrfs/qgroup.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)

(limited to 'fs/btrfs/qgroup.c')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9c44d657b795..4fc9751d2e0c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3697,6 +3697,7 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
 		goto drop_write;
 	}

+	down_write(&root->fs_info->subvol_sem);
 	if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
 		trans = btrfs_start_transaction(root, 2);
 		if (IS_ERR(trans)) {
@@ -3730,6 +3731,7 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
 	}
 out:
 	kfree(sa);
+	up_write(&root->fs_info->subvol_sem);
 drop_write:
 	mnt_drop_write_file(file);
 	return ret;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 33b0bea50b45..5be5a39dedc4 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -781,11 +781,15 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 		       struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_root *quota_root;
+	struct btrfs_root *tree_root = fs_info->tree_root;
 	struct btrfs_path *path = NULL;
 	struct btrfs_qgroup_status_item *ptr;
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_qgroup *qgroup = NULL;
 	int ret = 0;
+	int slot;

 	spin_lock(&fs_info->qgroup_lock);
 	if (fs_info->quota_root) {
@@ -832,7 +836,58 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,

 	btrfs_mark_buffer_dirty(leaf);

+	key.objectid = 0;
+	key.type = BTRFS_ROOT_REF_KEY;
+	key.offset = 0;
+
+	btrfs_release_path(path);
+	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
+	if (ret > 0)
+		goto out_add_root;
+	if (ret < 0)
+		goto out_free_path;
+
+	while (1) {
+		slot = path->slots[0];
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+		if (found_key.type == BTRFS_ROOT_REF_KEY) {
+			ret = add_qgroup_item(trans, quota_root,
+					      found_key.offset);
+			if (ret)
+				goto out_free_path;
+
+			spin_lock(&fs_info->qgroup_lock);
+			qgroup = add_qgroup_rb(fs_info, found_key.offset);
+			if (IS_ERR(qgroup)) {
+				spin_unlock(&fs_info->qgroup_lock);
+				ret = PTR_ERR(qgroup);
+				goto out_free_path;
+			}
+			spin_unlock(&fs_info->qgroup_lock);
+		}
+		ret = btrfs_next_item(tree_root, path);
+		if (ret < 0)
+			goto out_free_path;
+		if (ret)
+			break;
+	}
+
+out_add_root:
+	btrfs_release_path(path);
+	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
+	if (ret)
+		goto out_free_path;
+
+	spin_lock(&fs_info->qgroup_lock);
+	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
+	if (IS_ERR(qgroup)) {
+		spin_unlock(&fs_info->qgroup_lock);
+		ret = PTR_ERR(qgroup);
+		goto out_free_path;
+	}
 	fs_info->quota_root = quota_root;
 	fs_info->pending_quota_state = 1;
 	spin_unlock(&fs_info->qgroup_lock);
--
cgit v1.2.3


From f2f6ed3d54648ec19dcdeec30f66843cf7a38487 Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Sun, 7 Apr 2013 10:50:16 +0000
Subject: Btrfs: introduce a mutex lock for btrfs quota operations

The original code has a single spin lock, 'qgroup_lock', protecting the
quota configuration in memory. If we want to add a
BTRFS_QGROUP_INFO_KEY, it is first added to the btree and only then is
the in-memory configuration updated; however, a race condition may
happen between these two operations. For example:

	->add_qgroup_info_item()
		->add_qgroup_rb()

For the above case, del_qgroup_info_item() may happen just before
add_qgroup_rb().

What's worse, when we want to add a qgroup relation:

	->add_qgroup_relation_item()
		->add_qgroup_relations()

we don't have any check that 'src' and 'dst' exist before
add_qgroup_relation_item(), so a race condition can happen in that case
as well.

To avoid these race conditions and to have all the necessary checks, we
introduce a mutex lock, 'qgroup_ioctl_lock', and make all the user
change operations protected by it.
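Schematically, every user change then follows the pattern below. This
is an illustrative sketch only; qgroup_user_change(),
update_items_on_disk() and update_rb_tree() are hypothetical stand-ins
for the various *_item() and *_rb() helpers in the real code:

	static int qgroup_user_change(struct btrfs_fs_info *fs_info)
	{
		int ret = 0;

		/* serialize the whole check-then-update sequence */
		mutex_lock(&fs_info->qgroup_ioctl_lock);
		if (!fs_info->quota_root) {
			/* validate the in-memory state first */
			ret = -EINVAL;
			goto out;
		}
		ret = update_items_on_disk();	/* the *_item() calls */
		if (ret)
			goto out;
		/* qgroup_lock keeps covering the short rb-tree update */
		spin_lock(&fs_info->qgroup_lock);
		update_rb_tree();		/* the *_rb() calls */
		spin_unlock(&fs_info->qgroup_lock);
	out:
		mutex_unlock(&fs_info->qgroup_ioctl_lock);
		return ret;
	}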
Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 3 ++ fs/btrfs/disk-io.c | 1 + fs/btrfs/qgroup.c | 82 +++++++++++++++++++++++++++++++++++++----------------- 3 files changed, 61 insertions(+), 25 deletions(-) (limited to 'fs/btrfs/qgroup.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 075a8a0e49c4..1a850402937d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1583,6 +1583,9 @@ struct btrfs_fs_info { struct rb_root qgroup_tree; spinlock_t qgroup_lock; + /* protect user change for quota operations */ + struct mutex qgroup_ioctl_lock; + /* list of dirty qgroups to be written at next commit */ struct list_head dirty_qgroups; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 70e6b0c32db2..9f83e5b870d2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2213,6 +2213,7 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->dev_replace.lock); spin_lock_init(&fs_info->qgroup_lock); + mutex_init(&fs_info->qgroup_ioctl_lock); fs_info->qgroup_tree = RB_ROOT; INIT_LIST_HEAD(&fs_info->dirty_qgroups); fs_info->qgroup_seq = 1; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5be5a39dedc4..0a1f6861ae9a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -791,6 +791,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, int ret = 0; int slot; + mutex_lock(&fs_info->qgroup_ioctl_lock); spin_lock(&fs_info->qgroup_lock); if (fs_info->quota_root) { fs_info->pending_quota_state = 1; @@ -900,6 +901,7 @@ out_free_root: kfree(quota_root); } out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -910,10 +912,11 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, struct btrfs_root *quota_root; int ret = 0; + mutex_lock(&fs_info->qgroup_ioctl_lock); spin_lock(&fs_info->qgroup_lock); if (!fs_info->quota_root) { spin_unlock(&fs_info->qgroup_lock); - return 0; + goto out; } fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; @@ -922,8 +925,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, btrfs_free_qgroup_config(fs_info); spin_unlock(&fs_info->qgroup_lock); - if (!quota_root) - return -EINVAL; + if (!quota_root) { + ret = -EINVAL; + goto out; + } ret = btrfs_clean_quota_tree(trans, quota_root); if (ret) @@ -944,6 +949,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, free_extent_buffer(quota_root->commit_root); kfree(quota_root); out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -959,24 +965,28 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, struct btrfs_root *quota_root; int ret = 0; + mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; - if (!quota_root) - return -EINVAL; + if (!quota_root) { + ret = -EINVAL; + goto out; + } ret = add_qgroup_relation_item(trans, quota_root, src, dst); if (ret) - return ret; + goto out; ret = add_qgroup_relation_item(trans, quota_root, dst, src); if (ret) { del_qgroup_relation_item(trans, quota_root, src, dst); - return ret; + goto out; } spin_lock(&fs_info->qgroup_lock); ret = add_relation_rb(quota_root->fs_info, src, dst); spin_unlock(&fs_info->qgroup_lock); - +out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -987,9 +997,12 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, int ret = 0; int err; + mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; - if (!quota_root) - return -EINVAL; + if (!quota_root) { + ret = -EINVAL; + goto out; + } ret = del_qgroup_relation_item(trans, quota_root, src, dst); 
err = del_qgroup_relation_item(trans, quota_root, dst, src); @@ -1000,7 +1013,8 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, del_relation_rb(fs_info, src, dst); spin_unlock(&fs_info->qgroup_lock); - +out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -1011,9 +1025,12 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, struct btrfs_qgroup *qgroup; int ret = 0; + mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; - if (!quota_root) - return -EINVAL; + if (!quota_root) { + ret = -EINVAL; + goto out; + } ret = add_qgroup_item(trans, quota_root, qgroupid); @@ -1023,7 +1040,8 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, if (IS_ERR(qgroup)) ret = PTR_ERR(qgroup); - +out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -1034,9 +1052,12 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, struct btrfs_qgroup *qgroup; int ret = 0; + mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; - if (!quota_root) - return -EINVAL; + if (!quota_root) { + ret = -EINVAL; + goto out; + } /* check if there are no relations to this qgroup */ spin_lock(&fs_info->qgroup_lock); @@ -1044,7 +1065,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, if (qgroup) { if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) { spin_unlock(&fs_info->qgroup_lock); - return -EBUSY; + ret = -EBUSY; + goto out; } } spin_unlock(&fs_info->qgroup_lock); @@ -1054,7 +1076,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, spin_lock(&fs_info->qgroup_lock); del_qgroup_rb(quota_root->fs_info, qgroupid); spin_unlock(&fs_info->qgroup_lock); - +out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -1062,12 +1085,16 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 qgroupid, struct btrfs_qgroup_limit *limit) { - struct btrfs_root *quota_root = fs_info->quota_root; + struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; int ret = 0; - if (!quota_root) - return -EINVAL; + mutex_lock(&fs_info->qgroup_ioctl_lock); + quota_root = fs_info->quota_root; + if (!quota_root) { + ret = -EINVAL; + goto out; + } ret = update_qgroup_limit_item(trans, quota_root, qgroupid, limit->flags, limit->max_rfer, @@ -1094,7 +1121,8 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, unlock: spin_unlock(&fs_info->qgroup_lock); - +out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -1392,11 +1420,14 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, struct btrfs_qgroup *dstgroup; u32 level_size = 0; + mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_enabled) - return 0; + goto out; - if (!quota_root) - return -EINVAL; + if (!quota_root) { + ret = -EINVAL; + goto out; + } /* * create a tracking group for the subvol itself @@ -1523,6 +1554,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, unlock: spin_unlock(&fs_info->qgroup_lock); out: + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } -- cgit v1.2.3 From 58400fce5ac5939aadac8ce682edc192c6172f80 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Sun, 7 Apr 2013 10:50:17 +0000 Subject: Btrfs: remove some unnecessary spin_lock usages We use mutex lock to protect all the user change operations. So when we are calling find_qgroup_rb() to check whether qgroup exists, we don't have to hold spin_lock. Besides, when enabling/disabling quota, it must be single thread when operations come here. 
spin lock must be firstly used to clear quota_root when disabling quota, while enabling quota, spin lock must be used to complete the last assign work. Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/qgroup.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/qgroup.c') diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 0a1f6861ae9a..49c4e6398f18 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -98,7 +98,7 @@ struct btrfs_qgroup_list { struct btrfs_qgroup *member; }; -/* must be called with qgroup_lock held */ +/* must be called with qgroup_ioctl_lock held */ static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) { @@ -792,13 +792,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, int slot; mutex_lock(&fs_info->qgroup_ioctl_lock); - spin_lock(&fs_info->qgroup_lock); if (fs_info->quota_root) { fs_info->pending_quota_state = 1; - spin_unlock(&fs_info->qgroup_lock); goto out; } - spin_unlock(&fs_info->qgroup_lock); /* * initially create the quota tree @@ -860,14 +857,11 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, if (ret) goto out_free_path; - spin_lock(&fs_info->qgroup_lock); qgroup = add_qgroup_rb(fs_info, found_key.offset); if (IS_ERR(qgroup)) { - spin_unlock(&fs_info->qgroup_lock); ret = PTR_ERR(qgroup); goto out_free_path; } - spin_unlock(&fs_info->qgroup_lock); } ret = btrfs_next_item(tree_root, path); if (ret < 0) @@ -882,13 +876,12 @@ out_add_root: if (ret) goto out_free_path; - spin_lock(&fs_info->qgroup_lock); qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); if (IS_ERR(qgroup)) { - spin_unlock(&fs_info->qgroup_lock); ret = PTR_ERR(qgroup); goto out_free_path; } + spin_lock(&fs_info->qgroup_lock); fs_info->quota_root = quota_root; fs_info->pending_quota_state = 1; spin_unlock(&fs_info->qgroup_lock); @@ -913,11 +906,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, int ret = 0; mutex_lock(&fs_info->qgroup_ioctl_lock); - spin_lock(&fs_info->qgroup_lock); - if (!fs_info->quota_root) { - spin_unlock(&fs_info->qgroup_lock); + if (!fs_info->quota_root) goto out; - } + spin_lock(&fs_info->qgroup_lock); fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; quota_root = fs_info->quota_root; @@ -1060,16 +1051,13 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, } /* check if there are no relations to this qgroup */ - spin_lock(&fs_info->qgroup_lock); qgroup = find_qgroup_rb(fs_info, qgroupid); if (qgroup) { if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) { - spin_unlock(&fs_info->qgroup_lock); ret = -EBUSY; goto out; } } - spin_unlock(&fs_info->qgroup_lock); ret = del_qgroup_item(trans, quota_root, qgroupid); @@ -1106,20 +1094,17 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, (unsigned long long)qgroupid); } - spin_lock(&fs_info->qgroup_lock); - qgroup = find_qgroup_rb(fs_info, qgroupid); if (!qgroup) { ret = -ENOENT; - goto unlock; + goto out; } + spin_lock(&fs_info->qgroup_lock); qgroup->lim_flags = limit->flags; qgroup->max_rfer = limit->max_rfer; qgroup->max_excl = limit->max_excl; qgroup->rsv_rfer = limit->rsv_rfer; qgroup->rsv_excl = limit->rsv_excl; - -unlock: spin_unlock(&fs_info->qgroup_lock); out: mutex_unlock(&fs_info->qgroup_ioctl_lock); -- cgit v1.2.3 From b7fef4f593007d52ba7fe4427e099bd71c63c521 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Sun, 7 Apr 2013 10:50:18 +0000 Subject: Btrfs: fix missing check before creating a qgroup relation 
Steps to reproduce:
	mkfs.btrfs
	mount
	btrfs quota enable
	btrfs qgroup assign 0/1 1/1
	umount
	btrfs-debug-tree | grep QGROUP

If we want to add a qgroup relation, we should guarantee that both
'src' and 'dst' exist; otherwise, such a qgroup relation must not be
created.

Signed-off-by: Wang Shilong
Reviewed-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/qgroup.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs/btrfs/qgroup.c')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 49c4e6398f18..0932b839550c 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -954,6 +954,8 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
 {
 	struct btrfs_root *quota_root;
+	struct btrfs_qgroup *parent;
+	struct btrfs_qgroup *member;
 	int ret = 0;

 	mutex_lock(&fs_info->qgroup_ioctl_lock);
@@ -962,6 +964,12 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 		ret = -EINVAL;
 		goto out;
 	}
+	member = find_qgroup_rb(fs_info, src);
+	parent = find_qgroup_rb(fs_info, dst);
+	if (!member || !parent) {
+		ret = -EINVAL;
+		goto out;
+	}

 	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
 	if (ret)
--
cgit v1.2.3


From 3f5e2d3b3877d34231bbe45856f3b78b7692e472 Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Sun, 7 Apr 2013 10:50:19 +0000
Subject: Btrfs: fix missing check in the btrfs_qgroup_inherit()

The original code forgot to check 'inherit'; we should guarantee that
all the qgroups given in the struct 'inherit' exist.

Signed-off-by: Wang Shilong
Reviewed-by: Miao Xie
Signed-off-by: Josef Bacik
---
 fs/btrfs/qgroup.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'fs/btrfs/qgroup.c')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 0932b839550c..2293da6f9e72 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1412,6 +1412,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 	struct btrfs_qgroup *srcgroup;
 	struct btrfs_qgroup *dstgroup;
 	u32 level_size = 0;
+	u64 nums;

 	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (!fs_info->quota_enabled)
@@ -1422,6 +1423,20 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 		goto out;
 	}

+	if (inherit) {
+		i_qgroups = (u64 *)(inherit + 1);
+		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
+		       2 * inherit->num_excl_copies;
+		for (i = 0; i < nums; ++i) {
+			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
+			if (!srcgroup) {
+				ret = -EINVAL;
+				goto out;
+			}
+			++i_qgroups;
+		}
+	}
+
 	/*
 	 * create a tracking group for the subvol itself
 	 */
--
cgit v1.2.3


From ddb47afa50f2f2713ef28d22380d3bf8fae648db Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Sun, 7 Apr 2013 10:50:20 +0000
Subject: Btrfs: fix a warning when updating qgroup limit

Steps to reproduce:
	mkfs.btrfs
	mount
	btrfs quota enable
	btrfs qgroup limit 0/1
	dmesg

If the corresponding qgroup doesn't exist, the flag
'BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT' is set and a noisy message is
printed. This is wrong; we can just move find_qgroup_rb() before
update_qgroup_limit_item(). This doesn't change the logic of the
function, but it avoids the unnecessary message and the wrong flag
being set.
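In other words, the function should validate before it modifies. An
abbreviated sketch of the resulting order, using only names from the
patch itself (the full diff follows below):

	/* look the qgroup up first: an unknown qgroupid now fails with
	 * -ENOENT before the on-disk item is touched and before the
	 * INCONSISTENT flag can be set
	 */
	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	}
	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
				       limit->flags, limit->max_rfer,
				       limit->max_excl, limit->rsv_rfer,
				       limit->rsv_excl);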
Signed-off-by: Wang Shilong
Signed-off-by: Josef Bacik
---
 fs/btrfs/qgroup.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs/btrfs/qgroup.c')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 2293da6f9e72..e089fc108483 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1092,6 +1092,11 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 		goto out;
 	}

+	qgroup = find_qgroup_rb(fs_info, qgroupid);
+	if (!qgroup) {
+		ret = -ENOENT;
+		goto out;
+	}
 	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
 				       limit->flags, limit->max_rfer,
 				       limit->max_excl, limit->rsv_rfer,
@@ -1102,11 +1107,6 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 		       (unsigned long long)qgroupid);
 	}

-	qgroup = find_qgroup_rb(fs_info, qgroupid);
-	if (!qgroup) {
-		ret = -ENOENT;
-		goto out;
-	}
 	spin_lock(&fs_info->qgroup_lock);
 	qgroup->lim_flags = limit->flags;
 	qgroup->max_rfer = limit->max_rfer;
--
cgit v1.2.3


From b4fcd6be6bbd702ae1a6545c9b413681850a9814 Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Mon, 15 Apr 2013 12:56:49 +0000
Subject: Btrfs: fix confusing edquot happening case

Steps to reproduce:
	mkfs.btrfs
	mount
	dd if=/dev/zero of=//data bs=1M count=10
	sync
	btrfs quota enable
	btrfs qgroup create 0/5
	btrfs qgroup limit 5M 0/5
	rm -f //data
	sync
	btrfs qgroup show
	dd if=/dev/zero of=data bs=1M count=1

From the perspective of users, the qgroup's referenced or exclusive
count is negative, yet the user cannot continue to write data! Because
the counters are u64, a "negative" value is really a huge unsigned
number, so every reservation check trips the limit. A workaround is to
cast the u64 counters to s64 when doing the qgroup reservation checks.

Signed-off-by: Wang Shilong
Reviewed-by: Arne Jansen
Signed-off-by: Josef Bacik
---
 fs/btrfs/qgroup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/btrfs/qgroup.c')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index e089fc108483..4beea047f4ed 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1616,14 +1616,14 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
-		    qg->reserved + qg->rfer + num_bytes >
+		    qg->reserved + (s64)qg->rfer + num_bytes >
 		    qg->max_rfer) {
 			ret = -EDQUOT;
 			goto out;
 		}

 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
-		    qg->reserved + qg->excl + num_bytes >
+		    qg->reserved + (s64)qg->excl + num_bytes >
 		    qg->max_excl) {
 			ret = -EDQUOT;
 			goto out;
--
cgit v1.2.3


From 3c97185c65858d23bc02492fbd27733f1f11ea83 Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Wed, 17 Apr 2013 14:00:36 +0000
Subject: Btrfs: fix missing check about ulist_add() in qgroup.c

ulist_add() may return -ENOMEM; add the missing checks of its return
value.
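The fix applies the same pattern at every call site. A condensed
sketch (the real error labels vary by caller):

	ret = ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC);
	if (ret < 0)		/* -ENOMEM: abort the walk rather than */
		goto unlock;	/* silently dropping this qgroup */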
Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik --- fs/btrfs/qgroup.c | 62 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/qgroup.c') diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 4beea047f4ed..f9fb52e52bb6 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1261,7 +1261,10 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, ulist_reinit(tmp); /* XXX id not needed */ - ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC); + ret = ulist_add(tmp, qg->qgroupid, + (u64)(uintptr_t)qg, GFP_ATOMIC); + if (ret < 0) + goto unlock; ULIST_ITER_INIT(&tmp_uiter); while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { struct btrfs_qgroup_list *glist; @@ -1273,9 +1276,11 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, ++qg->refcnt; list_for_each_entry(glist, &qg->groups, next_group) { - ulist_add(tmp, glist->group->qgroupid, - (u64)(uintptr_t)glist->group, - GFP_ATOMIC); + ret = ulist_add(tmp, glist->group->qgroupid, + (u64)(uintptr_t)glist->group, + GFP_ATOMIC); + if (ret < 0) + goto unlock; } } } @@ -1284,7 +1289,10 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, * step 2: walk from the new root */ ulist_reinit(tmp); - ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); + ret = ulist_add(tmp, qgroup->qgroupid, + (uintptr_t)qgroup, GFP_ATOMIC); + if (ret < 0) + goto unlock; ULIST_ITER_INIT(&uiter); while ((unode = ulist_next(tmp, &uiter))) { struct btrfs_qgroup *qg; @@ -1305,8 +1313,10 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, qg->tag = seq; list_for_each_entry(glist, &qg->groups, next_group) { - ulist_add(tmp, glist->group->qgroupid, - (uintptr_t)glist->group, GFP_ATOMIC); + ret = ulist_add(tmp, glist->group->qgroupid, + (uintptr_t)glist->group, GFP_ATOMIC); + if (ret < 0) + goto unlock; } } @@ -1324,7 +1334,10 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, continue; ulist_reinit(tmp); - ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC); + ret = ulist_add(tmp, qg->qgroupid, + (uintptr_t)qg, GFP_ATOMIC); + if (ret < 0) + goto unlock; ULIST_ITER_INIT(&tmp_uiter); while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { struct btrfs_qgroup_list *glist; @@ -1340,9 +1353,11 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, } list_for_each_entry(glist, &qg->groups, next_group) { - ulist_add(tmp, glist->group->qgroupid, - (uintptr_t)glist->group, - GFP_ATOMIC); + ret = ulist_add(tmp, glist->group->qgroupid, + (uintptr_t)glist->group, + GFP_ATOMIC); + if (ret < 0) + goto unlock; } } } @@ -1607,7 +1622,10 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) ret = -ENOMEM; goto out; } - ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); + ret = ulist_add(ulist, qgroup->qgroupid, + (uintptr_t)qgroup, GFP_ATOMIC); + if (ret < 0) + goto out; ULIST_ITER_INIT(&uiter); while ((unode = ulist_next(ulist, &uiter))) { struct btrfs_qgroup *qg; @@ -1630,11 +1648,13 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) } list_for_each_entry(glist, &qg->groups, next_group) { - ulist_add(ulist, glist->group->qgroupid, - (uintptr_t)glist->group, GFP_ATOMIC); + ret = ulist_add(ulist, glist->group->qgroupid, + (uintptr_t)glist->group, GFP_ATOMIC); + if (ret < 0) + goto out; } } - + ret = 0; /* * no limits exceeded, now record the reservation into all qgroups */ @@ -1663,6 +1683,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) struct 
ulist_node *unode;
 	struct ulist_iterator uiter;
 	u64 ref_root = root->root_key.objectid;
+	int ret = 0;

 	if (!is_fstree(ref_root))
 		return;
@@ -1685,7 +1706,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
 		btrfs_std_error(fs_info, -ENOMEM);
 		goto out;
 	}
-	ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+	ret = ulist_add(ulist, qgroup->qgroupid,
+			(uintptr_t)qgroup, GFP_ATOMIC);
+	if (ret < 0)
+		goto out;
 	ULIST_ITER_INIT(&uiter);
 	while ((unode = ulist_next(ulist, &uiter))) {
 		struct btrfs_qgroup *qg;
@@ -1696,8 +1720,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
 		qg->reserved -= num_bytes;

 		list_for_each_entry(glist, &qg->groups, next_group) {
-			ulist_add(ulist, glist->group->qgroupid,
-				  (uintptr_t)glist->group, GFP_ATOMIC);
+			ret = ulist_add(ulist, glist->group->qgroupid,
+					(uintptr_t)glist->group, GFP_ATOMIC);
+			if (ret < 0)
+				goto out;
 		}
 	}
--
cgit v1.2.3


From 534e6623b7bc03ddcf6c98b9398ff512d5135fe5 Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Wed, 17 Apr 2013 14:49:51 +0000
Subject: Btrfs: add all ioctl checks before user change for quota operations

Since all the quota configuration is loaded in memory, we can perform
the ioctl checks there before touching the disk; this is safe because
qgroup_ioctl_lock is held around the whole operation. Once these
in-memory checks pass, it is fine to go ahead with the user change.
For example, if we try to add a qgroup that already exists, we do:

	->add_qgroup_item()
		->add_qgroup_rb()

add_qgroup_item() will return -EEXIST to us; however, the qgroups are
all in memory, so why not check them in memory first?

Signed-off-by: Wang Shilong
Signed-off-by: Josef Bacik
---
 fs/btrfs/qgroup.c | 46 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 5 deletions(-)

(limited to 'fs/btrfs/qgroup.c')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index f9fb52e52bb6..f175471da882 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -956,6 +956,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *parent;
 	struct btrfs_qgroup *member;
+	struct btrfs_qgroup_list *list;
 	int ret = 0;

 	mutex_lock(&fs_info->qgroup_ioctl_lock);
@@ -971,6 +972,14 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 		goto out;
 	}

+	/* check if such qgroup relation exist firstly */
+	list_for_each_entry(list, &member->groups, next_group) {
+		if (list->group == parent) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+
 	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
 	if (ret)
 		goto out;
@@ -993,6 +1002,9 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
 	struct btrfs_root *quota_root;
+	struct btrfs_qgroup *parent;
+	struct btrfs_qgroup *member;
+	struct btrfs_qgroup_list *list;
 	int ret = 0;
 	int err;

@@ -1003,6 +1015,21 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
 		goto out;
 	}

+	member = find_qgroup_rb(fs_info, src);
+	parent = find_qgroup_rb(fs_info, dst);
+	if (!member || !parent) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* check if such qgroup relation exist firstly */
+	list_for_each_entry(list, &member->groups, next_group) {
+		if (list->group == parent)
+			goto exist;
+	}
+	ret = -ENOENT;
+	goto out;
+exist:
 	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
 	err = del_qgroup_relation_item(trans, quota_root, dst, src);
 	if (err && !ret)
@@ -1010,7 +1037,6 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
spin_lock(&fs_info->qgroup_lock); del_relation_rb(fs_info, src, dst); - spin_unlock(&fs_info->qgroup_lock); out: mutex_unlock(&fs_info->qgroup_ioctl_lock); @@ -1030,8 +1056,15 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, ret = -EINVAL; goto out; } + qgroup = find_qgroup_rb(fs_info, qgroupid); + if (qgroup) { + ret = -EEXIST; + goto out; + } ret = add_qgroup_item(trans, quota_root, qgroupid); + if (ret) + goto out; spin_lock(&fs_info->qgroup_lock); qgroup = add_qgroup_rb(fs_info, qgroupid); @@ -1058,15 +1091,18 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, goto out; } - /* check if there are no relations to this qgroup */ qgroup = find_qgroup_rb(fs_info, qgroupid); - if (qgroup) { - if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) { + if (!qgroup) { + ret = -ENOENT; + goto out; + } else { + /* check if there are no relations to this qgroup */ + if (!list_empty(&qgroup->groups) || + !list_empty(&qgroup->members)) { ret = -EBUSY; goto out; } } - ret = del_qgroup_item(trans, quota_root, qgroupid); spin_lock(&fs_info->qgroup_lock); -- cgit v1.2.3 From fc36ed7e0b13955ba66fc56dc5067e67ac105150 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 24 Apr 2013 16:57:33 +0000 Subject: Btrfs: separate sequence numbers for delayed ref tracking and tree mod log Sequence numbers for delayed refs have been introduced in the first version of the qgroup patch set. To solve the problem of find_all_roots on a busy file system, the tree mod log was introduced. The sequence numbers for that were simply shared between those two users. However, at one point in qgroup's quota accounting, there's a statement accessing the previous sequence number, that's still just doing (seq - 1) just as it would have to in the very first version. To satisfy that requirement, this patch makes the sequence number counter 64 bit and splits it into a major part (used for qgroup sequence number counting) and a minor part (incremented for each tree modification in the log). This enables us to go exactly one major step backwards, as required for qgroups, while still incrementing the sequence counter for tree mod log insertions to keep track of their order. Keeping them in a single variable means there's no need to change all the code dealing with comparisons of two sequence numbers. The sequence number is reset to 0 on commit (not new in this patch), which ensures we won't overflow the two 32 bit counters. Without this fix, the qgroup tracking can occasionally go wrong and WARN_ONs from the tree mod log code may happen. Signed-off-by: Jan Schmidt Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 47 ++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/ctree.h | 7 ++----- fs/btrfs/delayed-ref.c | 6 ++++-- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 5 +++-- fs/btrfs/qgroup.c | 13 ++++++++----- fs/btrfs/transaction.c | 2 +- 7 files changed, 63 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/qgroup.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2bc34408872d..a17d9991c333 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -360,6 +360,44 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info) write_unlock(&fs_info->tree_mod_log_lock); } +/* + * Increment the upper half of tree_mod_seq, set lower half zero. + * + * Must be called with fs_info->tree_mod_seq_lock held. 
+ */ +static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info) +{ + u64 seq = atomic64_read(&fs_info->tree_mod_seq); + seq &= 0xffffffff00000000ull; + seq += 1ull << 32; + atomic64_set(&fs_info->tree_mod_seq, seq); + return seq; +} + +/* + * Increment the lower half of tree_mod_seq. + * + * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers + * are generated should not technically require a spin lock here. (Rationale: + * incrementing the minor while incrementing the major seq number is between its + * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it + * just returns a unique sequence number as usual.) We have decided to leave + * that requirement in here and rethink it once we notice it really imposes a + * problem on some workload. + */ +static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info) +{ + return atomic64_inc_return(&fs_info->tree_mod_seq); +} + +/* + * return the last minor in the previous major tree_mod_seq number + */ +u64 btrfs_tree_mod_seq_prev(u64 seq) +{ + return (seq & 0xffffffff00000000ull) - 1ull; +} + /* * This adds a new blocker to the tree mod log's blocker list if the @elem * passed does not already have a sequence number set. So when a caller expects @@ -376,10 +414,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, tree_mod_log_write_lock(fs_info); spin_lock(&fs_info->tree_mod_seq_lock); if (!elem->seq) { - elem->seq = btrfs_inc_tree_mod_seq(fs_info); + elem->seq = btrfs_inc_tree_mod_seq_major(fs_info); list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); } - seq = btrfs_inc_tree_mod_seq(fs_info); + seq = btrfs_inc_tree_mod_seq_minor(fs_info); spin_unlock(&fs_info->tree_mod_seq_lock); tree_mod_log_write_unlock(fs_info); @@ -524,7 +562,10 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, if (!tm) return -ENOMEM; - tm->seq = btrfs_inc_tree_mod_seq(fs_info); + spin_lock(&fs_info->tree_mod_seq_lock); + tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); + spin_unlock(&fs_info->tree_mod_seq_lock); + return tm->seq; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 37c4da3403d0..2c48f52aba40 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1422,7 +1422,7 @@ struct btrfs_fs_info { /* this protects tree_mod_seq_list */ spinlock_t tree_mod_seq_lock; - atomic_t tree_mod_seq; + atomic64_t tree_mod_seq; struct list_head tree_mod_seq_list; struct seq_list tree_mod_seq_elem; @@ -3332,10 +3332,7 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem); void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem); -static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) -{ - return atomic_inc_return(&fs_info->tree_mod_seq); -} +u64 btrfs_tree_mod_seq_prev(u64 seq); int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); /* root-item.c */ diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 116abec7a29c..c219463fb1fd 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -361,8 +361,10 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, elem = list_first_entry(&fs_info->tree_mod_seq_list, struct seq_list, list); if (seq >= elem->seq) { - pr_debug("holding back delayed_ref %llu, lowest is " - "%llu (%p)\n", seq, elem->seq, delayed_refs); + pr_debug("holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)\n", + (u32)(seq >> 32), (u32)seq, + (u32)(elem->seq >> 32), (u32)elem->seq, + delayed_refs); ret = 1; } } diff --git 
a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e4488b57a7ae..92c44ed78de1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2157,7 +2157,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->defrag_running, 0); - atomic_set(&fs_info->tree_mod_seq, 0); + atomic64_set(&fs_info->tree_mod_seq, 0); fs_info->sb = sb; fs_info->max_inline = 8192 * 1024; fs_info->metadata_ratio = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f8a5652b0c43..1cae6631b3d8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2541,9 +2541,10 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, !trans->delayed_ref_elem.seq) { /* list without seq or seq without list */ btrfs_err(fs_info, - "qgroup accounting update error, list is%s empty, seq is %llu", + "qgroup accounting update error, list is%s empty, seq is %#x.%x", list_empty(&trans->qgroup_ref_list) ? "" : " not", - trans->delayed_ref_elem.seq); + (u32)(trans->delayed_ref_elem.seq >> 32), + (u32)trans->delayed_ref_elem.seq); BUG(); } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index f175471da882..e5c56238b6c6 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1242,9 +1242,11 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, case BTRFS_ADD_DELAYED_REF: case BTRFS_ADD_DELAYED_EXTENT: sgn = 1; + seq = btrfs_tree_mod_seq_prev(node->seq); break; case BTRFS_DROP_DELAYED_REF: sgn = -1; + seq = node->seq; break; case BTRFS_UPDATE_DELAYED_HEAD: return 0; @@ -1254,14 +1256,14 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, /* * the delayed ref sequence number we pass depends on the direction of - * the operation. for add operations, we pass (node->seq - 1) to skip + * the operation. for add operations, we pass + * tree_mod_log_prev_seq(node->seq) to skip * the delayed ref's current sequence number, because we need the state * of the tree before the add operation. for delete operations, we pass * (node->seq) to include the delayed ref's current sequence number, * because we need the state of the tree after the delete operation. */ - ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, - sgn > 0 ? node->seq - 1 : node->seq, &roots); + ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots); if (ret < 0) return ret; @@ -1772,8 +1774,9 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) { if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) return; - printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n", + pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n", trans, list_empty(&trans->qgroup_ref_list) ? 
"" : " not", - trans->delayed_ref_elem.seq); + (u32)(trans->delayed_ref_elem.seq >> 32), + (u32)trans->delayed_ref_elem.seq); BUG(); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 258fcebc7ccf..18d6fb7be265 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -162,7 +162,7 @@ loop: if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when " "creating a fresh transaction\n"); - atomic_set(&fs_info->tree_mod_seq, 0); + atomic64_set(&fs_info->tree_mod_seq, 0); spin_lock_init(&cur_trans->commit_lock); spin_lock_init(&cur_trans->delayed_refs.lock); -- cgit v1.2.3 From 46b665ceb1edd2ac149ff701313c115f52dc0348 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 25 Apr 2013 16:04:50 +0000 Subject: Btrfs: split btrfs_qgroup_account_ref into four functions The function is separated into a preparation part and the three accounting steps mentioned in the qgroups documentation. The goal is to make steps two and three usable by the rescan functionality. A side effect is that the function is restructured into readable subunits. Signed-off-by: Jan Schmidt Signed-off-by: Josef Bacik --- fs/btrfs/qgroup.c | 253 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 148 insertions(+), 105 deletions(-) (limited to 'fs/btrfs/qgroup.c') diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index e5c56238b6c6..1fb7d8da3084 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1185,6 +1185,144 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, return 0; } +static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info, + struct ulist *roots, struct ulist *tmp, + u64 seq) +{ + struct ulist_node *unode; + struct ulist_iterator uiter; + struct ulist_node *tmp_unode; + struct ulist_iterator tmp_uiter; + struct btrfs_qgroup *qg; + int ret; + + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(roots, &uiter))) { + qg = find_qgroup_rb(fs_info, unode->val); + if (!qg) + continue; + + ulist_reinit(tmp); + /* XXX id not needed */ + ret = ulist_add(tmp, qg->qgroupid, + (u64)(uintptr_t)qg, GFP_ATOMIC); + if (ret < 0) + return ret; + ULIST_ITER_INIT(&tmp_uiter); + while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { + struct btrfs_qgroup_list *glist; + + qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; + if (qg->refcnt < seq) + qg->refcnt = seq + 1; + else + ++qg->refcnt; + + list_for_each_entry(glist, &qg->groups, next_group) { + ret = ulist_add(tmp, glist->group->qgroupid, + (u64)(uintptr_t)glist->group, + GFP_ATOMIC); + if (ret < 0) + return ret; + } + } + } + + return 0; +} + +static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info, + struct ulist *roots, struct ulist *tmp, + u64 seq, int sgn, u64 num_bytes, + struct btrfs_qgroup *qgroup) +{ + struct ulist_node *unode; + struct ulist_iterator uiter; + struct btrfs_qgroup *qg; + struct btrfs_qgroup_list *glist; + int ret; + + ulist_reinit(tmp); + ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); + if (ret < 0) + return ret; + + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(tmp, &uiter))) { + qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; + if (qg->refcnt < seq) { + /* not visited by step 1 */ + qg->rfer += sgn * num_bytes; + qg->rfer_cmpr += sgn * num_bytes; + if (roots->nnodes == 0) { + qg->excl += sgn * num_bytes; + qg->excl_cmpr += sgn * num_bytes; + } + qgroup_dirty(fs_info, qg); + } + WARN_ON(qg->tag >= seq); + qg->tag = seq; + + list_for_each_entry(glist, &qg->groups, next_group) { + ret = 
ulist_add(tmp, glist->group->qgroupid, + (uintptr_t)glist->group, GFP_ATOMIC); + if (ret < 0) + return ret; + } + } + + return 0; +} + +static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info, + struct ulist *roots, struct ulist *tmp, + u64 seq, int sgn, u64 num_bytes) +{ + struct ulist_node *unode; + struct ulist_iterator uiter; + struct btrfs_qgroup *qg; + struct ulist_node *tmp_unode; + struct ulist_iterator tmp_uiter; + int ret; + + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(roots, &uiter))) { + qg = find_qgroup_rb(fs_info, unode->val); + if (!qg) + continue; + + ulist_reinit(tmp); + ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC); + if (ret < 0) + return ret; + + ULIST_ITER_INIT(&tmp_uiter); + while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { + struct btrfs_qgroup_list *glist; + + qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; + if (qg->tag == seq) + continue; + + if (qg->refcnt - seq == roots->nnodes) { + qg->excl -= sgn * num_bytes; + qg->excl_cmpr -= sgn * num_bytes; + qgroup_dirty(fs_info, qg); + } + + list_for_each_entry(glist, &qg->groups, next_group) { + ret = ulist_add(tmp, glist->group->qgroupid, + (uintptr_t)glist->group, + GFP_ATOMIC); + if (ret < 0) + return ret; + } + } + } + + return 0; +} + /* * btrfs_qgroup_account_ref is called for every ref that is added to or deleted * from the fs. First, all roots referencing the extent are searched, and @@ -1200,10 +1338,8 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, struct btrfs_root *quota_root; u64 ref_root; struct btrfs_qgroup *qgroup; - struct ulist_node *unode; struct ulist *roots = NULL; struct ulist *tmp = NULL; - struct ulist_iterator uiter; u64 seq; int ret = 0; int sgn; @@ -1287,119 +1423,26 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, seq = fs_info->qgroup_seq; fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(roots, &uiter))) { - struct ulist_node *tmp_unode; - struct ulist_iterator tmp_uiter; - struct btrfs_qgroup *qg; - - qg = find_qgroup_rb(fs_info, unode->val); - if (!qg) - continue; - - ulist_reinit(tmp); - /* XXX id not needed */ - ret = ulist_add(tmp, qg->qgroupid, - (u64)(uintptr_t)qg, GFP_ATOMIC); - if (ret < 0) - goto unlock; - ULIST_ITER_INIT(&tmp_uiter); - while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { - struct btrfs_qgroup_list *glist; - - qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; - if (qg->refcnt < seq) - qg->refcnt = seq + 1; - else - ++qg->refcnt; - - list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(tmp, glist->group->qgroupid, - (u64)(uintptr_t)glist->group, - GFP_ATOMIC); - if (ret < 0) - goto unlock; - } - } - } + ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq); + if (ret) + goto unlock; /* * step 2: walk from the new root */ - ulist_reinit(tmp); - ret = ulist_add(tmp, qgroup->qgroupid, - (uintptr_t)qgroup, GFP_ATOMIC); - if (ret < 0) + ret = qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn, + node->num_bytes, qgroup); + if (ret) goto unlock; - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(tmp, &uiter))) { - struct btrfs_qgroup *qg; - struct btrfs_qgroup_list *glist; - - qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; - if (qg->refcnt < seq) { - /* not visited by step 1 */ - qg->rfer += sgn * node->num_bytes; - qg->rfer_cmpr += sgn * node->num_bytes; - if (roots->nnodes == 0) { - qg->excl += sgn * node->num_bytes; - qg->excl_cmpr += sgn * node->num_bytes; - } - qgroup_dirty(fs_info, 
qg); - } - WARN_ON(qg->tag >= seq); - qg->tag = seq; - - list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(tmp, glist->group->qgroupid, - (uintptr_t)glist->group, GFP_ATOMIC); - if (ret < 0) - goto unlock; - } - } /* * step 3: walk again from old refs */ - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(roots, &uiter))) { - struct btrfs_qgroup *qg; - struct ulist_node *tmp_unode; - struct ulist_iterator tmp_uiter; - - qg = find_qgroup_rb(fs_info, unode->val); - if (!qg) - continue; - - ulist_reinit(tmp); - ret = ulist_add(tmp, qg->qgroupid, - (uintptr_t)qg, GFP_ATOMIC); - if (ret < 0) - goto unlock; - ULIST_ITER_INIT(&tmp_uiter); - while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { - struct btrfs_qgroup_list *glist; - - qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; - if (qg->tag == seq) - continue; - - if (qg->refcnt - seq == roots->nnodes) { - qg->excl -= sgn * node->num_bytes; - qg->excl_cmpr -= sgn * node->num_bytes; - qgroup_dirty(fs_info, qg); - } + ret = qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn, + node->num_bytes); + if (ret) + goto unlock; - list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(tmp, glist->group->qgroupid, - (uintptr_t)glist->group, - GFP_ATOMIC); - if (ret < 0) - goto unlock; - } - } - } - ret = 0; unlock: spin_unlock(&fs_info->qgroup_lock); ulist_free(roots); -- cgit v1.2.3 From 2f2320360b0c35b86938bfc561124474f0dac6e4 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 25 Apr 2013 16:04:51 +0000 Subject: Btrfs: rescan for qgroups If qgroup tracking is out of sync, a rescan operation can be started. It iterates the complete extent tree and recalculates all qgroup tracking data. This is an expensive operation and should not be used unless required. A filesystem under rescan can still be umounted. The rescan continues on the next mount. Status information is provided with a separate ioctl while a rescan operation is in progress. Signed-off-by: Jan Schmidt Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 17 ++- fs/btrfs/disk-io.c | 5 + fs/btrfs/ioctl.c | 83 ++++++++++-- fs/btrfs/qgroup.c | 318 +++++++++++++++++++++++++++++++++++++++++++-- include/uapi/linux/btrfs.h | 12 +- 5 files changed, 400 insertions(+), 35 deletions(-) (limited to 'fs/btrfs/qgroup.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c48f52aba40..d9bed5fd3347 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1021,9 +1021,9 @@ struct btrfs_block_group_item { */ #define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0) /* - * SCANNING is set during the initialization phase + * RESCAN is set during the initialization phase */ -#define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1) +#define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1) /* * Some qgroup entries are known to be out of date, * either because the configuration has changed in a way that @@ -1052,7 +1052,7 @@ struct btrfs_qgroup_status_item { * only used during scanning to record the progress * of the scan. 
It contains a logical address */ - __le64 scan; + __le64 rescan; } __attribute__ ((__packed__)); struct btrfs_qgroup_info_item { @@ -1603,6 +1603,11 @@ struct btrfs_fs_info { /* used by btrfs_qgroup_record_ref for an efficient tree traversal */ u64 qgroup_seq; + /* qgroup rescan items */ + struct mutex qgroup_rescan_lock; /* protects the progress item */ + struct btrfs_key qgroup_rescan_progress; + struct btrfs_workers qgroup_rescan_workers; + /* filesystem state */ unsigned long fs_state; @@ -2886,8 +2891,8 @@ BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item, version, 64); BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, flags, 64); -BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item, - scan, 64); +BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item, + rescan, 64); /* btrfs_qgroup_info_item */ BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, @@ -3828,7 +3833,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_quota_disable(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info); +int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 src, u64 dst); int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 92c44ed78de1..d96305e5cc93 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1976,6 +1976,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_stop_workers(&fs_info->caching_workers); btrfs_stop_workers(&fs_info->readahead_workers); btrfs_stop_workers(&fs_info->flush_workers); + btrfs_stop_workers(&fs_info->qgroup_rescan_workers); } /* helper to cleanup tree roots */ @@ -2267,6 +2268,7 @@ int open_ctree(struct super_block *sb, fs_info->qgroup_seq = 1; fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; + mutex_init(&fs_info->qgroup_rescan_lock); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); btrfs_init_free_cluster(&fs_info->data_alloc_cluster); @@ -2476,6 +2478,8 @@ int open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->readahead_workers, "readahead", fs_info->thread_pool_size, &fs_info->generic_worker); + btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1, + &fs_info->generic_worker); /* * endios are largely parallel and should have a very @@ -2510,6 +2514,7 @@ int open_ctree(struct super_block *sb, ret |= btrfs_start_workers(&fs_info->caching_workers); ret |= btrfs_start_workers(&fs_info->readahead_workers); ret |= btrfs_start_workers(&fs_info->flush_workers); + ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); if (ret) { err = -ENOMEM; goto fail_sb_buffer; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a74edc797531..f5f6af338b53 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3701,12 +3701,10 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) } down_write(&root->fs_info->subvol_sem); - if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { - trans = btrfs_start_transaction(root->fs_info->tree_root, 2); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } + trans = btrfs_start_transaction(root->fs_info->tree_root, 2); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; } switch (sa->cmd) { @@ -3716,9 +3714,6 @@ static long btrfs_ioctl_quota_ctl(struct file 
*file, void __user *arg) case BTRFS_QUOTA_CTL_DISABLE: ret = btrfs_quota_disable(trans, root->fs_info); break; - case BTRFS_QUOTA_CTL_RESCAN: - ret = btrfs_quota_rescan(root->fs_info); - break; default: ret = -EINVAL; break; @@ -3727,11 +3722,9 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) if (copy_to_user(arg, sa, sizeof(*sa))) ret = -EFAULT; - if (trans) { - err = btrfs_commit_transaction(trans, root->fs_info->tree_root); - if (err && !ret) - ret = err; - } + err = btrfs_commit_transaction(trans, root->fs_info->tree_root); + if (err && !ret) + ret = err; out: kfree(sa); up_write(&root->fs_info->subvol_sem); @@ -3886,6 +3879,64 @@ drop_write: return ret; } +static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) +{ + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_ioctl_quota_rescan_args *qsa; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + qsa = memdup_user(arg, sizeof(*qsa)); + if (IS_ERR(qsa)) { + ret = PTR_ERR(qsa); + goto drop_write; + } + + if (qsa->flags) { + ret = -EINVAL; + goto out; + } + + ret = btrfs_qgroup_rescan(root->fs_info); + +out: + kfree(qsa); +drop_write: + mnt_drop_write_file(file); + return ret; +} + +static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) +{ + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_ioctl_quota_rescan_args *qsa; + int ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + qsa = kzalloc(sizeof(*qsa), GFP_NOFS); + if (!qsa) + return -ENOMEM; + + if (root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { + qsa->flags = 1; + qsa->progress = root->fs_info->qgroup_rescan_progress.objectid; + } + + if (copy_to_user(arg, qsa, sizeof(*qsa))) + ret = -EFAULT; + + kfree(qsa); + return ret; +} + static long btrfs_ioctl_set_received_subvol(struct file *file, void __user *arg) { @@ -4124,6 +4175,10 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_qgroup_create(file, argp); case BTRFS_IOC_QGROUP_LIMIT: return btrfs_ioctl_qgroup_limit(file, argp); + case BTRFS_IOC_QUOTA_RESCAN: + return btrfs_ioctl_quota_rescan(file, argp); + case BTRFS_IOC_QUOTA_RESCAN_STATUS: + return btrfs_ioctl_quota_rescan_status(file, argp); case BTRFS_IOC_DEV_REPLACE: return btrfs_ioctl_dev_replace(root, argp); case BTRFS_IOC_GET_FSLABEL: diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1fb7d8da3084..da8458357b57 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -31,13 +31,13 @@ #include "locking.h" #include "ulist.h" #include "backref.h" +#include "extent_io.h" /* TODO XXX FIXME * - subvol delete -> delete when ref goes to 0? delete limits also? 
* - reorganize keys * - compressed * - sync - * - rescan * - copy also limits on subvol creation * - limit * - caches fuer ulists @@ -98,6 +98,14 @@ struct btrfs_qgroup_list { struct btrfs_qgroup *member; }; +struct qgroup_rescan { + struct btrfs_work work; + struct btrfs_fs_info *fs_info; +}; + +static void qgroup_rescan_start(struct btrfs_fs_info *fs_info, + struct qgroup_rescan *qscan); + /* must be called with qgroup_ioctl_lock held */ static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) @@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) } fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, ptr); - /* FIXME read scan element */ + fs_info->qgroup_rescan_progress.objectid = + btrfs_qgroup_status_rescan(l, ptr); + if (fs_info->qgroup_flags & + BTRFS_QGROUP_STATUS_FLAG_RESCAN) { + struct qgroup_rescan *qscan = + kmalloc(sizeof(*qscan), GFP_NOFS); + if (!qscan) { + ret = -ENOMEM; + goto out; + } + fs_info->qgroup_rescan_progress.type = 0; + fs_info->qgroup_rescan_progress.offset = 0; + qgroup_rescan_start(fs_info, qscan); + } goto next1; } @@ -719,7 +740,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans, ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); btrfs_set_qgroup_status_generation(l, ptr, trans->transid); - /* XXX scan */ + btrfs_set_qgroup_status_rescan(l, ptr, + fs_info->qgroup_rescan_progress.objectid); btrfs_mark_buffer_dirty(l); @@ -830,7 +852,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); - btrfs_set_qgroup_status_scan(leaf, ptr, 0); + btrfs_set_qgroup_status_rescan(leaf, ptr, 0); btrfs_mark_buffer_dirty(leaf); @@ -944,10 +966,11 @@ out: return ret; } -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info) +static void qgroup_dirty(struct btrfs_fs_info *fs_info, + struct btrfs_qgroup *qgroup) { - /* FIXME */ - return 0; + if (list_empty(&qgroup->dirty)) + list_add(&qgroup->dirty, &fs_info->dirty_qgroups); } int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, @@ -1155,13 +1178,6 @@ out: return ret; } -static void qgroup_dirty(struct btrfs_fs_info *fs_info, - struct btrfs_qgroup *qgroup) -{ - if (list_empty(&qgroup->dirty)) - list_add(&qgroup->dirty, &fs_info->dirty_qgroups); -} - /* * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts * the modification into a list that's later used by btrfs_end_transaction to @@ -1390,6 +1406,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, BUG(); } + mutex_lock(&fs_info->qgroup_rescan_lock); + if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { + if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { + mutex_unlock(&fs_info->qgroup_rescan_lock); + return 0; + } + } + mutex_unlock(&fs_info->qgroup_rescan_lock); + /* * the delayed ref sequence number we pass depends on the direction of * the operation. 
@@ -1403,7 +1428,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 	if (ret < 0)
 		return ret;
 
+	mutex_lock(&fs_info->qgroup_rescan_lock);
 	spin_lock(&fs_info->qgroup_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+			ret = 0;
+			goto unlock;
+		}
+	}
+
 	quota_root = fs_info->quota_root;
 	if (!quota_root)
 		goto unlock;
@@ -1445,6 +1478,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 
 unlock:
 	spin_unlock(&fs_info->qgroup_lock);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
 	ulist_free(roots);
 	ulist_free(tmp);
 
@@ -1823,3 +1857,259 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
 		(u32)trans->delayed_ref_elem.seq);
 	BUG();
 }
+
+/*
+ * returns < 0 on error, 0 when more leafs are to be scanned.
+ * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
+ */
+static int
+qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
+		   struct btrfs_trans_handle *trans, struct ulist *tmp,
+		   struct extent_buffer *scratch_leaf)
+{
+	struct btrfs_key found;
+	struct btrfs_fs_info *fs_info = qscan->fs_info;
+	struct ulist *roots = NULL;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct seq_list tree_mod_seq_elem = {};
+	u64 seq;
+	int slot;
+	int ret;
+
+	path->leave_spinning = 1;
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	ret = btrfs_search_slot_for_read(fs_info->extent_root,
+					 &fs_info->qgroup_rescan_progress,
+					 path, 1, 0);
+
+	pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
+		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
+		 fs_info->qgroup_rescan_progress.type,
+		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
+		 ret);
+
+	if (ret) {
+		/*
+		 * The rescan is about to end, we will not be scanning any
+		 * further blocks. We cannot unset the RESCAN flag here, because
+		 * we want to commit the transaction if everything went well.
+		 * To make the live accounting work in this phase, we set our
+		 * scan progress pointer such that every real extent objectid
+		 * will be smaller.
+		 */
+		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
+		btrfs_release_path(path);
+		mutex_unlock(&fs_info->qgroup_rescan_lock);
+		return ret;
+	}
+
+	btrfs_item_key_to_cpu(path->nodes[0], &found,
+			      btrfs_header_nritems(path->nodes[0]) - 1);
+	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
+
+	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
+	slot = path->slots[0];
+	btrfs_release_path(path);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
+		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
+		if (found.type != BTRFS_EXTENT_ITEM_KEY)
+			continue;
+		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
+					   tree_mod_seq_elem.seq, &roots);
+		if (ret < 0)
+			goto out;
+		spin_lock(&fs_info->qgroup_lock);
+		seq = fs_info->qgroup_seq;
+		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
+
+		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
+		if (ret) {
+			spin_unlock(&fs_info->qgroup_lock);
+			ulist_free(roots);
+			goto out;
+		}
+
+		/*
+		 * step2 of btrfs_qgroup_account_ref works from a single root,
+		 * we're doing all at once here.
+		 */
+		ulist_reinit(tmp);
+		ULIST_ITER_INIT(&uiter);
+		while ((unode = ulist_next(roots, &uiter))) {
+			struct btrfs_qgroup *qg;
+
+			qg = find_qgroup_rb(fs_info, unode->val);
+			if (!qg)
+				continue;
+
+			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
+					GFP_ATOMIC);
+			if (ret < 0) {
+				spin_unlock(&fs_info->qgroup_lock);
+				ulist_free(roots);
+				goto out;
+			}
+		}
+
+		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
+		ULIST_ITER_INIT(&uiter);
+		while ((unode = ulist_next(tmp, &uiter))) {
+			struct btrfs_qgroup *qg;
+			struct btrfs_qgroup_list *glist;
+
+			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
+			qg->rfer += found.offset;
+			qg->rfer_cmpr += found.offset;
+			WARN_ON(qg->tag >= seq);
+			if (qg->refcnt - seq == roots->nnodes) {
+				qg->excl += found.offset;
+				qg->excl_cmpr += found.offset;
+			}
+			qgroup_dirty(fs_info, qg);
+
+			list_for_each_entry(glist, &qg->groups, next_group) {
+				ret = ulist_add(tmp, glist->group->qgroupid,
+						(uintptr_t)glist->group,
+						GFP_ATOMIC);
+				if (ret < 0) {
+					spin_unlock(&fs_info->qgroup_lock);
+					ulist_free(roots);
+					goto out;
+				}
+			}
+		}
+
+		spin_unlock(&fs_info->qgroup_lock);
+		ulist_free(roots);
+		ret = 0;
+	}
+
+out:
+	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+
+	return ret;
+}
+
+static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
+{
+	struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
+						   work);
+	struct btrfs_path *path;
+	struct btrfs_trans_handle *trans = NULL;
+	struct btrfs_fs_info *fs_info = qscan->fs_info;
+	struct ulist *tmp = NULL;
+	struct extent_buffer *scratch_leaf = NULL;
+	int err = -ENOMEM;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		goto out;
+	tmp = ulist_alloc(GFP_NOFS);
+	if (!tmp)
+		goto out;
+	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
+	if (!scratch_leaf)
+		goto out;
+
+	err = 0;
+	while (!err) {
+		trans = btrfs_start_transaction(fs_info->fs_root, 0);
+		if (IS_ERR(trans)) {
+			err = PTR_ERR(trans);
+			break;
+		}
+		if (!fs_info->quota_enabled) {
+			err = -EINTR;
+		} else {
+			err = qgroup_rescan_leaf(qscan, path, trans,
+						 tmp, scratch_leaf);
+		}
+		if (err > 0)
+			btrfs_commit_transaction(trans, fs_info->fs_root);
+		else
+			btrfs_end_transaction(trans, fs_info->fs_root);
+	}
+
+out:
+	kfree(scratch_leaf);
+	ulist_free(tmp);
+	btrfs_free_path(path);
+	kfree(qscan);
+
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+
+	if (err == 2 &&
+	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
+		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+	} else if (err < 0) {
+		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+	}
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	if (err >= 0) {
+		pr_info("btrfs: qgroup scan completed%s\n",
+			err == 2 ? " (inconsistency flag cleared)" : "");
" (inconsistency flag cleared)" : ""); + } else { + pr_err("btrfs: qgroup scan failed with %d\n", err); + } +} + +static void +qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan) +{ + memset(&qscan->work, 0, sizeof(qscan->work)); + qscan->work.func = btrfs_qgroup_rescan_worker; + qscan->fs_info = fs_info; + + pr_info("btrfs: qgroup scan started\n"); + btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work); +} + +int +btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) +{ + int ret = 0; + struct rb_node *n; + struct btrfs_qgroup *qgroup; + struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS); + + if (!qscan) + return -ENOMEM; + + mutex_lock(&fs_info->qgroup_rescan_lock); + spin_lock(&fs_info->qgroup_lock); + if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) + ret = -EINPROGRESS; + else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) + ret = -EINVAL; + if (ret) { + spin_unlock(&fs_info->qgroup_lock); + mutex_unlock(&fs_info->qgroup_rescan_lock); + kfree(qscan); + return ret; + } + + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; + memset(&fs_info->qgroup_rescan_progress, 0, + sizeof(fs_info->qgroup_rescan_progress)); + + /* clear all current qgroup tracking information */ + for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { + qgroup = rb_entry(n, struct btrfs_qgroup, node); + qgroup->rfer = 0; + qgroup->rfer_cmpr = 0; + qgroup->excl = 0; + qgroup->excl_cmpr = 0; + } + spin_unlock(&fs_info->qgroup_lock); + mutex_unlock(&fs_info->qgroup_rescan_lock); + + qgroup_rescan_start(fs_info, qscan); + + return 0; +} diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 5e39e859a848..5ef0df545a2a 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -376,12 +376,18 @@ struct btrfs_ioctl_get_dev_stats { #define BTRFS_QUOTA_CTL_ENABLE 1 #define BTRFS_QUOTA_CTL_DISABLE 2 -#define BTRFS_QUOTA_CTL_RESCAN 3 +#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED 3 struct btrfs_ioctl_quota_ctl_args { __u64 cmd; __u64 status; }; +struct btrfs_ioctl_quota_rescan_args { + __u64 flags; + __u64 progress; + __u64 reserved[6]; +}; + struct btrfs_ioctl_qgroup_assign_args { __u64 assign; __u64 src; @@ -520,6 +526,10 @@ struct btrfs_ioctl_send_args { struct btrfs_ioctl_qgroup_create_args) #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ struct btrfs_ioctl_qgroup_limit_args) +#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \ + struct btrfs_ioctl_quota_rescan_args) +#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \ + struct btrfs_ioctl_quota_rescan_args) #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \ char[BTRFS_LABEL_SIZE]) #define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \ -- cgit v1.2.3 From 3d7b5a2882133a04716903b1f4878a64c6610842 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 25 Apr 2013 16:04:52 +0000 Subject: Btrfs: automatic rescan after "quota enable" command When qgroup tracking is enabled, we do an automatic cycle of the new rescan mechanism. 
From 3d7b5a2882133a04716903b1f4878a64c6610842 Mon Sep 17 00:00:00 2001
From: Jan Schmidt
Date: Thu, 25 Apr 2013 16:04:52 +0000
Subject: Btrfs: automatic rescan after "quota enable" command

When qgroup tracking is enabled, we do an automatic cycle of the new rescan
mechanism.

Signed-off-by: Jan Schmidt
Signed-off-by: Josef Bacik
---
 fs/btrfs/qgroup.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'fs/btrfs/qgroup.c')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index da8458357b57..9d49c586995a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1493,10 +1493,14 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_root *quota_root = fs_info->quota_root;
 	int ret = 0;
+	int start_rescan_worker = 0;
 
 	if (!quota_root)
 		goto out;
 
+	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
+		start_rescan_worker = 1;
+
 	fs_info->quota_enabled = fs_info->pending_quota_state;
 
 	spin_lock(&fs_info->qgroup_lock);
@@ -1522,6 +1526,13 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 	if (ret)
 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 
+	if (!ret && start_rescan_worker) {
+		ret = btrfs_qgroup_rescan(fs_info);
+		if (ret)
+			pr_err("btrfs: start rescan quota failed: %d\n", ret);
+		ret = 0;
+	}
+
 out:
 
 	return ret;
-- cgit v1.2.3
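The hunk above is edge-triggered: btrfs_run_qgroups() runs at commit
time, and only the transaction that flips quota on (quota_enabled still
0 while pending_quota_state was already set by btrfs_quota_enable())
starts the rescan worker; subsequent commits see no transition and skip
it. The toy program below restates that rule outside the kernel; it is
an illustrative sketch only, and struct toy_quota with its fields is
invented for the example, not a kernel API.

#include <stdbool.h>
#include <stdio.h>

struct toy_quota {
	bool enabled;	/* stands in for fs_info->quota_enabled */
	bool pending;	/* stands in for fs_info->pending_quota_state */
};

/* called once per transaction commit, like btrfs_run_qgroups() */
static void toy_run_qgroups(struct toy_quota *q)
{
	/* detect the off->on edge before publishing the new state */
	bool start_rescan = !q->enabled && q->pending;

	q->enabled = q->pending;
	if (start_rescan)
		printf("quota just enabled -> kick rescan worker\n");
	else
		printf("no state transition -> nothing to start\n");
}

int main(void)
{
	struct toy_quota q = { .enabled = false, .pending = true };

	toy_run_qgroups(&q);	/* first commit after "quota enable" */
	toy_run_qgroups(&q);	/* later commits do not re-trigger */
	return 0;
}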