From 440c840cb49f7de91e68a4cc7bca79a75cd298ae Mon Sep 17 00:00:00 2001
From: Timofey Titovets
Date: Mon, 4 Dec 2017 00:30:33 +0300
Subject: Btrfs: compression heuristic: replace heap sort with radix sort

The slowest part of the heuristic is currently the kernel heap sort(); it can
take up to 55% of the runtime just sorting the bucket items. Since the sort
runs on most data sets anyway, in order to compute byte_core_set_size
correctly, the only way to speed up the heuristic is to speed up the sort of
the bucket.

Add a general radix_sort function. Radix sort requires two buffers: one the
full size of the input array and one to store the counters (jump addresses).
This increases the memory usage per heuristic workspace by 1KiB:

  8KiB + 1KiB -> 8KiB + 2KiB

This is an LSD radix sort that uses 4 bits as the base, to keep the counters
array acceptably small (16 elements * 8 bytes). The implementation has
several points that can be adjusted, and it is written generically so it can
be reused in the kernel like heap sort, if needed.

Performance, measured on a userspace copy of the heuristic code, throughput:
- average <-> random data: ~3500 MiB/s - heap sort
- average <-> random data: ~6000 MiB/s - radix sort

Signed-off-by: Timofey Titovets
[ coding style fixes ]
Signed-off-by: David Sterba
---
 fs/btrfs/compression.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 123 insertions(+), 7 deletions(-)

(limited to 'fs/btrfs/compression.c')

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 5982c8a71f02..8cd48d7c3f76 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -33,7 +33,6 @@
 #include
 #include
 #include
-#include <linux/sort.h>
 #include
 #include "ctree.h"
 #include "disk-io.h"
@@ -752,6 +751,8 @@ struct heuristic_ws {
 	u32 sample_size;
 	/* Buckets store counters for each byte value */
 	struct bucket_item *bucket;
+	/* Sorting buffer */
+	struct bucket_item *bucket_b;
 	struct list_head list;
 };

@@ -763,6 +764,7 @@ static void free_heuristic_ws(struct list_head *ws)

 	kvfree(workspace->sample);
 	kfree(workspace->bucket);
+	kfree(workspace->bucket_b);
 	kfree(workspace);
 }

@@ -782,6 +784,10 @@ static struct list_head *alloc_heuristic_ws(void)
 	if (!ws->bucket)
 		goto fail;

+	ws->bucket_b = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket_b), GFP_KERNEL);
+	if (!ws->bucket_b)
+		goto fail;
+
 	INIT_LIST_HEAD(&ws->list);
 	return &ws->list;
 fail:
@@ -1278,13 +1284,122 @@ static u32 shannon_entropy(struct heuristic_ws *ws)
 	return entropy_sum * 100 / entropy_max;
 }

-/* Compare buckets by size, ascending */
-static int bucket_comp_rev(const void *lv, const void *rv)
+#define RADIX_BASE 4U
+#define COUNTERS_SIZE (1U << RADIX_BASE)
+
+static u8 get4bits(u64 num, int shift) {
+	u8 low4bits;
+
+	num >>= shift;
+	/* Reverse order */
+	low4bits = (COUNTERS_SIZE - 1) - (num % COUNTERS_SIZE);
+	return low4bits;
+}
+
+static void copy_cell(void *dst, int dest_i, void *src, int src_i)
 {
-	const struct bucket_item *l = (const struct bucket_item *)lv;
-	const struct bucket_item *r = (const struct bucket_item *)rv;
+	struct bucket_item *dstv = (struct bucket_item *)dst;
+	struct bucket_item *srcv = (struct bucket_item *)src;
+	dstv[dest_i] = srcv[src_i];
+}
+
+static u64 get_num(const void *a, int i)
+{
+	struct bucket_item *av = (struct bucket_item *)a;
+	return av[i].count;
+}

-	return r->count - l->count;
+/*
+ * Use 4 bits as radix base
+ * Use 16 u32 counters for calculating new possition in buf array
+ *
+ * @array - array that will be sorted
+ * @array_buf - buffer array to store sorting results
+ *		must be equal in size to @array
+ * @num - array size
+ * @get_num - function to extract number from array
+ * @copy_cell - function to copy data from array to array_buf and vice versa
+ * @get4bits - function to get 4 bits from number at specified offset
+ */
+static void radix_sort(void *array, void *array_buf, int num,
+		       u64 (*get_num)(const void *, int i),
+		       void (*copy_cell)(void *dest, int dest_i,
+					 void* src, int src_i),
+		       u8 (*get4bits)(u64 num, int shift))
+{
+	u64 max_num;
+	u64 buf_num;
+	u32 counters[COUNTERS_SIZE];
+	u32 new_addr;
+	u32 addr;
+	int bitlen;
+	int shift;
+	int i;
+
+	/*
+	 * Try avoid useless loop iterations for small numbers stored in big
+	 * counters. Example: 48 33 4 ... in 64bit array
+	 */
+	max_num = get_num(array, 0);
+	for (i = 1; i < num; i++) {
+		buf_num = get_num(array, i);
+		if (buf_num > max_num)
+			max_num = buf_num;
+	}
+
+	buf_num = ilog2(max_num);
+	bitlen = ALIGN(buf_num, RADIX_BASE * 2);
+
+	shift = 0;
+	while (shift < bitlen) {
+		memset(counters, 0, sizeof(counters));
+
+		for (i = 0; i < num; i++) {
+			buf_num = get_num(array, i);
+			addr = get4bits(buf_num, shift);
+			counters[addr]++;
+		}
+
+		for (i = 1; i < COUNTERS_SIZE; i++)
+			counters[i] += counters[i - 1];
+
+		for (i = num - 1; i >= 0; i--) {
+			buf_num = get_num(array, i);
+			addr = get4bits(buf_num, shift);
+			counters[addr]--;
+			new_addr = counters[addr];
+			copy_cell(array_buf, new_addr, array, i);
+		}
+
+		shift += RADIX_BASE;
+
+		/*
+		 * Normal radix expects to move data from a temporary array, to
+		 * the main one. But that requires some CPU time. Avoid that
+		 * by doing another sort iteration to original array instead of
+		 * memcpy()
+		 */
+		memset(counters, 0, sizeof(counters));
+
+		for (i = 0; i < num; i ++) {
+			buf_num = get_num(array_buf, i);
+			addr = get4bits(buf_num, shift);
+			counters[addr]++;
+		}
+
+		for (i = 1; i < COUNTERS_SIZE; i++)
+			counters[i] += counters[i - 1];
+
+		for (i = num - 1; i >= 0; i--) {
+			buf_num = get_num(array_buf, i);
+			addr = get4bits(buf_num, shift);
+			counters[addr]--;
+			new_addr = counters[addr];
+			copy_cell(array, new_addr, array_buf, i);
+		}
+
+		shift += RADIX_BASE;
+	}
 }

 /*
@@ -1314,7 +1429,8 @@ static int byte_core_set_size(struct heuristic_ws *ws)
 	struct bucket_item *bucket = ws->bucket;

 	/* Sort in reverse order */
-	sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL);
+	radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE, get_num, copy_cell,
+		   get4bits);

 	for (i = 0; i < BYTE_CORE_SET_LOW; i++)
 		coreset_sum += bucket[i].count;
--
cgit v1.2.3
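The loop structure is easier to see outside the kernel. Below is a minimal
userspace sketch of the same idea: a 4-bit LSD radix sort whose digit is
reversed so the result comes out in descending order, with two counting-sort
passes per iteration ping-ponging between the two buffers so no final
memcpy() back is needed. This is an illustration only, not the kernel code:
plain uint32_t values stand in for struct bucket_item, and the helper names
(sort_pass, radix_sort_desc) are invented for the example.

/*
 * Standalone sketch of the 4-bit LSD radix sort used by the heuristic.
 * Build with: cc -O2 radix_demo.c -o radix_demo
 */
#include <stdint.h>
#include <stdio.h>

#define RADIX_BASE    4U
#define COUNTERS_SIZE (1U << RADIX_BASE)

/* Take the 4 bits at 'shift' and reverse them so bigger keys sort first. */
static uint8_t get4bits(uint64_t num, int shift)
{
	return (COUNTERS_SIZE - 1) - ((num >> shift) % COUNTERS_SIZE);
}

/* One stable counting-sort pass on the 4-bit digit at 'shift'. */
static void sort_pass(const uint32_t *src, uint32_t *dst, int num, int shift)
{
	uint32_t counters[COUNTERS_SIZE] = { 0 };
	int i;

	for (i = 0; i < num; i++)
		counters[get4bits(src[i], shift)]++;
	for (i = 1; i < (int)COUNTERS_SIZE; i++)
		counters[i] += counters[i - 1];
	for (i = num - 1; i >= 0; i--)
		dst[--counters[get4bits(src[i], shift)]] = src[i];
}

/*
 * LSD radix sort, descending. Two passes per loop iteration ping-pong
 * between 'array' and 'buf', so the sorted data ends up back in 'array'
 * without an extra copy -- the same trick the patch uses.
 */
static void radix_sort_desc(uint32_t *array, uint32_t *buf, int num)
{
	uint64_t max = 0;
	int bitlen = 0;
	int shift, i;

	for (i = 0; i < num; i++)
		if (array[i] > max)
			max = array[i];
	/* Round the number of significant bits up to a multiple of 8. */
	while (max >> bitlen)
		bitlen += 2 * RADIX_BASE;

	for (shift = 0; shift < bitlen; shift += 2 * RADIX_BASE) {
		sort_pass(array, buf, num, shift);              /* array -> buf */
		sort_pass(buf, array, num, shift + RADIX_BASE); /* buf -> array */
	}
}

int main(void)
{
	uint32_t a[] = { 48, 33, 4, 0, 256, 7, 7, 1023 };
	uint32_t tmp[sizeof(a) / sizeof(a[0])];
	int n = sizeof(a) / sizeof(a[0]);
	int i;

	radix_sort_desc(a, tmp, n);
	for (i = 0; i < n; i++)
		printf("%u ", a[i]);
	printf("\n"); /* expected: 1023 256 48 33 7 7 4 0 */
	return 0;
}

Each pass is a stable counting sort over 16 buckets, so a 32-bit count needs
at most eight passes regardless of how many items are sorted, which is where
the throughput win over the O(n log n) comparison sort comes from.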
From e128f9c3f7242318e1c76d204c7ae32bc878b8c7 Mon Sep 17 00:00:00 2001
From: David Sterba
Date: Tue, 31 Oct 2017 17:24:26 +0100
Subject: btrfs: compression: add helper for type to string conversion

There are several places open-coding this conversion, so add a helper now
that we have 3 compression algorithms.

Signed-off-by: David Sterba
---
 fs/btrfs/compression.c | 15 +++++++++++++++
 fs/btrfs/compression.h |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'fs/btrfs/compression.c')

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 8cd48d7c3f76..28c3940062b7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -44,6 +44,21 @@
 #include "extent_io.h"
 #include "extent_map.h"

+static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
+
+const char* btrfs_compress_type2str(enum btrfs_compression_type type)
+{
+	switch (type) {
+	case BTRFS_COMPRESS_ZLIB:
+	case BTRFS_COMPRESS_LZO:
+	case BTRFS_COMPRESS_ZSTD:
+	case BTRFS_COMPRESS_NONE:
+		return btrfs_compress_types[type];
+	}
+
+	return NULL;
+}
+
 static int btrfs_decompress_bio(struct compressed_bio *cb);

 static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 6b692903a23c..677fa4aa0bd7 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -137,6 +137,8 @@ extern const struct btrfs_compress_op btrfs_zlib_compress;
 extern const struct btrfs_compress_op btrfs_lzo_compress;
 extern const struct btrfs_compress_op btrfs_zstd_compress;

+const char* btrfs_compress_type2str(enum btrfs_compression_type type);
+
 int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);

 #endif
--
cgit v1.2.3
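As a usage illustration only (a hypothetical caller, not part of the patch):
the helper maps the enum straight to the string form, e.g. for formatting a
compress= mount option or a compression property. A userspace mock-up, with
the enum values as defined in the btrfs headers (NONE=0, ZLIB=1, LZO=2,
ZSTD=3) and the table copied from the hunk above:

/* Hypothetical userspace mock-up of btrfs_compress_type2str(). */
#include <stdio.h>

enum btrfs_compression_type {
	BTRFS_COMPRESS_NONE = 0,
	BTRFS_COMPRESS_ZLIB = 1,
	BTRFS_COMPRESS_LZO  = 2,
	BTRFS_COMPRESS_ZSTD = 3,
};

static const char *const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };

static const char *btrfs_compress_type2str(enum btrfs_compression_type type)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB:
	case BTRFS_COMPRESS_LZO:
	case BTRFS_COMPRESS_ZSTD:
	case BTRFS_COMPRESS_NONE:
		return btrfs_compress_types[type];
	}

	return NULL;
}

int main(void)
{
	/* e.g. what a ",compress=<type>" style string would show */
	printf("compress=%s\n", btrfs_compress_type2str(BTRFS_COMPRESS_ZSTD));
	return 0;
}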
From 23ae8c63aaf82967536cba8893e5166b80b6d99a Mon Sep 17 00:00:00 2001
From: David Sterba
Date: Tue, 12 Dec 2017 20:35:02 +0100
Subject: btrfs: heuristic: open code get_num callback of radix sort

The callback is trivial and we don't need the abstraction for our purposes.
Let's open code it and also make the array types explicit.

Reviewed-by: Timofey Titovets
Signed-off-by: David Sterba
---
 fs/btrfs/compression.c | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

(limited to 'fs/btrfs/compression.c')

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 28c3940062b7..37a69d4b04ce 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1318,12 +1318,6 @@ static void copy_cell(void *dst, int dest_i, void *src, int src_i)
 	dstv[dest_i] = srcv[src_i];
 }

-static u64 get_num(const void *a, int i)
-{
-	struct bucket_item *av = (struct bucket_item *)a;
-	return av[i].count;
-}
-
 /*
  * Use 4 bits as radix base
  * Use 16 u32 counters for calculating new possition in buf array
@@ -1332,12 +1326,11 @@ static u64 get_num(const void *a, int i)
  * @array_buf - buffer array to store sorting results
  *		must be equal in size to @array
  * @num - array size
- * @get_num - function to extract number from array
 * @copy_cell - function to copy data from array to array_buf and vice versa
 * @get4bits - function to get 4 bits from number at specified offset
 */
-static void radix_sort(void *array, void *array_buf, int num,
-		       u64 (*get_num)(const void *, int i),
+static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
+		       int num,
 		       void (*copy_cell)(void *dest, int dest_i,
					 void* src, int src_i),
 		       u8 (*get4bits)(u64 num, int shift))
@@ -1355,9 +1348,9 @@ static void radix_sort(void *array, void *array_buf, int num,
 	 * Try avoid useless loop iterations for small numbers stored in big
 	 * counters. Example: 48 33 4 ... in 64bit array
 	 */
-	max_num = get_num(array, 0);
+	max_num = array[0].count;
 	for (i = 1; i < num; i++) {
-		buf_num = get_num(array, i);
+		buf_num = array[i].count;
 		if (buf_num > max_num)
 			max_num = buf_num;
 	}
@@ -1370,7 +1363,7 @@ static void radix_sort(void *array, void *array_buf, int num,
 		memset(counters, 0, sizeof(counters));

 		for (i = 0; i < num; i++) {
-			buf_num = get_num(array, i);
+			buf_num = array[i].count;
 			addr = get4bits(buf_num, shift);
 			counters[addr]++;
 		}
@@ -1379,7 +1372,7 @@ static void radix_sort(void *array, void *array_buf, int num,
 			counters[i] += counters[i - 1];

 		for (i = num - 1; i >= 0; i--) {
-			buf_num = get_num(array, i);
+			buf_num = array[i].count;
 			addr = get4bits(buf_num, shift);
 			counters[addr]--;
 			new_addr = counters[addr];
@@ -1397,7 +1390,7 @@ static void radix_sort(void *array, void *array_buf, int num,
 		memset(counters, 0, sizeof(counters));

 		for (i = 0; i < num; i ++) {
-			buf_num = get_num(array_buf, i);
+			buf_num = array_buf[i].count;
 			addr = get4bits(buf_num, shift);
 			counters[addr]++;
 		}
@@ -1406,7 +1399,7 @@ static void radix_sort(void *array, void *array_buf, int num,
 			counters[i] += counters[i - 1];

 		for (i = num - 1; i >= 0; i--) {
-			buf_num = get_num(array_buf, i);
+			buf_num = array_buf[i].count;
 			addr = get4bits(buf_num, shift);
 			counters[addr]--;
 			new_addr = counters[addr];
@@ -1444,7 +1437,7 @@ static int byte_core_set_size(struct heuristic_ws *ws)
 	struct bucket_item *bucket = ws->bucket;

 	/* Sort in reverse order */
-	radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE, get_num, copy_cell,
+	radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE, copy_cell,
 		   get4bits);

 	for (i = 0; i < BYTE_CORE_SET_LOW; i++)
--
cgit v1.2.3

From 7add17befcfc0811b583e4c3c70849a3095f0080 Mon Sep 17 00:00:00 2001
From: David Sterba
Date: Tue, 12 Dec 2017 20:35:02 +0100
Subject: btrfs: heuristic: open code copy_cell callback of radix sort

The callback is trivial and we don't need the abstraction for our purposes.
Let's open code it.
Reviewed-by: Timofey Titovets
Signed-off-by: David Sterba
---
 fs/btrfs/compression.c | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

(limited to 'fs/btrfs/compression.c')

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 37a69d4b04ce..935acabc0ea7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1311,13 +1311,6 @@ static u8 get4bits(u64 num, int shift) {
 	return low4bits;
 }

-static void copy_cell(void *dst, int dest_i, void *src, int src_i)
-{
-	struct bucket_item *dstv = (struct bucket_item *)dst;
-	struct bucket_item *srcv = (struct bucket_item *)src;
-	dstv[dest_i] = srcv[src_i];
-}
-
 /*
  * Use 4 bits as radix base
  * Use 16 u32 counters for calculating new possition in buf array
@@ -1326,13 +1319,10 @@ static void copy_cell(void *dst, int dest_i, void *src, int src_i)
  * @array_buf - buffer array to store sorting results
  *		must be equal in size to @array
  * @num - array size
- * @copy_cell - function to copy data from array to array_buf and vice versa
 * @get4bits - function to get 4 bits from number at specified offset
 */
 static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
 		       int num,
-		       void (*copy_cell)(void *dest, int dest_i,
-					 void* src, int src_i),
 		       u8 (*get4bits)(u64 num, int shift))
 {
 	u64 max_num;
@@ -1376,7 +1366,7 @@ static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
 			addr = get4bits(buf_num, shift);
 			counters[addr]--;
 			new_addr = counters[addr];
-			copy_cell(array_buf, new_addr, array, i);
+			array_buf[new_addr] = array[i];
 		}

 		shift += RADIX_BASE;
@@ -1403,7 +1393,7 @@ static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
 			addr = get4bits(buf_num, shift);
 			counters[addr]--;
 			new_addr = counters[addr];
-			copy_cell(array, new_addr, array_buf, i);
+			array[new_addr] = array_buf[i];
 		}

 		shift += RADIX_BASE;
@@ -1437,8 +1427,7 @@ static int byte_core_set_size(struct heuristic_ws *ws)
 	struct bucket_item *bucket = ws->bucket;

 	/* Sort in reverse order */
-	radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE, copy_cell,
-		   get4bits);
+	radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE, get4bits);

 	for (i = 0; i < BYTE_CORE_SET_LOW; i++)
 		coreset_sum += bucket[i].count;
--
cgit v1.2.3

From 36243c9199d6df63a0fbebd4fc49a1af21f3d8a8 Mon Sep 17 00:00:00 2001
From: David Sterba
Date: Tue, 12 Dec 2017 20:35:02 +0100
Subject: btrfs: heuristic: call get4bits directly

As it's a single instance and local to the file, we don't need to pass it as
an argument.
Reviewed-by: Timofey Titovets
Signed-off-by: David Sterba
---
 fs/btrfs/compression.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs/btrfs/compression.c')

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 935acabc0ea7..208334aa6c6e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1319,11 +1319,9 @@ static u8 get4bits(u64 num, int shift) {
  * @array_buf - buffer array to store sorting results
  *		must be equal in size to @array
  * @num - array size
- * @get4bits - function to get 4 bits from number at specified offset
 */
 static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
-		       int num,
-		       u8 (*get4bits)(u64 num, int shift))
+		       int num)
 {
 	u64 max_num;
 	u64 buf_num;
@@ -1427,7 +1425,7 @@ static int byte_core_set_size(struct heuristic_ws *ws)
 	struct bucket_item *bucket = ws->bucket;

 	/* Sort in reverse order */
-	radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE, get4bits);
+	radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE);

 	for (i = 0; i < BYTE_CORE_SET_LOW; i++)
 		coreset_sum += bucket[i].count;
--
cgit v1.2.3

From 32506af595dcaa8b71b7858bb54ccaa310d274fb Mon Sep 17 00:00:00 2001
From: Nikolay Borisov
Date: Wed, 13 Dec 2017 10:25:37 +0200
Subject: btrfs: Remove redundant bio_get/set calls in compressed read/write paths

bio_get/set is necessary only if the bio is going to be referenced after
submission. In the code paths where these calls are made we don't really need
them, since the bio is referenced only if btrfs_map_bio returns an error, and
that function can return an error only prior to submission, so referencing
the bio is safe. Furthermore, we call bio_endio, which consumes the last
reference. So remove the redundant calls.

Signed-off-by: Nikolay Borisov
Reviewed-by: David Sterba
Signed-off-by: David Sterba
---
 fs/btrfs/compression.c | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'fs/btrfs/compression.c')

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 208334aa6c6e..5abcc0461ee1 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -362,8 +362,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		page->mapping = NULL;
 		if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
 		    PAGE_SIZE) {
-			bio_get(bio);
-
 			/*
 			 * inc the count before we submit the bio so
 			 * we know the end IO handler won't happen before
@@ -386,8 +384,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				bio_endio(bio);
 			}

-			bio_put(bio);
-
 			bio = btrfs_bio_alloc(bdev, first_byte);
 			bio->bi_opf = REQ_OP_WRITE | write_flags;
 			bio->bi_private = cb;
@@ -403,7 +399,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		first_byte += PAGE_SIZE;
 		cond_resched();
 	}
-	bio_get(bio);

 	ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
 	BUG_ON(ret); /* -ENOMEM */
@@ -419,7 +414,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		bio_endio(bio);
 	}

-	bio_put(bio);
 	return 0;
 }

@@ -652,8 +646,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		page->mapping = NULL;
 		if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
 		    PAGE_SIZE) {
-			bio_get(comp_bio);
-
 			ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
						  BTRFS_WQ_ENDIO_DATA);
 			BUG_ON(ret); /* -ENOMEM */
@@ -680,8 +672,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				bio_endio(comp_bio);
 			}

-			bio_put(comp_bio);
-
 			comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
 			bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
 			comp_bio->bi_private = cb;
@@ -691,7 +681,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		}
 		cur_disk_byte += PAGE_SIZE;
 	}
-	bio_get(comp_bio);

 	ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
 	BUG_ON(ret); /* -ENOMEM */
@@ -707,7 +696,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		bio_endio(comp_bio);
 	}

-	bio_put(comp_bio);
 	return 0;

 fail2:
--
cgit v1.2.3