From 77bc133191c132da55c1533c16100191f863a211 Mon Sep 17 00:00:00 2001 From: Matthew Sakai Date: Thu, 16 Nov 2023 21:32:39 -0500 Subject: dm vdo: add MAINTAINERS file entry Signed-off-by: Matthew Sakai Signed-off-by: Mike Snitzer --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'MAINTAINERS') diff --git a/MAINTAINERS b/MAINTAINERS index 960512bec428..74b82cc8db48 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6118,6 +6118,15 @@ F: include/linux/device-mapper.h F: include/linux/dm-*.h F: include/uapi/linux/dm-*.h +DEVICE-MAPPER VDO TARGET +M: Matthew Sakai +M: dm-devel@lists.linux.dev +L: dm-devel@lists.linux.dev +S: Maintained +F: Documentation/admin-guide/device-mapper/vdo*.rst +F: drivers/md/dm-vdo-target.c +F: drivers/md/dm-vdo/ + DEVLINK M: Jiri Pirko L: netdev@vger.kernel.org -- cgit v1.2.3 From f36b1d3ba533d21b5b793623f05761b0297d114e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 26 Jan 2024 21:18:33 -0500 Subject: dm vdo: use a proper Makefile for dm-vdo Requires moving dm-vdo-target.c into drivers/md/dm-vdo/ This change adds a proper drivers/md/dm-vdo/Makefile and eliminates the abnormal use of patsubst in drivers/md/Makefile -- which was the cause of at least one build failure that was reported by the upstream build bot. Also, split out VDO's drivers/md/dm-vdo/Kconfig and include it from drivers/md/Kconfig Signed-off-by: Mike Snitzer Signed-off-by: Matthew Sakai --- MAINTAINERS | 1 - drivers/md/Kconfig | 18 +- drivers/md/Makefile | 3 +- drivers/md/dm-vdo-target.c | 2956 ------------------------------------- drivers/md/dm-vdo/Kconfig | 17 + drivers/md/dm-vdo/Makefile | 61 + drivers/md/dm-vdo/dm-vdo-target.c | 2956 +++++++++++++++++++++++++++++++++++++ 7 files changed, 3037 insertions(+), 2975 deletions(-) delete mode 100644 drivers/md/dm-vdo-target.c create mode 100644 drivers/md/dm-vdo/Kconfig create mode 100644 drivers/md/dm-vdo/Makefile create mode 100644 drivers/md/dm-vdo/dm-vdo-target.c (limited to 'MAINTAINERS') diff --git a/MAINTAINERS b/MAINTAINERS index 74b82cc8db48..45513eb1bc42 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6124,7 +6124,6 @@ M: dm-devel@lists.linux.dev L: dm-devel@lists.linux.dev S: Maintained F: Documentation/admin-guide/device-mapper/vdo*.rst -F: drivers/md/dm-vdo-target.c F: drivers/md/dm-vdo/ DEVLINK diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index cef25cf4800c..68ce56fc61d0 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -498,22 +498,6 @@ config DM_FLAKEY help A target that intermittently fails I/O for debugging purposes. -config DM_VDO - tristate "VDO: deduplication and compression target" - depends on 64BIT - depends on BLK_DEV_DM - select DM_BUFIO - select LZ4_COMPRESS - select LZ4_DECOMPRESS - help - This device mapper target presents a block device with - deduplication, compression and thin-provisioning. - - To compile this code as a module, choose M here: the module will - be called dm-vdo. - - If unsure, say N. - config DM_VERITY tristate "Verity target support" depends on BLK_DEV_DM @@ -650,4 +634,6 @@ config DM_AUDIT Enables audit logging of several security relevant events in the particular device-mapper targets, especially the integrity target. +source "drivers/md/dm-vdo/Kconfig" + endif # MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 2ac0e6331104..476a214e4bdc 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -25,7 +25,6 @@ dm-ebs-y += dm-ebs-target.o dm-era-y += dm-era-target.o dm-clone-y += dm-clone-target.o dm-clone-metadata.o dm-verity-y += dm-verity-target.o -dm-vdo-y += dm-vdo-target.o $(patsubst drivers/md/dm-vdo/%.c,dm-vdo/%.o,$(wildcard $(src)/dm-vdo/*.c)) dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o md-mod-y += md.o md-bitmap.o @@ -69,7 +68,7 @@ obj-$(CONFIG_DM_ZERO) += dm-zero.o obj-$(CONFIG_DM_RAID) += dm-raid.o obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o obj-$(CONFIG_DM_VERITY) += dm-verity.o -obj-$(CONFIG_DM_VDO) += dm-vdo.o +obj-$(CONFIG_DM_VDO) += dm-vdo/ obj-$(CONFIG_DM_CACHE) += dm-cache.o obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o obj-$(CONFIG_DM_EBS) += dm-ebs.o diff --git a/drivers/md/dm-vdo-target.c b/drivers/md/dm-vdo-target.c deleted file mode 100644 index 4fbc148681e5..000000000000 --- a/drivers/md/dm-vdo-target.c +++ /dev/null @@ -1,2956 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright 2023 Red Hat - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "dm-vdo/admin-state.h" -#include "dm-vdo/block-map.h" -#include "dm-vdo/completion.h" -#include "dm-vdo/constants.h" -#include "dm-vdo/data-vio.h" -#include "dm-vdo/dedupe.h" -#include "dm-vdo/dump.h" -#include "dm-vdo/encodings.h" -#include "dm-vdo/errors.h" -#include "dm-vdo/flush.h" -#include "dm-vdo/io-submitter.h" -#include "dm-vdo/logger.h" -#include "dm-vdo/memory-alloc.h" -#include "dm-vdo/message-stats.h" -#include "dm-vdo/pool-sysfs.h" -#include "dm-vdo/recovery-journal.h" -#include "dm-vdo/repair.h" -#include "dm-vdo/slab-depot.h" -#include "dm-vdo/status-codes.h" -#include "dm-vdo/string-utils.h" -#include "dm-vdo/thread-device.h" -#include "dm-vdo/thread-registry.h" -#include "dm-vdo/types.h" -#include "dm-vdo/uds-sysfs.h" -#include "dm-vdo/vdo.h" -#include "dm-vdo/vio.h" - -#define CURRENT_VERSION "8.3.0.65" - -enum { - GROW_LOGICAL_PHASE_START, - GROW_LOGICAL_PHASE_GROW_BLOCK_MAP, - GROW_LOGICAL_PHASE_END, - GROW_LOGICAL_PHASE_ERROR, - GROW_PHYSICAL_PHASE_START, - GROW_PHYSICAL_PHASE_COPY_SUMMARY, - GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS, - GROW_PHYSICAL_PHASE_USE_NEW_SLABS, - GROW_PHYSICAL_PHASE_END, - GROW_PHYSICAL_PHASE_ERROR, - LOAD_PHASE_START, - LOAD_PHASE_STATS, - LOAD_PHASE_LOAD_DEPOT, - LOAD_PHASE_MAKE_DIRTY, - LOAD_PHASE_PREPARE_TO_ALLOCATE, - LOAD_PHASE_SCRUB_SLABS, - LOAD_PHASE_DATA_REDUCTION, - LOAD_PHASE_FINISHED, - LOAD_PHASE_DRAIN_JOURNAL, - LOAD_PHASE_WAIT_FOR_READ_ONLY, - PRE_LOAD_PHASE_START, - PRE_LOAD_PHASE_LOAD_COMPONENTS, - PRE_LOAD_PHASE_END, - PREPARE_GROW_PHYSICAL_PHASE_START, - RESUME_PHASE_START, - RESUME_PHASE_ALLOW_READ_ONLY_MODE, - RESUME_PHASE_DEDUPE, - RESUME_PHASE_DEPOT, - RESUME_PHASE_JOURNAL, - RESUME_PHASE_BLOCK_MAP, - RESUME_PHASE_LOGICAL_ZONES, - RESUME_PHASE_PACKER, - RESUME_PHASE_FLUSHER, - RESUME_PHASE_DATA_VIOS, - RESUME_PHASE_END, - SUSPEND_PHASE_START, - SUSPEND_PHASE_PACKER, - SUSPEND_PHASE_DATA_VIOS, - SUSPEND_PHASE_DEDUPE, - SUSPEND_PHASE_FLUSHES, - SUSPEND_PHASE_LOGICAL_ZONES, - SUSPEND_PHASE_BLOCK_MAP, - SUSPEND_PHASE_JOURNAL, - SUSPEND_PHASE_DEPOT, - SUSPEND_PHASE_READ_ONLY_WAIT, - SUSPEND_PHASE_WRITE_SUPER_BLOCK, - SUSPEND_PHASE_END, -}; - -static const char * const ADMIN_PHASE_NAMES[] = { - "GROW_LOGICAL_PHASE_START", - "GROW_LOGICAL_PHASE_GROW_BLOCK_MAP", - "GROW_LOGICAL_PHASE_END", - "GROW_LOGICAL_PHASE_ERROR", - "GROW_PHYSICAL_PHASE_START", - "GROW_PHYSICAL_PHASE_COPY_SUMMARY", - "GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS", - "GROW_PHYSICAL_PHASE_USE_NEW_SLABS", - "GROW_PHYSICAL_PHASE_END", - "GROW_PHYSICAL_PHASE_ERROR", - "LOAD_PHASE_START", - "LOAD_PHASE_STATS", - "LOAD_PHASE_LOAD_DEPOT", - "LOAD_PHASE_MAKE_DIRTY", - "LOAD_PHASE_PREPARE_TO_ALLOCATE", - "LOAD_PHASE_SCRUB_SLABS", - "LOAD_PHASE_DATA_REDUCTION", - "LOAD_PHASE_FINISHED", - "LOAD_PHASE_DRAIN_JOURNAL", - "LOAD_PHASE_WAIT_FOR_READ_ONLY", - "PRE_LOAD_PHASE_START", - "PRE_LOAD_PHASE_LOAD_COMPONENTS", - "PRE_LOAD_PHASE_END", - "PREPARE_GROW_PHYSICAL_PHASE_START", - "RESUME_PHASE_START", - "RESUME_PHASE_ALLOW_READ_ONLY_MODE", - "RESUME_PHASE_DEDUPE", - "RESUME_PHASE_DEPOT", - "RESUME_PHASE_JOURNAL", - "RESUME_PHASE_BLOCK_MAP", - "RESUME_PHASE_LOGICAL_ZONES", - "RESUME_PHASE_PACKER", - "RESUME_PHASE_FLUSHER", - "RESUME_PHASE_DATA_VIOS", - "RESUME_PHASE_END", - "SUSPEND_PHASE_START", - "SUSPEND_PHASE_PACKER", - "SUSPEND_PHASE_DATA_VIOS", - "SUSPEND_PHASE_DEDUPE", - "SUSPEND_PHASE_FLUSHES", - "SUSPEND_PHASE_LOGICAL_ZONES", - "SUSPEND_PHASE_BLOCK_MAP", - "SUSPEND_PHASE_JOURNAL", - "SUSPEND_PHASE_DEPOT", - "SUSPEND_PHASE_READ_ONLY_WAIT", - "SUSPEND_PHASE_WRITE_SUPER_BLOCK", - "SUSPEND_PHASE_END", -}; - -enum { - /* If we bump this, update the arrays below */ - TABLE_VERSION = 4, -}; - -/* arrays for handling different table versions */ -static const u8 REQUIRED_ARGC[] = { 10, 12, 9, 7, 6 }; -/* pool name no longer used. only here for verification of older versions */ -static const u8 POOL_NAME_ARG_INDEX[] = { 8, 10, 8 }; - -/* - * Track in-use instance numbers using a flat bit array. - * - * O(n) run time isn't ideal, but if we have 1000 VDO devices in use simultaneously we still only - * need to scan 16 words, so it's not likely to be a big deal compared to other resource usage. - */ - -enum { - /* - * This minimum size for the bit array creates a numbering space of 0-999, which allows - * successive starts of the same volume to have different instance numbers in any - * reasonably-sized test. Changing instances on restart allows vdoMonReport to detect that - * the ephemeral stats have reset to zero. - */ - BIT_COUNT_MINIMUM = 1000, - /** Grow the bit array by this many bits when needed */ - BIT_COUNT_INCREMENT = 100, -}; - -struct instance_tracker { - unsigned int bit_count; - unsigned long *words; - unsigned int count; - unsigned int next; -}; - -static DEFINE_MUTEX(instances_lock); -static struct instance_tracker instances; - -/** - * free_device_config() - Free a device config created by parse_device_config(). - * @config: The config to free. - */ -static void free_device_config(struct device_config *config) -{ - if (config == NULL) - return; - - if (config->owned_device != NULL) - dm_put_device(config->owning_target, config->owned_device); - - uds_free(config->parent_device_name); - uds_free(config->original_string); - - /* Reduce the chance a use-after-free (as in BZ 1669960) happens to work. */ - memset(config, 0, sizeof(*config)); - uds_free(config); -} - -/** - * get_version_number() - Decide the version number from argv. - * - * @argc: The number of table values. - * @argv: The array of table values. - * @error_ptr: A pointer to return a error string in. - * @version_ptr: A pointer to return the version. - * - * Return: VDO_SUCCESS or an error code. - */ -static int get_version_number(int argc, char **argv, char **error_ptr, - unsigned int *version_ptr) -{ - /* version, if it exists, is in a form of V */ - if (sscanf(argv[0], "V%u", version_ptr) == 1) { - if (*version_ptr < 1 || *version_ptr > TABLE_VERSION) { - *error_ptr = "Unknown version number detected"; - return VDO_BAD_CONFIGURATION; - } - } else { - /* V0 actually has no version number in the table string */ - *version_ptr = 0; - } - - /* - * V0 and V1 have no optional parameters. There will always be a parameter for thread - * config, even if it's a "." to show it's an empty list. - */ - if (*version_ptr <= 1) { - if (argc != REQUIRED_ARGC[*version_ptr]) { - *error_ptr = "Incorrect number of arguments for version"; - return VDO_BAD_CONFIGURATION; - } - } else if (argc < REQUIRED_ARGC[*version_ptr]) { - *error_ptr = "Incorrect number of arguments for version"; - return VDO_BAD_CONFIGURATION; - } - - if (*version_ptr != TABLE_VERSION) { - uds_log_warning("Detected version mismatch between kernel module and tools kernel: %d, tool: %d", - TABLE_VERSION, *version_ptr); - uds_log_warning("Please consider upgrading management tools to match kernel."); - } - return VDO_SUCCESS; -} - -/* Free a list of non-NULL string pointers, and then the list itself. */ -static void free_string_array(char **string_array) -{ - unsigned int offset; - - for (offset = 0; string_array[offset] != NULL; offset++) - uds_free(string_array[offset]); - uds_free(string_array); -} - -/* - * Split the input string into substrings, separated at occurrences of the indicated character, - * returning a null-terminated list of string pointers. - * - * The string pointers and the pointer array itself should both be freed with uds_free() when no - * longer needed. This can be done with vdo_free_string_array (below) if the pointers in the array - * are not changed. Since the array and copied strings are allocated by this function, it may only - * be used in contexts where allocation is permitted. - * - * Empty substrings are not ignored; that is, returned substrings may be empty strings if the - * separator occurs twice in a row. - */ -static int split_string(const char *string, char separator, char ***substring_array_ptr) -{ - unsigned int current_substring = 0, substring_count = 1; - const char *s; - char **substrings; - int result; - ptrdiff_t length; - - for (s = string; *s != 0; s++) { - if (*s == separator) - substring_count++; - } - - result = uds_allocate(substring_count + 1, char *, "string-splitting array", - &substrings); - if (result != UDS_SUCCESS) - return result; - - for (s = string; *s != 0; s++) { - if (*s == separator) { - ptrdiff_t length = s - string; - - result = uds_allocate(length + 1, char, "split string", - &substrings[current_substring]); - if (result != UDS_SUCCESS) { - free_string_array(substrings); - return result; - } - /* - * Trailing NUL is already in place after allocation; deal with the zero or - * more non-NUL bytes in the string. - */ - if (length > 0) - memcpy(substrings[current_substring], string, length); - string = s + 1; - current_substring++; - BUG_ON(current_substring >= substring_count); - } - } - /* Process final string, with no trailing separator. */ - BUG_ON(current_substring != (substring_count - 1)); - length = strlen(string); - - result = uds_allocate(length + 1, char, "split string", - &substrings[current_substring]); - if (result != UDS_SUCCESS) { - free_string_array(substrings); - return result; - } - memcpy(substrings[current_substring], string, length); - current_substring++; - /* substrings[current_substring] is NULL already */ - *substring_array_ptr = substrings; - return UDS_SUCCESS; -} - -/* - * Join the input substrings into one string, joined with the indicated character, returning a - * string. array_length is a bound on the number of valid elements in substring_array, in case it - * is not NULL-terminated. - */ -static int join_strings(char **substring_array, size_t array_length, char separator, - char **string_ptr) -{ - size_t string_length = 0; - size_t i; - int result; - char *output, *current_position; - - for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++) - string_length += strlen(substring_array[i]) + 1; - - result = uds_allocate(string_length, char, __func__, &output); - if (result != VDO_SUCCESS) - return result; - - current_position = &output[0]; - - for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++) { - current_position = uds_append_to_buffer(current_position, - output + string_length, "%s", - substring_array[i]); - *current_position = separator; - current_position++; - } - - /* We output one too many separators; replace the last with a zero byte. */ - if (current_position != output) - *(current_position - 1) = '\0'; - - *string_ptr = output; - return UDS_SUCCESS; -} - -/** - * parse_bool() - Parse a two-valued option into a bool. - * @bool_str: The string value to convert to a bool. - * @true_str: The string value which should be converted to true. - * @false_str: The string value which should be converted to false. - * @bool_ptr: A pointer to return the bool value in. - * - * Return: VDO_SUCCESS or an error if bool_str is neither true_str nor false_str. - */ -static inline int __must_check parse_bool(const char *bool_str, const char *true_str, - const char *false_str, bool *bool_ptr) -{ - bool value = false; - - if (strcmp(bool_str, true_str) == 0) - value = true; - else if (strcmp(bool_str, false_str) == 0) - value = false; - else - return VDO_BAD_CONFIGURATION; - - *bool_ptr = value; - return VDO_SUCCESS; -} - -/** - * process_one_thread_config_spec() - Process one component of a thread parameter configuration - * string and update the configuration data structure. - * @thread_param_type: The type of thread specified. - * @count: The thread count requested. - * @config: The configuration data structure to update. - * - * If the thread count requested is invalid, a message is logged and -EINVAL returned. If the - * thread name is unknown, a message is logged but no error is returned. - * - * Return: VDO_SUCCESS or -EINVAL - */ -static int process_one_thread_config_spec(const char *thread_param_type, - unsigned int count, - struct thread_count_config *config) -{ - /* Handle limited thread parameters */ - if (strcmp(thread_param_type, "bioRotationInterval") == 0) { - if (count == 0) { - uds_log_error("thread config string error: 'bioRotationInterval' of at least 1 is required"); - return -EINVAL; - } else if (count > VDO_BIO_ROTATION_INTERVAL_LIMIT) { - uds_log_error("thread config string error: 'bioRotationInterval' cannot be higher than %d", - VDO_BIO_ROTATION_INTERVAL_LIMIT); - return -EINVAL; - } - config->bio_rotation_interval = count; - return VDO_SUCCESS; - } - if (strcmp(thread_param_type, "logical") == 0) { - if (count > MAX_VDO_LOGICAL_ZONES) { - uds_log_error("thread config string error: at most %d 'logical' threads are allowed", - MAX_VDO_LOGICAL_ZONES); - return -EINVAL; - } - config->logical_zones = count; - return VDO_SUCCESS; - } - if (strcmp(thread_param_type, "physical") == 0) { - if (count > MAX_VDO_PHYSICAL_ZONES) { - uds_log_error("thread config string error: at most %d 'physical' threads are allowed", - MAX_VDO_PHYSICAL_ZONES); - return -EINVAL; - } - config->physical_zones = count; - return VDO_SUCCESS; - } - /* Handle other thread count parameters */ - if (count > MAXIMUM_VDO_THREADS) { - uds_log_error("thread config string error: at most %d '%s' threads are allowed", - MAXIMUM_VDO_THREADS, thread_param_type); - return -EINVAL; - } - if (strcmp(thread_param_type, "hash") == 0) { - config->hash_zones = count; - return VDO_SUCCESS; - } - if (strcmp(thread_param_type, "cpu") == 0) { - if (count == 0) { - uds_log_error("thread config string error: at least one 'cpu' thread required"); - return -EINVAL; - } - config->cpu_threads = count; - return VDO_SUCCESS; - } - if (strcmp(thread_param_type, "ack") == 0) { - config->bio_ack_threads = count; - return VDO_SUCCESS; - } - if (strcmp(thread_param_type, "bio") == 0) { - if (count == 0) { - uds_log_error("thread config string error: at least one 'bio' thread required"); - return -EINVAL; - } - config->bio_threads = count; - return VDO_SUCCESS; - } - - /* - * Don't fail, just log. This will handle version mismatches between user mode tools and - * kernel. - */ - uds_log_info("unknown thread parameter type \"%s\"", thread_param_type); - return VDO_SUCCESS; -} - -/** - * parse_one_thread_config_spec() - Parse one component of a thread parameter configuration string - * and update the configuration data structure. - * @spec: The thread parameter specification string. - * @config: The configuration data to be updated. - */ -static int parse_one_thread_config_spec(const char *spec, - struct thread_count_config *config) -{ - unsigned int count; - char **fields; - int result; - - result = split_string(spec, '=', &fields); - if (result != UDS_SUCCESS) - return result; - - if ((fields[0] == NULL) || (fields[1] == NULL) || (fields[2] != NULL)) { - uds_log_error("thread config string error: expected thread parameter assignment, saw \"%s\"", - spec); - free_string_array(fields); - return -EINVAL; - } - - result = kstrtouint(fields[1], 10, &count); - if (result != UDS_SUCCESS) { - uds_log_error("thread config string error: integer value needed, found \"%s\"", - fields[1]); - free_string_array(fields); - return result; - } - - result = process_one_thread_config_spec(fields[0], count, config); - free_string_array(fields); - return result; -} - -/** - * parse_thread_config_string() - Parse the configuration string passed and update the specified - * counts and other parameters of various types of threads to be - * created. - * @string: Thread parameter configuration string. - * @config: The thread configuration data to update. - * - * The configuration string should contain one or more comma-separated specs of the form - * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval", - * "logical", "physical", and "hash". - * - * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop - * further parsing. - * - * This function can't set the "reason" value the caller wants to pass back, because we'd want to - * format it to say which field was invalid, and we can't allocate the "reason" strings - * dynamically. So if an error occurs, we'll log the details and pass back an error. - * - * Return: VDO_SUCCESS or -EINVAL or -ENOMEM - */ -static int parse_thread_config_string(const char *string, - struct thread_count_config *config) -{ - int result = VDO_SUCCESS; - char **specs; - - if (strcmp(".", string) != 0) { - unsigned int i; - - result = split_string(string, ',', &specs); - if (result != UDS_SUCCESS) - return result; - - for (i = 0; specs[i] != NULL; i++) { - result = parse_one_thread_config_spec(specs[i], config); - if (result != VDO_SUCCESS) - break; - } - free_string_array(specs); - } - return result; -} - -/** - * process_one_key_value_pair() - Process one component of an optional parameter string and update - * the configuration data structure. - * @key: The optional parameter key name. - * @value: The optional parameter value. - * @config: The configuration data structure to update. - * - * If the value requested is invalid, a message is logged and -EINVAL returned. If the key is - * unknown, a message is logged but no error is returned. - * - * Return: VDO_SUCCESS or -EINVAL - */ -static int process_one_key_value_pair(const char *key, unsigned int value, - struct device_config *config) -{ - /* Non thread optional parameters */ - if (strcmp(key, "maxDiscard") == 0) { - if (value == 0) { - uds_log_error("optional parameter error: at least one max discard block required"); - return -EINVAL; - } - /* Max discard sectors in blkdev_issue_discard is UINT_MAX >> 9 */ - if (value > (UINT_MAX / VDO_BLOCK_SIZE)) { - uds_log_error("optional parameter error: at most %d max discard blocks are allowed", - UINT_MAX / VDO_BLOCK_SIZE); - return -EINVAL; - } - config->max_discard_blocks = value; - return VDO_SUCCESS; - } - /* Handles unknown key names */ - return process_one_thread_config_spec(key, value, &config->thread_counts); -} - -/** - * parse_one_key_value_pair() - Parse one key/value pair and update the configuration data - * structure. - * @key: The optional key name. - * @value: The optional value. - * @config: The configuration data to be updated. - * - * Return: VDO_SUCCESS or error. - */ -static int parse_one_key_value_pair(const char *key, const char *value, - struct device_config *config) -{ - unsigned int count; - int result; - - if (strcmp(key, "deduplication") == 0) - return parse_bool(value, "on", "off", &config->deduplication); - - if (strcmp(key, "compression") == 0) - return parse_bool(value, "on", "off", &config->compression); - - /* The remaining arguments must have integral values. */ - result = kstrtouint(value, 10, &count); - if (result != UDS_SUCCESS) { - uds_log_error("optional config string error: integer value needed, found \"%s\"", - value); - return result; - } - return process_one_key_value_pair(key, count, config); -} - -/** - * parse_key_value_pairs() - Parse all key/value pairs from a list of arguments. - * @argc: The total number of arguments in list. - * @argv: The list of key/value pairs. - * @config: The device configuration data to update. - * - * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop - * further parsing. - * - * This function can't set the "reason" value the caller wants to pass back, because we'd want to - * format it to say which field was invalid, and we can't allocate the "reason" strings - * dynamically. So if an error occurs, we'll log the details and return the error. - * - * Return: VDO_SUCCESS or error - */ -static int parse_key_value_pairs(int argc, char **argv, struct device_config *config) -{ - int result = VDO_SUCCESS; - - while (argc) { - result = parse_one_key_value_pair(argv[0], argv[1], config); - if (result != VDO_SUCCESS) - break; - - argc -= 2; - argv += 2; - } - - return result; -} - -/** - * parse_optional_arguments() - Parse the configuration string passed in for optional arguments. - * @arg_set: The structure holding the arguments to parse. - * @error_ptr: Pointer to a buffer to hold the error string. - * @config: Pointer to device configuration data to update. - * - * For V0/V1 configurations, there will only be one optional parameter; the thread configuration. - * The configuration string should contain one or more comma-separated specs of the form - * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval", - * "logical", "physical", and "hash". - * - * For V2 configurations and beyond, there could be any number of arguments. They should contain - * one or more key/value pairs separated by a space. - * - * Return: VDO_SUCCESS or error - */ -static int parse_optional_arguments(struct dm_arg_set *arg_set, char **error_ptr, - struct device_config *config) -{ - int result = VDO_SUCCESS; - - if (config->version == 0 || config->version == 1) { - result = parse_thread_config_string(arg_set->argv[0], - &config->thread_counts); - if (result != VDO_SUCCESS) { - *error_ptr = "Invalid thread-count configuration"; - return VDO_BAD_CONFIGURATION; - } - } else { - if ((arg_set->argc % 2) != 0) { - *error_ptr = "Odd number of optional arguments given but they should be pairs"; - return VDO_BAD_CONFIGURATION; - } - result = parse_key_value_pairs(arg_set->argc, arg_set->argv, config); - if (result != VDO_SUCCESS) { - *error_ptr = "Invalid optional argument configuration"; - return VDO_BAD_CONFIGURATION; - } - } - return result; -} - -/** - * handle_parse_error() - Handle a parsing error. - * @config: The config to free. - * @error_ptr: A place to store a constant string about the error. - * @error_str: A constant string to store in error_ptr. - */ -static void handle_parse_error(struct device_config *config, char **error_ptr, - char *error_str) -{ - free_device_config(config); - *error_ptr = error_str; -} - -/** - * parse_device_config() - Convert the dmsetup table into a struct device_config. - * @argc: The number of table values. - * @argv: The array of table values. - * @ti: The target structure for this table. - * @config_ptr: A pointer to return the allocated config. - * - * Return: VDO_SUCCESS or an error code. - */ -static int parse_device_config(int argc, char **argv, struct dm_target *ti, - struct device_config **config_ptr) -{ - bool enable_512e; - size_t logical_bytes = to_bytes(ti->len); - struct dm_arg_set arg_set; - char **error_ptr = &ti->error; - struct device_config *config = NULL; - int result; - - if ((logical_bytes % VDO_BLOCK_SIZE) != 0) { - handle_parse_error(config, error_ptr, - "Logical size must be a multiple of 4096"); - return VDO_BAD_CONFIGURATION; - } - - if (argc == 0) { - handle_parse_error(config, error_ptr, "Incorrect number of arguments"); - return VDO_BAD_CONFIGURATION; - } - - result = uds_allocate(1, struct device_config, "device_config", &config); - if (result != VDO_SUCCESS) { - handle_parse_error(config, error_ptr, - "Could not allocate config structure"); - return VDO_BAD_CONFIGURATION; - } - - config->owning_target = ti; - config->logical_blocks = logical_bytes / VDO_BLOCK_SIZE; - INIT_LIST_HEAD(&config->config_list); - - /* Save the original string. */ - result = join_strings(argv, argc, ' ', &config->original_string); - if (result != VDO_SUCCESS) { - handle_parse_error(config, error_ptr, "Could not populate string"); - return VDO_BAD_CONFIGURATION; - } - - uds_log_info("table line: %s", config->original_string); - - config->thread_counts = (struct thread_count_config) { - .bio_ack_threads = 1, - .bio_threads = DEFAULT_VDO_BIO_SUBMIT_QUEUE_COUNT, - .bio_rotation_interval = DEFAULT_VDO_BIO_SUBMIT_QUEUE_ROTATE_INTERVAL, - .cpu_threads = 1, - .logical_zones = 0, - .physical_zones = 0, - .hash_zones = 0, - }; - config->max_discard_blocks = 1; - config->deduplication = true; - config->compression = false; - - arg_set.argc = argc; - arg_set.argv = argv; - - result = get_version_number(argc, argv, error_ptr, &config->version); - if (result != VDO_SUCCESS) { - /* get_version_number sets error_ptr itself. */ - handle_parse_error(config, error_ptr, *error_ptr); - return result; - } - /* Move the arg pointer forward only if the argument was there. */ - if (config->version >= 1) - dm_shift_arg(&arg_set); - - result = uds_duplicate_string(dm_shift_arg(&arg_set), "parent device name", - &config->parent_device_name); - if (result != VDO_SUCCESS) { - handle_parse_error(config, error_ptr, - "Could not copy parent device name"); - return VDO_BAD_CONFIGURATION; - } - - /* Get the physical blocks, if known. */ - if (config->version >= 1) { - result = kstrtoull(dm_shift_arg(&arg_set), 10, &config->physical_blocks); - if (result != VDO_SUCCESS) { - handle_parse_error(config, error_ptr, - "Invalid physical block count"); - return VDO_BAD_CONFIGURATION; - } - } - - /* Get the logical block size and validate */ - result = parse_bool(dm_shift_arg(&arg_set), "512", "4096", &enable_512e); - if (result != VDO_SUCCESS) { - handle_parse_error(config, error_ptr, "Invalid logical block size"); - return VDO_BAD_CONFIGURATION; - } - config->logical_block_size = (enable_512e ? 512 : 4096); - - /* Skip past the two no longer used read cache options. */ - if (config->version <= 1) - dm_consume_args(&arg_set, 2); - - /* Get the page cache size. */ - result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->cache_size); - if (result != VDO_SUCCESS) { - handle_parse_error(config, error_ptr, - "Invalid block map page cache size"); - return VDO_BAD_CONFIGURATION; - } - - /* Get the block map era length. */ - result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->block_map_maximum_age); - if (result != VDO_SUCCESS) { - handle_parse_error(config, error_ptr, "Invalid block map maximum age"); - return VDO_BAD_CONFIGURATION; - } - - /* Skip past the no longer used MD RAID5 optimization mode */ - if (config->version <= 2) - dm_consume_args(&arg_set, 1); - - /* Skip past the no longer used write policy setting */ - if (config->version <= 3) - dm_consume_args(&arg_set, 1); - - /* Skip past the no longer used pool name for older table lines */ - if (config->version <= 2) { - /* - * Make sure the enum to get the pool name from argv directly is still in sync with - * the parsing of the table line. - */ - if (&arg_set.argv[0] != &argv[POOL_NAME_ARG_INDEX[config->version]]) { - handle_parse_error(config, error_ptr, - "Pool name not in expected location"); - return VDO_BAD_CONFIGURATION; - } - dm_shift_arg(&arg_set); - } - - /* Get the optional arguments and validate. */ - result = parse_optional_arguments(&arg_set, error_ptr, config); - if (result != VDO_SUCCESS) { - /* parse_optional_arguments sets error_ptr itself. */ - handle_parse_error(config, error_ptr, *error_ptr); - return result; - } - - /* - * Logical, physical, and hash zone counts can all be zero; then we get one thread doing - * everything, our older configuration. If any zone count is non-zero, the others must be - * as well. - */ - if (((config->thread_counts.logical_zones == 0) != - (config->thread_counts.physical_zones == 0)) || - ((config->thread_counts.physical_zones == 0) != - (config->thread_counts.hash_zones == 0))) { - handle_parse_error(config, error_ptr, - "Logical, physical, and hash zones counts must all be zero or all non-zero"); - return VDO_BAD_CONFIGURATION; - } - - if (config->cache_size < - (2 * MAXIMUM_VDO_USER_VIOS * config->thread_counts.logical_zones)) { - handle_parse_error(config, error_ptr, - "Insufficient block map cache for logical zones"); - return VDO_BAD_CONFIGURATION; - } - - result = dm_get_device(ti, config->parent_device_name, - dm_table_get_mode(ti->table), &config->owned_device); - if (result != 0) { - uds_log_error("couldn't open device \"%s\": error %d", - config->parent_device_name, result); - handle_parse_error(config, error_ptr, "Unable to open storage device"); - return VDO_BAD_CONFIGURATION; - } - - if (config->version == 0) { - u64 device_size = i_size_read(config->owned_device->bdev->bd_inode); - - config->physical_blocks = device_size / VDO_BLOCK_SIZE; - } - - *config_ptr = config; - return result; -} - -static struct vdo *get_vdo_for_target(struct dm_target *ti) -{ - return ((struct device_config *) ti->private)->vdo; -} - - -static int vdo_map_bio(struct dm_target *ti, struct bio *bio) -{ - struct vdo *vdo = get_vdo_for_target(ti); - struct vdo_work_queue *current_work_queue; - const struct admin_state_code *code = vdo_get_admin_state_code(&vdo->admin.state); - - ASSERT_LOG_ONLY(code->normal, "vdo should not receive bios while in state %s", - code->name); - - /* Count all incoming bios. */ - vdo_count_bios(&vdo->stats.bios_in, bio); - - - /* Handle empty bios. Empty flush bios are not associated with a vio. */ - if ((bio_op(bio) == REQ_OP_FLUSH) || ((bio->bi_opf & REQ_PREFLUSH) != 0)) { - vdo_launch_flush(vdo, bio); - return DM_MAPIO_SUBMITTED; - } - - /* This could deadlock, */ - current_work_queue = vdo_get_current_work_queue(); - BUG_ON((current_work_queue != NULL) && - (vdo == vdo_get_work_queue_owner(current_work_queue)->vdo)); - vdo_launch_bio(vdo->data_vio_pool, bio); - return DM_MAPIO_SUBMITTED; -} - -static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits) -{ - struct vdo *vdo = get_vdo_for_target(ti); - - limits->logical_block_size = vdo->device_config->logical_block_size; - limits->physical_block_size = VDO_BLOCK_SIZE; - - /* The minimum io size for random io */ - blk_limits_io_min(limits, VDO_BLOCK_SIZE); - /* The optimal io size for streamed/sequential io */ - blk_limits_io_opt(limits, VDO_BLOCK_SIZE); - - /* - * Sets the maximum discard size that will be passed into VDO. This value comes from a - * table line value passed in during dmsetup create. - * - * The value 1024 is the largest usable value on HD systems. A 2048 sector discard on a - * busy HD system takes 31 seconds. We should use a value no higher than 1024, which takes - * 15 to 16 seconds on a busy HD system. - * - * But using large values results in 120 second blocked task warnings in /var/log/kern.log. - * In order to avoid these warnings, we choose to use the smallest reasonable value. See - * VDO-3062 and VDO-3087. - * - * The value is displayed in sysfs, and also used by dm-thin to determine whether to pass - * down discards. The block layer splits large discards on this boundary when this is set. - */ - limits->max_discard_sectors = - (vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK); - - /* - * Force discards to not begin or end with a partial block by stating the granularity is - * 4k. - */ - limits->discard_granularity = VDO_BLOCK_SIZE; -} - -static int vdo_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, - void *data) -{ - struct device_config *config = get_vdo_for_target(ti)->device_config; - - return fn(ti, config->owned_device, 0, - config->physical_blocks * VDO_SECTORS_PER_BLOCK, data); -} - -/* - * Status line is: - * - * - */ - -static void vdo_status(struct dm_target *ti, status_type_t status_type, - unsigned int status_flags, char *result, unsigned int maxlen) -{ - struct vdo *vdo = get_vdo_for_target(ti); - struct vdo_statistics *stats; - struct device_config *device_config; - /* N.B.: The DMEMIT macro uses the variables named "sz", "result", "maxlen". */ - int sz = 0; - - switch (status_type) { - case STATUSTYPE_INFO: - /* Report info for dmsetup status */ - mutex_lock(&vdo->stats_mutex); - vdo_fetch_statistics(vdo, &vdo->stats_buffer); - stats = &vdo->stats_buffer; - - DMEMIT("/dev/%pg %s %s %s %s %llu %llu", - vdo_get_backing_device(vdo), stats->mode, - stats->in_recovery_mode ? "recovering" : "-", - vdo_get_dedupe_index_state_name(vdo->hash_zones), - vdo_get_compressing(vdo) ? "online" : "offline", - stats->data_blocks_used + stats->overhead_blocks_used, - stats->physical_blocks); - mutex_unlock(&vdo->stats_mutex); - break; - - case STATUSTYPE_TABLE: - /* Report the string actually specified in the beginning. */ - device_config = (struct device_config *) ti->private; - DMEMIT("%s", device_config->original_string); - break; - - case STATUSTYPE_IMA: - /* FIXME: We ought to be more detailed here, but this is what thin does. */ - *result = '\0'; - break; - } -} - -static block_count_t __must_check get_underlying_device_block_count(const struct vdo *vdo) -{ - return i_size_read(vdo_get_backing_device(vdo)->bd_inode) / VDO_BLOCK_SIZE; -} - -static int __must_check process_vdo_message_locked(struct vdo *vdo, unsigned int argc, - char **argv) -{ - if ((argc == 2) && (strcasecmp(argv[0], "compression") == 0)) { - if (strcasecmp(argv[1], "on") == 0) { - vdo_set_compressing(vdo, true); - return 0; - } - - if (strcasecmp(argv[1], "off") == 0) { - vdo_set_compressing(vdo, false); - return 0; - } - - uds_log_warning("invalid argument '%s' to dmsetup compression message", - argv[1]); - return -EINVAL; - } - - uds_log_warning("unrecognized dmsetup message '%s' received", argv[0]); - return -EINVAL; -} - -/* - * If the message is a dump, just do it. Otherwise, check that no other message is being processed, - * and only proceed if so. - * Returns -EBUSY if another message is being processed - */ -static int __must_check process_vdo_message(struct vdo *vdo, unsigned int argc, - char **argv) -{ - int result; - - /* - * All messages which may be processed in parallel with other messages should be handled - * here before the atomic check below. Messages which should be exclusive should be - * processed in process_vdo_message_locked(). - */ - - /* Dump messages should always be processed */ - if (strcasecmp(argv[0], "dump") == 0) - return vdo_dump(vdo, argc, argv, "dmsetup message"); - - if (argc == 1) { - if (strcasecmp(argv[0], "dump-on-shutdown") == 0) { - vdo->dump_on_shutdown = true; - return 0; - } - - /* Index messages should always be processed */ - if ((strcasecmp(argv[0], "index-close") == 0) || - (strcasecmp(argv[0], "index-create") == 0) || - (strcasecmp(argv[0], "index-disable") == 0) || - (strcasecmp(argv[0], "index-enable") == 0)) - return vdo_message_dedupe_index(vdo->hash_zones, argv[0]); - } - - if (atomic_cmpxchg(&vdo->processing_message, 0, 1) != 0) - return -EBUSY; - - result = process_vdo_message_locked(vdo, argc, argv); - - /* Pairs with the implicit barrier in cmpxchg just above */ - smp_wmb(); - atomic_set(&vdo->processing_message, 0); - return result; -} - -static int vdo_message(struct dm_target *ti, unsigned int argc, char **argv, - char *result_buffer, unsigned int maxlen) -{ - struct registered_thread allocating_thread, instance_thread; - struct vdo *vdo; - int result; - - if (argc == 0) { - uds_log_warning("unspecified dmsetup message"); - return -EINVAL; - } - - vdo = get_vdo_for_target(ti); - uds_register_allocating_thread(&allocating_thread, NULL); - uds_register_thread_device_id(&instance_thread, &vdo->instance); - - /* - * Must be done here so we don't map return codes. The code in dm-ioctl expects a 1 for a - * return code to look at the buffer and see if it is full or not. - */ - if ((argc == 1) && (strcasecmp(argv[0], "stats") == 0)) { - vdo_write_stats(vdo, result_buffer, maxlen); - result = 1; - } else { - result = vdo_map_to_system_error(process_vdo_message(vdo, argc, argv)); - } - - uds_unregister_thread_device_id(); - uds_unregister_allocating_thread(); - return result; -} - -static void configure_target_capabilities(struct dm_target *ti) -{ - ti->discards_supported = 1; - ti->flush_supported = true; - ti->num_discard_bios = 1; - ti->num_flush_bios = 1; - - /* - * If this value changes, please make sure to update the value for max_discard_sectors - * accordingly. - */ - BUG_ON(dm_set_target_max_io_len(ti, VDO_SECTORS_PER_BLOCK) != 0); -} - -/* - * Implements vdo_filter_fn. - */ -static bool vdo_uses_device(struct vdo *vdo, const void *context) -{ - const struct device_config *config = context; - - return vdo_get_backing_device(vdo)->bd_dev == config->owned_device->bdev->bd_dev; -} - -/** - * get_thread_id_for_phase() - Get the thread id for the current phase of the admin operation in - * progress. - */ -static thread_id_t __must_check get_thread_id_for_phase(struct vdo *vdo) -{ - switch (vdo->admin.phase) { - case RESUME_PHASE_PACKER: - case RESUME_PHASE_FLUSHER: - case SUSPEND_PHASE_PACKER: - case SUSPEND_PHASE_FLUSHES: - return vdo->thread_config.packer_thread; - - case RESUME_PHASE_DATA_VIOS: - case SUSPEND_PHASE_DATA_VIOS: - return vdo->thread_config.cpu_thread; - - case LOAD_PHASE_DRAIN_JOURNAL: - case RESUME_PHASE_JOURNAL: - case SUSPEND_PHASE_JOURNAL: - return vdo->thread_config.journal_thread; - - default: - return vdo->thread_config.admin_thread; - } -} - -static struct vdo_completion *prepare_admin_completion(struct vdo *vdo, - vdo_action_fn callback, - vdo_action_fn error_handler) -{ - struct vdo_completion *completion = &vdo->admin.completion; - - /* - * We can't use vdo_prepare_completion_for_requeue() here because we don't want to reset - * any error in the completion. - */ - completion->callback = callback; - completion->error_handler = error_handler; - completion->callback_thread_id = get_thread_id_for_phase(vdo); - completion->requeue = true; - return completion; -} - -/** - * advance_phase() - Increment the phase of the current admin operation and prepare the admin - * completion to run on the thread for the next phase. - * @vdo: The on which an admin operation is being performed - * - * Return: The current phase - */ -static u32 advance_phase(struct vdo *vdo) -{ - u32 phase = vdo->admin.phase++; - - vdo->admin.completion.callback_thread_id = get_thread_id_for_phase(vdo); - vdo->admin.completion.requeue = true; - return phase; -} - -/* - * Perform an administrative operation (load, suspend, grow logical, or grow physical). This method - * should not be called from vdo threads. - */ -static int perform_admin_operation(struct vdo *vdo, u32 starting_phase, - vdo_action_fn callback, vdo_action_fn error_handler, - const char *type) -{ - int result; - struct vdo_administrator *admin = &vdo->admin; - - if (atomic_cmpxchg(&admin->busy, 0, 1) != 0) { - return uds_log_error_strerror(VDO_COMPONENT_BUSY, - "Can't start %s operation, another operation is already in progress", - type); - } - - admin->phase = starting_phase; - reinit_completion(&admin->callback_sync); - vdo_reset_completion(&admin->completion); - vdo_launch_completion(prepare_admin_completion(vdo, callback, error_handler)); - - /* - * Using the "interruptible" interface means that Linux will not log a message when we wait - * for more than 120 seconds. - */ - while (wait_for_completion_interruptible(&admin->callback_sync) != 0) - /* * However, if we get a signal in a user-mode process, we could spin... */ - fsleep(1000); - - result = admin->completion.result; - /* pairs with implicit barrier in cmpxchg above */ - smp_wmb(); - atomic_set(&admin->busy, 0); - return result; -} - -/* Assert that we are operating on the correct thread for the current phase. */ -static void assert_admin_phase_thread(struct vdo *vdo, const char *what) -{ - ASSERT_LOG_ONLY(vdo_get_callback_thread_id() == get_thread_id_for_phase(vdo), - "%s on correct thread for %s", what, - ADMIN_PHASE_NAMES[vdo->admin.phase]); -} - -/** - * finish_operation_callback() - Callback to finish an admin operation. - * @completion: The admin_completion. - */ -static void finish_operation_callback(struct vdo_completion *completion) -{ - struct vdo_administrator *admin = &completion->vdo->admin; - - vdo_finish_operation(&admin->state, completion->result); - complete(&admin->callback_sync); -} - -/** - * decode_from_super_block() - Decode the VDO state from the super block and validate that it is - * correct. - * @vdo: The vdo being loaded. - * - * On error from this method, the component states must be destroyed explicitly. If this method - * returns successfully, the component states must not be destroyed. - * - * Return: VDO_SUCCESS or an error. - */ -static int __must_check decode_from_super_block(struct vdo *vdo) -{ - const struct device_config *config = vdo->device_config; - int result; - - result = vdo_decode_component_states(vdo->super_block.buffer, &vdo->geometry, - &vdo->states); - if (result != VDO_SUCCESS) - return result; - - vdo_set_state(vdo, vdo->states.vdo.state); - vdo->load_state = vdo->states.vdo.state; - - /* - * If the device config specifies a larger logical size than was recorded in the super - * block, just accept it. - */ - if (vdo->states.vdo.config.logical_blocks < config->logical_blocks) { - uds_log_warning("Growing logical size: a logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block", - (unsigned long long) config->logical_blocks, - (unsigned long long) vdo->states.vdo.config.logical_blocks); - vdo->states.vdo.config.logical_blocks = config->logical_blocks; - } - - result = vdo_validate_component_states(&vdo->states, vdo->geometry.nonce, - config->physical_blocks, - config->logical_blocks); - if (result != VDO_SUCCESS) - return result; - - vdo->layout = vdo->states.layout; - return VDO_SUCCESS; -} - -/** - * decode_vdo() - Decode the component data portion of a super block and fill in the corresponding - * portions of the vdo being loaded. - * @vdo: The vdo being loaded. - * - * This will also allocate the recovery journal and slab depot. If this method is called with an - * asynchronous layer (i.e. a thread config which specifies at least one base thread), the block - * map and packer will be constructed as well. - * - * Return: VDO_SUCCESS or an error. - */ -static int __must_check decode_vdo(struct vdo *vdo) -{ - block_count_t maximum_age, journal_length; - struct partition *partition; - int result; - - result = decode_from_super_block(vdo); - if (result != VDO_SUCCESS) { - vdo_destroy_component_states(&vdo->states); - return result; - } - - maximum_age = vdo_convert_maximum_age(vdo->device_config->block_map_maximum_age); - journal_length = - vdo_get_recovery_journal_length(vdo->states.vdo.config.recovery_journal_size); - if (maximum_age > (journal_length / 2)) { - return uds_log_error_strerror(VDO_BAD_CONFIGURATION, - "maximum age: %llu exceeds limit %llu", - (unsigned long long) maximum_age, - (unsigned long long) (journal_length / 2)); - } - - if (maximum_age == 0) { - return uds_log_error_strerror(VDO_BAD_CONFIGURATION, - "maximum age must be greater than 0"); - } - - result = vdo_enable_read_only_entry(vdo); - if (result != VDO_SUCCESS) - return result; - - partition = vdo_get_known_partition(&vdo->layout, - VDO_RECOVERY_JOURNAL_PARTITION); - result = vdo_decode_recovery_journal(vdo->states.recovery_journal, - vdo->states.vdo.nonce, vdo, partition, - vdo->states.vdo.complete_recoveries, - vdo->states.vdo.config.recovery_journal_size, - &vdo->recovery_journal); - if (result != VDO_SUCCESS) - return result; - - partition = vdo_get_known_partition(&vdo->layout, VDO_SLAB_SUMMARY_PARTITION); - result = vdo_decode_slab_depot(vdo->states.slab_depot, vdo, partition, - &vdo->depot); - if (result != VDO_SUCCESS) - return result; - - result = vdo_decode_block_map(vdo->states.block_map, - vdo->states.vdo.config.logical_blocks, vdo, - vdo->recovery_journal, vdo->states.vdo.nonce, - vdo->device_config->cache_size, maximum_age, - &vdo->block_map); - if (result != VDO_SUCCESS) - return result; - - result = vdo_make_physical_zones(vdo, &vdo->physical_zones); - if (result != VDO_SUCCESS) - return result; - - /* The logical zones depend on the physical zones already existing. */ - result = vdo_make_logical_zones(vdo, &vdo->logical_zones); - if (result != VDO_SUCCESS) - return result; - - return vdo_make_hash_zones(vdo, &vdo->hash_zones); -} - -/** - * pre_load_callback() - Callback to initiate a pre-load, registered in vdo_initialize(). - * @completion: The admin completion. - */ -static void pre_load_callback(struct vdo_completion *completion) -{ - struct vdo *vdo = completion->vdo; - int result; - - assert_admin_phase_thread(vdo, __func__); - - switch (advance_phase(vdo)) { - case PRE_LOAD_PHASE_START: - result = vdo_start_operation(&vdo->admin.state, - VDO_ADMIN_STATE_PRE_LOADING); - if (result != VDO_SUCCESS) { - vdo_continue_completion(completion, result); - return; - } - - vdo_load_super_block(vdo, completion); - return; - - case PRE_LOAD_PHASE_LOAD_COMPONENTS: - vdo_continue_completion(completion, decode_vdo(vdo)); - return; - - case PRE_LOAD_PHASE_END: - break; - - default: - vdo_set_completion_result(completion, UDS_BAD_STATE); - } - - finish_operation_callback(completion); -} - -static void release_instance(unsigned int instance) -{ - mutex_lock(&instances_lock); - if (instance >= instances.bit_count) { - ASSERT_LOG_ONLY(false, - "instance number %u must be less than bit count %u", - instance, instances.bit_count); - } else if (test_bit(instance, instances.words) == 0) { - ASSERT_LOG_ONLY(false, "instance number %u must be allocated", instance); - } else { - __clear_bit(instance, instances.words); - instances.count -= 1; - } - mutex_unlock(&instances_lock); -} - -static void set_device_config(struct dm_target *ti, struct vdo *vdo, - struct device_config *config) -{ - list_del_init(&config->config_list); - list_add_tail(&config->config_list, &vdo->device_config_list); - config->vdo = vdo; - ti->private = config; - configure_target_capabilities(ti); -} - -static int vdo_initialize(struct dm_target *ti, unsigned int instance, - struct device_config *config) -{ - struct vdo *vdo; - int result; - u64 block_size = VDO_BLOCK_SIZE; - u64 logical_size = to_bytes(ti->len); - block_count_t logical_blocks = logical_size / block_size; - - uds_log_info("loading device '%s'", vdo_get_device_name(ti)); - uds_log_debug("Logical block size = %llu", (u64) config->logical_block_size); - uds_log_debug("Logical blocks = %llu", logical_blocks); - uds_log_debug("Physical block size = %llu", (u64) block_size); - uds_log_debug("Physical blocks = %llu", config->physical_blocks); - uds_log_debug("Block map cache blocks = %u", config->cache_size); - uds_log_debug("Block map maximum age = %u", config->block_map_maximum_age); - uds_log_debug("Deduplication = %s", (config->deduplication ? "on" : "off")); - uds_log_debug("Compression = %s", (config->compression ? "on" : "off")); - - vdo = vdo_find_matching(vdo_uses_device, config); - if (vdo != NULL) { - uds_log_error("Existing vdo already uses device %s", - vdo->device_config->parent_device_name); - ti->error = "Cannot share storage device with already-running VDO"; - return VDO_BAD_CONFIGURATION; - } - - result = vdo_make(instance, config, &ti->error, &vdo); - if (result != VDO_SUCCESS) { - uds_log_error("Could not create VDO device. (VDO error %d, message %s)", - result, ti->error); - vdo_destroy(vdo); - return result; - } - - result = perform_admin_operation(vdo, PRE_LOAD_PHASE_START, pre_load_callback, - finish_operation_callback, "pre-load"); - if (result != VDO_SUCCESS) { - ti->error = ((result == VDO_INVALID_ADMIN_STATE) ? - "Pre-load is only valid immediately after initialization" : - "Cannot load metadata from device"); - uds_log_error("Could not start VDO device. (VDO error %d, message %s)", - result, ti->error); - vdo_destroy(vdo); - return result; - } - - set_device_config(ti, vdo, config); - vdo->device_config = config; - return VDO_SUCCESS; -} - -/* Implements vdo_filter_fn. */ -static bool __must_check vdo_is_named(struct vdo *vdo, const void *context) -{ - struct dm_target *ti = vdo->device_config->owning_target; - const char *device_name = vdo_get_device_name(ti); - - return strcmp(device_name, context) == 0; -} - -/** - * get_bit_array_size() - Return the number of bytes needed to store a bit array of the specified - * capacity in an array of unsigned longs. - * @bit_count: The number of bits the array must hold. - * - * Return: the number of bytes needed for the array representation. - */ -static size_t get_bit_array_size(unsigned int bit_count) -{ - /* Round up to a multiple of the word size and convert to a byte count. */ - return (BITS_TO_LONGS(bit_count) * sizeof(unsigned long)); -} - -/** - * grow_bit_array() - Re-allocate the bitmap word array so there will more instance numbers that - * can be allocated. - * - * Since the array is initially NULL, this also initializes the array the first time we allocate an - * instance number. - * - * Return: UDS_SUCCESS or an error code from the allocation - */ -static int grow_bit_array(void) -{ - unsigned int new_count = max(instances.bit_count + BIT_COUNT_INCREMENT, - (unsigned int) BIT_COUNT_MINIMUM); - unsigned long *new_words; - int result; - - result = uds_reallocate_memory(instances.words, - get_bit_array_size(instances.bit_count), - get_bit_array_size(new_count), - "instance number bit array", &new_words); - if (result != UDS_SUCCESS) - return result; - - instances.bit_count = new_count; - instances.words = new_words; - return UDS_SUCCESS; -} - -/** - * allocate_instance() - Allocate an instance number. - * @instance_ptr: A point to hold the instance number - * - * Return: UDS_SUCCESS or an error code - * - * This function must be called while holding the instances lock. - */ -static int allocate_instance(unsigned int *instance_ptr) -{ - unsigned int instance; - int result; - - /* If there are no unallocated instances, grow the bit array. */ - if (instances.count >= instances.bit_count) { - result = grow_bit_array(); - if (result != UDS_SUCCESS) - return result; - } - - /* - * There must be a zero bit somewhere now. Find it, starting just after the last instance - * allocated. - */ - instance = find_next_zero_bit(instances.words, instances.bit_count, - instances.next); - if (instance >= instances.bit_count) { - /* Nothing free after next, so wrap around to instance zero. */ - instance = find_first_zero_bit(instances.words, instances.bit_count); - result = ASSERT(instance < instances.bit_count, - "impossibly, no zero bit found"); - if (result != UDS_SUCCESS) - return result; - } - - __set_bit(instance, instances.words); - instances.count++; - instances.next = instance + 1; - *instance_ptr = instance; - return UDS_SUCCESS; -} - -static int construct_new_vdo_registered(struct dm_target *ti, unsigned int argc, - char **argv, unsigned int instance) -{ - int result; - struct device_config *config; - - result = parse_device_config(argc, argv, ti, &config); - if (result != VDO_SUCCESS) { - uds_log_error_strerror(result, "parsing failed: %s", ti->error); - release_instance(instance); - return -EINVAL; - } - - /* Beyond this point, the instance number will be cleaned up for us if needed */ - result = vdo_initialize(ti, instance, config); - if (result != VDO_SUCCESS) { - release_instance(instance); - free_device_config(config); - return vdo_map_to_system_error(result); - } - - return VDO_SUCCESS; -} - -static int construct_new_vdo(struct dm_target *ti, unsigned int argc, char **argv) -{ - int result; - unsigned int instance; - struct registered_thread instance_thread; - - mutex_lock(&instances_lock); - result = allocate_instance(&instance); - mutex_unlock(&instances_lock); - if (result != VDO_SUCCESS) - return -ENOMEM; - - uds_register_thread_device_id(&instance_thread, &instance); - result = construct_new_vdo_registered(ti, argc, argv, instance); - uds_unregister_thread_device_id(); - return result; -} - -/** - * check_may_grow_physical() - Callback to check that we're not in recovery mode, used in - * vdo_prepare_to_grow_physical(). - * @completion: The admin completion. - */ -static void check_may_grow_physical(struct vdo_completion *completion) -{ - struct vdo *vdo = completion->vdo; - - assert_admin_phase_thread(vdo, __func__); - - /* These checks can only be done from a vdo thread. */ - if (vdo_is_read_only(vdo)) - vdo_set_completion_result(completion, VDO_READ_ONLY); - - if (vdo_in_recovery_mode(vdo)) - vdo_set_completion_result(completion, VDO_RETRY_AFTER_REBUILD); - - finish_operation_callback(completion); -} - -static block_count_t get_partition_size(struct layout *layout, enum partition_id id) -{ - return vdo_get_known_partition(layout, id)->count; -} - -/** - * grow_layout() - Make the layout for growing a vdo. - * @vdo: The vdo preparing to grow. - * @old_size: The current size of the vdo. - * @new_size: The size to which the vdo will be grown. - * - * Return: VDO_SUCCESS or an error code. - */ -static int grow_layout(struct vdo *vdo, block_count_t old_size, block_count_t new_size) -{ - int result; - block_count_t min_new_size; - - if (vdo->next_layout.size == new_size) { - /* We are already prepared to grow to the new size, so we're done. */ - return VDO_SUCCESS; - } - - /* Make a copy completion if there isn't one */ - if (vdo->partition_copier == NULL) { - vdo->partition_copier = dm_kcopyd_client_create(NULL); - if (IS_ERR(vdo->partition_copier)) { - result = PTR_ERR(vdo->partition_copier); - vdo->partition_copier = NULL; - return result; - } - } - - /* Free any unused preparation. */ - vdo_uninitialize_layout(&vdo->next_layout); - - /* - * Make a new layout with the existing partition sizes for everything but the slab depot - * partition. - */ - result = vdo_initialize_layout(new_size, vdo->layout.start, - get_partition_size(&vdo->layout, - VDO_BLOCK_MAP_PARTITION), - get_partition_size(&vdo->layout, - VDO_RECOVERY_JOURNAL_PARTITION), - get_partition_size(&vdo->layout, - VDO_SLAB_SUMMARY_PARTITION), - &vdo->next_layout); - if (result != VDO_SUCCESS) { - dm_kcopyd_client_destroy(uds_forget(vdo->partition_copier)); - return result; - } - - /* Ensure the new journal and summary are entirely within the added blocks. */ - min_new_size = (old_size + - get_partition_size(&vdo->next_layout, - VDO_SLAB_SUMMARY_PARTITION) + - get_partition_size(&vdo->next_layout, - VDO_RECOVERY_JOURNAL_PARTITION)); - if (min_new_size > new_size) { - /* Copying the journal and summary would destroy some old metadata. */ - vdo_uninitialize_layout(&vdo->next_layout); - dm_kcopyd_client_destroy(uds_forget(vdo->partition_copier)); - return VDO_INCREMENT_TOO_SMALL; - } - - return VDO_SUCCESS; -} - -static int prepare_to_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks) -{ - int result; - block_count_t current_physical_blocks = vdo->states.vdo.config.physical_blocks; - - uds_log_info("Preparing to resize physical to %llu", - (unsigned long long) new_physical_blocks); - ASSERT_LOG_ONLY((new_physical_blocks > current_physical_blocks), - "New physical size is larger than current physical size"); - result = perform_admin_operation(vdo, PREPARE_GROW_PHYSICAL_PHASE_START, - check_may_grow_physical, - finish_operation_callback, - "prepare grow-physical"); - if (result != VDO_SUCCESS) - return result; - - result = grow_layout(vdo, current_physical_blocks, new_physical_blocks); - if (result != VDO_SUCCESS) - return result; - - result = vdo_prepare_to_grow_slab_depot(vdo->depot, - vdo_get_known_partition(&vdo->next_layout, - VDO_SLAB_DEPOT_PARTITION)); - if (result != VDO_SUCCESS) { - vdo_uninitialize_layout(&vdo->next_layout); - return result; - } - - uds_log_info("Done preparing to resize physical"); - return VDO_SUCCESS; -} - -/** - * validate_new_device_config() - Check whether a new device config represents a valid modification - * to an existing config. - * @to_validate: The new config to validate. - * @config: The existing config. - * @may_grow: Set to true if growing the logical and physical size of the vdo is currently - * permitted. - * @error_ptr: A pointer to hold the reason for any error. - * - * Return: VDO_SUCCESS or an error. - */ -static int validate_new_device_config(struct device_config *to_validate, - struct device_config *config, bool may_grow, - char **error_ptr) -{ - if (to_validate->owning_target->begin != config->owning_target->begin) { - *error_ptr = "Starting sector cannot change"; - return VDO_PARAMETER_MISMATCH; - } - - if (to_validate->logical_block_size != config->logical_block_size) { - *error_ptr = "Logical block size cannot change"; - return VDO_PARAMETER_MISMATCH; - } - - if (to_validate->logical_blocks < config->logical_blocks) { - *error_ptr = "Can't shrink VDO logical size"; - return VDO_PARAMETER_MISMATCH; - } - - if (to_validate->cache_size != config->cache_size) { - *error_ptr = "Block map cache size cannot change"; - return VDO_PARAMETER_MISMATCH; - } - - if (to_validate->block_map_maximum_age != config->block_map_maximum_age) { - *error_ptr = "Block map maximum age cannot change"; - return VDO_PARAMETER_MISMATCH; - } - - if (memcmp(&to_validate->thread_counts, &config->thread_counts, - sizeof(struct thread_count_config)) != 0) { - *error_ptr = "Thread configuration cannot change"; - return VDO_PARAMETER_MISMATCH; - } - - if (to_validate->physical_blocks < config->physical_blocks) { - *error_ptr = "Removing physical storage from a VDO is not supported"; - return VDO_NOT_IMPLEMENTED; - } - - if (!may_grow && (to_validate->physical_blocks > config->physical_blocks)) { - *error_ptr = "VDO physical size may not grow in current state"; - return VDO_NOT_IMPLEMENTED; - } - - return VDO_SUCCESS; -} - -static int prepare_to_modify(struct dm_target *ti, struct device_config *config, - struct vdo *vdo) -{ - int result; - bool may_grow = (vdo_get_admin_state(vdo) != VDO_ADMIN_STATE_PRE_LOADED); - - result = validate_new_device_config(config, vdo->device_config, may_grow, - &ti->error); - if (result != VDO_SUCCESS) - return -EINVAL; - - if (config->logical_blocks > vdo->device_config->logical_blocks) { - block_count_t logical_blocks = vdo->states.vdo.config.logical_blocks; - - uds_log_info("Preparing to resize logical to %llu", - (unsigned long long) config->logical_blocks); - ASSERT_LOG_ONLY((config->logical_blocks > logical_blocks), - "New logical size is larger than current size"); - - result = vdo_prepare_to_grow_block_map(vdo->block_map, - config->logical_blocks); - if (result != VDO_SUCCESS) { - ti->error = "Device vdo_prepare_to_grow_logical failed"; - return result; - } - - uds_log_info("Done preparing to resize logical"); - } - - if (config->physical_blocks > vdo->device_config->physical_blocks) { - result = prepare_to_grow_physical(vdo, config->physical_blocks); - if (result != VDO_SUCCESS) { - if (result == VDO_PARAMETER_MISMATCH) { - /* - * If we don't trap this case, vdo_map_to_system_error() will remap - * it to -EIO, which is misleading and ahistorical. - */ - result = -EINVAL; - } - - if (result == VDO_TOO_MANY_SLABS) - ti->error = "Device vdo_prepare_to_grow_physical failed (specified physical size too big based on formatted slab size)"; - else - ti->error = "Device vdo_prepare_to_grow_physical failed"; - - return result; - } - } - - if (strcmp(config->parent_device_name, vdo->device_config->parent_device_name) != 0) { - const char *device_name = vdo_get_device_name(config->owning_target); - - uds_log_info("Updating backing device of %s from %s to %s", device_name, - vdo->device_config->parent_device_name, - config->parent_device_name); - } - - return VDO_SUCCESS; -} - -static int update_existing_vdo(const char *device_name, struct dm_target *ti, - unsigned int argc, char **argv, struct vdo *vdo) -{ - int result; - struct device_config *config; - - result = parse_device_config(argc, argv, ti, &config); - if (result != VDO_SUCCESS) - return -EINVAL; - - uds_log_info("preparing to modify device '%s'", device_name); - result = prepare_to_modify(ti, config, vdo); - if (result != VDO_SUCCESS) { - free_device_config(config); - return vdo_map_to_system_error(result); - } - - set_device_config(ti, vdo, config); - return VDO_SUCCESS; -} - -static int vdo_ctr(struct dm_target *ti, unsigned int argc, char **argv) -{ - int result; - struct registered_thread allocating_thread, instance_thread; - const char *device_name; - struct vdo *vdo; - - uds_register_allocating_thread(&allocating_thread, NULL); - device_name = vdo_get_device_name(ti); - vdo = vdo_find_matching(vdo_is_named, device_name); - if (vdo == NULL) { - result = construct_new_vdo(ti, argc, argv); - } else { - uds_register_thread_device_id(&instance_thread, &vdo->instance); - result = update_existing_vdo(device_name, ti, argc, argv, vdo); - uds_unregister_thread_device_id(); - } - - uds_unregister_allocating_thread(); - return result; -} - -static void vdo_dtr(struct dm_target *ti) -{ - struct device_config *config = ti->private; - struct vdo *vdo = uds_forget(config->vdo); - - list_del_init(&config->config_list); - if (list_empty(&vdo->device_config_list)) { - const char *device_name; - - /* This was the last config referencing the VDO. Free it. */ - unsigned int instance = vdo->instance; - struct registered_thread allocating_thread, instance_thread; - - uds_register_thread_device_id(&instance_thread, &instance); - uds_register_allocating_thread(&allocating_thread, NULL); - - device_name = vdo_get_device_name(ti); - uds_log_info("stopping device '%s'", device_name); - if (vdo->dump_on_shutdown) - vdo_dump_all(vdo, "device shutdown"); - - vdo_destroy(uds_forget(vdo)); - uds_log_info("device '%s' stopped", device_name); - uds_unregister_thread_device_id(); - uds_unregister_allocating_thread(); - release_instance(instance); - } else if (config == vdo->device_config) { - /* - * The VDO still references this config. Give it a reference to a config that isn't - * being destroyed. - */ - vdo->device_config = list_first_entry(&vdo->device_config_list, - struct device_config, config_list); - } - - free_device_config(config); - ti->private = NULL; -} - -static void vdo_presuspend(struct dm_target *ti) -{ - get_vdo_for_target(ti)->suspend_type = - (dm_noflush_suspending(ti) ? VDO_ADMIN_STATE_SUSPENDING : VDO_ADMIN_STATE_SAVING); -} - -/** - * write_super_block_for_suspend() - Update the VDO state and save the super block. - * @completion: The admin completion - */ -static void write_super_block_for_suspend(struct vdo_completion *completion) -{ - struct vdo *vdo = completion->vdo; - - switch (vdo_get_state(vdo)) { - case VDO_DIRTY: - case VDO_NEW: - vdo_set_state(vdo, VDO_CLEAN); - break; - - case VDO_CLEAN: - case VDO_READ_ONLY_MODE: - case VDO_FORCE_REBUILD: - case VDO_RECOVERING: - case VDO_REBUILD_FOR_UPGRADE: - break; - - case VDO_REPLAYING: - default: - vdo_continue_completion(completion, UDS_BAD_STATE); - return; - } - - vdo_save_components(vdo, completion); -} - -/** - * suspend_callback() - Callback to initiate a suspend, registered in vdo_postsuspend(). - * @completion: The sub-task completion. - */ -static void suspend_callback(struct vdo_completion *completion) -{ - struct vdo *vdo = completion->vdo; - struct admin_state *state = &vdo->admin.state; - int result; - - assert_admin_phase_thread(vdo, __func__); - - switch (advance_phase(vdo)) { - case SUSPEND_PHASE_START: - if (vdo_get_admin_state_code(state)->quiescent) { - /* Already suspended */ - break; - } - - vdo_continue_completion(completion, - vdo_start_operation(state, vdo->suspend_type)); - return; - - case SUSPEND_PHASE_PACKER: - /* - * If the VDO was already resumed from a prior suspend while read-only, some of the - * components may not have been resumed. By setting a read-only error here, we - * guarantee that the result of this suspend will be VDO_READ_ONLY and not - * VDO_INVALID_ADMIN_STATE in that case. - */ - if (vdo_in_read_only_mode(vdo)) - vdo_set_completion_result(completion, VDO_READ_ONLY); - - vdo_drain_packer(vdo->packer, completion); - return; - - case SUSPEND_PHASE_DATA_VIOS: - drain_data_vio_pool(vdo->data_vio_pool, completion); - return; - - case SUSPEND_PHASE_DEDUPE: - vdo_drain_hash_zones(vdo->hash_zones, completion); - return; - - case SUSPEND_PHASE_FLUSHES: - vdo_drain_flusher(vdo->flusher, completion); - return; - - case SUSPEND_PHASE_LOGICAL_ZONES: - /* - * Attempt to flush all I/O before completing post suspend work. We believe a - * suspended device is expected to have persisted all data written before the - * suspend, even if it hasn't been flushed yet. - */ - result = vdo_synchronous_flush(vdo); - if (result != VDO_SUCCESS) - vdo_enter_read_only_mode(vdo, result); - - vdo_drain_logical_zones(vdo->logical_zones, - vdo_get_admin_state_code(state), completion); - return; - - case SUSPEND_PHASE_BLOCK_MAP: - vdo_drain_block_map(vdo->block_map, vdo_get_admin_state_code(state), - completion); - return; - - case SUSPEND_PHASE_JOURNAL: - vdo_drain_recovery_journal(vdo->recovery_journal, - vdo_get_admin_state_code(state), completion); - return; - - case SUSPEND_PHASE_DEPOT: - vdo_drain_slab_depot(vdo->depot, vdo_get_admin_state_code(state), - completion); - return; - - case SUSPEND_PHASE_READ_ONLY_WAIT: - vdo_wait_until_not_entering_read_only_mode(completion); - return; - - case SUSPEND_PHASE_WRITE_SUPER_BLOCK: - if (vdo_is_state_suspending(state) || (completion->result != VDO_SUCCESS)) { - /* If we didn't save the VDO or there was an error, we're done. */ - break; - } - - write_super_block_for_suspend(completion); - return; - - case SUSPEND_PHASE_END: - break; - - default: - vdo_set_completion_result(completion, UDS_BAD_STATE); - } - - finish_operation_callback(completion); -} - -static void vdo_postsuspend(struct dm_target *ti) -{ - struct vdo *vdo = get_vdo_for_target(ti); - struct registered_thread instance_thread; - const char *device_name; - int result; - - uds_register_thread_device_id(&instance_thread, &vdo->instance); - device_name = vdo_get_device_name(vdo->device_config->owning_target); - uds_log_info("suspending device '%s'", device_name); - - /* - * It's important to note any error here does not actually stop device-mapper from - * suspending the device. All this work is done post suspend. - */ - result = perform_admin_operation(vdo, SUSPEND_PHASE_START, suspend_callback, - suspend_callback, "suspend"); - - if ((result == VDO_SUCCESS) || (result == VDO_READ_ONLY)) { - /* - * Treat VDO_READ_ONLY as a success since a read-only suspension still leaves the - * VDO suspended. - */ - uds_log_info("device '%s' suspended", device_name); - } else if (result == VDO_INVALID_ADMIN_STATE) { - uds_log_error("Suspend invoked while in unexpected state: %s", - vdo_get_admin_state(vdo)->name); - } else { - uds_log_error_strerror(result, "Suspend of device '%s' failed", - device_name); - } - - uds_unregister_thread_device_id(); -} - -/** - * was_new() - Check whether the vdo was new when it was loaded. - * @vdo: The vdo to query. - * - * Return: true if the vdo was new. - */ -static bool was_new(const struct vdo *vdo) -{ - return (vdo->load_state == VDO_NEW); -} - -/** - * requires_repair() - Check whether a vdo requires recovery or rebuild. - * @vdo: The vdo to query. - * - * Return: true if the vdo must be repaired. - */ -static bool __must_check requires_repair(const struct vdo *vdo) -{ - switch (vdo_get_state(vdo)) { - case VDO_DIRTY: - case VDO_FORCE_REBUILD: - case VDO_REPLAYING: - case VDO_REBUILD_FOR_UPGRADE: - return true; - - default: - return false; - } -} - -/** - * get_load_type() - Determine how the slab depot was loaded. - * @vdo: The vdo. - * - * Return: How the depot was loaded. - */ -static enum slab_depot_load_type get_load_type(struct vdo *vdo) -{ - if (vdo_state_requires_read_only_rebuild(vdo->load_state)) - return VDO_SLAB_DEPOT_REBUILD_LOAD; - - if (vdo_state_requires_recovery(vdo->load_state)) - return VDO_SLAB_DEPOT_RECOVERY_LOAD; - - return VDO_SLAB_DEPOT_NORMAL_LOAD; -} - -/** - * vdo_initialize_kobjects() - Initialize the vdo sysfs directory. - * @vdo: The vdo being initialized. - * - * Return: VDO_SUCCESS or an error code. - */ -static int vdo_initialize_kobjects(struct vdo *vdo) -{ - int result; - struct dm_target *target = vdo->device_config->owning_target; - struct mapped_device *md = dm_table_get_md(target->table); - - kobject_init(&vdo->vdo_directory, &vdo_directory_type); - vdo->sysfs_added = true; - result = kobject_add(&vdo->vdo_directory, &disk_to_dev(dm_disk(md))->kobj, - "vdo"); - if (result != 0) - return VDO_CANT_ADD_SYSFS_NODE; - - result = vdo_add_dedupe_index_sysfs(vdo->hash_zones); - if (result != 0) - return VDO_CANT_ADD_SYSFS_NODE; - - return vdo_add_sysfs_stats_dir(vdo); -} - -/** - * load_callback() - Callback to do the destructive parts of loading a VDO. - * @completion: The sub-task completion. - */ -static void load_callback(struct vdo_completion *completion) -{ - struct vdo *vdo = completion->vdo; - int result; - - assert_admin_phase_thread(vdo, __func__); - - switch (advance_phase(vdo)) { - case LOAD_PHASE_START: - result = vdo_start_operation(&vdo->admin.state, VDO_ADMIN_STATE_LOADING); - if (result != VDO_SUCCESS) { - vdo_continue_completion(completion, result); - return; - } - - /* Prepare the recovery journal for new entries. */ - vdo_open_recovery_journal(vdo->recovery_journal, vdo->depot, - vdo->block_map); - vdo_allow_read_only_mode_entry(completion); - return; - - case LOAD_PHASE_STATS: - vdo_continue_completion(completion, vdo_initialize_kobjects(vdo)); - return; - - case LOAD_PHASE_LOAD_DEPOT: - if (vdo_is_read_only(vdo)) { - /* - * In read-only mode we don't use the allocator and it may not even be - * readable, so don't bother trying to load it. - */ - vdo_set_completion_result(completion, VDO_READ_ONLY); - break; - } - - if (requires_repair(vdo)) { - vdo_repair(completion); - return; - } - - vdo_load_slab_depot(vdo->depot, - (was_new(vdo) ? VDO_ADMIN_STATE_FORMATTING : - VDO_ADMIN_STATE_LOADING), - completion, NULL); - return; - - case LOAD_PHASE_MAKE_DIRTY: - vdo_set_state(vdo, VDO_DIRTY); - vdo_save_components(vdo, completion); - return; - - case LOAD_PHASE_PREPARE_TO_ALLOCATE: - vdo_initialize_block_map_from_journal(vdo->block_map, - vdo->recovery_journal); - vdo_prepare_slab_depot_to_allocate(vdo->depot, get_load_type(vdo), - completion); - return; - - case LOAD_PHASE_SCRUB_SLABS: - if (vdo_state_requires_recovery(vdo->load_state)) - vdo_enter_recovery_mode(vdo); - - vdo_scrub_all_unrecovered_slabs(vdo->depot, completion); - return; - - case LOAD_PHASE_DATA_REDUCTION: - WRITE_ONCE(vdo->compressing, vdo->device_config->compression); - if (vdo->device_config->deduplication) { - /* - * Don't try to load or rebuild the index first (and log scary error - * messages) if this is known to be a newly-formatted volume. - */ - vdo_start_dedupe_index(vdo->hash_zones, was_new(vdo)); - } - - vdo->allocations_allowed = false; - fallthrough; - - case LOAD_PHASE_FINISHED: - break; - - case LOAD_PHASE_DRAIN_JOURNAL: - vdo_drain_recovery_journal(vdo->recovery_journal, VDO_ADMIN_STATE_SAVING, - completion); - return; - - case LOAD_PHASE_WAIT_FOR_READ_ONLY: - /* Avoid an infinite loop */ - completion->error_handler = NULL; - vdo->admin.phase = LOAD_PHASE_FINISHED; - vdo_wait_until_not_entering_read_only_mode(completion); - return; - - default: - vdo_set_completion_result(completion, UDS_BAD_STATE); - } - - finish_operation_callback(completion); -} - -/** - * handle_load_error() - Handle an error during the load operation. - * @completion: The admin completion. - * - * If at all possible, brings the vdo online in read-only mode. This handler is registered in - * vdo_preresume_registered(). - */ -static void handle_load_error(struct vdo_completion *completion) -{ - struct vdo *vdo = completion->vdo; - - if (vdo_requeue_completion_if_needed(completion, - vdo->thread_config.admin_thread)) - return; - - if (vdo_state_requires_read_only_rebuild(vdo->load_state) && - (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) { - uds_log_error_strerror(completion->result, "aborting load"); - vdo->admin.phase = LOAD_PHASE_DRAIN_JOURNAL; - load_callback(uds_forget(completion)); - return; - } - - uds_log_error_strerror(completion->result, - "Entering read-only mode due to load error"); - vdo->admin.phase = LOAD_PHASE_WAIT_FOR_READ_ONLY; - vdo_enter_read_only_mode(vdo, completion->result); - completion->result = VDO_READ_ONLY; - load_callback(completion); -} - -/** - * write_super_block_for_resume() - Update the VDO state and save the super block. - * @completion: The admin completion - */ -static void write_super_block_for_resume(struct vdo_completion *completion) -{ - struct vdo *vdo = completion->vdo; - - switch (vdo_get_state(vdo)) { - case VDO_CLEAN: - case VDO_NEW: - vdo_set_state(vdo, VDO_DIRTY); - vdo_save_components(vdo, completion); - return; - - case VDO_DIRTY: - case VDO_READ_ONLY_MODE: - case VDO_FORCE_REBUILD: - case VDO_RECOVERING: - case VDO_REBUILD_FOR_UPGRADE: - /* No need to write the super block in these cases */ - vdo_launch_completion(completion); - return; - - case VDO_REPLAYING: - default: - vdo_continue_completion(completion, UDS_BAD_STATE); - } -} - -/** - * resume_callback() - Callback to resume a VDO. - * @completion: The admin completion. - */ -static void resume_callback(struct vdo_completion *completion) -{ - struct vdo *vdo = completion->vdo; - int result; - - assert_admin_phase_thread(vdo, __func__); - - switch (advance_phase(vdo)) { - case RESUME_PHASE_START: - result = vdo_start_operation(&vdo->admin.state, - VDO_ADMIN_STATE_RESUMING); - if (result != VDO_SUCCESS) { - vdo_continue_completion(completion, result); - return; - } - - write_super_block_for_resume(completion); - return; - - case RESUME_PHASE_ALLOW_READ_ONLY_MODE: - vdo_allow_read_only_mode_entry(completion); - return; - - case RESUME_PHASE_DEDUPE: - vdo_resume_hash_zones(vdo->hash_zones, completion); - return; - - case RESUME_PHASE_DEPOT: - vdo_resume_slab_depot(vdo->depot, completion); - return; - - case RESUME_PHASE_JOURNAL: - vdo_resume_recovery_journal(vdo->recovery_journal, completion); - return; - - case RESUME_PHASE_BLOCK_MAP: - vdo_resume_block_map(vdo->block_map, completion); - return; - - case RESUME_PHASE_LOGICAL_ZONES: - vdo_resume_logical_zones(vdo->logical_zones, completion); - return; - - case RESUME_PHASE_PACKER: - { - bool was_enabled = vdo_get_compressing(vdo); - bool enable = vdo->device_config->compression; - - if (enable != was_enabled) - WRITE_ONCE(vdo->compressing, enable); - uds_log_info("compression is %s", (enable ? "enabled" : "disabled")); - - vdo_resume_packer(vdo->packer, completion); - return; - } - - case RESUME_PHASE_FLUSHER: - vdo_resume_flusher(vdo->flusher, completion); - return; - - case RESUME_PHASE_DATA_VIOS: - resume_data_vio_pool(vdo->data_vio_pool, completion); - return; - - case RESUME_PHASE_END: - break; - - default: - vdo_set_completion_result(completion, UDS_BAD_STATE); - } - - finish_operation_callback(completion); -} - -/** - * grow_logical_callback() - Callback to initiate a grow logical. - * @completion: The admin completion. - * - * Registered in perform_grow_logical(). - */ -static void grow_logical_callback(struct vdo_completion *completion) -{ - struct vdo *vdo = completion->vdo; - int result; - - assert_admin_phase_thread(vdo, __func__); - - switch (advance_phase(vdo)) { - case GROW_LOGICAL_PHASE_START: - if (vdo_is_read_only(vdo)) { - uds_log_error_strerror(VDO_READ_ONLY, - "Can't grow logical size of a read-only VDO"); - vdo_set_completion_result(completion, VDO_READ_ONLY); - break; - } - - result = vdo_start_operation(&vdo->admin.state, - VDO_ADMIN_STATE_SUSPENDED_OPERATION); - if (result != VDO_SUCCESS) { - vdo_continue_completion(completion, result); - return; - } - - vdo->states.vdo.config.logical_blocks = vdo->block_map->next_entry_count; - vdo_save_components(vdo, completion); - return; - - case GROW_LOGICAL_PHASE_GROW_BLOCK_MAP: - vdo_grow_block_map(vdo->block_map, completion); - return; - - case GROW_LOGICAL_PHASE_END: - break; - - case GROW_LOGICAL_PHASE_ERROR: - vdo_enter_read_only_mode(vdo, completion->result); - break; - - default: - vdo_set_completion_result(completion, UDS_BAD_STATE); - } - - finish_operation_callback(completion); -} - -/** - * handle_logical_growth_error() - Handle an error during the grow physical process. - * @completion: The admin completion. - */ -static void handle_logical_growth_error(struct vdo_completion *completion) -{ - struct vdo *vdo = completion->vdo; - - if (vdo->admin.phase == GROW_LOGICAL_PHASE_GROW_BLOCK_MAP) { - /* - * We've failed to write the new size in the super block, so set our in memory - * config back to the old size. - */ - vdo->states.vdo.config.logical_blocks = vdo->block_map->entry_count; - vdo_abandon_block_map_growth(vdo->block_map); - } - - vdo->admin.phase = GROW_LOGICAL_PHASE_ERROR; - grow_logical_callback(completion); -} - -/** - * perform_grow_logical() - Grow the logical size of the vdo. - * @vdo: The vdo to grow. - * @new_logical_blocks: The size to which the vdo should be grown. - * - * Context: This method may only be called when the vdo has been suspended and must not be called - * from a base thread. - * - * Return: VDO_SUCCESS or an error. - */ -static int perform_grow_logical(struct vdo *vdo, block_count_t new_logical_blocks) -{ - int result; - - if (vdo->device_config->logical_blocks == new_logical_blocks) { - /* - * A table was loaded for which we prepared to grow, but a table without that - * growth was what we are resuming with. - */ - vdo_abandon_block_map_growth(vdo->block_map); - return VDO_SUCCESS; - } - - uds_log_info("Resizing logical to %llu", - (unsigned long long) new_logical_blocks); - if (vdo->block_map->next_entry_count != new_logical_blocks) - return VDO_PARAMETER_MISMATCH; - - result = perform_admin_operation(vdo, GROW_LOGICAL_PHASE_START, - grow_logical_callback, - handle_logical_growth_error, "grow logical"); - if (result != VDO_SUCCESS) - return result; - - uds_log_info("Logical blocks now %llu", (unsigned long long) new_logical_blocks); - return VDO_SUCCESS; -} - -static void copy_callback(int read_err, unsigned long write_err, void *context) -{ - struct vdo_completion *completion = context; - int result = (((read_err == 0) && (write_err == 0)) ? VDO_SUCCESS : -EIO); - - vdo_continue_completion(completion, result); -} - -static void partition_to_region(struct partition *partition, struct vdo *vdo, - struct dm_io_region *region) -{ - physical_block_number_t pbn = partition->offset - vdo->geometry.bio_offset; - - *region = (struct dm_io_region) { - .bdev = vdo_get_backing_device(vdo), - .sector = pbn * VDO_SECTORS_PER_BLOCK, - .count = partition->count * VDO_SECTORS_PER_BLOCK, - }; -} - -/** - * copy_partition() - Copy a partition from the location specified in the current layout to that in - * the next layout. - * @vdo: The vdo preparing to grow. - * @id: The ID of the partition to copy. - * @parent: The completion to notify when the copy is complete. - */ -static void copy_partition(struct vdo *vdo, enum partition_id id, - struct vdo_completion *parent) -{ - struct dm_io_region read_region, write_regions[1]; - struct partition *from = vdo_get_known_partition(&vdo->layout, id); - struct partition *to = vdo_get_known_partition(&vdo->next_layout, id); - - partition_to_region(from, vdo, &read_region); - partition_to_region(to, vdo, &write_regions[0]); - dm_kcopyd_copy(vdo->partition_copier, &read_region, 1, write_regions, 0, - copy_callback, parent); -} - -/** - * grow_physical_callback() - Callback to initiate a grow physical. - * @completion: The admin completion. - * - * Registered in perform_grow_physical(). - */ -static void grow_physical_callback(struct vdo_completion *completion) -{ - struct vdo *vdo = completion->vdo; - int result; - - assert_admin_phase_thread(vdo, __func__); - - switch (advance_phase(vdo)) { - case GROW_PHYSICAL_PHASE_START: - if (vdo_is_read_only(vdo)) { - uds_log_error_strerror(VDO_READ_ONLY, - "Can't grow physical size of a read-only VDO"); - vdo_set_completion_result(completion, VDO_READ_ONLY); - break; - } - - result = vdo_start_operation(&vdo->admin.state, - VDO_ADMIN_STATE_SUSPENDED_OPERATION); - if (result != VDO_SUCCESS) { - vdo_continue_completion(completion, result); - return; - } - - /* Copy the journal into the new layout. */ - copy_partition(vdo, VDO_RECOVERY_JOURNAL_PARTITION, completion); - return; - - case GROW_PHYSICAL_PHASE_COPY_SUMMARY: - copy_partition(vdo, VDO_SLAB_SUMMARY_PARTITION, completion); - return; - - case GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS: - vdo_uninitialize_layout(&vdo->layout); - vdo->layout = vdo->next_layout; - uds_forget(vdo->next_layout.head); - vdo->states.vdo.config.physical_blocks = vdo->layout.size; - vdo_update_slab_depot_size(vdo->depot); - vdo_save_components(vdo, completion); - return; - - case GROW_PHYSICAL_PHASE_USE_NEW_SLABS: - vdo_use_new_slabs(vdo->depot, completion); - return; - - case GROW_PHYSICAL_PHASE_END: - vdo->depot->summary_origin = - vdo_get_known_partition(&vdo->layout, - VDO_SLAB_SUMMARY_PARTITION)->offset; - vdo->recovery_journal->origin = - vdo_get_known_partition(&vdo->layout, - VDO_RECOVERY_JOURNAL_PARTITION)->offset; - break; - - case GROW_PHYSICAL_PHASE_ERROR: - vdo_enter_read_only_mode(vdo, completion->result); - break; - - default: - vdo_set_completion_result(completion, UDS_BAD_STATE); - } - - vdo_uninitialize_layout(&vdo->next_layout); - finish_operation_callback(completion); -} - -/** - * handle_physical_growth_error() - Handle an error during the grow physical process. - * @completion: The sub-task completion. - */ -static void handle_physical_growth_error(struct vdo_completion *completion) -{ - completion->vdo->admin.phase = GROW_PHYSICAL_PHASE_ERROR; - grow_physical_callback(completion); -} - -/** - * perform_grow_physical() - Grow the physical size of the vdo. - * @vdo: The vdo to resize. - * @new_physical_blocks: The new physical size in blocks. - * - * Context: This method may only be called when the vdo has been suspended and must not be called - * from a base thread. - * - * Return: VDO_SUCCESS or an error. - */ -static int perform_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks) -{ - int result; - block_count_t new_depot_size, prepared_depot_size; - block_count_t old_physical_blocks = vdo->states.vdo.config.physical_blocks; - - /* Skip any noop grows. */ - if (old_physical_blocks == new_physical_blocks) - return VDO_SUCCESS; - - if (new_physical_blocks != vdo->next_layout.size) { - /* - * Either the VDO isn't prepared to grow, or it was prepared to grow to a different - * size. Doing this check here relies on the fact that the call to this method is - * done under the dmsetup message lock. - */ - vdo_uninitialize_layout(&vdo->next_layout); - vdo_abandon_new_slabs(vdo->depot); - return VDO_PARAMETER_MISMATCH; - } - - /* Validate that we are prepared to grow appropriately. */ - new_depot_size = - vdo_get_known_partition(&vdo->next_layout, VDO_SLAB_DEPOT_PARTITION)->count; - prepared_depot_size = (vdo->depot->new_slabs == NULL) ? 0 : vdo->depot->new_size; - if (prepared_depot_size != new_depot_size) - return VDO_PARAMETER_MISMATCH; - - result = perform_admin_operation(vdo, GROW_PHYSICAL_PHASE_START, - grow_physical_callback, - handle_physical_growth_error, "grow physical"); - if (result != VDO_SUCCESS) - return result; - - uds_log_info("Physical block count was %llu, now %llu", - (unsigned long long) old_physical_blocks, - (unsigned long long) new_physical_blocks); - return VDO_SUCCESS; -} - -/** - * apply_new_vdo_configuration() - Attempt to make any configuration changes from the table being - * resumed. - * @vdo: The vdo being resumed. - * @config: The new device configuration derived from the table with which the vdo is being - * resumed. - * - * Return: VDO_SUCCESS or an error. - */ -static int __must_check apply_new_vdo_configuration(struct vdo *vdo, - struct device_config *config) -{ - int result; - - result = perform_grow_logical(vdo, config->logical_blocks); - if (result != VDO_SUCCESS) { - uds_log_error("grow logical operation failed, result = %d", result); - return result; - } - - result = perform_grow_physical(vdo, config->physical_blocks); - if (result != VDO_SUCCESS) - uds_log_error("resize operation failed, result = %d", result); - - return result; -} - -static int vdo_preresume_registered(struct dm_target *ti, struct vdo *vdo) -{ - struct device_config *config = ti->private; - const char *device_name = vdo_get_device_name(ti); - block_count_t backing_blocks; - int result; - - backing_blocks = get_underlying_device_block_count(vdo); - if (backing_blocks < config->physical_blocks) { - /* FIXME: can this still happen? */ - uds_log_error("resume of device '%s' failed: backing device has %llu blocks but VDO physical size is %llu blocks", - device_name, (unsigned long long) backing_blocks, - (unsigned long long) config->physical_blocks); - return -EINVAL; - } - - if (vdo_get_admin_state(vdo) == VDO_ADMIN_STATE_PRE_LOADED) { - uds_log_info("starting device '%s'", device_name); - result = perform_admin_operation(vdo, LOAD_PHASE_START, load_callback, - handle_load_error, "load"); - if ((result != VDO_SUCCESS) && (result != VDO_READ_ONLY)) { - /* - * Something has gone very wrong. Make sure everything has drained and - * leave the device in an unresumable state. - */ - uds_log_error_strerror(result, - "Start failed, could not load VDO metadata"); - vdo->suspend_type = VDO_ADMIN_STATE_STOPPING; - perform_admin_operation(vdo, SUSPEND_PHASE_START, - suspend_callback, suspend_callback, - "suspend"); - return result; - } - - /* Even if the VDO is read-only, it is now able to handle read requests. */ - uds_log_info("device '%s' started", device_name); - } - - uds_log_info("resuming device '%s'", device_name); - - /* If this fails, the VDO was not in a state to be resumed. This should never happen. */ - result = apply_new_vdo_configuration(vdo, config); - BUG_ON(result == VDO_INVALID_ADMIN_STATE); - - /* - * Now that we've tried to modify the vdo, the new config *is* the config, whether the - * modifications worked or not. - */ - vdo->device_config = config; - - /* - * Any error here is highly unexpected and the state of the vdo is questionable, so we mark - * it read-only in memory. Because we are suspended, the read-only state will not be - * written to disk. - */ - if (result != VDO_SUCCESS) { - uds_log_error_strerror(result, - "Commit of modifications to device '%s' failed", - device_name); - vdo_enter_read_only_mode(vdo, result); - return result; - } - - if (vdo_get_admin_state(vdo)->normal) { - /* The VDO was just started, so we don't need to resume it. */ - return VDO_SUCCESS; - } - - result = perform_admin_operation(vdo, RESUME_PHASE_START, resume_callback, - resume_callback, "resume"); - BUG_ON(result == VDO_INVALID_ADMIN_STATE); - if (result == VDO_READ_ONLY) { - /* Even if the vdo is read-only, it has still resumed. */ - result = VDO_SUCCESS; - } - - if (result != VDO_SUCCESS) - uds_log_error("resume of device '%s' failed with error: %d", device_name, - result); - - return result; -} - -static int vdo_preresume(struct dm_target *ti) -{ - struct registered_thread instance_thread; - struct vdo *vdo = get_vdo_for_target(ti); - int result; - - uds_register_thread_device_id(&instance_thread, &vdo->instance); - result = vdo_preresume_registered(ti, vdo); - if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE)) - result = -EINVAL; - uds_unregister_thread_device_id(); - return vdo_map_to_system_error(result); -} - -static void vdo_resume(struct dm_target *ti) -{ - struct registered_thread instance_thread; - - uds_register_thread_device_id(&instance_thread, - &get_vdo_for_target(ti)->instance); - uds_log_info("device '%s' resumed", vdo_get_device_name(ti)); - uds_unregister_thread_device_id(); -} - -/* - * If anything changes that affects how user tools will interact with vdo, update the version - * number and make sure documentation about the change is complete so tools can properly update - * their management code. - */ -static struct target_type vdo_target_bio = { - .features = DM_TARGET_SINGLETON, - .name = "vdo", - .version = { 8, 2, 0 }, - .module = THIS_MODULE, - .ctr = vdo_ctr, - .dtr = vdo_dtr, - .io_hints = vdo_io_hints, - .iterate_devices = vdo_iterate_devices, - .map = vdo_map_bio, - .message = vdo_message, - .status = vdo_status, - .presuspend = vdo_presuspend, - .postsuspend = vdo_postsuspend, - .preresume = vdo_preresume, - .resume = vdo_resume, -}; - -static bool dm_registered; - -static void vdo_module_destroy(void) -{ - uds_log_debug("unloading"); - - if (dm_registered) - dm_unregister_target(&vdo_target_bio); - - ASSERT_LOG_ONLY(instances.count == 0, - "should have no instance numbers still in use, but have %u", - instances.count); - uds_free(instances.words); - memset(&instances, 0, sizeof(struct instance_tracker)); - - uds_log_info("unloaded version %s", CURRENT_VERSION); -} - -static int __init vdo_init(void) -{ - int result = 0; - - /* - * UDS module level initialization must be done first, as VDO initialization depends on it - */ - uds_initialize_thread_device_registry(); - uds_memory_init(); - uds_init_sysfs(); - - vdo_initialize_device_registry_once(); - uds_log_info("loaded version %s", CURRENT_VERSION); - - /* Add VDO errors to the already existing set of errors in UDS. */ - result = vdo_register_status_codes(); - if (result != UDS_SUCCESS) { - uds_log_error("vdo_register_status_codes failed %d", result); - vdo_module_destroy(); - return result; - } - - result = dm_register_target(&vdo_target_bio); - if (result < 0) { - uds_log_error("dm_register_target failed %d", result); - vdo_module_destroy(); - return result; - } - dm_registered = true; - - return result; -} - -static void __exit vdo_exit(void) -{ - vdo_module_destroy(); - /* - * UDS module level exit processing must be done after all VDO module exit processing is - * complete. - */ - uds_put_sysfs(); - uds_memory_exit(); -} - -module_init(vdo_init); -module_exit(vdo_exit); - -MODULE_DESCRIPTION(DM_NAME " target for transparent deduplication"); -MODULE_AUTHOR("Red Hat, Inc."); -MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-vdo/Kconfig b/drivers/md/dm-vdo/Kconfig new file mode 100644 index 000000000000..111ecd2c2a24 --- /dev/null +++ b/drivers/md/dm-vdo/Kconfig @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config DM_VDO + tristate "VDO: deduplication and compression target" + depends on 64BIT + depends on BLK_DEV_DM + select DM_BUFIO + select LZ4_COMPRESS + select LZ4_DECOMPRESS + help + This device mapper target presents a block device with + deduplication, compression and thin-provisioning. + + To compile this code as a module, choose M here: the module will + be called dm-vdo. + + If unsure, say N. diff --git a/drivers/md/dm-vdo/Makefile b/drivers/md/dm-vdo/Makefile new file mode 100644 index 000000000000..8f8d161a6dbe --- /dev/null +++ b/drivers/md/dm-vdo/Makefile @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_DM_VDO) += dm-vdo.o + +dm-vdo-objs := \ + action-manager.o \ + admin-state.o \ + block-map.o \ + chapter-index.o \ + completion.o \ + config.o \ + constants.o \ + data-vio.o \ + dedupe.o \ + delta-index.o \ + dm-vdo-target.o \ + dump.o \ + encodings.o \ + errors.o \ + flush.o \ + funnel-queue.o \ + funnel-requestqueue.o \ + funnel-workqueue.o \ + geometry.o \ + index-layout.o \ + index.o \ + index-page-map.o \ + index-session.o \ + int-map.o \ + io-factory.o \ + io-submitter.o \ + logger.o \ + logical-zone.o \ + memory-alloc.o \ + message-stats.o \ + murmurhash3.o \ + open-chapter.o \ + packer.o \ + permassert.o \ + physical-zone.o \ + pool-sysfs.o \ + pool-sysfs-stats.o \ + priority-table.o \ + radix-sort.o \ + recovery-journal.o \ + repair.o \ + slab-depot.o \ + sparse-cache.o \ + status-codes.o \ + string-utils.o \ + sysfs.o \ + thread-cond-var.o \ + thread-device.o \ + thread-registry.o \ + uds-sysfs.o \ + uds-threads.o \ + vdo.o \ + vio.o \ + volume-index.o \ + volume.o \ + wait-queue.o diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c new file mode 100644 index 000000000000..fcba50d8d088 --- /dev/null +++ b/drivers/md/dm-vdo/dm-vdo-target.c @@ -0,0 +1,2956 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2023 Red Hat + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "admin-state.h" +#include "block-map.h" +#include "completion.h" +#include "constants.h" +#include "data-vio.h" +#include "dedupe.h" +#include "dump.h" +#include "encodings.h" +#include "errors.h" +#include "flush.h" +#include "io-submitter.h" +#include "logger.h" +#include "memory-alloc.h" +#include "message-stats.h" +#include "pool-sysfs.h" +#include "recovery-journal.h" +#include "repair.h" +#include "slab-depot.h" +#include "status-codes.h" +#include "string-utils.h" +#include "thread-device.h" +#include "thread-registry.h" +#include "types.h" +#include "uds-sysfs.h" +#include "vdo.h" +#include "vio.h" + +#define CURRENT_VERSION "8.3.0.65" + +enum { + GROW_LOGICAL_PHASE_START, + GROW_LOGICAL_PHASE_GROW_BLOCK_MAP, + GROW_LOGICAL_PHASE_END, + GROW_LOGICAL_PHASE_ERROR, + GROW_PHYSICAL_PHASE_START, + GROW_PHYSICAL_PHASE_COPY_SUMMARY, + GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS, + GROW_PHYSICAL_PHASE_USE_NEW_SLABS, + GROW_PHYSICAL_PHASE_END, + GROW_PHYSICAL_PHASE_ERROR, + LOAD_PHASE_START, + LOAD_PHASE_STATS, + LOAD_PHASE_LOAD_DEPOT, + LOAD_PHASE_MAKE_DIRTY, + LOAD_PHASE_PREPARE_TO_ALLOCATE, + LOAD_PHASE_SCRUB_SLABS, + LOAD_PHASE_DATA_REDUCTION, + LOAD_PHASE_FINISHED, + LOAD_PHASE_DRAIN_JOURNAL, + LOAD_PHASE_WAIT_FOR_READ_ONLY, + PRE_LOAD_PHASE_START, + PRE_LOAD_PHASE_LOAD_COMPONENTS, + PRE_LOAD_PHASE_END, + PREPARE_GROW_PHYSICAL_PHASE_START, + RESUME_PHASE_START, + RESUME_PHASE_ALLOW_READ_ONLY_MODE, + RESUME_PHASE_DEDUPE, + RESUME_PHASE_DEPOT, + RESUME_PHASE_JOURNAL, + RESUME_PHASE_BLOCK_MAP, + RESUME_PHASE_LOGICAL_ZONES, + RESUME_PHASE_PACKER, + RESUME_PHASE_FLUSHER, + RESUME_PHASE_DATA_VIOS, + RESUME_PHASE_END, + SUSPEND_PHASE_START, + SUSPEND_PHASE_PACKER, + SUSPEND_PHASE_DATA_VIOS, + SUSPEND_PHASE_DEDUPE, + SUSPEND_PHASE_FLUSHES, + SUSPEND_PHASE_LOGICAL_ZONES, + SUSPEND_PHASE_BLOCK_MAP, + SUSPEND_PHASE_JOURNAL, + SUSPEND_PHASE_DEPOT, + SUSPEND_PHASE_READ_ONLY_WAIT, + SUSPEND_PHASE_WRITE_SUPER_BLOCK, + SUSPEND_PHASE_END, +}; + +static const char * const ADMIN_PHASE_NAMES[] = { + "GROW_LOGICAL_PHASE_START", + "GROW_LOGICAL_PHASE_GROW_BLOCK_MAP", + "GROW_LOGICAL_PHASE_END", + "GROW_LOGICAL_PHASE_ERROR", + "GROW_PHYSICAL_PHASE_START", + "GROW_PHYSICAL_PHASE_COPY_SUMMARY", + "GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS", + "GROW_PHYSICAL_PHASE_USE_NEW_SLABS", + "GROW_PHYSICAL_PHASE_END", + "GROW_PHYSICAL_PHASE_ERROR", + "LOAD_PHASE_START", + "LOAD_PHASE_STATS", + "LOAD_PHASE_LOAD_DEPOT", + "LOAD_PHASE_MAKE_DIRTY", + "LOAD_PHASE_PREPARE_TO_ALLOCATE", + "LOAD_PHASE_SCRUB_SLABS", + "LOAD_PHASE_DATA_REDUCTION", + "LOAD_PHASE_FINISHED", + "LOAD_PHASE_DRAIN_JOURNAL", + "LOAD_PHASE_WAIT_FOR_READ_ONLY", + "PRE_LOAD_PHASE_START", + "PRE_LOAD_PHASE_LOAD_COMPONENTS", + "PRE_LOAD_PHASE_END", + "PREPARE_GROW_PHYSICAL_PHASE_START", + "RESUME_PHASE_START", + "RESUME_PHASE_ALLOW_READ_ONLY_MODE", + "RESUME_PHASE_DEDUPE", + "RESUME_PHASE_DEPOT", + "RESUME_PHASE_JOURNAL", + "RESUME_PHASE_BLOCK_MAP", + "RESUME_PHASE_LOGICAL_ZONES", + "RESUME_PHASE_PACKER", + "RESUME_PHASE_FLUSHER", + "RESUME_PHASE_DATA_VIOS", + "RESUME_PHASE_END", + "SUSPEND_PHASE_START", + "SUSPEND_PHASE_PACKER", + "SUSPEND_PHASE_DATA_VIOS", + "SUSPEND_PHASE_DEDUPE", + "SUSPEND_PHASE_FLUSHES", + "SUSPEND_PHASE_LOGICAL_ZONES", + "SUSPEND_PHASE_BLOCK_MAP", + "SUSPEND_PHASE_JOURNAL", + "SUSPEND_PHASE_DEPOT", + "SUSPEND_PHASE_READ_ONLY_WAIT", + "SUSPEND_PHASE_WRITE_SUPER_BLOCK", + "SUSPEND_PHASE_END", +}; + +enum { + /* If we bump this, update the arrays below */ + TABLE_VERSION = 4, +}; + +/* arrays for handling different table versions */ +static const u8 REQUIRED_ARGC[] = { 10, 12, 9, 7, 6 }; +/* pool name no longer used. only here for verification of older versions */ +static const u8 POOL_NAME_ARG_INDEX[] = { 8, 10, 8 }; + +/* + * Track in-use instance numbers using a flat bit array. + * + * O(n) run time isn't ideal, but if we have 1000 VDO devices in use simultaneously we still only + * need to scan 16 words, so it's not likely to be a big deal compared to other resource usage. + */ + +enum { + /* + * This minimum size for the bit array creates a numbering space of 0-999, which allows + * successive starts of the same volume to have different instance numbers in any + * reasonably-sized test. Changing instances on restart allows vdoMonReport to detect that + * the ephemeral stats have reset to zero. + */ + BIT_COUNT_MINIMUM = 1000, + /** Grow the bit array by this many bits when needed */ + BIT_COUNT_INCREMENT = 100, +}; + +struct instance_tracker { + unsigned int bit_count; + unsigned long *words; + unsigned int count; + unsigned int next; +}; + +static DEFINE_MUTEX(instances_lock); +static struct instance_tracker instances; + +/** + * free_device_config() - Free a device config created by parse_device_config(). + * @config: The config to free. + */ +static void free_device_config(struct device_config *config) +{ + if (config == NULL) + return; + + if (config->owned_device != NULL) + dm_put_device(config->owning_target, config->owned_device); + + uds_free(config->parent_device_name); + uds_free(config->original_string); + + /* Reduce the chance a use-after-free (as in BZ 1669960) happens to work. */ + memset(config, 0, sizeof(*config)); + uds_free(config); +} + +/** + * get_version_number() - Decide the version number from argv. + * + * @argc: The number of table values. + * @argv: The array of table values. + * @error_ptr: A pointer to return a error string in. + * @version_ptr: A pointer to return the version. + * + * Return: VDO_SUCCESS or an error code. + */ +static int get_version_number(int argc, char **argv, char **error_ptr, + unsigned int *version_ptr) +{ + /* version, if it exists, is in a form of V */ + if (sscanf(argv[0], "V%u", version_ptr) == 1) { + if (*version_ptr < 1 || *version_ptr > TABLE_VERSION) { + *error_ptr = "Unknown version number detected"; + return VDO_BAD_CONFIGURATION; + } + } else { + /* V0 actually has no version number in the table string */ + *version_ptr = 0; + } + + /* + * V0 and V1 have no optional parameters. There will always be a parameter for thread + * config, even if it's a "." to show it's an empty list. + */ + if (*version_ptr <= 1) { + if (argc != REQUIRED_ARGC[*version_ptr]) { + *error_ptr = "Incorrect number of arguments for version"; + return VDO_BAD_CONFIGURATION; + } + } else if (argc < REQUIRED_ARGC[*version_ptr]) { + *error_ptr = "Incorrect number of arguments for version"; + return VDO_BAD_CONFIGURATION; + } + + if (*version_ptr != TABLE_VERSION) { + uds_log_warning("Detected version mismatch between kernel module and tools kernel: %d, tool: %d", + TABLE_VERSION, *version_ptr); + uds_log_warning("Please consider upgrading management tools to match kernel."); + } + return VDO_SUCCESS; +} + +/* Free a list of non-NULL string pointers, and then the list itself. */ +static void free_string_array(char **string_array) +{ + unsigned int offset; + + for (offset = 0; string_array[offset] != NULL; offset++) + uds_free(string_array[offset]); + uds_free(string_array); +} + +/* + * Split the input string into substrings, separated at occurrences of the indicated character, + * returning a null-terminated list of string pointers. + * + * The string pointers and the pointer array itself should both be freed with uds_free() when no + * longer needed. This can be done with vdo_free_string_array (below) if the pointers in the array + * are not changed. Since the array and copied strings are allocated by this function, it may only + * be used in contexts where allocation is permitted. + * + * Empty substrings are not ignored; that is, returned substrings may be empty strings if the + * separator occurs twice in a row. + */ +static int split_string(const char *string, char separator, char ***substring_array_ptr) +{ + unsigned int current_substring = 0, substring_count = 1; + const char *s; + char **substrings; + int result; + ptrdiff_t length; + + for (s = string; *s != 0; s++) { + if (*s == separator) + substring_count++; + } + + result = uds_allocate(substring_count + 1, char *, "string-splitting array", + &substrings); + if (result != UDS_SUCCESS) + return result; + + for (s = string; *s != 0; s++) { + if (*s == separator) { + ptrdiff_t length = s - string; + + result = uds_allocate(length + 1, char, "split string", + &substrings[current_substring]); + if (result != UDS_SUCCESS) { + free_string_array(substrings); + return result; + } + /* + * Trailing NUL is already in place after allocation; deal with the zero or + * more non-NUL bytes in the string. + */ + if (length > 0) + memcpy(substrings[current_substring], string, length); + string = s + 1; + current_substring++; + BUG_ON(current_substring >= substring_count); + } + } + /* Process final string, with no trailing separator. */ + BUG_ON(current_substring != (substring_count - 1)); + length = strlen(string); + + result = uds_allocate(length + 1, char, "split string", + &substrings[current_substring]); + if (result != UDS_SUCCESS) { + free_string_array(substrings); + return result; + } + memcpy(substrings[current_substring], string, length); + current_substring++; + /* substrings[current_substring] is NULL already */ + *substring_array_ptr = substrings; + return UDS_SUCCESS; +} + +/* + * Join the input substrings into one string, joined with the indicated character, returning a + * string. array_length is a bound on the number of valid elements in substring_array, in case it + * is not NULL-terminated. + */ +static int join_strings(char **substring_array, size_t array_length, char separator, + char **string_ptr) +{ + size_t string_length = 0; + size_t i; + int result; + char *output, *current_position; + + for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++) + string_length += strlen(substring_array[i]) + 1; + + result = uds_allocate(string_length, char, __func__, &output); + if (result != VDO_SUCCESS) + return result; + + current_position = &output[0]; + + for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++) { + current_position = uds_append_to_buffer(current_position, + output + string_length, "%s", + substring_array[i]); + *current_position = separator; + current_position++; + } + + /* We output one too many separators; replace the last with a zero byte. */ + if (current_position != output) + *(current_position - 1) = '\0'; + + *string_ptr = output; + return UDS_SUCCESS; +} + +/** + * parse_bool() - Parse a two-valued option into a bool. + * @bool_str: The string value to convert to a bool. + * @true_str: The string value which should be converted to true. + * @false_str: The string value which should be converted to false. + * @bool_ptr: A pointer to return the bool value in. + * + * Return: VDO_SUCCESS or an error if bool_str is neither true_str nor false_str. + */ +static inline int __must_check parse_bool(const char *bool_str, const char *true_str, + const char *false_str, bool *bool_ptr) +{ + bool value = false; + + if (strcmp(bool_str, true_str) == 0) + value = true; + else if (strcmp(bool_str, false_str) == 0) + value = false; + else + return VDO_BAD_CONFIGURATION; + + *bool_ptr = value; + return VDO_SUCCESS; +} + +/** + * process_one_thread_config_spec() - Process one component of a thread parameter configuration + * string and update the configuration data structure. + * @thread_param_type: The type of thread specified. + * @count: The thread count requested. + * @config: The configuration data structure to update. + * + * If the thread count requested is invalid, a message is logged and -EINVAL returned. If the + * thread name is unknown, a message is logged but no error is returned. + * + * Return: VDO_SUCCESS or -EINVAL + */ +static int process_one_thread_config_spec(const char *thread_param_type, + unsigned int count, + struct thread_count_config *config) +{ + /* Handle limited thread parameters */ + if (strcmp(thread_param_type, "bioRotationInterval") == 0) { + if (count == 0) { + uds_log_error("thread config string error: 'bioRotationInterval' of at least 1 is required"); + return -EINVAL; + } else if (count > VDO_BIO_ROTATION_INTERVAL_LIMIT) { + uds_log_error("thread config string error: 'bioRotationInterval' cannot be higher than %d", + VDO_BIO_ROTATION_INTERVAL_LIMIT); + return -EINVAL; + } + config->bio_rotation_interval = count; + return VDO_SUCCESS; + } + if (strcmp(thread_param_type, "logical") == 0) { + if (count > MAX_VDO_LOGICAL_ZONES) { + uds_log_error("thread config string error: at most %d 'logical' threads are allowed", + MAX_VDO_LOGICAL_ZONES); + return -EINVAL; + } + config->logical_zones = count; + return VDO_SUCCESS; + } + if (strcmp(thread_param_type, "physical") == 0) { + if (count > MAX_VDO_PHYSICAL_ZONES) { + uds_log_error("thread config string error: at most %d 'physical' threads are allowed", + MAX_VDO_PHYSICAL_ZONES); + return -EINVAL; + } + config->physical_zones = count; + return VDO_SUCCESS; + } + /* Handle other thread count parameters */ + if (count > MAXIMUM_VDO_THREADS) { + uds_log_error("thread config string error: at most %d '%s' threads are allowed", + MAXIMUM_VDO_THREADS, thread_param_type); + return -EINVAL; + } + if (strcmp(thread_param_type, "hash") == 0) { + config->hash_zones = count; + return VDO_SUCCESS; + } + if (strcmp(thread_param_type, "cpu") == 0) { + if (count == 0) { + uds_log_error("thread config string error: at least one 'cpu' thread required"); + return -EINVAL; + } + config->cpu_threads = count; + return VDO_SUCCESS; + } + if (strcmp(thread_param_type, "ack") == 0) { + config->bio_ack_threads = count; + return VDO_SUCCESS; + } + if (strcmp(thread_param_type, "bio") == 0) { + if (count == 0) { + uds_log_error("thread config string error: at least one 'bio' thread required"); + return -EINVAL; + } + config->bio_threads = count; + return VDO_SUCCESS; + } + + /* + * Don't fail, just log. This will handle version mismatches between user mode tools and + * kernel. + */ + uds_log_info("unknown thread parameter type \"%s\"", thread_param_type); + return VDO_SUCCESS; +} + +/** + * parse_one_thread_config_spec() - Parse one component of a thread parameter configuration string + * and update the configuration data structure. + * @spec: The thread parameter specification string. + * @config: The configuration data to be updated. + */ +static int parse_one_thread_config_spec(const char *spec, + struct thread_count_config *config) +{ + unsigned int count; + char **fields; + int result; + + result = split_string(spec, '=', &fields); + if (result != UDS_SUCCESS) + return result; + + if ((fields[0] == NULL) || (fields[1] == NULL) || (fields[2] != NULL)) { + uds_log_error("thread config string error: expected thread parameter assignment, saw \"%s\"", + spec); + free_string_array(fields); + return -EINVAL; + } + + result = kstrtouint(fields[1], 10, &count); + if (result != UDS_SUCCESS) { + uds_log_error("thread config string error: integer value needed, found \"%s\"", + fields[1]); + free_string_array(fields); + return result; + } + + result = process_one_thread_config_spec(fields[0], count, config); + free_string_array(fields); + return result; +} + +/** + * parse_thread_config_string() - Parse the configuration string passed and update the specified + * counts and other parameters of various types of threads to be + * created. + * @string: Thread parameter configuration string. + * @config: The thread configuration data to update. + * + * The configuration string should contain one or more comma-separated specs of the form + * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval", + * "logical", "physical", and "hash". + * + * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop + * further parsing. + * + * This function can't set the "reason" value the caller wants to pass back, because we'd want to + * format it to say which field was invalid, and we can't allocate the "reason" strings + * dynamically. So if an error occurs, we'll log the details and pass back an error. + * + * Return: VDO_SUCCESS or -EINVAL or -ENOMEM + */ +static int parse_thread_config_string(const char *string, + struct thread_count_config *config) +{ + int result = VDO_SUCCESS; + char **specs; + + if (strcmp(".", string) != 0) { + unsigned int i; + + result = split_string(string, ',', &specs); + if (result != UDS_SUCCESS) + return result; + + for (i = 0; specs[i] != NULL; i++) { + result = parse_one_thread_config_spec(specs[i], config); + if (result != VDO_SUCCESS) + break; + } + free_string_array(specs); + } + return result; +} + +/** + * process_one_key_value_pair() - Process one component of an optional parameter string and update + * the configuration data structure. + * @key: The optional parameter key name. + * @value: The optional parameter value. + * @config: The configuration data structure to update. + * + * If the value requested is invalid, a message is logged and -EINVAL returned. If the key is + * unknown, a message is logged but no error is returned. + * + * Return: VDO_SUCCESS or -EINVAL + */ +static int process_one_key_value_pair(const char *key, unsigned int value, + struct device_config *config) +{ + /* Non thread optional parameters */ + if (strcmp(key, "maxDiscard") == 0) { + if (value == 0) { + uds_log_error("optional parameter error: at least one max discard block required"); + return -EINVAL; + } + /* Max discard sectors in blkdev_issue_discard is UINT_MAX >> 9 */ + if (value > (UINT_MAX / VDO_BLOCK_SIZE)) { + uds_log_error("optional parameter error: at most %d max discard blocks are allowed", + UINT_MAX / VDO_BLOCK_SIZE); + return -EINVAL; + } + config->max_discard_blocks = value; + return VDO_SUCCESS; + } + /* Handles unknown key names */ + return process_one_thread_config_spec(key, value, &config->thread_counts); +} + +/** + * parse_one_key_value_pair() - Parse one key/value pair and update the configuration data + * structure. + * @key: The optional key name. + * @value: The optional value. + * @config: The configuration data to be updated. + * + * Return: VDO_SUCCESS or error. + */ +static int parse_one_key_value_pair(const char *key, const char *value, + struct device_config *config) +{ + unsigned int count; + int result; + + if (strcmp(key, "deduplication") == 0) + return parse_bool(value, "on", "off", &config->deduplication); + + if (strcmp(key, "compression") == 0) + return parse_bool(value, "on", "off", &config->compression); + + /* The remaining arguments must have integral values. */ + result = kstrtouint(value, 10, &count); + if (result != UDS_SUCCESS) { + uds_log_error("optional config string error: integer value needed, found \"%s\"", + value); + return result; + } + return process_one_key_value_pair(key, count, config); +} + +/** + * parse_key_value_pairs() - Parse all key/value pairs from a list of arguments. + * @argc: The total number of arguments in list. + * @argv: The list of key/value pairs. + * @config: The device configuration data to update. + * + * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop + * further parsing. + * + * This function can't set the "reason" value the caller wants to pass back, because we'd want to + * format it to say which field was invalid, and we can't allocate the "reason" strings + * dynamically. So if an error occurs, we'll log the details and return the error. + * + * Return: VDO_SUCCESS or error + */ +static int parse_key_value_pairs(int argc, char **argv, struct device_config *config) +{ + int result = VDO_SUCCESS; + + while (argc) { + result = parse_one_key_value_pair(argv[0], argv[1], config); + if (result != VDO_SUCCESS) + break; + + argc -= 2; + argv += 2; + } + + return result; +} + +/** + * parse_optional_arguments() - Parse the configuration string passed in for optional arguments. + * @arg_set: The structure holding the arguments to parse. + * @error_ptr: Pointer to a buffer to hold the error string. + * @config: Pointer to device configuration data to update. + * + * For V0/V1 configurations, there will only be one optional parameter; the thread configuration. + * The configuration string should contain one or more comma-separated specs of the form + * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval", + * "logical", "physical", and "hash". + * + * For V2 configurations and beyond, there could be any number of arguments. They should contain + * one or more key/value pairs separated by a space. + * + * Return: VDO_SUCCESS or error + */ +static int parse_optional_arguments(struct dm_arg_set *arg_set, char **error_ptr, + struct device_config *config) +{ + int result = VDO_SUCCESS; + + if (config->version == 0 || config->version == 1) { + result = parse_thread_config_string(arg_set->argv[0], + &config->thread_counts); + if (result != VDO_SUCCESS) { + *error_ptr = "Invalid thread-count configuration"; + return VDO_BAD_CONFIGURATION; + } + } else { + if ((arg_set->argc % 2) != 0) { + *error_ptr = "Odd number of optional arguments given but they should be pairs"; + return VDO_BAD_CONFIGURATION; + } + result = parse_key_value_pairs(arg_set->argc, arg_set->argv, config); + if (result != VDO_SUCCESS) { + *error_ptr = "Invalid optional argument configuration"; + return VDO_BAD_CONFIGURATION; + } + } + return result; +} + +/** + * handle_parse_error() - Handle a parsing error. + * @config: The config to free. + * @error_ptr: A place to store a constant string about the error. + * @error_str: A constant string to store in error_ptr. + */ +static void handle_parse_error(struct device_config *config, char **error_ptr, + char *error_str) +{ + free_device_config(config); + *error_ptr = error_str; +} + +/** + * parse_device_config() - Convert the dmsetup table into a struct device_config. + * @argc: The number of table values. + * @argv: The array of table values. + * @ti: The target structure for this table. + * @config_ptr: A pointer to return the allocated config. + * + * Return: VDO_SUCCESS or an error code. + */ +static int parse_device_config(int argc, char **argv, struct dm_target *ti, + struct device_config **config_ptr) +{ + bool enable_512e; + size_t logical_bytes = to_bytes(ti->len); + struct dm_arg_set arg_set; + char **error_ptr = &ti->error; + struct device_config *config = NULL; + int result; + + if ((logical_bytes % VDO_BLOCK_SIZE) != 0) { + handle_parse_error(config, error_ptr, + "Logical size must be a multiple of 4096"); + return VDO_BAD_CONFIGURATION; + } + + if (argc == 0) { + handle_parse_error(config, error_ptr, "Incorrect number of arguments"); + return VDO_BAD_CONFIGURATION; + } + + result = uds_allocate(1, struct device_config, "device_config", &config); + if (result != VDO_SUCCESS) { + handle_parse_error(config, error_ptr, + "Could not allocate config structure"); + return VDO_BAD_CONFIGURATION; + } + + config->owning_target = ti; + config->logical_blocks = logical_bytes / VDO_BLOCK_SIZE; + INIT_LIST_HEAD(&config->config_list); + + /* Save the original string. */ + result = join_strings(argv, argc, ' ', &config->original_string); + if (result != VDO_SUCCESS) { + handle_parse_error(config, error_ptr, "Could not populate string"); + return VDO_BAD_CONFIGURATION; + } + + uds_log_info("table line: %s", config->original_string); + + config->thread_counts = (struct thread_count_config) { + .bio_ack_threads = 1, + .bio_threads = DEFAULT_VDO_BIO_SUBMIT_QUEUE_COUNT, + .bio_rotation_interval = DEFAULT_VDO_BIO_SUBMIT_QUEUE_ROTATE_INTERVAL, + .cpu_threads = 1, + .logical_zones = 0, + .physical_zones = 0, + .hash_zones = 0, + }; + config->max_discard_blocks = 1; + config->deduplication = true; + config->compression = false; + + arg_set.argc = argc; + arg_set.argv = argv; + + result = get_version_number(argc, argv, error_ptr, &config->version); + if (result != VDO_SUCCESS) { + /* get_version_number sets error_ptr itself. */ + handle_parse_error(config, error_ptr, *error_ptr); + return result; + } + /* Move the arg pointer forward only if the argument was there. */ + if (config->version >= 1) + dm_shift_arg(&arg_set); + + result = uds_duplicate_string(dm_shift_arg(&arg_set), "parent device name", + &config->parent_device_name); + if (result != VDO_SUCCESS) { + handle_parse_error(config, error_ptr, + "Could not copy parent device name"); + return VDO_BAD_CONFIGURATION; + } + + /* Get the physical blocks, if known. */ + if (config->version >= 1) { + result = kstrtoull(dm_shift_arg(&arg_set), 10, &config->physical_blocks); + if (result != VDO_SUCCESS) { + handle_parse_error(config, error_ptr, + "Invalid physical block count"); + return VDO_BAD_CONFIGURATION; + } + } + + /* Get the logical block size and validate */ + result = parse_bool(dm_shift_arg(&arg_set), "512", "4096", &enable_512e); + if (result != VDO_SUCCESS) { + handle_parse_error(config, error_ptr, "Invalid logical block size"); + return VDO_BAD_CONFIGURATION; + } + config->logical_block_size = (enable_512e ? 512 : 4096); + + /* Skip past the two no longer used read cache options. */ + if (config->version <= 1) + dm_consume_args(&arg_set, 2); + + /* Get the page cache size. */ + result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->cache_size); + if (result != VDO_SUCCESS) { + handle_parse_error(config, error_ptr, + "Invalid block map page cache size"); + return VDO_BAD_CONFIGURATION; + } + + /* Get the block map era length. */ + result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->block_map_maximum_age); + if (result != VDO_SUCCESS) { + handle_parse_error(config, error_ptr, "Invalid block map maximum age"); + return VDO_BAD_CONFIGURATION; + } + + /* Skip past the no longer used MD RAID5 optimization mode */ + if (config->version <= 2) + dm_consume_args(&arg_set, 1); + + /* Skip past the no longer used write policy setting */ + if (config->version <= 3) + dm_consume_args(&arg_set, 1); + + /* Skip past the no longer used pool name for older table lines */ + if (config->version <= 2) { + /* + * Make sure the enum to get the pool name from argv directly is still in sync with + * the parsing of the table line. + */ + if (&arg_set.argv[0] != &argv[POOL_NAME_ARG_INDEX[config->version]]) { + handle_parse_error(config, error_ptr, + "Pool name not in expected location"); + return VDO_BAD_CONFIGURATION; + } + dm_shift_arg(&arg_set); + } + + /* Get the optional arguments and validate. */ + result = parse_optional_arguments(&arg_set, error_ptr, config); + if (result != VDO_SUCCESS) { + /* parse_optional_arguments sets error_ptr itself. */ + handle_parse_error(config, error_ptr, *error_ptr); + return result; + } + + /* + * Logical, physical, and hash zone counts can all be zero; then we get one thread doing + * everything, our older configuration. If any zone count is non-zero, the others must be + * as well. + */ + if (((config->thread_counts.logical_zones == 0) != + (config->thread_counts.physical_zones == 0)) || + ((config->thread_counts.physical_zones == 0) != + (config->thread_counts.hash_zones == 0))) { + handle_parse_error(config, error_ptr, + "Logical, physical, and hash zones counts must all be zero or all non-zero"); + return VDO_BAD_CONFIGURATION; + } + + if (config->cache_size < + (2 * MAXIMUM_VDO_USER_VIOS * config->thread_counts.logical_zones)) { + handle_parse_error(config, error_ptr, + "Insufficient block map cache for logical zones"); + return VDO_BAD_CONFIGURATION; + } + + result = dm_get_device(ti, config->parent_device_name, + dm_table_get_mode(ti->table), &config->owned_device); + if (result != 0) { + uds_log_error("couldn't open device \"%s\": error %d", + config->parent_device_name, result); + handle_parse_error(config, error_ptr, "Unable to open storage device"); + return VDO_BAD_CONFIGURATION; + } + + if (config->version == 0) { + u64 device_size = i_size_read(config->owned_device->bdev->bd_inode); + + config->physical_blocks = device_size / VDO_BLOCK_SIZE; + } + + *config_ptr = config; + return result; +} + +static struct vdo *get_vdo_for_target(struct dm_target *ti) +{ + return ((struct device_config *) ti->private)->vdo; +} + + +static int vdo_map_bio(struct dm_target *ti, struct bio *bio) +{ + struct vdo *vdo = get_vdo_for_target(ti); + struct vdo_work_queue *current_work_queue; + const struct admin_state_code *code = vdo_get_admin_state_code(&vdo->admin.state); + + ASSERT_LOG_ONLY(code->normal, "vdo should not receive bios while in state %s", + code->name); + + /* Count all incoming bios. */ + vdo_count_bios(&vdo->stats.bios_in, bio); + + + /* Handle empty bios. Empty flush bios are not associated with a vio. */ + if ((bio_op(bio) == REQ_OP_FLUSH) || ((bio->bi_opf & REQ_PREFLUSH) != 0)) { + vdo_launch_flush(vdo, bio); + return DM_MAPIO_SUBMITTED; + } + + /* This could deadlock, */ + current_work_queue = vdo_get_current_work_queue(); + BUG_ON((current_work_queue != NULL) && + (vdo == vdo_get_work_queue_owner(current_work_queue)->vdo)); + vdo_launch_bio(vdo->data_vio_pool, bio); + return DM_MAPIO_SUBMITTED; +} + +static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + struct vdo *vdo = get_vdo_for_target(ti); + + limits->logical_block_size = vdo->device_config->logical_block_size; + limits->physical_block_size = VDO_BLOCK_SIZE; + + /* The minimum io size for random io */ + blk_limits_io_min(limits, VDO_BLOCK_SIZE); + /* The optimal io size for streamed/sequential io */ + blk_limits_io_opt(limits, VDO_BLOCK_SIZE); + + /* + * Sets the maximum discard size that will be passed into VDO. This value comes from a + * table line value passed in during dmsetup create. + * + * The value 1024 is the largest usable value on HD systems. A 2048 sector discard on a + * busy HD system takes 31 seconds. We should use a value no higher than 1024, which takes + * 15 to 16 seconds on a busy HD system. + * + * But using large values results in 120 second blocked task warnings in /var/log/kern.log. + * In order to avoid these warnings, we choose to use the smallest reasonable value. See + * VDO-3062 and VDO-3087. + * + * The value is displayed in sysfs, and also used by dm-thin to determine whether to pass + * down discards. The block layer splits large discards on this boundary when this is set. + */ + limits->max_discard_sectors = + (vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK); + + /* + * Force discards to not begin or end with a partial block by stating the granularity is + * 4k. + */ + limits->discard_granularity = VDO_BLOCK_SIZE; +} + +static int vdo_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, + void *data) +{ + struct device_config *config = get_vdo_for_target(ti)->device_config; + + return fn(ti, config->owned_device, 0, + config->physical_blocks * VDO_SECTORS_PER_BLOCK, data); +} + +/* + * Status line is: + * + * + */ + +static void vdo_status(struct dm_target *ti, status_type_t status_type, + unsigned int status_flags, char *result, unsigned int maxlen) +{ + struct vdo *vdo = get_vdo_for_target(ti); + struct vdo_statistics *stats; + struct device_config *device_config; + /* N.B.: The DMEMIT macro uses the variables named "sz", "result", "maxlen". */ + int sz = 0; + + switch (status_type) { + case STATUSTYPE_INFO: + /* Report info for dmsetup status */ + mutex_lock(&vdo->stats_mutex); + vdo_fetch_statistics(vdo, &vdo->stats_buffer); + stats = &vdo->stats_buffer; + + DMEMIT("/dev/%pg %s %s %s %s %llu %llu", + vdo_get_backing_device(vdo), stats->mode, + stats->in_recovery_mode ? "recovering" : "-", + vdo_get_dedupe_index_state_name(vdo->hash_zones), + vdo_get_compressing(vdo) ? "online" : "offline", + stats->data_blocks_used + stats->overhead_blocks_used, + stats->physical_blocks); + mutex_unlock(&vdo->stats_mutex); + break; + + case STATUSTYPE_TABLE: + /* Report the string actually specified in the beginning. */ + device_config = (struct device_config *) ti->private; + DMEMIT("%s", device_config->original_string); + break; + + case STATUSTYPE_IMA: + /* FIXME: We ought to be more detailed here, but this is what thin does. */ + *result = '\0'; + break; + } +} + +static block_count_t __must_check get_underlying_device_block_count(const struct vdo *vdo) +{ + return i_size_read(vdo_get_backing_device(vdo)->bd_inode) / VDO_BLOCK_SIZE; +} + +static int __must_check process_vdo_message_locked(struct vdo *vdo, unsigned int argc, + char **argv) +{ + if ((argc == 2) && (strcasecmp(argv[0], "compression") == 0)) { + if (strcasecmp(argv[1], "on") == 0) { + vdo_set_compressing(vdo, true); + return 0; + } + + if (strcasecmp(argv[1], "off") == 0) { + vdo_set_compressing(vdo, false); + return 0; + } + + uds_log_warning("invalid argument '%s' to dmsetup compression message", + argv[1]); + return -EINVAL; + } + + uds_log_warning("unrecognized dmsetup message '%s' received", argv[0]); + return -EINVAL; +} + +/* + * If the message is a dump, just do it. Otherwise, check that no other message is being processed, + * and only proceed if so. + * Returns -EBUSY if another message is being processed + */ +static int __must_check process_vdo_message(struct vdo *vdo, unsigned int argc, + char **argv) +{ + int result; + + /* + * All messages which may be processed in parallel with other messages should be handled + * here before the atomic check below. Messages which should be exclusive should be + * processed in process_vdo_message_locked(). + */ + + /* Dump messages should always be processed */ + if (strcasecmp(argv[0], "dump") == 0) + return vdo_dump(vdo, argc, argv, "dmsetup message"); + + if (argc == 1) { + if (strcasecmp(argv[0], "dump-on-shutdown") == 0) { + vdo->dump_on_shutdown = true; + return 0; + } + + /* Index messages should always be processed */ + if ((strcasecmp(argv[0], "index-close") == 0) || + (strcasecmp(argv[0], "index-create") == 0) || + (strcasecmp(argv[0], "index-disable") == 0) || + (strcasecmp(argv[0], "index-enable") == 0)) + return vdo_message_dedupe_index(vdo->hash_zones, argv[0]); + } + + if (atomic_cmpxchg(&vdo->processing_message, 0, 1) != 0) + return -EBUSY; + + result = process_vdo_message_locked(vdo, argc, argv); + + /* Pairs with the implicit barrier in cmpxchg just above */ + smp_wmb(); + atomic_set(&vdo->processing_message, 0); + return result; +} + +static int vdo_message(struct dm_target *ti, unsigned int argc, char **argv, + char *result_buffer, unsigned int maxlen) +{ + struct registered_thread allocating_thread, instance_thread; + struct vdo *vdo; + int result; + + if (argc == 0) { + uds_log_warning("unspecified dmsetup message"); + return -EINVAL; + } + + vdo = get_vdo_for_target(ti); + uds_register_allocating_thread(&allocating_thread, NULL); + uds_register_thread_device_id(&instance_thread, &vdo->instance); + + /* + * Must be done here so we don't map return codes. The code in dm-ioctl expects a 1 for a + * return code to look at the buffer and see if it is full or not. + */ + if ((argc == 1) && (strcasecmp(argv[0], "stats") == 0)) { + vdo_write_stats(vdo, result_buffer, maxlen); + result = 1; + } else { + result = vdo_map_to_system_error(process_vdo_message(vdo, argc, argv)); + } + + uds_unregister_thread_device_id(); + uds_unregister_allocating_thread(); + return result; +} + +static void configure_target_capabilities(struct dm_target *ti) +{ + ti->discards_supported = 1; + ti->flush_supported = true; + ti->num_discard_bios = 1; + ti->num_flush_bios = 1; + + /* + * If this value changes, please make sure to update the value for max_discard_sectors + * accordingly. + */ + BUG_ON(dm_set_target_max_io_len(ti, VDO_SECTORS_PER_BLOCK) != 0); +} + +/* + * Implements vdo_filter_fn. + */ +static bool vdo_uses_device(struct vdo *vdo, const void *context) +{ + const struct device_config *config = context; + + return vdo_get_backing_device(vdo)->bd_dev == config->owned_device->bdev->bd_dev; +} + +/** + * get_thread_id_for_phase() - Get the thread id for the current phase of the admin operation in + * progress. + */ +static thread_id_t __must_check get_thread_id_for_phase(struct vdo *vdo) +{ + switch (vdo->admin.phase) { + case RESUME_PHASE_PACKER: + case RESUME_PHASE_FLUSHER: + case SUSPEND_PHASE_PACKER: + case SUSPEND_PHASE_FLUSHES: + return vdo->thread_config.packer_thread; + + case RESUME_PHASE_DATA_VIOS: + case SUSPEND_PHASE_DATA_VIOS: + return vdo->thread_config.cpu_thread; + + case LOAD_PHASE_DRAIN_JOURNAL: + case RESUME_PHASE_JOURNAL: + case SUSPEND_PHASE_JOURNAL: + return vdo->thread_config.journal_thread; + + default: + return vdo->thread_config.admin_thread; + } +} + +static struct vdo_completion *prepare_admin_completion(struct vdo *vdo, + vdo_action_fn callback, + vdo_action_fn error_handler) +{ + struct vdo_completion *completion = &vdo->admin.completion; + + /* + * We can't use vdo_prepare_completion_for_requeue() here because we don't want to reset + * any error in the completion. + */ + completion->callback = callback; + completion->error_handler = error_handler; + completion->callback_thread_id = get_thread_id_for_phase(vdo); + completion->requeue = true; + return completion; +} + +/** + * advance_phase() - Increment the phase of the current admin operation and prepare the admin + * completion to run on the thread for the next phase. + * @vdo: The on which an admin operation is being performed + * + * Return: The current phase + */ +static u32 advance_phase(struct vdo *vdo) +{ + u32 phase = vdo->admin.phase++; + + vdo->admin.completion.callback_thread_id = get_thread_id_for_phase(vdo); + vdo->admin.completion.requeue = true; + return phase; +} + +/* + * Perform an administrative operation (load, suspend, grow logical, or grow physical). This method + * should not be called from vdo threads. + */ +static int perform_admin_operation(struct vdo *vdo, u32 starting_phase, + vdo_action_fn callback, vdo_action_fn error_handler, + const char *type) +{ + int result; + struct vdo_administrator *admin = &vdo->admin; + + if (atomic_cmpxchg(&admin->busy, 0, 1) != 0) { + return uds_log_error_strerror(VDO_COMPONENT_BUSY, + "Can't start %s operation, another operation is already in progress", + type); + } + + admin->phase = starting_phase; + reinit_completion(&admin->callback_sync); + vdo_reset_completion(&admin->completion); + vdo_launch_completion(prepare_admin_completion(vdo, callback, error_handler)); + + /* + * Using the "interruptible" interface means that Linux will not log a message when we wait + * for more than 120 seconds. + */ + while (wait_for_completion_interruptible(&admin->callback_sync) != 0) + /* * However, if we get a signal in a user-mode process, we could spin... */ + fsleep(1000); + + result = admin->completion.result; + /* pairs with implicit barrier in cmpxchg above */ + smp_wmb(); + atomic_set(&admin->busy, 0); + return result; +} + +/* Assert that we are operating on the correct thread for the current phase. */ +static void assert_admin_phase_thread(struct vdo *vdo, const char *what) +{ + ASSERT_LOG_ONLY(vdo_get_callback_thread_id() == get_thread_id_for_phase(vdo), + "%s on correct thread for %s", what, + ADMIN_PHASE_NAMES[vdo->admin.phase]); +} + +/** + * finish_operation_callback() - Callback to finish an admin operation. + * @completion: The admin_completion. + */ +static void finish_operation_callback(struct vdo_completion *completion) +{ + struct vdo_administrator *admin = &completion->vdo->admin; + + vdo_finish_operation(&admin->state, completion->result); + complete(&admin->callback_sync); +} + +/** + * decode_from_super_block() - Decode the VDO state from the super block and validate that it is + * correct. + * @vdo: The vdo being loaded. + * + * On error from this method, the component states must be destroyed explicitly. If this method + * returns successfully, the component states must not be destroyed. + * + * Return: VDO_SUCCESS or an error. + */ +static int __must_check decode_from_super_block(struct vdo *vdo) +{ + const struct device_config *config = vdo->device_config; + int result; + + result = vdo_decode_component_states(vdo->super_block.buffer, &vdo->geometry, + &vdo->states); + if (result != VDO_SUCCESS) + return result; + + vdo_set_state(vdo, vdo->states.vdo.state); + vdo->load_state = vdo->states.vdo.state; + + /* + * If the device config specifies a larger logical size than was recorded in the super + * block, just accept it. + */ + if (vdo->states.vdo.config.logical_blocks < config->logical_blocks) { + uds_log_warning("Growing logical size: a logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block", + (unsigned long long) config->logical_blocks, + (unsigned long long) vdo->states.vdo.config.logical_blocks); + vdo->states.vdo.config.logical_blocks = config->logical_blocks; + } + + result = vdo_validate_component_states(&vdo->states, vdo->geometry.nonce, + config->physical_blocks, + config->logical_blocks); + if (result != VDO_SUCCESS) + return result; + + vdo->layout = vdo->states.layout; + return VDO_SUCCESS; +} + +/** + * decode_vdo() - Decode the component data portion of a super block and fill in the corresponding + * portions of the vdo being loaded. + * @vdo: The vdo being loaded. + * + * This will also allocate the recovery journal and slab depot. If this method is called with an + * asynchronous layer (i.e. a thread config which specifies at least one base thread), the block + * map and packer will be constructed as well. + * + * Return: VDO_SUCCESS or an error. + */ +static int __must_check decode_vdo(struct vdo *vdo) +{ + block_count_t maximum_age, journal_length; + struct partition *partition; + int result; + + result = decode_from_super_block(vdo); + if (result != VDO_SUCCESS) { + vdo_destroy_component_states(&vdo->states); + return result; + } + + maximum_age = vdo_convert_maximum_age(vdo->device_config->block_map_maximum_age); + journal_length = + vdo_get_recovery_journal_length(vdo->states.vdo.config.recovery_journal_size); + if (maximum_age > (journal_length / 2)) { + return uds_log_error_strerror(VDO_BAD_CONFIGURATION, + "maximum age: %llu exceeds limit %llu", + (unsigned long long) maximum_age, + (unsigned long long) (journal_length / 2)); + } + + if (maximum_age == 0) { + return uds_log_error_strerror(VDO_BAD_CONFIGURATION, + "maximum age must be greater than 0"); + } + + result = vdo_enable_read_only_entry(vdo); + if (result != VDO_SUCCESS) + return result; + + partition = vdo_get_known_partition(&vdo->layout, + VDO_RECOVERY_JOURNAL_PARTITION); + result = vdo_decode_recovery_journal(vdo->states.recovery_journal, + vdo->states.vdo.nonce, vdo, partition, + vdo->states.vdo.complete_recoveries, + vdo->states.vdo.config.recovery_journal_size, + &vdo->recovery_journal); + if (result != VDO_SUCCESS) + return result; + + partition = vdo_get_known_partition(&vdo->layout, VDO_SLAB_SUMMARY_PARTITION); + result = vdo_decode_slab_depot(vdo->states.slab_depot, vdo, partition, + &vdo->depot); + if (result != VDO_SUCCESS) + return result; + + result = vdo_decode_block_map(vdo->states.block_map, + vdo->states.vdo.config.logical_blocks, vdo, + vdo->recovery_journal, vdo->states.vdo.nonce, + vdo->device_config->cache_size, maximum_age, + &vdo->block_map); + if (result != VDO_SUCCESS) + return result; + + result = vdo_make_physical_zones(vdo, &vdo->physical_zones); + if (result != VDO_SUCCESS) + return result; + + /* The logical zones depend on the physical zones already existing. */ + result = vdo_make_logical_zones(vdo, &vdo->logical_zones); + if (result != VDO_SUCCESS) + return result; + + return vdo_make_hash_zones(vdo, &vdo->hash_zones); +} + +/** + * pre_load_callback() - Callback to initiate a pre-load, registered in vdo_initialize(). + * @completion: The admin completion. + */ +static void pre_load_callback(struct vdo_completion *completion) +{ + struct vdo *vdo = completion->vdo; + int result; + + assert_admin_phase_thread(vdo, __func__); + + switch (advance_phase(vdo)) { + case PRE_LOAD_PHASE_START: + result = vdo_start_operation(&vdo->admin.state, + VDO_ADMIN_STATE_PRE_LOADING); + if (result != VDO_SUCCESS) { + vdo_continue_completion(completion, result); + return; + } + + vdo_load_super_block(vdo, completion); + return; + + case PRE_LOAD_PHASE_LOAD_COMPONENTS: + vdo_continue_completion(completion, decode_vdo(vdo)); + return; + + case PRE_LOAD_PHASE_END: + break; + + default: + vdo_set_completion_result(completion, UDS_BAD_STATE); + } + + finish_operation_callback(completion); +} + +static void release_instance(unsigned int instance) +{ + mutex_lock(&instances_lock); + if (instance >= instances.bit_count) { + ASSERT_LOG_ONLY(false, + "instance number %u must be less than bit count %u", + instance, instances.bit_count); + } else if (test_bit(instance, instances.words) == 0) { + ASSERT_LOG_ONLY(false, "instance number %u must be allocated", instance); + } else { + __clear_bit(instance, instances.words); + instances.count -= 1; + } + mutex_unlock(&instances_lock); +} + +static void set_device_config(struct dm_target *ti, struct vdo *vdo, + struct device_config *config) +{ + list_del_init(&config->config_list); + list_add_tail(&config->config_list, &vdo->device_config_list); + config->vdo = vdo; + ti->private = config; + configure_target_capabilities(ti); +} + +static int vdo_initialize(struct dm_target *ti, unsigned int instance, + struct device_config *config) +{ + struct vdo *vdo; + int result; + u64 block_size = VDO_BLOCK_SIZE; + u64 logical_size = to_bytes(ti->len); + block_count_t logical_blocks = logical_size / block_size; + + uds_log_info("loading device '%s'", vdo_get_device_name(ti)); + uds_log_debug("Logical block size = %llu", (u64) config->logical_block_size); + uds_log_debug("Logical blocks = %llu", logical_blocks); + uds_log_debug("Physical block size = %llu", (u64) block_size); + uds_log_debug("Physical blocks = %llu", config->physical_blocks); + uds_log_debug("Block map cache blocks = %u", config->cache_size); + uds_log_debug("Block map maximum age = %u", config->block_map_maximum_age); + uds_log_debug("Deduplication = %s", (config->deduplication ? "on" : "off")); + uds_log_debug("Compression = %s", (config->compression ? "on" : "off")); + + vdo = vdo_find_matching(vdo_uses_device, config); + if (vdo != NULL) { + uds_log_error("Existing vdo already uses device %s", + vdo->device_config->parent_device_name); + ti->error = "Cannot share storage device with already-running VDO"; + return VDO_BAD_CONFIGURATION; + } + + result = vdo_make(instance, config, &ti->error, &vdo); + if (result != VDO_SUCCESS) { + uds_log_error("Could not create VDO device. (VDO error %d, message %s)", + result, ti->error); + vdo_destroy(vdo); + return result; + } + + result = perform_admin_operation(vdo, PRE_LOAD_PHASE_START, pre_load_callback, + finish_operation_callback, "pre-load"); + if (result != VDO_SUCCESS) { + ti->error = ((result == VDO_INVALID_ADMIN_STATE) ? + "Pre-load is only valid immediately after initialization" : + "Cannot load metadata from device"); + uds_log_error("Could not start VDO device. (VDO error %d, message %s)", + result, ti->error); + vdo_destroy(vdo); + return result; + } + + set_device_config(ti, vdo, config); + vdo->device_config = config; + return VDO_SUCCESS; +} + +/* Implements vdo_filter_fn. */ +static bool __must_check vdo_is_named(struct vdo *vdo, const void *context) +{ + struct dm_target *ti = vdo->device_config->owning_target; + const char *device_name = vdo_get_device_name(ti); + + return strcmp(device_name, context) == 0; +} + +/** + * get_bit_array_size() - Return the number of bytes needed to store a bit array of the specified + * capacity in an array of unsigned longs. + * @bit_count: The number of bits the array must hold. + * + * Return: the number of bytes needed for the array representation. + */ +static size_t get_bit_array_size(unsigned int bit_count) +{ + /* Round up to a multiple of the word size and convert to a byte count. */ + return (BITS_TO_LONGS(bit_count) * sizeof(unsigned long)); +} + +/** + * grow_bit_array() - Re-allocate the bitmap word array so there will more instance numbers that + * can be allocated. + * + * Since the array is initially NULL, this also initializes the array the first time we allocate an + * instance number. + * + * Return: UDS_SUCCESS or an error code from the allocation + */ +static int grow_bit_array(void) +{ + unsigned int new_count = max(instances.bit_count + BIT_COUNT_INCREMENT, + (unsigned int) BIT_COUNT_MINIMUM); + unsigned long *new_words; + int result; + + result = uds_reallocate_memory(instances.words, + get_bit_array_size(instances.bit_count), + get_bit_array_size(new_count), + "instance number bit array", &new_words); + if (result != UDS_SUCCESS) + return result; + + instances.bit_count = new_count; + instances.words = new_words; + return UDS_SUCCESS; +} + +/** + * allocate_instance() - Allocate an instance number. + * @instance_ptr: A point to hold the instance number + * + * Return: UDS_SUCCESS or an error code + * + * This function must be called while holding the instances lock. + */ +static int allocate_instance(unsigned int *instance_ptr) +{ + unsigned int instance; + int result; + + /* If there are no unallocated instances, grow the bit array. */ + if (instances.count >= instances.bit_count) { + result = grow_bit_array(); + if (result != UDS_SUCCESS) + return result; + } + + /* + * There must be a zero bit somewhere now. Find it, starting just after the last instance + * allocated. + */ + instance = find_next_zero_bit(instances.words, instances.bit_count, + instances.next); + if (instance >= instances.bit_count) { + /* Nothing free after next, so wrap around to instance zero. */ + instance = find_first_zero_bit(instances.words, instances.bit_count); + result = ASSERT(instance < instances.bit_count, + "impossibly, no zero bit found"); + if (result != UDS_SUCCESS) + return result; + } + + __set_bit(instance, instances.words); + instances.count++; + instances.next = instance + 1; + *instance_ptr = instance; + return UDS_SUCCESS; +} + +static int construct_new_vdo_registered(struct dm_target *ti, unsigned int argc, + char **argv, unsigned int instance) +{ + int result; + struct device_config *config; + + result = parse_device_config(argc, argv, ti, &config); + if (result != VDO_SUCCESS) { + uds_log_error_strerror(result, "parsing failed: %s", ti->error); + release_instance(instance); + return -EINVAL; + } + + /* Beyond this point, the instance number will be cleaned up for us if needed */ + result = vdo_initialize(ti, instance, config); + if (result != VDO_SUCCESS) { + release_instance(instance); + free_device_config(config); + return vdo_map_to_system_error(result); + } + + return VDO_SUCCESS; +} + +static int construct_new_vdo(struct dm_target *ti, unsigned int argc, char **argv) +{ + int result; + unsigned int instance; + struct registered_thread instance_thread; + + mutex_lock(&instances_lock); + result = allocate_instance(&instance); + mutex_unlock(&instances_lock); + if (result != VDO_SUCCESS) + return -ENOMEM; + + uds_register_thread_device_id(&instance_thread, &instance); + result = construct_new_vdo_registered(ti, argc, argv, instance); + uds_unregister_thread_device_id(); + return result; +} + +/** + * check_may_grow_physical() - Callback to check that we're not in recovery mode, used in + * vdo_prepare_to_grow_physical(). + * @completion: The admin completion. + */ +static void check_may_grow_physical(struct vdo_completion *completion) +{ + struct vdo *vdo = completion->vdo; + + assert_admin_phase_thread(vdo, __func__); + + /* These checks can only be done from a vdo thread. */ + if (vdo_is_read_only(vdo)) + vdo_set_completion_result(completion, VDO_READ_ONLY); + + if (vdo_in_recovery_mode(vdo)) + vdo_set_completion_result(completion, VDO_RETRY_AFTER_REBUILD); + + finish_operation_callback(completion); +} + +static block_count_t get_partition_size(struct layout *layout, enum partition_id id) +{ + return vdo_get_known_partition(layout, id)->count; +} + +/** + * grow_layout() - Make the layout for growing a vdo. + * @vdo: The vdo preparing to grow. + * @old_size: The current size of the vdo. + * @new_size: The size to which the vdo will be grown. + * + * Return: VDO_SUCCESS or an error code. + */ +static int grow_layout(struct vdo *vdo, block_count_t old_size, block_count_t new_size) +{ + int result; + block_count_t min_new_size; + + if (vdo->next_layout.size == new_size) { + /* We are already prepared to grow to the new size, so we're done. */ + return VDO_SUCCESS; + } + + /* Make a copy completion if there isn't one */ + if (vdo->partition_copier == NULL) { + vdo->partition_copier = dm_kcopyd_client_create(NULL); + if (IS_ERR(vdo->partition_copier)) { + result = PTR_ERR(vdo->partition_copier); + vdo->partition_copier = NULL; + return result; + } + } + + /* Free any unused preparation. */ + vdo_uninitialize_layout(&vdo->next_layout); + + /* + * Make a new layout with the existing partition sizes for everything but the slab depot + * partition. + */ + result = vdo_initialize_layout(new_size, vdo->layout.start, + get_partition_size(&vdo->layout, + VDO_BLOCK_MAP_PARTITION), + get_partition_size(&vdo->layout, + VDO_RECOVERY_JOURNAL_PARTITION), + get_partition_size(&vdo->layout, + VDO_SLAB_SUMMARY_PARTITION), + &vdo->next_layout); + if (result != VDO_SUCCESS) { + dm_kcopyd_client_destroy(uds_forget(vdo->partition_copier)); + return result; + } + + /* Ensure the new journal and summary are entirely within the added blocks. */ + min_new_size = (old_size + + get_partition_size(&vdo->next_layout, + VDO_SLAB_SUMMARY_PARTITION) + + get_partition_size(&vdo->next_layout, + VDO_RECOVERY_JOURNAL_PARTITION)); + if (min_new_size > new_size) { + /* Copying the journal and summary would destroy some old metadata. */ + vdo_uninitialize_layout(&vdo->next_layout); + dm_kcopyd_client_destroy(uds_forget(vdo->partition_copier)); + return VDO_INCREMENT_TOO_SMALL; + } + + return VDO_SUCCESS; +} + +static int prepare_to_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks) +{ + int result; + block_count_t current_physical_blocks = vdo->states.vdo.config.physical_blocks; + + uds_log_info("Preparing to resize physical to %llu", + (unsigned long long) new_physical_blocks); + ASSERT_LOG_ONLY((new_physical_blocks > current_physical_blocks), + "New physical size is larger than current physical size"); + result = perform_admin_operation(vdo, PREPARE_GROW_PHYSICAL_PHASE_START, + check_may_grow_physical, + finish_operation_callback, + "prepare grow-physical"); + if (result != VDO_SUCCESS) + return result; + + result = grow_layout(vdo, current_physical_blocks, new_physical_blocks); + if (result != VDO_SUCCESS) + return result; + + result = vdo_prepare_to_grow_slab_depot(vdo->depot, + vdo_get_known_partition(&vdo->next_layout, + VDO_SLAB_DEPOT_PARTITION)); + if (result != VDO_SUCCESS) { + vdo_uninitialize_layout(&vdo->next_layout); + return result; + } + + uds_log_info("Done preparing to resize physical"); + return VDO_SUCCESS; +} + +/** + * validate_new_device_config() - Check whether a new device config represents a valid modification + * to an existing config. + * @to_validate: The new config to validate. + * @config: The existing config. + * @may_grow: Set to true if growing the logical and physical size of the vdo is currently + * permitted. + * @error_ptr: A pointer to hold the reason for any error. + * + * Return: VDO_SUCCESS or an error. + */ +static int validate_new_device_config(struct device_config *to_validate, + struct device_config *config, bool may_grow, + char **error_ptr) +{ + if (to_validate->owning_target->begin != config->owning_target->begin) { + *error_ptr = "Starting sector cannot change"; + return VDO_PARAMETER_MISMATCH; + } + + if (to_validate->logical_block_size != config->logical_block_size) { + *error_ptr = "Logical block size cannot change"; + return VDO_PARAMETER_MISMATCH; + } + + if (to_validate->logical_blocks < config->logical_blocks) { + *error_ptr = "Can't shrink VDO logical size"; + return VDO_PARAMETER_MISMATCH; + } + + if (to_validate->cache_size != config->cache_size) { + *error_ptr = "Block map cache size cannot change"; + return VDO_PARAMETER_MISMATCH; + } + + if (to_validate->block_map_maximum_age != config->block_map_maximum_age) { + *error_ptr = "Block map maximum age cannot change"; + return VDO_PARAMETER_MISMATCH; + } + + if (memcmp(&to_validate->thread_counts, &config->thread_counts, + sizeof(struct thread_count_config)) != 0) { + *error_ptr = "Thread configuration cannot change"; + return VDO_PARAMETER_MISMATCH; + } + + if (to_validate->physical_blocks < config->physical_blocks) { + *error_ptr = "Removing physical storage from a VDO is not supported"; + return VDO_NOT_IMPLEMENTED; + } + + if (!may_grow && (to_validate->physical_blocks > config->physical_blocks)) { + *error_ptr = "VDO physical size may not grow in current state"; + return VDO_NOT_IMPLEMENTED; + } + + return VDO_SUCCESS; +} + +static int prepare_to_modify(struct dm_target *ti, struct device_config *config, + struct vdo *vdo) +{ + int result; + bool may_grow = (vdo_get_admin_state(vdo) != VDO_ADMIN_STATE_PRE_LOADED); + + result = validate_new_device_config(config, vdo->device_config, may_grow, + &ti->error); + if (result != VDO_SUCCESS) + return -EINVAL; + + if (config->logical_blocks > vdo->device_config->logical_blocks) { + block_count_t logical_blocks = vdo->states.vdo.config.logical_blocks; + + uds_log_info("Preparing to resize logical to %llu", + (unsigned long long) config->logical_blocks); + ASSERT_LOG_ONLY((config->logical_blocks > logical_blocks), + "New logical size is larger than current size"); + + result = vdo_prepare_to_grow_block_map(vdo->block_map, + config->logical_blocks); + if (result != VDO_SUCCESS) { + ti->error = "Device vdo_prepare_to_grow_logical failed"; + return result; + } + + uds_log_info("Done preparing to resize logical"); + } + + if (config->physical_blocks > vdo->device_config->physical_blocks) { + result = prepare_to_grow_physical(vdo, config->physical_blocks); + if (result != VDO_SUCCESS) { + if (result == VDO_PARAMETER_MISMATCH) { + /* + * If we don't trap this case, vdo_map_to_system_error() will remap + * it to -EIO, which is misleading and ahistorical. + */ + result = -EINVAL; + } + + if (result == VDO_TOO_MANY_SLABS) + ti->error = "Device vdo_prepare_to_grow_physical failed (specified physical size too big based on formatted slab size)"; + else + ti->error = "Device vdo_prepare_to_grow_physical failed"; + + return result; + } + } + + if (strcmp(config->parent_device_name, vdo->device_config->parent_device_name) != 0) { + const char *device_name = vdo_get_device_name(config->owning_target); + + uds_log_info("Updating backing device of %s from %s to %s", device_name, + vdo->device_config->parent_device_name, + config->parent_device_name); + } + + return VDO_SUCCESS; +} + +static int update_existing_vdo(const char *device_name, struct dm_target *ti, + unsigned int argc, char **argv, struct vdo *vdo) +{ + int result; + struct device_config *config; + + result = parse_device_config(argc, argv, ti, &config); + if (result != VDO_SUCCESS) + return -EINVAL; + + uds_log_info("preparing to modify device '%s'", device_name); + result = prepare_to_modify(ti, config, vdo); + if (result != VDO_SUCCESS) { + free_device_config(config); + return vdo_map_to_system_error(result); + } + + set_device_config(ti, vdo, config); + return VDO_SUCCESS; +} + +static int vdo_ctr(struct dm_target *ti, unsigned int argc, char **argv) +{ + int result; + struct registered_thread allocating_thread, instance_thread; + const char *device_name; + struct vdo *vdo; + + uds_register_allocating_thread(&allocating_thread, NULL); + device_name = vdo_get_device_name(ti); + vdo = vdo_find_matching(vdo_is_named, device_name); + if (vdo == NULL) { + result = construct_new_vdo(ti, argc, argv); + } else { + uds_register_thread_device_id(&instance_thread, &vdo->instance); + result = update_existing_vdo(device_name, ti, argc, argv, vdo); + uds_unregister_thread_device_id(); + } + + uds_unregister_allocating_thread(); + return result; +} + +static void vdo_dtr(struct dm_target *ti) +{ + struct device_config *config = ti->private; + struct vdo *vdo = uds_forget(config->vdo); + + list_del_init(&config->config_list); + if (list_empty(&vdo->device_config_list)) { + const char *device_name; + + /* This was the last config referencing the VDO. Free it. */ + unsigned int instance = vdo->instance; + struct registered_thread allocating_thread, instance_thread; + + uds_register_thread_device_id(&instance_thread, &instance); + uds_register_allocating_thread(&allocating_thread, NULL); + + device_name = vdo_get_device_name(ti); + uds_log_info("stopping device '%s'", device_name); + if (vdo->dump_on_shutdown) + vdo_dump_all(vdo, "device shutdown"); + + vdo_destroy(uds_forget(vdo)); + uds_log_info("device '%s' stopped", device_name); + uds_unregister_thread_device_id(); + uds_unregister_allocating_thread(); + release_instance(instance); + } else if (config == vdo->device_config) { + /* + * The VDO still references this config. Give it a reference to a config that isn't + * being destroyed. + */ + vdo->device_config = list_first_entry(&vdo->device_config_list, + struct device_config, config_list); + } + + free_device_config(config); + ti->private = NULL; +} + +static void vdo_presuspend(struct dm_target *ti) +{ + get_vdo_for_target(ti)->suspend_type = + (dm_noflush_suspending(ti) ? VDO_ADMIN_STATE_SUSPENDING : VDO_ADMIN_STATE_SAVING); +} + +/** + * write_super_block_for_suspend() - Update the VDO state and save the super block. + * @completion: The admin completion + */ +static void write_super_block_for_suspend(struct vdo_completion *completion) +{ + struct vdo *vdo = completion->vdo; + + switch (vdo_get_state(vdo)) { + case VDO_DIRTY: + case VDO_NEW: + vdo_set_state(vdo, VDO_CLEAN); + break; + + case VDO_CLEAN: + case VDO_READ_ONLY_MODE: + case VDO_FORCE_REBUILD: + case VDO_RECOVERING: + case VDO_REBUILD_FOR_UPGRADE: + break; + + case VDO_REPLAYING: + default: + vdo_continue_completion(completion, UDS_BAD_STATE); + return; + } + + vdo_save_components(vdo, completion); +} + +/** + * suspend_callback() - Callback to initiate a suspend, registered in vdo_postsuspend(). + * @completion: The sub-task completion. + */ +static void suspend_callback(struct vdo_completion *completion) +{ + struct vdo *vdo = completion->vdo; + struct admin_state *state = &vdo->admin.state; + int result; + + assert_admin_phase_thread(vdo, __func__); + + switch (advance_phase(vdo)) { + case SUSPEND_PHASE_START: + if (vdo_get_admin_state_code(state)->quiescent) { + /* Already suspended */ + break; + } + + vdo_continue_completion(completion, + vdo_start_operation(state, vdo->suspend_type)); + return; + + case SUSPEND_PHASE_PACKER: + /* + * If the VDO was already resumed from a prior suspend while read-only, some of the + * components may not have been resumed. By setting a read-only error here, we + * guarantee that the result of this suspend will be VDO_READ_ONLY and not + * VDO_INVALID_ADMIN_STATE in that case. + */ + if (vdo_in_read_only_mode(vdo)) + vdo_set_completion_result(completion, VDO_READ_ONLY); + + vdo_drain_packer(vdo->packer, completion); + return; + + case SUSPEND_PHASE_DATA_VIOS: + drain_data_vio_pool(vdo->data_vio_pool, completion); + return; + + case SUSPEND_PHASE_DEDUPE: + vdo_drain_hash_zones(vdo->hash_zones, completion); + return; + + case SUSPEND_PHASE_FLUSHES: + vdo_drain_flusher(vdo->flusher, completion); + return; + + case SUSPEND_PHASE_LOGICAL_ZONES: + /* + * Attempt to flush all I/O before completing post suspend work. We believe a + * suspended device is expected to have persisted all data written before the + * suspend, even if it hasn't been flushed yet. + */ + result = vdo_synchronous_flush(vdo); + if (result != VDO_SUCCESS) + vdo_enter_read_only_mode(vdo, result); + + vdo_drain_logical_zones(vdo->logical_zones, + vdo_get_admin_state_code(state), completion); + return; + + case SUSPEND_PHASE_BLOCK_MAP: + vdo_drain_block_map(vdo->block_map, vdo_get_admin_state_code(state), + completion); + return; + + case SUSPEND_PHASE_JOURNAL: + vdo_drain_recovery_journal(vdo->recovery_journal, + vdo_get_admin_state_code(state), completion); + return; + + case SUSPEND_PHASE_DEPOT: + vdo_drain_slab_depot(vdo->depot, vdo_get_admin_state_code(state), + completion); + return; + + case SUSPEND_PHASE_READ_ONLY_WAIT: + vdo_wait_until_not_entering_read_only_mode(completion); + return; + + case SUSPEND_PHASE_WRITE_SUPER_BLOCK: + if (vdo_is_state_suspending(state) || (completion->result != VDO_SUCCESS)) { + /* If we didn't save the VDO or there was an error, we're done. */ + break; + } + + write_super_block_for_suspend(completion); + return; + + case SUSPEND_PHASE_END: + break; + + default: + vdo_set_completion_result(completion, UDS_BAD_STATE); + } + + finish_operation_callback(completion); +} + +static void vdo_postsuspend(struct dm_target *ti) +{ + struct vdo *vdo = get_vdo_for_target(ti); + struct registered_thread instance_thread; + const char *device_name; + int result; + + uds_register_thread_device_id(&instance_thread, &vdo->instance); + device_name = vdo_get_device_name(vdo->device_config->owning_target); + uds_log_info("suspending device '%s'", device_name); + + /* + * It's important to note any error here does not actually stop device-mapper from + * suspending the device. All this work is done post suspend. + */ + result = perform_admin_operation(vdo, SUSPEND_PHASE_START, suspend_callback, + suspend_callback, "suspend"); + + if ((result == VDO_SUCCESS) || (result == VDO_READ_ONLY)) { + /* + * Treat VDO_READ_ONLY as a success since a read-only suspension still leaves the + * VDO suspended. + */ + uds_log_info("device '%s' suspended", device_name); + } else if (result == VDO_INVALID_ADMIN_STATE) { + uds_log_error("Suspend invoked while in unexpected state: %s", + vdo_get_admin_state(vdo)->name); + } else { + uds_log_error_strerror(result, "Suspend of device '%s' failed", + device_name); + } + + uds_unregister_thread_device_id(); +} + +/** + * was_new() - Check whether the vdo was new when it was loaded. + * @vdo: The vdo to query. + * + * Return: true if the vdo was new. + */ +static bool was_new(const struct vdo *vdo) +{ + return (vdo->load_state == VDO_NEW); +} + +/** + * requires_repair() - Check whether a vdo requires recovery or rebuild. + * @vdo: The vdo to query. + * + * Return: true if the vdo must be repaired. + */ +static bool __must_check requires_repair(const struct vdo *vdo) +{ + switch (vdo_get_state(vdo)) { + case VDO_DIRTY: + case VDO_FORCE_REBUILD: + case VDO_REPLAYING: + case VDO_REBUILD_FOR_UPGRADE: + return true; + + default: + return false; + } +} + +/** + * get_load_type() - Determine how the slab depot was loaded. + * @vdo: The vdo. + * + * Return: How the depot was loaded. + */ +static enum slab_depot_load_type get_load_type(struct vdo *vdo) +{ + if (vdo_state_requires_read_only_rebuild(vdo->load_state)) + return VDO_SLAB_DEPOT_REBUILD_LOAD; + + if (vdo_state_requires_recovery(vdo->load_state)) + return VDO_SLAB_DEPOT_RECOVERY_LOAD; + + return VDO_SLAB_DEPOT_NORMAL_LOAD; +} + +/** + * vdo_initialize_kobjects() - Initialize the vdo sysfs directory. + * @vdo: The vdo being initialized. + * + * Return: VDO_SUCCESS or an error code. + */ +static int vdo_initialize_kobjects(struct vdo *vdo) +{ + int result; + struct dm_target *target = vdo->device_config->owning_target; + struct mapped_device *md = dm_table_get_md(target->table); + + kobject_init(&vdo->vdo_directory, &vdo_directory_type); + vdo->sysfs_added = true; + result = kobject_add(&vdo->vdo_directory, &disk_to_dev(dm_disk(md))->kobj, + "vdo"); + if (result != 0) + return VDO_CANT_ADD_SYSFS_NODE; + + result = vdo_add_dedupe_index_sysfs(vdo->hash_zones); + if (result != 0) + return VDO_CANT_ADD_SYSFS_NODE; + + return vdo_add_sysfs_stats_dir(vdo); +} + +/** + * load_callback() - Callback to do the destructive parts of loading a VDO. + * @completion: The sub-task completion. + */ +static void load_callback(struct vdo_completion *completion) +{ + struct vdo *vdo = completion->vdo; + int result; + + assert_admin_phase_thread(vdo, __func__); + + switch (advance_phase(vdo)) { + case LOAD_PHASE_START: + result = vdo_start_operation(&vdo->admin.state, VDO_ADMIN_STATE_LOADING); + if (result != VDO_SUCCESS) { + vdo_continue_completion(completion, result); + return; + } + + /* Prepare the recovery journal for new entries. */ + vdo_open_recovery_journal(vdo->recovery_journal, vdo->depot, + vdo->block_map); + vdo_allow_read_only_mode_entry(completion); + return; + + case LOAD_PHASE_STATS: + vdo_continue_completion(completion, vdo_initialize_kobjects(vdo)); + return; + + case LOAD_PHASE_LOAD_DEPOT: + if (vdo_is_read_only(vdo)) { + /* + * In read-only mode we don't use the allocator and it may not even be + * readable, so don't bother trying to load it. + */ + vdo_set_completion_result(completion, VDO_READ_ONLY); + break; + } + + if (requires_repair(vdo)) { + vdo_repair(completion); + return; + } + + vdo_load_slab_depot(vdo->depot, + (was_new(vdo) ? VDO_ADMIN_STATE_FORMATTING : + VDO_ADMIN_STATE_LOADING), + completion, NULL); + return; + + case LOAD_PHASE_MAKE_DIRTY: + vdo_set_state(vdo, VDO_DIRTY); + vdo_save_components(vdo, completion); + return; + + case LOAD_PHASE_PREPARE_TO_ALLOCATE: + vdo_initialize_block_map_from_journal(vdo->block_map, + vdo->recovery_journal); + vdo_prepare_slab_depot_to_allocate(vdo->depot, get_load_type(vdo), + completion); + return; + + case LOAD_PHASE_SCRUB_SLABS: + if (vdo_state_requires_recovery(vdo->load_state)) + vdo_enter_recovery_mode(vdo); + + vdo_scrub_all_unrecovered_slabs(vdo->depot, completion); + return; + + case LOAD_PHASE_DATA_REDUCTION: + WRITE_ONCE(vdo->compressing, vdo->device_config->compression); + if (vdo->device_config->deduplication) { + /* + * Don't try to load or rebuild the index first (and log scary error + * messages) if this is known to be a newly-formatted volume. + */ + vdo_start_dedupe_index(vdo->hash_zones, was_new(vdo)); + } + + vdo->allocations_allowed = false; + fallthrough; + + case LOAD_PHASE_FINISHED: + break; + + case LOAD_PHASE_DRAIN_JOURNAL: + vdo_drain_recovery_journal(vdo->recovery_journal, VDO_ADMIN_STATE_SAVING, + completion); + return; + + case LOAD_PHASE_WAIT_FOR_READ_ONLY: + /* Avoid an infinite loop */ + completion->error_handler = NULL; + vdo->admin.phase = LOAD_PHASE_FINISHED; + vdo_wait_until_not_entering_read_only_mode(completion); + return; + + default: + vdo_set_completion_result(completion, UDS_BAD_STATE); + } + + finish_operation_callback(completion); +} + +/** + * handle_load_error() - Handle an error during the load operation. + * @completion: The admin completion. + * + * If at all possible, brings the vdo online in read-only mode. This handler is registered in + * vdo_preresume_registered(). + */ +static void handle_load_error(struct vdo_completion *completion) +{ + struct vdo *vdo = completion->vdo; + + if (vdo_requeue_completion_if_needed(completion, + vdo->thread_config.admin_thread)) + return; + + if (vdo_state_requires_read_only_rebuild(vdo->load_state) && + (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) { + uds_log_error_strerror(completion->result, "aborting load"); + vdo->admin.phase = LOAD_PHASE_DRAIN_JOURNAL; + load_callback(uds_forget(completion)); + return; + } + + uds_log_error_strerror(completion->result, + "Entering read-only mode due to load error"); + vdo->admin.phase = LOAD_PHASE_WAIT_FOR_READ_ONLY; + vdo_enter_read_only_mode(vdo, completion->result); + completion->result = VDO_READ_ONLY; + load_callback(completion); +} + +/** + * write_super_block_for_resume() - Update the VDO state and save the super block. + * @completion: The admin completion + */ +static void write_super_block_for_resume(struct vdo_completion *completion) +{ + struct vdo *vdo = completion->vdo; + + switch (vdo_get_state(vdo)) { + case VDO_CLEAN: + case VDO_NEW: + vdo_set_state(vdo, VDO_DIRTY); + vdo_save_components(vdo, completion); + return; + + case VDO_DIRTY: + case VDO_READ_ONLY_MODE: + case VDO_FORCE_REBUILD: + case VDO_RECOVERING: + case VDO_REBUILD_FOR_UPGRADE: + /* No need to write the super block in these cases */ + vdo_launch_completion(completion); + return; + + case VDO_REPLAYING: + default: + vdo_continue_completion(completion, UDS_BAD_STATE); + } +} + +/** + * resume_callback() - Callback to resume a VDO. + * @completion: The admin completion. + */ +static void resume_callback(struct vdo_completion *completion) +{ + struct vdo *vdo = completion->vdo; + int result; + + assert_admin_phase_thread(vdo, __func__); + + switch (advance_phase(vdo)) { + case RESUME_PHASE_START: + result = vdo_start_operation(&vdo->admin.state, + VDO_ADMIN_STATE_RESUMING); + if (result != VDO_SUCCESS) { + vdo_continue_completion(completion, result); + return; + } + + write_super_block_for_resume(completion); + return; + + case RESUME_PHASE_ALLOW_READ_ONLY_MODE: + vdo_allow_read_only_mode_entry(completion); + return; + + case RESUME_PHASE_DEDUPE: + vdo_resume_hash_zones(vdo->hash_zones, completion); + return; + + case RESUME_PHASE_DEPOT: + vdo_resume_slab_depot(vdo->depot, completion); + return; + + case RESUME_PHASE_JOURNAL: + vdo_resume_recovery_journal(vdo->recovery_journal, completion); + return; + + case RESUME_PHASE_BLOCK_MAP: + vdo_resume_block_map(vdo->block_map, completion); + return; + + case RESUME_PHASE_LOGICAL_ZONES: + vdo_resume_logical_zones(vdo->logical_zones, completion); + return; + + case RESUME_PHASE_PACKER: + { + bool was_enabled = vdo_get_compressing(vdo); + bool enable = vdo->device_config->compression; + + if (enable != was_enabled) + WRITE_ONCE(vdo->compressing, enable); + uds_log_info("compression is %s", (enable ? "enabled" : "disabled")); + + vdo_resume_packer(vdo->packer, completion); + return; + } + + case RESUME_PHASE_FLUSHER: + vdo_resume_flusher(vdo->flusher, completion); + return; + + case RESUME_PHASE_DATA_VIOS: + resume_data_vio_pool(vdo->data_vio_pool, completion); + return; + + case RESUME_PHASE_END: + break; + + default: + vdo_set_completion_result(completion, UDS_BAD_STATE); + } + + finish_operation_callback(completion); +} + +/** + * grow_logical_callback() - Callback to initiate a grow logical. + * @completion: The admin completion. + * + * Registered in perform_grow_logical(). + */ +static void grow_logical_callback(struct vdo_completion *completion) +{ + struct vdo *vdo = completion->vdo; + int result; + + assert_admin_phase_thread(vdo, __func__); + + switch (advance_phase(vdo)) { + case GROW_LOGICAL_PHASE_START: + if (vdo_is_read_only(vdo)) { + uds_log_error_strerror(VDO_READ_ONLY, + "Can't grow logical size of a read-only VDO"); + vdo_set_completion_result(completion, VDO_READ_ONLY); + break; + } + + result = vdo_start_operation(&vdo->admin.state, + VDO_ADMIN_STATE_SUSPENDED_OPERATION); + if (result != VDO_SUCCESS) { + vdo_continue_completion(completion, result); + return; + } + + vdo->states.vdo.config.logical_blocks = vdo->block_map->next_entry_count; + vdo_save_components(vdo, completion); + return; + + case GROW_LOGICAL_PHASE_GROW_BLOCK_MAP: + vdo_grow_block_map(vdo->block_map, completion); + return; + + case GROW_LOGICAL_PHASE_END: + break; + + case GROW_LOGICAL_PHASE_ERROR: + vdo_enter_read_only_mode(vdo, completion->result); + break; + + default: + vdo_set_completion_result(completion, UDS_BAD_STATE); + } + + finish_operation_callback(completion); +} + +/** + * handle_logical_growth_error() - Handle an error during the grow physical process. + * @completion: The admin completion. + */ +static void handle_logical_growth_error(struct vdo_completion *completion) +{ + struct vdo *vdo = completion->vdo; + + if (vdo->admin.phase == GROW_LOGICAL_PHASE_GROW_BLOCK_MAP) { + /* + * We've failed to write the new size in the super block, so set our in memory + * config back to the old size. + */ + vdo->states.vdo.config.logical_blocks = vdo->block_map->entry_count; + vdo_abandon_block_map_growth(vdo->block_map); + } + + vdo->admin.phase = GROW_LOGICAL_PHASE_ERROR; + grow_logical_callback(completion); +} + +/** + * perform_grow_logical() - Grow the logical size of the vdo. + * @vdo: The vdo to grow. + * @new_logical_blocks: The size to which the vdo should be grown. + * + * Context: This method may only be called when the vdo has been suspended and must not be called + * from a base thread. + * + * Return: VDO_SUCCESS or an error. + */ +static int perform_grow_logical(struct vdo *vdo, block_count_t new_logical_blocks) +{ + int result; + + if (vdo->device_config->logical_blocks == new_logical_blocks) { + /* + * A table was loaded for which we prepared to grow, but a table without that + * growth was what we are resuming with. + */ + vdo_abandon_block_map_growth(vdo->block_map); + return VDO_SUCCESS; + } + + uds_log_info("Resizing logical to %llu", + (unsigned long long) new_logical_blocks); + if (vdo->block_map->next_entry_count != new_logical_blocks) + return VDO_PARAMETER_MISMATCH; + + result = perform_admin_operation(vdo, GROW_LOGICAL_PHASE_START, + grow_logical_callback, + handle_logical_growth_error, "grow logical"); + if (result != VDO_SUCCESS) + return result; + + uds_log_info("Logical blocks now %llu", (unsigned long long) new_logical_blocks); + return VDO_SUCCESS; +} + +static void copy_callback(int read_err, unsigned long write_err, void *context) +{ + struct vdo_completion *completion = context; + int result = (((read_err == 0) && (write_err == 0)) ? VDO_SUCCESS : -EIO); + + vdo_continue_completion(completion, result); +} + +static void partition_to_region(struct partition *partition, struct vdo *vdo, + struct dm_io_region *region) +{ + physical_block_number_t pbn = partition->offset - vdo->geometry.bio_offset; + + *region = (struct dm_io_region) { + .bdev = vdo_get_backing_device(vdo), + .sector = pbn * VDO_SECTORS_PER_BLOCK, + .count = partition->count * VDO_SECTORS_PER_BLOCK, + }; +} + +/** + * copy_partition() - Copy a partition from the location specified in the current layout to that in + * the next layout. + * @vdo: The vdo preparing to grow. + * @id: The ID of the partition to copy. + * @parent: The completion to notify when the copy is complete. + */ +static void copy_partition(struct vdo *vdo, enum partition_id id, + struct vdo_completion *parent) +{ + struct dm_io_region read_region, write_regions[1]; + struct partition *from = vdo_get_known_partition(&vdo->layout, id); + struct partition *to = vdo_get_known_partition(&vdo->next_layout, id); + + partition_to_region(from, vdo, &read_region); + partition_to_region(to, vdo, &write_regions[0]); + dm_kcopyd_copy(vdo->partition_copier, &read_region, 1, write_regions, 0, + copy_callback, parent); +} + +/** + * grow_physical_callback() - Callback to initiate a grow physical. + * @completion: The admin completion. + * + * Registered in perform_grow_physical(). + */ +static void grow_physical_callback(struct vdo_completion *completion) +{ + struct vdo *vdo = completion->vdo; + int result; + + assert_admin_phase_thread(vdo, __func__); + + switch (advance_phase(vdo)) { + case GROW_PHYSICAL_PHASE_START: + if (vdo_is_read_only(vdo)) { + uds_log_error_strerror(VDO_READ_ONLY, + "Can't grow physical size of a read-only VDO"); + vdo_set_completion_result(completion, VDO_READ_ONLY); + break; + } + + result = vdo_start_operation(&vdo->admin.state, + VDO_ADMIN_STATE_SUSPENDED_OPERATION); + if (result != VDO_SUCCESS) { + vdo_continue_completion(completion, result); + return; + } + + /* Copy the journal into the new layout. */ + copy_partition(vdo, VDO_RECOVERY_JOURNAL_PARTITION, completion); + return; + + case GROW_PHYSICAL_PHASE_COPY_SUMMARY: + copy_partition(vdo, VDO_SLAB_SUMMARY_PARTITION, completion); + return; + + case GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS: + vdo_uninitialize_layout(&vdo->layout); + vdo->layout = vdo->next_layout; + uds_forget(vdo->next_layout.head); + vdo->states.vdo.config.physical_blocks = vdo->layout.size; + vdo_update_slab_depot_size(vdo->depot); + vdo_save_components(vdo, completion); + return; + + case GROW_PHYSICAL_PHASE_USE_NEW_SLABS: + vdo_use_new_slabs(vdo->depot, completion); + return; + + case GROW_PHYSICAL_PHASE_END: + vdo->depot->summary_origin = + vdo_get_known_partition(&vdo->layout, + VDO_SLAB_SUMMARY_PARTITION)->offset; + vdo->recovery_journal->origin = + vdo_get_known_partition(&vdo->layout, + VDO_RECOVERY_JOURNAL_PARTITION)->offset; + break; + + case GROW_PHYSICAL_PHASE_ERROR: + vdo_enter_read_only_mode(vdo, completion->result); + break; + + default: + vdo_set_completion_result(completion, UDS_BAD_STATE); + } + + vdo_uninitialize_layout(&vdo->next_layout); + finish_operation_callback(completion); +} + +/** + * handle_physical_growth_error() - Handle an error during the grow physical process. + * @completion: The sub-task completion. + */ +static void handle_physical_growth_error(struct vdo_completion *completion) +{ + completion->vdo->admin.phase = GROW_PHYSICAL_PHASE_ERROR; + grow_physical_callback(completion); +} + +/** + * perform_grow_physical() - Grow the physical size of the vdo. + * @vdo: The vdo to resize. + * @new_physical_blocks: The new physical size in blocks. + * + * Context: This method may only be called when the vdo has been suspended and must not be called + * from a base thread. + * + * Return: VDO_SUCCESS or an error. + */ +static int perform_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks) +{ + int result; + block_count_t new_depot_size, prepared_depot_size; + block_count_t old_physical_blocks = vdo->states.vdo.config.physical_blocks; + + /* Skip any noop grows. */ + if (old_physical_blocks == new_physical_blocks) + return VDO_SUCCESS; + + if (new_physical_blocks != vdo->next_layout.size) { + /* + * Either the VDO isn't prepared to grow, or it was prepared to grow to a different + * size. Doing this check here relies on the fact that the call to this method is + * done under the dmsetup message lock. + */ + vdo_uninitialize_layout(&vdo->next_layout); + vdo_abandon_new_slabs(vdo->depot); + return VDO_PARAMETER_MISMATCH; + } + + /* Validate that we are prepared to grow appropriately. */ + new_depot_size = + vdo_get_known_partition(&vdo->next_layout, VDO_SLAB_DEPOT_PARTITION)->count; + prepared_depot_size = (vdo->depot->new_slabs == NULL) ? 0 : vdo->depot->new_size; + if (prepared_depot_size != new_depot_size) + return VDO_PARAMETER_MISMATCH; + + result = perform_admin_operation(vdo, GROW_PHYSICAL_PHASE_START, + grow_physical_callback, + handle_physical_growth_error, "grow physical"); + if (result != VDO_SUCCESS) + return result; + + uds_log_info("Physical block count was %llu, now %llu", + (unsigned long long) old_physical_blocks, + (unsigned long long) new_physical_blocks); + return VDO_SUCCESS; +} + +/** + * apply_new_vdo_configuration() - Attempt to make any configuration changes from the table being + * resumed. + * @vdo: The vdo being resumed. + * @config: The new device configuration derived from the table with which the vdo is being + * resumed. + * + * Return: VDO_SUCCESS or an error. + */ +static int __must_check apply_new_vdo_configuration(struct vdo *vdo, + struct device_config *config) +{ + int result; + + result = perform_grow_logical(vdo, config->logical_blocks); + if (result != VDO_SUCCESS) { + uds_log_error("grow logical operation failed, result = %d", result); + return result; + } + + result = perform_grow_physical(vdo, config->physical_blocks); + if (result != VDO_SUCCESS) + uds_log_error("resize operation failed, result = %d", result); + + return result; +} + +static int vdo_preresume_registered(struct dm_target *ti, struct vdo *vdo) +{ + struct device_config *config = ti->private; + const char *device_name = vdo_get_device_name(ti); + block_count_t backing_blocks; + int result; + + backing_blocks = get_underlying_device_block_count(vdo); + if (backing_blocks < config->physical_blocks) { + /* FIXME: can this still happen? */ + uds_log_error("resume of device '%s' failed: backing device has %llu blocks but VDO physical size is %llu blocks", + device_name, (unsigned long long) backing_blocks, + (unsigned long long) config->physical_blocks); + return -EINVAL; + } + + if (vdo_get_admin_state(vdo) == VDO_ADMIN_STATE_PRE_LOADED) { + uds_log_info("starting device '%s'", device_name); + result = perform_admin_operation(vdo, LOAD_PHASE_START, load_callback, + handle_load_error, "load"); + if ((result != VDO_SUCCESS) && (result != VDO_READ_ONLY)) { + /* + * Something has gone very wrong. Make sure everything has drained and + * leave the device in an unresumable state. + */ + uds_log_error_strerror(result, + "Start failed, could not load VDO metadata"); + vdo->suspend_type = VDO_ADMIN_STATE_STOPPING; + perform_admin_operation(vdo, SUSPEND_PHASE_START, + suspend_callback, suspend_callback, + "suspend"); + return result; + } + + /* Even if the VDO is read-only, it is now able to handle read requests. */ + uds_log_info("device '%s' started", device_name); + } + + uds_log_info("resuming device '%s'", device_name); + + /* If this fails, the VDO was not in a state to be resumed. This should never happen. */ + result = apply_new_vdo_configuration(vdo, config); + BUG_ON(result == VDO_INVALID_ADMIN_STATE); + + /* + * Now that we've tried to modify the vdo, the new config *is* the config, whether the + * modifications worked or not. + */ + vdo->device_config = config; + + /* + * Any error here is highly unexpected and the state of the vdo is questionable, so we mark + * it read-only in memory. Because we are suspended, the read-only state will not be + * written to disk. + */ + if (result != VDO_SUCCESS) { + uds_log_error_strerror(result, + "Commit of modifications to device '%s' failed", + device_name); + vdo_enter_read_only_mode(vdo, result); + return result; + } + + if (vdo_get_admin_state(vdo)->normal) { + /* The VDO was just started, so we don't need to resume it. */ + return VDO_SUCCESS; + } + + result = perform_admin_operation(vdo, RESUME_PHASE_START, resume_callback, + resume_callback, "resume"); + BUG_ON(result == VDO_INVALID_ADMIN_STATE); + if (result == VDO_READ_ONLY) { + /* Even if the vdo is read-only, it has still resumed. */ + result = VDO_SUCCESS; + } + + if (result != VDO_SUCCESS) + uds_log_error("resume of device '%s' failed with error: %d", device_name, + result); + + return result; +} + +static int vdo_preresume(struct dm_target *ti) +{ + struct registered_thread instance_thread; + struct vdo *vdo = get_vdo_for_target(ti); + int result; + + uds_register_thread_device_id(&instance_thread, &vdo->instance); + result = vdo_preresume_registered(ti, vdo); + if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE)) + result = -EINVAL; + uds_unregister_thread_device_id(); + return vdo_map_to_system_error(result); +} + +static void vdo_resume(struct dm_target *ti) +{ + struct registered_thread instance_thread; + + uds_register_thread_device_id(&instance_thread, + &get_vdo_for_target(ti)->instance); + uds_log_info("device '%s' resumed", vdo_get_device_name(ti)); + uds_unregister_thread_device_id(); +} + +/* + * If anything changes that affects how user tools will interact with vdo, update the version + * number and make sure documentation about the change is complete so tools can properly update + * their management code. + */ +static struct target_type vdo_target_bio = { + .features = DM_TARGET_SINGLETON, + .name = "vdo", + .version = { 8, 2, 0 }, + .module = THIS_MODULE, + .ctr = vdo_ctr, + .dtr = vdo_dtr, + .io_hints = vdo_io_hints, + .iterate_devices = vdo_iterate_devices, + .map = vdo_map_bio, + .message = vdo_message, + .status = vdo_status, + .presuspend = vdo_presuspend, + .postsuspend = vdo_postsuspend, + .preresume = vdo_preresume, + .resume = vdo_resume, +}; + +static bool dm_registered; + +static void vdo_module_destroy(void) +{ + uds_log_debug("unloading"); + + if (dm_registered) + dm_unregister_target(&vdo_target_bio); + + ASSERT_LOG_ONLY(instances.count == 0, + "should have no instance numbers still in use, but have %u", + instances.count); + uds_free(instances.words); + memset(&instances, 0, sizeof(struct instance_tracker)); + + uds_log_info("unloaded version %s", CURRENT_VERSION); +} + +static int __init vdo_init(void) +{ + int result = 0; + + /* + * UDS module level initialization must be done first, as VDO initialization depends on it + */ + uds_initialize_thread_device_registry(); + uds_memory_init(); + uds_init_sysfs(); + + vdo_initialize_device_registry_once(); + uds_log_info("loaded version %s", CURRENT_VERSION); + + /* Add VDO errors to the already existing set of errors in UDS. */ + result = vdo_register_status_codes(); + if (result != UDS_SUCCESS) { + uds_log_error("vdo_register_status_codes failed %d", result); + vdo_module_destroy(); + return result; + } + + result = dm_register_target(&vdo_target_bio); + if (result < 0) { + uds_log_error("dm_register_target failed %d", result); + vdo_module_destroy(); + return result; + } + dm_registered = true; + + return result; +} + +static void __exit vdo_exit(void) +{ + vdo_module_destroy(); + /* + * UDS module level exit processing must be done after all VDO module exit processing is + * complete. + */ + uds_put_sysfs(); + uds_memory_exit(); +} + +module_init(vdo_init); +module_exit(vdo_exit); + +MODULE_DESCRIPTION(DM_NAME " target for transparent deduplication"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); -- cgit v1.2.3