summaryrefslogtreecommitdiff
path: root/drivers/md/dm-vdo/types.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-13 19:57:23 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-13 19:57:23 +0300
commit61387b8dcf1dc0f30cf690956a48768a3fce1810 (patch)
tree78f176823ac74dc82f26462ce347735cf73b79f0 /drivers/md/dm-vdo/types.h
parentc0499a081285dcacacd10b0cb20ccba777411b88 (diff)
parentcb824724dccb3195d22cad96e7b65fe13621d0a6 (diff)
downloadlinux-61387b8dcf1dc0f30cf690956a48768a3fce1810.tar.xz
Merge tag 'for-6.9/dm-vdo' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper VDO target from Mike Snitzer: "Introduce the DM vdo target which provides block-level deduplication, compression, and thin provisioning. Please see: Documentation/admin-guide/device-mapper/vdo.rst Documentation/admin-guide/device-mapper/vdo-design.rst The DM vdo target handles its concurrency by pinning an IO, and subsequent stages of handling that IO, to a particular VDO thread. This aspect of VDO is "unique" but its overall implementation is very tightly coupled to its mostly lockless threading model. As such, VDO is not easily changed to use more traditional finer-grained locking and Linux workqueues. Please see the "Zones and Threading" section of vdo-design.rst The DM vdo target has been used in production for many years but has seen significant changes over the past ~6 years to prepare it for upstream inclusion. The codebase is still large but it is isolated to drivers/md/dm-vdo/ and has been made considerably more approachable and maintainable. Matt Sakai has been added to the MAINTAINERS file to reflect that he will send VDO changes upstream through the DM subsystem maintainers" * tag 'for-6.9/dm-vdo' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (142 commits) dm vdo: document minimum metadata size requirements dm vdo: remove meaningless version number constant dm vdo: remove vdo_perform_once dm vdo block-map: Remove stray semicolon dm vdo string-utils: change from uds_ to vdo_ namespace dm vdo logger: change from uds_ to vdo_ namespace dm vdo funnel-queue: change from uds_ to vdo_ namespace dm vdo indexer: fix use after free dm vdo logger: remove log level to string conversion code dm vdo: document log_level parameter dm vdo: add 'log_level' module parameter dm vdo: remove all sysfs interfaces dm vdo target: eliminate inappropriate uses of UDS_SUCCESS dm vdo indexer: update ASSERT and ASSERT_LOG_ONLY usage dm vdo encodings: update some stale comments dm vdo permassert: audit all of ASSERT to test for VDO_SUCCESS dm-vdo funnel-workqueue: return VDO_SUCCESS from make_simple_work_queue dm vdo thread-utils: return VDO_SUCCESS on vdo_create_thread success dm vdo int-map: return VDO_SUCCESS on success dm vdo: check for VDO_SUCCESS return value from memory-alloc functions ...
Diffstat (limited to 'drivers/md/dm-vdo/types.h')
-rw-r--r--drivers/md/dm-vdo/types.h393
1 files changed, 393 insertions, 0 deletions
diff --git a/drivers/md/dm-vdo/types.h b/drivers/md/dm-vdo/types.h
new file mode 100644
index 000000000000..dbe892b10f26
--- /dev/null
+++ b/drivers/md/dm-vdo/types.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#ifndef VDO_TYPES_H
+#define VDO_TYPES_H
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/device-mapper.h>
+#include <linux/list.h>
+#include <linux/compiler_attributes.h>
+#include <linux/types.h>
+
+#include "funnel-queue.h"
+
+/* A size type in blocks. */
+typedef u64 block_count_t;
+
+/* The size of a block. */
+typedef u16 block_size_t;
+
+/* A counter for data_vios */
+typedef u16 data_vio_count_t;
+
+/* A height within a tree. */
+typedef u8 height_t;
+
+/* The logical block number as used by the consumer. */
+typedef u64 logical_block_number_t;
+
+/* The type of the nonce used to identify instances of VDO. */
+typedef u64 nonce_t;
+
+/* A size in pages. */
+typedef u32 page_count_t;
+
+/* A page number. */
+typedef u32 page_number_t;
+
+/*
+ * The physical (well, less logical) block number at which the block is found on the underlying
+ * device.
+ */
+typedef u64 physical_block_number_t;
+
+/* A count of tree roots. */
+typedef u8 root_count_t;
+
+/* A number of sectors. */
+typedef u8 sector_count_t;
+
+/* A sequence number. */
+typedef u64 sequence_number_t;
+
+/* The offset of a block within a slab. */
+typedef u32 slab_block_number;
+
+/* A size type in slabs. */
+typedef u16 slab_count_t;
+
+/* A slot in a bin or block map page. */
+typedef u16 slot_number_t;
+
+/* typedef thread_count_t - A thread counter. */
+typedef u8 thread_count_t;
+
+/* typedef thread_id_t - A thread ID, vdo threads are numbered sequentially from 0. */
+typedef u8 thread_id_t;
+
+/* A zone counter */
+typedef u8 zone_count_t;
+
+/* The following enums are persisted on storage, so the values must be preserved. */
+
+/* The current operating mode of the VDO. */
+enum vdo_state {
+ VDO_DIRTY = 0,
+ VDO_NEW = 1,
+ VDO_CLEAN = 2,
+ VDO_READ_ONLY_MODE = 3,
+ VDO_FORCE_REBUILD = 4,
+ VDO_RECOVERING = 5,
+ VDO_REPLAYING = 6, /* VDO_REPLAYING is never set anymore, but retained for upgrade */
+ VDO_REBUILD_FOR_UPGRADE = 7,
+
+ /* Keep VDO_STATE_COUNT at the bottom. */
+ VDO_STATE_COUNT
+};
+
+/**
+ * vdo_state_requires_read_only_rebuild() - Check whether a vdo_state indicates
+ * that a read-only rebuild is required.
+ * @state: The vdo_state to check.
+ *
+ * Return: true if the state indicates a rebuild is required
+ */
+static inline bool __must_check vdo_state_requires_read_only_rebuild(enum vdo_state state)
+{
+ return ((state == VDO_FORCE_REBUILD) || (state == VDO_REBUILD_FOR_UPGRADE));
+}
+
+/**
+ * vdo_state_requires_recovery() - Check whether a vdo state indicates that recovery is needed.
+ * @state: The state to check.
+ *
+ * Return: true if the state indicates a recovery is required
+ */
+static inline bool __must_check vdo_state_requires_recovery(enum vdo_state state)
+{
+ return ((state == VDO_DIRTY) || (state == VDO_REPLAYING) || (state == VDO_RECOVERING));
+}
+
+/*
+ * The current operation on a physical block (from the point of view of the recovery journal, slab
+ * journals, and reference counts.
+ */
+enum journal_operation {
+ VDO_JOURNAL_DATA_REMAPPING = 0,
+ VDO_JOURNAL_BLOCK_MAP_REMAPPING = 1,
+} __packed;
+
+/* Partition IDs encoded in the volume layout in the super block. */
+enum partition_id {
+ VDO_BLOCK_MAP_PARTITION = 0,
+ VDO_SLAB_DEPOT_PARTITION = 1,
+ VDO_RECOVERY_JOURNAL_PARTITION = 2,
+ VDO_SLAB_SUMMARY_PARTITION = 3,
+} __packed;
+
+/* Metadata types for the vdo. */
+enum vdo_metadata_type {
+ VDO_METADATA_RECOVERY_JOURNAL = 1,
+ VDO_METADATA_SLAB_JOURNAL = 2,
+ VDO_METADATA_RECOVERY_JOURNAL_2 = 3,
+} __packed;
+
+/* A position in the block map where a block map entry is stored. */
+struct block_map_slot {
+ physical_block_number_t pbn;
+ slot_number_t slot;
+};
+
+/*
+ * Four bits of each five-byte block map entry contain a mapping state value used to distinguish
+ * unmapped or discarded logical blocks (which are treated as mapped to the zero block) from entries
+ * that have been mapped to a physical block, including the zero block.
+ *
+ * FIXME: these should maybe be defines.
+ */
+enum block_mapping_state {
+ VDO_MAPPING_STATE_UNMAPPED = 0, /* Must be zero to be the default value */
+ VDO_MAPPING_STATE_UNCOMPRESSED = 1, /* A normal (uncompressed) block */
+ VDO_MAPPING_STATE_COMPRESSED_BASE = 2, /* Compressed in slot 0 */
+ VDO_MAPPING_STATE_COMPRESSED_MAX = 15, /* Compressed in slot 13 */
+};
+
+enum {
+ VDO_MAX_COMPRESSION_SLOTS =
+ (VDO_MAPPING_STATE_COMPRESSED_MAX - VDO_MAPPING_STATE_COMPRESSED_BASE + 1),
+};
+
+
+struct data_location {
+ physical_block_number_t pbn;
+ enum block_mapping_state state;
+};
+
+/* The configuration of a single slab derived from the configured block size and slab size. */
+struct slab_config {
+ /* total number of blocks in the slab */
+ block_count_t slab_blocks;
+ /* number of blocks available for data */
+ block_count_t data_blocks;
+ /* number of blocks for reference counts */
+ block_count_t reference_count_blocks;
+ /* number of blocks for the slab journal */
+ block_count_t slab_journal_blocks;
+ /*
+ * Number of blocks after which the slab journal starts pushing out a reference_block for
+ * each new entry it receives.
+ */
+ block_count_t slab_journal_flushing_threshold;
+ /*
+ * Number of blocks after which the slab journal pushes out all reference_blocks and makes
+ * all vios wait.
+ */
+ block_count_t slab_journal_blocking_threshold;
+ /* Number of blocks after which the slab must be scrubbed before coming online. */
+ block_count_t slab_journal_scrubbing_threshold;
+} __packed;
+
+/*
+ * This structure is memcmp'd for equality. Keep it packed and don't add any fields that are not
+ * properly set in both extant and parsed configs.
+ */
+struct thread_count_config {
+ unsigned int bio_ack_threads;
+ unsigned int bio_threads;
+ unsigned int bio_rotation_interval;
+ unsigned int cpu_threads;
+ unsigned int logical_zones;
+ unsigned int physical_zones;
+ unsigned int hash_zones;
+} __packed;
+
+struct device_config {
+ struct dm_target *owning_target;
+ struct dm_dev *owned_device;
+ struct vdo *vdo;
+ /* All configs referencing a layer are kept on a list in the layer */
+ struct list_head config_list;
+ char *original_string;
+ unsigned int version;
+ char *parent_device_name;
+ block_count_t physical_blocks;
+ /*
+ * This is the number of logical blocks from VDO's internal point of view. It is the number
+ * of 4K blocks regardless of the value of the logical_block_size parameter below.
+ */
+ block_count_t logical_blocks;
+ unsigned int logical_block_size;
+ unsigned int cache_size;
+ unsigned int block_map_maximum_age;
+ bool deduplication;
+ bool compression;
+ struct thread_count_config thread_counts;
+ block_count_t max_discard_blocks;
+};
+
+enum vdo_completion_type {
+ /* Keep VDO_UNSET_COMPLETION_TYPE at the top. */
+ VDO_UNSET_COMPLETION_TYPE,
+ VDO_ACTION_COMPLETION,
+ VDO_ADMIN_COMPLETION,
+ VDO_BLOCK_ALLOCATOR_COMPLETION,
+ VDO_DATA_VIO_POOL_COMPLETION,
+ VDO_DECREMENT_COMPLETION,
+ VDO_FLUSH_COMPLETION,
+ VDO_FLUSH_NOTIFICATION_COMPLETION,
+ VDO_GENERATION_FLUSHED_COMPLETION,
+ VDO_HASH_ZONE_COMPLETION,
+ VDO_HASH_ZONES_COMPLETION,
+ VDO_LOCK_COUNTER_COMPLETION,
+ VDO_PAGE_COMPLETION,
+ VDO_READ_ONLY_MODE_COMPLETION,
+ VDO_REPAIR_COMPLETION,
+ VDO_SYNC_COMPLETION,
+ VIO_COMPLETION,
+} __packed;
+
+struct vdo_completion;
+
+/**
+ * typedef vdo_action_fn - An asynchronous VDO operation.
+ * @completion: The completion of the operation.
+ */
+typedef void (*vdo_action_fn)(struct vdo_completion *completion);
+
+enum vdo_completion_priority {
+ BIO_ACK_Q_ACK_PRIORITY = 0,
+ BIO_ACK_Q_MAX_PRIORITY = 0,
+ BIO_Q_COMPRESSED_DATA_PRIORITY = 0,
+ BIO_Q_DATA_PRIORITY = 0,
+ BIO_Q_FLUSH_PRIORITY = 2,
+ BIO_Q_HIGH_PRIORITY = 2,
+ BIO_Q_METADATA_PRIORITY = 1,
+ BIO_Q_VERIFY_PRIORITY = 1,
+ BIO_Q_MAX_PRIORITY = 2,
+ CPU_Q_COMPLETE_VIO_PRIORITY = 0,
+ CPU_Q_COMPLETE_READ_PRIORITY = 0,
+ CPU_Q_COMPRESS_BLOCK_PRIORITY = 0,
+ CPU_Q_EVENT_REPORTER_PRIORITY = 0,
+ CPU_Q_HASH_BLOCK_PRIORITY = 0,
+ CPU_Q_MAX_PRIORITY = 0,
+ UDS_Q_PRIORITY = 0,
+ UDS_Q_MAX_PRIORITY = 0,
+ VDO_DEFAULT_Q_COMPLETION_PRIORITY = 1,
+ VDO_DEFAULT_Q_FLUSH_PRIORITY = 2,
+ VDO_DEFAULT_Q_MAP_BIO_PRIORITY = 0,
+ VDO_DEFAULT_Q_SYNC_PRIORITY = 2,
+ VDO_DEFAULT_Q_VIO_CALLBACK_PRIORITY = 1,
+ VDO_DEFAULT_Q_MAX_PRIORITY = 2,
+ /* The maximum allowable priority */
+ VDO_WORK_Q_MAX_PRIORITY = 2,
+ /* A value which must be out of range for a valid priority */
+ VDO_WORK_Q_DEFAULT_PRIORITY = VDO_WORK_Q_MAX_PRIORITY + 1,
+};
+
+struct vdo_completion {
+ /* The type of completion this is */
+ enum vdo_completion_type type;
+
+ /*
+ * <code>true</code> once the processing of the operation is complete. This flag should not
+ * be used by waiters external to the VDO base as it is used to gate calling the callback.
+ */
+ bool complete;
+
+ /*
+ * If true, queue this completion on the next callback invocation, even if it is already
+ * running on the correct thread.
+ */
+ bool requeue;
+
+ /* The ID of the thread which should run the next callback */
+ thread_id_t callback_thread_id;
+
+ /* The result of the operation */
+ int result;
+
+ /* The VDO on which this completion operates */
+ struct vdo *vdo;
+
+ /* The callback which will be called once the operation is complete */
+ vdo_action_fn callback;
+
+ /* Callback which, if set, will be called if an error result is set */
+ vdo_action_fn error_handler;
+
+ /* The parent object, if any, that spawned this completion */
+ void *parent;
+
+ /* Entry link for lock-free work queue */
+ struct funnel_queue_entry work_queue_entry_link;
+ enum vdo_completion_priority priority;
+ struct vdo_work_queue *my_queue;
+};
+
+struct block_allocator;
+struct data_vio;
+struct vdo;
+struct vdo_config;
+
+/* vio types for statistics and instrumentation. */
+enum vio_type {
+ VIO_TYPE_UNINITIALIZED = 0,
+ VIO_TYPE_DATA,
+ VIO_TYPE_BLOCK_ALLOCATOR,
+ VIO_TYPE_BLOCK_MAP,
+ VIO_TYPE_BLOCK_MAP_INTERIOR,
+ VIO_TYPE_GEOMETRY,
+ VIO_TYPE_PARTITION_COPY,
+ VIO_TYPE_RECOVERY_JOURNAL,
+ VIO_TYPE_SLAB_JOURNAL,
+ VIO_TYPE_SLAB_SUMMARY,
+ VIO_TYPE_SUPER_BLOCK,
+} __packed;
+
+/* Priority levels for asynchronous I/O operations performed on a vio. */
+enum vio_priority {
+ VIO_PRIORITY_LOW = 0,
+ VIO_PRIORITY_DATA = VIO_PRIORITY_LOW,
+ VIO_PRIORITY_COMPRESSED_DATA = VIO_PRIORITY_DATA,
+ VIO_PRIORITY_METADATA,
+ VIO_PRIORITY_HIGH,
+} __packed;
+
+/*
+ * A wrapper for a bio. All I/O to the storage below a vdo is conducted via vios.
+ */
+struct vio {
+ /* The completion for this vio */
+ struct vdo_completion completion;
+
+ /* The bio zone in which I/O should be processed */
+ zone_count_t bio_zone;
+
+ /* The queueing priority of the vio operation */
+ enum vio_priority priority;
+
+ /* The vio type is used for statistics and instrumentation. */
+ enum vio_type type;
+
+ /* The size of this vio in blocks */
+ unsigned int block_count;
+
+ /* The data being read or written. */
+ char *data;
+
+ /* The VDO-owned bio to use for all IO for this vio */
+ struct bio *bio;
+
+ /*
+ * A list of enqueued bios with consecutive block numbers, stored by vdo_submit_bio() under
+ * the first-enqueued vio. The other vios are found via their bio entries in this list, and
+ * are not added to the work queue as separate completions.
+ */
+ struct bio_list bios_merged;
+};
+
+#endif /* VDO_TYPES_H */