From 7eada909bfd7ac90a4522e56aa3179d1fd68cd14 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 4 Jan 2017 20:23:53 +0100 Subject: dm: add integrity target The dm-integrity target emulates a block device that has additional per-sector tags that can be used for storing integrity information. A general problem with storing integrity tags with every sector is that writing the sector and the integrity tag must be atomic - i.e. in case of crash, either both sector and integrity tag or none of them is written. To guarantee write atomicity the dm-integrity target uses a journal. It writes sector data and integrity tags into a journal, commits the journal and then copies the data and integrity tags to their respective location. The dm-integrity target can be used with the dm-crypt target - in this situation the dm-crypt target creates the integrity data and passes them to the dm-integrity target via bio_integrity_payload attached to the bio. In this mode, the dm-crypt and dm-integrity targets provide authenticated disk encryption - if the attacker modifies the encrypted device, an I/O error is returned instead of random data. The dm-integrity target can also be used as a standalone target, in this mode it calculates and verifies the integrity tag internally. In this mode, the dm-integrity target can be used to detect silent data corruption on the disk or in the I/O path. Signed-off-by: Mikulas Patocka Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-integrity.txt | 189 +++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 Documentation/device-mapper/dm-integrity.txt (limited to 'Documentation/device-mapper') diff --git a/Documentation/device-mapper/dm-integrity.txt b/Documentation/device-mapper/dm-integrity.txt new file mode 100644 index 000000000000..2406f56501dc --- /dev/null +++ b/Documentation/device-mapper/dm-integrity.txt @@ -0,0 +1,189 @@ +The dm-integrity target emulates a block device that has additional +per-sector tags that can be used for storing integrity information. + +A general problem with storing integrity tags with every sector is that +writing the sector and the integrity tag must be atomic - i.e. in case of +crash, either both sector and integrity tag or none of them is written. + +To guarantee write atomicity, the dm-integrity target uses journal, it +writes sector data and integrity tags into a journal, commits the journal +and then copies the data and integrity tags to their respective location. + +The dm-integrity target can be used with the dm-crypt target - in this +situation the dm-crypt target creates the integrity data and passes them +to the dm-integrity target via bio_integrity_payload attached to the bio. +In this mode, the dm-crypt and dm-integrity targets provide authenticated +disk encryption - if the attacker modifies the encrypted device, an I/O +error is returned instead of random data. + +The dm-integrity target can also be used as a standalone target, in this +mode it calculates and verifies the integrity tag internally. In this +mode, the dm-integrity target can be used to detect silent data +corruption on the disk or in the I/O path. + + +When loading the target for the first time, the kernel driver will format +the device. But it will only format the device if the superblock contains +zeroes. If the superblock is neither valid nor zeroed, the dm-integrity +target can't be loaded. + +To use the target for the first time: +1. overwrite the superblock with zeroes +2. load the dm-integrity target with one-sector size, the kernel driver + will format the device +3. unload the dm-integrity target +4. read the "provided_data_sectors" value from the superblock +5. load the dm-integrity target with the the target size + "provided_data_sectors" +6. if you want to use dm-integrity with dm-crypt, load the dm-crypt target + with the size "provided_data_sectors" + + +Target arguments: + +1. the underlying block device + +2. the number of reserved sector at the beginning of the device - the + dm-integrity won't read of write these sectors + +3. the size of the integrity tag (if "-" is used, the size is taken from + the internal-hash algorithm) + +4. mode: + D - direct writes (without journal) - in this mode, journaling is + not used and data sectors and integrity tags are written + separately. In case of crash, it is possible that the data + and integrity tag doesn't match. + J - journaled writes - data and integrity tags are written to the + journal and atomicity is guaranteed. In case of crash, + either both data and tag or none of them are written. The + journaled mode degrades write throughput twice because the + data have to be written twice. + +5. the number of additional arguments + +Additional arguments: + +journal-sectors:number + The size of journal, this argument is used only if formatting the + device. If the device is already formatted, the value from the + superblock is used. + +interleave-sectors:number + The number of interleaved sectors. This values is rounded down to + a power of two. If the device is already formatted, the value from + the superblock is used. + +buffer-sectors:number + The number of sectors in one buffer. The value is rounded down to + a power of two. + + The tag area is accessed using buffers, the buffer size is + configurable. The large buffer size means that the I/O size will + be larger, but there could be less I/Os issued. + +journal-watermark:number + The journal watermark in percents. When the size of the journal + exceeds this watermark, the thread that flushes the journal will + be started. + +commit-time:number + Commit time in milliseconds. When this time passes, the journal is + written. The journal is also written immediatelly if the FLUSH + request is received. + +internal-hash:algorithm(:key) (the key is optional) + Use internal hash or crc. + When this argument is used, the dm-integrity target won't accept + integrity tags from the upper target, but it will automatically + generate and verify the integrity tags. + + You can use a crc algorithm (such as crc32), then integrity target + will protect the data against accidental corruption. + You can also use a hmac algorithm (for example + "hmac(sha256):0123456789abcdef"), in this mode it will provide + cryptographic authentication of the data without encryption. + + When this argument is not used, the integrity tags are accepted + from an upper layer target, such as dm-crypt. The upper layer + target should check the validity of the integrity tags. + +journal-crypt:algorithm(:key) (the key is optional) + Encrypt the journal using given algorithm to make sure that the + attacker can't read the journal. You can use a block cipher here + (such as "cbc(aes)") or a stream cipher (for example "chacha20", + "salsa20", "ctr(aes)" or "ecb(arc4)"). + + The journal contains history of last writes to the block device, + an attacker reading the journal could see the last sector nubmers + that were written. From the sector numbers, the attacker can infer + the size of files that were written. To protect against this + situation, you can encrypt the journal. + +journal-mac:algorithm(:key) (the key is optional) + Protect sector numbers in the journal from accidental or malicious + modification. To protect against accidental modification, use a + crc algorithm, to protect against malicious modification, use a + hmac algorithm with a key. + + This option is not needed when using internal-hash because in this + mode, the integrity of journal entries is checked when replaying + the journal. Thus, modified sector number would be detected at + this stage. + + +The journal mode (D/J), buffer-sectors, journal-watermark, commit-time can +be changed when reloading the target (load an inactive table and swap the +tables with suspend and resume). The other arguments should not be changed +when reloading the target because the layout of disk data depend on them +and the reloaded target would be non-functional. + + +The layout of the formatted block device: +* reserved sectors (they are not used by this target, they can be used for + storing LUKS metadata or for other purpose), the size of the reserved + area is specified in the target arguments +* superblock (4kiB) + * magic string - identifies that the device was formatted + * version + * log2(interleave sectors) + * integrity tag size + * the number of journal sections + * provided data sectors - the number of sectors that this target + provides (i.e. the size of the device minus the size of all + metadata and padding). The user of this target should not send + bios that access data beyond the "provided data sectors" limit. + * flags - a flag is set if journal-mac is used +* journal + The journal is divided into sections, each section contains: + * metadata area (4kiB), it contains journal entries + every journal entry contains: + * logical sector (specifies where the data and tag should + be written) + * last 8 bytes of data + * integrity tag (the size is specified in the superblock) + every metadata sector ends with + * mac (8-bytes), all the macs in 8 metadata sectors form a + 64-byte value. It is used to store hmac of sector + numbers in the journal section, to protect against a + possibility that the attacker tampers with sector + numbers in the journal. + * commit id + * data area (the size is variable; it depends on how many journal + entries fit into the metadata area) + every sector in the data area contains: + * data (504 bytes of data, the last 8 bytes are stored in + the journal entry) + * commit id + To test if the whole journal section was written correctly, every + 512-byte sector of the journal ends with 8-byte commit id. If the + commit id matches on all sectors in a journal section, then it is + assumed that the section was written correctly. If the commit id + doesn't match, the section was written partially and it should not + be replayed. +* one or more runs of interleaved tags and data. Each run contains: + * tag area - it contains integrity tags. There is one tag for each + sector in the data area + * data area - it contains data sectors. The number of data sectors + in one run must be a power of two. log2 of this value is stored + in the superblock. -- cgit v1.2.3 From ef43aa38063a6b2b3c6618e28ab35794f4f1fe29 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 4 Jan 2017 20:23:54 +0100 Subject: dm crypt: add cryptographic data integrity protection (authenticated encryption) Allow the use of per-sector metadata, provided by the dm-integrity module, for integrity protection and persistently stored per-sector Initialization Vector (IV). The underlying device must support the "DM-DIF-EXT-TAG" dm-integrity profile. The per-bio integrity metadata is allocated by dm-crypt for every bio. Example of low-level mapping table for various types of use: DEV=/dev/sdb SIZE=417792 # Additional HMAC with CBC-ESSIV, key is concatenated encryption key + HMAC key SIZE_INT=389952 dmsetup create x --table "0 $SIZE_INT integrity $DEV 0 32 J 0" dmsetup create y --table "0 $SIZE_INT crypt aes-cbc-essiv:sha256 \ 11ff33c6fb942655efb3e30cf4c0fd95f5ef483afca72166c530ae26151dd83b \ 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff \ 0 /dev/mapper/x 0 1 integrity:32:hmac(sha256)" # AEAD (Authenticated Encryption with Additional Data) - GCM with random IVs # GCM in kernel uses 96bits IV and we store 128bits auth tag (so 28 bytes metadata space) SIZE_INT=393024 dmsetup create x --table "0 $SIZE_INT integrity $DEV 0 28 J 0" dmsetup create y --table "0 $SIZE_INT crypt aes-gcm-random \ 11ff33c6fb942655efb3e30cf4c0fd95f5ef483afca72166c530ae26151dd83b \ 0 /dev/mapper/x 0 1 integrity:28:aead" # Random IV only for XTS mode (no integrity protection but provides atomic random sector change) SIZE_INT=401272 dmsetup create x --table "0 $SIZE_INT integrity $DEV 0 16 J 0" dmsetup create y --table "0 $SIZE_INT crypt aes-xts-random \ 11ff33c6fb942655efb3e30cf4c0fd95f5ef483afca72166c530ae26151dd83b \ 0 /dev/mapper/x 0 1 integrity:16:none" # Random IV with XTS + HMAC integrity protection SIZE_INT=377656 dmsetup create x --table "0 $SIZE_INT integrity $DEV 0 48 J 0" dmsetup create y --table "0 $SIZE_INT crypt aes-xts-random \ 11ff33c6fb942655efb3e30cf4c0fd95f5ef483afca72166c530ae26151dd83b \ 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff \ 0 /dev/mapper/x 0 1 integrity:48:hmac(sha256)" Both AEAD and HMAC protection authenticates not only data but also sector metadata. HMAC protection is implemented through autenc wrapper (so it is processed the same way as an authenticated mode). In HMAC mode there are two keys (concatenated in dm-crypt mapping table). First is the encryption key and the second is the key for authentication (HMAC). (It is userspace decision if these keys are independent or somehow derived.) The sector request for AEAD/HMAC authenticated encryption looks like this: |----- AAD -------|------ DATA -------|-- AUTH TAG --| | (authenticated) | (auth+encryption) | | | sector_LE | IV | sector in/out | tag in/out | For writes, the integrity fields are calculated during AEAD encryption of every sector and stored in bio integrity fields and sent to underlying dm-integrity target for storage. For reads, the integrity metadata is verified during AEAD decryption of every sector (they are filled in by dm-integrity, but the integrity fields are pre-allocated in dm-crypt). There is also an experimental support in cryptsetup utility for more friendly configuration (part of LUKS2 format). Because the integrity fields are not valid on initial creation, the device must be "formatted". This can be done by direct-io writes to the device (e.g. dd in direct-io mode). For now, there is available trivial tool to do this, see: https://github.com/mbroz/dm_int_tools Signed-off-by: Milan Broz Signed-off-by: Ondrej Mosnacek Signed-off-by: Vashek Matyas Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-crypt.txt | 16 + drivers/md/dm-crypt.c | 882 ++++++++++++++++++++++++++----- 2 files changed, 758 insertions(+), 140 deletions(-) (limited to 'Documentation/device-mapper') diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt index ff1f87bf26e8..a2a6627aa659 100644 --- a/Documentation/device-mapper/dm-crypt.txt +++ b/Documentation/device-mapper/dm-crypt.txt @@ -93,6 +93,22 @@ submit_from_crypt_cpus thread because it benefits CFQ to have writes submitted using the same context. +integrity:: + Calculates and verifies integrity for the encrypted device (uses + authenticated encryption). This mode requires metadata stored in per-bio + integrity structure of in size. + + This option requires that the underlying device is created by dm-integrity + target and provides exactly of per-sector metadata. + + There can by two options for . The first one is used when encryption + mode is Authenticated mode (AEAD mode), then type must be just "aead". + The second option is integrity calculated by keyed hash (HMAC), then + is for example "hmac(sha256)". + + If random IV is used (persistently stored IV in metadata per-sector), + then includes both space for random IV and authentication tag. + Example scripts =============== LUKS (Linux Unified Key Setup) is now the preferred way to set up disk diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 389a3637ffcc..e8e570d000f6 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1,8 +1,8 @@ /* * Copyright (C) 2003 Jana Saout * Copyright (C) 2004 Clemens Fruhwirth - * Copyright (C) 2006-2015 Red Hat, Inc. All rights reserved. - * Copyright (C) 2013 Milan Broz + * Copyright (C) 2006-2017 Red Hat, Inc. All rights reserved. + * Copyright (C) 2013-2017 Milan Broz * * This file is released under the GPL. */ @@ -31,6 +31,9 @@ #include #include #include +#include +#include +#include /* for struct rtattr and RTA macros only */ #include #include @@ -48,7 +51,11 @@ struct convert_context { struct bvec_iter iter_out; sector_t cc_sector; atomic_t cc_pending; - struct skcipher_request *req; + union { + struct skcipher_request *req; + struct aead_request *req_aead; + } r; + }; /* @@ -57,6 +64,8 @@ struct convert_context { struct dm_crypt_io { struct crypt_config *cc; struct bio *base_bio; + u8 *integrity_metadata; + bool integrity_metadata_from_pool; struct work_struct work; struct convert_context ctx; @@ -70,8 +79,8 @@ struct dm_crypt_io { struct dm_crypt_request { struct convert_context *ctx; - struct scatterlist sg_in; - struct scatterlist sg_out; + struct scatterlist sg_in[4]; + struct scatterlist sg_out[4]; sector_t iv_sector; }; @@ -118,6 +127,11 @@ struct iv_tcw_private { enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD }; +enum cipher_flags { + CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cihper */ + CRYPT_MODE_INTEGRITY_HMAC, /* Compose authenticated mode from normal mode and HMAC */ +}; + /* * The fields in here must be read only after initialization. */ @@ -126,11 +140,14 @@ struct crypt_config { sector_t start; /* - * pool for per bio private data, crypto requests and - * encryption requeusts/buffer pages + * pool for per bio private data, crypto requests, + * encryption requeusts/buffer pages and integrity tags */ mempool_t *req_pool; mempool_t *page_pool; + mempool_t *tag_pool; + unsigned tag_pool_max_sectors; + struct bio_set *bs; struct mutex bio_alloc_lock; @@ -143,6 +160,7 @@ struct crypt_config { char *cipher; char *cipher_string; + char *cipher_auth; char *key_string; const struct crypt_iv_operations *iv_gen_ops; @@ -157,8 +175,12 @@ struct crypt_config { /* ESSIV: struct crypto_cipher *essiv_tfm */ void *iv_private; - struct crypto_skcipher **tfms; + union { + struct crypto_skcipher **tfms; + struct crypto_aead **tfms_aead; + } cipher_tfm; unsigned tfms_count; + unsigned long cipher_flags; /* * Layout of each crypto request: @@ -181,21 +203,36 @@ struct crypt_config { unsigned int key_size; unsigned int key_parts; /* independent parts in key buffer */ unsigned int key_extra_size; /* additional keys length */ + unsigned int key_mac_size; /* MAC key size for authenc(...) */ + + unsigned int integrity_tag_size; + unsigned int integrity_iv_size; + unsigned int on_disk_tag_size; + + u8 *authenc_key; /* space for keys in authenc() format (if used) */ u8 key[0]; }; -#define MIN_IOS 64 +#define MIN_IOS 64 +#define MAX_TAG_SIZE 480 +#define POOL_ENTRY_SIZE 512 static void clone_init(struct dm_crypt_io *, struct bio *); static void kcryptd_queue_crypt(struct dm_crypt_io *io); -static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq); +static struct scatterlist *crypt_get_sg_data(struct crypt_config *cc, + struct scatterlist *sg); /* * Use this to access cipher attributes that are the same for each CPU. */ static struct crypto_skcipher *any_tfm(struct crypt_config *cc) { - return cc->tfms[0]; + return cc->cipher_tfm.tfms[0]; +} + +static struct crypto_aead *any_tfm_aead(struct crypt_config *cc) +{ + return cc->cipher_tfm.tfms_aead[0]; } /* @@ -325,8 +362,7 @@ static struct crypto_cipher *setup_essiv_cpu(struct crypt_config *cc, return essiv_tfm; } - if (crypto_cipher_blocksize(essiv_tfm) != - crypto_skcipher_ivsize(any_tfm(cc))) { + if (crypto_cipher_blocksize(essiv_tfm) != cc->iv_size) { ti->error = "Block size of ESSIV cipher does " "not match IV size of block cipher"; crypto_free_cipher(essiv_tfm); @@ -395,6 +431,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, essiv_tfm = setup_essiv_cpu(cc, ti, salt, crypto_ahash_digestsize(hash_tfm)); + if (IS_ERR(essiv_tfm)) { crypt_iv_essiv_dtr(cc); return PTR_ERR(essiv_tfm); @@ -585,12 +622,14 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, static int crypt_iv_lmk_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { + struct scatterlist *sg; u8 *src; int r = 0; if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) { - src = kmap_atomic(sg_page(&dmreq->sg_in)); - r = crypt_iv_lmk_one(cc, iv, dmreq, src + dmreq->sg_in.offset); + sg = crypt_get_sg_data(cc, dmreq->sg_in); + src = kmap_atomic(sg_page(sg)); + r = crypt_iv_lmk_one(cc, iv, dmreq, src + sg->offset); kunmap_atomic(src); } else memset(iv, 0, cc->iv_size); @@ -601,18 +640,20 @@ static int crypt_iv_lmk_gen(struct crypt_config *cc, u8 *iv, static int crypt_iv_lmk_post(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { + struct scatterlist *sg; u8 *dst; int r; if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) return 0; - dst = kmap_atomic(sg_page(&dmreq->sg_out)); - r = crypt_iv_lmk_one(cc, iv, dmreq, dst + dmreq->sg_out.offset); + sg = crypt_get_sg_data(cc, dmreq->sg_out); + dst = kmap_atomic(sg_page(sg)); + r = crypt_iv_lmk_one(cc, iv, dmreq, dst + sg->offset); /* Tweak the first block of plaintext sector */ if (!r) - crypto_xor(dst + dmreq->sg_out.offset, iv, cc->iv_size); + crypto_xor(dst + sg->offset, iv, cc->iv_size); kunmap_atomic(dst); return r; @@ -724,6 +765,7 @@ out: static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { + struct scatterlist *sg; struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; __le64 sector = cpu_to_le64(dmreq->iv_sector); u8 *src; @@ -731,8 +773,9 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv, /* Remove whitening from ciphertext */ if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) { - src = kmap_atomic(sg_page(&dmreq->sg_in)); - r = crypt_iv_tcw_whitening(cc, dmreq, src + dmreq->sg_in.offset); + sg = crypt_get_sg_data(cc, dmreq->sg_in); + src = kmap_atomic(sg_page(sg)); + r = crypt_iv_tcw_whitening(cc, dmreq, src + sg->offset); kunmap_atomic(src); } @@ -748,6 +791,7 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv, static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { + struct scatterlist *sg; u8 *dst; int r; @@ -755,13 +799,22 @@ static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv, return 0; /* Apply whitening on ciphertext */ - dst = kmap_atomic(sg_page(&dmreq->sg_out)); - r = crypt_iv_tcw_whitening(cc, dmreq, dst + dmreq->sg_out.offset); + sg = crypt_get_sg_data(cc, dmreq->sg_out); + dst = kmap_atomic(sg_page(sg)); + r = crypt_iv_tcw_whitening(cc, dmreq, dst + sg->offset); kunmap_atomic(dst); return r; } +static int crypt_iv_random_gen(struct crypt_config *cc, u8 *iv, + struct dm_crypt_request *dmreq) +{ + /* Used only for writes, there must be an additional space to store IV */ + get_random_bytes(iv, cc->iv_size); + return 0; +} + static const struct crypt_iv_operations crypt_iv_plain_ops = { .generator = crypt_iv_plain_gen }; @@ -806,6 +859,108 @@ static const struct crypt_iv_operations crypt_iv_tcw_ops = { .post = crypt_iv_tcw_post }; +static struct crypt_iv_operations crypt_iv_random_ops = { + .generator = crypt_iv_random_gen +}; + +/* + * Integrity extensions + */ +static bool crypt_integrity_aead(struct crypt_config *cc) +{ + return test_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags); +} + +static bool crypt_integrity_hmac(struct crypt_config *cc) +{ + return test_bit(CRYPT_MODE_INTEGRITY_HMAC, &cc->cipher_flags); +} + +static bool crypt_integrity_mode(struct crypt_config *cc) +{ + return crypt_integrity_aead(cc) || crypt_integrity_hmac(cc); +} + +/* Get sg containing data */ +static struct scatterlist *crypt_get_sg_data(struct crypt_config *cc, + struct scatterlist *sg) +{ + if (unlikely(crypt_integrity_mode(cc))) + return &sg[2]; + + return sg; +} + +static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio) +{ + struct bio_integrity_payload *bip; + unsigned int tag_len; + int ret; + + if (!bio_sectors(bio) || !io->cc->on_disk_tag_size) + return 0; + + bip = bio_integrity_alloc(bio, GFP_NOIO, 1); + if (IS_ERR(bip)) + return PTR_ERR(bip); + + tag_len = io->cc->on_disk_tag_size * bio_sectors(bio); + + bip->bip_iter.bi_size = tag_len; + bip->bip_iter.bi_sector = io->cc->start + io->sector; + + /* We own the metadata, do not let bio_free to release it */ + bip->bip_flags &= ~BIP_BLOCK_INTEGRITY; + + ret = bio_integrity_add_page(bio, virt_to_page(io->integrity_metadata), + tag_len, offset_in_page(io->integrity_metadata)); + if (unlikely(ret != tag_len)) + return -ENOMEM; + + return 0; +} + +static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti) +{ +#ifdef CONFIG_BLK_DEV_INTEGRITY + struct blk_integrity *bi = blk_get_integrity(cc->dev->bdev->bd_disk); + + /* From now we require underlying device with our integrity profile */ + if (!bi || strcasecmp(bi->profile->name, "DM-DIF-EXT-TAG")) { + ti->error = "Integrity profile not supported."; + return -EINVAL; + } + + if (bi->tag_size != cc->on_disk_tag_size) { + ti->error = "Integrity profile tag size mismatch."; + return -EINVAL; + } + + if (crypt_integrity_mode(cc)) { + cc->integrity_tag_size = cc->on_disk_tag_size - cc->integrity_iv_size; + DMINFO("Integrity AEAD, tag size %u, IV size %u.", + cc->integrity_tag_size, cc->integrity_iv_size); + + if (crypto_aead_setauthsize(any_tfm_aead(cc), cc->integrity_tag_size)) { + ti->error = "Integrity AEAD auth tag size is not supported."; + return -EINVAL; + } + } else if (cc->integrity_iv_size) + DMINFO("Additional per-sector space %u bytes for IV.", + cc->integrity_iv_size); + + if ((cc->integrity_tag_size + cc->integrity_iv_size) != bi->tag_size) { + ti->error = "Not enough space for integrity tag in the profile."; + return -EINVAL; + } + + return 0; +#else + ti->error = "Integrity profile not supported."; + return -EINVAL; +#endif +} + static void crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx, struct bio *bio_out, struct bio *bio_in, @@ -822,58 +977,207 @@ static void crypt_convert_init(struct crypt_config *cc, } static struct dm_crypt_request *dmreq_of_req(struct crypt_config *cc, - struct skcipher_request *req) + void *req) { return (struct dm_crypt_request *)((char *)req + cc->dmreq_start); } -static struct skcipher_request *req_of_dmreq(struct crypt_config *cc, - struct dm_crypt_request *dmreq) +static void *req_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq) { - return (struct skcipher_request *)((char *)dmreq - cc->dmreq_start); + return (void *)((char *)dmreq - cc->dmreq_start); } static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq) { - return (u8 *)ALIGN((unsigned long)(dmreq + 1), - crypto_skcipher_alignmask(any_tfm(cc)) + 1); + if (crypt_integrity_mode(cc)) + return (u8 *)ALIGN((unsigned long)(dmreq + 1), + crypto_aead_alignmask(any_tfm_aead(cc)) + 1); + else + return (u8 *)ALIGN((unsigned long)(dmreq + 1), + crypto_skcipher_alignmask(any_tfm(cc)) + 1); } -static int crypt_convert_block(struct crypt_config *cc, - struct convert_context *ctx, - struct skcipher_request *req) +static u8 *org_iv_of_dmreq(struct crypt_config *cc, + struct dm_crypt_request *dmreq) +{ + return iv_of_dmreq(cc, dmreq) + cc->iv_size; +} + +static uint64_t *org_sector_of_dmreq(struct crypt_config *cc, + struct dm_crypt_request *dmreq) +{ + u8 *ptr = iv_of_dmreq(cc, dmreq) + cc->iv_size + cc->iv_size; + return (uint64_t*) ptr; +} + +static unsigned int *org_tag_of_dmreq(struct crypt_config *cc, + struct dm_crypt_request *dmreq) +{ + u8 *ptr = iv_of_dmreq(cc, dmreq) + cc->iv_size + + cc->iv_size + sizeof(uint64_t); + return (unsigned int*)ptr; +} + +static void *tag_from_dmreq(struct crypt_config *cc, + struct dm_crypt_request *dmreq) +{ + struct convert_context *ctx = dmreq->ctx; + struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); + + return &io->integrity_metadata[*org_tag_of_dmreq(cc, dmreq) * + cc->on_disk_tag_size]; +} + +static void *iv_tag_from_dmreq(struct crypt_config *cc, + struct dm_crypt_request *dmreq) +{ + return tag_from_dmreq(cc, dmreq) + cc->integrity_tag_size; +} + +static int crypt_convert_block_aead(struct crypt_config *cc, + struct convert_context *ctx, + struct aead_request *req, + unsigned int tag_offset) { struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in); struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out); struct dm_crypt_request *dmreq; - u8 *iv; - int r; + unsigned int data_len = 1 << SECTOR_SHIFT; + u8 *iv, *org_iv, *tag_iv, *tag; + uint64_t *sector; + int r = 0; + + BUG_ON(cc->integrity_iv_size && cc->integrity_iv_size != cc->iv_size); dmreq = dmreq_of_req(cc, req); + dmreq->iv_sector = ctx->cc_sector; + dmreq->ctx = ctx; + + *org_tag_of_dmreq(cc, dmreq) = tag_offset; + + sector = org_sector_of_dmreq(cc, dmreq); + *sector = cpu_to_le64(ctx->cc_sector - cc->iv_offset); + iv = iv_of_dmreq(cc, dmreq); + org_iv = org_iv_of_dmreq(cc, dmreq); + tag = tag_from_dmreq(cc, dmreq); + tag_iv = iv_tag_from_dmreq(cc, dmreq); + + /* AEAD request: + * |----- AAD -------|------ DATA -------|-- AUTH TAG --| + * | (authenticated) | (auth+encryption) | | + * | sector_LE | IV | sector in/out | tag in/out | + */ + sg_init_table(dmreq->sg_in, 4); + sg_set_buf(&dmreq->sg_in[0], sector, sizeof(uint64_t)); + sg_set_buf(&dmreq->sg_in[1], org_iv, cc->iv_size); + sg_set_page(&dmreq->sg_in[2], bv_in.bv_page, data_len, bv_in.bv_offset); + sg_set_buf(&dmreq->sg_in[3], tag, cc->integrity_tag_size); + + sg_init_table(dmreq->sg_out, 4); + sg_set_buf(&dmreq->sg_out[0], sector, sizeof(uint64_t)); + sg_set_buf(&dmreq->sg_out[1], org_iv, cc->iv_size); + sg_set_page(&dmreq->sg_out[2], bv_out.bv_page, data_len, bv_out.bv_offset); + sg_set_buf(&dmreq->sg_out[3], tag, cc->integrity_tag_size); + + if (cc->iv_gen_ops) { + /* For READs use IV stored in integrity metadata */ + if (cc->integrity_iv_size && bio_data_dir(ctx->bio_in) != WRITE) { + memcpy(org_iv, tag_iv, cc->iv_size); + } else { + r = cc->iv_gen_ops->generator(cc, org_iv, dmreq); + if (r < 0) + return r; + /* Store generated IV in integrity metadata */ + if (cc->integrity_iv_size) + memcpy(tag_iv, org_iv, cc->iv_size); + } + /* Working copy of IV, to be modified in crypto API */ + memcpy(iv, org_iv, cc->iv_size); + } + + aead_request_set_ad(req, sizeof(uint64_t) + cc->iv_size); + if (bio_data_dir(ctx->bio_in) == WRITE) { + aead_request_set_crypt(req, dmreq->sg_in, dmreq->sg_out, + data_len, iv); + r = crypto_aead_encrypt(req); + if (cc->integrity_tag_size + cc->integrity_iv_size != cc->on_disk_tag_size) + memset(tag + cc->integrity_tag_size + cc->integrity_iv_size, 0, + cc->on_disk_tag_size - (cc->integrity_tag_size + cc->integrity_iv_size)); + } else { + aead_request_set_crypt(req, dmreq->sg_in, dmreq->sg_out, + data_len + cc->integrity_tag_size, iv); + r = crypto_aead_decrypt(req); + } + + if (r == -EBADMSG) + DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu", + (unsigned long long)le64_to_cpu(*sector)); + + if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) + r = cc->iv_gen_ops->post(cc, org_iv, dmreq); + + bio_advance_iter(ctx->bio_in, &ctx->iter_in, data_len); + bio_advance_iter(ctx->bio_out, &ctx->iter_out, data_len); + return r; +} + +static int crypt_convert_block_skcipher(struct crypt_config *cc, + struct convert_context *ctx, + struct skcipher_request *req, + unsigned int tag_offset) +{ + struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in); + struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out); + struct scatterlist *sg_in, *sg_out; + struct dm_crypt_request *dmreq; + unsigned int data_len = 1 << SECTOR_SHIFT; + u8 *iv, *org_iv, *tag_iv; + uint64_t *sector; + int r = 0; + + dmreq = dmreq_of_req(cc, req); dmreq->iv_sector = ctx->cc_sector; dmreq->ctx = ctx; - sg_init_table(&dmreq->sg_in, 1); - sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT, - bv_in.bv_offset); - sg_init_table(&dmreq->sg_out, 1); - sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT, - bv_out.bv_offset); + *org_tag_of_dmreq(cc, dmreq) = tag_offset; + + iv = iv_of_dmreq(cc, dmreq); + org_iv = org_iv_of_dmreq(cc, dmreq); + tag_iv = iv_tag_from_dmreq(cc, dmreq); + + sector = org_sector_of_dmreq(cc, dmreq); + *sector = cpu_to_le64(ctx->cc_sector - cc->iv_offset); + + /* For skcipher we use only the first sg item */ + sg_in = &dmreq->sg_in[0]; + sg_out = &dmreq->sg_out[0]; - bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT); - bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT); + sg_init_table(sg_in, 1); + sg_set_page(sg_in, bv_in.bv_page, data_len, bv_in.bv_offset); + + sg_init_table(sg_out, 1); + sg_set_page(sg_out, bv_out.bv_page, data_len, bv_out.bv_offset); if (cc->iv_gen_ops) { - r = cc->iv_gen_ops->generator(cc, iv, dmreq); - if (r < 0) - return r; + /* For READs use IV stored in integrity metadata */ + if (cc->integrity_iv_size && bio_data_dir(ctx->bio_in) != WRITE) { + memcpy(org_iv, tag_iv, cc->integrity_iv_size); + } else { + r = cc->iv_gen_ops->generator(cc, org_iv, dmreq); + if (r < 0) + return r; + /* Store generated IV in integrity metadata */ + if (cc->integrity_iv_size) + memcpy(tag_iv, org_iv, cc->integrity_iv_size); + } + /* Working copy of IV, to be modified in crypto API */ + memcpy(iv, org_iv, cc->iv_size); } - skcipher_request_set_crypt(req, &dmreq->sg_in, &dmreq->sg_out, - 1 << SECTOR_SHIFT, iv); + skcipher_request_set_crypt(req, sg_in, sg_out, data_len, iv); if (bio_data_dir(ctx->bio_in) == WRITE) r = crypto_skcipher_encrypt(req); @@ -881,7 +1185,10 @@ static int crypt_convert_block(struct crypt_config *cc, r = crypto_skcipher_decrypt(req); if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) - r = cc->iv_gen_ops->post(cc, iv, dmreq); + r = cc->iv_gen_ops->post(cc, org_iv, dmreq); + + bio_advance_iter(ctx->bio_in, &ctx->iter_in, data_len); + bio_advance_iter(ctx->bio_out, &ctx->iter_out, data_len); return r; } @@ -889,27 +1196,53 @@ static int crypt_convert_block(struct crypt_config *cc, static void kcryptd_async_done(struct crypto_async_request *async_req, int error); -static void crypt_alloc_req(struct crypt_config *cc, - struct convert_context *ctx) +static void crypt_alloc_req_skcipher(struct crypt_config *cc, + struct convert_context *ctx) { unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); - if (!ctx->req) - ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO); + if (!ctx->r.req) + ctx->r.req = mempool_alloc(cc->req_pool, GFP_NOIO); + + skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]); + + /* + * Use REQ_MAY_BACKLOG so a cipher driver internally backlogs + * requests if driver request queue is full. + */ + skcipher_request_set_callback(ctx->r.req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + kcryptd_async_done, dmreq_of_req(cc, ctx->r.req)); +} + +static void crypt_alloc_req_aead(struct crypt_config *cc, + struct convert_context *ctx) +{ + if (!ctx->r.req_aead) + ctx->r.req_aead = mempool_alloc(cc->req_pool, GFP_NOIO); - skcipher_request_set_tfm(ctx->req, cc->tfms[key_index]); + aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]); /* * Use REQ_MAY_BACKLOG so a cipher driver internally backlogs * requests if driver request queue is full. */ - skcipher_request_set_callback(ctx->req, + aead_request_set_callback(ctx->r.req_aead, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - kcryptd_async_done, dmreq_of_req(cc, ctx->req)); + kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead)); +} + +static void crypt_alloc_req(struct crypt_config *cc, + struct convert_context *ctx) +{ + if (crypt_integrity_mode(cc)) + crypt_alloc_req_aead(cc, ctx); + else + crypt_alloc_req_skcipher(cc, ctx); } -static void crypt_free_req(struct crypt_config *cc, - struct skcipher_request *req, struct bio *base_bio) +static void crypt_free_req_skcipher(struct crypt_config *cc, + struct skcipher_request *req, struct bio *base_bio) { struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size); @@ -917,12 +1250,30 @@ static void crypt_free_req(struct crypt_config *cc, mempool_free(req, cc->req_pool); } +static void crypt_free_req_aead(struct crypt_config *cc, + struct aead_request *req, struct bio *base_bio) +{ + struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size); + + if ((struct aead_request *)(io + 1) != req) + mempool_free(req, cc->req_pool); +} + +static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_bio) +{ + if (crypt_integrity_mode(cc)) + crypt_free_req_aead(cc, req, base_bio); + else + crypt_free_req_skcipher(cc, req, base_bio); +} + /* * Encrypt / decrypt data from one bio to another one (can be the same one) */ static int crypt_convert(struct crypt_config *cc, struct convert_context *ctx) { + unsigned int tag_offset = 0; int r; atomic_set(&ctx->cc_pending, 1); @@ -933,7 +1284,10 @@ static int crypt_convert(struct crypt_config *cc, atomic_inc(&ctx->cc_pending); - r = crypt_convert_block(cc, ctx, ctx->req); + if (crypt_integrity_mode(cc)) + r = crypt_convert_block_aead(cc, ctx, ctx->r.req_aead, tag_offset); + else + r = crypt_convert_block_skcipher(cc, ctx, ctx->r.req, tag_offset); switch (r) { /* @@ -949,8 +1303,9 @@ static int crypt_convert(struct crypt_config *cc, * completion function kcryptd_async_done() will be called. */ case -EINPROGRESS: - ctx->req = NULL; + ctx->r.req = NULL; ctx->cc_sector++; + tag_offset++; continue; /* * The request was already processed (synchronously). @@ -958,13 +1313,21 @@ static int crypt_convert(struct crypt_config *cc, case 0: atomic_dec(&ctx->cc_pending); ctx->cc_sector++; + tag_offset++; cond_resched(); continue; - - /* There was an error while processing the request. */ + /* + * There was a data integrity error. + */ + case -EBADMSG: + atomic_dec(&ctx->cc_pending); + return -EILSEQ; + /* + * There was an error while processing the request. + */ default: atomic_dec(&ctx->cc_pending); - return r; + return -EIO; } } @@ -1005,7 +1368,7 @@ retry: clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); if (!clone) - goto return_clone; + goto out; clone_init(io, clone); @@ -1027,7 +1390,13 @@ retry: remaining_size -= len; } -return_clone: + /* Allocate space for integrity tags */ + if (dm_crypt_integrity_io_alloc(io, clone)) { + crypt_free_buffer_pages(cc, clone); + bio_put(clone); + clone = NULL; + } +out: if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) mutex_unlock(&cc->bio_alloc_lock); @@ -1053,7 +1422,9 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->base_bio = bio; io->sector = sector; io->error = 0; - io->ctx.req = NULL; + io->ctx.r.req = NULL; + io->integrity_metadata = NULL; + io->integrity_metadata_from_pool = false; atomic_set(&io->io_pending, 0); } @@ -1075,8 +1446,13 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (!atomic_dec_and_test(&io->io_pending)) return; - if (io->ctx.req) - crypt_free_req(cc, io->ctx.req, base_bio); + if (io->ctx.r.req) + crypt_free_req(cc, io->ctx.r.req, base_bio); + + if (unlikely(io->integrity_metadata_from_pool)) + mempool_free(io->integrity_metadata, io->cc->tag_pool); + else + kfree(io->integrity_metadata); base_bio->bi_error = error; bio_endio(base_bio); @@ -1156,6 +1532,12 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) clone_init(io, clone); clone->bi_iter.bi_sector = cc->start + io->sector; + if (dm_crypt_integrity_io_alloc(io, clone)) { + crypt_dec_pending(io); + bio_put(clone); + return 1; + } + generic_make_request(clone); return 0; } @@ -1314,8 +1696,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) crypt_inc_pending(io); r = crypt_convert(cc, &io->ctx); - if (r) - io->error = -EIO; + if (r < 0) + io->error = r; crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); /* Encryption was already finished, submit io now */ @@ -1345,7 +1727,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) r = crypt_convert(cc, &io->ctx); if (r < 0) - io->error = -EIO; + io->error = r; if (atomic_dec_and_test(&io->ctx.cc_pending)) kcryptd_crypt_read_done(io); @@ -1372,9 +1754,13 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, } if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post) - error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq); + error = cc->iv_gen_ops->post(cc, org_iv_of_dmreq(cc, dmreq), dmreq); - if (error < 0) + if (error == -EBADMSG) { + DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu", + (unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq))); + io->error = -EILSEQ; + } else if (error < 0) io->error = -EIO; crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio); @@ -1430,37 +1816,59 @@ static int crypt_decode_key(u8 *key, char *hex, unsigned int size) return 0; } -static void crypt_free_tfms(struct crypt_config *cc) +static void crypt_free_tfms_aead(struct crypt_config *cc) +{ + if (!cc->cipher_tfm.tfms_aead) + return; + + if (cc->cipher_tfm.tfms_aead[0] && !IS_ERR(cc->cipher_tfm.tfms_aead[0])) { + crypto_free_aead(cc->cipher_tfm.tfms_aead[0]); + cc->cipher_tfm.tfms_aead[0] = NULL; + } + + kfree(cc->cipher_tfm.tfms_aead); + cc->cipher_tfm.tfms_aead = NULL; +} + +static void crypt_free_tfms_skcipher(struct crypt_config *cc) { unsigned i; - if (!cc->tfms) + if (!cc->cipher_tfm.tfms) return; for (i = 0; i < cc->tfms_count; i++) - if (cc->tfms[i] && !IS_ERR(cc->tfms[i])) { - crypto_free_skcipher(cc->tfms[i]); - cc->tfms[i] = NULL; + if (cc->cipher_tfm.tfms[i] && !IS_ERR(cc->cipher_tfm.tfms[i])) { + crypto_free_skcipher(cc->cipher_tfm.tfms[i]); + cc->cipher_tfm.tfms[i] = NULL; } - kfree(cc->tfms); - cc->tfms = NULL; + kfree(cc->cipher_tfm.tfms); + cc->cipher_tfm.tfms = NULL; } -static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) +static void crypt_free_tfms(struct crypt_config *cc) +{ + if (crypt_integrity_mode(cc)) + crypt_free_tfms_aead(cc); + else + crypt_free_tfms_skcipher(cc); +} + +static int crypt_alloc_tfms_skcipher(struct crypt_config *cc, char *ciphermode) { unsigned i; int err; - cc->tfms = kzalloc(cc->tfms_count * sizeof(struct crypto_skcipher *), - GFP_KERNEL); - if (!cc->tfms) + cc->cipher_tfm.tfms = kzalloc(cc->tfms_count * + sizeof(struct crypto_skcipher *), GFP_KERNEL); + if (!cc->cipher_tfm.tfms) return -ENOMEM; for (i = 0; i < cc->tfms_count; i++) { - cc->tfms[i] = crypto_alloc_skcipher(ciphermode, 0, 0); - if (IS_ERR(cc->tfms[i])) { - err = PTR_ERR(cc->tfms[i]); + cc->cipher_tfm.tfms[i] = crypto_alloc_skcipher(ciphermode, 0, 0); + if (IS_ERR(cc->cipher_tfm.tfms[i])) { + err = PTR_ERR(cc->cipher_tfm.tfms[i]); crypt_free_tfms(cc); return err; } @@ -1469,22 +1877,111 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) return 0; } +static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode) +{ + char *authenc = NULL; + int err; + + cc->cipher_tfm.tfms = kmalloc(sizeof(struct crypto_aead *), GFP_KERNEL); + if (!cc->cipher_tfm.tfms) + return -ENOMEM; + + /* Compose AEAD cipher with autenc(authenticator,cipher) structure */ + if (crypt_integrity_hmac(cc)) { + authenc = kmalloc(CRYPTO_MAX_ALG_NAME, GFP_KERNEL); + if (!authenc) + return -ENOMEM; + err = snprintf(authenc, CRYPTO_MAX_ALG_NAME, + "authenc(%s,%s)", cc->cipher_auth, ciphermode); + if (err < 0) { + kzfree(authenc); + return err; + } + ciphermode = authenc; + } + + cc->cipher_tfm.tfms_aead[0] = crypto_alloc_aead(ciphermode, 0, 0); + if (IS_ERR(cc->cipher_tfm.tfms_aead[0])) { + err = PTR_ERR(cc->cipher_tfm.tfms_aead[0]); + crypt_free_tfms(cc); + return err; + } + + kzfree(authenc); + return 0; +} + +static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) +{ + if (crypt_integrity_mode(cc)) + return crypt_alloc_tfms_aead(cc, ciphermode); + else + return crypt_alloc_tfms_skcipher(cc, ciphermode); +} + +static unsigned crypt_subkey_size(struct crypt_config *cc) +{ + return (cc->key_size - cc->key_extra_size) >> ilog2(cc->tfms_count); +} + +static unsigned crypt_authenckey_size(struct crypt_config *cc) +{ + return crypt_subkey_size(cc) + RTA_SPACE(sizeof(struct crypto_authenc_key_param)); +} + +/* + * If AEAD is composed like authenc(hmac(sha256),xts(aes)), + * the key must be for some reason in special format. + * This funcion converts cc->key to this special format. + */ +static void crypt_copy_authenckey(char *p, const void *key, + unsigned enckeylen, unsigned authkeylen) +{ + struct crypto_authenc_key_param *param; + struct rtattr *rta; + + rta = (struct rtattr *)p; + param = RTA_DATA(rta); + param->enckeylen = cpu_to_be32(enckeylen); + rta->rta_len = RTA_LENGTH(sizeof(*param)); + rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM; + p += RTA_SPACE(sizeof(*param)); + memcpy(p, key + enckeylen, authkeylen); + p += authkeylen; + memcpy(p, key, enckeylen); +} + static int crypt_setkey(struct crypt_config *cc) { unsigned subkey_size; int err = 0, i, r; /* Ignore extra keys (which are used for IV etc) */ - subkey_size = (cc->key_size - cc->key_extra_size) >> ilog2(cc->tfms_count); + subkey_size = crypt_subkey_size(cc); + if (crypt_integrity_hmac(cc)) + crypt_copy_authenckey(cc->authenc_key, cc->key, + subkey_size - cc->key_mac_size, + cc->key_mac_size); for (i = 0; i < cc->tfms_count; i++) { - r = crypto_skcipher_setkey(cc->tfms[i], - cc->key + (i * subkey_size), - subkey_size); + if (crypt_integrity_aead(cc)) + r = crypto_aead_setkey(cc->cipher_tfm.tfms_aead[i], + cc->key + (i * subkey_size), + subkey_size); + else if (crypt_integrity_hmac(cc)) + r = crypto_aead_setkey(cc->cipher_tfm.tfms_aead[i], + cc->authenc_key, crypt_authenckey_size(cc)); + else + r = crypto_skcipher_setkey(cc->cipher_tfm.tfms[i], + cc->key + (i * subkey_size), + subkey_size); if (r) err = r; } + if (crypt_integrity_hmac(cc)) + memzero_explicit(cc->authenc_key, crypt_authenckey_size(cc)); + return err; } @@ -1681,6 +2178,7 @@ static void crypt_dtr(struct dm_target *ti) mempool_destroy(cc->page_pool); mempool_destroy(cc->req_pool); + mempool_destroy(cc->tag_pool); if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) cc->iv_gen_ops->dtr(cc); @@ -1691,6 +2189,8 @@ static void crypt_dtr(struct dm_target *ti) kzfree(cc->cipher); kzfree(cc->cipher_string); kzfree(cc->key_string); + kzfree(cc->cipher_auth); + kzfree(cc->authenc_key); /* Must zero key material before freeing */ kzfree(cc); @@ -1731,7 +2231,6 @@ static int crypt_ctr_cipher(struct dm_target *ti, return -EINVAL; } cc->key_parts = cc->tfms_count; - cc->key_extra_size = 0; cc->cipher = kstrdup(cipher, GFP_KERNEL); if (!cc->cipher) @@ -1777,7 +2276,20 @@ static int crypt_ctr_cipher(struct dm_target *ti, } /* Initialize IV */ - cc->iv_size = crypto_skcipher_ivsize(any_tfm(cc)); + if (crypt_integrity_mode(cc)) + cc->iv_size = crypto_aead_ivsize(any_tfm_aead(cc)); + else + cc->iv_size = crypto_skcipher_ivsize(any_tfm(cc)); + + if (crypt_integrity_hmac(cc)) { + cc->authenc_key = kmalloc(crypt_authenckey_size(cc), GFP_KERNEL); + if (!cc->authenc_key) { + ret = -ENOMEM; + ti->error = "Error allocating authenc key space"; + goto bad; + } + } + if (cc->iv_size) /* at least a 64 bit sector number should fit in our buffer */ cc->iv_size = max(cc->iv_size, @@ -1816,6 +2328,10 @@ static int crypt_ctr_cipher(struct dm_target *ti, cc->iv_gen_ops = &crypt_iv_tcw_ops; cc->key_parts += 2; /* IV + whitening */ cc->key_extra_size = cc->iv_size + TCW_WHITENING_SIZE; + } else if (strcmp(ivmode, "random") == 0) { + cc->iv_gen_ops = &crypt_iv_random_ops; + /* Need storage space in integrity fields. */ + cc->integrity_iv_size = cc->iv_size; } else { ret = -EINVAL; ti->error = "Invalid IV mode"; @@ -1857,6 +2373,75 @@ bad_mem: return -ENOMEM; } +static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **argv) +{ + struct crypt_config *cc = ti->private; + struct dm_arg_set as; + static struct dm_arg _args[] = { + {0, 3, "Invalid number of feature args"}, + }; + unsigned int opt_params, val; + const char *opt_string, *sval; + int ret; + + /* Optional parameters */ + as.argc = argc; + as.argv = argv; + + ret = dm_read_arg_group(_args, &as, &opt_params, &ti->error); + if (ret) + return ret; + + while (opt_params--) { + opt_string = dm_shift_arg(&as); + if (!opt_string) { + ti->error = "Not enough feature arguments"; + return -EINVAL; + } + + if (!strcasecmp(opt_string, "allow_discards")) + ti->num_discard_bios = 1; + + else if (!strcasecmp(opt_string, "same_cpu_crypt")) + set_bit(DM_CRYPT_SAME_CPU, &cc->flags); + + else if (!strcasecmp(opt_string, "submit_from_crypt_cpus")) + set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); + else if (sscanf(opt_string, "integrity:%u:", &val) == 1) { + if (val == 0 || val > MAX_TAG_SIZE) { + ti->error = "Invalid integrity arguments"; + return -EINVAL; + } + cc->on_disk_tag_size = val; + sval = strchr(opt_string + strlen("integrity:"), ':') + 1; + if (!strcasecmp(sval, "aead")) { + set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags); + } else if (!strncasecmp(sval, "hmac(", strlen("hmac("))) { + struct crypto_ahash *hmac_tfm = crypto_alloc_ahash(sval, 0, 0); + if (IS_ERR(hmac_tfm)) { + ti->error = "Error initializing HMAC integrity hash."; + return PTR_ERR(hmac_tfm); + } + cc->key_mac_size = crypto_ahash_digestsize(hmac_tfm); + crypto_free_ahash(hmac_tfm); + set_bit(CRYPT_MODE_INTEGRITY_HMAC, &cc->cipher_flags); + } else if (strcasecmp(sval, "none")) { + ti->error = "Unknown integrity profile"; + return -EINVAL; + } + + cc->cipher_auth = kstrdup(sval, GFP_KERNEL); + if (!cc->cipher_auth) + return -ENOMEM; + } else { + ti->error = "Invalid feature arguments"; + return -EINVAL; + } + } + + return 0; +} + /* * Construct an encryption mapping: * [|:::] @@ -1865,18 +2450,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct crypt_config *cc; int key_size; - unsigned int opt_params; + unsigned int align_mask; unsigned long long tmpll; int ret; - size_t iv_size_padding; - struct dm_arg_set as; - const char *opt_string; + size_t iv_size_padding, additional_req_size; char dummy; - static struct dm_arg _args[] = { - {0, 3, "Invalid number of feature args"}, - }; - if (argc < 5) { ti->error = "Not enough arguments"; return -EINVAL; @@ -1896,38 +2475,59 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->key_size = key_size; ti->private = cc; + + /* Optional parameters need to be read before cipher constructor */ + if (argc > 5) { + ret = crypt_ctr_optional(ti, argc - 5, &argv[5]); + if (ret) + goto bad; + } + ret = crypt_ctr_cipher(ti, argv[0], argv[1]); if (ret < 0) goto bad; - cc->dmreq_start = sizeof(struct skcipher_request); - cc->dmreq_start += crypto_skcipher_reqsize(any_tfm(cc)); + if (crypt_integrity_mode(cc)) { + cc->dmreq_start = sizeof(struct aead_request); + cc->dmreq_start += crypto_aead_reqsize(any_tfm_aead(cc)); + align_mask = crypto_aead_alignmask(any_tfm_aead(cc)); + } else { + cc->dmreq_start = sizeof(struct skcipher_request); + cc->dmreq_start += crypto_skcipher_reqsize(any_tfm(cc)); + align_mask = crypto_skcipher_alignmask(any_tfm(cc)); + } cc->dmreq_start = ALIGN(cc->dmreq_start, __alignof__(struct dm_crypt_request)); - if (crypto_skcipher_alignmask(any_tfm(cc)) < CRYPTO_MINALIGN) { + if (align_mask < CRYPTO_MINALIGN) { /* Allocate the padding exactly */ iv_size_padding = -(cc->dmreq_start + sizeof(struct dm_crypt_request)) - & crypto_skcipher_alignmask(any_tfm(cc)); + & align_mask; } else { /* * If the cipher requires greater alignment than kmalloc * alignment, we don't know the exact position of the * initialization vector. We must assume worst case. */ - iv_size_padding = crypto_skcipher_alignmask(any_tfm(cc)); + iv_size_padding = align_mask; } ret = -ENOMEM; - cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + - sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size); + + /* ...| IV + padding | original IV | original sec. number | bio tag offset | */ + additional_req_size = sizeof(struct dm_crypt_request) + + iv_size_padding + cc->iv_size + + cc->iv_size + + sizeof(uint64_t) + + sizeof(unsigned int); + + cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + additional_req_size); if (!cc->req_pool) { ti->error = "Cannot allocate crypt request mempool"; goto bad; } cc->per_bio_data_size = ti->per_io_data_size = - ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + - sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size, + ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size, ARCH_KMALLOC_MINALIGN); cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0); @@ -1964,39 +2564,20 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->start = tmpll; - argv += 5; - argc -= 5; - - /* Optional parameters */ - if (argc) { - as.argc = argc; - as.argv = argv; - - ret = dm_read_arg_group(_args, &as, &opt_params, &ti->error); + if (crypt_integrity_mode(cc) || cc->integrity_iv_size) { + ret = crypt_integrity_ctr(cc, ti); if (ret) goto bad; - ret = -EINVAL; - while (opt_params--) { - opt_string = dm_shift_arg(&as); - if (!opt_string) { - ti->error = "Not enough feature arguments"; - goto bad; - } - - if (!strcasecmp(opt_string, "allow_discards")) - ti->num_discard_bios = 1; - - else if (!strcasecmp(opt_string, "same_cpu_crypt")) - set_bit(DM_CRYPT_SAME_CPU, &cc->flags); + cc->tag_pool_max_sectors = POOL_ENTRY_SIZE / cc->on_disk_tag_size; + if (!cc->tag_pool_max_sectors) + cc->tag_pool_max_sectors = 1; - else if (!strcasecmp(opt_string, "submit_from_crypt_cpus")) - set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); - - else { - ti->error = "Invalid feature arguments"; - goto bad; - } + cc->tag_pool = mempool_create_kmalloc_pool(MIN_IOS, + cc->tag_pool_max_sectors * cc->on_disk_tag_size); + if (!cc->tag_pool) { + ti->error = "Cannot allocate integrity tags mempool"; + goto bad; } } @@ -2062,12 +2643,29 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) * Check if bio is too large, split as needed. */ if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) && - bio_data_dir(bio) == WRITE) + (bio_data_dir(bio) == WRITE || cc->on_disk_tag_size)) dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT)); io = dm_per_bio_data(bio, cc->per_bio_data_size); crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); - io->ctx.req = (struct skcipher_request *)(io + 1); + + if (cc->on_disk_tag_size) { + unsigned tag_len = cc->on_disk_tag_size * bio_sectors(bio); + + if (unlikely(tag_len > KMALLOC_MAX_SIZE) || + unlikely(!(io->integrity_metadata = kmalloc(tag_len, + GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN)))) { + if (bio_sectors(bio) > cc->tag_pool_max_sectors) + dm_accept_partial_bio(bio, cc->tag_pool_max_sectors); + io->integrity_metadata = mempool_alloc(cc->tag_pool, GFP_NOIO); + io->integrity_metadata_from_pool = true; + } + } + + if (crypt_integrity_mode(cc)) + io->ctx.r.req_aead = (struct aead_request *)(io + 1); + else + io->ctx.r.req = (struct skcipher_request *)(io + 1); if (bio_data_dir(io->base_bio) == READ) { if (kcryptd_io_read(io, GFP_NOWAIT)) @@ -2108,6 +2706,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type, num_feature_args += !!ti->num_discard_bios; num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags); num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); + if (cc->on_disk_tag_size) + num_feature_args++; if (num_feature_args) { DMEMIT(" %d", num_feature_args); if (ti->num_discard_bios) @@ -2116,6 +2716,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" same_cpu_crypt"); if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) DMEMIT(" submit_from_crypt_cpus"); + if (cc->on_disk_tag_size) + DMEMIT(" integrity:%u:%s", cc->on_disk_tag_size, cc->cipher_auth); } break; @@ -2216,7 +2818,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 15, 0}, + .version = {1, 16, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, -- cgit v1.2.3 From 33d2f09fcb357fd1861c4959d1d3505492bf91f8 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Thu, 16 Mar 2017 15:39:40 +0100 Subject: dm crypt: introduce new format of cipher with "capi:" prefix For the new authenticated encryption we have to support generic composed modes (combination of encryption algorithm and authenticator) because this is how the kernel crypto API accesses such algorithms. To simplify the interface, we accept an algorithm directly in crypto API format. The new format is recognised by the "capi:" prefix. The dmcrypt internal IV specification is the same as for the old format. The crypto API cipher specifications format is: capi:cipher_api_spec-ivmode[:ivopts] Examples: capi:cbc(aes)-essiv:sha256 (equivalent to old aes-cbc-essiv:sha256) capi:xts(aes)-plain64 (equivalent to old aes-xts-plain64) Examples of authenticated modes: capi:gcm(aes)-random capi:authenc(hmac(sha256),xts(aes))-random capi:rfc7539(chacha20,poly1305)-random Authenticated modes can only be configured using the new cipher format. Note that this format allows user to specify arbitrary combinations that can be insecure. (Policy decision is done in cryptsetup userspace.) Authenticated encryption algorithms can be of two types, either native modes (like GCM) that performs both encryption and authentication internally, or composed modes where user can compose AEAD with separate specification of encryption algorithm and authenticator. For composed mode with HMAC (length-preserving encryption mode like an XTS and HMAC as an authenticator) we have to calculate HMAC digest size (the separate authentication key is the same size as the HMAC digest). Introduce crypt_ctr_auth_cipher() to parse the crypto API string to get HMAC algorithm and retrieve digest size from it. Also, for HMAC composed mode we need to parse the crypto API string to get the cipher mode nested in the specification. For native AEAD mode (like GCM), we can use crypto_tfm_alg_name() API to get the cipher specification. Because the HMAC composed mode is not processed the same as the native AEAD mode, the CRYPT_MODE_INTEGRITY_HMAC flag is no longer needed and "hmac" specification for the table integrity argument is removed. Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-crypt.txt | 47 ++++-- drivers/md/dm-crypt.c | 275 ++++++++++++++++++++++--------- 2 files changed, 223 insertions(+), 99 deletions(-) (limited to 'Documentation/device-mapper') diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt index a2a6627aa659..8140b71f3c54 100644 --- a/Documentation/device-mapper/dm-crypt.txt +++ b/Documentation/device-mapper/dm-crypt.txt @@ -11,14 +11,31 @@ Parameters: \ [<#opt_params> ] - Encryption cipher and an optional IV generation mode. - (In format cipher[:keycount]-chainmode-ivmode[:ivopts]). + Encryption cipher, encryption mode and Initial Vector (IV) generator. + + The cipher specifications format is: + cipher[:keycount]-chainmode-ivmode[:ivopts] Examples: - des aes-cbc-essiv:sha256 - twofish-ecb + aes-xts-plain64 + serpent-xts-plain64 + + Cipher format also supports direct specification with kernel crypt API + format (selected by capi: prefix). The IV specification is the same + as for the first format type. + This format is mainly used for specification of authenticated modes. - /proc/crypto contains supported crypto modes + The crypto API cipher specifications format is: + capi:cipher_api_spec-ivmode[:ivopts] + Examples: + capi:cbc(aes)-essiv:sha256 + capi:xts(aes)-plain64 + Examples of authenticated modes: + capi:gcm(aes)-random + capi:authenc(hmac(sha256),xts(aes))-random + capi:rfc7539(chacha20,poly1305)-random + + The /proc/crypto contains a list of curently loaded crypto modes. Key used for encryption. It is encoded either as a hexadecimal number @@ -94,20 +111,16 @@ submit_from_crypt_cpus same context. integrity:: - Calculates and verifies integrity for the encrypted device (uses - authenticated encryption). This mode requires metadata stored in per-bio - integrity structure of in size. - - This option requires that the underlying device is created by dm-integrity - target and provides exactly of per-sector metadata. + The device requires additional metadata per-sector stored + in per-bio integrity structure. This metadata must by provided + by underlying dm-integrity target. - There can by two options for . The first one is used when encryption - mode is Authenticated mode (AEAD mode), then type must be just "aead". - The second option is integrity calculated by keyed hash (HMAC), then - is for example "hmac(sha256)". + The can be "none" if metadata is used only for persistent IV. - If random IV is used (persistently stored IV in metadata per-sector), - then includes both space for random IV and authentication tag. + For Authenticated Encryption with Additional Data (AEAD) + the is "aead". An AEAD mode additionally calculates and verifies + integrity for the encrypted device. The additional space is then + used for storing authentication tag (and persistent IV if needed). Example scripts =============== diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index aa0aca1aea79..b2e48b26fd40 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -129,7 +129,6 @@ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, enum cipher_flags { CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cihper */ - CRYPT_MODE_INTEGRITY_HMAC, /* Compose authenticated mode from normal mode and HMAC */ }; /* @@ -873,19 +872,14 @@ static bool crypt_integrity_aead(struct crypt_config *cc) static bool crypt_integrity_hmac(struct crypt_config *cc) { - return test_bit(CRYPT_MODE_INTEGRITY_HMAC, &cc->cipher_flags); -} - -static bool crypt_integrity_mode(struct crypt_config *cc) -{ - return crypt_integrity_aead(cc) || crypt_integrity_hmac(cc); + return crypt_integrity_aead(cc) && cc->key_mac_size; } /* Get sg containing data */ static struct scatterlist *crypt_get_sg_data(struct crypt_config *cc, struct scatterlist *sg) { - if (unlikely(crypt_integrity_mode(cc))) + if (unlikely(crypt_integrity_aead(cc))) return &sg[2]; return sg; @@ -936,7 +930,7 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti) return -EINVAL; } - if (crypt_integrity_mode(cc)) { + if (crypt_integrity_aead(cc)) { cc->integrity_tag_size = cc->on_disk_tag_size - cc->integrity_iv_size; DMINFO("Integrity AEAD, tag size %u, IV size %u.", cc->integrity_tag_size, cc->integrity_iv_size); @@ -990,7 +984,7 @@ static void *req_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmre static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq) { - if (crypt_integrity_mode(cc)) + if (crypt_integrity_aead(cc)) return (u8 *)ALIGN((unsigned long)(dmreq + 1), crypto_aead_alignmask(any_tfm_aead(cc)) + 1); else @@ -1235,7 +1229,7 @@ static void crypt_alloc_req_aead(struct crypt_config *cc, static void crypt_alloc_req(struct crypt_config *cc, struct convert_context *ctx) { - if (crypt_integrity_mode(cc)) + if (crypt_integrity_aead(cc)) crypt_alloc_req_aead(cc, ctx); else crypt_alloc_req_skcipher(cc, ctx); @@ -1261,7 +1255,7 @@ static void crypt_free_req_aead(struct crypt_config *cc, static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_bio) { - if (crypt_integrity_mode(cc)) + if (crypt_integrity_aead(cc)) crypt_free_req_aead(cc, req, base_bio); else crypt_free_req_skcipher(cc, req, base_bio); @@ -1284,7 +1278,7 @@ static int crypt_convert(struct crypt_config *cc, atomic_inc(&ctx->cc_pending); - if (crypt_integrity_mode(cc)) + if (crypt_integrity_aead(cc)) r = crypt_convert_block_aead(cc, ctx, ctx->r.req_aead, tag_offset); else r = crypt_convert_block_skcipher(cc, ctx, ctx->r.req, tag_offset); @@ -1849,7 +1843,7 @@ static void crypt_free_tfms_skcipher(struct crypt_config *cc) static void crypt_free_tfms(struct crypt_config *cc) { - if (crypt_integrity_mode(cc)) + if (crypt_integrity_aead(cc)) crypt_free_tfms_aead(cc); else crypt_free_tfms_skcipher(cc); @@ -1879,27 +1873,12 @@ static int crypt_alloc_tfms_skcipher(struct crypt_config *cc, char *ciphermode) static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode) { - char *authenc = NULL; int err; cc->cipher_tfm.tfms = kmalloc(sizeof(struct crypto_aead *), GFP_KERNEL); if (!cc->cipher_tfm.tfms) return -ENOMEM; - /* Compose AEAD cipher with autenc(authenticator,cipher) structure */ - if (crypt_integrity_hmac(cc)) { - authenc = kmalloc(CRYPTO_MAX_ALG_NAME, GFP_KERNEL); - if (!authenc) - return -ENOMEM; - err = snprintf(authenc, CRYPTO_MAX_ALG_NAME, - "authenc(%s,%s)", cc->cipher_auth, ciphermode); - if (err < 0) { - kzfree(authenc); - return err; - } - ciphermode = authenc; - } - cc->cipher_tfm.tfms_aead[0] = crypto_alloc_aead(ciphermode, 0, 0); if (IS_ERR(cc->cipher_tfm.tfms_aead[0])) { err = PTR_ERR(cc->cipher_tfm.tfms_aead[0]); @@ -1907,13 +1886,12 @@ static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode) return err; } - kzfree(authenc); return 0; } static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) { - if (crypt_integrity_mode(cc)) + if (crypt_integrity_aead(cc)) return crypt_alloc_tfms_aead(cc, ciphermode); else return crypt_alloc_tfms_skcipher(cc, ciphermode); @@ -1964,13 +1942,13 @@ static int crypt_setkey(struct crypt_config *cc) subkey_size - cc->key_mac_size, cc->key_mac_size); for (i = 0; i < cc->tfms_count; i++) { - if (crypt_integrity_aead(cc)) - r = crypto_aead_setkey(cc->cipher_tfm.tfms_aead[i], - cc->key + (i * subkey_size), - subkey_size); - else if (crypt_integrity_hmac(cc)) + if (crypt_integrity_hmac(cc)) r = crypto_aead_setkey(cc->cipher_tfm.tfms_aead[i], cc->authenc_key, crypt_authenckey_size(cc)); + else if (crypt_integrity_aead(cc)) + r = crypto_aead_setkey(cc->cipher_tfm.tfms_aead[i], + cc->key + (i * subkey_size), + subkey_size); else r = crypto_skcipher_setkey(cc->cipher_tfm.tfms[i], cc->key + (i * subkey_size), @@ -2200,19 +2178,11 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode) { struct crypt_config *cc = ti->private; - if (crypt_integrity_mode(cc)) + if (crypt_integrity_aead(cc)) cc->iv_size = crypto_aead_ivsize(any_tfm_aead(cc)); else cc->iv_size = crypto_skcipher_ivsize(any_tfm(cc)); - if (crypt_integrity_hmac(cc)) { - cc->authenc_key = kmalloc(crypt_authenckey_size(cc), GFP_KERNEL); - if (!cc->authenc_key) { - ti->error = "Error allocating authenc key space"; - return -ENOMEM; - } - } - if (cc->iv_size) /* at least a 64 bit sector number should fit in our buffer */ cc->iv_size = max(cc->iv_size, @@ -2263,24 +2233,155 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode) return 0; } -static int crypt_ctr_cipher(struct dm_target *ti, - char *cipher_in, char *key) +/* + * Workaround to parse cipher algorithm from crypto API spec. + * The cc->cipher is currently used only in ESSIV. + * This should be probably done by crypto-api calls (once available...) + */ +static int crypt_ctr_blkdev_cipher(struct crypt_config *cc) +{ + const char *alg_name = NULL; + char *start, *end; + + if (crypt_integrity_aead(cc)) { + alg_name = crypto_tfm_alg_name(crypto_aead_tfm(any_tfm_aead(cc))); + if (!alg_name) + return -EINVAL; + if (crypt_integrity_hmac(cc)) { + alg_name = strchr(alg_name, ','); + if (!alg_name) + return -EINVAL; + } + alg_name++; + } else { + alg_name = crypto_tfm_alg_name(crypto_skcipher_tfm(any_tfm(cc))); + if (!alg_name) + return -EINVAL; + } + + start = strchr(alg_name, '('); + end = strchr(alg_name, ')'); + + if (!start && !end) { + cc->cipher = kstrdup(alg_name, GFP_KERNEL); + return cc->cipher ? 0 : -ENOMEM; + } + + if (!start || !end || ++start >= end) + return -EINVAL; + + cc->cipher = kzalloc(end - start + 1, GFP_KERNEL); + if (!cc->cipher) + return -ENOMEM; + + strncpy(cc->cipher, start, end - start); + + return 0; +} + +/* + * Workaround to parse HMAC algorithm from AEAD crypto API spec. + * The HMAC is needed to calculate tag size (HMAC digest size). + * This should be probably done by crypto-api calls (once available...) + */ +static int crypt_ctr_auth_cipher(struct crypt_config *cc, char *cipher_api) +{ + char *start, *end, *mac_alg = NULL; + struct crypto_ahash *mac; + + if (!strstarts(cipher_api, "authenc(")) + return 0; + + start = strchr(cipher_api, '('); + end = strchr(cipher_api, ','); + if (!start || !end || ++start > end) + return -EINVAL; + + mac_alg = kzalloc(end - start + 1, GFP_KERNEL); + if (!mac_alg) + return -ENOMEM; + strncpy(mac_alg, start, end - start); + + mac = crypto_alloc_ahash(mac_alg, 0, 0); + kfree(mac_alg); + + if (IS_ERR(mac)) + return PTR_ERR(mac); + + cc->key_mac_size = crypto_ahash_digestsize(mac); + crypto_free_ahash(mac); + + cc->authenc_key = kmalloc(crypt_authenckey_size(cc), GFP_KERNEL); + if (!cc->authenc_key) + return -ENOMEM; + + return 0; +} + +static int crypt_ctr_cipher_new(struct dm_target *ti, char *cipher_in, char *key, + char **ivmode, char **ivopts) +{ + struct crypt_config *cc = ti->private; + char *tmp, *cipher_api; + int ret = -EINVAL; + + cc->tfms_count = 1; + + /* + * New format (capi: prefix) + * capi:cipher_api_spec-iv:ivopts + */ + tmp = &cipher_in[strlen("capi:")]; + cipher_api = strsep(&tmp, "-"); + *ivmode = strsep(&tmp, ":"); + *ivopts = tmp; + + if (*ivmode && !strcmp(*ivmode, "lmk")) + cc->tfms_count = 64; + + cc->key_parts = cc->tfms_count; + + /* Allocate cipher */ + ret = crypt_alloc_tfms(cc, cipher_api); + if (ret < 0) { + ti->error = "Error allocating crypto tfm"; + return ret; + } + + /* Alloc AEAD, can be used only in new format. */ + if (crypt_integrity_aead(cc)) { + ret = crypt_ctr_auth_cipher(cc, cipher_api); + if (ret < 0) { + ti->error = "Invalid AEAD cipher spec"; + return -ENOMEM; + } + cc->iv_size = crypto_aead_ivsize(any_tfm_aead(cc)); + } else + cc->iv_size = crypto_skcipher_ivsize(any_tfm(cc)); + + ret = crypt_ctr_blkdev_cipher(cc); + if (ret < 0) { + ti->error = "Cannot allocate cipher string"; + return -ENOMEM; + } + + return 0; +} + +static int crypt_ctr_cipher_old(struct dm_target *ti, char *cipher_in, char *key, + char **ivmode, char **ivopts) { struct crypt_config *cc = ti->private; - char *tmp, *cipher, *chainmode, *ivmode, *ivopts, *keycount; + char *tmp, *cipher, *chainmode, *keycount; char *cipher_api = NULL; int ret = -EINVAL; char dummy; - if (strchr(cipher_in, '(')) { + if (strchr(cipher_in, '(') || crypt_integrity_aead(cc)) { ti->error = "Bad cipher specification"; return -EINVAL; } - cc->cipher_string = kstrdup(cipher_in, GFP_KERNEL); - if (!cc->cipher_string) - goto bad_mem; - /* * Legacy dm-crypt cipher specification * cipher[:keycount]-mode-iv:ivopts @@ -2303,8 +2404,8 @@ static int crypt_ctr_cipher(struct dm_target *ti, goto bad_mem; chainmode = strsep(&tmp, "-"); - ivopts = strsep(&tmp, "-"); - ivmode = strsep(&ivopts, ":"); + *ivopts = strsep(&tmp, "-"); + *ivmode = strsep(&*ivopts, ":"); if (tmp) DMWARN("Ignoring unexpected additional cipher options"); @@ -2313,12 +2414,12 @@ static int crypt_ctr_cipher(struct dm_target *ti, * For compatibility with the original dm-crypt mapping format, if * only the cipher name is supplied, use cbc-plain. */ - if (!chainmode || (!strcmp(chainmode, "plain") && !ivmode)) { + if (!chainmode || (!strcmp(chainmode, "plain") && !*ivmode)) { chainmode = "cbc"; - ivmode = "plain"; + *ivmode = "plain"; } - if (strcmp(chainmode, "ecb") && !ivmode) { + if (strcmp(chainmode, "ecb") && !*ivmode) { ti->error = "IV mechanism required"; return -EINVAL; } @@ -2338,19 +2439,45 @@ static int crypt_ctr_cipher(struct dm_target *ti, ret = crypt_alloc_tfms(cc, cipher_api); if (ret < 0) { ti->error = "Error allocating crypto tfm"; - goto bad; + kfree(cipher_api); + return ret; + } + + return 0; +bad_mem: + ti->error = "Cannot allocate cipher strings"; + return -ENOMEM; +} + +static int crypt_ctr_cipher(struct dm_target *ti, char *cipher_in, char *key) +{ + struct crypt_config *cc = ti->private; + char *ivmode = NULL, *ivopts = NULL; + int ret; + + cc->cipher_string = kstrdup(cipher_in, GFP_KERNEL); + if (!cc->cipher_string) { + ti->error = "Cannot allocate cipher strings"; + return -ENOMEM; } + if (strstarts(cipher_in, "capi:")) + ret = crypt_ctr_cipher_new(ti, cipher_in, key, &ivmode, &ivopts); + else + ret = crypt_ctr_cipher_old(ti, cipher_in, key, &ivmode, &ivopts); + if (ret) + return ret; + /* Initialize IV */ ret = crypt_ctr_ivmode(ti, ivmode); if (ret < 0) - goto bad; + return ret; /* Initialize and set key */ ret = crypt_set_key(cc, key); if (ret < 0) { ti->error = "Error decoding and setting key"; - goto bad; + return ret; } /* Allocate IV */ @@ -2358,7 +2485,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, ret = cc->iv_gen_ops->ctr(cc, ti, ivopts); if (ret < 0) { ti->error = "Error creating IV"; - goto bad; + return ret; } } @@ -2367,18 +2494,11 @@ static int crypt_ctr_cipher(struct dm_target *ti, ret = cc->iv_gen_ops->init(cc); if (ret < 0) { ti->error = "Error initialising IV"; - goto bad; + return ret; } } - ret = 0; -bad: - kfree(cipher_api); return ret; - -bad_mem: - ti->error = "Cannot allocate cipher strings"; - return -ENOMEM; } static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **argv) @@ -2424,15 +2544,6 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar sval = strchr(opt_string + strlen("integrity:"), ':') + 1; if (!strcasecmp(sval, "aead")) { set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags); - } else if (!strncasecmp(sval, "hmac(", strlen("hmac("))) { - struct crypto_ahash *hmac_tfm = crypto_alloc_ahash(sval, 0, 0); - if (IS_ERR(hmac_tfm)) { - ti->error = "Error initializing HMAC integrity hash."; - return PTR_ERR(hmac_tfm); - } - cc->key_mac_size = crypto_ahash_digestsize(hmac_tfm); - crypto_free_ahash(hmac_tfm); - set_bit(CRYPT_MODE_INTEGRITY_HMAC, &cc->cipher_flags); } else if (strcasecmp(sval, "none")) { ti->error = "Unknown integrity profile"; return -EINVAL; @@ -2495,7 +2606,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (ret < 0) goto bad; - if (crypt_integrity_mode(cc)) { + if (crypt_integrity_aead(cc)) { cc->dmreq_start = sizeof(struct aead_request); cc->dmreq_start += crypto_aead_reqsize(any_tfm_aead(cc)); align_mask = crypto_aead_alignmask(any_tfm_aead(cc)); @@ -2572,7 +2683,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->start = tmpll; - if (crypt_integrity_mode(cc) || cc->integrity_iv_size) { + if (crypt_integrity_aead(cc) || cc->integrity_iv_size) { ret = crypt_integrity_ctr(cc, ti); if (ret) goto bad; @@ -2670,7 +2781,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) } } - if (crypt_integrity_mode(cc)) + if (crypt_integrity_aead(cc)) io->ctx.r.req_aead = (struct aead_request *)(io + 1); else io->ctx.r.req = (struct skcipher_request *)(io + 1); -- cgit v1.2.3 From 8f0009a225171cc1b76a6b443de5137b26e1374b Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Thu, 16 Mar 2017 15:39:44 +0100 Subject: dm crypt: optionally support larger encryption sector size Add optional "sector_size" parameter that specifies encryption sector size (atomic unit of block device encryption). Parameter can be in range 512 - 4096 bytes and must be power of two. For compatibility reasons, the maximal IO must fit into the page limit, so the limit is set to the minimal page size possible (4096 bytes). NOTE: this device cannot yet be handled by cryptsetup if this parameter is set. IV for the sector is calculated from the 512 bytes sector offset unless the iv_large_sectors option is used. Test script using dmsetup: DEV="/dev/sdb" DEV_SIZE=$(blockdev --getsz $DEV) KEY="9c1185a5c5e9fc54612808977ee8f548b2258d31ddadef707ba62c166051b9e3cd0294c27515f2bccee924e8823ca6e124b8fc3167ed478bca702babe4e130ac" BLOCK_SIZE=4096 # dmsetup create test_crypt --table "0 $DEV_SIZE crypt aes-xts-plain64 $KEY 0 $DEV 0 1 sector_size:$BLOCK_SIZE" # dmsetup table --showkeys test_crypt Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-crypt.txt | 14 +++++ drivers/md/dm-crypt.c | 105 ++++++++++++++++++++++++------- 2 files changed, 96 insertions(+), 23 deletions(-) (limited to 'Documentation/device-mapper') diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt index 8140b71f3c54..3b3e1de21c9c 100644 --- a/Documentation/device-mapper/dm-crypt.txt +++ b/Documentation/device-mapper/dm-crypt.txt @@ -122,6 +122,20 @@ integrity:: integrity for the encrypted device. The additional space is then used for storing authentication tag (and persistent IV if needed). +sector_size: + Use as the encryption unit instead of 512 bytes sectors. + This option can be in range 512 - 4096 bytes and must be power of two. + Virtual device will announce this size as a minimal IO and logical sector. + +iv_large_sectors + IV generators will use sector number counted in units + instead of default 512 bytes sectors. + + For example, if is 4096 bytes, plain64 IV for the second + sector will be 8 (without flag) and 1 if iv_large_sectors is present. + The must be multiple of (in 512 bytes units) + if this flag is specified. + Example scripts =============== LUKS (Linux Unified Key Setup) is now the preferred way to set up disk diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b2e48b26fd40..05acc42bdd38 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -129,6 +129,7 @@ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, enum cipher_flags { CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cihper */ + CRYPT_IV_LARGE_SECTORS, /* Calculate IV from sector_size, not 512B sectors */ }; /* @@ -171,6 +172,7 @@ struct crypt_config { } iv_gen_private; sector_t iv_offset; unsigned int iv_size; + unsigned int sector_size; /* ESSIV: struct crypto_cipher *essiv_tfm */ void *iv_private; @@ -524,6 +526,11 @@ static int crypt_iv_lmk_ctr(struct crypt_config *cc, struct dm_target *ti, { struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; + if (cc->sector_size != (1 << SECTOR_SHIFT)) { + ti->error = "Unsupported sector size for LMK"; + return -EINVAL; + } + lmk->hash_tfm = crypto_alloc_shash("md5", 0, 0); if (IS_ERR(lmk->hash_tfm)) { ti->error = "Error initializing LMK hash"; @@ -677,6 +684,11 @@ static int crypt_iv_tcw_ctr(struct crypt_config *cc, struct dm_target *ti, { struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; + if (cc->sector_size != (1 << SECTOR_SHIFT)) { + ti->error = "Unsupported sector size for TCW"; + return -EINVAL; + } + if (cc->key_size <= (cc->iv_size + TCW_WHITENING_SIZE)) { ti->error = "Wrong key size for TCW"; return -EINVAL; @@ -1037,15 +1049,20 @@ static int crypt_convert_block_aead(struct crypt_config *cc, struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in); struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out); struct dm_crypt_request *dmreq; - unsigned int data_len = 1 << SECTOR_SHIFT; u8 *iv, *org_iv, *tag_iv, *tag; uint64_t *sector; int r = 0; BUG_ON(cc->integrity_iv_size && cc->integrity_iv_size != cc->iv_size); + /* Reject unexpected unaligned bio. */ + if (unlikely(bv_in.bv_offset & (cc->sector_size - 1))) + return -EIO; + dmreq = dmreq_of_req(cc, req); dmreq->iv_sector = ctx->cc_sector; + if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags)) + sector_div(dmreq->iv_sector, cc->sector_size >> SECTOR_SHIFT); dmreq->ctx = ctx; *org_tag_of_dmreq(cc, dmreq) = tag_offset; @@ -1066,13 +1083,13 @@ static int crypt_convert_block_aead(struct crypt_config *cc, sg_init_table(dmreq->sg_in, 4); sg_set_buf(&dmreq->sg_in[0], sector, sizeof(uint64_t)); sg_set_buf(&dmreq->sg_in[1], org_iv, cc->iv_size); - sg_set_page(&dmreq->sg_in[2], bv_in.bv_page, data_len, bv_in.bv_offset); + sg_set_page(&dmreq->sg_in[2], bv_in.bv_page, cc->sector_size, bv_in.bv_offset); sg_set_buf(&dmreq->sg_in[3], tag, cc->integrity_tag_size); sg_init_table(dmreq->sg_out, 4); sg_set_buf(&dmreq->sg_out[0], sector, sizeof(uint64_t)); sg_set_buf(&dmreq->sg_out[1], org_iv, cc->iv_size); - sg_set_page(&dmreq->sg_out[2], bv_out.bv_page, data_len, bv_out.bv_offset); + sg_set_page(&dmreq->sg_out[2], bv_out.bv_page, cc->sector_size, bv_out.bv_offset); sg_set_buf(&dmreq->sg_out[3], tag, cc->integrity_tag_size); if (cc->iv_gen_ops) { @@ -1094,14 +1111,14 @@ static int crypt_convert_block_aead(struct crypt_config *cc, aead_request_set_ad(req, sizeof(uint64_t) + cc->iv_size); if (bio_data_dir(ctx->bio_in) == WRITE) { aead_request_set_crypt(req, dmreq->sg_in, dmreq->sg_out, - data_len, iv); + cc->sector_size, iv); r = crypto_aead_encrypt(req); if (cc->integrity_tag_size + cc->integrity_iv_size != cc->on_disk_tag_size) memset(tag + cc->integrity_tag_size + cc->integrity_iv_size, 0, cc->on_disk_tag_size - (cc->integrity_tag_size + cc->integrity_iv_size)); } else { aead_request_set_crypt(req, dmreq->sg_in, dmreq->sg_out, - data_len + cc->integrity_tag_size, iv); + cc->sector_size + cc->integrity_tag_size, iv); r = crypto_aead_decrypt(req); } @@ -1112,8 +1129,8 @@ static int crypt_convert_block_aead(struct crypt_config *cc, if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) r = cc->iv_gen_ops->post(cc, org_iv, dmreq); - bio_advance_iter(ctx->bio_in, &ctx->iter_in, data_len); - bio_advance_iter(ctx->bio_out, &ctx->iter_out, data_len); + bio_advance_iter(ctx->bio_in, &ctx->iter_in, cc->sector_size); + bio_advance_iter(ctx->bio_out, &ctx->iter_out, cc->sector_size); return r; } @@ -1127,13 +1144,18 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc, struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out); struct scatterlist *sg_in, *sg_out; struct dm_crypt_request *dmreq; - unsigned int data_len = 1 << SECTOR_SHIFT; u8 *iv, *org_iv, *tag_iv; uint64_t *sector; int r = 0; + /* Reject unexpected unaligned bio. */ + if (unlikely(bv_in.bv_offset & (cc->sector_size - 1))) + return -EIO; + dmreq = dmreq_of_req(cc, req); dmreq->iv_sector = ctx->cc_sector; + if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags)) + sector_div(dmreq->iv_sector, cc->sector_size >> SECTOR_SHIFT); dmreq->ctx = ctx; *org_tag_of_dmreq(cc, dmreq) = tag_offset; @@ -1150,10 +1172,10 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc, sg_out = &dmreq->sg_out[0]; sg_init_table(sg_in, 1); - sg_set_page(sg_in, bv_in.bv_page, data_len, bv_in.bv_offset); + sg_set_page(sg_in, bv_in.bv_page, cc->sector_size, bv_in.bv_offset); sg_init_table(sg_out, 1); - sg_set_page(sg_out, bv_out.bv_page, data_len, bv_out.bv_offset); + sg_set_page(sg_out, bv_out.bv_page, cc->sector_size, bv_out.bv_offset); if (cc->iv_gen_ops) { /* For READs use IV stored in integrity metadata */ @@ -1171,7 +1193,7 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc, memcpy(iv, org_iv, cc->iv_size); } - skcipher_request_set_crypt(req, sg_in, sg_out, data_len, iv); + skcipher_request_set_crypt(req, sg_in, sg_out, cc->sector_size, iv); if (bio_data_dir(ctx->bio_in) == WRITE) r = crypto_skcipher_encrypt(req); @@ -1181,8 +1203,8 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc, if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) r = cc->iv_gen_ops->post(cc, org_iv, dmreq); - bio_advance_iter(ctx->bio_in, &ctx->iter_in, data_len); - bio_advance_iter(ctx->bio_out, &ctx->iter_out, data_len); + bio_advance_iter(ctx->bio_in, &ctx->iter_in, cc->sector_size); + bio_advance_iter(ctx->bio_out, &ctx->iter_out, cc->sector_size); return r; } @@ -1268,6 +1290,7 @@ static int crypt_convert(struct crypt_config *cc, struct convert_context *ctx) { unsigned int tag_offset = 0; + unsigned int sector_step = cc->sector_size / (1 << SECTOR_SHIFT); int r; atomic_set(&ctx->cc_pending, 1); @@ -1275,7 +1298,6 @@ static int crypt_convert(struct crypt_config *cc, while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) { crypt_alloc_req(cc, ctx); - atomic_inc(&ctx->cc_pending); if (crypt_integrity_aead(cc)) @@ -1298,16 +1320,16 @@ static int crypt_convert(struct crypt_config *cc, */ case -EINPROGRESS: ctx->r.req = NULL; - ctx->cc_sector++; - tag_offset++; + ctx->cc_sector += sector_step; + tag_offset += sector_step; continue; /* * The request was already processed (synchronously). */ case 0: atomic_dec(&ctx->cc_pending); - ctx->cc_sector++; - tag_offset++; + ctx->cc_sector += sector_step; + tag_offset += sector_step; cond_resched(); continue; /* @@ -2506,10 +2528,11 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar struct crypt_config *cc = ti->private; struct dm_arg_set as; static struct dm_arg _args[] = { - {0, 3, "Invalid number of feature args"}, + {0, 6, "Invalid number of feature args"}, }; unsigned int opt_params, val; const char *opt_string, *sval; + char dummy; int ret; /* Optional parameters */ @@ -2552,7 +2575,16 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar cc->cipher_auth = kstrdup(sval, GFP_KERNEL); if (!cc->cipher_auth) return -ENOMEM; - } else { + } else if (sscanf(opt_string, "sector_size:%u%c", &cc->sector_size, &dummy) == 1) { + if (cc->sector_size < (1 << SECTOR_SHIFT) || + cc->sector_size > 4096 || + (1 << ilog2(cc->sector_size) != cc->sector_size)) { + ti->error = "Invalid feature value for sector_size"; + return -EINVAL; + } + } else if (!strcasecmp(opt_string, "iv_large_sectors")) + set_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags); + else { ti->error = "Invalid feature arguments"; return -EINVAL; } @@ -2592,6 +2624,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -ENOMEM; } cc->key_size = key_size; + cc->sector_size = (1 << SECTOR_SHIFT); ti->private = cc; @@ -2664,7 +2697,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) mutex_init(&cc->bio_alloc_lock); ret = -EINVAL; - if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) { + if ((sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) || + (tmpll & ((cc->sector_size >> SECTOR_SHIFT) - 1))) { ti->error = "Invalid iv_offset sector"; goto bad; } @@ -2765,6 +2799,16 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) (bio_data_dir(bio) == WRITE || cc->on_disk_tag_size)) dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT)); + /* + * Ensure that bio is a multiple of internal sector encryption size + * and is aligned to this size as defined in IO hints. + */ + if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0)) + return -EIO; + + if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1))) + return -EIO; + io = dm_per_bio_data(bio, cc->per_bio_data_size); crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); @@ -2772,12 +2816,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) unsigned tag_len = cc->on_disk_tag_size * bio_sectors(bio); if (unlikely(tag_len > KMALLOC_MAX_SIZE) || - unlikely(!(io->integrity_metadata = kmalloc(tag_len, + unlikely(!(io->integrity_metadata = kzalloc(tag_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN)))) { if (bio_sectors(bio) > cc->tag_pool_max_sectors) dm_accept_partial_bio(bio, cc->tag_pool_max_sectors); io->integrity_metadata = mempool_alloc(cc->tag_pool, GFP_NOIO); io->integrity_metadata_from_pool = true; + memset(io->integrity_metadata, 0, cc->tag_pool_max_sectors * (1 << SECTOR_SHIFT)); } } @@ -2825,6 +2870,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type, num_feature_args += !!ti->num_discard_bios; num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags); num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); + num_feature_args += (cc->sector_size != (1 << SECTOR_SHIFT)) ? 1 : 0; + num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags); if (cc->on_disk_tag_size) num_feature_args++; if (num_feature_args) { @@ -2837,6 +2884,10 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" submit_from_crypt_cpus"); if (cc->on_disk_tag_size) DMEMIT(" integrity:%u:%s", cc->on_disk_tag_size, cc->cipher_auth); + if (cc->sector_size != (1 << SECTOR_SHIFT)) + DMEMIT(" sector_size:%d", cc->sector_size); + if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags)) + DMEMIT(" iv_large_sectors"); } break; @@ -2926,6 +2977,8 @@ static int crypt_iterate_devices(struct dm_target *ti, static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) { + struct crypt_config *cc = ti->private; + /* * Unfortunate constraint that is required to avoid the potential * for exceeding underlying device's max_segments limits -- due to @@ -2933,11 +2986,17 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) * bio that are not as physically contiguous as the original bio. */ limits->max_segment_size = PAGE_SIZE; + + if (cc->sector_size != (1 << SECTOR_SHIFT)) { + limits->logical_block_size = cc->sector_size; + limits->physical_block_size = cc->sector_size; + blk_limits_io_min(limits, cc->sector_size); + } } static struct target_type crypt_target = { .name = "crypt", - .version = {1, 16, 0}, + .version = {1, 17, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, -- cgit v1.2.3 From c2bcb2b702e4684e566d295d687228498184e0c4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 17 Mar 2017 12:40:51 -0400 Subject: dm integrity: add recovery mode In recovery mode, we don't: - replay the journal - check checksums - allow writes to the device This mode can be used as a last resort for data recovery. The motivation for recovery mode is that when there is a single error in the journal, the user should not lose access to the whole device. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-integrity.txt | 5 ++++ drivers/md/dm-integrity.c | 40 +++++++++++++++++++--------- 2 files changed, 32 insertions(+), 13 deletions(-) (limited to 'Documentation/device-mapper') diff --git a/Documentation/device-mapper/dm-integrity.txt b/Documentation/device-mapper/dm-integrity.txt index 2406f56501dc..9d9089f74206 100644 --- a/Documentation/device-mapper/dm-integrity.txt +++ b/Documentation/device-mapper/dm-integrity.txt @@ -59,6 +59,11 @@ Target arguments: either both data and tag or none of them are written. The journaled mode degrades write throughput twice because the data have to be written twice. + R - recovery mode - in this mode, journal is not replayed, + checksums are not checked and writes to the device are not + allowed. This mode is useful for data recovery if the + device cannot be activated in any of the other standard + modes. 5. the number of additional arguments diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index ecb0b592f5a0..e26a079b41ea 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1216,6 +1216,9 @@ static void integrity_metadata(struct work_struct *w) unsigned sectors_to_process = dio->range.n_sectors; sector_t sector = dio->range.logical_sector; + if (unlikely(ic->mode == 'R')) + goto skip_io; + checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT) * ic->tag_size + extra_space, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); if (!checksums) @@ -1288,6 +1291,7 @@ again: } } } +skip_io: dec_in_flight(dio); return; error: @@ -1327,6 +1331,9 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio) return -EIO; } + if (unlikely(ic->mode == 'R') && unlikely(dio->write)) + return -EIO; + get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); bio->bi_iter.bi_sector = get_data_sector(ic, area, offset); @@ -1926,6 +1933,9 @@ static void replay_journal(struct dm_integrity_c *ic) bool journal_empty; unsigned char unused, last_used, want_commit_seq; + if (ic->mode == 'R') + return; + if (ic->journal_uptodate) return; @@ -2705,7 +2715,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } } - if (!strcmp(argv[3], "J") || !strcmp(argv[3], "D")) + if (!strcmp(argv[3], "J") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) ic->mode = argv[3][0]; else { ti->error = "Invalid mode (expecting J or D)"; @@ -2864,14 +2874,15 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->error = "Error reading superblock"; goto bad; } - if (!memcmp(ic->sb->magic, SB_MAGIC, 8)) { - should_write_sb = false; - } else { - for (i = 0; i < 512; i += 8) { - if (*(__u64 *)((__u8 *)ic->sb + i)) { - r = -EINVAL; - ti->error = "The device is not initialized"; - goto bad; + should_write_sb = false; + if (memcmp(ic->sb->magic, SB_MAGIC, 8)) { + if (ic->mode != 'R') { + for (i = 0; i < 512; i += 8) { + if (*(__u64 *)((__u8 *)ic->sb + i)) { + r = -EINVAL; + ti->error = "The device is not initialized"; + goto bad; + } } } @@ -2880,7 +2891,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->error = "Could not initialize superblock"; goto bad; } - should_write_sb = true; + if (ic->mode != 'R') + should_write_sb = true; } if (ic->sb->version != SB_VERSION) { @@ -2954,9 +2966,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors); - r = create_journal(ic, &ti->error); - if (r) - goto bad; + if (ic->mode != 'R') { + r = create_journal(ic, &ti->error); + if (r) + goto bad; + } if (should_write_sb) { int r; -- cgit v1.2.3 From 4464e36e06470e3d68dc26a874f0dbdffa09a6e8 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Sat, 18 Mar 2017 01:39:12 +0100 Subject: dm raid: fix table line argument order in status Commit 3a1c1ef2f ("dm raid: enhance status interface and fixup takeover/raid0") added new table line arguments and introduced an ordering flaw. The sequence of the raid10_copies and raid10_format raid parameters got reversed which causes lvm2 userspace to fail by falsely assuming a changed table line. Sequence those 2 parameters as before so that old lvm2 can function properly with new kernels by adjusting the table line output as documented in Documentation/device-mapper/dm-raid.txt. Also, add missing version 1.10.1 highlight to the documention. Fixes: 3a1c1ef2f ("dm raid: enhance status interface and fixup takeover/raid0") Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-raid.txt | 3 ++ drivers/md/dm-raid.c | 62 +++++++++++++++++---------------- 2 files changed, 35 insertions(+), 30 deletions(-) (limited to 'Documentation/device-mapper') diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index cd2cb2fc85ea..95c4c8dd6dd1 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -331,3 +331,6 @@ Version History 'D' on the status line. If '- -' is passed into the constructor, emit '- -' on the table line and '-' as the status line health character. 1.10.0 Add support for raid4/5/6 journal device +1.10.1 Fix data corruption on reshape request +1.11.0 Fix table line argument order + (wrong raid10_copies/raid10_format sequence) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index f8564d63982f..e07185fca638 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3382,38 +3382,28 @@ static void raid_status(struct dm_target *ti, status_type_t type, hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) + hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2 + (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0); + /* Emit table line */ + /* This has to be in the documented order for userspace! */ DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors); - if (test_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags)) - DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT), - raid10_md_layout_to_format(mddev->layout)); - if (test_bit(__CTR_FLAG_RAID10_COPIES, &rs->ctr_flags)) - DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_COPIES), - raid10_md_layout_to_copies(mddev->layout)); - if (test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) - DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC)); if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_SYNC)); - if (test_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags)) - DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE), - (unsigned long long) to_sector(mddev->bitmap_info.chunksize)); - if (test_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) - DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET), - (unsigned long long) rs->data_offset); - if (test_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) - DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP), - mddev->bitmap_info.daemon_sleep); - if (test_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags)) - DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_DELTA_DISKS), - max(rs->delta_disks, mddev->delta_disks)); - if (test_bit(__CTR_FLAG_STRIPE_CACHE, &rs->ctr_flags)) - DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_STRIPE_CACHE), - max_nr_stripes); + if (test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) + DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC)); if (rebuild_disks) for (i = 0; i < rs->raid_disks; i++) if (test_bit(rs->dev[i].rdev.raid_disk, (void *) rs->rebuild_disks)) DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD), rs->dev[i].rdev.raid_disk); + if (test_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) + DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP), + mddev->bitmap_info.daemon_sleep); + if (test_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) + DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE), + mddev->sync_speed_min); + if (test_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) + DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE), + mddev->sync_speed_max); if (write_mostly_params) for (i = 0; i < rs->raid_disks; i++) if (test_bit(WriteMostly, &rs->dev[i].rdev.flags)) @@ -3422,12 +3412,24 @@ static void raid_status(struct dm_target *ti, status_type_t type, if (test_bit(__CTR_FLAG_MAX_WRITE_BEHIND, &rs->ctr_flags)) DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_WRITE_BEHIND), mddev->bitmap_info.max_write_behind); - if (test_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) - DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE), - mddev->sync_speed_max); - if (test_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) - DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE), - mddev->sync_speed_min); + if (test_bit(__CTR_FLAG_STRIPE_CACHE, &rs->ctr_flags)) + DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_STRIPE_CACHE), + max_nr_stripes); + if (test_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags)) + DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE), + (unsigned long long) to_sector(mddev->bitmap_info.chunksize)); + if (test_bit(__CTR_FLAG_RAID10_COPIES, &rs->ctr_flags)) + DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_COPIES), + raid10_md_layout_to_copies(mddev->layout)); + if (test_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags)) + DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT), + raid10_md_layout_to_format(mddev->layout)); + if (test_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags)) + DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_DELTA_DISKS), + max(rs->delta_disks, mddev->delta_disks)); + if (test_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags)) + DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET), + (unsigned long long) rs->data_offset); if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV), __get_dev_name(rs->journal_dev.dev)); @@ -3791,7 +3793,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 10, 1}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, -- cgit v1.2.3 From 6e53636fe81465d6810f4e0910e7238edf12a133 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 22 Mar 2017 17:44:38 +0100 Subject: dm raid: add raid4/5/6 journal write-back support via journal_mode option Commit 63c32ed4afc ("dm raid: add raid4/5/6 journaling support") added journal support to close the raid4/5/6 "write hole" -- in terms of writethrough caching. Introduce a "journal_mode" feature and use the new r5c_journal_mode_set() API to add support for switching the journal device's cache mode between write-through (the current default) and write-back. NOTE: If the journal device is not layered on resilent storage and it fails, write-through mode will cause the "write hole" to reoccur. But if the journal fails while in write-back mode it will cause data loss for any dirty cache entries unless resilent storage is used for the journal. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-raid.txt | 11 +++- drivers/md/dm-raid.c | 104 ++++++++++++++++++++++++++++---- 2 files changed, 101 insertions(+), 14 deletions(-) (limited to 'Documentation/device-mapper') diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index 95c4c8dd6dd1..7e06e65586d4 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -170,6 +170,13 @@ The target is named "raid" and it accepts the following parameters: Takeover/reshape is not possible with a raid4/5/6 journal device; it has to be deconfigured before requesting these. + [journal_mode ] + This option sets the caching mode on journaled raid4/5/6 raid sets + (see 'journal_dev ' above) to 'writethrough' or 'writeback'. + If 'writeback' is selected the journal device has to be resilient + and must not suffer from the 'write hole' problem itself (e.g. use + raid1 or raid10) to avoid a single point of failure. + <#raid_devs>: The number of devices composing the array. Each device consists of two entries. The first is the device containing the metadata (if any); the second is the one containing the @@ -254,7 +261,8 @@ recovery. Here is a fuller description of the individual fields: The current data offset to the start of the user data on each component device of a raid set (see the respective raid parameter to support out-of-place reshaping). - 'A' - active raid4/5/6 journal device. + 'A' - active write-through journal device. + 'a' - active write-back journal device. 'D' - dead journal device. '-' - no journal device. @@ -334,3 +342,4 @@ Version History 1.10.1 Fix data corruption on reshape request 1.11.0 Fix table line argument order (wrong raid10_copies/raid10_format sequence) +1.11.1 Add raid4/5/6 journal write-back support via journal_mode option diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index e07185fca638..0f61bb659b73 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2010-2011 Neil Brown - * Copyright (C) 2010-2016 Red Hat, Inc. All rights reserved. + * Copyright (C) 2010-2017 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. */ @@ -79,7 +79,10 @@ struct raid_dev { #define __CTR_FLAG_RAID10_USE_NEAR_SETS 14 /* 2 */ /* Only with raid10! */ /* New for v1.10.0 */ -#define __CTR_FLAG_JOURNAL_DEV 15 /* 2 */ /* Only with raid4/5/6! */ +#define __CTR_FLAG_JOURNAL_DEV 15 /* 2 */ /* Only with raid4/5/6 (journal device)! */ + +/* New for v1.11.1 */ +#define __CTR_FLAG_JOURNAL_MODE 16 /* 2 */ /* Only with raid4/5/6 (journal mode)! */ /* * Flags for rs->ctr_flags field. @@ -100,6 +103,7 @@ struct raid_dev { #define CTR_FLAG_DATA_OFFSET (1 << __CTR_FLAG_DATA_OFFSET) #define CTR_FLAG_RAID10_USE_NEAR_SETS (1 << __CTR_FLAG_RAID10_USE_NEAR_SETS) #define CTR_FLAG_JOURNAL_DEV (1 << __CTR_FLAG_JOURNAL_DEV) +#define CTR_FLAG_JOURNAL_MODE (1 << __CTR_FLAG_JOURNAL_MODE) #define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET) @@ -175,7 +179,8 @@ struct raid_dev { CTR_FLAG_REGION_SIZE | \ CTR_FLAG_DELTA_DISKS | \ CTR_FLAG_DATA_OFFSET | \ - CTR_FLAG_JOURNAL_DEV) + CTR_FLAG_JOURNAL_DEV | \ + CTR_FLAG_JOURNAL_MODE) #define RAID6_VALID_FLAGS (CTR_FLAG_SYNC | \ CTR_FLAG_REBUILD | \ @@ -186,7 +191,8 @@ struct raid_dev { CTR_FLAG_REGION_SIZE | \ CTR_FLAG_DELTA_DISKS | \ CTR_FLAG_DATA_OFFSET | \ - CTR_FLAG_JOURNAL_DEV) + CTR_FLAG_JOURNAL_DEV | \ + CTR_FLAG_JOURNAL_MODE) /* ...valid options definitions per raid level */ /* @@ -239,6 +245,7 @@ struct raid_set { struct journal_dev { struct dm_dev *dev; struct md_rdev rdev; + int mode; } journal_dev; struct raid_dev dev[0]; @@ -326,6 +333,7 @@ static struct arg_name_flag { { CTR_FLAG_DELTA_DISKS, "delta_disks"}, { CTR_FLAG_RAID10_USE_NEAR_SETS, "raid10_use_near_sets"}, { CTR_FLAG_JOURNAL_DEV, "journal_dev" }, + { CTR_FLAG_JOURNAL_MODE, "journal_mode" }, }; /* Return argument name string for given @flag */ @@ -344,6 +352,39 @@ static const char *dm_raid_arg_name_by_flag(const uint32_t flag) return NULL; } +/* Define correlation of raid456 journal cache modes and dm-raid target line parameters */ +static struct { + const int mode; + const char *param; +} _raid456_journal_mode[] = { + { R5C_JOURNAL_MODE_WRITE_THROUGH , "writethrough" }, + { R5C_JOURNAL_MODE_WRITE_BACK , "writeback" } +}; + +/* Return MD raid4/5/6 journal mode for dm @journal_mode one */ +static int dm_raid_journal_mode_to_md(const char *mode) +{ + int m = ARRAY_SIZE(_raid456_journal_mode); + + while (m--) + if (!strcasecmp(mode, _raid456_journal_mode[m].param)) + return _raid456_journal_mode[m].mode; + + return -EINVAL; +} + +/* Return dm-raid raid4/5/6 journal mode string for @mode */ +static const char *md_journal_mode_to_dm_raid(const int mode) +{ + int m = ARRAY_SIZE(_raid456_journal_mode); + + while (m--) + if (mode == _raid456_journal_mode[m].mode) + return _raid456_journal_mode[m].param; + + return "unknown"; +} + /* * Bool helpers to test for various raid levels of a raid set. * It's level as reported by the superblock rather than @@ -1183,7 +1224,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, continue; } - /* "journal_dev dev" */ + /* "journal_dev " */ if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV))) { int r; struct md_rdev *jdev; @@ -1211,10 +1252,32 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, rs->ti->error = "No space for raid4/5/6 journal"; return -ENOSPC; } + rs->journal_dev.mode = R5C_JOURNAL_MODE_WRITE_THROUGH; set_bit(Journal, &jdev->flags); continue; } + /* "journal_mode " ("journal_dev" mandatory!) */ + if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_MODE))) { + int r; + + if (!test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) { + rs->ti->error = "raid4/5/6 'journal_mode' is invalid without 'journal_dev'"; + return -EINVAL; + } + if (test_and_set_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) { + rs->ti->error = "Only one raid4/5/6 'journal_mode' argument allowed"; + return -EINVAL; + } + r = dm_raid_journal_mode_to_md(arg); + if (r < 0) { + rs->ti->error = "Invalid 'journal_mode' argument"; + return r; + } + rs->journal_dev.mode = r; + continue; + } + /* * Parameters with number values from here on. */ @@ -3076,6 +3139,16 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) rs->callbacks.congested_fn = raid_is_congested; dm_table_add_target_callbacks(ti->table, &rs->callbacks); + /* If raid4/5/6 journal mode explictely requested (only possible with journal dev) -> set it */ + if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) { + r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode); + if (r) { + ti->error = "Failed to set raid4/5/6 journal mode"; + mddev_unlock(&rs->md); + goto bad_journal_mode_set; + } + } + mddev_suspend(&rs->md); /* Try to adjust the raid4/5/6 stripe cache size to the stripe size */ @@ -3109,6 +3182,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) mddev_unlock(&rs->md); return 0; +bad_journal_mode_set: bad_stripe_cache: bad_check_reshape: md_stop(&rs->md); @@ -3180,18 +3254,18 @@ static const char *decipher_sync_action(struct mddev *mddev) * Status characters: * * 'D' = Dead/Failed raid set component or raid4/5/6 journal device - * 'a' = Alive but not in-sync - * 'A' = Alive and in-sync raid set component or alive raid4/5/6 journal device + * 'a' = Alive but not in-sync raid set component _or_ alive raid4/5/6 'write_back' journal device + * 'A' = Alive and in-sync raid set component _or_ alive raid4/5/6 'write_through' journal device * '-' = Non-existing device (i.e. uspace passed '- -' into the ctr) */ -static const char *__raid_dev_status(struct md_rdev *rdev, bool array_in_sync) +static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev, bool array_in_sync) { if (!rdev->bdev) return "-"; else if (test_bit(Faulty, &rdev->flags)) return "D"; else if (test_bit(Journal, &rdev->flags)) - return "A"; + return (rs->journal_dev.mode == R5C_JOURNAL_MODE_WRITE_THROUGH) ? "A" : "a"; else if (!array_in_sync || !test_bit(In_sync, &rdev->flags)) return "a"; else @@ -3315,7 +3389,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, /* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */ for (i = 0; i < rs->raid_disks; i++) - DMEMIT(__raid_dev_status(&rs->dev[i].rdev, array_in_sync)); + DMEMIT(__raid_dev_status(rs, &rs->dev[i].rdev, array_in_sync)); /* * In-sync/Reshape ratio: @@ -3366,7 +3440,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, * v1.10.0+: */ DMEMIT(" %s", test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? - __raid_dev_status(&rs->journal_dev.rdev, 0) : "-"); + __raid_dev_status(rs, &rs->journal_dev.rdev, 0) : "-"); break; case STATUSTYPE_TABLE: @@ -3381,7 +3455,8 @@ static void raid_status(struct dm_target *ti, status_type_t type, write_mostly_params + hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) + hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2 + - (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0); + (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0) + + (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags) ? 2 : 0); /* Emit table line */ /* This has to be in the documented order for userspace! */ @@ -3433,6 +3508,9 @@ static void raid_status(struct dm_target *ti, status_type_t type, if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV), __get_dev_name(rs->journal_dev.dev)); + if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) + DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_MODE), + md_journal_mode_to_dm_raid(rs->journal_dev.mode)); DMEMIT(" %d", rs->raid_disks); for (i = 0; i < rs->raid_disks; i++) DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev), @@ -3793,7 +3871,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 11, 0}, + .version = {1, 11, 1}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, -- cgit v1.2.3 From 56b67a4f292f14548f4046979d46d07bcf8ba971 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 18 Apr 2017 16:51:50 -0400 Subject: dm integrity: various small changes and cleanups Some coding style changes. Fix a bug that the array test_tag has insufficient size if the digest size of internal has is bigger than the tag size. The function __fls is undefined for zero argument, this patch fixes undefined behavior if the user sets zero interleave_sectors. Fix the limit of optional arguments to 8. Don't allocate crypt_data on the stack to avoid a BUG with debug kernel. Rename all optional argument names to have underscores rather than dashes. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-integrity.txt | 20 ++--- drivers/md/dm-integrity.c | 116 ++++++++++++++------------- 2 files changed, 72 insertions(+), 64 deletions(-) (limited to 'Documentation/device-mapper') diff --git a/Documentation/device-mapper/dm-integrity.txt b/Documentation/device-mapper/dm-integrity.txt index 9d9089f74206..ced34cd915ef 100644 --- a/Documentation/device-mapper/dm-integrity.txt +++ b/Documentation/device-mapper/dm-integrity.txt @@ -69,17 +69,17 @@ Target arguments: Additional arguments: -journal-sectors:number +journal_sectors:number The size of journal, this argument is used only if formatting the device. If the device is already formatted, the value from the superblock is used. -interleave-sectors:number +interleave_sectors:number The number of interleaved sectors. This values is rounded down to a power of two. If the device is already formatted, the value from the superblock is used. -buffer-sectors:number +buffer_sectors:number The number of sectors in one buffer. The value is rounded down to a power of two. @@ -87,17 +87,17 @@ buffer-sectors:number configurable. The large buffer size means that the I/O size will be larger, but there could be less I/Os issued. -journal-watermark:number +journal_watermark:number The journal watermark in percents. When the size of the journal exceeds this watermark, the thread that flushes the journal will be started. -commit-time:number +commit_time:number Commit time in milliseconds. When this time passes, the journal is written. The journal is also written immediatelly if the FLUSH request is received. -internal-hash:algorithm(:key) (the key is optional) +internal_hash:algorithm(:key) (the key is optional) Use internal hash or crc. When this argument is used, the dm-integrity target won't accept integrity tags from the upper target, but it will automatically @@ -113,7 +113,7 @@ internal-hash:algorithm(:key) (the key is optional) from an upper layer target, such as dm-crypt. The upper layer target should check the validity of the integrity tags. -journal-crypt:algorithm(:key) (the key is optional) +journal_crypt:algorithm(:key) (the key is optional) Encrypt the journal using given algorithm to make sure that the attacker can't read the journal. You can use a block cipher here (such as "cbc(aes)") or a stream cipher (for example "chacha20", @@ -125,7 +125,7 @@ journal-crypt:algorithm(:key) (the key is optional) the size of files that were written. To protect against this situation, you can encrypt the journal. -journal-mac:algorithm(:key) (the key is optional) +journal_mac:algorithm(:key) (the key is optional) Protect sector numbers in the journal from accidental or malicious modification. To protect against accidental modification, use a crc algorithm, to protect against malicious modification, use a @@ -137,7 +137,7 @@ journal-mac:algorithm(:key) (the key is optional) this stage. -The journal mode (D/J), buffer-sectors, journal-watermark, commit-time can +The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can be changed when reloading the target (load an inactive table and swap the tables with suspend and resume). The other arguments should not be changed when reloading the target because the layout of disk data depend on them @@ -158,7 +158,7 @@ The layout of the formatted block device: provides (i.e. the size of the device minus the size of all metadata and padding). The user of this target should not send bios that access data beyond the "provided data sectors" limit. - * flags - a flag is set if journal-mac is used + * flags - a flag is set if journal_mac is used * journal The journal is divided into sections, each section contains: * metadata area (4kiB), it contains journal entries diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index e26a079b41ea..95cdffbb206c 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -27,8 +27,8 @@ #define DEFAULT_JOURNAL_WATERMARK 50 #define DEFAULT_SYNC_MSEC 10000 #define DEFAULT_MAX_JOURNAL_SECTORS 131072 -#define MIN_INTERLEAVE_SECTORS 3 -#define MAX_INTERLEAVE_SECTORS 31 +#define MIN_LOG2_INTERLEAVE_SECTORS 3 +#define MAX_LOG2_INTERLEAVE_SECTORS 31 #define METADATA_WORKQUEUE_MAX_ACTIVE 16 /* @@ -414,7 +414,7 @@ static void page_list_location(struct dm_integrity_c *ic, unsigned section, unsi { unsigned sector; - access_journal_check(ic, section, offset, false, "access_journal"); + access_journal_check(ic, section, offset, false, "page_list_location"); sector = section * ic->journal_section_sectors + offset; @@ -1211,7 +1211,7 @@ static void integrity_metadata(struct work_struct *w) unsigned digest_size = crypto_shash_digestsize(ic->internal_hash); struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); char *checksums; - unsigned extra_space = digest_size > ic->tag_size ? digest_size - ic->tag_size : 0; + unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0; char checksums_onstack[ic->tag_size + extra_space]; unsigned sectors_to_process = dio->range.n_sectors; sector_t sector = dio->range.logical_sector; @@ -1820,7 +1820,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, unlikely(from_replay) && #endif ic->internal_hash) { - unsigned char test_tag[ic->tag_size]; + char test_tag[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)]; integrity_sector_checksum(ic, sec + (l - j), (char *)access_journal_data(ic, i, l), test_tag); @@ -2135,11 +2135,11 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, arg_count += !!ic->journal_mac_alg.alg_string; DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start, ic->tag_size, ic->mode, arg_count); - DMEMIT(" journal-sectors:%u", ic->initial_sectors - SB_SECTORS); - DMEMIT(" interleave-sectors:%u", 1U << ic->sb->log2_interleave_sectors); - DMEMIT(" buffer-sectors:%u", 1U << ic->log2_buffer_sectors); - DMEMIT(" journal-watermark:%u", (unsigned)watermark_percentage); - DMEMIT(" commit-time:%u", ic->autocommit_msec); + DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); + DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); + DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); + DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage); + DMEMIT(" commit_time:%u", ic->autocommit_msec); #define EMIT_ALG(a, n) \ do { \ @@ -2149,9 +2149,9 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, DMEMIT(":%s", ic->a.key_string);\ } \ } while (0) - EMIT_ALG(internal_hash_alg, "internal-hash"); - EMIT_ALG(journal_crypt_alg, "journal-crypt"); - EMIT_ALG(journal_mac_alg, "journal-mac"); + EMIT_ALG(internal_hash_alg, "internal_hash"); + EMIT_ALG(journal_crypt_alg, "journal_crypt"); + EMIT_ALG(journal_mac_alg, "journal_mac"); break; } } @@ -2213,6 +2213,7 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec unsigned journal_sections; int test_bit; + memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT); memcpy(ic->sb->magic, SB_MAGIC, 8); ic->sb->version = SB_VERSION; ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size); @@ -2225,9 +2226,11 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec journal_sections = 1; ic->sb->journal_sections = cpu_to_le32(journal_sections); + if (!interleave_sectors) + interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; ic->sb->log2_interleave_sectors = __fls(interleave_sectors); - ic->sb->log2_interleave_sectors = max((__u8)MIN_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); - ic->sb->log2_interleave_sectors = min((__u8)MAX_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); + ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); + ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); ic->provided_data_sectors = 0; for (test_bit = fls64(ic->device_sectors) - 1; test_bit >= 3; test_bit--) { @@ -2238,7 +2241,7 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec ic->provided_data_sectors = prev_data_sectors; } - if (!le64_to_cpu(ic->provided_data_sectors)) + if (!ic->provided_data_sectors) return -EINVAL; ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); @@ -2444,6 +2447,12 @@ static int create_journal(struct dm_integrity_c *ic, char **error) int r = 0; unsigned i; __u64 journal_pages, journal_desc_size, journal_tree_size; + unsigned char *crypt_data = NULL; + + ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL); + ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL); + ic->commit_ids[2] = cpu_to_le64(0x3333333333333333ULL); + ic->commit_ids[3] = cpu_to_le64(0x4444444444444444ULL); journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors, PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT); @@ -2541,7 +2550,13 @@ static int create_journal(struct dm_integrity_c *ic, char **error) SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt); unsigned char iv[ivsize]; unsigned crypt_len = roundup(ivsize, blocksize); - unsigned char crypt_data[crypt_len]; + + crypt_data = kmalloc(crypt_len, GFP_KERNEL); + if (!crypt_data) { + *error = "Unable to allocate crypt data"; + r = -ENOMEM; + goto bad; + } skcipher_request_set_tfm(req, ic->journal_crypt); @@ -2630,38 +2645,38 @@ retest_commit_id: r = -ENOMEM; } bad: + kfree(crypt_data); return r; } /* - * Construct a integrity mapping: + * Construct a integrity mapping * * Arguments: * device * offset from the start of the device * tag size - * D - direct writes, J - journal writes + * D - direct writes, J - journal writes, R - recovery mode * number of optional arguments * optional arguments: - * journal-sectors - * interleave-sectors - * buffer-sectors - * journal-watermark - * commit-time - * internal-hash - * journal-crypt - * journal-mac + * journal_sectors + * interleave_sectors + * buffer_sectors + * journal_watermark + * commit_time + * internal_hash + * journal_crypt + * journal_mac */ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) { struct dm_integrity_c *ic; char dummy; int r; - unsigned i; unsigned extra_args; struct dm_arg_set as; static struct dm_arg _args[] = { - {0, 7, "Invalid number of feature args"}, + {0, 8, "Invalid number of feature args"}, }; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; bool should_write_sb; @@ -2683,11 +2698,6 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->private = ic; ti->per_io_data_size = sizeof(struct dm_integrity_io); - ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL); - ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL); - ic->commit_ids[2] = cpu_to_le64(0x3333333333333333ULL); - ic->commit_ids[3] = cpu_to_le64(0x4444444444444444ULL); - ic->in_progress = RB_ROOT; init_waitqueue_head(&ic->endio_wait); bio_list_init(&ic->flush_bio_list); @@ -2718,7 +2728,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) if (!strcmp(argv[3], "J") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) ic->mode = argv[3][0]; else { - ti->error = "Invalid mode (expecting J or D)"; + ti->error = "Invalid mode (expecting J, D, R)"; r = -EINVAL; goto bad; } @@ -2746,29 +2756,29 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->error = "Not enough feature arguments"; goto bad; } - if (sscanf(opt_string, "journal-sectors:%u%c", &val, &dummy) == 1) + if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1) journal_sectors = val; - else if (sscanf(opt_string, "interleave-sectors:%u%c", &val, &dummy) == 1) + else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1) interleave_sectors = val; - else if (sscanf(opt_string, "buffer-sectors:%u%c", &val, &dummy) == 1) + else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1) buffer_sectors = val; - else if (sscanf(opt_string, "journal-watermark:%u%c", &val, &dummy) == 1 && val <= 100) + else if (sscanf(opt_string, "journal_watermark:%u%c", &val, &dummy) == 1 && val <= 100) journal_watermark = val; - else if (sscanf(opt_string, "commit-time:%u%c", &val, &dummy) == 1) + else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) sync_msec = val; - else if (!memcmp(opt_string, "internal-hash:", strlen("internal-hash:"))) { + else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, - "Invalid internal-hash argument"); + "Invalid internal_hash argument"); if (r) goto bad; - } else if (!memcmp(opt_string, "journal-crypt:", strlen("journal-crypt:"))) { + } else if (!memcmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) { r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error, - "Invalid journal-crypt argument"); + "Invalid journal_crypt argument"); if (r) goto bad; - } else if (!memcmp(opt_string, "journal-mac:", strlen("journal-mac:"))) { + } else if (!memcmp(opt_string, "journal_mac:", strlen("journal_mac:"))) { r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error, - "Invalid journal-mac argument"); + "Invalid journal_mac argument"); if (r) goto bad; } else { @@ -2877,12 +2887,10 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) should_write_sb = false; if (memcmp(ic->sb->magic, SB_MAGIC, 8)) { if (ic->mode != 'R') { - for (i = 0; i < 512; i += 8) { - if (*(__u64 *)((__u8 *)ic->sb + i)) { - r = -EINVAL; - ti->error = "The device is not initialized"; - goto bad; - } + if (memchr_inv(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT)) { + r = -EINVAL; + ti->error = "The device is not initialized"; + goto bad; } } @@ -2906,8 +2914,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } /* make sure that ti->max_io_len doesn't overflow */ - if (ic->sb->log2_interleave_sectors < MIN_INTERLEAVE_SECTORS || - ic->sb->log2_interleave_sectors > MAX_INTERLEAVE_SECTORS) { + if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS || + ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) { r = -EINVAL; ti->error = "Invalid interleave_sectors in the superblock"; goto bad; -- cgit v1.2.3 From 9d609f85b7eb96697ae22ad1d47a3a3920174ba8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 18 Apr 2017 16:51:52 -0400 Subject: dm integrity: support larger block sizes The DM integrity block size can now be 512, 1k, 2k or 4k. Using larger blocks reduces metadata handling overhead. The block size can be configured at table load time using the "block_size:" option; where is expressed in bytes (defult is still 512 bytes). It is safe to use larger block sizes with DM integrity, because the DM integrity journal makes sure that the whole block is updated atomically even if the underlying device doesn't support atomic writes of that size (e.g. 4k block ontop of a 512b device). Depends-on: 2859323e ("block: fix blk_integrity_register to use template's interval_exp if not 0") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-integrity.txt | 5 + drivers/md/dm-integrity.c | 219 +++++++++++++++++++++------ 2 files changed, 179 insertions(+), 45 deletions(-) (limited to 'Documentation/device-mapper') diff --git a/Documentation/device-mapper/dm-integrity.txt b/Documentation/device-mapper/dm-integrity.txt index ced34cd915ef..f33e3ade7a09 100644 --- a/Documentation/device-mapper/dm-integrity.txt +++ b/Documentation/device-mapper/dm-integrity.txt @@ -136,6 +136,11 @@ journal_mac:algorithm(:key) (the key is optional) the journal. Thus, modified sector number would be detected at this stage. +block_size:number + The size of a data block in bytes. The larger the block size the + less overhead there is for per-block integrity metadata. + Supported values are 512, 1024, 2048 and 4096 bytes. If not + specified the default block size is 512 bytes. The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can be changed when reloading the target (load an inactive table and swap the diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 95cdffbb206c..0354af4cd713 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +46,7 @@ #define SB_MAGIC "integrt" #define SB_VERSION 1 #define SB_SECTORS 8 +#define MAX_SECTORS_PER_BLOCK 8 struct superblock { __u8 magic[8]; @@ -54,6 +56,7 @@ struct superblock { __u32 journal_sections; __u64 provided_data_sectors; /* userspace uses this value */ __u32 flags; + __u8 log2_sectors_per_block; }; #define SB_FLAG_HAVE_JOURNAL_MAC 0x1 @@ -71,10 +74,12 @@ struct journal_entry { } s; __u64 sector; } u; - commit_id_t last_bytes; - __u8 tag[0]; + commit_id_t last_bytes[0]; + /* __u8 tag[0]; */ }; +#define journal_entry_tag(ic, je) ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block]) + #if BITS_PER_LONG == 64 #define journal_entry_set_sector(je, x) do { smp_wmb(); ACCESS_ONCE((je)->u.sector) = cpu_to_le64(x); } while (0) #define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector) @@ -100,7 +105,7 @@ struct journal_sector { commit_id_t commit_id; }; -#define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, tag)) +#define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK])) #define METADATA_PADDING_SECTORS 8 @@ -162,7 +167,7 @@ struct dm_integrity_c { unsigned short journal_entry_size; unsigned char journal_entries_per_sector; unsigned char journal_section_entries; - unsigned char journal_section_sectors; + unsigned short journal_section_sectors; unsigned journal_sections; unsigned journal_entries; sector_t device_sectors; @@ -170,6 +175,7 @@ struct dm_integrity_c { unsigned metadata_run; __s8 log2_metadata_run; __u8 log2_buffer_sectors; + __u8 sectors_per_block; unsigned char mode; bool suspending; @@ -332,6 +338,12 @@ static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector, *offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1); } +#define sector_to_block(ic, n) \ +do { \ + BUG_ON((n) & (unsigned)((ic)->sectors_per_block - 1)); \ + (n) >>= (ic)->sb->log2_sectors_per_block; \ +} while (0) + static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area, sector_t offset, unsigned *metadata_offset) { @@ -345,6 +357,8 @@ static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t ms += area * ic->metadata_run; ms >>= ic->log2_buffer_sectors; + sector_to_block(ic, offset); + if (likely(ic->log2_tag_size >= 0)) { ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size); mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1); @@ -459,9 +473,13 @@ static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, uns static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned section, unsigned n) { - access_journal_check(ic, section, n, true, "access_journal_data"); + n <<= ic->sb->log2_sectors_per_block; - return access_journal(ic, section, n + JOURNAL_BLOCK_SECTORS); + n += JOURNAL_BLOCK_SECTORS; + + access_journal_check(ic, section, n, false, "access_journal_data"); + + return access_journal(ic, section, n); } static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result[JOURNAL_MAC_SIZE]) @@ -812,6 +830,8 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig int r; unsigned sector, pl_index, pl_offset; + BUG_ON((target | n_sectors | offset) & (unsigned)(ic->sectors_per_block - 1)); + if (unlikely(dm_integrity_failed(ic))) { fn(-1UL, data); return; @@ -846,6 +866,8 @@ static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range * struct rb_node **n = &ic->in_progress.rb_node; struct rb_node *parent; + BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1)); + parent = NULL; while (*n) { @@ -1175,7 +1197,7 @@ static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector goto failed; } - r = crypto_shash_update(req, data, 1 << SECTOR_SHIFT); + r = crypto_shash_update(req, data, ic->sectors_per_block << SECTOR_SHIFT); if (unlikely(r < 0)) { dm_integrity_io_error(ic, "crypto_shash_update", r); goto failed; @@ -1219,7 +1241,7 @@ static void integrity_metadata(struct work_struct *w) if (unlikely(ic->mode == 'R')) goto skip_io; - checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT) * ic->tag_size + extra_space, + checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); if (!checksums) checksums = checksums_onstack; @@ -1235,9 +1257,9 @@ again: do { integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr); checksums_ptr += ic->tag_size; - sectors_to_process--; - pos += 1 << SECTOR_SHIFT; - sector++; + sectors_to_process -= ic->sectors_per_block; + pos += ic->sectors_per_block << SECTOR_SHIFT; + sector += ic->sectors_per_block; } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack); kunmap_atomic(mem); @@ -1272,7 +1294,9 @@ again: if (bip) { struct bio_vec biv; struct bvec_iter iter; - unsigned data_to_process = dio->range.n_sectors * ic->tag_size; + unsigned data_to_process = dio->range.n_sectors; + sector_to_block(ic, data_to_process); + data_to_process *= ic->tag_size; bip_for_each_vec(biv, bip, iter) { unsigned char *tag; @@ -1303,6 +1327,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio) { struct dm_integrity_c *ic = ti->private; struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); + struct bio_integrity_payload *bip; sector_t area, offset; @@ -1330,6 +1355,44 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio) (unsigned long long)ic->provided_data_sectors); return -EIO; } + if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) { + DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x", + ic->sectors_per_block, + (unsigned long long)dio->range.logical_sector, bio_sectors(bio)); + return -EIO; + } + + if (ic->sectors_per_block > 1) { + struct bvec_iter iter; + struct bio_vec bv; + bio_for_each_segment(bv, bio, iter) { + if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { + DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary", + bv.bv_offset, bv.bv_len, ic->sectors_per_block); + return -EIO; + } + } + } + + bip = bio_integrity(bio); + if (!ic->internal_hash) { + if (bip) { + unsigned wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block; + if (ic->log2_tag_size >= 0) + wanted_tag_size <<= ic->log2_tag_size; + else + wanted_tag_size *= ic->tag_size; + if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) { + DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size); + return -EIO; + } + } + } else { + if (unlikely(bip != NULL)) { + DMERR("Unexpected integrity data when using internal hash"); + return -EIO; + } + } if (unlikely(ic->mode == 'R') && unlikely(dio->write)) return -EIO; @@ -1369,6 +1432,8 @@ retry_kmap: if (unlikely(!dio->write)) { struct journal_sector *js; + char *mem_ptr; + unsigned s; if (unlikely(journal_entry_is_inprogress(je))) { flush_dcache_page(bv.bv_page); @@ -1380,14 +1445,20 @@ retry_kmap: smp_rmb(); BUG_ON(journal_entry_get_sector(je) != logical_sector); js = access_journal_data(ic, journal_section, journal_entry); - memcpy(mem + bv.bv_offset, js, JOURNAL_SECTOR_DATA); - memcpy(mem + bv.bv_offset + JOURNAL_SECTOR_DATA, &je->last_bytes, sizeof je->last_bytes); + mem_ptr = mem + bv.bv_offset; + s = 0; + do { + memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA); + *(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s]; + js++; + mem_ptr += 1 << SECTOR_SHIFT; + } while (++s < ic->sectors_per_block); #ifdef INTERNAL_VERIFY if (ic->internal_hash) { char checksums_onstack[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)]; integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack); - if (unlikely(memcmp(checksums_onstack, je->tag, ic->tag_size))) { + if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { DMERR("Checksum failed when reading from journal, at sector 0x%llx", (unsigned long long)logical_sector); } @@ -1398,7 +1469,7 @@ retry_kmap: if (!ic->internal_hash) { struct bio_integrity_payload *bip = bio_integrity(bio); unsigned tag_todo = ic->tag_size; - char *tag_ptr = je->tag; + char *tag_ptr = journal_entry_tag(ic, je); if (bip) do { struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); @@ -1421,24 +1492,29 @@ retry_kmap: if (likely(dio->write)) { struct journal_sector *js; + unsigned s; js = access_journal_data(ic, journal_section, journal_entry); - memcpy(js, mem + bv.bv_offset, 1 << SECTOR_SHIFT); - je->last_bytes = js->commit_id; + memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT); + + s = 0; + do { + je->last_bytes[s] = js[s].commit_id; + } while (++s < ic->sectors_per_block); if (ic->internal_hash) { unsigned digest_size = crypto_shash_digestsize(ic->internal_hash); if (unlikely(digest_size > ic->tag_size)) { char checksums_onstack[digest_size]; integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack); - memcpy(je->tag, checksums_onstack, ic->tag_size); + memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size); } else - integrity_sector_checksum(ic, logical_sector, (char *)js, je->tag); + integrity_sector_checksum(ic, logical_sector, (char *)js, journal_entry_tag(ic, je)); } journal_entry_set_sector(je, logical_sector); } - logical_sector++; + logical_sector += ic->sectors_per_block; journal_entry++; if (unlikely(journal_entry == ic->journal_section_entries)) { @@ -1447,8 +1523,8 @@ retry_kmap: wraparound_section(ic, &journal_section); } - bv.bv_offset += 1 << SECTOR_SHIFT; - } while (bv.bv_len -= 1 << SECTOR_SHIFT); + bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT; + } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT); if (unlikely(!dio->write)) flush_dcache_page(bv.bv_page); @@ -1526,7 +1602,8 @@ retry: pos = journal_section * ic->journal_section_entries + journal_entry; ws = journal_section; we = journal_entry; - for (i = 0; i < dio->range.n_sectors; i++) { + i = 0; + do { struct journal_entry *je; add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i); @@ -1543,7 +1620,7 @@ retry: ws++; wraparound_section(ic, &ws); } - } + } while ((i += ic->sectors_per_block) < dio->range.n_sectors); spin_unlock_irq(&ic->endio_wait.lock); goto journal_read_write; @@ -1555,8 +1632,9 @@ retry: dio->range.n_sectors = next_sector - dio->range.logical_sector; } else { unsigned i; - for (i = 1; i < dio->range.n_sectors; i++) { - if (!test_journal_node(ic, journal_read_pos + i, dio->range.logical_sector + i)) + unsigned jp = journal_read_pos + 1; + for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) { + if (!test_journal_node(ic, jp, dio->range.logical_sector + i)) break; } dio->range.n_sectors = i; @@ -1725,6 +1803,16 @@ static void complete_copy_from_journal(unsigned long error, void *context) complete_journal_op(comp); } +static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js, + struct journal_entry *je) +{ + unsigned s = 0; + do { + js->commit_id = je->last_bytes[s]; + js++; + } while (++s < ic->sectors_per_block); +} + static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, unsigned write_sections, bool from_replay) { @@ -1753,8 +1841,14 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, continue; BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay); sec = journal_entry_get_sector(je); + if (unlikely(from_replay)) { + if (unlikely(sec & (unsigned)(ic->sectors_per_block - 1))) { + dm_integrity_io_error(ic, "invalid sector in journal", -EIO); + sec &= ~(sector_t)(ic->sectors_per_block - 1); + } + } get_area_and_offset(ic, sec, &area, &offset); - access_journal_data(ic, i, j)->commit_id = je->last_bytes; + restore_last_bytes(ic, access_journal_data(ic, i, j), je); for (k = j + 1; k < ic->journal_section_entries; k++) { struct journal_entry *je2 = access_journal_entry(ic, i, k); sector_t sec2, area2, offset2; @@ -1763,16 +1857,16 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay); sec2 = journal_entry_get_sector(je2); get_area_and_offset(ic, sec2, &area2, &offset2); - if (area2 != area || offset2 != offset + (k - j)) + if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block)) break; - access_journal_data(ic, i, k)->commit_id = je2->last_bytes; + restore_last_bytes(ic, access_journal_data(ic, i, k), je2); } next_loop = k - 1; io = mempool_alloc(ic->journal_io_mempool, GFP_NOIO); io->comp = ∁ io->range.logical_sector = sec; - io->range.n_sectors = k - j; + io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block; spin_lock_irq(&ic->endio_wait.lock); while (unlikely(!add_new_range(ic, &io->range))) @@ -1788,8 +1882,8 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, journal_entry_set_unused(je2); remove_journal_node(ic, §ion_node[j]); j++; - sec++; - offset++; + sec += ic->sectors_per_block; + offset += ic->sectors_per_block; } while (j < k && find_newer_committed_node(ic, §ion_node[k - 1])) { struct journal_entry *je2 = access_journal_entry(ic, i, k - 1); @@ -1822,14 +1916,14 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, ic->internal_hash) { char test_tag[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)]; - integrity_sector_checksum(ic, sec + (l - j), + integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block), (char *)access_journal_data(ic, i, l), test_tag); - if (unlikely(memcmp(test_tag, je2->tag, ic->tag_size))) + if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ); } journal_entry_set_unused(je2); - r = dm_integrity_rw_tag(ic, je2->tag, &metadata_block, &metadata_offset, + r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset, ic->tag_size, TAG_WRITE); if (unlikely(r)) { dm_integrity_io_error(ic, "reading tags", r); @@ -1837,7 +1931,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, } atomic_inc(&comp.in_flight); - copy_from_journal(ic, i, j, k - j, get_data_sector(ic, area, offset), + copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block, + (k - j) << ic->sb->log2_sectors_per_block, + get_data_sector(ic, area, offset), complete_copy_from_journal, io); skip_io: j = next_loop; @@ -2130,6 +2226,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, watermark_percentage += ic->journal_entries / 2; do_div(watermark_percentage, ic->journal_entries); arg_count = 5; + arg_count += ic->sectors_per_block != 1; arg_count += !!ic->internal_hash_alg.alg_string; arg_count += !!ic->journal_crypt_alg.alg_string; arg_count += !!ic->journal_mac_alg.alg_string; @@ -2140,6 +2237,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage); DMEMIT(" commit_time:%u", ic->autocommit_msec); + if (ic->sectors_per_block != 1) + DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); #define EMIT_ALG(a, n) \ do { \ @@ -2165,19 +2264,30 @@ static int dm_integrity_iterate_devices(struct dm_target *ti, return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data); } +static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + struct dm_integrity_c *ic = ti->private; + + if (ic->sectors_per_block > 1) { + limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT; + limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT; + blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT); + } +} + static void calculate_journal_section_size(struct dm_integrity_c *ic) { unsigned sector_space = JOURNAL_SECTOR_DATA; ic->journal_sections = le32_to_cpu(ic->sb->journal_sections); - ic->journal_entry_size = roundup(offsetof(struct journal_entry, tag) + ic->tag_size, + ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size, JOURNAL_ENTRY_ROUNDUP); if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) sector_space -= JOURNAL_MAC_PER_SECTOR; ic->journal_entries_per_sector = sector_space / ic->journal_entry_size; ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS; - ic->journal_section_sectors = ic->journal_section_entries + JOURNAL_BLOCK_SECTORS; + ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS; ic->journal_entries = ic->journal_section_entries * ic->journal_sections; } @@ -2192,7 +2302,7 @@ static int calculate_device_limits(struct dm_integrity_c *ic) return -EINVAL; ic->initial_sectors = initial_sectors; - ic->metadata_run = roundup((__u64)ic->tag_size << ic->sb->log2_interleave_sectors, + ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT; if (!(ic->metadata_run & (ic->metadata_run - 1))) ic->log2_metadata_run = __ffs(ic->metadata_run); @@ -2217,6 +2327,7 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec memcpy(ic->sb->magic, SB_MAGIC, 8); ic->sb->version = SB_VERSION; ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size); + ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block); if (ic->journal_mac_alg.alg_string) ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC); @@ -2256,8 +2367,9 @@ static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic) memset(&bi, 0, sizeof(bi)); bi.profile = &dm_integrity_profile; - bi.tuple_size = ic->tag_size * (queue_logical_block_size(disk->queue) >> SECTOR_SHIFT); - bi.tag_size = ic->tag_size; + bi.tuple_size = ic->tag_size; + bi.tag_size = bi.tuple_size; + bi.interval_exp = ilog2(ic->sectors_per_block << SECTOR_SHIFT); blk_integrity_register(disk, &bi); blk_queue_max_integrity_segments(disk->queue, UINT_MAX); @@ -2667,6 +2779,7 @@ bad: * internal_hash * journal_crypt * journal_mac + * block_size */ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) { @@ -2676,7 +2789,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) unsigned extra_args; struct dm_arg_set as; static struct dm_arg _args[] = { - {0, 8, "Invalid number of feature args"}, + {0, 9, "Invalid number of feature args"}, }; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; bool should_write_sb; @@ -2740,6 +2853,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) buffer_sectors = DEFAULT_BUFFER_SECTORS; journal_watermark = DEFAULT_JOURNAL_WATERMARK; sync_msec = DEFAULT_SYNC_MSEC; + ic->sectors_per_block = 1; as.argc = argc - DIRECT_ARGUMENTS; as.argv = argv + DIRECT_ARGUMENTS; @@ -2766,7 +2880,16 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) journal_watermark = val; else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) sync_msec = val; - else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { + else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { + if (val < 1 << SECTOR_SHIFT || + val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT || + (val & (val -1))) { + r = -EINVAL; + ti->error = "Invalid block_size argument"; + goto bad; + } + ic->sectors_per_block = val >> SECTOR_SHIFT; + } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, "Invalid internal_hash argument"); if (r) @@ -2910,7 +3033,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) { r = -EINVAL; - ti->error = "Invalid tag size"; + ti->error = "Tag size doesn't match the information in superblock"; + goto bad; + } + if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) { + r = -EINVAL; + ti->error = "Block size doesn't match the information in superblock"; goto bad; } /* make sure that ti->max_io_len doesn't overflow */ @@ -3084,6 +3212,7 @@ static struct target_type integrity_target = { .resume = dm_integrity_resume, .status = dm_integrity_status, .iterate_devices = dm_integrity_iterate_devices, + .io_hints = dm_integrity_io_hints, }; int __init dm_integrity_init(void) -- cgit v1.2.3