summaryrefslogtreecommitdiff
path: root/fs/ceph/caps.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r--fs/ceph/caps.c235
1 files changed, 206 insertions, 29 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 09cd6d334604..14215ec646f7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -14,6 +14,7 @@
#include "super.h"
#include "mds_client.h"
#include "cache.h"
+#include "crypto.h"
#include <linux/ceph/decode.h>
#include <linux/ceph/messenger.h>
@@ -1216,15 +1217,11 @@ struct cap_msg_args {
umode_t mode;
bool inline_data;
bool wake;
+ bool encrypted;
+ u32 fscrypt_auth_len;
+ u8 fscrypt_auth[sizeof(struct ceph_fscrypt_auth)]; // for context
};
-/*
- * cap struct size + flock buffer size + inline version + inline data size +
- * osd_epoch_barrier + oldest_flush_tid
- */
-#define CAP_MSG_SIZE (sizeof(struct ceph_mds_caps) + \
- 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4)
-
/* Marshal up the cap msg to the MDS */
static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
{
@@ -1240,7 +1237,7 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
arg->size, arg->max_size, arg->xattr_version,
arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0);
- msg->hdr.version = cpu_to_le16(10);
+ msg->hdr.version = cpu_to_le16(12);
msg->hdr.tid = cpu_to_le64(arg->flush_tid);
fc = msg->front.iov_base;
@@ -1257,7 +1254,13 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
fc->ino = cpu_to_le64(arg->ino);
fc->snap_follows = cpu_to_le64(arg->follows);
- fc->size = cpu_to_le64(arg->size);
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ if (arg->encrypted)
+ fc->size = cpu_to_le64(round_up(arg->size,
+ CEPH_FSCRYPT_BLOCK_SIZE));
+ else
+#endif
+ fc->size = cpu_to_le64(arg->size);
fc->max_size = cpu_to_le64(arg->max_size);
ceph_encode_timespec64(&fc->mtime, &arg->mtime);
ceph_encode_timespec64(&fc->atime, &arg->atime);
@@ -1311,6 +1314,27 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
/* Advisory flags (version 10) */
ceph_encode_32(&p, arg->flags);
+
+ /* dirstats (version 11) - these are r/o on the client */
+ ceph_encode_64(&p, 0);
+ ceph_encode_64(&p, 0);
+
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ /*
+ * fscrypt_auth and fscrypt_file (version 12)
+ *
+ * fscrypt_auth holds the crypto context (if any). fscrypt_file
+ * tracks the real i_size as an __le64 field (and we use a rounded-up
+ * i_size in the traditional size field).
+ */
+ ceph_encode_32(&p, arg->fscrypt_auth_len);
+ ceph_encode_copy(&p, arg->fscrypt_auth, arg->fscrypt_auth_len);
+ ceph_encode_32(&p, sizeof(__le64));
+ ceph_encode_64(&p, arg->size);
+#else /* CONFIG_FS_ENCRYPTION */
+ ceph_encode_32(&p, 0);
+ ceph_encode_32(&p, 0);
+#endif /* CONFIG_FS_ENCRYPTION */
}
/*
@@ -1378,7 +1402,6 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
arg->follows = flushing ? ci->i_head_snapc->seq : 0;
arg->flush_tid = flush_tid;
arg->oldest_flush_tid = oldest_flush_tid;
-
arg->size = i_size_read(inode);
ci->i_reported_size = arg->size;
arg->max_size = ci->i_wanted_max_size;
@@ -1432,8 +1455,39 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
}
}
arg->flags = flags;
+ arg->encrypted = IS_ENCRYPTED(inode);
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ if (ci->fscrypt_auth_len &&
+ WARN_ON_ONCE(ci->fscrypt_auth_len > sizeof(struct ceph_fscrypt_auth))) {
+ /* Don't set this if it's too big */
+ arg->fscrypt_auth_len = 0;
+ } else {
+ arg->fscrypt_auth_len = ci->fscrypt_auth_len;
+ memcpy(arg->fscrypt_auth, ci->fscrypt_auth,
+ min_t(size_t, ci->fscrypt_auth_len,
+ sizeof(arg->fscrypt_auth)));
+ }
+#endif /* CONFIG_FS_ENCRYPTION */
}
+/*
+ * Bytes every cap message needs regardless of payload: struct ceph_mds_caps
+ * plus the fixed-width fields encoded after it. The trailing terms cover the
+ * two 64-bit dirstat counters (version 11) and the 32-bit length prefixes of
+ * fscrypt_auth and fscrypt_file (version 12). With CONFIG_FS_ENCRYPTION the
+ * final 8 is the 64-bit size carried in fscrypt_file.
+ */
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \
+	4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4 + 8)
+#else
+#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \
+	4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4)
+#endif /* CONFIG_FS_ENCRYPTION */
+
+/*
+ * Size handed to ceph_msg_new() when allocating a cap message for @arg.
+ * Only the fscrypt_auth blob varies in length, and it is only encoded when
+ * CONFIG_FS_ENCRYPTION is enabled.
+ */
+static inline int cap_msg_size(struct cap_msg_args *arg)
+{
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+	return CAP_MSG_FIXED_FIELDS + arg->fscrypt_auth_len;
+#else
+	return CAP_MSG_FIXED_FIELDS;
+#endif /* CONFIG_FS_ENCRYPTION */
+}
+
/*
* Send a cap msg on the given inode.
*
@@ -1444,7 +1498,8 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
struct ceph_msg *msg;
struct inode *inode = &ci->netfs.inode;
- msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false);
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(arg), GFP_NOFS,
+ false);
if (!msg) {
pr_err("error allocating cap msg: ino (%llx.%llx) flushing %s tid %llu, requeuing cap.\n",
ceph_vinop(inode), ceph_cap_string(arg->dirty),
@@ -1470,10 +1525,6 @@ static inline int __send_flush_snap(struct inode *inode,
struct cap_msg_args arg;
struct ceph_msg *msg;
- msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false);
- if (!msg)
- return -ENOMEM;
-
arg.session = session;
arg.ino = ceph_vino(inode).ino;
arg.cid = 0;
@@ -1510,6 +1561,15 @@ static inline int __send_flush_snap(struct inode *inode,
arg.inline_data = capsnap->inline_data;
arg.flags = 0;
arg.wake = false;
+ arg.encrypted = IS_ENCRYPTED(inode);
+
+ /* No fscrypt_auth changes from a capsnap. */
+ arg.fscrypt_auth_len = 0;
+
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(&arg),
+ GFP_NOFS, false);
+ if (!msg)
+ return -ENOMEM;
encode_cap_msg(msg, &arg);
ceph_con_send(&arg.session->s_con, msg);
@@ -2900,10 +2960,9 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
* due to a small max_size, make sure we check_max_size (and possibly
* ask the mds) so we don't get hung up indefinitely.
*/
-int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got)
+int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
+ int want, loff_t endoff, int *got)
{
- struct ceph_file_info *fi = filp->private_data;
- struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
int ret, _got, flags;
@@ -2912,7 +2971,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
if (ret < 0)
return ret;
- if ((fi->fmode & CEPH_FILE_MODE_WR) &&
+ if (fi && (fi->fmode & CEPH_FILE_MODE_WR) &&
fi->filp_gen != READ_ONCE(fsc->filp_gen))
return -EBADF;
@@ -2965,7 +3024,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
continue;
}
- if ((fi->fmode & CEPH_FILE_MODE_WR) &&
+ if (fi && (fi->fmode & CEPH_FILE_MODE_WR) &&
fi->filp_gen != READ_ONCE(fsc->filp_gen)) {
if (ret >= 0 && _got)
ceph_put_cap_refs(ci, _got);
@@ -3028,6 +3087,15 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
return 0;
}
+/*
+ * File-based entry point for __ceph_get_caps(): take the inode and the
+ * per-open ceph_file_info from @filp and forward the remaining arguments
+ * unchanged.
+ */
+int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff,
+		  int *got)
+{
+	return __ceph_get_caps(file_inode(filp), filp->private_data, need,
+			       want, endoff, got);
+}
+
/*
* Take cap refs. Caller must already know we hold at least one ref
* on the caps in question or we don't know this is safe.
@@ -3323,6 +3391,9 @@ struct cap_extra_info {
/* currently issued */
int issued;
struct timespec64 btime;
+ u8 *fscrypt_auth;
+ u32 fscrypt_auth_len;
+ u64 fscrypt_file_size;
};
/*
@@ -3355,6 +3426,14 @@ static void handle_cap_grant(struct inode *inode,
bool deleted_inode = false;
bool fill_inline = false;
+ /*
+ * If there is at least one crypto block then we'll trust
+ * fscrypt_file_size. If the real length of the file is 0, then
+ * ignore it (it has probably been truncated down to 0 by the MDS).
+ */
+ if (IS_ENCRYPTED(inode) && size)
+ size = extra_info->fscrypt_file_size;
+
dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
inode, cap, session->s_mds, seq, ceph_cap_string(newcaps));
dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
@@ -3421,6 +3500,14 @@ static void handle_cap_grant(struct inode *inode,
dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
from_kuid(&init_user_ns, inode->i_uid),
from_kgid(&init_user_ns, inode->i_gid));
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ if (ci->fscrypt_auth_len != extra_info->fscrypt_auth_len ||
+ memcmp(ci->fscrypt_auth, extra_info->fscrypt_auth,
+ ci->fscrypt_auth_len))
+ pr_warn_ratelimited("%s: cap grant attempt to change fscrypt_auth on non-I_NEW inode (old len %d new len %d)\n",
+ __func__, ci->fscrypt_auth_len,
+ extra_info->fscrypt_auth_len);
+#endif
}
if ((newcaps & CEPH_CAP_LINK_SHARED) &&
@@ -3837,7 +3924,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
*/
static bool handle_cap_trunc(struct inode *inode,
struct ceph_mds_caps *trunc,
- struct ceph_mds_session *session)
+ struct ceph_mds_session *session,
+ struct cap_extra_info *extra_info)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
@@ -3854,8 +3942,16 @@ static bool handle_cap_trunc(struct inode *inode,
issued |= implemented | dirty;
- dout("handle_cap_trunc inode %p mds%d seq %d to %lld seq %d\n",
- inode, mds, seq, truncate_size, truncate_seq);
+ /*
+ * If there is at least one crypto block then we'll trust
+ * fscrypt_file_size. If the real length of the file is 0, then
+ * ignore it (it has probably been truncated down to 0 by the MDS).
+ */
+ if (IS_ENCRYPTED(inode) && size)
+ size = extra_info->fscrypt_file_size;
+
+ dout("%s inode %p mds%d seq %d to %lld truncate seq %d\n",
+ __func__, inode, mds, seq, truncate_size, truncate_seq);
queue_trunc = ceph_fill_file_size(inode, issued,
truncate_seq, truncate_size, size);
return queue_trunc;
@@ -4075,6 +4171,52 @@ retry:
*target_cap = cap;
}
+/*
+ * Decode the fscrypt_auth and fscrypt_file fields (message version 12) from
+ * a caps message. Each field is a 32-bit length followed by that many bytes.
+ *
+ * With CONFIG_FS_ENCRYPTION, a non-empty fscrypt_auth blob is copied into a
+ * freshly kmalloc'ed buffer stored in extra->fscrypt_auth. That buffer is
+ * never freed here, not even when a later decode step fails, so the caller
+ * must kfree() it (ceph_handle_caps() does so before returning). If
+ * fscrypt_file holds at least 8 bytes, the first 8 are decoded into
+ * extra->fscrypt_file_size and any remainder is skipped.
+ *
+ * Without CONFIG_FS_ENCRYPTION both fields are skipped and @extra is left
+ * untouched.
+ *
+ * Returns 0 on success, -ENOMEM if the fscrypt_auth buffer cannot be
+ * allocated, or -EIO if a decode step jumps to the bad label.
+ */
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static int parse_fscrypt_fields(void **p, void *end,
+				struct cap_extra_info *extra)
+{
+	u32 len;
+
+	ceph_decode_32_safe(p, end, extra->fscrypt_auth_len, bad);
+	if (extra->fscrypt_auth_len) {
+		/* Check the claimed length against the buffer before allocating. */
+		ceph_decode_need(p, end, extra->fscrypt_auth_len, bad);
+		extra->fscrypt_auth = kmalloc(extra->fscrypt_auth_len,
+					      GFP_KERNEL);
+		if (!extra->fscrypt_auth)
+			return -ENOMEM;
+		ceph_decode_copy_safe(p, end, extra->fscrypt_auth,
+				      extra->fscrypt_auth_len, bad);
+	}
+
+	ceph_decode_32_safe(p, end, len, bad);
+	if (len >= sizeof(u64)) {
+		ceph_decode_64_safe(p, end, extra->fscrypt_file_size, bad);
+		len -= sizeof(u64);
+	}
+	/* Step over whatever is left of fscrypt_file. */
+	ceph_decode_skip_n(p, end, len, bad);
+	return 0;
+bad:
+	return -EIO;
+}
+#else
+static int parse_fscrypt_fields(void **p, void *end,
+				struct cap_extra_info *extra)
+{
+	u32 len;
+
+	/* Don't care about these fields unless we're encryption-capable */
+	ceph_decode_32_safe(p, end, len, bad);
+	if (len)
+		ceph_decode_skip_n(p, end, len, bad);
+	ceph_decode_32_safe(p, end, len, bad);
+	if (len)
+		ceph_decode_skip_n(p, end, len, bad);
+	return 0;
+bad:
+	return -EIO;
+}
+#endif /* CONFIG_FS_ENCRYPTION */
+
/*
* Handle a caps message from the MDS.
*
@@ -4105,6 +4247,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
dout("handle_caps from mds%d\n", session->s_mds);
+ if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+ return;
+
/* decode */
end = msg->front.iov_base + msg->front.iov_len;
if (msg->front.iov_len < sizeof(*h))
@@ -4195,13 +4340,17 @@ void ceph_handle_caps(struct ceph_mds_session *session,
ceph_decode_64_safe(&p, end, extra_info.nsubdirs, bad);
}
+ if (msg_version >= 12) {
+ if (parse_fscrypt_fields(&p, end, &extra_info))
+ goto bad;
+ }
+
/* lookup ino */
inode = ceph_find_inode(mdsc->fsc->sb, vino);
dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
vino.snap, inode);
mutex_lock(&session->s_mutex);
- inc_session_sequence(session);
dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
(unsigned)seq);
@@ -4292,7 +4441,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
break;
case CEPH_CAP_OP_TRUNC:
- queue_trunc = handle_cap_trunc(inode, h, session);
+ queue_trunc = handle_cap_trunc(inode, h, session,
+ &extra_info);
spin_unlock(&ci->i_ceph_lock);
if (queue_trunc)
ceph_queue_vmtruncate(inode);
@@ -4309,12 +4459,15 @@ done:
done_unlocked:
iput(inode);
out:
+ ceph_dec_mds_stopping_blocker(mdsc);
+
ceph_put_string(extra_info.pool_ns);
/* Defer closing the sessions after s_mutex lock being released */
if (close_sessions)
ceph_mdsc_close_sessions(mdsc);
+ kfree(extra_info.fscrypt_auth);
return;
flush_cap_releases:
@@ -4611,6 +4764,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
return ret;
}
+/**
+ * ceph_encode_dentry_release - encode a dentry release into an outgoing request
+ * @p: outgoing request buffer
+ * @dentry: dentry to release
+ * @dir: dir to release it from
+ * @mds: mds that we're speaking to
+ * @drop: caps being dropped
+ * @unless: unless we have these caps
+ *
+ * Encode a dentry release into an outgoing request buffer. Returns 1 if the
+ * thing was released, 0 if it was not, or a negative error code on failure.
+ */
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
struct inode *dir,
int mds, int drop, int unless)
@@ -4643,13 +4808,25 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
if (ret && di->lease_session && di->lease_session->s_mds == mds) {
dout("encode_dentry_release %p mds%d seq %d\n",
dentry, mds, (int)di->lease_seq);
- rel->dname_len = cpu_to_le32(dentry->d_name.len);
- memcpy(*p, dentry->d_name.name, dentry->d_name.len);
- *p += dentry->d_name.len;
rel->dname_seq = cpu_to_le32(di->lease_seq);
__ceph_mdsc_drop_dentry_lease(dentry);
+ spin_unlock(&dentry->d_lock);
+ if (IS_ENCRYPTED(dir) && fscrypt_has_encryption_key(dir)) {
+ int ret2 = ceph_encode_encrypted_fname(dir, dentry, *p);
+
+ if (ret2 < 0)
+ return ret2;
+
+ rel->dname_len = cpu_to_le32(ret2);
+ *p += ret2;
+ } else {
+ rel->dname_len = cpu_to_le32(dentry->d_name.len);
+ memcpy(*p, dentry->d_name.name, dentry->d_name.len);
+ *p += dentry->d_name.len;
+ }
+ } else {
+ spin_unlock(&dentry->d_lock);
}
- spin_unlock(&dentry->d_lock);
return ret;
}