From f017754d6919f3bd9786710b07b4708293594f20 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 20 Jun 2018 15:42:55 +0800 Subject: ceph: add retry logic for error -ERANGE in ceph_get_acl() When the size of acl extended attribution is larger than pre-allocated value buffer size, we will hit error '-ERANGE' and it's probabaly caused by concurrent get/set acl from different clients. In this case, current logic just sets acl to NULL so that we cannot get proper information but the operation looks successful. This patch adds retry logic for error -ERANGE and return -EIO if fail from the retry. Additionally, print real errno when failing from __ceph_getxattr(). Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/acl.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 59cb307b15fb..eb9d567c9ef8 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -45,6 +45,7 @@ static inline void ceph_set_cached_acl(struct inode *inode, struct posix_acl *ceph_get_acl(struct inode *inode, int type) { int size; + unsigned int retry_cnt = 0; const char *name; char *value = NULL; struct posix_acl *acl; @@ -60,6 +61,7 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type) BUG(); } +retry: size = __ceph_getxattr(inode, name, "", 0); if (size > 0) { value = kzalloc(size, GFP_NOFS); @@ -68,12 +70,22 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type) size = __ceph_getxattr(inode, name, value, size); } - if (size > 0) + if (size == -ERANGE && retry_cnt < 10) { + retry_cnt++; + kfree(value); + value = NULL; + goto retry; + } + + if (size > 0) { acl = posix_acl_from_xattr(&init_user_ns, value, size); - else if (size == -ERANGE || size == -ENODATA || size == 0) + } else if (size == -ENODATA || size == 0) { acl = NULL; - else + } else { + pr_err_ratelimited("get acl %llx.%llx failed, err=%d\n", + ceph_vinop(inode), size); acl = ERR_PTR(-EIO); + } kfree(value); -- cgit v1.2.3 From 93d35c754d97a57bda2a2c6c39dce9b67a9f3c99 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 22 Jun 2018 15:01:13 +0800 Subject: ceph: restore ctime as well in the case of restoring old mode It's better to restore ctime as well in the case of restoring old mode in ceph_set_acl(). Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/acl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index eb9d567c9ef8..3351ea14390b 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -101,6 +101,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) const char *name = NULL; char *value = NULL; struct iattr newattrs; + struct timespec64 old_ctime = inode->i_ctime; umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; switch (type) { @@ -145,7 +146,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (new_mode != old_mode) { newattrs.ia_ctime = current_time(inode); newattrs.ia_mode = new_mode; - newattrs.ia_valid = ATTR_MODE; + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; ret = __ceph_setattr(inode, &newattrs); if (ret) goto out_free; @@ -154,8 +155,9 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) ret = __ceph_setxattr(inode, name, value, size, 0); if (ret) { if (new_mode != old_mode) { + newattrs.ia_ctime = old_ctime; newattrs.ia_mode = old_mode; - newattrs.ia_valid = ATTR_MODE; + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; __ceph_setattr(inode, &newattrs); } goto out_free; -- cgit v1.2.3 From 2f56b6bae73b2d65ef4816ca89341facc53d3361 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 27 Jun 2018 16:38:13 +0200 Subject: libceph: amend "bad option arg" error message Don't mention "mount" -- in the rbd case it is "mapping". Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 2 +- fs/ceph/super.c | 3 +-- net/ceph/ceph_common.c | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index fa0729c1e776..4e8949b88b05 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -786,7 +786,7 @@ static int parse_rbd_opts_token(char *c, void *private) if (token < Opt_last_int) { ret = match_int(&argstr[0], &intval); if (ret < 0) { - pr_err("bad mount option arg (not int) at '%s'\n", c); + pr_err("bad option arg (not int) at '%s'\n", c); return ret; } dout("got int token %d val %d\n", token, intval); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 95a3b3ac9b6e..3d8a26b2944f 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -219,8 +219,7 @@ static int parse_fsopt_token(char *c, void *private) if (token < Opt_last_int) { ret = match_int(&argstr[0], &intval); if (ret < 0) { - pr_err("bad mount option arg (not int) " - "at '%s'\n", c); + pr_err("bad option arg (not int) at '%s'\n", c); return ret; } dout("got int token %d val %d\n", token, intval); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 1677a6132034..6ebd7d953ee0 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -396,8 +396,7 @@ ceph_parse_options(char *options, const char *dev_name, if (token < Opt_last_int) { err = match_int(&argstr[0], &intval); if (err < 0) { - pr_err("bad mount option arg (not int) " - "at '%s'\n", c); + pr_err("bad option arg (not int) at '%s'\n", c); goto out; } dout("got int token %d val %d\n", token, intval); -- cgit v1.2.3 From dfeb84d4adfda0448b013b8aae2cc5c95eb27ccd Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 2 Jul 2018 15:55:23 +0800 Subject: ceph: fix incorrect use of strncpy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC8 prints following warning: fs/ceph/mds_client.c:3683:2: warning: ‘strncpy’ output may be truncated copying 64 bytes from a string of length 64 [-Wstringop-truncation] [ Change to strscpy() while at it. ] Signed-off-by: "Yan, Zheng" Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index dc8bc664a871..f1590256d2e6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3644,8 +3644,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) init_rwsem(&mdsc->pool_perm_rwsem); mdsc->pool_perm_tree = RB_ROOT; - strncpy(mdsc->nodename, utsname()->nodename, - sizeof(mdsc->nodename) - 1); + strscpy(mdsc->nodename, utsname()->nodename, + sizeof(mdsc->nodename)); return 0; } -- cgit v1.2.3 From 61ad36d47dd273b7b8c3d63fc8359d96f7976c79 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Mon, 9 Jul 2018 22:48:07 +0800 Subject: ceph: return errors from posix_acl_equiv_mode() correctly In order to return correct error code should replace variable ret using err in error case. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/acl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 3351ea14390b..027408d55aee 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -185,10 +185,10 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode, return err; if (acl) { - int ret = posix_acl_equiv_mode(acl, mode); - if (ret < 0) + err = posix_acl_equiv_mode(acl, mode); + if (err < 0) goto out_err; - if (ret == 0) { + if (err == 0) { posix_acl_release(acl); acl = NULL; } -- cgit v1.2.3 From 0459871c4995d0bd57d6b1694f0971ec5cabafc1 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Mon, 9 Jul 2018 22:17:30 +0800 Subject: ceph: add d_drop for some error cases in ceph_mknod() When file num exceeds quota limit or fails from ceph_per_init_acls() should call d_drop to drop dentry from cache as well. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 036ac0f3a393..4b6a49fd2a61 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -827,12 +827,14 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; - if (ceph_quota_is_max_files_exceeded(dir)) - return -EDQUOT; + if (ceph_quota_is_max_files_exceeded(dir)) { + err = -EDQUOT; + goto out; + } err = ceph_pre_init_acls(dir, &mode, &acls); if (err < 0) - return err; + goto out; dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n", dir, dentry, mode, rdev); -- cgit v1.2.3 From 67fcd151400242757edbab710402161b2cd38989 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Mon, 9 Jul 2018 22:17:31 +0800 Subject: ceph: add d_drop for some error cases in ceph_symlink() When file num exceeds quota limit, should call d_drop to drop dentry from cache as well. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4b6a49fd2a61..0210b02b86d4 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -885,8 +885,10 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; - if (ceph_quota_is_max_files_exceeded(dir)) - return -EDQUOT; + if (ceph_quota_is_max_files_exceeded(dir)) { + err = -EDQUOT; + goto out; + } dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); -- cgit v1.2.3 From 63ecae7e439f766e88c91da56955bca4db06802a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Jul 2018 22:18:35 +0200 Subject: ceph: stop using current_kernel_time() ceph_mdsc_create_request() is one of the last callers of the deprecated current_kernel_time() as well as timespec_trunc(). This changes it to use the timespec64 based interfaces instead, though we still need to convert the result until we are ready to change over req->r_stamp. The output of the two functions, ktime_get_coarse_real_ts64() and current_kernel_time() is the same coarse-granular timestamp, the only difference here is that ktime_get_coarse_real_ts64() doesn't overflow in 2038. Signed-off-by: Arnd Bergmann Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f1590256d2e6..d2679dce7332 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1779,6 +1779,7 @@ struct ceph_mds_request * ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) { struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS); + struct timespec64 ts; if (!req) return ERR_PTR(-ENOMEM); @@ -1797,7 +1798,9 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item); - req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran); + ktime_get_coarse_real_ts64(&ts); + req->r_stamp = timespec64_to_timespec(timespec64_trunc(ts, + mdsc->fsc->sb->s_time_gran)); req->r_op = op; req->r_direct_mode = mode; -- cgit v1.2.3 From 9bbeab41ce50542624ef381e7852d70f2f39a2b1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Jul 2018 22:18:36 +0200 Subject: ceph: use timespec64 for inode timestamp Since the vfs structures are all using timespec64, we can now change the internal representation, using ceph_encode_timespec64 and ceph_decode_timespec64. In case of ceph_aux_inode however, we need to avoid doing a memcmp() on uninitialized padding data, so the members of the i_mtime field get copied individually into 64-bit integers. Signed-off-by: Arnd Bergmann Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/cache.c | 11 +++++--- fs/ceph/caps.c | 26 +++++++++--------- fs/ceph/dir.c | 6 ++--- fs/ceph/inode.c | 76 +++++++++++++++++++++++++--------------------------- fs/ceph/mds_client.c | 7 ++--- fs/ceph/snap.c | 6 ++--- fs/ceph/super.h | 9 ++++--- fs/ceph/xattr.c | 4 +-- 8 files changed, 71 insertions(+), 74 deletions(-) (limited to 'fs') diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 362900e42424..1bf3502bdd6f 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -25,8 +25,9 @@ #include "cache.h" struct ceph_aux_inode { - u64 version; - struct timespec mtime; + u64 version; + u64 mtime_sec; + u64 mtime_nsec; }; struct fscache_netfs ceph_cache_netfs = { @@ -130,7 +131,8 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux( memset(&aux, 0, sizeof(aux)); aux.version = ci->i_version; - aux.mtime = timespec64_to_timespec(inode->i_mtime); + aux.mtime_sec = inode->i_mtime.tv_sec; + aux.mtime_nsec = inode->i_mtime.tv_nsec; if (memcmp(data, &aux, sizeof(aux)) != 0) return FSCACHE_CHECKAUX_OBSOLETE; @@ -163,7 +165,8 @@ void ceph_fscache_register_inode_cookie(struct inode *inode) if (!ci->fscache) { memset(&aux, 0, sizeof(aux)); aux.version = ci->i_version; - aux.mtime = timespec64_to_timespec(inode->i_mtime); + aux.mtime_sec = inode->i_mtime.tv_sec; + aux.mtime_nsec = inode->i_mtime.tv_nsec; ci->fscache = fscache_acquire_cookie(fsc->fscache, &ceph_fscache_inode_object_def, &ci->i_vino, sizeof(ci->i_vino), diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 990258cbd836..f50cc008632a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1125,7 +1125,7 @@ struct cap_msg_args { u64 flush_tid, oldest_flush_tid, size, max_size; u64 xattr_version; struct ceph_buffer *xattr_buf; - struct timespec atime, mtime, ctime; + struct timespec64 atime, mtime, ctime; int op, caps, wanted, dirty; u32 seq, issue_seq, mseq, time_warp_seq; u32 flags; @@ -1146,7 +1146,7 @@ static int send_cap_msg(struct cap_msg_args *arg) struct ceph_msg *msg; void *p; size_t extra_len; - struct timespec zerotime = {0}; + struct timespec64 zerotime = {0}; struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc; dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" @@ -1186,9 +1186,9 @@ static int send_cap_msg(struct cap_msg_args *arg) fc->size = cpu_to_le64(arg->size); fc->max_size = cpu_to_le64(arg->max_size); - ceph_encode_timespec(&fc->mtime, &arg->mtime); - ceph_encode_timespec(&fc->atime, &arg->atime); - ceph_encode_timespec(&fc->ctime, &arg->ctime); + ceph_encode_timespec64(&fc->mtime, &arg->mtime); + ceph_encode_timespec64(&fc->atime, &arg->atime); + ceph_encode_timespec64(&fc->ctime, &arg->ctime); fc->time_warp_seq = cpu_to_le32(arg->time_warp_seq); fc->uid = cpu_to_le32(from_kuid(&init_user_ns, arg->uid)); @@ -1237,7 +1237,7 @@ static int send_cap_msg(struct cap_msg_args *arg) * We just zero these out for now, as the MDS ignores them unless * the requisite feature flags are set (which we don't do yet). */ - ceph_encode_timespec(p, &zerotime); + ceph_encode_timespec64(p, &zerotime); p += sizeof(struct ceph_timespec); ceph_encode_64(&p, 0); @@ -1360,9 +1360,9 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, arg.xattr_buf = NULL; } - arg.mtime = timespec64_to_timespec(inode->i_mtime); - arg.atime = timespec64_to_timespec(inode->i_atime); - arg.ctime = timespec64_to_timespec(inode->i_ctime); + arg.mtime = inode->i_mtime; + arg.atime = inode->i_atime; + arg.ctime = inode->i_ctime; arg.op = op; arg.caps = cap->implemented; @@ -3148,11 +3148,11 @@ static void handle_cap_grant(struct inode *inode, } if (newcaps & CEPH_CAP_ANY_RD) { - struct timespec mtime, atime, ctime; + struct timespec64 mtime, atime, ctime; /* ctime/mtime/atime? */ - ceph_decode_timespec(&mtime, &grant->mtime); - ceph_decode_timespec(&atime, &grant->atime); - ceph_decode_timespec(&ctime, &grant->ctime); + ceph_decode_timespec64(&mtime, &grant->mtime); + ceph_decode_timespec64(&atime, &grant->atime); + ceph_decode_timespec64(&ctime, &grant->ctime); ceph_fill_file_time(inode, extra_info->issued, le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, &atime); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 0210b02b86d4..82928cea0209 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1397,7 +1397,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, " rfiles: %20lld\n" " rsubdirs: %20lld\n" "rbytes: %20lld\n" - "rctime: %10ld.%09ld\n", + "rctime: %10lld.%09ld\n", ci->i_files + ci->i_subdirs, ci->i_files, ci->i_subdirs, @@ -1405,8 +1405,8 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, ci->i_rfiles, ci->i_rsubdirs, ci->i_rbytes, - (long)ci->i_rctime.tv_sec, - (long)ci->i_rctime.tv_nsec); + ci->i_rctime.tv_sec, + ci->i_rctime.tv_nsec); } if (*ppos >= dfi->dir_info_len) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index a866be999216..715a6f2a4613 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -658,13 +658,10 @@ int ceph_fill_file_size(struct inode *inode, int issued, } void ceph_fill_file_time(struct inode *inode, int issued, - u64 time_warp_seq, struct timespec *ctime, - struct timespec *mtime, struct timespec *atime) + u64 time_warp_seq, struct timespec64 *ctime, + struct timespec64 *mtime, struct timespec64 *atime) { struct ceph_inode_info *ci = ceph_inode(inode); - struct timespec64 ctime64 = timespec_to_timespec64(*ctime); - struct timespec64 mtime64 = timespec_to_timespec64(*mtime); - struct timespec64 atime64 = timespec_to_timespec64(*atime); int warn = 0; if (issued & (CEPH_CAP_FILE_EXCL| @@ -673,39 +670,39 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_AUTH_EXCL| CEPH_CAP_XATTR_EXCL)) { if (ci->i_version == 0 || - timespec64_compare(&ctime64, &inode->i_ctime) > 0) { + timespec64_compare(ctime, &inode->i_ctime) > 0) { dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", - (long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, - (long long)ctime->tv_sec, ctime->tv_nsec); - inode->i_ctime = ctime64; + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, + ctime->tv_sec, ctime->tv_nsec); + inode->i_ctime = *ctime; } if (ci->i_version == 0 || ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { /* the MDS did a utimes() */ dout("mtime %lld.%09ld -> %lld.%09ld " "tw %d -> %d\n", - (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - (long long)mtime->tv_sec, mtime->tv_nsec, + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, + mtime->tv_sec, mtime->tv_nsec, ci->i_time_warp_seq, (int)time_warp_seq); - inode->i_mtime = mtime64; - inode->i_atime = atime64; + inode->i_mtime = *mtime; + inode->i_atime = *atime; ci->i_time_warp_seq = time_warp_seq; } else if (time_warp_seq == ci->i_time_warp_seq) { /* nobody did utimes(); take the max */ - if (timespec64_compare(&mtime64, &inode->i_mtime) > 0) { + if (timespec64_compare(mtime, &inode->i_mtime) > 0) { dout("mtime %lld.%09ld -> %lld.%09ld inc\n", - (long long)inode->i_mtime.tv_sec, + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - (long long)mtime->tv_sec, mtime->tv_nsec); - inode->i_mtime = mtime64; + mtime->tv_sec, mtime->tv_nsec); + inode->i_mtime = *mtime; } - if (timespec64_compare(&atime64, &inode->i_atime) > 0) { + if (timespec64_compare(atime, &inode->i_atime) > 0) { dout("atime %lld.%09ld -> %lld.%09ld inc\n", - (long long)inode->i_atime.tv_sec, + inode->i_atime.tv_sec, inode->i_atime.tv_nsec, - (long long)atime->tv_sec, atime->tv_nsec); - inode->i_atime = atime64; + atime->tv_sec, atime->tv_nsec); + inode->i_atime = *atime; } } else if (issued & CEPH_CAP_FILE_EXCL) { /* we did a utimes(); ignore mds values */ @@ -715,9 +712,9 @@ void ceph_fill_file_time(struct inode *inode, int issued, } else { /* we have no write|excl caps; whatever the MDS says is true */ if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { - inode->i_ctime = ctime64; - inode->i_mtime = mtime64; - inode->i_atime = atime64; + inode->i_ctime = *ctime; + inode->i_mtime = *mtime; + inode->i_atime = *atime; ci->i_time_warp_seq = time_warp_seq; } else { warn = 1; @@ -743,7 +740,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); int issued, new_issued, info_caps; - struct timespec mtime, atime, ctime; + struct timespec64 mtime, atime, ctime; struct ceph_buffer *xattr_blob = NULL; struct ceph_string *pool_ns = NULL; struct ceph_cap *new_cap = NULL; @@ -823,9 +820,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, if (new_version || (new_issued & CEPH_CAP_ANY_RD)) { /* be careful with mtime, atime, size */ - ceph_decode_timespec(&atime, &info->atime); - ceph_decode_timespec(&mtime, &info->mtime); - ceph_decode_timespec(&ctime, &info->ctime); + ceph_decode_timespec64(&atime, &info->atime); + ceph_decode_timespec64(&mtime, &info->mtime); + ceph_decode_timespec64(&ctime, &info->ctime); ceph_fill_file_time(inode, issued, le32_to_cpu(info->time_warp_seq), &ctime, &mtime, &atime); @@ -872,7 +869,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, ci->i_rbytes = le64_to_cpu(info->rbytes); ci->i_rfiles = le64_to_cpu(info->rfiles); ci->i_rsubdirs = le64_to_cpu(info->rsubdirs); - ceph_decode_timespec(&ci->i_rctime, &info->rctime); + ceph_decode_timespec64(&ci->i_rctime, &info->rctime); } } @@ -1954,7 +1951,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) int err = 0; int inode_dirty_flags = 0; bool lock_snap_rwsem = false; - struct timespec ts; prealloc_cf = ceph_alloc_cap_flush(); if (!prealloc_cf) @@ -2030,8 +2026,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) if (ia_valid & ATTR_ATIME) { dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode, - (long long)inode->i_atime.tv_sec, inode->i_atime.tv_nsec, - (long long)attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); + inode->i_atime.tv_sec, inode->i_atime.tv_nsec, + attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); if (issued & CEPH_CAP_FILE_EXCL) { ci->i_time_warp_seq++; inode->i_atime = attr->ia_atime; @@ -2043,8 +2039,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) dirtied |= CEPH_CAP_FILE_WR; } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || !timespec64_equal(&inode->i_atime, &attr->ia_atime)) { - ts = timespec64_to_timespec(attr->ia_atime); - ceph_encode_timespec(&req->r_args.setattr.atime, &ts); + ceph_encode_timespec64(&req->r_args.setattr.atime, + &attr->ia_atime); mask |= CEPH_SETATTR_ATIME; release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; @@ -2052,8 +2048,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) } if (ia_valid & ATTR_MTIME) { dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode, - (long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - (long long)attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, + attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); if (issued & CEPH_CAP_FILE_EXCL) { ci->i_time_warp_seq++; inode->i_mtime = attr->ia_mtime; @@ -2065,8 +2061,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) dirtied |= CEPH_CAP_FILE_WR; } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || !timespec64_equal(&inode->i_mtime, &attr->ia_mtime)) { - ts = timespec64_to_timespec(attr->ia_mtime); - ceph_encode_timespec(&req->r_args.setattr.mtime, &ts); + ceph_encode_timespec64(&req->r_args.setattr.mtime, + &attr->ia_mtime); mask |= CEPH_SETATTR_MTIME; release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; @@ -2097,8 +2093,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME| ATTR_MODE|ATTR_UID|ATTR_GID)) == 0; dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode, - (long long)inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, - (long long)attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, + attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, only ? "ctime only" : "ignored"); if (only) { /* diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d2679dce7332..a1a510438b7c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2961,15 +2961,12 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, rec.v2.flock_len = (__force __le32) ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); } else { - struct timespec ts; rec.v1.cap_id = cpu_to_le64(cap->cap_id); rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); rec.v1.issued = cpu_to_le32(cap->issued); rec.v1.size = cpu_to_le64(inode->i_size); - ts = timespec64_to_timespec(inode->i_mtime); - ceph_encode_timespec(&rec.v1.mtime, &ts); - ts = timespec64_to_timespec(inode->i_atime); - ceph_encode_timespec(&rec.v1.atime, &ts); + ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime); + ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime); rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); rec.v1.pathbase = cpu_to_le64(pathbase); } diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index af81555c14fd..041c27ea8de1 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -594,9 +594,9 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, BUG_ON(capsnap->writing); capsnap->size = inode->i_size; - capsnap->mtime = timespec64_to_timespec(inode->i_mtime); - capsnap->atime = timespec64_to_timespec(inode->i_atime); - capsnap->ctime = timespec64_to_timespec(inode->i_ctime); + capsnap->mtime = inode->i_mtime; + capsnap->atime = inode->i_atime; + capsnap->ctime = inode->i_ctime; capsnap->time_warp_seq = ci->i_time_warp_seq; capsnap->truncate_size = ci->i_truncate_size; capsnap->truncate_seq = ci->i_truncate_seq; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a7077a0c989f..d3eb70e2b527 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -193,7 +193,7 @@ struct ceph_cap_snap { u64 xattr_version; u64 size; - struct timespec mtime, atime, ctime; + struct timespec64 mtime, atime, ctime; u64 time_warp_seq; u64 truncate_size; u32 truncate_seq; @@ -307,7 +307,7 @@ struct ceph_inode_info { char *i_symlink; /* for dirs */ - struct timespec i_rctime; + struct timespec64 i_rctime; u64 i_rbytes, i_rfiles, i_rsubdirs; u64 i_files, i_subdirs; @@ -857,8 +857,9 @@ extern struct inode *ceph_get_snapdir(struct inode *parent); extern int ceph_fill_file_size(struct inode *inode, int issued, u32 truncate_seq, u64 truncate_size, u64 size); extern void ceph_fill_file_time(struct inode *inode, int issued, - u64 time_warp_seq, struct timespec *ctime, - struct timespec *mtime, struct timespec *atime); + u64 time_warp_seq, struct timespec64 *ctime, + struct timespec64 *mtime, + struct timespec64 *atime); extern int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req); extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 5bc8edb4c2a6..5cc8b94f8206 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -224,8 +224,8 @@ static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val, static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, size_t size) { - return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec, - (long)ci->i_rctime.tv_nsec); + return snprintf(val, size, "%lld.09%ld", ci->i_rctime.tv_sec, + ci->i_rctime.tv_nsec); } /* quotas */ -- cgit v1.2.3 From fac02ddf910814c24f5d9d969dfdab5227f6f3eb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Jul 2018 22:18:37 +0200 Subject: libceph: use timespec64 for r_mtime The request mtime field is used all over ceph, and is currently represented as a 'timespec' structure in Linux. This changes it to timespec64 to allow times beyond 2038, modifying all users at the same time. [ Remove now redundant ts variable in writepage_nounlock(). ] Signed-off-by: Arnd Bergmann Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 2 +- fs/ceph/addr.c | 12 +++++------- fs/ceph/file.c | 8 ++++---- include/linux/ceph/osd_client.h | 6 +++--- net/ceph/osd_client.c | 8 ++++---- 5 files changed, 17 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c680de15fae0..11f9be10e3fa 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1466,7 +1466,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) struct ceph_osd_request *osd_req = obj_request->osd_req; osd_req->r_flags = CEPH_OSD_FLAG_WRITE; - ktime_get_real_ts(&osd_req->r_mtime); + ktime_get_real_ts64(&osd_req->r_mtime); osd_req->r_data_offset = obj_request->ex.oe_off; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 292b3d72d725..afcc59ed7090 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -574,7 +574,6 @@ static u64 get_writepages_data_length(struct inode *inode, */ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) { - struct timespec ts; struct inode *inode; struct ceph_inode_info *ci; struct ceph_fs_client *fsc; @@ -625,12 +624,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); set_page_writeback(page); - ts = timespec64_to_timespec(inode->i_mtime); err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode), &ci->i_layout, snapc, page_off, len, ceph_wbc.truncate_seq, ceph_wbc.truncate_size, - &ts, &page, 1); + &inode->i_mtime, &page, 1); if (err < 0) { struct writeback_control tmp_wbc; if (!wbc) @@ -1134,7 +1132,7 @@ new_request: pages = NULL; } - req->r_mtime = timespec64_to_timespec(inode->i_mtime); + req->r_mtime = inode->i_mtime; rc = ceph_osdc_start_request(&fsc->client->osdc, req, true); BUG_ON(rc); req = NULL; @@ -1734,7 +1732,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) goto out; } - req->r_mtime = timespec64_to_timespec(inode->i_mtime); + req->r_mtime = inode->i_mtime; err = ceph_osdc_start_request(&fsc->client->osdc, req, false); if (!err) err = ceph_osdc_wait_request(&fsc->client->osdc, req); @@ -1776,7 +1774,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) goto out_put; } - req->r_mtime = timespec64_to_timespec(inode->i_mtime); + req->r_mtime = inode->i_mtime; err = ceph_osdc_start_request(&fsc->client->osdc, req, false); if (!err) err = ceph_osdc_wait_request(&fsc->client->osdc, req); @@ -1937,7 +1935,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, 0, false, true); err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false); - wr_req->r_mtime = timespec64_to_timespec(ci->vfs_inode.i_mtime); + wr_req->r_mtime = ci->vfs_inode.i_mtime; err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false); if (!err) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ad0bed99b1d5..2f3a30ca94bf 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -721,7 +721,7 @@ struct ceph_aio_request { struct list_head osd_reqs; unsigned num_reqs; atomic_t pending_reqs; - struct timespec mtime; + struct timespec64 mtime; struct ceph_cap_flush *prealloc_cf; }; @@ -923,7 +923,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, int num_pages = 0; int flags; int ret; - struct timespec mtime = timespec64_to_timespec(current_time(inode)); + struct timespec64 mtime = current_time(inode); size_t count = iov_iter_count(iter); loff_t pos = iocb->ki_pos; bool write = iov_iter_rw(iter) == WRITE; @@ -1131,7 +1131,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, int flags; int ret; bool check_caps = false; - struct timespec mtime = timespec64_to_timespec(current_time(inode)); + struct timespec64 mtime = current_time(inode); size_t count = iov_iter_count(from); if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) @@ -1663,7 +1663,7 @@ static int ceph_zero_partial_object(struct inode *inode, goto out; } - req->r_mtime = timespec64_to_timespec(inode->i_mtime); + req->r_mtime = inode->i_mtime; ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); if (!ret) { ret = ceph_osdc_wait_request(&fsc->client->osdc, req); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d6fe7e4df8cf..02096da01845 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -199,7 +199,7 @@ struct ceph_osd_request { /* set by submitter */ u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */ struct ceph_snap_context *r_snapc; /* for writes */ - struct timespec r_mtime; /* ditto */ + struct timespec64 r_mtime; /* ditto */ u64 r_data_offset; /* ditto */ bool r_linger; /* don't resend on failure */ @@ -253,7 +253,7 @@ struct ceph_osd_linger_request { struct ceph_osd_request_target t; u32 map_dne_bound; - struct timespec mtime; + struct timespec64 mtime; struct kref kref; struct mutex lock; @@ -508,7 +508,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_snap_context *sc, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, - struct timespec *mtime, + struct timespec64 *mtime, struct page **pages, int nr_pages); /* watch/notify */ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 93614cb5e03f..8002b8e9ce24 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1978,7 +1978,7 @@ static void encode_request_partial(struct ceph_osd_request *req, p += sizeof(struct ceph_blkin_trace_info); ceph_encode_32(&p, 0); /* client_inc, always 0 */ - ceph_encode_timespec(p, &req->r_mtime); + ceph_encode_timespec64(p, &req->r_mtime); p += sizeof(struct ceph_timespec); encode_oloc(&p, end, &req->r_t.target_oloc); @@ -4512,7 +4512,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, ceph_oid_copy(&lreq->t.base_oid, oid); ceph_oloc_copy(&lreq->t.base_oloc, oloc); lreq->t.flags = CEPH_OSD_FLAG_WRITE; - ktime_get_real_ts(&lreq->mtime); + ktime_get_real_ts64(&lreq->mtime); lreq->reg_req = alloc_linger_request(lreq); if (!lreq->reg_req) { @@ -4570,7 +4570,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); req->r_flags = CEPH_OSD_FLAG_WRITE; - ktime_get_real_ts(&req->r_mtime); + ktime_get_real_ts64(&req->r_mtime); osd_req_op_watch_init(req, 0, lreq->linger_id, CEPH_OSD_WATCH_OP_UNWATCH); @@ -5136,7 +5136,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, struct ceph_snap_context *snapc, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, - struct timespec *mtime, + struct timespec64 *mtime, struct page **pages, int num_pages) { struct ceph_osd_request *req; -- cgit v1.2.3 From 0ed1e90a09eb0e2863cab43e1ed5c5df89566772 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Jul 2018 22:18:38 +0200 Subject: ceph: use timespec64 for r_stamp The ceph_mds_request stamp still uses the deprecated timespec structure, this converts it over as well. Signed-off-by: Arnd Bergmann Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 2 +- fs/ceph/mds_client.c | 7 +++---- fs/ceph/mds_client.h | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 715a6f2a4613..d62f65f2875d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2136,7 +2136,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) req->r_inode_drop = release; req->r_args.setattr.mask = cpu_to_le32(mask); req->r_num_caps = 1; - req->r_stamp = timespec64_to_timespec(attr->ia_ctime); + req->r_stamp = attr->ia_ctime; err = ceph_mdsc_do_request(mdsc, NULL, req); } dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a1a510438b7c..f5a299daae95 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1799,8 +1799,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) INIT_LIST_HEAD(&req->r_unsafe_item); ktime_get_coarse_real_ts64(&ts); - req->r_stamp = timespec64_to_timespec(timespec64_trunc(ts, - mdsc->fsc->sb->s_time_gran)); + req->r_stamp = timespec64_trunc(ts, mdsc->fsc->sb->s_time_gran); req->r_op = op; req->r_direct_mode = mode; @@ -2097,7 +2096,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, /* time stamp */ { struct ceph_timespec ts; - ceph_encode_timespec(&ts, &req->r_stamp); + ceph_encode_timespec64(&ts, &req->r_stamp); ceph_encode_copy(&p, &ts, sizeof(ts)); } @@ -2190,7 +2189,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, p = msg->front.iov_base + req->r_request_release_offset; { struct ceph_timespec ts; - ceph_encode_timespec(&ts, &req->r_stamp); + ceph_encode_timespec64(&ts, &req->r_stamp); ceph_encode_copy(&p, &ts, sizeof(ts)); } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 2ec3b5b35067..80a7523e1523 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -229,7 +229,7 @@ struct ceph_mds_request { int r_fmode; /* file mode, if expecting cap */ kuid_t r_uid; kgid_t r_gid; - struct timespec r_stamp; + struct timespec64 r_stamp; /* for choosing which mds to send this request to */ int r_direct_mode; -- cgit v1.2.3 From 24499847e4471b0aba8126cebd640084136fc2bb Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Mon, 23 Jul 2018 21:32:24 +0530 Subject: ceph: adding new return type vm_fault_t Use new return type vm_fault_t for page_mkwrite and fault handler. Signed-off-by: Souptick Joarder Reviewed-by: Matthew Wilcox Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 62 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index afcc59ed7090..9c332a6f6667 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1429,7 +1429,7 @@ static void ceph_restore_sigs(sigset_t *oldset) /* * vm ops */ -static int ceph_filemap_fault(struct vm_fault *vmf) +static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); @@ -1437,8 +1437,9 @@ static int ceph_filemap_fault(struct vm_fault *vmf) struct ceph_file_info *fi = vma->vm_file->private_data; struct page *pinned_page = NULL; loff_t off = vmf->pgoff << PAGE_SHIFT; - int want, got, ret; + int want, got, err; sigset_t oldset; + vm_fault_t ret = VM_FAULT_SIGBUS; ceph_block_sigs(&oldset); @@ -1450,8 +1451,8 @@ static int ceph_filemap_fault(struct vm_fault *vmf) want = CEPH_CAP_FILE_CACHE; got = 0; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page); - if (ret < 0) + err = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page); + if (err < 0) goto out_restore; dout("filemap_fault %p %llu~%zd got cap refs on %s\n", @@ -1463,16 +1464,17 @@ static int ceph_filemap_fault(struct vm_fault *vmf) ceph_add_rw_context(fi, &rw_ctx); ret = filemap_fault(vmf); ceph_del_rw_context(fi, &rw_ctx); + dout("filemap_fault %p %llu~%zd drop cap refs %s ret %x\n", + inode, off, (size_t)PAGE_SIZE, + ceph_cap_string(got), ret); } else - ret = -EAGAIN; + err = -EAGAIN; - dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", - inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got), ret); if (pinned_page) put_page(pinned_page); ceph_put_cap_refs(ci, got); - if (ret != -EAGAIN) + if (err != -EAGAIN) goto out_restore; /* read inline data */ @@ -1480,7 +1482,6 @@ static int ceph_filemap_fault(struct vm_fault *vmf) /* does not support inline data > PAGE_SIZE */ ret = VM_FAULT_SIGBUS; } else { - int ret1; struct address_space *mapping = inode->i_mapping; struct page *page = find_or_create_page(mapping, 0, mapping_gfp_constraint(mapping, @@ -1489,32 +1490,32 @@ static int ceph_filemap_fault(struct vm_fault *vmf) ret = VM_FAULT_OOM; goto out_inline; } - ret1 = __ceph_do_getattr(inode, page, + err = __ceph_do_getattr(inode, page, CEPH_STAT_CAP_INLINE_DATA, true); - if (ret1 < 0 || off >= i_size_read(inode)) { + if (err < 0 || off >= i_size_read(inode)) { unlock_page(page); put_page(page); - if (ret1 < 0) - ret = ret1; + if (err == -ENOMEM) + ret = VM_FAULT_OOM; else ret = VM_FAULT_SIGBUS; goto out_inline; } - if (ret1 < PAGE_SIZE) - zero_user_segment(page, ret1, PAGE_SIZE); + if (err < PAGE_SIZE) + zero_user_segment(page, err, PAGE_SIZE); else flush_dcache_page(page); SetPageUptodate(page); vmf->page = page; ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED; out_inline: - dout("filemap_fault %p %llu~%zd read inline data ret %d\n", + dout("filemap_fault %p %llu~%zd read inline data ret %x\n", inode, off, (size_t)PAGE_SIZE, ret); } out_restore: ceph_restore_sigs(&oldset); - if (ret < 0) - ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; + if (err < 0) + ret = vmf_error(err); return ret; } @@ -1522,7 +1523,7 @@ out_restore: /* * Reuse write_begin here for simplicity. */ -static int ceph_page_mkwrite(struct vm_fault *vmf) +static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); @@ -1533,8 +1534,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) loff_t off = page_offset(page); loff_t size = i_size_read(inode); size_t len; - int want, got, ret; + int want, got, err; sigset_t oldset; + vm_fault_t ret = VM_FAULT_SIGBUS; prealloc_cf = ceph_alloc_cap_flush(); if (!prealloc_cf) @@ -1548,10 +1550,10 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) lock_page(page); locked_page = page; } - ret = ceph_uninline_data(vma->vm_file, locked_page); + err = ceph_uninline_data(vma->vm_file, locked_page); if (locked_page) unlock_page(locked_page); - if (ret < 0) + if (err < 0) goto out_free; } @@ -1568,9 +1570,9 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) want = CEPH_CAP_FILE_BUFFER; got = 0; - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len, + err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len, &got, NULL); - if (ret < 0) + if (err < 0) goto out_free; dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", @@ -1588,13 +1590,13 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) break; } - ret = ceph_update_writeable_page(vma->vm_file, off, len, page); - if (ret >= 0) { + err = ceph_update_writeable_page(vma->vm_file, off, len, page); + if (err >= 0) { /* success. we'll keep the page locked. */ set_page_dirty(page); ret = VM_FAULT_LOCKED; } - } while (ret == -EAGAIN); + } while (err == -EAGAIN); if (ret == VM_FAULT_LOCKED || ci->i_inline_version != CEPH_INLINE_NONE) { @@ -1608,14 +1610,14 @@ static int ceph_page_mkwrite(struct vm_fault *vmf) __mark_inode_dirty(inode, dirty); } - dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n", + dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n", inode, off, len, ceph_cap_string(got), ret); ceph_put_cap_refs(ci, got); out_free: ceph_restore_sigs(&oldset); ceph_free_cap_flush(prealloc_cf); - if (ret < 0) - ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; + if (err < 0) + ret = vmf_error(err); return ret; } -- cgit v1.2.3 From 6daca13d2e72bedaaacfc08f873114c9307d5aea Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Jul 2018 19:18:34 +0200 Subject: libceph: add authorizer challenge When a client authenticates with a service, an authorizer is sent with a nonce to the service (ceph_x_authorize_[ab]) and the service responds with a mutation of that nonce (ceph_x_authorize_reply). This lets the client verify the service is who it says it is but it doesn't protect against a replay: someone can trivially capture the exchange and reuse the same authorizer to authenticate themselves. Allow the service to reject an initial authorizer with a random challenge (ceph_x_authorize_challenge). The client then has to respond with an updated authorizer proving they are able to decrypt the service's challenge and that the new authorizer was produced for this specific connection instance. The accepting side requires this challenge and response unconditionally if the client side advertises they have CEPHX_V2 feature bit. This addresses CVE-2018-1128. Link: http://tracker.ceph.com/issues/24836 Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- fs/ceph/mds_client.c | 11 +++++++ include/linux/ceph/auth.h | 8 +++++ include/linux/ceph/messenger.h | 3 ++ include/linux/ceph/msgr.h | 2 +- net/ceph/auth.c | 16 ++++++++++ net/ceph/auth_x.c | 72 +++++++++++++++++++++++++++++++++++++++--- net/ceph/auth_x_protocol.h | 7 ++++ net/ceph/messenger.c | 17 +++++++++- net/ceph/osd_client.c | 11 +++++++ 9 files changed, 140 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f5a299daae95..4fc7582233db 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4154,6 +4154,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, return auth; } +static int add_authorizer_challenge(struct ceph_connection *con, + void *challenge_buf, int challenge_buf_len) +{ + struct ceph_mds_session *s = con->private; + struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; + + return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer, + challenge_buf, challenge_buf_len); +} static int verify_authorizer_reply(struct ceph_connection *con) { @@ -4217,6 +4227,7 @@ static const struct ceph_connection_operations mds_con_ops = { .put = con_put, .dispatch = dispatch, .get_authorizer = get_authorizer, + .add_authorizer_challenge = add_authorizer_challenge, .verify_authorizer_reply = verify_authorizer_reply, .invalidate_authorizer = invalidate_authorizer, .peer_reset = peer_reset, diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index e931da8424a4..6728c2ee0205 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -64,6 +64,10 @@ struct ceph_auth_client_ops { /* ensure that an existing authorizer is up to date */ int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth); + int (*add_authorizer_challenge)(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len); int (*verify_authorizer_reply)(struct ceph_auth_client *ac, struct ceph_authorizer *a); void (*invalidate_authorizer)(struct ceph_auth_client *ac, @@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a); extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *a); +int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len); extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a); extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 021718570b50..fc2b4491ee0a 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -31,6 +31,9 @@ struct ceph_connection_operations { struct ceph_auth_handshake *(*get_authorizer) ( struct ceph_connection *con, int *proto, int force_new); + int (*add_authorizer_challenge)(struct ceph_connection *con, + void *challenge_buf, + int challenge_buf_len); int (*verify_authorizer_reply) (struct ceph_connection *con); int (*invalidate_authorizer)(struct ceph_connection *con); diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 73ae2a926548..9e50aede46c8 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -91,7 +91,7 @@ struct ceph_entity_inst { #define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ #define CEPH_MSGR_TAG_KEEPALIVE2 14 /* keepalive2 byte + ceph_timespec */ #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */ - +#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16 /* cephx v2 doing server challenge */ /* * connection negotiation diff --git a/net/ceph/auth.c b/net/ceph/auth.c index dbde2b3c3c15..fbeee068ea14 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -315,6 +315,22 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac, } EXPORT_SYMBOL(ceph_auth_update_authorizer); +int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len) +{ + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->add_authorizer_challenge) + ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf, + challenge_buf_len); + mutex_unlock(&ac->mutex); + return ret; +} +EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge); + int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a) { diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 61cccb93f653..512eed4291fe 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -295,7 +295,8 @@ bad: * authorizer. The first part (ceph_x_authorize_a) should already be * encoded. */ -static int encrypt_authorizer(struct ceph_x_authorizer *au) +static int encrypt_authorizer(struct ceph_x_authorizer *au, + u64 *server_challenge) { struct ceph_x_authorize_a *msg_a; struct ceph_x_authorize_b *msg_b; @@ -308,16 +309,28 @@ static int encrypt_authorizer(struct ceph_x_authorizer *au) end = au->buf->vec.iov_base + au->buf->vec.iov_len; msg_b = p + ceph_x_encrypt_offset(); - msg_b->struct_v = 1; + msg_b->struct_v = 2; msg_b->nonce = cpu_to_le64(au->nonce); + if (server_challenge) { + msg_b->have_challenge = 1; + msg_b->server_challenge_plus_one = + cpu_to_le64(*server_challenge + 1); + } else { + msg_b->have_challenge = 0; + msg_b->server_challenge_plus_one = 0; + } ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b)); if (ret < 0) return ret; p += ret; - WARN_ON(p > end); - au->buf->vec.iov_len = p - au->buf->vec.iov_base; + if (server_challenge) { + WARN_ON(p != end); + } else { + WARN_ON(p > end); + au->buf->vec.iov_len = p - au->buf->vec.iov_base; + } return 0; } @@ -382,7 +395,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, le64_to_cpu(msg_a->ticket_blob.secret_id)); get_random_bytes(&au->nonce, sizeof(au->nonce)); - ret = encrypt_authorizer(au); + ret = encrypt_authorizer(au, NULL); if (ret) { pr_err("failed to encrypt authorizer: %d", ret); goto out_au; @@ -664,6 +677,54 @@ static int ceph_x_update_authorizer( return 0; } +static int decrypt_authorize_challenge(struct ceph_x_authorizer *au, + void *challenge_buf, + int challenge_buf_len, + u64 *server_challenge) +{ + struct ceph_x_authorize_challenge *ch = + challenge_buf + sizeof(struct ceph_x_encrypt_header); + int ret; + + /* no leading len */ + ret = __ceph_x_decrypt(&au->session_key, challenge_buf, + challenge_buf_len); + if (ret < 0) + return ret; + if (ret < sizeof(*ch)) { + pr_err("bad size %d for ceph_x_authorize_challenge\n", ret); + return -EINVAL; + } + + *server_challenge = le64_to_cpu(ch->server_challenge); + return 0; +} + +static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + void *challenge_buf, + int challenge_buf_len) +{ + struct ceph_x_authorizer *au = (void *)a; + u64 server_challenge; + int ret; + + ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len, + &server_challenge); + if (ret) { + pr_err("failed to decrypt authorize challenge: %d", ret); + return ret; + } + + ret = encrypt_authorizer(au, &server_challenge); + if (ret) { + pr_err("failed to encrypt authorizer w/ challenge: %d", ret); + return ret; + } + + return 0; +} + static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a) { @@ -816,6 +877,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = { .handle_reply = ceph_x_handle_reply, .create_authorizer = ceph_x_create_authorizer, .update_authorizer = ceph_x_update_authorizer, + .add_authorizer_challenge = ceph_x_add_authorizer_challenge, .verify_authorizer_reply = ceph_x_verify_authorizer_reply, .invalidate_authorizer = ceph_x_invalidate_authorizer, .reset = ceph_x_reset, diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h index 32c13d763b9a..24b0b74564d0 100644 --- a/net/ceph/auth_x_protocol.h +++ b/net/ceph/auth_x_protocol.h @@ -70,6 +70,13 @@ struct ceph_x_authorize_a { struct ceph_x_authorize_b { __u8 struct_v; __le64 nonce; + __u8 have_challenge; + __le64 server_challenge_plus_one; +} __attribute__ ((packed)); + +struct ceph_x_authorize_challenge { + __u8 struct_v; + __le64 server_challenge; } __attribute__ ((packed)); struct ceph_x_authorize_reply { diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 500cc3da586f..e915c8bce117 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2080,9 +2080,24 @@ static int process_connect(struct ceph_connection *con) if (con->auth) { /* * Any connection that defines ->get_authorizer() - * should also define ->verify_authorizer_reply(). + * should also define ->add_authorizer_challenge() and + * ->verify_authorizer_reply(). + * * See get_connect_authorizer(). */ + if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { + ret = con->ops->add_authorizer_challenge( + con, con->auth->authorizer_reply_buf, + le32_to_cpu(con->in_reply.authorizer_len)); + if (ret < 0) + return ret; + + con_out_kvec_reset(con); + __prepare_write_connect(con); + prepare_read_connect(con); + return 0; + } + ret = con->ops->verify_authorizer_reply(con); if (ret < 0) { con->error_msg = "bad authorize reply"; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 8002b8e9ce24..60934bd8796c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5393,6 +5393,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, return auth; } +static int add_authorizer_challenge(struct ceph_connection *con, + void *challenge_buf, int challenge_buf_len) +{ + struct ceph_osd *o = con->private; + struct ceph_osd_client *osdc = o->o_osdc; + struct ceph_auth_client *ac = osdc->client->monc.auth; + + return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer, + challenge_buf, challenge_buf_len); +} static int verify_authorizer_reply(struct ceph_connection *con) { @@ -5442,6 +5452,7 @@ static const struct ceph_connection_operations osd_con_ops = { .put = put_osd_con, .dispatch = dispatch, .get_authorizer = get_authorizer, + .add_authorizer_challenge = add_authorizer_challenge, .verify_authorizer_reply = verify_authorizer_reply, .invalidate_authorizer = invalidate_authorizer, .alloc_msg = alloc_msg, -- cgit v1.2.3 From 719784ba706cdbb47ef87483950f0a4594d36e87 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 19 Jul 2018 22:15:24 +0800 Subject: ceph: add new field max_file_size in ceph_fs_client In order to not bother to VFS and other specific filesystems, we decided to do offset validation inside ceph kernel client, so just simply set sb->s_maxbytes to MAX_LFS_FILESIZE so that it can successfully pass VFS check. We add new field max_file_size in ceph_fs_client to store real file size limit and doing proper check based on it. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 3 ++- fs/ceph/super.c | 3 ++- fs/ceph/super.h | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4fc7582233db..dfbe138472e6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4018,7 +4018,8 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) } else { mdsc->mdsmap = newmap; /* first mds map */ } - mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; + mdsc->fsc->max_file_size = min((loff_t)mdsc->mdsmap->m_max_file_size, + MAX_LFS_FILESIZE); __wake_requests(mdsc, &mdsc->waiting_for_map); ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP, diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 3d8a26b2944f..43ca3b763875 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -940,11 +940,12 @@ static int ceph_set_super(struct super_block *s, void *data) dout("set_super %p data %p\n", s, data); s->s_flags = fsc->mount_options->sb_flags; - s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ + s->s_maxbytes = MAX_LFS_FILESIZE; s->s_xattr = ceph_xattr_handlers; s->s_fs_info = fsc; fsc->sb = s; + fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ s->s_op = &ceph_super_ops; s->s_d_op = &ceph_dentry_ops; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index d3eb70e2b527..f09dbf2a2e26 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -98,6 +98,7 @@ struct ceph_fs_client { unsigned long mount_state; int min_caps; /* min caps i added */ + loff_t max_file_size; struct ceph_mds_client *mdsc; -- cgit v1.2.3 From 0671e9968dfb3f99a366df816c361b8e871dba1b Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 19 Jul 2018 22:15:25 +0800 Subject: ceph: add additional range check in ceph_fallocate() If the range is larger than both real file size and limit of max file size, then return -EFBIG. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 2f3a30ca94bf..2c54d2674db6 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1728,8 +1728,7 @@ static long ceph_fallocate(struct file *file, int mode, struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_osd_client *osdc = - &ceph_inode_to_client(inode)->client->osdc; + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_cap_flush *prealloc_cf; int want, got = 0; int dirty; @@ -1737,6 +1736,9 @@ static long ceph_fallocate(struct file *file, int mode, loff_t endoff = 0; loff_t size; + if ((offset + length) > max(i_size_read(inode), fsc->max_file_size)) + return -EFBIG; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; @@ -1760,7 +1762,7 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } - if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && + if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL) && !(mode & FALLOC_FL_PUNCH_HOLE)) { ret = -ENOSPC; goto unlock; -- cgit v1.2.3 From 8687a3e2c7a026c173ac2e0d65d869c98c154a3a Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 19 Jul 2018 22:15:26 +0800 Subject: ceph: add additional offset check in ceph_write_iter() If the offset is larger or equal to both real file size and max file size, then return -EFBIG. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 2c54d2674db6..b9cd9d271dcb 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1384,12 +1384,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_osd_client *osdc = - &ceph_sb_to_client(inode->i_sb)->client->osdc; + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; int err, want, got; loff_t pos; + loff_t limit = max(i_size_read(inode), fsc->max_file_size); if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -1415,6 +1415,13 @@ retry_snap: goto out; pos = iocb->ki_pos; + if (unlikely(pos >= limit)) { + err = -EFBIG; + goto out; + } else { + iov_iter_truncate(from, limit - pos); + } + count = iov_iter_count(from); if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) { err = -EDQUOT; @@ -1436,7 +1443,7 @@ retry_snap: } /* FIXME: not complete since it doesn't account for being at quota */ - if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) { + if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_FULL)) { err = -ENOSPC; goto out; } @@ -1526,7 +1533,7 @@ retry_snap: } if (written >= 0) { - if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL)) + if (ceph_osdmap_flag(&fsc->client->osdc, CEPH_OSDMAP_NEARFULL)) iocb->ki_flags |= IOCB_DSYNC; written = generic_write_sync(iocb, written); } -- cgit v1.2.3 From 36a4c72d1c6f5f50d4db14a38f296855ae82571b Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sun, 5 Aug 2018 19:33:01 +0800 Subject: ceph: add additional size check in ceph_setattr() ceph_setattr() finally calls vfs function inode_newsize_ok() to do offset validation and that is based on sb->s_maxbytes. Because we set sb->s_maxbytes to MAX_LFS_FILESIZE to through VFS check and do proper offset validation in cephfs level, we need adding proper offset validation before calling inode_newsize_ok(). Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index d62f65f2875d..ebc7bdaed2d0 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2157,6 +2157,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) int ceph_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); int err; if (ceph_snap(inode) != CEPH_NOSNAP) @@ -2166,6 +2167,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if (err != 0) return err; + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size > max(inode->i_size, fsc->max_file_size)) + return -EFBIG; + if ((attr->ia_valid & ATTR_SIZE) && ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size)) return -EDQUOT; -- cgit v1.2.3 From 9da12e3a7de1665a14063653b60e872da0ee7810 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 19 Jul 2018 22:15:27 +0800 Subject: ceph: compare fsc->max_file_size and inode->i_size for max file size limit In ceph_llseek(), we compare fsc->max_file_size and inode->i_size to choose max file size limit. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b9cd9d271dcb..141a64c1f391 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1554,6 +1554,7 @@ out_unlocked: static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); loff_t i_size; loff_t ret; @@ -1598,7 +1599,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) break; } - ret = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); + ret = vfs_setpos(file, offset, max(i_size, fsc->max_file_size)); out: inode_unlock(inode); -- cgit v1.2.3 From d5548492902967dd088bd0a21df7d047df10f9f6 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 28 Jul 2018 16:30:48 +0800 Subject: ceph: change to void return type for __do_request() We do not check return code for __do_request() in all callers, so change to void return type. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index dfbe138472e6..8dbfca01c7ea 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2227,7 +2227,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, /* * send request, or put it on the appropriate wait list. */ -static int __do_request(struct ceph_mds_client *mdsc, +static void __do_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { struct ceph_mds_session *session = NULL; @@ -2237,7 +2237,7 @@ static int __do_request(struct ceph_mds_client *mdsc, if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) __unregister_request(mdsc, req); - goto out; + return; } if (req->r_timeout && @@ -2260,7 +2260,7 @@ static int __do_request(struct ceph_mds_client *mdsc, if (mdsc->mdsmap->m_epoch == 0) { dout("do_request no mdsmap, waiting for map\n"); list_add(&req->r_wait, &mdsc->waiting_for_map); - goto finish; + return; } if (!(mdsc->fsc->mount_options->flags & CEPH_MOUNT_OPT_MOUNTWAIT) && @@ -2278,7 +2278,7 @@ static int __do_request(struct ceph_mds_client *mdsc, ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { dout("do_request no mds or not active, waiting for map\n"); list_add(&req->r_wait, &mdsc->waiting_for_map); - goto out; + return; } /* get, open session */ @@ -2328,8 +2328,7 @@ finish: complete_request(mdsc, req); __unregister_request(mdsc, req); } -out: - return err; + return; } /* -- cgit v1.2.3 From 7bf8f736c8e0f2e854d41838eed12e317fb29963 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 28 Jul 2018 23:15:35 +0800 Subject: ceph: refactor ceph_unreserve_caps() The code of ceph_unreserve_caps() and error handling in ceph_reserve_caps() are duplicated, so introduce a helper __ceph_unreserve_caps() to reduce duplicated code. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 66 +++++++++++++++++++++++++++++++-------------------------- fs/ceph/super.h | 2 +- 2 files changed, 37 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f50cc008632a..fb4a19ee5b6d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -156,6 +156,37 @@ void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) spin_unlock(&mdsc->caps_list_lock); } +static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps) +{ + struct ceph_cap *cap; + int i; + + if (nr_caps) { + BUG_ON(mdsc->caps_reserve_count < nr_caps); + mdsc->caps_reserve_count -= nr_caps; + if (mdsc->caps_avail_count >= + mdsc->caps_reserve_count + mdsc->caps_min_count) { + mdsc->caps_total_count -= nr_caps; + for (i = 0; i < nr_caps; i++) { + cap = list_first_entry(&mdsc->caps_list, + struct ceph_cap, caps_item); + list_del(&cap->caps_item); + kmem_cache_free(ceph_cap_cachep, cap); + } + } else { + mdsc->caps_avail_count += nr_caps; + } + + dout("%s: caps %d = %d used + %d resv + %d avail\n", + __func__, + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + + mdsc->caps_avail_count); + } +} + /* * Called under mdsc->mutex. */ @@ -283,39 +314,14 @@ out_nomem: return -ENOMEM; } -int ceph_unreserve_caps(struct ceph_mds_client *mdsc, +void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { - int i; - struct ceph_cap *cap; - dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); - if (ctx->count) { - spin_lock(&mdsc->caps_list_lock); - BUG_ON(mdsc->caps_reserve_count < ctx->count); - mdsc->caps_reserve_count -= ctx->count; - if (mdsc->caps_avail_count >= - mdsc->caps_reserve_count + mdsc->caps_min_count) { - mdsc->caps_total_count -= ctx->count; - for (i = 0; i < ctx->count; i++) { - cap = list_first_entry(&mdsc->caps_list, - struct ceph_cap, caps_item); - list_del(&cap->caps_item); - kmem_cache_free(ceph_cap_cachep, cap); - } - } else { - mdsc->caps_avail_count += ctx->count; - } - ctx->count = 0; - dout("unreserve caps %d = %d used + %d resv + %d avail\n", - mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); - BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + - mdsc->caps_reserve_count + - mdsc->caps_avail_count); - spin_unlock(&mdsc->caps_list_lock); - } - return 0; + spin_lock(&mdsc->caps_list_lock); + __ceph_unreserve_caps(mdsc, ctx->count); + ctx->count = 0; + spin_unlock(&mdsc->caps_list_lock); } struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f09dbf2a2e26..9b5f15d20b33 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -656,7 +656,7 @@ extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx, int need); -extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, +extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx); extern void ceph_reservation_status(struct ceph_fs_client *client, int *total, int *avail, int *used, -- cgit v1.2.3 From e5bc08d09f5fac5cd2901f34b510e67c524894ab Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 28 Jul 2018 23:15:37 +0800 Subject: ceph: refactor error handling code in ceph_reserve_caps() Call new helper __ceph_unreserve_caps() to reduce duplicated code. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 46 +++++++++++++--------------------------------- 1 file changed, 13 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index fb4a19ee5b6d..dd7dfdd2ba13 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -198,6 +198,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, int have; int alloc = 0; int max_caps; + int err = 0; bool trimmed = false; struct ceph_mds_session *s; LIST_HEAD(newcaps); @@ -264,9 +265,14 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", ctx, need, have + alloc); - goto out_nomem; + err = -ENOMEM; + break; + } + + if (!err) { + BUG_ON(have + alloc != need); + ctx->count = need; } - BUG_ON(have + alloc != need); spin_lock(&mdsc->caps_list_lock); mdsc->caps_total_count += alloc; @@ -276,42 +282,16 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); + + if (err) + __ceph_unreserve_caps(mdsc, have + alloc); + spin_unlock(&mdsc->caps_list_lock); - ctx->count = need; dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", ctx, mdsc->caps_total_count, mdsc->caps_use_count, mdsc->caps_reserve_count, mdsc->caps_avail_count); - return 0; - -out_nomem: - - spin_lock(&mdsc->caps_list_lock); - mdsc->caps_avail_count += have; - mdsc->caps_reserve_count -= have; - - while (!list_empty(&newcaps)) { - cap = list_first_entry(&newcaps, - struct ceph_cap, caps_item); - list_del(&cap->caps_item); - - /* Keep some preallocated caps around (ceph_min_count), to - * avoid lots of free/alloc churn. */ - if (mdsc->caps_avail_count >= - mdsc->caps_reserve_count + mdsc->caps_min_count) { - kmem_cache_free(ceph_cap_cachep, cap); - } else { - mdsc->caps_avail_count++; - mdsc->caps_total_count++; - list_add(&cap->caps_item, &mdsc->caps_list); - } - } - - BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + - mdsc->caps_reserve_count + - mdsc->caps_avail_count); - spin_unlock(&mdsc->caps_list_lock); - return -ENOMEM; + return err; } void ceph_unreserve_caps(struct ceph_mds_client *mdsc, -- cgit v1.2.3 From 342ce1823ebaec573ac269b56bca78c698fec5c3 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 11 May 2018 18:47:29 +0800 Subject: ceph: support cephfs' own feature bits Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 48 +++++++++++++++++++++++++++++++++++++++--------- fs/ceph/mds_client.h | 12 ++++++++++++ 2 files changed, 51 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 8dbfca01c7ea..5b767cf1f780 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -902,6 +902,27 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) return msg; } +static void encode_supported_features(void **p, void *end) +{ + static const unsigned char bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED; + static const size_t count = ARRAY_SIZE(bits); + + if (count > 0) { + size_t i; + size_t size = ((size_t)bits[count - 1] + 64) / 64 * 8; + + BUG_ON(*p + 4 + size > end); + ceph_encode_32(p, size); + memset(*p, 0, size); + for (i = 0; i < count; i++) + ((unsigned char*)(*p))[i / 8] |= 1 << (bits[i] % 8); + *p += size; + } else { + BUG_ON(*p + 4 > end); + ceph_encode_32(p, 0); + } +} + /* * session message, specialization for CEPH_SESSION_REQUEST_OPEN * to include additional client metadata fields. @@ -911,11 +932,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 struct ceph_msg *msg; struct ceph_mds_session_head *h; int i = -1; - int metadata_bytes = 0; + int extra_bytes = 0; int metadata_key_count = 0; struct ceph_options *opt = mdsc->fsc->client->options; struct ceph_mount_options *fsopt = mdsc->fsc->mount_options; - void *p; + void *p, *end; const char* metadata[][2] = { {"hostname", mdsc->nodename}, @@ -926,21 +947,26 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 }; /* Calculate serialized length of metadata */ - metadata_bytes = 4; /* map length */ + extra_bytes = 4; /* map length */ for (i = 0; metadata[i][0]; ++i) { - metadata_bytes += 8 + strlen(metadata[i][0]) + + extra_bytes += 8 + strlen(metadata[i][0]) + strlen(metadata[i][1]); metadata_key_count++; } + /* supported feature */ + extra_bytes += 4 + 8; /* Allocate the message */ - msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + metadata_bytes, + msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, GFP_NOFS, false); if (!msg) { pr_err("create_session_msg ENOMEM creating msg\n"); return NULL; } - h = msg->front.iov_base; + p = msg->front.iov_base; + end = p + msg->front.iov_len; + + h = p; h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN); h->seq = cpu_to_le64(seq); @@ -950,11 +976,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 * * ClientSession messages with metadata are v2 */ - msg->hdr.version = cpu_to_le16(2); + msg->hdr.version = cpu_to_le16(3); msg->hdr.compat_version = cpu_to_le16(1); /* The write pointer, following the session_head structure */ - p = msg->front.iov_base + sizeof(*h); + p += sizeof(*h); /* Number of entries in the map */ ceph_encode_32(&p, metadata_key_count); @@ -972,6 +998,10 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 p += val_len; } + encode_supported_features(&p, end); + msg->front.iov_len = p - msg->front.iov_base; + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); + return msg; } @@ -2749,7 +2779,7 @@ static void handle_session(struct ceph_mds_session *session, int wake = 0; /* decode */ - if (msg->front.iov_len != sizeof(*h)) + if (msg->front.iov_len < sizeof(*h)) goto bad; op = le32_to_cpu(h->op); seq = le64_to_cpu(h->seq); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 80a7523e1523..32fcce0d4d3c 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -16,6 +16,18 @@ #include #include +/* The first 8 bits are reserved for old ceph releases */ +#define CEPHFS_FEATURE_MIMIC 8 + +#define CEPHFS_FEATURES_ALL { \ + 0, 1, 2, 3, 4, 5, 6, 7, \ + CEPHFS_FEATURE_MIMIC, \ +} + +#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL +#define CEPHFS_FEATURES_CLIENT_REQUIRED {} + + /* * Some lock dependencies: * -- cgit v1.2.3 From 0fcf6c02b205f80f24eb548b236543ec151cb01c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 3 Aug 2018 16:24:49 +0800 Subject: ceph: don't drop message if it contains more data than expected Later version mds may encode more data into messages. Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 6 +++--- fs/ceph/quota.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 5b767cf1f780..bc43c822426a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3406,10 +3406,10 @@ static void handle_lease(struct ceph_mds_client *mdsc, vino.ino = le64_to_cpu(h->ino); vino.snap = CEPH_NOSNAP; seq = le32_to_cpu(h->seq); - dname.name = (void *)h + sizeof(*h) + sizeof(u32); - dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32); - if (dname.len != get_unaligned_le32(h+1)) + dname.len = get_unaligned_le32(h + 1); + if (msg->front.iov_len < sizeof(*h) + sizeof(u32) + dname.len) goto bad; + dname.name = (void *)(h + 1) + sizeof(u32); /* lookup inode */ inode = ceph_find_inode(sb, vino); diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 242bfa5c0539..32d4f13784ba 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -48,7 +48,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, struct inode *inode; struct ceph_inode_info *ci; - if (msg->front.iov_len != sizeof(*h)) { + if (msg->front.iov_len < sizeof(*h)) { pr_err("%s corrupt message mds%d len %d\n", __func__, session->s_mds, (int)msg->front.iov_len); ceph_msg_dump(msg); -- cgit v1.2.3