From f7c2f4f6ce16fb58f7d024f3e1b40023c4b43ff9 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 31 May 2023 16:06:55 +0800 Subject: ceph: only send metrics when the MDS rank is ready When the MDS rank is in the clientreplay state, the metrics requests will be discarded directly. Also, when there are a lot of known client requests to recover from, the metrics requests will slow down the MDS rank's transition to the active state. With this patch, we send the metrics requests only when the MDS rank is in the active state. Link: https://tracker.ceph.com/issues/61524 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/metric.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index c47347d2e84e..cce78d769f55 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -36,6 +36,14 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, s32 items = 0; s32 len; + /* Do not send the metrics until the MDS rank is ready */ + mutex_lock(&mdsc->mutex); + if (ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) != CEPH_MDS_STATE_ACTIVE) { + mutex_unlock(&mdsc->mutex); + return false; + } + mutex_unlock(&mdsc->mutex); + len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write) + sizeof(*meta) + sizeof(*dlease) + sizeof(*files) + sizeof(*icaps) + sizeof(*inodes) + sizeof(*rsize) -- cgit v1.2.3 From 8b0da5c549ae63ba1debd92a350f90773cb4bfe7 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 18 May 2023 09:40:14 +0800 Subject: ceph: try to dump the msgs when decoding fails When the msgs are corrupted we need to dump them, which makes it easier to dig into what has happened and where the issue is. Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4c0f22acf53d..66048a86c480 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -645,6 +645,7 @@ bad: err = -EIO; out_bad: pr_err("mds parse_reply err %d\n", err); + ceph_msg_dump(msg); return err; } @@ -3538,6 +3539,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, bad: pr_err("mdsc_handle_forward decode error err=%d\n", err); + ceph_msg_dump(msg); } static int __decode_session_metadata(void **p, void *end, @@ -5258,6 +5260,7 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) bad: pr_err("error decoding fsmap %d. Shutting down mount.\n", err); ceph_umount_begin(mdsc->fsc->sb); + ceph_msg_dump(msg); err_out: mutex_lock(&mdsc->mutex); mdsc->mdsmap_err = err; @@ -5326,6 +5329,7 @@ bad_unlock: bad: pr_err("error decoding mdsmap %d. Shutting down mount.\n", err); ceph_umount_begin(mdsc->fsc->sb); + ceph_msg_dump(msg); return; } -- cgit v1.2.3 From d9d00f71ab5a2b5a47b228f678a8817e8687387f Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 5 Jun 2023 14:58:18 +0800 Subject: ceph: voluntarily drop Xx caps for requests that touch parent mtime For write requests the parent's mtime will be updated correspondingly. If the 'Xx' caps are issued and other caps are released together with the write request, the MDS Locker will try to eval the xattr lock, which needs to change the locker state excl --> sync and do Xx caps revocation. Just voluntarily drop the CEPH_CAP_XATTR_EXCL caps to avoid a cap revoke message, which could cause the mtime to be overwritten by a stale one.
[ idryomov: break unnecessarily long lines ] Link: https://tracker.ceph.com/issues/61584 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 17 ++++++++++------- fs/ceph/file.c | 3 ++- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index cb67ac821f0e..4a2b39d9a61a 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -886,7 +886,8 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir, set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_args.mknod.mode = cpu_to_le32(mode); req->r_args.mknod.rdev = cpu_to_le32(rdev); - req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | + CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (as_ctx.pagelist) { req->r_pagelist = as_ctx.pagelist; @@ -953,7 +954,8 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir, set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_dentry = dget(dentry); req->r_num_caps = 2; - req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | + CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (as_ctx.pagelist) { req->r_pagelist = as_ctx.pagelist; @@ -1022,7 +1024,8 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir, ihold(dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_args.mkdir.mode = cpu_to_le32(mode); - req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | + CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (as_ctx.pagelist) { req->r_pagelist = as_ctx.pagelist; @@ -1079,7 +1082,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, req->r_parent = dir; ihold(dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; /* release LINK_SHARED on source inode (mds will lock it) */ req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; @@ -1218,7 +1221,7 @@ retry: req->r_num_caps = 2; req->r_parent = dir; ihold(dir); - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; req->r_inode_drop = ceph_drop_caps_for_unlink(inode); @@ -1320,9 +1323,9 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir, req->r_parent = new_dir; ihold(new_dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); - req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL; req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; - req->r_dentry_drop = CEPH_CAP_FILE_SHARED; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; /* release LINK_RDCACHE on source inode (mds will lock it) */ req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index f4d8bf7dec88..5e4c7c3d55b9 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -791,7 +791,8 @@ retry: if (flags & O_CREAT) { struct ceph_file_layout lo; - req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; + req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | + 
CEPH_CAP_XATTR_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; if (as_ctx.pagelist) { req->r_pagelist = as_ctx.pagelist; -- cgit v1.2.3 From 23ee27dce30e7d3091d6c3143b79f48dab6f9a3e Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 10 May 2023 19:55:46 +0800 Subject: ceph: add a dedicated private data for netfs rreq We need to save the 'f_ra.ra_pages' to expand the readahead window later. Cc: stable@vger.kernel.org Fixes: 49870056005c ("ceph: convert ceph_readpages to ceph_readahead") Link: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@mail.scut.edu.cn Link: https://www.spinics.net/lists/ceph-users/msg76183.html Signed-off-by: Xiubo Li Reviewed-and-tested-by: Hu Weiwen Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 45 ++++++++++++++++++++++++++++++++++----------- fs/ceph/super.h | 13 +++++++++++++ 2 files changed, 47 insertions(+), 11 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 6bb251a4d613..19c4f08454d2 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -362,18 +362,28 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) { struct inode *inode = rreq->inode; int got = 0, want = CEPH_CAP_FILE_CACHE; + struct ceph_netfs_request_data *priv; int ret = 0; if (rreq->origin != NETFS_READAHEAD) return 0; + priv = kzalloc(sizeof(*priv), GFP_NOFS); + if (!priv) + return -ENOMEM; + if (file) { struct ceph_rw_context *rw_ctx; struct ceph_file_info *fi = file->private_data; + priv->file_ra_pages = file->f_ra.ra_pages; + priv->file_ra_disabled = file->f_mode & FMODE_RANDOM; + rw_ctx = ceph_find_rw_context(fi); - if (rw_ctx) + if (rw_ctx) { + rreq->netfs_priv = priv; return 0; + } } /* @@ -383,27 +393,40 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got); if (ret < 0) { dout("start_read %p, error getting cap\n", inode); - return ret; + goto out; } if (!(got & want)) { dout("start_read %p, no cache cap\n", inode); - return -EACCES; + ret = -EACCES; + goto out; + } + if (ret == 0) { + ret = -EACCES; + goto out; } - if (ret == 0) - return -EACCES; - rreq->netfs_priv = (void *)(uintptr_t)got; - return 0; + priv->caps = got; + rreq->netfs_priv = priv; + +out: + if (ret < 0) + kfree(priv); + + return ret; } static void ceph_netfs_free_request(struct netfs_io_request *rreq) { - struct ceph_inode_info *ci = ceph_inode(rreq->inode); - int got = (uintptr_t)rreq->netfs_priv; + struct ceph_netfs_request_data *priv = rreq->netfs_priv; + + if (!priv) + return; - if (got) - ceph_put_cap_refs(ci, got); + if (priv->caps) + ceph_put_cap_refs(ceph_inode(rreq->inode), priv->caps); + kfree(priv); + rreq->netfs_priv = NULL; } const struct netfs_request_ops ceph_netfs_ops = { diff --git a/fs/ceph/super.h b/fs/ceph/super.h index d24bf0db5234..3bfddf34d488 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -451,6 +451,19 @@ struct ceph_inode_info { unsigned long i_work_mask; }; +struct ceph_netfs_request_data { + int caps; + + /* + * Maximum size of a file readahead request. + * The fadvise could update the bdi's default ra_pages. 
+ */ + unsigned int file_ra_pages; + + /* Set it if fadvise disables file readahead entirely */ + bool file_ra_disabled; +}; + static inline struct ceph_inode_info * ceph_inode(const struct inode *inode) { -- cgit v1.2.3 From dc94bb8f271c079f69583d0f12a489aaf5202751 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 4 May 2023 19:00:42 +0800 Subject: ceph: fix blindly expanding the readahead windows Blindly expanding the readahead windows will cause unnecessary pagecache thrashing and also introduce extra network load. We should disable expanding the windows if readahead is disabled and also shouldn't expand the windows too much. Expand the window forward first, instead of backward, to favour possible sequential reads. Bound `rreq->len` to the actual file size to restore the previous page cache usage. Note that posix_fadvise may change the maximum readahead size of a file. Cc: stable@vger.kernel.org Fixes: 49870056005c ("ceph: convert ceph_readpages to ceph_readahead") Link: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@mail.scut.edu.cn Link: https://www.spinics.net/lists/ceph-users/msg76183.html Signed-off-by: Xiubo Li Reviewed-and-tested-by: Hu Weiwen Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 19c4f08454d2..59cbfb80edbd 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -187,16 +187,42 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_layout *lo = &ci->i_layout; + unsigned long max_pages = inode->i_sb->s_bdi->ra_pages; + loff_t end = rreq->start + rreq->len, new_end; + struct ceph_netfs_request_data *priv = rreq->netfs_priv; + unsigned long max_len; u32 blockoff; - u64 blockno; - /* Expand the start downward */ - blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff); - rreq->start = blockno * lo->stripe_unit; - rreq->len += blockoff; + if (priv) { + /* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */ + if (priv->file_ra_disabled) + max_pages = 0; + else + max_pages = priv->file_ra_pages; + + } + + /* Readahead is disabled */ + if (!max_pages) + return; + + max_len = max_pages << PAGE_SHIFT; - /* Now, round up the length to the next block */ - rreq->len = roundup(rreq->len, lo->stripe_unit); + /* + * Try to expand the length forward by rounding up it to the next + * block, but do not exceed the file size, unless the original + * request already exceeds it. + */ + new_end = min(round_up(end, lo->stripe_unit), rreq->i_size); + if (new_end > end && new_end <= rreq->start + max_len) + rreq->len = new_end - rreq->start; + + /* Try to expand the start downward */ + div_u64_rem(rreq->start, lo->stripe_unit, &blockoff); + if (rreq->len + blockoff <= max_len) { + rreq->start -= blockoff; + rreq->len += blockoff; + } } static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) -- cgit v1.2.3 From 2d12ad950b0c2a89d82f5d258309ad23aa70fc38 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 11 May 2023 13:19:45 +0800 Subject: ceph: trigger to flush the buffer when making snapshot The 'i_wr_ref' is used to track the 'Fb' caps, but whenever the 'Fb' caps are taken the kclient will always take the 'Fw' caps at the same time. That means the check in __ceph_finish_cap_snap() will always be false.
When writing to the buffer, the kclient will take both the 'Fb|Fw' caps, write the contents to the buffer pages while increasing 'i_wrbuffer_ref', and then just release both 'Fb|Fw'. This is different from the user-space libcephfs, which keeps holding the 'Fb' caps and uses 'i_wr_ref' instead of 'i_wrbuffer_ref' to track this until the buffer is flushed to Rados. We need to defer flushing the capsnap until the corresponding buffer pages are all flushed to Rados, and at the same time trigger flushing the buffer pages immediately. Link: https://tracker.ceph.com/issues/48640 Link: https://tracker.ceph.com/issues/59343 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 6 ++++++ fs/ceph/snap.c | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 2321e5ddb664..6fb73e3a7f8e 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3109,6 +3109,12 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, } if (had & CEPH_CAP_FILE_WR) { if (--ci->i_wr_ref == 0) { + /* + * The Fb caps will always be took and released + * together with the Fw caps. + */ + WARN_ON_ONCE(ci->i_wb_ref); + last++; check_flushsnaps = true; if (ci->i_wrbuffer_ref_head == 0 && diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 2e73ba62bd7a..343d738448dc 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -675,14 +675,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, return 0; } - /* Fb cap still in use, delay it */ - if (ci->i_wb_ref) { + /* + * Defer flushing the capsnap if the dirty buffer not flushed yet. + * And trigger to flush the buffer immediately. + */ + if (ci->i_wrbuffer_ref) { dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu " "used WRBUFFER, delaying\n", __func__, inode, ceph_vinop(inode), capsnap, capsnap->context, capsnap->context->seq, ceph_cap_string(capsnap->dirty), capsnap->size); - capsnap->writing = 1; + ceph_queue_writeback(inode); return 0; } -- cgit v1.2.3 From ce72d4e0f179340cece90d5b826eb63bbf9fefc0 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 13 Jun 2023 12:49:59 +0800 Subject: ceph: issue a cap release immediately if no cap exists Consider the following race between the MDS and the client: the client releases a cap and puts the inode; the MDS increases cap->seq and sends a revoke req to the client; the client receives the revoke req but drops it; the MDS receives the release req, skips removing the cap, then evals the CInode and issues or revokes caps again; the client receives the new caps update or revoke req and drops it as well; finally the MDS raises a health warning that the client isn't responding to mclientcaps(revoke). All the IMPORT/REVOKE/GRANT cap ops will increase the session seq on the MDS side, so the client needs to issue a cap release to let the MDS remove the corresponding cap and unblock possible waiters.
Link: https://tracker.ceph.com/issues/61332 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 6fb73e3a7f8e..cef91dd5ef83 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4092,6 +4092,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct cap_extra_info extra_info = {}; bool queue_trunc; bool close_sessions = false; + bool do_cap_release = false; dout("handle_caps from mds%d\n", session->s_mds); @@ -4198,17 +4199,14 @@ void ceph_handle_caps(struct ceph_mds_session *session, if (!inode) { dout(" i don't have ino %llx\n", vino.ino); - if (op == CEPH_CAP_OP_IMPORT) { - cap = ceph_get_cap(mdsc, NULL); - cap->cap_ino = vino.ino; - cap->queue_release = 1; - cap->cap_id = le64_to_cpu(h->cap_id); - cap->mseq = mseq; - cap->seq = seq; - cap->issue_seq = seq; - spin_lock(&session->s_cap_lock); - __ceph_queue_cap_release(session, cap); - spin_unlock(&session->s_cap_lock); + switch (op) { + case CEPH_CAP_OP_IMPORT: + case CEPH_CAP_OP_REVOKE: + case CEPH_CAP_OP_GRANT: + do_cap_release = true; + break; + default: + break; } goto flush_cap_releases; } @@ -4258,6 +4256,14 @@ void ceph_handle_caps(struct ceph_mds_session *session, inode, ceph_ino(inode), ceph_snap(inode), session->s_mds); spin_unlock(&ci->i_ceph_lock); + switch (op) { + case CEPH_CAP_OP_REVOKE: + case CEPH_CAP_OP_GRANT: + do_cap_release = true; + break; + default: + break; + } goto flush_cap_releases; } @@ -4308,6 +4314,18 @@ flush_cap_releases: * along for the mds (who clearly thinks we still have this * cap). */ + if (do_cap_release) { + cap = ceph_get_cap(mdsc, NULL); + cap->cap_ino = vino.ino; + cap->queue_release = 1; + cap->cap_id = le64_to_cpu(h->cap_id); + cap->mseq = mseq; + cap->seq = seq; + cap->issue_seq = seq; + spin_lock(&session->s_cap_lock); + __ceph_queue_cap_release(session, cap); + spin_unlock(&session->s_cap_lock); + } ceph_flush_cap_releases(mdsc, session); goto done; -- cgit v1.2.3 From 257e6172ab36ebbe295a6c9ee9a9dd0fe54c1dc2 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 28 Jun 2023 07:57:09 +0800 Subject: ceph: don't let check_caps skip sending responses for revoke msgs If a client sends out a cap update dropping caps with the prior 'seq' just before an incoming cap revoke request, then the client may drop the revoke because it believes it's already released the requested capabilities. This causes the MDS to wait indefinitely for the client to respond to the revoke. It's therefore always a good idea to ack the cap revoke request with the bumped up 'seq'. 
Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/61782 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Reviewed-by: Patrick Donnelly Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/ceph') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index cef91dd5ef83..e2bb0d0072da 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3566,6 +3566,15 @@ static void handle_cap_grant(struct inode *inode, } BUG_ON(cap->issued & ~cap->implemented); + /* don't let check_caps skip sending a response to MDS for revoke msgs */ + if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { + cap->mds_wanted = 0; + if (cap == ci->i_auth_cap) + check_caps = 1; /* check auth cap only */ + else + check_caps = 2; /* check all caps */ + } + if (extra_info->inline_version > 0 && extra_info->inline_version >= ci->i_inline_version) { ci->i_inline_version = extra_info->inline_version; -- cgit v1.2.3