summaryrefslogtreecommitdiff
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/acl.c2
-rw-r--r--fs/ceph/addr.c85
-rw-r--r--fs/ceph/cache.c2
-rw-r--r--fs/ceph/caps.c57
-rw-r--r--fs/ceph/dir.c22
-rw-r--r--fs/ceph/file.c74
-rw-r--r--fs/ceph/inode.c18
-rw-r--r--fs/ceph/mds_client.c8
-rw-r--r--fs/ceph/mds_client.h5
-rw-r--r--fs/ceph/metric.c10
-rw-r--r--fs/ceph/snap.c11
-rw-r--r--fs/ceph/super.c10
-rw-r--r--fs/ceph/super.h13
-rw-r--r--fs/ceph/xattr.c2
14 files changed, 255 insertions, 64 deletions
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 6945a938d396..c91b293267d7 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -93,7 +93,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
char *value = NULL;
struct iattr newattrs;
struct inode *inode = d_inode(dentry);
- struct timespec64 old_ctime = inode->i_ctime;
+ struct timespec64 old_ctime = inode_get_ctime(inode);
umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
if (ceph_snap(inode) != CEPH_NOSNAP) {
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6bb251a4d613..59cbfb80edbd 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -187,16 +187,42 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
struct inode *inode = rreq->inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_layout *lo = &ci->i_layout;
+ unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
+ loff_t end = rreq->start + rreq->len, new_end;
+ struct ceph_netfs_request_data *priv = rreq->netfs_priv;
+ unsigned long max_len;
u32 blockoff;
- u64 blockno;
- /* Expand the start downward */
- blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
- rreq->start = blockno * lo->stripe_unit;
- rreq->len += blockoff;
+ if (priv) {
+ /* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */
+ if (priv->file_ra_disabled)
+ max_pages = 0;
+ else
+ max_pages = priv->file_ra_pages;
+
+ }
- /* Now, round up the length to the next block */
- rreq->len = roundup(rreq->len, lo->stripe_unit);
+ /* Readahead is disabled */
+ if (!max_pages)
+ return;
+
+ max_len = max_pages << PAGE_SHIFT;
+
+ /*
+ * Try to expand the length forward by rounding up it to the next
+ * block, but do not exceed the file size, unless the original
+ * request already exceeds it.
+ */
+ new_end = min(round_up(end, lo->stripe_unit), rreq->i_size);
+ if (new_end > end && new_end <= rreq->start + max_len)
+ rreq->len = new_end - rreq->start;
+
+ /* Try to expand the start downward */
+ div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
+ if (rreq->len + blockoff <= max_len) {
+ rreq->start -= blockoff;
+ rreq->len += blockoff;
+ }
}
static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
@@ -362,18 +388,28 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
{
struct inode *inode = rreq->inode;
int got = 0, want = CEPH_CAP_FILE_CACHE;
+ struct ceph_netfs_request_data *priv;
int ret = 0;
if (rreq->origin != NETFS_READAHEAD)
return 0;
+ priv = kzalloc(sizeof(*priv), GFP_NOFS);
+ if (!priv)
+ return -ENOMEM;
+
if (file) {
struct ceph_rw_context *rw_ctx;
struct ceph_file_info *fi = file->private_data;
+ priv->file_ra_pages = file->f_ra.ra_pages;
+ priv->file_ra_disabled = file->f_mode & FMODE_RANDOM;
+
rw_ctx = ceph_find_rw_context(fi);
- if (rw_ctx)
+ if (rw_ctx) {
+ rreq->netfs_priv = priv;
return 0;
+ }
}
/*
@@ -383,27 +419,40 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got);
if (ret < 0) {
dout("start_read %p, error getting cap\n", inode);
- return ret;
+ goto out;
}
if (!(got & want)) {
dout("start_read %p, no cache cap\n", inode);
- return -EACCES;
+ ret = -EACCES;
+ goto out;
+ }
+ if (ret == 0) {
+ ret = -EACCES;
+ goto out;
}
- if (ret == 0)
- return -EACCES;
- rreq->netfs_priv = (void *)(uintptr_t)got;
- return 0;
+ priv->caps = got;
+ rreq->netfs_priv = priv;
+
+out:
+ if (ret < 0)
+ kfree(priv);
+
+ return ret;
}
static void ceph_netfs_free_request(struct netfs_io_request *rreq)
{
- struct ceph_inode_info *ci = ceph_inode(rreq->inode);
- int got = (uintptr_t)rreq->netfs_priv;
+ struct ceph_netfs_request_data *priv = rreq->netfs_priv;
+
+ if (!priv)
+ return;
- if (got)
- ceph_put_cap_refs(ci, got);
+ if (priv->caps)
+ ceph_put_cap_refs(ceph_inode(rreq->inode), priv->caps);
+ kfree(priv);
+ rreq->netfs_priv = NULL;
}
const struct netfs_request_ops ceph_netfs_ops = {
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 177d8e8d73fe..de1dee46d3df 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -36,6 +36,8 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
&ci->i_vino, sizeof(ci->i_vino),
&ci->i_version, sizeof(ci->i_version),
i_size_read(inode));
+ if (ci->netfs.cache)
+ mapping_set_release_always(inode->i_mapping);
}
void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info *ci)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2321e5ddb664..09cd6d334604 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1400,7 +1400,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
arg->mtime = inode->i_mtime;
arg->atime = inode->i_atime;
- arg->ctime = inode->i_ctime;
+ arg->ctime = inode_get_ctime(inode);
arg->btime = ci->i_btime;
arg->change_attr = inode_peek_iversion_raw(inode);
@@ -3109,6 +3109,12 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
}
if (had & CEPH_CAP_FILE_WR) {
if (--ci->i_wr_ref == 0) {
+ /*
+ * The Fb caps will always be took and released
+ * together with the Fw caps.
+ */
+ WARN_ON_ONCE(ci->i_wb_ref);
+
last++;
check_flushsnaps = true;
if (ci->i_wrbuffer_ref_head == 0 &&
@@ -3560,6 +3566,15 @@ static void handle_cap_grant(struct inode *inode,
}
BUG_ON(cap->issued & ~cap->implemented);
+ /* don't let check_caps skip sending a response to MDS for revoke msgs */
+ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
+ cap->mds_wanted = 0;
+ if (cap == ci->i_auth_cap)
+ check_caps = 1; /* check auth cap only */
+ else
+ check_caps = 2; /* check all caps */
+ }
+
if (extra_info->inline_version > 0 &&
extra_info->inline_version >= ci->i_inline_version) {
ci->i_inline_version = extra_info->inline_version;
@@ -4086,6 +4101,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
struct cap_extra_info extra_info = {};
bool queue_trunc;
bool close_sessions = false;
+ bool do_cap_release = false;
dout("handle_caps from mds%d\n", session->s_mds);
@@ -4192,17 +4208,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
- if (op == CEPH_CAP_OP_IMPORT) {
- cap = ceph_get_cap(mdsc, NULL);
- cap->cap_ino = vino.ino;
- cap->queue_release = 1;
- cap->cap_id = le64_to_cpu(h->cap_id);
- cap->mseq = mseq;
- cap->seq = seq;
- cap->issue_seq = seq;
- spin_lock(&session->s_cap_lock);
- __ceph_queue_cap_release(session, cap);
- spin_unlock(&session->s_cap_lock);
+ switch (op) {
+ case CEPH_CAP_OP_IMPORT:
+ case CEPH_CAP_OP_REVOKE:
+ case CEPH_CAP_OP_GRANT:
+ do_cap_release = true;
+ break;
+ default:
+ break;
}
goto flush_cap_releases;
}
@@ -4252,6 +4265,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
inode, ceph_ino(inode), ceph_snap(inode),
session->s_mds);
spin_unlock(&ci->i_ceph_lock);
+ switch (op) {
+ case CEPH_CAP_OP_REVOKE:
+ case CEPH_CAP_OP_GRANT:
+ do_cap_release = true;
+ break;
+ default:
+ break;
+ }
goto flush_cap_releases;
}
@@ -4302,6 +4323,18 @@ flush_cap_releases:
* along for the mds (who clearly thinks we still have this
* cap).
*/
+ if (do_cap_release) {
+ cap = ceph_get_cap(mdsc, NULL);
+ cap->cap_ino = vino.ino;
+ cap->queue_release = 1;
+ cap->cap_id = le64_to_cpu(h->cap_id);
+ cap->mseq = mseq;
+ cap->seq = seq;
+ cap->issue_seq = seq;
+ spin_lock(&session->s_cap_lock);
+ __ceph_queue_cap_release(session, cap);
+ spin_unlock(&session->s_cap_lock);
+ }
ceph_flush_cap_releases(mdsc, session);
goto done;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index cb67ac821f0e..bdcffb04513f 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -886,7 +886,8 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_args.mknod.mode = cpu_to_le32(mode);
req->r_args.mknod.rdev = cpu_to_le32(rdev);
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
+ CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
if (as_ctx.pagelist) {
req->r_pagelist = as_ctx.pagelist;
@@ -953,7 +954,8 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
+ CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
if (as_ctx.pagelist) {
req->r_pagelist = as_ctx.pagelist;
@@ -1022,7 +1024,8 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
ihold(dir);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_args.mkdir.mode = cpu_to_le32(mode);
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
+ CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
if (as_ctx.pagelist) {
req->r_pagelist = as_ctx.pagelist;
@@ -1079,7 +1082,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_parent = dir;
ihold(dir);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
/* release LINK_SHARED on source inode (mds will lock it) */
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
@@ -1218,7 +1221,7 @@ retry:
req->r_num_caps = 2;
req->r_parent = dir;
ihold(dir);
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
req->r_inode_drop = ceph_drop_caps_for_unlink(inode);
@@ -1320,9 +1323,9 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir,
req->r_parent = new_dir;
ihold(new_dir);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
- req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
/* release LINK_RDCACHE on source inode (mds will lock it) */
req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
@@ -2016,9 +2019,10 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
}
}
+WRAP_DIR_ITER(ceph_readdir) // FIXME!
const struct file_operations ceph_dir_fops = {
.read = ceph_read_dir,
- .iterate = ceph_readdir,
+ .iterate_shared = shared_ceph_readdir,
.llseek = ceph_dir_llseek,
.open = ceph_open,
.release = ceph_release,
@@ -2030,7 +2034,7 @@ const struct file_operations ceph_dir_fops = {
};
const struct file_operations ceph_snapdir_fops = {
- .iterate = ceph_readdir,
+ .iterate_shared = shared_ceph_readdir,
.llseek = ceph_dir_llseek,
.open = ceph_open,
.release = ceph_release,
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index f4d8bf7dec88..63efe5389783 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -791,7 +791,8 @@ retry:
if (flags & O_CREAT) {
struct ceph_file_layout lo;
- req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
+ req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
+ CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
if (as_ctx.pagelist) {
req->r_pagelist = as_ctx.pagelist;
@@ -1746,6 +1747,69 @@ again:
}
/*
+ * Wrap filemap_splice_read with checks for cap bits on the inode.
+ * Atomically grab references, so that those bits are not released
+ * back to the MDS mid-read.
+ */
+static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ struct ceph_file_info *fi = in->private_data;
+ struct inode *inode = file_inode(in);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ ssize_t ret;
+ int want = 0, got = 0;
+ CEPH_DEFINE_RW_CONTEXT(rw_ctx, 0);
+
+ dout("splice_read %p %llx.%llx %llu~%zu trying to get caps on %p\n",
+ inode, ceph_vinop(inode), *ppos, len, inode);
+
+ if (ceph_inode_is_shutdown(inode))
+ return -ESTALE;
+
+ if (ceph_has_inline_data(ci) ||
+ (fi->flags & CEPH_F_SYNC))
+ return copy_splice_read(in, ppos, pipe, len, flags);
+
+ ceph_start_io_read(inode);
+
+ want = CEPH_CAP_FILE_CACHE;
+ if (fi->fmode & CEPH_FILE_MODE_LAZY)
+ want |= CEPH_CAP_FILE_LAZYIO;
+
+ ret = ceph_get_caps(in, CEPH_CAP_FILE_RD, want, -1, &got);
+ if (ret < 0)
+ goto out_end;
+
+ if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) == 0) {
+ dout("splice_read/sync %p %llx.%llx %llu~%zu got cap refs on %s\n",
+ inode, ceph_vinop(inode), *ppos, len,
+ ceph_cap_string(got));
+
+ ceph_put_cap_refs(ci, got);
+ ceph_end_io_read(inode);
+ return copy_splice_read(in, ppos, pipe, len, flags);
+ }
+
+ dout("splice_read %p %llx.%llx %llu~%zu got cap refs on %s\n",
+ inode, ceph_vinop(inode), *ppos, len, ceph_cap_string(got));
+
+ rw_ctx.caps = got;
+ ceph_add_rw_context(fi, &rw_ctx);
+ ret = filemap_splice_read(in, ppos, pipe, len, flags);
+ ceph_del_rw_context(fi, &rw_ctx);
+
+ dout("splice_read %p %llx.%llx dropping cap refs on %s = %zd\n",
+ inode, ceph_vinop(inode), ceph_cap_string(got), ret);
+
+ ceph_put_cap_refs(ci, got);
+out_end:
+ ceph_end_io_read(inode);
+ return ret;
+}
+
+/*
* Take cap references to avoid releasing caps to MDS mid-write.
*
* If we are synchronous, and write with an old snap context, the OSD
@@ -1791,9 +1855,6 @@ retry_snap:
else
ceph_start_io_write(inode);
- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(inode);
-
if (iocb->ki_flags & IOCB_APPEND) {
err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
if (err < 0)
@@ -1894,8 +1955,6 @@ retry_snap:
* can not run at the same time
*/
written = generic_perform_write(iocb, from);
- if (likely(written >= 0))
- iocb->ki_pos = pos + written;
ceph_end_io_write(inode);
}
@@ -1940,7 +1999,6 @@ out:
ceph_end_io_write(inode);
out_unlocked:
ceph_free_cap_flush(prealloc_cf);
- current->backing_dev_info = NULL;
return written ? written : err;
}
@@ -2593,7 +2651,7 @@ const struct file_operations ceph_file_fops = {
.lock = ceph_lock,
.setlease = simple_nosetlease,
.flock = ceph_flock,
- .splice_read = generic_file_splice_read,
+ .splice_read = ceph_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = ceph_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8e5f41d45283..fd05d68e2990 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -100,7 +100,7 @@ struct inode *ceph_get_snapdir(struct inode *parent)
inode->i_uid = parent->i_uid;
inode->i_gid = parent->i_gid;
inode->i_mtime = parent->i_mtime;
- inode->i_ctime = parent->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(parent));
inode->i_atime = parent->i_atime;
ci->i_rbytes = 0;
ci->i_btime = ceph_inode(parent)->i_btime;
@@ -688,6 +688,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
struct timespec64 *mtime, struct timespec64 *atime)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ struct timespec64 ictime = inode_get_ctime(inode);
int warn = 0;
if (issued & (CEPH_CAP_FILE_EXCL|
@@ -696,11 +697,11 @@ void ceph_fill_file_time(struct inode *inode, int issued,
CEPH_CAP_AUTH_EXCL|
CEPH_CAP_XATTR_EXCL)) {
if (ci->i_version == 0 ||
- timespec64_compare(ctime, &inode->i_ctime) > 0) {
+ timespec64_compare(ctime, &ictime) > 0) {
dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n",
- inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+ ictime.tv_sec, ictime.tv_nsec,
ctime->tv_sec, ctime->tv_nsec);
- inode->i_ctime = *ctime;
+ inode_set_ctime_to_ts(inode, *ctime);
}
if (ci->i_version == 0 ||
ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
@@ -738,7 +739,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
} else {
/* we have no write|excl caps; whatever the MDS says is true */
if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
- inode->i_ctime = *ctime;
+ inode_set_ctime_to_ts(inode, *ctime);
inode->i_mtime = *mtime;
inode->i_atime = *atime;
ci->i_time_warp_seq = time_warp_seq;
@@ -2166,7 +2167,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode,
- inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+ inode_get_ctime(inode).tv_sec,
+ inode_get_ctime(inode).tv_nsec,
attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
only ? "ctime only" : "ignored");
if (only) {
@@ -2191,7 +2193,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
if (dirtied) {
inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
&prealloc_cf);
- inode->i_ctime = attr->ia_ctime;
+ inode_set_ctime_to_ts(inode, attr->ia_ctime);
inode_inc_iversion_raw(inode);
}
@@ -2465,7 +2467,7 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path,
return err;
}
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
stat->ino = ceph_present_inode(inode);
/*
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 4c0f22acf53d..5fb367b1d4b0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -645,6 +645,7 @@ bad:
err = -EIO;
out_bad:
pr_err("mds parse_reply err %d\n", err);
+ ceph_msg_dump(msg);
return err;
}
@@ -3538,6 +3539,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
bad:
pr_err("mdsc_handle_forward decode error err=%d\n", err);
+ ceph_msg_dump(msg);
}
static int __decode_session_metadata(void **p, void *end,
@@ -4762,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
dout("mdsc delayed_work\n");
- if (mdsc->stopping)
+ if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
return;
mutex_lock(&mdsc->mutex);
@@ -4941,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
{
dout("pre_umount\n");
- mdsc->stopping = 1;
+ mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
@@ -5258,6 +5260,7 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
bad:
pr_err("error decoding fsmap %d. Shutting down mount.\n", err);
ceph_umount_begin(mdsc->fsc->sb);
+ ceph_msg_dump(msg);
err_out:
mutex_lock(&mdsc->mutex);
mdsc->mdsmap_err = err;
@@ -5326,6 +5329,7 @@ bad_unlock:
bad:
pr_err("error decoding mdsmap %d. Shutting down mount.\n", err);
ceph_umount_begin(mdsc->fsc->sb);
+ ceph_msg_dump(msg);
return;
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 724307ff89cd..86d2965e68a1 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -380,6 +380,11 @@ struct cap_wait {
int want;
};
+enum {
+ CEPH_MDSC_STOPPING_BEGIN = 1,
+ CEPH_MDSC_STOPPING_FLUSHED = 2,
+};
+
/*
* mds client state
*/
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index c47347d2e84e..6d3584f16f9a 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -36,6 +36,14 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
s32 items = 0;
s32 len;
+ /* Do not send the metrics until the MDS rank is ready */
+ mutex_lock(&mdsc->mutex);
+ if (ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) != CEPH_MDS_STATE_ACTIVE) {
+ mutex_unlock(&mdsc->mutex);
+ return false;
+ }
+ mutex_unlock(&mdsc->mutex);
+
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
+ sizeof(*meta) + sizeof(*dlease) + sizeof(*files)
+ sizeof(*icaps) + sizeof(*inodes) + sizeof(*rsize)
@@ -208,7 +216,7 @@ static void metric_delayed_work(struct work_struct *work)
struct ceph_mds_client *mdsc =
container_of(m, struct ceph_mds_client, metric);
- if (mdsc->stopping)
+ if (mdsc->stopping || disable_send_metrics)
return;
if (!m->session || !check_session_state(m->session)) {
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 2e73ba62bd7a..c9920ade15f5 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -660,7 +660,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
capsnap->size = i_size_read(inode);
capsnap->mtime = inode->i_mtime;
capsnap->atime = inode->i_atime;
- capsnap->ctime = inode->i_ctime;
+ capsnap->ctime = inode_get_ctime(inode);
capsnap->btime = ci->i_btime;
capsnap->change_attr = inode_peek_iversion_raw(inode);
capsnap->time_warp_seq = ci->i_time_warp_seq;
@@ -675,14 +675,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
return 0;
}
- /* Fb cap still in use, delay it */
- if (ci->i_wb_ref) {
+ /*
+ * Defer flushing the capsnap if the dirty buffer not flushed yet.
+ * And trigger to flush the buffer immediately.
+ */
+ if (ci->i_wrbuffer_ref) {
dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu "
"used WRBUFFER, delaying\n", __func__, inode,
ceph_vinop(inode), capsnap, capsnap->context,
capsnap->context->seq, ceph_cap_string(capsnap->dirty),
capsnap->size);
- capsnap->writing = 1;
+ ceph_queue_writeback(inode);
return 0;
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 3fc48b43cab0..a5f52013314d 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
ceph_mdsc_pre_umount(fsc->mdsc);
flush_fs_workqueues(fsc);
+ /*
+ * Though the kill_anon_super() will finally trigger the
+ * sync_filesystem() anyway, we still need to do it here
+ * and then bump the stage of shutdown to stop the work
+ * queue as earlier as possible.
+ */
+ sync_filesystem(s);
+
+ fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+
kill_anon_super(s);
fsc->client->extra_mon_dispatch = NULL;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index d24bf0db5234..3bfddf34d488 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -451,6 +451,19 @@ struct ceph_inode_info {
unsigned long i_work_mask;
};
+struct ceph_netfs_request_data {
+ int caps;
+
+ /*
+ * Maximum size of a file readahead request.
+ * The fadvise could update the bdi's default ra_pages.
+ */
+ unsigned int file_ra_pages;
+
+ /* Set it if fadvise disables file readahead entirely */
+ bool file_ra_disabled;
+};
+
static inline struct ceph_inode_info *
ceph_inode(const struct inode *inode)
{
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 806183959c47..1cbd84cc82a8 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1238,7 +1238,7 @@ retry:
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
&prealloc_cf);
ci->i_xattrs.dirty = true;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
}
spin_unlock(&ci->i_ceph_lock);