diff options
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/caps.c | 6 | ||||
-rw-r--r-- | fs/ceph/file.c | 65 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 3 | ||||
-rw-r--r-- | fs/ceph/snap.c | 17 |
4 files changed, 88 insertions, 3 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 789be30d6ee2..2321e5ddb664 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1627,6 +1627,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
 	struct inode *inode = &ci->netfs.inode;
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_mds_session *session = NULL;
+	bool need_put = false;
 	int mds;
 
 	dout("ceph_flush_snaps %p\n", inode);
@@ -1671,8 +1672,13 @@ out:
 		ceph_put_mds_session(session);
 	/* we flushed them all; remove this inode from the queue */
 	spin_lock(&mdsc->snap_flush_lock);
+	if (!list_empty(&ci->i_snap_flush_item))
+		need_put = true;
 	list_del_init(&ci->i_snap_flush_item);
 	spin_unlock(&mdsc->snap_flush_lock);
+
+	if (need_put)
+		iput(inode);
 }
 
 /*
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index f4d8bf7dec88..4285f6cb5d3b 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1746,6 +1746,69 @@ again:
 }
 
 /*
+ * Wrap filemap_splice_read with checks for cap bits on the inode.
+ * Atomically grab references, so that those bits are not released
+ * back to the MDS mid-read.
+ */
+static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
+				struct pipe_inode_info *pipe,
+				size_t len, unsigned int flags)
+{
+	struct ceph_file_info *fi = in->private_data;
+	struct inode *inode = file_inode(in);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	ssize_t ret;
+	int want = 0, got = 0;
+	CEPH_DEFINE_RW_CONTEXT(rw_ctx, 0);
+
+	dout("splice_read %p %llx.%llx %llu~%zu trying to get caps on %p\n",
+	     inode, ceph_vinop(inode), *ppos, len, inode);
+
+	if (ceph_inode_is_shutdown(inode))
+		return -ESTALE;
+
+	if (ceph_has_inline_data(ci) ||
+	    (fi->flags & CEPH_F_SYNC))
+		return copy_splice_read(in, ppos, pipe, len, flags);
+
+	ceph_start_io_read(inode);
+
+	want = CEPH_CAP_FILE_CACHE;
+	if (fi->fmode & CEPH_FILE_MODE_LAZY)
+		want |= CEPH_CAP_FILE_LAZYIO;
+
+	ret = ceph_get_caps(in, CEPH_CAP_FILE_RD, want, -1, &got);
+	if (ret < 0)
+		goto out_end;
+
+	if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) == 0) {
+		dout("splice_read/sync %p %llx.%llx %llu~%zu got cap refs on %s\n",
+		     inode, ceph_vinop(inode), *ppos, len,
+		     ceph_cap_string(got));
+
+		ceph_put_cap_refs(ci, got);
+		ceph_end_io_read(inode);
+		return copy_splice_read(in, ppos, pipe, len, flags);
+	}
+
+	dout("splice_read %p %llx.%llx %llu~%zu got cap refs on %s\n",
+	     inode, ceph_vinop(inode), *ppos, len, ceph_cap_string(got));
+
+	rw_ctx.caps = got;
+	ceph_add_rw_context(fi, &rw_ctx);
+	ret = filemap_splice_read(in, ppos, pipe, len, flags);
+	ceph_del_rw_context(fi, &rw_ctx);
+
+	dout("splice_read %p %llx.%llx dropping cap refs on %s = %zd\n",
+	     inode, ceph_vinop(inode), ceph_cap_string(got), ret);
+
+	ceph_put_cap_refs(ci, got);
+out_end:
+	ceph_end_io_read(inode);
+	return ret;
+}
+
+/*
  * Take cap references to avoid releasing caps to MDS mid-write.
  *
  * If we are synchronous, and write with an old snap context, the OSD
@@ -2593,7 +2656,7 @@ const struct file_operations ceph_file_fops = {
 	.lock = ceph_lock,
 	.setlease = simple_nosetlease,
 	.flock = ceph_flock,
-	.splice_read = generic_file_splice_read,
+	.splice_read = ceph_splice_read,
 	.splice_write = iter_file_splice_write,
 	.unlocked_ioctl = ceph_ioctl,
 	.compat_ioctl = compat_ptr_ioctl,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 29cf00220b09..4c0f22acf53d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3942,7 +3942,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
 	struct dentry *dentry;
 	struct ceph_cap *cap;
 	char *path;
-	int pathlen = 0, err = 0;
+	int pathlen = 0, err;
 	u64 pathbase;
 	u64 snap_follows;
 
@@ -3965,6 +3965,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
 	cap = __get_cap_for_mds(ci, mds);
 	if (!cap) {
 		spin_unlock(&ci->i_ceph_lock);
+		err = 0;
 		goto out_err;
 	}
 	dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 87007203f130..2e73ba62bd7a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -693,8 +693,10 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 	     capsnap->size);
 
 	spin_lock(&mdsc->snap_flush_lock);
-	if (list_empty(&ci->i_snap_flush_item))
+	if (list_empty(&ci->i_snap_flush_item)) {
+		ihold(inode);
 		list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
+	}
 	spin_unlock(&mdsc->snap_flush_lock);
 	return 1; /* caller may want to ceph_flush_snaps */
 }
@@ -1111,6 +1113,19 @@ skip_inode:
 				continue;
 			adjust_snap_realm_parent(mdsc, child, realm->ino);
 		}
+	} else {
+		/*
+		 * In the non-split case both 'num_split_inos' and
+		 * 'num_split_realms' should be 0, making this a no-op.
+		 * However the MDS happens to populate 'split_realms' list
+		 * in one of the UPDATE op cases by mistake.
+		 *
+		 * Skip both lists just in case to ensure that 'p' is
+		 * positioned at the start of realm info, as expected by
+		 * ceph_update_snap_trace().
+		 */
+		p += sizeof(u64) * num_split_inos;
+		p += sizeof(u64) * num_split_realms;
 	}
 
 	/*