summaryrefslogtreecommitdiff
path: root/fs/ceph
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2023-08-31 02:06:38 +0300
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2023-08-31 02:06:38 +0300
commit1ac731c529cd4d6adbce134754b51ff7d822b145 (patch)
tree143ab3f35ca5f3b69f583c84e6964b17139c2ec1 /fs/ceph
parent07b4c950f27bef0362dc6ad7ee713aab61d58149 (diff)
parent54116d442e001e1b6bd482122043b1870998a1f3 (diff)
downloadlinux-1ac731c529cd4d6adbce134754b51ff7d822b145.tar.xz
Merge branch 'next' into for-linus
Prepare input updates for 6.6 merge window.
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/addr.c11
-rw-r--r--fs/ceph/caps.c8
-rw-r--r--fs/ceph/debugfs.c18
-rw-r--r--fs/ceph/dir.c13
-rw-r--r--fs/ceph/mds_client.c77
-rw-r--r--fs/ceph/mds_client.h5
-rw-r--r--fs/ceph/snap.c17
-rw-r--r--fs/ceph/super.h2
-rw-r--r--fs/ceph/xattr.c24
9 files changed, 124 insertions, 51 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index d5335f445233..6bb251a4d613 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -808,6 +808,7 @@ static int ceph_writepages_start(struct address_space *mapping,
bool should_loop, range_whole = false;
bool done = false;
bool caching = ceph_is_cache_enabled(inode);
+ xa_mark_t tag;
if (wbc->sync_mode == WB_SYNC_NONE &&
fsc->write_congested)
@@ -834,6 +835,11 @@ static int ceph_writepages_start(struct address_space *mapping,
start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
index = start_index;
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
+ tag = PAGECACHE_TAG_TOWRITE;
+ } else {
+ tag = PAGECACHE_TAG_DIRTY;
+ }
retry:
/* find oldest snap context with dirty data */
snapc = get_oldest_context(inode, &ceph_wbc, NULL);
@@ -872,6 +878,9 @@ retry:
dout(" non-head snapc, range whole\n");
}
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+ tag_pages_for_writeback(mapping, index, end);
+
ceph_put_snap_context(last_snapc);
last_snapc = snapc;
@@ -888,7 +897,7 @@ retry:
get_more_pages:
nr_folios = filemap_get_folios_tag(mapping, &index,
- end, PAGECACHE_TAG_DIRTY, &fbatch);
+ end, tag, &fbatch);
dout("pagevec_lookup_range_tag got %d\n", nr_folios);
if (!nr_folios && !locked_pages)
break;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7cc20772eac9..2321e5ddb664 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -431,7 +431,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
*
* Called with i_ceph_lock held.
*/
-static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
+struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
{
struct ceph_cap *cap;
struct rb_node *n = ci->i_caps.rb_node;
@@ -1627,6 +1627,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
struct inode *inode = &ci->netfs.inode;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_session *session = NULL;
+ bool need_put = false;
int mds;
dout("ceph_flush_snaps %p\n", inode);
@@ -1671,8 +1672,13 @@ out:
ceph_put_mds_session(session);
/* we flushed them all; remove this inode from the queue */
spin_lock(&mdsc->snap_flush_lock);
+ if (!list_empty(&ci->i_snap_flush_item))
+ need_put = true;
list_del_init(&ci->i_snap_flush_item);
spin_unlock(&mdsc->snap_flush_lock);
+
+ if (need_put)
+ iput(inode);
}
/*
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index bec3c4549c07..3904333fa6c3 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -248,14 +248,20 @@ static int metrics_caps_show(struct seq_file *s, void *p)
return 0;
}
-static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
+static int caps_show_cb(struct inode *inode, int mds, void *p)
{
+ struct ceph_inode_info *ci = ceph_inode(inode);
struct seq_file *s = p;
-
- seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode),
- cap->session->s_mds,
- ceph_cap_string(cap->issued),
- ceph_cap_string(cap->implemented));
+ struct ceph_cap *cap;
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = __get_cap_for_mds(ci, mds);
+ if (cap)
+ seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode),
+ cap->session->s_mds,
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(cap->implemented));
+ spin_unlock(&ci->i_ceph_lock);
return 0;
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0ced8b570e42..cb67ac821f0e 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1050,6 +1050,9 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
struct ceph_mds_request *req;
int err;
+ if (dentry->d_flags & DCACHE_DISCONNECTED)
+ return -EINVAL;
+
err = ceph_wait_on_conflict_unlink(dentry);
if (err)
return err;
@@ -1057,8 +1060,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
if (ceph_snap(dir) != CEPH_NOSNAP)
return -EROFS;
- dout("link in dir %p old_dentry %p dentry %p\n", dir,
- old_dentry, dentry);
+ dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n",
+ dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
if (IS_ERR(req)) {
d_drop(dentry);
@@ -1067,6 +1070,12 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry);
+ /*
+ * The old_dentry maybe a DCACHE_DISCONNECTED dentry, then we
+ * will just pass the ino# to MDSs.
+ */
+ if (old_dentry->d_flags & DCACHE_DISCONNECTED)
+ req->r_ino2 = ceph_vino(d_inode(old_dentry));
req->r_parent = dir;
ihold(dir);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 27a245d959c0..4c0f22acf53d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1632,8 +1632,8 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
* Caller must hold session s_mutex.
*/
int ceph_iterate_session_caps(struct ceph_mds_session *session,
- int (*cb)(struct inode *, struct ceph_cap *,
- void *), void *arg)
+ int (*cb)(struct inode *, int mds, void *),
+ void *arg)
{
struct list_head *p;
struct ceph_cap *cap;
@@ -1645,6 +1645,8 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
spin_lock(&session->s_cap_lock);
p = session->s_caps.next;
while (p != &session->s_caps) {
+ int mds;
+
cap = list_entry(p, struct ceph_cap, session_caps);
inode = igrab(&cap->ci->netfs.inode);
if (!inode) {
@@ -1652,6 +1654,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
continue;
}
session->s_cap_iterator = cap;
+ mds = cap->mds;
spin_unlock(&session->s_cap_lock);
if (last_inode) {
@@ -1663,7 +1666,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
old_cap = NULL;
}
- ret = cb(inode, cap, arg);
+ ret = cb(inode, mds, arg);
last_inode = inode;
spin_lock(&session->s_cap_lock);
@@ -1696,20 +1699,25 @@ out:
return ret;
}
-static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
- void *arg)
+static int remove_session_caps_cb(struct inode *inode, int mds, void *arg)
{
struct ceph_inode_info *ci = ceph_inode(inode);
bool invalidate = false;
- int iputs;
+ struct ceph_cap *cap;
+ int iputs = 0;
- dout("removing cap %p, ci is %p, inode is %p\n",
- cap, ci, &ci->netfs.inode);
spin_lock(&ci->i_ceph_lock);
- iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
+ cap = __get_cap_for_mds(ci, mds);
+ if (cap) {
+ dout(" removing cap %p, ci is %p, inode is %p\n",
+ cap, ci, &ci->netfs.inode);
+
+ iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
+ }
spin_unlock(&ci->i_ceph_lock);
- wake_up_all(&ci->i_cap_wq);
+ if (cap)
+ wake_up_all(&ci->i_cap_wq);
if (invalidate)
ceph_queue_invalidate(inode);
while (iputs--)
@@ -1780,8 +1788,7 @@ enum {
*
* caller must hold s_mutex.
*/
-static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
- void *arg)
+static int wake_up_session_cb(struct inode *inode, int mds, void *arg)
{
struct ceph_inode_info *ci = ceph_inode(inode);
unsigned long ev = (unsigned long)arg;
@@ -1792,12 +1799,14 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
ci->i_requested_max_size = 0;
spin_unlock(&ci->i_ceph_lock);
} else if (ev == RENEWCAPS) {
- if (cap->cap_gen < atomic_read(&cap->session->s_cap_gen)) {
- /* mds did not re-issue stale cap */
- spin_lock(&ci->i_ceph_lock);
+ struct ceph_cap *cap;
+
+ spin_lock(&ci->i_ceph_lock);
+ cap = __get_cap_for_mds(ci, mds);
+ /* mds did not re-issue stale cap */
+ if (cap && cap->cap_gen < atomic_read(&cap->session->s_cap_gen))
cap->issued = cap->implemented = CEPH_CAP_PIN;
- spin_unlock(&ci->i_ceph_lock);
- }
+ spin_unlock(&ci->i_ceph_lock);
} else if (ev == FORCE_RO) {
}
wake_up_all(&ci->i_cap_wq);
@@ -1959,16 +1968,22 @@ out:
* Yes, this is a bit sloppy. Our only real goal here is to respond to
* memory pressure from the MDS, though, so it needn't be perfect.
*/
-static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
+static int trim_caps_cb(struct inode *inode, int mds, void *arg)
{
int *remaining = arg;
struct ceph_inode_info *ci = ceph_inode(inode);
int used, wanted, oissued, mine;
+ struct ceph_cap *cap;
if (*remaining <= 0)
return -1;
spin_lock(&ci->i_ceph_lock);
+ cap = __get_cap_for_mds(ci, mds);
+ if (!cap) {
+ spin_unlock(&ci->i_ceph_lock);
+ return 0;
+ }
mine = cap->issued | cap->implemented;
used = __ceph_caps_used(ci);
wanted = __ceph_caps_file_wanted(ci);
@@ -2555,6 +2570,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
u64 ino1 = 0, ino2 = 0;
int pathlen1 = 0, pathlen2 = 0;
bool freepath1 = false, freepath2 = false;
+ struct dentry *old_dentry = NULL;
int len;
u16 releases;
void *p, *end;
@@ -2572,7 +2588,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
}
/* If r_old_dentry is set, then assume that its parent is locked */
- ret = set_request_path_attr(NULL, req->r_old_dentry,
+ if (req->r_old_dentry &&
+ !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED))
+ old_dentry = req->r_old_dentry;
+ ret = set_request_path_attr(NULL, old_dentry,
req->r_old_dentry_dir,
req->r_path2, req->r_ino2.ino,
&path2, &pathlen2, &ino2, &freepath2, true);
@@ -3911,26 +3930,22 @@ out_unlock:
/*
* Encode information about a cap for a reconnect with the MDS.
*/
-static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
- void *arg)
+static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
{
union {
struct ceph_mds_cap_reconnect v2;
struct ceph_mds_cap_reconnect_v1 v1;
} rec;
- struct ceph_inode_info *ci = cap->ci;
+ struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_reconnect_state *recon_state = arg;
struct ceph_pagelist *pagelist = recon_state->pagelist;
struct dentry *dentry;
+ struct ceph_cap *cap;
char *path;
int pathlen = 0, err;
u64 pathbase;
u64 snap_follows;
- dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
- inode, ceph_vinop(inode), cap, cap->cap_id,
- ceph_cap_string(cap->issued));
-
dentry = d_find_primary(inode);
if (dentry) {
/* set pathbase to parent dir when msg_version >= 2 */
@@ -3947,6 +3962,16 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
}
spin_lock(&ci->i_ceph_lock);
+ cap = __get_cap_for_mds(ci, mds);
+ if (!cap) {
+ spin_unlock(&ci->i_ceph_lock);
+ err = 0;
+ goto out_err;
+ }
+ dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
+ inode, ceph_vinop(inode), cap, cap->cap_id,
+ ceph_cap_string(cap->issued));
+
cap->seq = 0; /* reset cap seq */
cap->issue_seq = 0; /* and issue_seq */
cap->mseq = 0; /* and migrate_seq */
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 0598faa50e2e..724307ff89cd 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -355,8 +355,8 @@ struct ceph_snapid_map {
struct rb_node node;
struct list_head lru;
atomic_t ref;
- u64 snap;
dev_t dev;
+ u64 snap;
unsigned long last_used;
};
@@ -541,8 +541,7 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
- int (*cb)(struct inode *,
- struct ceph_cap *, void *),
+ int (*cb)(struct inode *, int mds, void *),
void *arg);
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 87007203f130..2e73ba62bd7a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -693,8 +693,10 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
capsnap->size);
spin_lock(&mdsc->snap_flush_lock);
- if (list_empty(&ci->i_snap_flush_item))
+ if (list_empty(&ci->i_snap_flush_item)) {
+ ihold(inode);
list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
+ }
spin_unlock(&mdsc->snap_flush_lock);
return 1; /* caller may want to ceph_flush_snaps */
}
@@ -1111,6 +1113,19 @@ skip_inode:
continue;
adjust_snap_realm_parent(mdsc, child, realm->ino);
}
+ } else {
+ /*
+ * In the non-split case both 'num_split_inos' and
+ * 'num_split_realms' should be 0, making this a no-op.
+ * However the MDS happens to populate 'split_realms' list
+ * in one of the UPDATE op cases by mistake.
+ *
+ * Skip both lists just in case to ensure that 'p' is
+ * positioned at the start of realm info, as expected by
+ * ceph_update_snap_trace().
+ */
+ p += sizeof(u64) * num_split_inos;
+ p += sizeof(u64) * num_split_realms;
}
/*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 6ecca2c6d137..d24bf0db5234 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1192,6 +1192,8 @@ extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session);
void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session,
struct ceph_inode_info *ci);
+extern struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci,
+ int mds);
extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
int mds);
extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index f65b07cc33a2..806183959c47 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -535,6 +535,8 @@ static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
return NULL;
}
+#define MAX_XATTR_VAL_PRINT_LEN 256
+
static int __set_xattr(struct ceph_inode_info *ci,
const char *name, int name_len,
const char *val, int val_len,
@@ -597,7 +599,7 @@ static int __set_xattr(struct ceph_inode_info *ci,
xattr->should_free_name = update_xattr;
ci->i_xattrs.count++;
- dout("__set_xattr count=%d\n", ci->i_xattrs.count);
+ dout("%s count=%d\n", __func__, ci->i_xattrs.count);
} else {
kfree(*newxattr);
*newxattr = NULL;
@@ -625,11 +627,13 @@ static int __set_xattr(struct ceph_inode_info *ci,
if (new) {
rb_link_node(&xattr->node, parent, p);
rb_insert_color(&xattr->node, &ci->i_xattrs.index);
- dout("__set_xattr_val p=%p\n", p);
+ dout("%s p=%p\n", __func__, p);
}
- dout("__set_xattr_val added %llx.%llx xattr %p %.*s=%.*s\n",
- ceph_vinop(&ci->netfs.inode), xattr, name_len, name, val_len, val);
+ dout("%s added %llx.%llx xattr %p %.*s=%.*s%s\n", __func__,
+ ceph_vinop(&ci->netfs.inode), xattr, name_len, name,
+ min(val_len, MAX_XATTR_VAL_PRINT_LEN), val,
+ val_len > MAX_XATTR_VAL_PRINT_LEN ? "..." : "");
return 0;
}
@@ -655,13 +659,15 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
else if (c > 0)
p = &(*p)->rb_right;
else {
- dout("__get_xattr %s: found %.*s\n", name,
- xattr->val_len, xattr->val);
+ int len = min(xattr->val_len, MAX_XATTR_VAL_PRINT_LEN);
+
+ dout("%s %s: found %.*s%s\n", __func__, name, len,
+ xattr->val, xattr->val_len > len ? "..." : "");
return xattr;
}
}
- dout("__get_xattr %s: not found\n", name);
+ dout("%s %s: not found\n", __func__, name);
return NULL;
}
@@ -1411,10 +1417,6 @@ void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx)
* attributes are handled directly.
*/
const struct xattr_handler *ceph_xattr_handlers[] = {
-#ifdef CONFIG_CEPH_FS_POSIX_ACL
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
-#endif
&ceph_other_xattr_handler,
NULL,
};