summaryrefslogtreecommitdiff
path: root/include/linux/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/ceph')
-rw-r--r--include/linux/ceph/ceph_fs.h68
-rw-r--r--include/linux/ceph/messenger.h40
-rw-r--r--include/linux/ceph/osd_client.h93
-rw-r--r--include/linux/ceph/rados.h4
4 files changed, 180 insertions, 25 deletions
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 49586ff26152..5f2301ee88bc 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -359,14 +359,19 @@ enum {
extern const char *ceph_mds_op_name(int op);
-
-#define CEPH_SETATTR_MODE 1
-#define CEPH_SETATTR_UID 2
-#define CEPH_SETATTR_GID 4
-#define CEPH_SETATTR_MTIME 8
-#define CEPH_SETATTR_ATIME 16
-#define CEPH_SETATTR_SIZE 32
-#define CEPH_SETATTR_CTIME 64
+#define CEPH_SETATTR_MODE (1 << 0)
+#define CEPH_SETATTR_UID (1 << 1)
+#define CEPH_SETATTR_GID (1 << 2)
+#define CEPH_SETATTR_MTIME (1 << 3)
+#define CEPH_SETATTR_ATIME (1 << 4)
+#define CEPH_SETATTR_SIZE (1 << 5)
+#define CEPH_SETATTR_CTIME (1 << 6)
+#define CEPH_SETATTR_MTIME_NOW (1 << 7)
+#define CEPH_SETATTR_ATIME_NOW (1 << 8)
+#define CEPH_SETATTR_BTIME (1 << 9)
+#define CEPH_SETATTR_KILL_SGUID (1 << 10)
+#define CEPH_SETATTR_FSCRYPT_AUTH (1 << 11)
+#define CEPH_SETATTR_FSCRYPT_FILE (1 << 12)
/*
* Ceph setxattr request flags.
@@ -462,24 +467,26 @@ union ceph_mds_request_args {
} __attribute__ ((packed));
union ceph_mds_request_args_ext {
- union ceph_mds_request_args old;
- struct {
- __le32 mode;
- __le32 uid;
- __le32 gid;
- struct ceph_timespec mtime;
- struct ceph_timespec atime;
- __le64 size, old_size; /* old_size needed by truncate */
- __le32 mask; /* CEPH_SETATTR_* */
- struct ceph_timespec btime;
- } __attribute__ ((packed)) setattr_ext;
+ union {
+ union ceph_mds_request_args old;
+ struct {
+ __le32 mode;
+ __le32 uid;
+ __le32 gid;
+ struct ceph_timespec mtime;
+ struct ceph_timespec atime;
+ __le64 size, old_size; /* old_size needed by truncate */
+ __le32 mask; /* CEPH_SETATTR_* */
+ struct ceph_timespec btime;
+ } __attribute__ ((packed)) setattr_ext;
+ };
};
#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
#define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */
-struct ceph_mds_request_head_old {
+struct ceph_mds_request_head_legacy {
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */
@@ -492,9 +499,9 @@ struct ceph_mds_request_head_old {
union ceph_mds_request_args args;
} __attribute__ ((packed));
-#define CEPH_MDS_REQUEST_HEAD_VERSION 1
+#define CEPH_MDS_REQUEST_HEAD_VERSION 2
-struct ceph_mds_request_head {
+struct ceph_mds_request_head_old {
__le16 version; /* struct version */
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
@@ -508,6 +515,23 @@ struct ceph_mds_request_head {
union ceph_mds_request_args_ext args;
} __attribute__ ((packed));
+struct ceph_mds_request_head {
+ __le16 version; /* struct version */
+ __le64 oldest_client_tid;
+ __le32 mdsmap_epoch; /* on client */
+ __le32 flags; /* CEPH_MDS_FLAG_* */
+ __u8 num_retry, num_fwd; /* legacy count retry and fwd attempts */
+ __le16 num_releases; /* # include cap/lease release records */
+ __le32 op; /* mds op code */
+ __le32 caller_uid, caller_gid;
+ __le64 ino; /* use this ino for openc, mkdir, mknod,
+ etc. (if replaying) */
+ union ceph_mds_request_args_ext args;
+
+ __le32 ext_num_retry; /* new count retry attempts */
+ __le32 ext_num_fwd; /* new count fwd attempts */
+} __attribute__ ((packed));
+
/* cap/lease release record */
struct ceph_mds_request_release {
__le64 ino, cap_id; /* ino and unique cap id */
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 99c1726be6ee..2eaaabbe98cb 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -17,6 +17,7 @@
struct ceph_msg;
struct ceph_connection;
+struct ceph_msg_data_cursor;
/*
* Ceph defines these callbacks for handling connection events.
@@ -70,6 +71,30 @@ struct ceph_connection_operations {
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
+
+ /**
+ * sparse_read: read sparse data
+ * @con: connection we're reading from
+ * @cursor: data cursor for reading extents
+ * @buf: optional buffer to read into
+ *
+ * This should be called more than once, each time setting up to
+ * receive an extent into the current cursor position, and zeroing
+ * the holes between them.
+ *
+ * Returns amount of data to be read (in bytes), 0 if reading is
+ * complete, or -errno if there was an error.
+ *
+ * If @buf is set on a >0 return, then the data should be read into
+ * the provided buffer. Otherwise, it should be read into the cursor.
+ *
+ * The sparse read operation is expected to initialize the cursor
+ * with a length covering up to the end of the last extent.
+ */
+ int (*sparse_read)(struct ceph_connection *con,
+ struct ceph_msg_data_cursor *cursor,
+ char **buf);
+
};
/* use format string %s%lld */
@@ -98,6 +123,7 @@ enum ceph_msg_data_type {
CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */
#endif /* CONFIG_BLOCK */
CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */
+ CEPH_MSG_DATA_ITER, /* data source/destination is an iov_iter */
};
#ifdef CONFIG_BLOCK
@@ -199,6 +225,7 @@ struct ceph_msg_data {
bool own_pages;
};
struct ceph_pagelist *pagelist;
+ struct iov_iter iter;
};
};
@@ -207,6 +234,7 @@ struct ceph_msg_data_cursor {
struct ceph_msg_data *data; /* current data item */
size_t resid; /* bytes not yet consumed */
+ int sr_resid; /* residual sparse_read len */
bool need_crc; /* crc update needed */
union {
#ifdef CONFIG_BLOCK
@@ -222,6 +250,10 @@ struct ceph_msg_data_cursor {
struct page *page; /* page from list */
size_t offset; /* bytes from list */
};
+ struct {
+ struct iov_iter iov_iter;
+ unsigned int lastlen;
+ };
};
};
@@ -251,6 +283,7 @@ struct ceph_msg {
struct kref kref;
bool more_to_follow;
bool needs_out_seq;
+ bool sparse_read;
int front_alloc_len;
struct ceph_msgpool *pool;
@@ -309,6 +342,10 @@ struct ceph_connection_v1_info {
int in_base_pos; /* bytes read */
+ /* sparse reads */
+ struct kvec in_sr_kvec; /* current location to receive into */
+ u64 in_sr_len; /* amount of data in this extent */
+
/* message in temps */
u8 in_tag; /* protocol control byte */
struct ceph_msg_header in_hdr;
@@ -395,6 +432,7 @@ struct ceph_connection_v2_info {
void *conn_bufs[16];
int conn_buf_cnt;
+ int data_len_remain;
struct kvec in_sign_kvecs[8];
struct kvec out_sign_kvecs[8];
@@ -573,6 +611,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
#endif /* CONFIG_BLOCK */
void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
struct ceph_bvec_iter *bvec_pos);
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+ struct iov_iter *iter);
struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
gfp_t flags, bool can_fail);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index fb6be72104df..bf9823956758 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -29,14 +29,62 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
#define CEPH_HOMELESS_OSD -1
-/* a given osd we're communicating with */
+/*
+ * A single extent in a SPARSE_READ reply.
+ *
+ * Note that these come from the OSD as little-endian values. On BE arches,
+ * we convert them in-place after receipt.
+ */
+struct ceph_sparse_extent {
+ u64 off;
+ u64 len;
+} __packed;
+
+/* Sparse read state machine state values */
+enum ceph_sparse_read_state {
+ CEPH_SPARSE_READ_HDR = 0,
+ CEPH_SPARSE_READ_EXTENTS,
+ CEPH_SPARSE_READ_DATA_LEN,
+ CEPH_SPARSE_READ_DATA,
+};
+
+/*
+ * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of
+ * 64-bit offset/length pairs, and then all of the actual file data
+ * concatenated after it (sans holes).
+ *
+ * Unfortunately, we don't know how long the extent array is until we've
+ * started reading the data section of the reply. The caller should send down
+ * a destination buffer for the array, but we'll alloc one if it's too small
+ * or if the caller doesn't.
+ */
+struct ceph_sparse_read {
+ enum ceph_sparse_read_state sr_state; /* state machine state */
+ u64 sr_req_off; /* orig request offset */
+ u64 sr_req_len; /* orig request length */
+ u64 sr_pos; /* current pos in buffer */
+ int sr_index; /* current extent index */
+ __le32 sr_datalen; /* length of actual data */
+ u32 sr_count; /* extent count in reply */
+ int sr_ext_len; /* length of extent array */
+ struct ceph_sparse_extent *sr_extent; /* extent array */
+};
+
+/*
+ * A given osd we're communicating with.
+ *
+ * Note that the o_requests tree can be searched while holding the "lock" mutex
+ * or the "o_requests_lock" spinlock. Insertion or removal requires both!
+ */
struct ceph_osd {
refcount_t o_ref;
+ int o_sparse_op_idx;
struct ceph_osd_client *o_osdc;
int o_osd;
int o_incarnation;
struct rb_node o_node;
struct ceph_connection o_con;
+ spinlock_t o_requests_lock;
struct rb_root o_requests;
struct rb_root o_linger_requests;
struct rb_root o_backoff_mappings;
@@ -46,6 +94,7 @@ struct ceph_osd {
unsigned long lru_ttl;
struct list_head o_keepalive_item;
struct mutex lock;
+ struct ceph_sparse_read o_sparse_read;
};
#define CEPH_OSD_SLAB_OPS 2
@@ -59,6 +108,7 @@ enum ceph_osd_data_type {
CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
CEPH_OSD_DATA_TYPE_BVECS,
+ CEPH_OSD_DATA_TYPE_ITER,
};
struct ceph_osd_data {
@@ -82,6 +132,7 @@ struct ceph_osd_data {
struct ceph_bvec_iter bvec_pos;
u32 num_bvecs;
};
+ struct iov_iter iter;
};
};
@@ -98,6 +149,8 @@ struct ceph_osd_req_op {
u64 offset, length;
u64 truncate_size;
u32 truncate_seq;
+ int sparse_ext_cnt;
+ struct ceph_sparse_extent *sparse_ext;
struct ceph_osd_data osd_data;
} extent;
struct {
@@ -145,6 +198,9 @@ struct ceph_osd_req_op {
u32 src_fadvise_flags;
struct ceph_osd_data osd_data;
} copy_from;
+ struct {
+ u64 ver;
+ } assert_ver;
};
};
@@ -199,6 +255,7 @@ struct ceph_osd_request {
struct ceph_osd_client *r_osdc;
struct kref r_kref;
bool r_mempool;
+ bool r_linger; /* don't resend on failure */
struct completion r_completion; /* private to osd_client.c */
ceph_osdc_callback_t r_callback;
@@ -211,9 +268,9 @@ struct ceph_osd_request {
struct ceph_snap_context *r_snapc; /* for writes */
struct timespec64 r_mtime; /* ditto */
u64 r_data_offset; /* ditto */
- bool r_linger; /* don't resend on failure */
/* internal */
+ u64 r_version; /* data version sent in reply */
unsigned long r_stamp; /* jiffies, send or check time */
unsigned long r_start_stamp; /* jiffies */
ktime_t r_start_latency; /* ktime_t */
@@ -450,6 +507,8 @@ void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
unsigned int which,
struct ceph_bvec_iter *bvec_pos);
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+ unsigned int which, struct iov_iter *iter);
extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
unsigned int which,
@@ -504,6 +563,20 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
u32 truncate_seq, u64 truncate_size,
bool use_mempool);
+int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);
+
+/*
+ * How big an extent array should we preallocate for a sparse read? This is
+ * just a starting value. If we get more than this back from the OSD, the
+ * receiver will reallocate.
+ */
+#define CEPH_SPARSE_EXT_ARRAY_INITIAL 16
+
+static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
+{
+ return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL);
+}
+
extern void ceph_osdc_get_request(struct ceph_osd_request *req);
extern void ceph_osdc_put_request(struct ceph_osd_request *req);
@@ -558,5 +631,19 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
struct ceph_object_locator *oloc,
struct ceph_watch_item **watchers,
u32 *num_watchers);
-#endif
+/* Find offset into the buffer of the end of the extent map */
+static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op)
+{
+ struct ceph_sparse_extent *ext;
+
+ /* No extents? No data */
+ if (op->extent.sparse_ext_cnt == 0)
+ return 0;
+
+ ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1];
+
+ return ext->off + ext->len - op->extent.offset;
+}
+
+#endif
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 43a7a1573b51..73c3efbec36c 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -524,6 +524,10 @@ struct ceph_osd_op {
__le64 cookie;
} __attribute__ ((packed)) notify;
struct {
+ __le64 unused;
+ __le64 ver;
+ } __attribute__ ((packed)) assert_ver;
+ struct {
__le64 offset, length;
__le64 src_offset;
} __attribute__ ((packed)) clonerange;