diff options
-rw-r--r-- | fs/ceph/addr.c | 69 | ||||
-rw-r--r-- | fs/ceph/file.c | 90 |
2 files changed, 125 insertions, 34 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 89ca27848325..95ff84930dcf 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -18,6 +18,7 @@ #include "mds_client.h" #include "cache.h" #include "metric.h" +#include "crypto.h" #include <linux/ceph/osd_client.h> #include <linux/ceph/striper.h> @@ -242,7 +243,8 @@ static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) static void finish_netfs_read(struct ceph_osd_request *req) { - struct ceph_fs_client *fsc = ceph_inode_to_client(req->r_inode); + struct inode *inode = req->r_inode; + struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); struct netfs_io_subrequest *subreq = req->r_priv; struct ceph_osd_req_op *op = &req->r_ops[0]; @@ -256,16 +258,31 @@ static void finish_netfs_read(struct ceph_osd_request *req) subreq->len, i_size_read(req->r_inode)); /* no object means success but no data */ - if (sparse && err >= 0) - err = ceph_sparse_ext_map_end(op); - else if (err == -ENOENT) + if (err == -ENOENT) err = 0; else if (err == -EBLOCKLISTED) fsc->blocklisted = true; - if (err >= 0 && err < subreq->len) - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (err >= 0) { + if (sparse && err > 0) + err = ceph_sparse_ext_map_end(op); + if (err < subreq->len) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (IS_ENCRYPTED(inode) && err > 0) { + err = ceph_fscrypt_decrypt_extents(inode, + osd_data->pages, subreq->start, + op->extent.sparse_ext, + op->extent.sparse_ext_cnt); + if (err > subreq->len) + err = subreq->len; + } + } + if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { + ceph_put_page_vector(osd_data->pages, + calc_pages_for(osd_data->alignment, + osd_data->length), false); + } netfs_subreq_terminated(subreq, err, false); iput(req->r_inode); } @@ -336,7 +353,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) struct iov_iter iter; int err = 0; u64 len = subreq->len; - bool sparse = ceph_test_mount_opt(fsc, SPARSEREAD); + bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); + u64 off = subreq->start; if (ceph_inode_is_shutdown(inode)) { err = -EIO; @@ -346,8 +364,10 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq)) return; - req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len, - 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ, + ceph_fscrypt_adjust_off_and_len(inode, &off, &len); + + req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, + off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica, NULL, ci->i_truncate_seq, ci->i_truncate_size, false); if (IS_ERR(req)) { @@ -363,8 +383,37 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) } dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len); + iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); - osd_req_op_extent_osd_iter(req, 0, &iter); + + /* + * FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for + * encrypted inodes. We'd need infrastructure that handles an iov_iter + * instead of page arrays, and we don't have that as of yet. Once the + * dust settles on the write helpers and encrypt/decrypt routines for + * netfs, we should be able to rework this. + */ + if (IS_ENCRYPTED(inode)) { + struct page **pages; + size_t page_off; + + err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off); + if (err < 0) { + dout("%s: iov_ter_get_pages_alloc returned %d\n", + __func__, err); + goto out; + } + + /* should always give us a page-aligned read */ + WARN_ON_ONCE(page_off); + len = err; + err = 0; + + osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, + false); + } else { + osd_req_op_extent_osd_iter(req, 0, &iter); + } req->r_callback = finish_netfs_read; req->r_priv = subreq; req->r_inode = inode; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 45e00e42960b..9d1a77cdc494 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -970,7 +970,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, u64 off = *ki_pos; u64 len = iov_iter_count(to); u64 i_size = i_size_read(inode); - bool sparse = ceph_test_mount_opt(fsc, SPARSEREAD); + bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); u64 objver = 0; dout("sync_read on inode %p %llx~%llx\n", inode, *ki_pos, len); @@ -1001,10 +1001,19 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, int idx; size_t left; struct ceph_osd_req_op *op; + u64 read_off = off; + u64 read_len = len; + + /* determine new offset/length if encrypted */ + ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len); + + dout("sync_read orig %llu~%llu reading %llu~%llu", + off, len, read_off, read_len); req = ceph_osdc_new_request(osdc, &ci->i_layout, - ci->i_vino, off, &len, 0, 1, - sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ, + ci->i_vino, read_off, &read_len, 0, 1, + sparse ? CEPH_OSD_OP_SPARSE_READ : + CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq, ci->i_truncate_size, false); @@ -1013,10 +1022,13 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, break; } + /* adjust len downward if the request truncated the len */ + if (off + len > read_off + read_len) + len = read_off + read_len - off; more = len < iov_iter_count(to); - num_pages = calc_pages_for(off, len); - page_off = off & ~PAGE_MASK; + num_pages = calc_pages_for(read_off, read_len); + page_off = offset_in_page(off); pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); if (IS_ERR(pages)) { ceph_osdc_put_request(req); @@ -1024,7 +1036,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, break; } - osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off, + osd_req_op_extent_osd_data_pages(req, 0, pages, read_len, + offset_in_page(read_off), false, false); op = &req->r_ops[0]; @@ -1042,7 +1055,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency, req->r_end_latency, - len, ret); + read_len, ret); if (ret > 0) objver = req->r_version; @@ -1057,8 +1070,35 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, else if (ret == -ENOENT) ret = 0; + if (ret > 0 && IS_ENCRYPTED(inode)) { + int fret; + + fret = ceph_fscrypt_decrypt_extents(inode, pages, + read_off, op->extent.sparse_ext, + op->extent.sparse_ext_cnt); + if (fret < 0) { + ret = fret; + ceph_osdc_put_request(req); + break; + } + + /* account for any partial block at the beginning */ + fret -= (off - read_off); + + /* + * Short read after big offset adjustment? + * Nothing is usable, just call it a zero + * len read. + */ + fret = max(fret, 0); + + /* account for partial block at the end */ + ret = min_t(ssize_t, fret, len); + } + ceph_osdc_put_request(req); + /* Short read but not EOF? Zero out the remainder. */ if (ret >= 0 && ret < len && (off + ret < i_size)) { int zlen = min(len - ret, i_size - off - ret); int zoff = page_off + ret; @@ -1072,15 +1112,16 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, idx = 0; left = ret > 0 ? ret : 0; while (left > 0) { - size_t len, copied; - page_off = off & ~PAGE_MASK; - len = min_t(size_t, left, PAGE_SIZE - page_off); + size_t plen, copied; + + plen = min_t(size_t, left, PAGE_SIZE - page_off); SetPageUptodate(pages[idx]); copied = copy_page_to_iter(pages[idx++], - page_off, len, to); + page_off, plen, to); off += copied; left -= copied; - if (copied < len) { + page_off = 0; + if (copied < plen) { ret = -EFAULT; break; } @@ -1097,20 +1138,21 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, break; } - if (off > *ki_pos) { - if (off >= i_size) { - *retry_op = CHECK_EOF; - ret = i_size - *ki_pos; - *ki_pos = i_size; - } else { - ret = off - *ki_pos; - *ki_pos = off; + if (ret > 0) { + if (off > *ki_pos) { + if (off >= i_size) { + *retry_op = CHECK_EOF; + ret = i_size - *ki_pos; + *ki_pos = i_size; + } else { + ret = off - *ki_pos; + *ki_pos = off; + } } - } - - if (last_objver && ret > 0) - *last_objver = objver; + if (last_objver) + *last_objver = objver; + } dout("sync_read result %zd retry_op %d\n", ret, *retry_op); return ret; } |