From 0cebaccef3acbdfbc2d85880a2efb765d2f4e2e3 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 15 Feb 2018 10:49:36 -0800 Subject: rds: zerocopy Tx support. If the MSG_ZEROCOPY flag is specified with rds_sendmsg(), and, if the SO_ZEROCOPY socket option has been set on the PF_RDS socket, application pages sent down with rds_sendmsg() are pinned. The pinning uses the accounting infrastructure added by Commit a91dbff551a6 ("sock: ulimit on MSG_ZEROCOPY pages") The payload bytes in the message may not be modified for the duration that the message has been pinned. A multi-threaded application using this infrastructure may thus need to be notified about send-completion so that it can free/reuse the buffers passed to rds_sendmsg(). Notification of send-completion will identify each message-buffer by a cookie that the application must specify as ancillary data to rds_sendmsg(). The ancillary data in this case has cmsg_level == SOL_RDS and cmsg_type == RDS_CMSG_ZCOPY_COOKIE. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/rds/message.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) (limited to 'net/rds/message.c') diff --git a/net/rds/message.c b/net/rds/message.c index bf1a656b198a..651834513481 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -341,12 +341,14 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in return rm; } -int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) +int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, + bool zcopy) { unsigned long to_copy, nbytes; unsigned long sg_off; struct scatterlist *sg; int ret = 0; + int length = iov_iter_count(from); rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); @@ -356,6 +358,53 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from) sg = rm->data.op_sg; sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ + if (zcopy) { + int total_copied = 0; + struct sk_buff *skb; + + skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32), + GFP_KERNEL); + if (!skb) + return -ENOMEM; + rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb); + if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp, + length)) { + ret = -ENOMEM; + goto err; + } + while (iov_iter_count(from)) { + struct page *pages; + size_t start; + ssize_t copied; + + copied = iov_iter_get_pages(from, &pages, PAGE_SIZE, + 1, &start); + if (copied < 0) { + struct mmpin *mmp; + int i; + + for (i = 0; i < rm->data.op_nents; i++) + put_page(sg_page(&rm->data.op_sg[i])); + mmp = &rm->data.op_mmp_znotifier->z_mmp; + mm_unaccount_pinned_pages(mmp); + ret = -EFAULT; + goto err; + } + total_copied += copied; + iov_iter_advance(from, copied); + length -= copied; + sg_set_page(sg, pages, copied, start); + rm->data.op_nents++; + sg++; + } + WARN_ON_ONCE(length != 0); + return ret; +err: + consume_skb(skb); + rm->data.op_mmp_znotifier = NULL; + return ret; + } /* zcopy */ + while (iov_iter_count(from)) { if (!sg_page(sg)) { ret = rds_page_remainder_alloc(sg, iov_iter_count(from), -- cgit v1.2.3