summaryrefslogtreecommitdiff
path: root/net/tls
diff options
context:
space:
mode:
Diffstat (limited to 'net/tls')
-rw-r--r--net/tls/tls.h13
-rw-r--r--net/tls/tls_device.c132
-rw-r--r--net/tls/tls_device_fallback.c2
-rw-r--r--net/tls/tls_main.c70
-rw-r--r--net/tls/tls_strp.c189
-rw-r--r--net/tls/tls_sw.c257
6 files changed, 389 insertions, 274 deletions
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 804c3880d028..86cef1c68e03 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -97,10 +97,7 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx);
void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
void tls_sw_strparser_done(struct tls_context *tls_ctx);
int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
-int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
- int offset, size_t size, int flags);
-int tls_sw_sendpage(struct sock *sk, struct page *page,
- int offset, size_t size, int flags);
+void tls_sw_splice_eof(struct socket *sock);
void tls_sw_cancel_work_tx(struct tls_context *tls_ctx);
void tls_sw_release_resources_tx(struct sock *sk);
void tls_sw_free_ctx_tx(struct tls_context *tls_ctx);
@@ -115,8 +112,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
size_t len, unsigned int flags);
int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
-int tls_device_sendpage(struct sock *sk, struct page *page,
- int offset, size_t size, int flags);
+void tls_device_splice_eof(struct socket *sock);
int tls_tx_records(struct sock *sk, int flags);
void tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
@@ -167,6 +163,11 @@ static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx)
return ctx->strp.msg_ready;
}
+static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx)
+{
+ return ctx->strp.mixed_decrypted;
+}
+
#ifdef CONFIG_TLS_DEVICE
int tls_device_init(void);
void tls_device_cleanup(void);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index a7cc4f9faac2..2021fe557e50 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -268,9 +268,8 @@ static void tls_append_frag(struct tls_record_info *record,
skb_frag_size_add(frag, size);
} else {
++frag;
- __skb_frag_set_page(frag, pfrag->page);
- skb_frag_off_set(frag, pfrag->offset);
- skb_frag_size_set(frag, size);
+ skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset,
+ size);
++record->num_frags;
get_page(pfrag->page);
}
@@ -357,9 +356,8 @@ static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
return -ENOMEM;
frag = &record->frags[0];
- __skb_frag_set_page(frag, pfrag->page);
- skb_frag_off_set(frag, pfrag->offset);
- skb_frag_size_set(frag, prepend_size);
+ skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset,
+ prepend_size);
get_page(pfrag->page);
pfrag->offset += prepend_size;
@@ -424,16 +422,10 @@ static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i)
return 0;
}
-union tls_iter_offset {
- struct iov_iter *msg_iter;
- int offset;
-};
-
static int tls_push_data(struct sock *sk,
- union tls_iter_offset iter_offset,
+ struct iov_iter *iter,
size_t size, int flags,
- unsigned char record_type,
- struct page *zc_page)
+ unsigned char record_type)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -449,14 +441,14 @@ static int tls_push_data(struct sock *sk,
long timeo;
if (flags &
- ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST))
+ ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SPLICE_PAGES))
return -EOPNOTSUPP;
if (unlikely(sk->sk_err))
return -sk->sk_err;
flags |= MSG_SENDPAGE_DECRYPTED;
- tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
+ tls_push_record_flags = flags | MSG_MORE;
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
if (tls_is_partially_sent_record(tls_ctx)) {
@@ -501,21 +493,35 @@ handle_error:
record = ctx->open_record;
copy = min_t(size_t, size, max_open_record_len - record->len);
- if (copy && zc_page) {
+ if (copy && (flags & MSG_SPLICE_PAGES)) {
struct page_frag zc_pfrag;
+ struct page **pages = &zc_pfrag.page;
+ size_t off;
+
+ rc = iov_iter_extract_pages(iter, &pages,
+ copy, 1, 0, &off);
+ if (rc <= 0) {
+ if (rc == 0)
+ rc = -EIO;
+ goto handle_error;
+ }
+ copy = rc;
- zc_pfrag.page = zc_page;
- zc_pfrag.offset = iter_offset.offset;
+ if (WARN_ON_ONCE(!sendpage_ok(zc_pfrag.page))) {
+ iov_iter_revert(iter, copy);
+ rc = -EIO;
+ goto handle_error;
+ }
+
+ zc_pfrag.offset = off;
zc_pfrag.size = copy;
tls_append_frag(record, &zc_pfrag, copy);
-
- iter_offset.offset += copy;
} else if (copy) {
copy = min_t(size_t, copy, pfrag->size - pfrag->offset);
rc = tls_device_copy_data(page_address(pfrag->page) +
pfrag->offset, copy,
- iter_offset.msg_iter);
+ iter);
if (rc)
goto handle_error;
tls_append_frag(record, pfrag, copy);
@@ -525,7 +531,7 @@ handle_error:
if (!size) {
last_record:
tls_push_record_flags = flags;
- if (flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE)) {
+ if (flags & MSG_MORE) {
more = true;
break;
}
@@ -570,9 +576,11 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
unsigned char record_type = TLS_RECORD_TYPE_DATA;
struct tls_context *tls_ctx = tls_get_ctx(sk);
- union tls_iter_offset iter;
int rc;
+ if (!tls_ctx->zerocopy_sendfile)
+ msg->msg_flags &= ~MSG_SPLICE_PAGES;
+
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
@@ -582,8 +590,8 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
goto out;
}
- iter.msg_iter = &msg->msg_iter;
- rc = tls_push_data(sk, iter, size, msg->msg_flags, record_type, NULL);
+ rc = tls_push_data(sk, &msg->msg_iter, size, msg->msg_flags,
+ record_type);
out:
release_sock(sk);
@@ -591,47 +599,25 @@ out:
return rc;
}
-int tls_device_sendpage(struct sock *sk, struct page *page,
- int offset, size_t size, int flags)
+void tls_device_splice_eof(struct socket *sock)
{
+ struct sock *sk = sock->sk;
struct tls_context *tls_ctx = tls_get_ctx(sk);
- union tls_iter_offset iter_offset;
- struct iov_iter msg_iter;
- char *kaddr;
- struct kvec iov;
- int rc;
+ struct iov_iter iter = {};
- if (flags & MSG_SENDPAGE_NOTLAST)
- flags |= MSG_MORE;
+ if (!tls_is_partially_sent_record(tls_ctx))
+ return;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
- if (flags & MSG_OOB) {
- rc = -EOPNOTSUPP;
- goto out;
- }
-
- if (tls_ctx->zerocopy_sendfile) {
- iter_offset.offset = offset;
- rc = tls_push_data(sk, iter_offset, size,
- flags, TLS_RECORD_TYPE_DATA, page);
- goto out;
+ if (tls_is_partially_sent_record(tls_ctx)) {
+ iov_iter_bvec(&iter, ITER_SOURCE, NULL, 0, 0);
+ tls_push_data(sk, &iter, 0, 0, TLS_RECORD_TYPE_DATA);
}
- kaddr = kmap(page);
- iov.iov_base = kaddr + offset;
- iov.iov_len = size;
- iov_iter_kvec(&msg_iter, ITER_SOURCE, &iov, 1, size);
- iter_offset.msg_iter = &msg_iter;
- rc = tls_push_data(sk, iter_offset, size, flags, TLS_RECORD_TYPE_DATA,
- NULL);
- kunmap(page);
-
-out:
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
- return rc;
}
struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
@@ -696,12 +682,10 @@ EXPORT_SYMBOL(tls_get_record);
static int tls_device_push_pending_record(struct sock *sk, int flags)
{
- union tls_iter_offset iter;
- struct iov_iter msg_iter;
+ struct iov_iter iter;
- iov_iter_kvec(&msg_iter, ITER_SOURCE, NULL, 0, 0);
- iter.msg_iter = &msg_iter;
- return tls_push_data(sk, iter, 0, flags, TLS_RECORD_TYPE_DATA, NULL);
+ iov_iter_kvec(&iter, ITER_SOURCE, NULL, 0, 0);
+ return tls_push_data(sk, &iter, 0, flags, TLS_RECORD_TYPE_DATA);
}
void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
@@ -1007,20 +991,14 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx)
struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_buff *skb = tls_strp_msg(sw_ctx);
struct strp_msg *rxm = strp_msg(skb);
- int is_decrypted = skb->decrypted;
- int is_encrypted = !is_decrypted;
- struct sk_buff *skb_iter;
- int left;
-
- left = rxm->full_len - skb->len;
- /* Check if all the data is decrypted already */
- skb_iter = skb_shinfo(skb)->frag_list;
- while (skb_iter && left > 0) {
- is_decrypted &= skb_iter->decrypted;
- is_encrypted &= !skb_iter->decrypted;
-
- left -= skb_iter->len;
- skb_iter = skb_iter->next;
+ int is_decrypted, is_encrypted;
+
+ if (!tls_strp_msg_mixed_decrypted(sw_ctx)) {
+ is_decrypted = skb->decrypted;
+ is_encrypted = !is_decrypted;
+ } else {
+ is_decrypted = 0;
+ is_encrypted = 0;
}
trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len,
@@ -1223,7 +1201,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
tls_device_attach(ctx, sk, netdev);
up_read(&device_offload_lock);
- /* following this assignment tls_is_sk_tx_device_offloaded
+ /* following this assignment tls_is_skb_tx_device_offloaded
* will return true and the context might be accessed
* by the netdev's xmit function.
*/
@@ -1376,7 +1354,7 @@ static int tls_device_down(struct net_device *netdev)
list_for_each_entry_safe(ctx, tmp, &list, list) {
/* Stop offloaded TX and switch to the fallback.
- * tls_is_sk_tx_device_offloaded will return false.
+ * tls_is_skb_tx_device_offloaded will return false.
*/
WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, tls_validate_xmit_skb_sw);
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 7fbb1d0b69b3..b28c5e296dfd 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -271,7 +271,7 @@ static int fill_sg_in(struct scatterlist *sg_in,
* There is a corner case where the packet contains
* both an acked and a non-acked record.
* We currently don't handle that case and rely
- * on TCP to retranmit a packet that doesn't contain
+ * on TCP to retransmit a packet that doesn't contain
* already acked payload.
*/
if (!is_start_marker)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index f2e7302a4d96..b6896126bb92 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -125,7 +125,10 @@ int tls_push_sg(struct sock *sk,
u16 first_offset,
int flags)
{
- int sendpage_flags = flags | MSG_SENDPAGE_NOTLAST;
+ struct bio_vec bvec;
+ struct msghdr msg = {
+ .msg_flags = MSG_SPLICE_PAGES | flags,
+ };
int ret = 0;
struct page *p;
size_t size;
@@ -134,16 +137,19 @@ int tls_push_sg(struct sock *sk,
size = sg->length - offset;
offset += sg->offset;
- ctx->in_tcp_sendpages = true;
+ ctx->splicing_pages = true;
while (1) {
if (sg_is_last(sg))
- sendpage_flags = flags;
+ msg.msg_flags = flags;
/* is sending application-limited? */
tcp_rate_check_app_limited(sk);
p = sg_page(sg);
retry:
- ret = do_tcp_sendpages(sk, p, offset, size, sendpage_flags);
+ bvec_set_page(&bvec, p, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+
+ ret = tcp_sendmsg_locked(sk, &msg, size);
if (ret != size) {
if (ret > 0) {
@@ -155,7 +161,7 @@ retry:
offset -= sg->offset;
ctx->partially_sent_offset = offset;
ctx->partially_sent_record = (void *)sg;
- ctx->in_tcp_sendpages = false;
+ ctx->splicing_pages = false;
return ret;
}
@@ -169,7 +175,7 @@ retry:
size = sg->length;
}
- ctx->in_tcp_sendpages = false;
+ ctx->splicing_pages = false;
return 0;
}
@@ -247,11 +253,11 @@ static void tls_write_space(struct sock *sk)
{
struct tls_context *ctx = tls_get_ctx(sk);
- /* If in_tcp_sendpages call lower protocol write space handler
+ /* If splicing_pages call lower protocol write space handler
* to ensure we wake up any waiting operations there. For example
- * if do_tcp_sendpages where to call sk_wait_event.
+ * if splicing pages where to call sk_wait_event.
*/
- if (ctx->in_tcp_sendpages) {
+ if (ctx->splicing_pages) {
ctx->sk_write_space(sk);
return;
}
@@ -352,6 +358,39 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
tls_ctx_free(sk, ctx);
}
+static __poll_t tls_sk_poll(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait)
+{
+ struct tls_sw_context_rx *ctx;
+ struct tls_context *tls_ctx;
+ struct sock *sk = sock->sk;
+ struct sk_psock *psock;
+ __poll_t mask = 0;
+ u8 shutdown;
+ int state;
+
+ mask = tcp_poll(file, sock, wait);
+
+ state = inet_sk_state_load(sk);
+ shutdown = READ_ONCE(sk->sk_shutdown);
+ if (unlikely(state != TCP_ESTABLISHED || shutdown & RCV_SHUTDOWN))
+ return mask;
+
+ tls_ctx = tls_get_ctx(sk);
+ ctx = tls_sw_ctx_rx(tls_ctx);
+ psock = sk_psock_get(sk);
+
+ if (skb_queue_empty_lockless(&ctx->rx_list) &&
+ !tls_strp_msg_ready(ctx) &&
+ sk_psock_queue_empty(psock))
+ mask &= ~(EPOLLIN | EPOLLRDNORM);
+
+ if (psock)
+ sk_psock_put(sk, psock);
+
+ return mask;
+}
+
static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
int __user *optlen, int tx)
{
@@ -918,27 +957,26 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
ops[TLS_BASE][TLS_BASE] = *base;
ops[TLS_SW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
- ops[TLS_SW ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked;
+ ops[TLS_SW ][TLS_BASE].splice_eof = tls_sw_splice_eof;
ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE];
ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read;
+ ops[TLS_BASE][TLS_SW ].poll = tls_sk_poll;
ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE];
ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read;
+ ops[TLS_SW ][TLS_SW ].poll = tls_sk_poll;
#ifdef CONFIG_TLS_DEVICE
ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
- ops[TLS_HW ][TLS_BASE].sendpage_locked = NULL;
ops[TLS_HW ][TLS_SW ] = ops[TLS_BASE][TLS_SW ];
- ops[TLS_HW ][TLS_SW ].sendpage_locked = NULL;
ops[TLS_BASE][TLS_HW ] = ops[TLS_BASE][TLS_SW ];
ops[TLS_SW ][TLS_HW ] = ops[TLS_SW ][TLS_SW ];
ops[TLS_HW ][TLS_HW ] = ops[TLS_HW ][TLS_SW ];
- ops[TLS_HW ][TLS_HW ].sendpage_locked = NULL;
#endif
#ifdef CONFIG_TLS_TOE
ops[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
@@ -986,7 +1024,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg;
- prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage;
+ prot[TLS_SW][TLS_BASE].splice_eof = tls_sw_splice_eof;
prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE];
prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg;
@@ -1001,11 +1039,11 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
#ifdef CONFIG_TLS_DEVICE
prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
prot[TLS_HW][TLS_BASE].sendmsg = tls_device_sendmsg;
- prot[TLS_HW][TLS_BASE].sendpage = tls_device_sendpage;
+ prot[TLS_HW][TLS_BASE].splice_eof = tls_device_splice_eof;
prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW];
prot[TLS_HW][TLS_SW].sendmsg = tls_device_sendmsg;
- prot[TLS_HW][TLS_SW].sendpage = tls_device_sendpage;
+ prot[TLS_HW][TLS_SW].splice_eof = tls_device_splice_eof;
prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW];
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index 955ac3e0bf4d..f37f4a0fcd3c 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -20,7 +20,9 @@ static void tls_strp_abort_strp(struct tls_strparser *strp, int err)
strp->stopped = 1;
/* Report an error on the lower socket */
- strp->sk->sk_err = -err;
+ WRITE_ONCE(strp->sk->sk_err, -err);
+ /* Paired with smp_rmb() in tcp_poll() */
+ smp_wmb();
sk_error_report(strp->sk);
}
@@ -29,34 +31,50 @@ static void tls_strp_anchor_free(struct tls_strparser *strp)
struct skb_shared_info *shinfo = skb_shinfo(strp->anchor);
DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1);
- shinfo->frag_list = NULL;
+ if (!strp->copy_mode)
+ shinfo->frag_list = NULL;
consume_skb(strp->anchor);
strp->anchor = NULL;
}
-/* Create a new skb with the contents of input copied to its page frags */
-static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
+static struct sk_buff *
+tls_strp_skb_copy(struct tls_strparser *strp, struct sk_buff *in_skb,
+ int offset, int len)
{
- struct strp_msg *rxm;
struct sk_buff *skb;
- int i, err, offset;
+ int i, err;
- skb = alloc_skb_with_frags(0, strp->stm.full_len, TLS_PAGE_ORDER,
+ skb = alloc_skb_with_frags(0, len, TLS_PAGE_ORDER,
&err, strp->sk->sk_allocation);
if (!skb)
return NULL;
- offset = strp->stm.offset;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- WARN_ON_ONCE(skb_copy_bits(strp->anchor, offset,
+ WARN_ON_ONCE(skb_copy_bits(in_skb, offset,
skb_frag_address(frag),
skb_frag_size(frag)));
offset += skb_frag_size(frag);
}
- skb_copy_header(skb, strp->anchor);
+ skb->len = len;
+ skb->data_len = len;
+ skb_copy_header(skb, in_skb);
+ return skb;
+}
+
+/* Create a new skb with the contents of input copied to its page frags */
+static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
+{
+ struct strp_msg *rxm;
+ struct sk_buff *skb;
+
+ skb = tls_strp_skb_copy(strp, strp->anchor, strp->stm.offset,
+ strp->stm.full_len);
+ if (!skb)
+ return NULL;
+
rxm = strp_msg(skb);
rxm->offset = 0;
return skb;
@@ -180,22 +198,22 @@ static void tls_strp_flush_anchor_copy(struct tls_strparser *strp)
for (i = 0; i < shinfo->nr_frags; i++)
__skb_frag_unref(&shinfo->frags[i], false);
shinfo->nr_frags = 0;
+ if (strp->copy_mode) {
+ kfree_skb_list(shinfo->frag_list);
+ shinfo->frag_list = NULL;
+ }
strp->copy_mode = 0;
+ strp->mixed_decrypted = 0;
}
-static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
- unsigned int offset, size_t in_len)
+static int tls_strp_copyin_frag(struct tls_strparser *strp, struct sk_buff *skb,
+ struct sk_buff *in_skb, unsigned int offset,
+ size_t in_len)
{
- struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
- struct sk_buff *skb;
- skb_frag_t *frag;
size_t len, chunk;
+ skb_frag_t *frag;
int sz;
- if (strp->msg_ready)
- return 0;
-
- skb = strp->anchor;
frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE];
len = in_len;
@@ -208,19 +226,26 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
skb_frag_size(frag),
chunk));
- sz = tls_rx_msg_size(strp, strp->anchor);
- if (sz < 0) {
- desc->error = sz;
- return 0;
- }
-
- /* We may have over-read, sz == 0 is guaranteed under-read */
- if (sz > 0)
- chunk = min_t(size_t, chunk, sz - skb->len);
-
skb->len += chunk;
skb->data_len += chunk;
skb_frag_size_add(frag, chunk);
+
+ sz = tls_rx_msg_size(strp, skb);
+ if (sz < 0)
+ return sz;
+
+ /* We may have over-read, sz == 0 is guaranteed under-read */
+ if (unlikely(sz && sz < skb->len)) {
+ int over = skb->len - sz;
+
+ WARN_ON_ONCE(over > chunk);
+ skb->len -= over;
+ skb->data_len -= over;
+ skb_frag_size_add(frag, -over);
+
+ chunk -= over;
+ }
+
frag++;
len -= chunk;
offset += chunk;
@@ -247,15 +272,99 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
offset += chunk;
}
- if (strp->stm.full_len == skb->len) {
+read_done:
+ return in_len - len;
+}
+
+static int tls_strp_copyin_skb(struct tls_strparser *strp, struct sk_buff *skb,
+ struct sk_buff *in_skb, unsigned int offset,
+ size_t in_len)
+{
+ struct sk_buff *nskb, *first, *last;
+ struct skb_shared_info *shinfo;
+ size_t chunk;
+ int sz;
+
+ if (strp->stm.full_len)
+ chunk = strp->stm.full_len - skb->len;
+ else
+ chunk = TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE;
+ chunk = min(chunk, in_len);
+
+ nskb = tls_strp_skb_copy(strp, in_skb, offset, chunk);
+ if (!nskb)
+ return -ENOMEM;
+
+ shinfo = skb_shinfo(skb);
+ if (!shinfo->frag_list) {
+ shinfo->frag_list = nskb;
+ nskb->prev = nskb;
+ } else {
+ first = shinfo->frag_list;
+ last = first->prev;
+ last->next = nskb;
+ first->prev = nskb;
+ }
+
+ skb->len += chunk;
+ skb->data_len += chunk;
+
+ if (!strp->stm.full_len) {
+ sz = tls_rx_msg_size(strp, skb);
+ if (sz < 0)
+ return sz;
+
+ /* We may have over-read, sz == 0 is guaranteed under-read */
+ if (unlikely(sz && sz < skb->len)) {
+ int over = skb->len - sz;
+
+ WARN_ON_ONCE(over > chunk);
+ skb->len -= over;
+ skb->data_len -= over;
+ __pskb_trim(nskb, nskb->len - over);
+
+ chunk -= over;
+ }
+
+ strp->stm.full_len = sz;
+ }
+
+ return chunk;
+}
+
+static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
+ unsigned int offset, size_t in_len)
+{
+ struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
+ struct sk_buff *skb;
+ int ret;
+
+ if (strp->msg_ready)
+ return 0;
+
+ skb = strp->anchor;
+ if (!skb->len)
+ skb_copy_decrypted(skb, in_skb);
+ else
+ strp->mixed_decrypted |= !!skb_cmp_decrypted(skb, in_skb);
+
+ if (IS_ENABLED(CONFIG_TLS_DEVICE) && strp->mixed_decrypted)
+ ret = tls_strp_copyin_skb(strp, skb, in_skb, offset, in_len);
+ else
+ ret = tls_strp_copyin_frag(strp, skb, in_skb, offset, in_len);
+ if (ret < 0) {
+ desc->error = ret;
+ ret = 0;
+ }
+
+ if (strp->stm.full_len && strp->stm.full_len == skb->len) {
desc->count = 0;
strp->msg_ready = 1;
tls_rx_msg_ready(strp);
}
-read_done:
- return in_len - len;
+ return ret;
}
static int tls_strp_read_copyin(struct tls_strparser *strp)
@@ -315,15 +424,19 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
return 0;
}
-static bool tls_strp_check_no_dup(struct tls_strparser *strp)
+static bool tls_strp_check_queue_ok(struct tls_strparser *strp)
{
unsigned int len = strp->stm.offset + strp->stm.full_len;
- struct sk_buff *skb;
+ struct sk_buff *first, *skb;
u32 seq;
- skb = skb_shinfo(strp->anchor)->frag_list;
- seq = TCP_SKB_CB(skb)->seq;
+ first = skb_shinfo(strp->anchor)->frag_list;
+ skb = first;
+ seq = TCP_SKB_CB(first)->seq;
+ /* Make sure there's no duplicate data in the queue,
+ * and the decrypted status matches.
+ */
while (skb->len < len) {
seq += skb->len;
len -= skb->len;
@@ -331,6 +444,8 @@ static bool tls_strp_check_no_dup(struct tls_strparser *strp)
if (TCP_SKB_CB(skb)->seq != seq)
return false;
+ if (skb_cmp_decrypted(first, skb))
+ return false;
}
return true;
@@ -411,7 +526,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
return tls_strp_read_copy(strp, true);
}
- if (!tls_strp_check_no_dup(strp))
+ if (!tls_strp_check_queue_ok(strp))
return tls_strp_read_copy(strp, false);
strp->msg_ready = 1;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 635b8bf6b937..53f944e6d8ef 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -70,7 +70,9 @@ noinline void tls_err_abort(struct sock *sk, int err)
{
WARN_ON_ONCE(err >= 0);
/* sk->sk_err should contain a positive error code. */
- sk->sk_err = -err;
+ WRITE_ONCE(sk->sk_err, -err);
+ /* Paired with smp_rmb() in tcp_poll() */
+ smp_wmb();
sk_error_report(sk);
}
@@ -929,7 +931,37 @@ static int tls_sw_push_pending_record(struct sock *sk, int flags)
&copied, flags);
}
-int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg,
+ struct sk_msg *msg_pl, size_t try_to_copy,
+ ssize_t *copied)
+{
+ struct page *page = NULL, **pages = &page;
+
+ do {
+ ssize_t part;
+ size_t off;
+
+ part = iov_iter_extract_pages(&msg->msg_iter, &pages,
+ try_to_copy, 1, 0, &off);
+ if (part <= 0)
+ return part ?: -EIO;
+
+ if (WARN_ON_ONCE(!sendpage_ok(page))) {
+ iov_iter_revert(&msg->msg_iter, part);
+ return -EIO;
+ }
+
+ sk_msg_page_add(msg_pl, page, part, off);
+ sk_mem_charge(sk, part);
+ *copied += part;
+ try_to_copy -= part;
+ } while (try_to_copy && !sk_msg_full(msg_pl));
+
+ return 0;
+}
+
+static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
+ size_t size)
{
long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -952,15 +984,6 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
int ret = 0;
int pending;
- if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
- MSG_CMSG_COMPAT))
- return -EOPNOTSUPP;
-
- ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
- if (ret)
- return ret;
- lock_sock(sk);
-
if (unlikely(msg->msg_controllen)) {
ret = tls_process_cmsg(sk, msg, &record_type);
if (ret) {
@@ -1018,6 +1041,17 @@ alloc_encrypted:
full_record = true;
}
+ if (try_to_copy && (msg->msg_flags & MSG_SPLICE_PAGES)) {
+ ret = tls_sw_sendmsg_splice(sk, msg, msg_pl,
+ try_to_copy, &copied);
+ if (ret < 0)
+ goto send_end;
+ tls_ctx->pending_open_record_frags = true;
+ if (full_record || eor || sk_msg_full(msg_pl))
+ goto copied;
+ continue;
+ }
+
if (!is_kvec && (full_record || eor) && !async_capable) {
u32 first = msg_pl->sg.end;
@@ -1082,6 +1116,7 @@ fallback_to_reg_send:
*/
tls_ctx->pending_open_record_frags = true;
copied += try_to_copy;
+copied:
if (full_record || eor) {
ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
record_type, &copied,
@@ -1149,157 +1184,101 @@ trim_sgl:
send_end:
ret = sk_stream_error(sk, msg->msg_flags, ret);
+ return copied > 0 ? copied : ret;
+}
+
+int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ int ret;
+ if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_CMSG_COMPAT | MSG_SPLICE_PAGES |
+ MSG_SENDPAGE_NOPOLICY))
+ return -EOPNOTSUPP;
+
+ ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
+ if (ret)
+ return ret;
+ lock_sock(sk);
+ ret = tls_sw_sendmsg_locked(sk, msg, size);
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
- return copied > 0 ? copied : ret;
+ return ret;
}
-static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
- int offset, size_t size, int flags)
+/*
+ * Handle unexpected EOF during splice without SPLICE_F_MORE set.
+ */
+void tls_sw_splice_eof(struct socket *sock)
{
- long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+ struct sock *sk = sock->sk;
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
- struct tls_prot_info *prot = &tls_ctx->prot_info;
- unsigned char record_type = TLS_RECORD_TYPE_DATA;
- struct sk_msg *msg_pl;
struct tls_rec *rec;
- int num_async = 0;
+ struct sk_msg *msg_pl;
ssize_t copied = 0;
- bool full_record;
- int record_room;
+ bool retrying = false;
int ret = 0;
- bool eor;
-
- eor = !(flags & MSG_SENDPAGE_NOTLAST);
- sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
- /* Call the sk_stream functions to manage the sndbuf mem. */
- while (size > 0) {
- size_t copy, required_size;
-
- if (sk->sk_err) {
- ret = -sk->sk_err;
- goto sendpage_end;
- }
-
- if (ctx->open_rec)
- rec = ctx->open_rec;
- else
- rec = ctx->open_rec = tls_get_rec(sk);
- if (!rec) {
- ret = -ENOMEM;
- goto sendpage_end;
- }
-
- msg_pl = &rec->msg_plaintext;
-
- full_record = false;
- record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
- copy = size;
- if (copy >= record_room) {
- copy = record_room;
- full_record = true;
- }
-
- required_size = msg_pl->sg.size + copy + prot->overhead_size;
-
- if (!sk_stream_memory_free(sk))
- goto wait_for_sndbuf;
-alloc_payload:
- ret = tls_alloc_encrypted_msg(sk, required_size);
- if (ret) {
- if (ret != -ENOSPC)
- goto wait_for_memory;
+ int pending;
- /* Adjust copy according to the amount that was
- * actually allocated. The difference is due
- * to max sg elements limit
- */
- copy -= required_size - msg_pl->sg.size;
- full_record = true;
- }
+ if (!ctx->open_rec)
+ return;
- sk_msg_page_add(msg_pl, page, copy, offset);
- sk_mem_charge(sk, copy);
+ mutex_lock(&tls_ctx->tx_lock);
+ lock_sock(sk);
- offset += copy;
- size -= copy;
- copied += copy;
+retry:
+ rec = ctx->open_rec;
+ if (!rec)
+ goto unlock;
- tls_ctx->pending_open_record_frags = true;
- if (full_record || eor || sk_msg_full(msg_pl)) {
- ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
- record_type, &copied, flags);
- if (ret) {
- if (ret == -EINPROGRESS)
- num_async++;
- else if (ret == -ENOMEM)
- goto wait_for_memory;
- else if (ret != -EAGAIN) {
- if (ret == -ENOSPC)
- ret = 0;
- goto sendpage_end;
- }
- }
- }
- continue;
-wait_for_sndbuf:
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-wait_for_memory:
- ret = sk_stream_wait_memory(sk, &timeo);
- if (ret) {
- if (ctx->open_rec)
- tls_trim_both_msgs(sk, msg_pl->sg.size);
- goto sendpage_end;
- }
+ msg_pl = &rec->msg_plaintext;
- if (ctx->open_rec)
- goto alloc_payload;
+ /* Check the BPF advisor and perform transmission. */
+ ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA,
+ &copied, 0);
+ switch (ret) {
+ case 0:
+ case -EAGAIN:
+ if (retrying)
+ goto unlock;
+ retrying = true;
+ goto retry;
+ case -EINPROGRESS:
+ break;
+ default:
+ goto unlock;
}
- if (num_async) {
- /* Transmit if any encryptions have completed */
- if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
- cancel_delayed_work(&ctx->tx_work.work);
- tls_tx_records(sk, flags);
- }
- }
-sendpage_end:
- ret = sk_stream_error(sk, flags, ret);
- return copied > 0 ? copied : ret;
-}
+ /* Wait for pending encryptions to get completed */
+ spin_lock_bh(&ctx->encrypt_compl_lock);
+ ctx->async_notify = true;
-int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
- int offset, size_t size, int flags)
-{
- if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
- MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY |
- MSG_NO_SHARED_FRAGS))
- return -EOPNOTSUPP;
+ pending = atomic_read(&ctx->encrypt_pending);
+ spin_unlock_bh(&ctx->encrypt_compl_lock);
+ if (pending)
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ else
+ reinit_completion(&ctx->async_wait.completion);
- return tls_sw_do_sendpage(sk, page, offset, size, flags);
-}
+ /* There can be no concurrent accesses, since we have no pending
+ * encrypt operations
+ */
+ WRITE_ONCE(ctx->async_notify, false);
-int tls_sw_sendpage(struct sock *sk, struct page *page,
- int offset, size_t size, int flags)
-{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- int ret;
+ if (ctx->async_wait.err)
+ goto unlock;
- if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
- MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
- return -EOPNOTSUPP;
+ /* Transmit if any encryptions have completed */
+ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
+ cancel_delayed_work(&ctx->tx_work.work);
+ tls_tx_records(sk, 0);
+ }
- ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
- if (ret)
- return ret;
- lock_sock(sk);
- ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
+unlock:
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
- return ret;
}
static int
@@ -2304,10 +2283,14 @@ static void tls_data_ready(struct sock *sk)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_psock *psock;
+ gfp_t alloc_save;
trace_sk_data_ready(sk);
+ alloc_save = sk->sk_allocation;
+ sk->sk_allocation = GFP_ATOMIC;
tls_strp_data_ready(&ctx->strp);
+ sk->sk_allocation = alloc_save;
psock = sk_psock_get(sk);
if (psock) {