From 15a78ba1844a8e052c1226f930133de4cef4e7ad Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sat, 20 Jul 2019 07:22:45 -0500 Subject: netfilter: ebtables: fix a memory leak bug in compat In compat_do_replace(), a temporary buffer is allocated through vmalloc() to hold entries copied from the user space. The buffer address is firstly saved to 'newinfo->entries', and later on assigned to 'entries_tmp'. Then the entries in this temporary buffer is copied to the internal kernel structure through compat_copy_entries(). If this copy process fails, compat_do_replace() should be terminated. However, the allocated temporary buffer is not freed on this path, leading to a memory leak. To fix the bug, free the buffer before returning from compat_do_replace(). Signed-off-by: Wenwen Wang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 963dfdc14827..fd84b48e48b5 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2261,8 +2261,10 @@ static int compat_do_replace(struct net *net, void __user *user, state.buf_kern_len = size64; ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); - if (WARN_ON(ret < 0)) + if (WARN_ON(ret < 0)) { + vfree(entries_tmp); goto out_unlock; + } vfree(entries_tmp); tmp.entries_size = size64; -- cgit v1.2.3 From 318892ac068397f40ff81d9155898da01493b1d2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Jul 2019 10:29:14 -0700 Subject: net/tls: don't arm strparser immediately in tls_set_sw_offload() In tls_set_device_offload_rx() we prepare the software context for RX fallback and proceed to add the connection to the device. Unfortunately, software context prep includes arming strparser so in case of a later error we have to release the socket lock to call strp_done(). In preparation for not releasing the socket lock half way through callbacks move arming strparser into a separate function. Following patches will make use of that. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: Daniel Borkmann --- include/net/tls.h | 1 + net/tls/tls_device.c | 1 + net/tls/tls_main.c | 8 +++++--- net/tls/tls_sw.c | 19 ++++++++++++------- 4 files changed, 19 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/tls.h b/include/net/tls.h index 584609174fe0..43f551cd508b 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -355,6 +355,7 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval, unsigned int optlen); int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); +void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 7c0b2b778703..4d67d72f007c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1045,6 +1045,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) rc = tls_set_sw_offload(sk, ctx, 0); if (rc) goto release_ctx; + tls_sw_strparser_arm(sk, ctx); rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX, &ctx->crypto_recv.info, diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 4674e57e66b0..85a9d7d57b32 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -526,6 +526,8 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, { #endif rc = tls_set_sw_offload(sk, ctx, 1); + if (rc) + goto err_crypto_info; conf = TLS_SW; } } else { @@ -537,13 +539,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, { #endif rc = tls_set_sw_offload(sk, ctx, 0); + if (rc) + goto err_crypto_info; + tls_sw_strparser_arm(sk, ctx); conf = TLS_SW; } } - if (rc) - goto err_crypto_info; - if (tx) ctx->tx_conf = conf; else diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 53b4ad94e74a..f58a8ffc2a9c 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2160,6 +2160,18 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx) } } +void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx) +{ + struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx); + + write_lock_bh(&sk->sk_callback_lock); + rx_ctx->saved_data_ready = sk->sk_data_ready; + sk->sk_data_ready = tls_data_ready; + write_unlock_bh(&sk->sk_callback_lock); + + strp_check_rcv(&rx_ctx->strp); +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2357,13 +2369,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) cb.parse_msg = tls_read_size; strp_init(&sw_ctx_rx->strp, sk, &cb); - - write_lock_bh(&sk->sk_callback_lock); - sw_ctx_rx->saved_data_ready = sk->sk_data_ready; - sk->sk_data_ready = tls_data_ready; - write_unlock_bh(&sk->sk_callback_lock); - - strp_check_rcv(&sw_ctx_rx->strp); } goto out; -- cgit v1.2.3 From ac78fc148d8249dbf382c2127456dd08ec5b161c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Jul 2019 10:29:15 -0700 Subject: net/tls: don't call tls_sk_proto_close for hw record offload The deprecated TOE offload doesn't actually do anything in tls_sk_proto_close() - all TLS code is skipped and context not freed. Remove the callback to make it easier to refactor tls_sk_proto_close(). Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: Daniel Borkmann --- net/tls/tls_main.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 85a9d7d57b32..7ab682ed99fa 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -271,9 +271,6 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) lock_sock(sk); sk_proto_close = ctx->sk_proto_close; - if (ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD) - goto skip_tx_cleanup; - if (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE) { free_ctx = true; goto skip_tx_cleanup; @@ -766,7 +763,6 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base; prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_hw_hash; prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_hw_unhash; - prot[TLS_HW_RECORD][TLS_HW_RECORD].close = tls_sk_proto_close; } static int tls_init(struct sock *sk) -- cgit v1.2.3 From f87e62d45e51b12d48d2cb46b5cde8f83b866bc4 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Jul 2019 10:29:16 -0700 Subject: net/tls: remove close callback sock unlock/lock around TX work flush The tls close() callback currently drops the sock lock, makes a cancel_delayed_work_sync() call, and then relocks the sock. By restructuring the code we can avoid droping lock and then reclaiming it. To simplify this we do the following, tls_sk_proto_close set_bit(CLOSING) set_bit(SCHEDULE) cancel_delay_work_sync() <- cancel workqueue lock_sock(sk) ... release_sock(sk) strp_done() Setting the CLOSING bit prevents the SCHEDULE bit from being cleared by any workqueue items e.g. if one happens to be scheduled and run between when we set SCHEDULE bit and cancel work. Then because SCHEDULE bit is set now no new work will be scheduled. Tested with net selftests and bpf selftests. Signed-off-by: John Fastabend Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: Daniel Borkmann --- include/net/tls.h | 2 ++ net/tls/tls_main.c | 3 +++ net/tls/tls_sw.c | 24 +++++++++++++++++------- 3 files changed, 22 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/tls.h b/include/net/tls.h index 43f551cd508b..d4276cb6de53 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -162,6 +162,7 @@ struct tls_sw_context_tx { int async_capable; #define BIT_TX_SCHEDULED 0 +#define BIT_TX_CLOSING 1 unsigned long tx_bitmask; }; @@ -360,6 +361,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_close(struct sock *sk, long timeout); +void tls_sw_cancel_work_tx(struct tls_context *tls_ctx); void tls_sw_free_resources_tx(struct sock *sk); void tls_sw_free_resources_rx(struct sock *sk); void tls_sw_release_resources_rx(struct sock *sk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 7ab682ed99fa..5c29b410cf7d 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -268,6 +268,9 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) void (*sk_proto_close)(struct sock *sk, long timeout); bool free_ctx = false; + if (ctx->tx_conf == TLS_SW) + tls_sw_cancel_work_tx(ctx); + lock_sock(sk); sk_proto_close = ctx->sk_proto_close; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f58a8ffc2a9c..38c0e53c727d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2054,6 +2054,15 @@ static void tls_data_ready(struct sock *sk) } } +void tls_sw_cancel_work_tx(struct tls_context *tls_ctx) +{ + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + + set_bit(BIT_TX_CLOSING, &ctx->tx_bitmask); + set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask); + cancel_delayed_work_sync(&ctx->tx_work.work); +} + void tls_sw_free_resources_tx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2065,11 +2074,6 @@ void tls_sw_free_resources_tx(struct sock *sk) if (atomic_read(&ctx->encrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - release_sock(sk); - cancel_delayed_work_sync(&ctx->tx_work.work); - lock_sock(sk); - - /* Tx whatever records we can transmit and abandon the rest */ tls_tx_records(sk, -1); /* Free up un-sent records in tx_list. First, free @@ -2137,11 +2141,17 @@ static void tx_work_handler(struct work_struct *work) struct tx_work, work); struct sock *sk = tx_work->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + struct tls_sw_context_tx *ctx; - if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) + if (unlikely(!tls_ctx)) return; + ctx = tls_sw_ctx_tx(tls_ctx); + if (test_bit(BIT_TX_CLOSING, &ctx->tx_bitmask)) + return; + + if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) + return; lock_sock(sk); tls_tx_records(sk, -1); release_sock(sk); -- cgit v1.2.3 From 313ab004805cf52a42673b15852b3842474ccd87 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Jul 2019 10:29:17 -0700 Subject: net/tls: remove sock unlock/lock around strp_done() The tls close() callback currently drops the sock lock to call strp_done(). Split up the RX cleanup into stopping the strparser and releasing most resources, syncing strparser and finally freeing the context. To avoid the need for a strp_done() call on the cleanup path of device offload make sure we don't arm the strparser until we are sure init will be successful. Signed-off-by: John Fastabend Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: Daniel Borkmann --- include/net/tls.h | 7 +++--- net/tls/tls_device.c | 1 - net/tls/tls_main.c | 61 ++++++++++++++++++++++++++-------------------------- net/tls/tls_sw.c | 40 +++++++++++++++++++++++++--------- 4 files changed, 64 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/include/net/tls.h b/include/net/tls.h index d4276cb6de53..235508e35fd4 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -107,9 +107,7 @@ struct tls_device { enum { TLS_BASE, TLS_SW, -#ifdef CONFIG_TLS_DEVICE TLS_HW, -#endif TLS_HW_RECORD, TLS_NUM_CONFIG, }; @@ -357,14 +355,17 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval, int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); +void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_close(struct sock *sk, long timeout); void tls_sw_cancel_work_tx(struct tls_context *tls_ctx); -void tls_sw_free_resources_tx(struct sock *sk); +void tls_sw_release_resources_tx(struct sock *sk); +void tls_sw_free_ctx_tx(struct tls_context *tls_ctx); void tls_sw_free_resources_rx(struct sock *sk); void tls_sw_release_resources_rx(struct sock *sk); +void tls_sw_free_ctx_rx(struct tls_context *tls_ctx); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); bool tls_sw_stream_read(const struct sock *sk); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 4d67d72f007c..7c0b2b778703 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1045,7 +1045,6 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) rc = tls_set_sw_offload(sk, ctx, 0); if (rc) goto release_ctx; - tls_sw_strparser_arm(sk, ctx); rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX, &ctx->crypto_recv.info, diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 5c29b410cf7d..d152a00a7a27 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -261,24 +261,9 @@ void tls_ctx_free(struct tls_context *ctx) kfree(ctx); } -static void tls_sk_proto_close(struct sock *sk, long timeout) +static void tls_sk_proto_cleanup(struct sock *sk, + struct tls_context *ctx, long timeo) { - struct tls_context *ctx = tls_get_ctx(sk); - long timeo = sock_sndtimeo(sk, 0); - void (*sk_proto_close)(struct sock *sk, long timeout); - bool free_ctx = false; - - if (ctx->tx_conf == TLS_SW) - tls_sw_cancel_work_tx(ctx); - - lock_sock(sk); - sk_proto_close = ctx->sk_proto_close; - - if (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE) { - free_ctx = true; - goto skip_tx_cleanup; - } - if (unlikely(sk->sk_write_pending) && !wait_on_pending_writer(sk, &timeo)) tls_handle_open_record(sk, 0); @@ -287,7 +272,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) if (ctx->tx_conf == TLS_SW) { kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); - tls_sw_free_resources_tx(sk); + tls_sw_release_resources_tx(sk); #ifdef CONFIG_TLS_DEVICE } else if (ctx->tx_conf == TLS_HW) { tls_device_free_resources_tx(sk); @@ -295,26 +280,40 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) } if (ctx->rx_conf == TLS_SW) - tls_sw_free_resources_rx(sk); + tls_sw_release_resources_rx(sk); #ifdef CONFIG_TLS_DEVICE if (ctx->rx_conf == TLS_HW) tls_device_offload_cleanup_rx(sk); - - if (ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW) { -#else - { #endif - tls_ctx_free(ctx); - ctx = NULL; - } +} + +static void tls_sk_proto_close(struct sock *sk, long timeout) +{ + void (*sk_proto_close)(struct sock *sk, long timeout); + struct tls_context *ctx = tls_get_ctx(sk); + long timeo = sock_sndtimeo(sk, 0); + bool free_ctx; + + if (ctx->tx_conf == TLS_SW) + tls_sw_cancel_work_tx(ctx); + + lock_sock(sk); + free_ctx = ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW; + sk_proto_close = ctx->sk_proto_close; + + if (ctx->tx_conf != TLS_BASE || ctx->rx_conf != TLS_BASE) + tls_sk_proto_cleanup(sk, ctx, timeo); -skip_tx_cleanup: release_sock(sk); + if (ctx->tx_conf == TLS_SW) + tls_sw_free_ctx_tx(ctx); + if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW) + tls_sw_strparser_done(ctx); + if (ctx->rx_conf == TLS_SW) + tls_sw_free_ctx_rx(ctx); sk_proto_close(sk, timeout); - /* free ctx for TLS_HW_RECORD, used by tcp_set_state - * for sk->sk_prot->unhash [tls_hw_unhash] - */ + if (free_ctx) tls_ctx_free(ctx); } @@ -541,9 +540,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, rc = tls_set_sw_offload(sk, ctx, 0); if (rc) goto err_crypto_info; - tls_sw_strparser_arm(sk, ctx); conf = TLS_SW; } + tls_sw_strparser_arm(sk, ctx); } if (tx) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 38c0e53c727d..91d21b048a9b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2063,7 +2063,7 @@ void tls_sw_cancel_work_tx(struct tls_context *tls_ctx) cancel_delayed_work_sync(&ctx->tx_work.work); } -void tls_sw_free_resources_tx(struct sock *sk) +void tls_sw_release_resources_tx(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); @@ -2096,6 +2096,11 @@ void tls_sw_free_resources_tx(struct sock *sk) crypto_free_aead(ctx->aead_send); tls_free_open_rec(sk); +} + +void tls_sw_free_ctx_tx(struct tls_context *tls_ctx) +{ + struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); kfree(ctx); } @@ -2114,25 +2119,40 @@ void tls_sw_release_resources_rx(struct sock *sk) skb_queue_purge(&ctx->rx_list); crypto_free_aead(ctx->aead_recv); strp_stop(&ctx->strp); - write_lock_bh(&sk->sk_callback_lock); - sk->sk_data_ready = ctx->saved_data_ready; - write_unlock_bh(&sk->sk_callback_lock); - release_sock(sk); - strp_done(&ctx->strp); - lock_sock(sk); + /* If tls_sw_strparser_arm() was not called (cleanup paths) + * we still want to strp_stop(), but sk->sk_data_ready was + * never swapped. + */ + if (ctx->saved_data_ready) { + write_lock_bh(&sk->sk_callback_lock); + sk->sk_data_ready = ctx->saved_data_ready; + write_unlock_bh(&sk->sk_callback_lock); + } } } -void tls_sw_free_resources_rx(struct sock *sk) +void tls_sw_strparser_done(struct tls_context *tls_ctx) { - struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - tls_sw_release_resources_rx(sk); + strp_done(&ctx->strp); +} + +void tls_sw_free_ctx_rx(struct tls_context *tls_ctx) +{ + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); kfree(ctx); } +void tls_sw_free_resources_rx(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + + tls_sw_release_resources_rx(sk); + tls_sw_free_ctx_rx(tls_ctx); +} + /* The work handler to transmitt the encrypted records in tx_list */ static void tx_work_handler(struct work_struct *work) { -- cgit v1.2.3 From 32857cf57f920cdc03b5095f08febec94cf9c36b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Jul 2019 10:29:18 -0700 Subject: net/tls: fix transition through disconnect with close It is possible (via shutdown()) for TCP socks to go through TCP_CLOSE state via tcp_disconnect() without actually calling tcp_close which would then call the tls close callback. Because of this a user could disconnect a socket then put it in a LISTEN state which would break our assumptions about sockets always being ESTABLISHED state. More directly because close() can call unhash() and unhash is implemented by sockmap if a sockmap socket has TLS enabled we can incorrectly destroy the psock from unhash() and then call its close handler again. But because the psock (sockmap socket representation) is already destroyed we call close handler in sk->prot. However, in some cases (TLS BASE/BASE case) this will still point at the sockmap close handler resulting in a circular call and crash reported by syzbot. To fix both above issues implement the unhash() routine for TLS. v4: - add note about tls offload still needing the fix; - move sk_proto to the cold cache line; - split TX context free into "release" and "free", otherwise the GC work itself is in already freed memory; - more TX before RX for consistency; - reuse tls_ctx_free(); - schedule the GC work after we're done with context to avoid UAF; - don't set the unhash in all modes, all modes "inherit" TLS_BASE's callbacks anyway; - disable the unhash hook for TLS_HW. Fixes: 3c4d7559159bf ("tls: kernel TLS support") Reported-by: Eric Dumazet Signed-off-by: John Fastabend Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- Documentation/networking/tls-offload.rst | 6 ++++ include/net/tls.h | 5 ++- net/tls/tls_main.c | 55 ++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index 048e5ca44824..8a1eeb393316 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -513,3 +513,9 @@ Redirects leak clear text In the RX direction, if segment has already been decrypted by the device and it gets redirected or mirrored - clear text will be transmitted out. + +shutdown() doesn't clear TLS state +---------------------------------- + +shutdown() system call allows for a TLS socket to be reused as a different +connection. Offload doesn't currently handle that. diff --git a/include/net/tls.h b/include/net/tls.h index 235508e35fd4..9e425ac2de45 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -271,6 +271,8 @@ struct tls_context { unsigned long flags; /* cache cold stuff */ + struct proto *sk_proto; + void (*sk_destruct)(struct sock *sk); void (*sk_proto_close)(struct sock *sk, long timeout); @@ -288,6 +290,8 @@ struct tls_context { struct list_head list; refcount_t refcount; + + struct work_struct gc; }; enum tls_offload_ctx_dir { @@ -359,7 +363,6 @@ void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -void tls_sw_close(struct sock *sk, long timeout); void tls_sw_cancel_work_tx(struct tls_context *tls_ctx); void tls_sw_release_resources_tx(struct sock *sk); void tls_sw_free_ctx_tx(struct tls_context *tls_ctx); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index d152a00a7a27..48f1c26459d0 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -261,6 +261,33 @@ void tls_ctx_free(struct tls_context *ctx) kfree(ctx); } +static void tls_ctx_free_deferred(struct work_struct *gc) +{ + struct tls_context *ctx = container_of(gc, struct tls_context, gc); + + /* Ensure any remaining work items are completed. The sk will + * already have lost its tls_ctx reference by the time we get + * here so no xmit operation will actually be performed. + */ + if (ctx->tx_conf == TLS_SW) { + tls_sw_cancel_work_tx(ctx); + tls_sw_free_ctx_tx(ctx); + } + + if (ctx->rx_conf == TLS_SW) { + tls_sw_strparser_done(ctx); + tls_sw_free_ctx_rx(ctx); + } + + tls_ctx_free(ctx); +} + +static void tls_ctx_free_wq(struct tls_context *ctx) +{ + INIT_WORK(&ctx->gc, tls_ctx_free_deferred); + schedule_work(&ctx->gc); +} + static void tls_sk_proto_cleanup(struct sock *sk, struct tls_context *ctx, long timeo) { @@ -288,6 +315,26 @@ static void tls_sk_proto_cleanup(struct sock *sk, #endif } +static void tls_sk_proto_unhash(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + long timeo = sock_sndtimeo(sk, 0); + struct tls_context *ctx; + + if (unlikely(!icsk->icsk_ulp_data)) { + if (sk->sk_prot->unhash) + sk->sk_prot->unhash(sk); + } + + ctx = tls_get_ctx(sk); + tls_sk_proto_cleanup(sk, ctx, timeo); + icsk->icsk_ulp_data = NULL; + + if (ctx->sk_proto->unhash) + ctx->sk_proto->unhash(sk); + tls_ctx_free_wq(ctx); +} + static void tls_sk_proto_close(struct sock *sk, long timeout) { void (*sk_proto_close)(struct sock *sk, long timeout); @@ -305,6 +352,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) if (ctx->tx_conf != TLS_BASE || ctx->rx_conf != TLS_BASE) tls_sk_proto_cleanup(sk, ctx, timeo); + sk->sk_prot = ctx->sk_proto; release_sock(sk); if (ctx->tx_conf == TLS_SW) tls_sw_free_ctx_tx(ctx); @@ -608,6 +656,7 @@ static struct tls_context *create_ctx(struct sock *sk) ctx->setsockopt = sk->sk_prot->setsockopt; ctx->getsockopt = sk->sk_prot->getsockopt; ctx->sk_proto_close = sk->sk_prot->close; + ctx->unhash = sk->sk_prot->unhash; return ctx; } @@ -731,6 +780,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt; prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt; prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close; + prot[TLS_BASE][TLS_BASE].unhash = tls_sk_proto_unhash; prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; @@ -748,16 +798,20 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], #ifdef CONFIG_TLS_DEVICE prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; + prot[TLS_HW][TLS_BASE].unhash = base->unhash; prot[TLS_HW][TLS_BASE].sendmsg = tls_device_sendmsg; prot[TLS_HW][TLS_BASE].sendpage = tls_device_sendpage; prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW]; + prot[TLS_HW][TLS_SW].unhash = base->unhash; prot[TLS_HW][TLS_SW].sendmsg = tls_device_sendmsg; prot[TLS_HW][TLS_SW].sendpage = tls_device_sendpage; prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW]; + prot[TLS_BASE][TLS_HW].unhash = base->unhash; prot[TLS_SW][TLS_HW] = prot[TLS_SW][TLS_SW]; + prot[TLS_SW][TLS_HW].unhash = base->unhash; prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW]; #endif @@ -794,6 +848,7 @@ static int tls_init(struct sock *sk) tls_build_proto(sk); ctx->tx_conf = TLS_BASE; ctx->rx_conf = TLS_BASE; + ctx->sk_proto = sk->sk_prot; update_sk_prot(sk, ctx); out: return rc; -- cgit v1.2.3 From 45a4521dcbd92e71c9e53031b40e34211d3b4feb Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Jul 2019 10:29:19 -0700 Subject: bpf: sockmap, sock_map_delete needs to use xchg __sock_map_delete() may be called from a tcp event such as unhash or close from the following trace, tcp_bpf_close() tcp_bpf_remove() sk_psock_unlink() sock_map_delete_from_link() __sock_map_delete() In this case the sock lock is held but this only protects against duplicate removals on the TCP side. If the map is free'd then we have this trace, sock_map_free xchg() <- replaces map entry sock_map_unref() sk_psock_put() sock_map_del_link() The __sock_map_delete() call however uses a read, test, null over the map entry which can result in both paths trying to free the map entry. To fix use xchg in TCP paths as well so we avoid having two references to the same map entry. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- net/core/sock_map.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 52d4faeee18b..28702f2e9a4a 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -276,16 +276,20 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, struct sock **psk) { struct sock *sk; + int err = 0; raw_spin_lock_bh(&stab->lock); sk = *psk; if (!sk_test || sk_test == sk) - *psk = NULL; + sk = xchg(psk, NULL); + + if (likely(sk)) + sock_map_unref(sk, psk); + else + err = -EINVAL; + raw_spin_unlock_bh(&stab->lock); - if (unlikely(!sk)) - return -EINVAL; - sock_map_unref(sk, psk); - return 0; + return err; } static void sock_map_delete_from_link(struct bpf_map *map, struct sock *sk, -- cgit v1.2.3 From 2bb90e5cc90e1d09f631aeab041a9cf913a5bbe5 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Jul 2019 10:29:20 -0700 Subject: bpf: sockmap, synchronize_rcu before free'ing map We need to have a synchronize_rcu before free'ing the sockmap because any outstanding psock references will have a pointer to the map and when they use this could trigger a use after free. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- net/core/sock_map.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 28702f2e9a4a..56bcabe7c2f2 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -247,6 +247,8 @@ static void sock_map_free(struct bpf_map *map) raw_spin_unlock_bh(&stab->lock); rcu_read_unlock(); + synchronize_rcu(); + bpf_map_area_free(stab->sks); kfree(stab); } -- cgit v1.2.3 From 0e858739c2d2eedeeac1d35bfa0ec3cc2a7190d8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Jul 2019 10:29:21 -0700 Subject: bpf: sockmap, only create entry if ulp is not already enabled Sockmap does not currently support adding sockets after TLS has been enabled. There never was a real use case for this so it was never added. But, we lost the test for ULP at some point so add it here and fail the socket insert if TLS is enabled. Future work could make sockmap support this use case but fixup the bug here. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- net/core/sock_map.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 56bcabe7c2f2..1330a7442e5b 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -334,6 +334,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, struct sock *sk, u64 flags) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); + struct inet_connection_sock *icsk = inet_csk(sk); struct sk_psock_link *link; struct sk_psock *psock; struct sock *osk; @@ -344,6 +345,8 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, return -EINVAL; if (unlikely(idx >= map->max_entries)) return -E2BIG; + if (unlikely(icsk->icsk_ulp_data)) + return -EINVAL; link = sk_psock_init_link(); if (!link) -- cgit v1.2.3 From 95fa145479fbc0a0c1fd3274ceb42ec03c042a4a Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Jul 2019 10:29:22 -0700 Subject: bpf: sockmap/tls, close can race with map free When a map free is called and in parallel a socket is closed we have two paths that can potentially reset the socket prot ops, the bpf close() path and the map free path. This creates a problem with which prot ops should be used from the socket closed side. If the map_free side completes first then we want to call the original lowest level ops. However, if the tls path runs first we want to call the sockmap ops. Additionally there was no locking around prot updates in TLS code paths so the prot ops could be changed multiple times once from TLS path and again from sockmap side potentially leaving ops pointed at either TLS or sockmap when psock and/or tls context have already been destroyed. To fix this race first only update ops inside callback lock so that TLS, sockmap and lowest level all agree on prot state. Second and a ULP callback update() so that lower layers can inform the upper layer when they are being removed allowing the upper layer to reset prot ops. This gets us close to allowing sockmap and tls to be stacked in arbitrary order but will save that patch for *next trees. v4: - make sure we don't free things for device; - remove the checks which swap the callbacks back only if TLS is at the top. Reported-by: syzbot+06537213db7ba2745c4a@syzkaller.appspotmail.com Fixes: 02c558b2d5d6 ("bpf: sockmap, support for msg_peek in sk_msg with redirect ingress") Signed-off-by: John Fastabend Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: Daniel Borkmann --- include/linux/skmsg.h | 8 +++++++- include/net/tcp.h | 3 +++ net/core/skmsg.c | 4 ++-- net/ipv4/tcp_ulp.c | 13 +++++++++++++ net/tls/tls_main.c | 33 ++++++++++++++++++++++++++++----- 5 files changed, 53 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 50ced8aba9db..e4b3fb4bb77c 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -354,7 +354,13 @@ static inline void sk_psock_restore_proto(struct sock *sk, sk->sk_write_space = psock->saved_write_space; if (psock->sk_proto) { - sk->sk_prot = psock->sk_proto; + struct inet_connection_sock *icsk = inet_csk(sk); + bool has_ulp = !!icsk->icsk_ulp_data; + + if (has_ulp) + tcp_update_ulp(sk, psock->sk_proto); + else + sk->sk_prot = psock->sk_proto; psock->sk_proto = NULL; } } diff --git a/include/net/tcp.h b/include/net/tcp.h index f42d300f0cfa..c82a23470081 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2103,6 +2103,8 @@ struct tcp_ulp_ops { /* initialize ulp */ int (*init)(struct sock *sk); + /* update ulp */ + void (*update)(struct sock *sk, struct proto *p); /* cleanup ulp */ void (*release)(struct sock *sk); @@ -2114,6 +2116,7 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type); int tcp_set_ulp(struct sock *sk, const char *name); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); +void tcp_update_ulp(struct sock *sk, struct proto *p); #define MODULE_ALIAS_TCP_ULP(name) \ __MODULE_INFO(alias, alias_userspace, name); \ diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 93bffaad2135..6832eeb4b785 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -585,12 +585,12 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy); void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { - rcu_assign_sk_user_data(sk, NULL); sk_psock_cork_free(psock); sk_psock_zap_ingress(psock); - sk_psock_restore_proto(sk, psock); write_lock_bh(&sk->sk_callback_lock); + sk_psock_restore_proto(sk, psock); + rcu_assign_sk_user_data(sk, NULL); if (psock->progs.skb_parser) sk_psock_stop_strp(sk, psock); write_unlock_bh(&sk->sk_callback_lock); diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 3d8a1d835471..4849edb62d52 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -96,6 +96,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen) rcu_read_unlock(); } +void tcp_update_ulp(struct sock *sk, struct proto *proto) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (!icsk->icsk_ulp_ops) { + sk->sk_prot = proto; + return; + } + + if (icsk->icsk_ulp_ops->update) + icsk->icsk_ulp_ops->update(sk, proto); +} + void tcp_cleanup_ulp(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 48f1c26459d0..f208f8455ef2 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -328,7 +328,10 @@ static void tls_sk_proto_unhash(struct sock *sk) ctx = tls_get_ctx(sk); tls_sk_proto_cleanup(sk, ctx, timeo); + write_lock_bh(&sk->sk_callback_lock); icsk->icsk_ulp_data = NULL; + sk->sk_prot = ctx->sk_proto; + write_unlock_bh(&sk->sk_callback_lock); if (ctx->sk_proto->unhash) ctx->sk_proto->unhash(sk); @@ -337,7 +340,7 @@ static void tls_sk_proto_unhash(struct sock *sk) static void tls_sk_proto_close(struct sock *sk, long timeout) { - void (*sk_proto_close)(struct sock *sk, long timeout); + struct inet_connection_sock *icsk = inet_csk(sk); struct tls_context *ctx = tls_get_ctx(sk); long timeo = sock_sndtimeo(sk, 0); bool free_ctx; @@ -347,12 +350,15 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) lock_sock(sk); free_ctx = ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW; - sk_proto_close = ctx->sk_proto_close; if (ctx->tx_conf != TLS_BASE || ctx->rx_conf != TLS_BASE) tls_sk_proto_cleanup(sk, ctx, timeo); + write_lock_bh(&sk->sk_callback_lock); + if (free_ctx) + icsk->icsk_ulp_data = NULL; sk->sk_prot = ctx->sk_proto; + write_unlock_bh(&sk->sk_callback_lock); release_sock(sk); if (ctx->tx_conf == TLS_SW) tls_sw_free_ctx_tx(ctx); @@ -360,7 +366,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) tls_sw_strparser_done(ctx); if (ctx->rx_conf == TLS_SW) tls_sw_free_ctx_rx(ctx); - sk_proto_close(sk, timeout); + ctx->sk_proto_close(sk, timeout); if (free_ctx) tls_ctx_free(ctx); @@ -827,7 +833,7 @@ static int tls_init(struct sock *sk) int rc = 0; if (tls_hw_prot(sk)) - goto out; + return 0; /* The TLS ulp is currently supported only for TCP sockets * in ESTABLISHED state. @@ -838,22 +844,38 @@ static int tls_init(struct sock *sk) if (sk->sk_state != TCP_ESTABLISHED) return -ENOTSUPP; + tls_build_proto(sk); + /* allocate tls context */ + write_lock_bh(&sk->sk_callback_lock); ctx = create_ctx(sk); if (!ctx) { rc = -ENOMEM; goto out; } - tls_build_proto(sk); ctx->tx_conf = TLS_BASE; ctx->rx_conf = TLS_BASE; ctx->sk_proto = sk->sk_prot; update_sk_prot(sk, ctx); out: + write_unlock_bh(&sk->sk_callback_lock); return rc; } +static void tls_update(struct sock *sk, struct proto *p) +{ + struct tls_context *ctx; + + ctx = tls_get_ctx(sk); + if (likely(ctx)) { + ctx->sk_proto_close = p->close; + ctx->sk_proto = p; + } else { + sk->sk_prot = p; + } +} + void tls_register_device(struct tls_device *device) { spin_lock_bh(&device_spinlock); @@ -874,6 +896,7 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .name = "tls", .owner = THIS_MODULE, .init = tls_init, + .update = tls_update, }; static int __init tls_register(void) -- cgit v1.2.3 From 06a22d897d82f12776d44dbf0850f5895469cb2a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Jul 2019 03:15:37 -0700 Subject: bpf: fix access to skb_shared_info->gso_segs It is possible we reach bpf_convert_ctx_access() with si->dst_reg == si->src_reg Therefore, we need to load BPF_REG_AX before eventually mangling si->src_reg. syzbot generated this x86 code : 3: 55 push %rbp 4: 48 89 e5 mov %rsp,%rbp 7: 48 81 ec 00 00 00 00 sub $0x0,%rsp // Might be avoided ? e: 53 push %rbx f: 41 55 push %r13 11: 41 56 push %r14 13: 41 57 push %r15 15: 6a 00 pushq $0x0 17: 31 c0 xor %eax,%eax 19: 48 8b bf c0 00 00 00 mov 0xc0(%rdi),%rdi 20: 44 8b 97 bc 00 00 00 mov 0xbc(%rdi),%r10d 27: 4c 01 d7 add %r10,%rdi 2a: 48 0f b7 7f 06 movzwq 0x6(%rdi),%rdi // Crash 2f: 5b pop %rbx 30: 41 5f pop %r15 32: 41 5e pop %r14 34: 41 5d pop %r13 36: 5b pop %rbx 37: c9 leaveq 38: c3 retq Fixes: d9ff286a0f59 ("bpf: allow BPF programs access skb_shared_info->gso_segs field") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 4e2a79b2fd77..7878f918b8c0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7455,12 +7455,12 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, gso_segs): /* si->dst_reg = skb_shinfo(SKB); */ #ifdef NET_SKBUFF_DATA_USES_OFFSET - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), - si->dst_reg, si->src_reg, - offsetof(struct sk_buff, head)); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), BPF_REG_AX, si->src_reg, offsetof(struct sk_buff, end)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, head)); *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); #else *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), -- cgit v1.2.3 From b7a14297f102b6e2ce6f16feffebbb9bde1e9b55 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 18 May 2019 17:35:43 +0800 Subject: can: gw: Fix error path of cgw_module_init This patch add error path for cgw_module_init to avoid possible crash if some error occurs. Fixes: c1aabdf379bc ("can-gw: add netlink based CAN routing") Signed-off-by: YueHaibing Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/gw.c | 48 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/can/gw.c b/net/can/gw.c index 5275ddf580bc..72711053ebe6 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1046,32 +1046,50 @@ static __init int cgw_module_init(void) pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n", max_hops); - register_pernet_subsys(&cangw_pernet_ops); + ret = register_pernet_subsys(&cangw_pernet_ops); + if (ret) + return ret; + + ret = -ENOMEM; cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job), 0, 0, NULL); - if (!cgw_cache) - return -ENOMEM; + goto out_cache_create; /* set notifier */ notifier.notifier_call = cgw_notifier; - register_netdevice_notifier(¬ifier); + ret = register_netdevice_notifier(¬ifier); + if (ret) + goto out_register_notifier; ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0); - if (ret) { - unregister_netdevice_notifier(¬ifier); - kmem_cache_destroy(cgw_cache); - return -ENOBUFS; - } - - /* Only the first call to rtnl_register_module can fail */ - rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE, - cgw_create_job, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE, - cgw_remove_job, NULL, 0); + if (ret) + goto out_rtnl_register1; + + ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE, + cgw_create_job, NULL, 0); + if (ret) + goto out_rtnl_register2; + ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE, + cgw_remove_job, NULL, 0); + if (ret) + goto out_rtnl_register3; return 0; + +out_rtnl_register3: + rtnl_unregister(PF_CAN, RTM_NEWROUTE); +out_rtnl_register2: + rtnl_unregister(PF_CAN, RTM_GETROUTE); +out_rtnl_register1: + unregister_netdevice_notifier(¬ifier); +out_register_notifier: + kmem_cache_destroy(cgw_cache); +out_cache_create: + unregister_pernet_subsys(&cangw_pernet_ops); + + return ret; } static __exit void cgw_module_exit(void) -- cgit v1.2.3 From c7148c03db80e44da7197ec5f558f630333c5cad Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 24 Jul 2019 13:56:37 -0700 Subject: net/ipv4: cleanup error condition testing Cleanup testing for error condition. Signed-off-by: Pavel Machek Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index d666756be5f1..a999451345f9 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -331,7 +331,7 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params); if (!prev) fq = inet_frag_create(fqdir, key, &prev); - if (prev && !IS_ERR(prev)) { + if (!IS_ERR_OR_NULL(prev)) { fq = prev; if (!refcount_inc_not_zero(&fq->refcnt)) fq = NULL; -- cgit v1.2.3 From 3bc817d665ac6d9de89f59df522ad86f5b5dfc03 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 24 Jul 2019 20:00:42 +0800 Subject: ip6_gre: reload ipv6h in prepare_ip6gre_xmit_ipv6 Since ip6_tnl_parse_tlv_enc_lim() can call pskb_may_pull() which may change skb->data, so we need to re-load ipv6h at the right place. Fixes: 898b29798e36 ("ip6_gre: Refactor ip6gre xmit codes") Cc: William Tu Signed-off-by: Haishuang Yan Acked-by: William Tu Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c2049c72f3e5..dd2d0b963260 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -660,12 +660,13 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb, struct flowi6 *fl6, __u8 *dsfield, int *encap_limit) { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ipv6hdr *ipv6h; struct ip6_tnl *t = netdev_priv(dev); __u16 offset; offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ + ipv6h = ipv6_hdr(skb); if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; -- cgit v1.2.3 From 260637903f47f20c5918bb5c1eea52b2a28ea863 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Jul 2019 17:00:01 +0200 Subject: ovs: datapath: hide clang frame-overflow warnings Some functions in the datapath code are factored out so that each one has a stack frame smaller than 1024 bytes with gcc. However, when compiling with clang, the functions are inlined more aggressively and combined again so we get net/openvswitch/datapath.c:1124:12: error: stack frame size of 1528 bytes in function 'ovs_flow_cmd_set' [-Werror,-Wframe-larger-than=] Marking both get_flow_actions() and ovs_nla_init_match_and_action() as 'noinline_for_stack' gives us the same behavior that we see with gcc, and no warning. Note that this does not mean we actually use less stack, as the functions call each other, and we still get three copies of the large 'struct sw_flow_key' type on the stack. The comment tells us that this was previously considered safe, presumably since the netlink parsing functions are called with a known backchain that does not also use a lot of stack space. Fixes: 9cc9a5cb176c ("datapath: Avoid using stack larger than 1024.") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 892287d06c17..d01410e52097 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1047,7 +1047,7 @@ error: } /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */ -static struct sw_flow_actions *get_flow_actions(struct net *net, +static noinline_for_stack struct sw_flow_actions *get_flow_actions(struct net *net, const struct nlattr *a, const struct sw_flow_key *key, const struct sw_flow_mask *mask, @@ -1081,12 +1081,13 @@ static struct sw_flow_actions *get_flow_actions(struct net *net, * we should not to return match object with dangling reference * to mask. * */ -static int ovs_nla_init_match_and_action(struct net *net, - struct sw_flow_match *match, - struct sw_flow_key *key, - struct nlattr **a, - struct sw_flow_actions **acts, - bool log) +static noinline_for_stack int +ovs_nla_init_match_and_action(struct net *net, + struct sw_flow_match *match, + struct sw_flow_key *key, + struct nlattr **a, + struct sw_flow_actions **acts, + bool log) { struct sw_flow_mask mask; int error = 0; -- cgit v1.2.3 From 4638faac032756f7eab5524be7be56bee77e426b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 22 Jul 2019 20:41:22 -0700 Subject: netrom: hold sock when setting skb->destructor sock_efree() releases the sock refcnt, if we don't hold this refcnt when setting skb->destructor to it, the refcnt would not be balanced. This leads to several bug reports from syzbot. I have checked other users of sock_efree(), all of them hold the sock refcnt. Fixes: c8c8218ec5af ("netrom: fix a memory leak in nr_rx_frame()") Reported-and-tested-by: Reported-and-tested-by: Reported-and-tested-by: Reported-and-tested-by: Cc: Ralf Baechle Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 96740d389377..c4f54ad2b98a 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -967,6 +967,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) window = skb->data[20]; + sock_hold(make); skb->sk = make; skb->destructor = sock_efree; make->sk_state = TCP_ESTABLISHED; -- cgit v1.2.3 From cb81572e8cb50c5fb98b0b962cdfe48fff71fd37 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 23 Jul 2019 15:27:52 +0200 Subject: netfilter: nf_tables: Make nft_meta expression more robust nft_meta_get_eval()'s tendency to bail out setting NFT_BREAK verdict in situations where required data is missing leads to unexpected behaviour with inverted checks like so: | meta iifname != eth0 accept This rule will never match if there is no input interface (or it is not known) which is not intuitive and, what's worse, breaks consistency of iptables-nft with iptables-legacy. Fix this by falling back to placing a value in dreg which never matches (avoiding accidental matches), i.e. zero for interface index and an empty string for interface name. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_meta_bridge.c | 6 +----- net/netfilter/nft_meta.c | 16 ++++------------ 2 files changed, 5 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index bed66f536b34..a98dec2cf0cf 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -30,13 +30,9 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_META_BRI_IIFNAME: br_dev = nft_meta_get_bridge(in); - if (!br_dev) - goto err; break; case NFT_META_BRI_OIFNAME: br_dev = nft_meta_get_bridge(out); - if (!br_dev) - goto err; break; case NFT_META_BRI_IIFPVID: { u16 p_pvid; @@ -64,7 +60,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, goto out; } - strncpy((char *)dest, br_dev->name, IFNAMSIZ); + strncpy((char *)dest, br_dev ? br_dev->name : "", IFNAMSIZ); return; out: return nft_meta_get_eval(expr, regs, pkt); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index f1b1d948c07b..f69afb9ff3cb 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -60,24 +60,16 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = skb->mark; break; case NFT_META_IIF: - if (in == NULL) - goto err; - *dest = in->ifindex; + *dest = in ? in->ifindex : 0; break; case NFT_META_OIF: - if (out == NULL) - goto err; - *dest = out->ifindex; + *dest = out ? out->ifindex : 0; break; case NFT_META_IIFNAME: - if (in == NULL) - goto err; - strncpy((char *)dest, in->name, IFNAMSIZ); + strncpy((char *)dest, in ? in->name : "", IFNAMSIZ); break; case NFT_META_OIFNAME: - if (out == NULL) - goto err; - strncpy((char *)dest, out->name, IFNAMSIZ); + strncpy((char *)dest, out ? out->name : "", IFNAMSIZ); break; case NFT_META_IIFTYPE: if (in == NULL) -- cgit v1.2.3 From 67d86835840a533ce868c42a7f05507e66ba95f0 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 23 Jul 2019 15:27:53 +0200 Subject: netfilter: nft_meta_bridge: Eliminate 'out' label The label is used just once and the code it points at is not reused, no point in keeping it. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_meta_bridge.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index a98dec2cf0cf..1804e867f715 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -57,13 +57,11 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, return; } default: - goto out; + return nft_meta_get_eval(expr, regs, pkt); } strncpy((char *)dest, br_dev ? br_dev->name : "", IFNAMSIZ); return; -out: - return nft_meta_get_eval(expr, regs, pkt); err: regs->verdict.code = NFT_BREAK; } -- cgit v1.2.3 From c8ec4632c6ac9cda0e8c3d51aa41eeab66585bd5 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 22 Jul 2019 21:43:00 -0700 Subject: ife: error out when nla attributes are empty act_ife at least requires TCA_IFE_PARMS, so we have to bail out when there is no attribute passed in. Reported-by: syzbot+fbb5b288c9cb6a2eeac4@syzkaller.appspotmail.com Fixes: ef6980b6becb ("introduce IFE action") Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 41d5398dd2f2..3578196d1600 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -481,6 +481,11 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, int ret = 0; int err; + if (!nla) { + NL_SET_ERR_MSG_MOD(extack, "IFE requires attributes to be passed"); + return -EINVAL; + } + err = nla_parse_nested_deprecated(tb, TCA_IFE_MAX, nla, ife_policy, NULL); if (err < 0) -- cgit v1.2.3 From 47d858d0bdcd47cc1c6c9eeca91b091dd9e55637 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Thu, 25 Jul 2019 11:07:56 +0800 Subject: ipip: validate header length in ipip_tunnel_xmit We need the same checks introduced by commit cb9f1b783850 ("ip: validate header length on virtual device xmit") for ipip tunnel. Fixes: cb9f1b783850b ("ip: validate header length on virtual device xmit") Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 43adfc1641ba..2f01cf6fa0de 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -275,6 +275,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, const struct iphdr *tiph = &tunnel->parms.iph; u8 ipproto; + if (!pskb_inet_may_pull(skb)) + goto tx_error; + switch (skb->protocol) { case htons(ETH_P_IP): ipproto = IPPROTO_IPIP; -- cgit v1.2.3 From e6f4051123fd33901e9655a675b22aefcdc5d277 Mon Sep 17 00:00:00 2001 From: Manikanta Pubbisetty Date: Mon, 22 Jul 2019 12:44:50 +0530 Subject: {nl,mac}80211: fix interface combinations on crypto controlled devices Commit 33d915d9e8ce ("{nl,mac}80211: allow 4addr AP operation on crypto controlled devices") has introduced a change which allows 4addr operation on crypto controlled devices (ex: ath10k). This change has inadvertently impacted the interface combinations logic on such devices. General rule is that software interfaces like AP/VLAN should not be listed under supported interface combinations and should not be considered during validation of these combinations; because of the aforementioned change, AP/VLAN interfaces(if present) will be checked against interfaces supported by the device and blocks valid interface combinations. Consider a case where an AP and AP/VLAN are up and running; when a second AP device is brought up on the same physical device, this AP will be checked against the AP/VLAN interface (which will not be part of supported interface combinations of the device) and blocks second AP to come up. Add a new API cfg80211_iftype_allowed() to fix the problem, this API works for all devices with/without SW crypto control. Signed-off-by: Manikanta Pubbisetty Fixes: 33d915d9e8ce ("{nl,mac}80211: allow 4addr AP operation on crypto controlled devices") Link: https://lore.kernel.org/r/1563779690-9716-1-git-send-email-mpubbise@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 15 +++++++++++++++ net/mac80211/util.c | 7 +++---- net/wireless/core.c | 6 ++---- net/wireless/nl80211.c | 4 +--- net/wireless/util.c | 27 +++++++++++++++++++++++++-- 5 files changed, 46 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 45850a8391d9..26e2ad2c7027 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7320,6 +7320,21 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev, struct cfg80211_pmsr_request *req, gfp_t gfp); +/** + * cfg80211_iftype_allowed - check whether the interface can be allowed + * @wiphy: the wiphy + * @iftype: interface type + * @is_4addr: use_4addr flag, must be '0' when check_swif is '1' + * @check_swif: check iftype against software interfaces + * + * Check whether the interface is allowed to operate; additionally, this API + * can be used to check iftype against the software interfaces when + * check_swif is '1'. + */ +bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, + bool is_4addr, u8 check_swif); + + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1b224fa27367..ad1e58184c4e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3796,9 +3796,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, } /* Always allow software iftypes */ - if (local->hw.wiphy->software_iftypes & BIT(iftype) || - (iftype == NL80211_IFTYPE_AP_VLAN && - local->hw.wiphy->flags & WIPHY_FLAG_4ADDR_AP)) { + if (cfg80211_iftype_allowed(local->hw.wiphy, iftype, 0, 1)) { if (radar_detect) return -EINVAL; return 0; @@ -3833,7 +3831,8 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, if (sdata_iter == sdata || !ieee80211_sdata_running(sdata_iter) || - local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype)) + cfg80211_iftype_allowed(local->hw.wiphy, + wdev_iter->iftype, 0, 1)) continue; params.iftype_num[wdev_iter->iftype]++; diff --git a/net/wireless/core.c b/net/wireless/core.c index 45d9afcff6d5..32b3c719fdfc 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1410,10 +1410,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, } break; case NETDEV_PRE_UP: - if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)) && - !(wdev->iftype == NL80211_IFTYPE_AP_VLAN && - rdev->wiphy.flags & WIPHY_FLAG_4ADDR_AP && - wdev->use_4addr)) + if (!cfg80211_iftype_allowed(wdev->wiphy, wdev->iftype, + wdev->use_4addr, 0)) return notifier_from_errno(-EOPNOTSUPP); if (rfkill_blocked(rdev->rfkill)) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fc83dd179c1a..fd05ae1437a9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3484,9 +3484,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return err; } - if (!(rdev->wiphy.interface_modes & (1 << type)) && - !(type == NL80211_IFTYPE_AP_VLAN && params.use_4addr && - rdev->wiphy.flags & WIPHY_FLAG_4ADDR_AP)) + if (!cfg80211_iftype_allowed(&rdev->wiphy, type, params.use_4addr, 0)) return -EOPNOTSUPP; err = nl80211_parse_mon_options(rdev, type, info, ¶ms); diff --git a/net/wireless/util.c b/net/wireless/util.c index 1c39d6a2e850..d0e35b7b9e35 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1697,7 +1697,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { num_interfaces += params->iftype_num[iftype]; if (params->iftype_num[iftype] > 0 && - !(wiphy->software_iftypes & BIT(iftype))) + !cfg80211_iftype_allowed(wiphy, iftype, 0, 1)) used_iftypes |= BIT(iftype); } @@ -1719,7 +1719,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, return -ENOMEM; for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { - if (wiphy->software_iftypes & BIT(iftype)) + if (cfg80211_iftype_allowed(wiphy, iftype, 0, 1)) continue; for (j = 0; j < c->n_limits; j++) { all_iftypes |= limits[j].types; @@ -2072,3 +2072,26 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, return max_vht_nss; } EXPORT_SYMBOL(ieee80211_get_vht_max_nss); + +bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype, + bool is_4addr, u8 check_swif) + +{ + bool is_vlan = iftype == NL80211_IFTYPE_AP_VLAN; + + switch (check_swif) { + case 0: + if (is_vlan && is_4addr) + return wiphy->flags & WIPHY_FLAG_4ADDR_AP; + return wiphy->interface_modes & BIT(iftype); + case 1: + if (!(wiphy->software_iftypes & BIT(iftype)) && is_vlan) + return wiphy->flags & WIPHY_FLAG_4ADDR_AP; + return wiphy->software_iftypes & BIT(iftype); + default: + break; + } + + return false; +} +EXPORT_SYMBOL(cfg80211_iftype_allowed); -- cgit v1.2.3 From 01f5bffad555f8e22a61f4b1261fe09cf1b96994 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Fri, 26 Jul 2019 00:40:17 +0800 Subject: ip6_tunnel: fix possible use-after-free on xmit ip4ip6/ip6ip6 tunnels run iptunnel_handle_offloads on xmit which can cause a possible use-after-free accessing iph/ipv6h pointer since the packet will be 'uncloned' running pskb_expand_head if it is a cloned gso skb. Fixes: 0e9a709560db ("ip6_tunnel, ip6_gre: fix setting of DSCP on encapsulated packets") Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3134fbb65d7f..754a484d35df 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1278,12 +1278,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; - dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); - skb_set_inner_ipproto(skb, IPPROTO_IPIP); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, @@ -1367,12 +1366,11 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; - dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); - skb_set_inner_ipproto(skb, IPPROTO_IPV6); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, -- cgit v1.2.3 From c7ba50fe2399f0621fae39eb6f5e6abfbb83c38d Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Fri, 26 Jul 2019 22:17:05 +0800 Subject: net: rds: Fix possible null-pointer dereferences in rds_rdma_cm_event_handler_cmn() In rds_rdma_cm_event_handler_cmn(), there are some if statements to check whether conn is NULL, such as on lines 65, 96 and 112. But conn is not checked before being used on line 108: trans->cm_connect_complete(conn, event); and on lines 140-143: rdsdebug("DISCONNECT event - dropping connection " "%pI6c->%pI6c\n", &conn->c_laddr, &conn->c_faddr); rds_conn_drop(conn); Thus, possible null-pointer dereferences may occur. To fix these bugs, conn is checked before being used. These bugs are found by a static analysis tool STCheck written by us. Signed-off-by: Jia-Ju Bai Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/rdma_transport.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index ff74c4bbb9fc..9986d6065c4d 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -105,7 +105,8 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_ESTABLISHED: - trans->cm_connect_complete(conn, event); + if (conn) + trans->cm_connect_complete(conn, event); break; case RDMA_CM_EVENT_REJECTED: @@ -137,6 +138,8 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_DISCONNECTED: + if (!conn) + break; rdsdebug("DISCONNECT event - dropping connection " "%pI6c->%pI6c\n", &conn->c_laddr, &conn->c_faddr); -- cgit v1.2.3 From 05aaa5c97dce4c10a9e7eae2f1569a684e0c5ced Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 26 Jul 2019 15:47:58 -0700 Subject: mac80211: don't WARN on short WMM parameters from AP In a very similar spirit to commit c470bdc1aaf3 ("mac80211: don't WARN on bad WMM parameters from buggy APs"), an AP may not transmit a fully-formed WMM IE. For example, it may miss or repeat an Access Category. The above loop won't catch that and will instead leave one of the four ACs zeroed out. This triggers the following warning in drv_conf_tx() wlan0: invalid CW_min/CW_max: 0/0 and it may leave one of the hardware queues unconfigured. If we detect such a case, let's just print a warning and fall back to the defaults. Tested with a hacked version of hostapd, intentionally corrupting the IEs in hostapd_eid_wmm(). Cc: stable@vger.kernel.org Signed-off-by: Brian Norris Link: https://lore.kernel.org/r/20190726224758.210953-1-briannorris@chromium.org Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a99ad0325309..4c888dc9bd81 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2042,6 +2042,16 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local, ieee80211_regulatory_limit_wmm_params(sdata, ¶ms[ac], ac); } + /* WMM specification requires all 4 ACIs. */ + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + if (params[ac].cw_min == 0) { + sdata_info(sdata, + "AP has invalid WMM params (missing AC %d), using defaults\n", + ac); + return false; + } + } + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { mlme_dbg(sdata, "WMM AC=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d, downgraded=%d\n", -- cgit v1.2.3 From 051c7b39be4a91f6b7d8c4548444e4b850f1f56c Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Mon, 29 Jul 2019 16:24:33 +0800 Subject: net: sched: Fix a possible null-pointer dereference in dequeue_func() In dequeue_func(), there is an if statement on line 74 to check whether skb is NULL: if (skb) When skb is NULL, it is used on line 77: prefetch(&skb->end); Thus, a possible null-pointer dereference may occur. To fix this bug, skb->end is used when skb is not NULL. This bug is found by a static analysis tool STCheck written by us. Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM") Signed-off-by: Jia-Ju Bai Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_codel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 25ef172c23df..30169b3adbbb 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -71,10 +71,10 @@ static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) struct Qdisc *sch = ctx; struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); - if (skb) + if (skb) { sch->qstats.backlog -= qdisc_pkt_len(skb); - - prefetch(&skb->end); /* we'll need skb_shinfo() */ + prefetch(&skb->end); /* we'll need skb_shinfo() */ + } return skb; } -- cgit v1.2.3 From d7bae09fa008c6c9a489580db0a5a12063b97f97 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 29 Jul 2019 12:28:41 +0300 Subject: net: bridge: delete local fdb on device init failure On initialization failure we have to delete the local fdb which was inserted due to the default pvid creation. This problem has been present since the inception of default_pvid. Note that currently there are 2 cases: 1) in br_dev_init() when br_multicast_init() fails 2) if register_netdevice() fails after calling ndo_init() This patch takes care of both since br_vlan_flush() is called on both occasions. Also the new fdb delete would be a no-op on normal bridge device destruction since the local fdb would've been already flushed by br_dev_delete(). This is not an issue for ports since nbp_vlan_init() is called last when adding a port thus nothing can fail after it. Reported-by: syzbot+88533dc8b582309bf3ee@syzkaller.appspotmail.com Fixes: 5be5a2df40f0 ("bridge: Add filtering support for default_pvid") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 021cc9f66804..a544e161c7fa 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -715,6 +715,11 @@ void br_vlan_flush(struct net_bridge *br) ASSERT_RTNL(); + /* delete auto-added default pvid local fdb before flushing vlans + * otherwise it will be leaked on bridge device init failure + */ + br_fdb_delete_by_port(br, NULL, 0, 1); + vg = br_vlan_group(br); __vlan_flush(vg); RCU_INIT_POINTER(br->vlgrp, NULL); -- cgit v1.2.3 From 05bba1edaf9c11023901800bc4d2ce5b96e0836a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 09:59:47 -0500 Subject: net/af_iucv: mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. This patch fixes the following warnings: net/iucv/af_iucv.c: warning: this statement may fall through [-Wimplicit-fallthrough=]: => 537:3, 519:6, 2246:6, 510:6 Notice that, in this particular case, the code comment is modified in accordance with what GCC is expecting to find. Reported-by: Geert Uytterhoeven Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 09e1694b6d34..ebb62a4ebe30 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -512,7 +512,9 @@ static void iucv_sock_close(struct sock *sk) sk->sk_state = IUCV_DISCONN; sk->sk_state_change(sk); } - case IUCV_DISCONN: /* fall through */ + /* fall through */ + + case IUCV_DISCONN: sk->sk_state = IUCV_CLOSING; sk->sk_state_change(sk); @@ -525,8 +527,9 @@ static void iucv_sock_close(struct sock *sk) iucv_sock_in_state(sk, IUCV_CLOSED, 0), timeo); } + /* fall through */ - case IUCV_CLOSING: /* fall through */ + case IUCV_CLOSING: sk->sk_state = IUCV_CLOSED; sk->sk_state_change(sk); @@ -535,8 +538,9 @@ static void iucv_sock_close(struct sock *sk) skb_queue_purge(&iucv->send_skb_q); skb_queue_purge(&iucv->backlog_skb_q); + /* fall through */ - default: /* fall through */ + default: iucv_sever_path(sk, 1); } @@ -2247,10 +2251,10 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, kfree_skb(skb); break; } - /* fall through and receive non-zero length data */ + /* fall through - and receive non-zero length data */ case (AF_IUCV_FLAG_SHT): /* shutdown request */ - /* fall through and receive zero length data */ + /* fall through - and receive zero length data */ case 0: /* plain data frame */ IUCV_SKB_CB(skb)->class = trans_hdr->iucv_hdr.class; -- cgit v1.2.3 From 55b40dbf0e76b4bfb9d8b3a16a0208640a9a45df Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 28 Jul 2019 14:56:36 +0200 Subject: net: fix ifindex collision during namespace removal Commit aca51397d014 ("netns: Fix arbitrary net_device-s corruptions on net_ns stop.") introduced a possibility to hit a BUG in case device is returning back to init_net and two following conditions are met: 1) dev->ifindex value is used in a name of another "dev%d" device in init_net. 2) dev->name is used by another device in init_net. Under real life circumstances this is hard to get. Therefore this has been present happily for over 10 years. To reproduce: $ ip a 1: lo: mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 inet 127.0.0.1/8 scope host lo valid_lft forever preferred_lft forever inet6 ::1/128 scope host valid_lft forever preferred_lft forever 2: dummy0: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether 86:89:3f:86:61:29 brd ff:ff:ff:ff:ff:ff 3: enp0s2: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff $ ip netns add ns1 $ ip -n ns1 link add dummy1ns1 type dummy $ ip -n ns1 link add dummy2ns1 type dummy $ ip link set enp0s2 netns ns1 $ ip -n ns1 link set enp0s2 name dummy0 [ 100.858894] virtio_net virtio0 dummy0: renamed from enp0s2 $ ip link add dev4 type dummy $ ip -n ns1 a 1: lo: mtu 65536 qdisc noop state DOWN group default qlen 1000 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 2: dummy1ns1: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether 16:63:4c:38:3e:ff brd ff:ff:ff:ff:ff:ff 3: dummy2ns1: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether aa:9e:86:dd:6b:5d brd ff:ff:ff:ff:ff:ff 4: dummy0: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff $ ip a 1: lo: mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 inet 127.0.0.1/8 scope host lo valid_lft forever preferred_lft forever inet6 ::1/128 scope host valid_lft forever preferred_lft forever 2: dummy0: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether 86:89:3f:86:61:29 brd ff:ff:ff:ff:ff:ff 4: dev4: mtu 1500 qdisc noop state DOWN group default qlen 1000 link/ether 5a:e1:4a:b6:ec:f8 brd ff:ff:ff:ff:ff:ff $ ip netns del ns1 [ 158.717795] default_device_exit: failed to move dummy0 to init_net: -17 [ 158.719316] ------------[ cut here ]------------ [ 158.720591] kernel BUG at net/core/dev.c:9824! [ 158.722260] invalid opcode: 0000 [#1] SMP KASAN PTI [ 158.723728] CPU: 0 PID: 56 Comm: kworker/u2:1 Not tainted 5.3.0-rc1+ #18 [ 158.725422] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-2.fc30 04/01/2014 [ 158.727508] Workqueue: netns cleanup_net [ 158.728915] RIP: 0010:default_device_exit.cold+0x1d/0x1f [ 158.730683] Code: 84 e8 18 c9 3e fe 0f 0b e9 70 90 ff ff e8 36 e4 52 fe 89 d9 4c 89 e2 48 c7 c6 80 d6 25 84 48 c7 c7 20 c0 25 84 e8 f4 c8 3e [ 158.736854] RSP: 0018:ffff8880347e7b90 EFLAGS: 00010282 [ 158.738752] RAX: 000000000000003b RBX: 00000000ffffffef RCX: 0000000000000000 [ 158.741369] RDX: 0000000000000000 RSI: ffffffff8128013d RDI: ffffed10068fcf64 [ 158.743418] RBP: ffff888033550170 R08: 000000000000003b R09: fffffbfff0b94b9c [ 158.745626] R10: fffffbfff0b94b9b R11: ffffffff85ca5cdf R12: ffff888032f28000 [ 158.748405] R13: dffffc0000000000 R14: ffff8880335501b8 R15: 1ffff110068fcf72 [ 158.750638] FS: 0000000000000000(0000) GS:ffff888036000000(0000) knlGS:0000000000000000 [ 158.752944] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 158.755245] CR2: 00007fe8b45d21d0 CR3: 00000000340b4005 CR4: 0000000000360ef0 [ 158.757654] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 158.760012] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 158.762758] Call Trace: [ 158.763882] ? dev_change_net_namespace+0xbb0/0xbb0 [ 158.766148] ? devlink_nl_cmd_set_doit+0x520/0x520 [ 158.768034] ? dev_change_net_namespace+0xbb0/0xbb0 [ 158.769870] ops_exit_list.isra.0+0xa8/0x150 [ 158.771544] cleanup_net+0x446/0x8f0 [ 158.772945] ? unregister_pernet_operations+0x4a0/0x4a0 [ 158.775294] process_one_work+0xa1a/0x1740 [ 158.776896] ? pwq_dec_nr_in_flight+0x310/0x310 [ 158.779143] ? do_raw_spin_lock+0x11b/0x280 [ 158.780848] worker_thread+0x9e/0x1060 [ 158.782500] ? process_one_work+0x1740/0x1740 [ 158.784454] kthread+0x31b/0x420 [ 158.786082] ? __kthread_create_on_node+0x3f0/0x3f0 [ 158.788286] ret_from_fork+0x3a/0x50 [ 158.789871] ---[ end trace defd6c657c71f936 ]--- [ 158.792273] RIP: 0010:default_device_exit.cold+0x1d/0x1f [ 158.795478] Code: 84 e8 18 c9 3e fe 0f 0b e9 70 90 ff ff e8 36 e4 52 fe 89 d9 4c 89 e2 48 c7 c6 80 d6 25 84 48 c7 c7 20 c0 25 84 e8 f4 c8 3e [ 158.804854] RSP: 0018:ffff8880347e7b90 EFLAGS: 00010282 [ 158.807865] RAX: 000000000000003b RBX: 00000000ffffffef RCX: 0000000000000000 [ 158.811794] RDX: 0000000000000000 RSI: ffffffff8128013d RDI: ffffed10068fcf64 [ 158.816652] RBP: ffff888033550170 R08: 000000000000003b R09: fffffbfff0b94b9c [ 158.820930] R10: fffffbfff0b94b9b R11: ffffffff85ca5cdf R12: ffff888032f28000 [ 158.825113] R13: dffffc0000000000 R14: ffff8880335501b8 R15: 1ffff110068fcf72 [ 158.829899] FS: 0000000000000000(0000) GS:ffff888036000000(0000) knlGS:0000000000000000 [ 158.834923] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 158.838164] CR2: 00007fe8b45d21d0 CR3: 00000000340b4005 CR4: 0000000000360ef0 [ 158.841917] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 158.845149] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fix this by checking if a device with the same name exists in init_net and fallback to original code - dev%d to allocate name - in case it does. This was found using syzkaller. Fixes: aca51397d014 ("netns: Fix arbitrary net_device-s corruptions on net_ns stop.") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index fc676b2610e3..2f341b850845 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9701,6 +9701,8 @@ static void __net_exit default_device_exit(struct net *net) /* Push remaining network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); + if (__dev_get_by_name(&init_net, fb_name)) + snprintf(fb_name, IFNAMSIZ, "dev%%d"); err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { pr_emerg("%s: failed to move %s to init_net: %d\n", -- cgit v1.2.3 From b89d15480d0cacacae1a0fe0b3da01b529f2914f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Mon, 24 Jun 2019 15:20:11 +0200 Subject: netfilter: ipset: Actually allow destination MAC address for hash:ip,mac sets too In commit 8cc4ccf58379 ("ipset: Allow matching on destination MAC address for mac and ipmac sets"), ipset.git commit 1543514c46a7, I removed the KADT check that prevents matching on destination MAC addresses for hash:mac sets, but forgot to remove the same check for hash:ip,mac set. Drop this check: functionality is now commented in man pages and there's no reason to restrict to source MAC address matching anymore. Reported-by: Chen Yi Fixes: 8cc4ccf58379 ("ipset: Allow matching on destination MAC address for mac and ipmac sets") Signed-off-by: Stefano Brivio Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_ipmac.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c index faf59b6a998f..eb1443408320 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmac.c +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -89,10 +89,6 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - /* MAC can be src only */ - if (!(opt->flags & IPSET_DIM_TWO_SRC)) - return 0; - if (skb_mac_header(skb) < skb->head || (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; -- cgit v1.2.3 From 1b4a75108d5bc153daf965d334e77e8e94534f96 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Mon, 24 Jun 2019 15:20:12 +0200 Subject: netfilter: ipset: Copy the right MAC address in bitmap:ip,mac and hash:ip,mac sets In commit 8cc4ccf58379 ("ipset: Allow matching on destination MAC address for mac and ipmac sets"), ipset.git commit 1543514c46a7, I added to the KADT functions for sets matching on MAC addreses the copy of source or destination MAC address depending on the configured match. This was done correctly for hash:mac, but for hash:ip,mac and bitmap:ip,mac, copying and pasting the same code block presents an obvious problem: in these two set types, the MAC address is the second dimension, not the first one, and we are actually selecting the MAC address depending on whether the first dimension (IP address) specifies source or destination. Fix this by checking for the IPSET_DIM_TWO_SRC flag in option flags. This way, mixing source and destination matches for the two dimensions of ip,mac set types works as expected. With this setup: ip netns add A ip link add veth1 type veth peer name veth2 netns A ip addr add 192.0.2.1/24 dev veth1 ip -net A addr add 192.0.2.2/24 dev veth2 ip link set veth1 up ip -net A link set veth2 up dst=$(ip netns exec A cat /sys/class/net/veth2/address) ip netns exec A ipset create test_bitmap bitmap:ip,mac range 192.0.0.0/16 ip netns exec A ipset add test_bitmap 192.0.2.1,${dst} ip netns exec A iptables -A INPUT -m set ! --match-set test_bitmap src,dst -j DROP ip netns exec A ipset create test_hash hash:ip,mac ip netns exec A ipset add test_hash 192.0.2.1,${dst} ip netns exec A iptables -A INPUT -m set ! --match-set test_hash src,dst -j DROP ipset correctly matches a test packet: # ping -c1 192.0.2.2 >/dev/null # echo $? 0 Reported-by: Chen Yi Fixes: 8cc4ccf58379 ("ipset: Allow matching on destination MAC address for mac and ipmac sets") Signed-off-by: Stefano Brivio Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 2 +- net/netfilter/ipset/ip_set_hash_ipmac.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index ca7ac4a25ada..1d4e63326e68 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -226,7 +226,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, e.id = ip_to_id(map, ip); - if (opt->flags & IPSET_DIM_ONE_SRC) + if (opt->flags & IPSET_DIM_TWO_SRC) ether_addr_copy(e.ether, eth_hdr(skb)->h_source); else ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c index eb1443408320..24d8f4df4230 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmac.c +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -93,7 +93,7 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb, (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - if (opt->flags & IPSET_DIM_ONE_SRC) + if (opt->flags & IPSET_DIM_TWO_SRC) ether_addr_copy(e.ether, eth_hdr(skb)->h_source); else ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); -- cgit v1.2.3 From 6c1f7e2c1b96ab9b09ac97c4df2bd9dc327206f6 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 23 Jul 2019 10:25:55 +0200 Subject: netfilter: ipset: Fix rename concurrency with listing Shijie Luo reported that when stress-testing ipset with multiple concurrent create, rename, flush, list, destroy commands, it can result ipset : Broken LIST kernel message: missing DATA part! error messages and broken list results. The problem was the rename operation was not properly handled with respect of listing. The patch fixes the issue. Reported-by: Shijie Luo Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 2e151856ad99..e64d5f9a89dd 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1161,7 +1161,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, return -ENOENT; write_lock_bh(&ip_set_ref_lock); - if (set->ref != 0) { + if (set->ref != 0 || set->ref_netlink != 0) { ret = -IPSET_ERR_REFERENCED; goto out; } -- cgit v1.2.3 From d4e575ba9fcc04d10c0a2e555a5b32fa3a8a19d3 Mon Sep 17 00:00:00 2001 From: Enrico Weigelt Date: Mon, 29 Jul 2019 20:55:21 +0200 Subject: net: sctp: drop unneeded likely() call around IS_ERR() IS_ERR() already calls unlikely(), so this extra unlikely() call around IS_ERR() is not needed. Signed-off-by: Enrico Weigelt Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index aa80cda36581..9d1f83b10c0a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -985,7 +985,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, return -EINVAL; kaddrs = memdup_user(addrs, addrs_size); - if (unlikely(IS_ERR(kaddrs))) + if (IS_ERR(kaddrs)) return PTR_ERR(kaddrs); /* Walk through the addrs buffer and count the number of addresses. */ @@ -1315,7 +1315,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, return -EINVAL; kaddrs = memdup_user(addrs, addrs_size); - if (unlikely(IS_ERR(kaddrs))) + if (IS_ERR(kaddrs)) return PTR_ERR(kaddrs); /* Allow security module to validate connectx addresses. */ -- cgit v1.2.3 From 3b48300d5cc7c7bed63fddb006c4046549ed4aec Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 29 Jul 2019 17:58:10 +0200 Subject: netfilter: ebtables: also count base chain policies ebtables doesn't include the base chain policies in the rule count, so we need to add them manually when we call into the x_tables core to allocate space for the comapt offset table. This lead syzbot to trigger: WARNING: CPU: 1 PID: 9012 at net/netfilter/x_tables.c:649 xt_compat_add_offset.cold+0x11/0x36 net/netfilter/x_tables.c:649 Reported-by: syzbot+276ddebab3382bbf72db@syzkaller.appspotmail.com Fixes: 2035f3ff8eaa ("netfilter: ebtables: compat: un-break 32bit setsockopt when no rules are present") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index fd84b48e48b5..c8177a89f52c 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1770,20 +1770,28 @@ static int compat_calc_entry(const struct ebt_entry *e, return 0; } +static int ebt_compat_init_offsets(unsigned int number) +{ + if (number > INT_MAX) + return -EINVAL; + + /* also count the base chain policies */ + number += NF_BR_NUMHOOKS; + + return xt_compat_init_offsets(NFPROTO_BRIDGE, number); +} static int compat_table_info(const struct ebt_table_info *info, struct compat_ebt_replace *newinfo) { unsigned int size = info->entries_size; const void *entries = info->entries; + int ret; newinfo->entries_size = size; - if (info->nentries) { - int ret = xt_compat_init_offsets(NFPROTO_BRIDGE, - info->nentries); - if (ret) - return ret; - } + ret = ebt_compat_init_offsets(info->nentries); + if (ret) + return ret; return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, entries, newinfo); @@ -2234,11 +2242,9 @@ static int compat_do_replace(struct net *net, void __user *user, xt_compat_lock(NFPROTO_BRIDGE); - if (tmp.nentries) { - ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); - if (ret < 0) - goto out_unlock; - } + ret = ebt_compat_init_offsets(tmp.nentries); + if (ret < 0) + goto out_unlock; ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (ret < 0) -- cgit v1.2.3 From eef347f846ee8f7296a6f84e3866c057ca6bcce0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Jul 2019 14:52:07 +0200 Subject: Revert "mac80211: set NETIF_F_LLTX when using intermediate tx queues" Revert this for now, it has been reported multiple times that it completely breaks connectivity on various devices. Cc: stable@vger.kernel.org Fixes: 8dbb000ee73b ("mac80211: set NETIF_F_LLTX when using intermediate tx queues") Reported-by: Jean Delvare Reported-by: Peter Lebbing Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 06aac0aaae64..8dc6580e1787 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1222,7 +1222,6 @@ static void ieee80211_if_setup(struct net_device *dev) static void ieee80211_if_setup_no_queue(struct net_device *dev) { ieee80211_if_setup(dev); - dev->features |= NETIF_F_LLTX; dev->priv_flags |= IFF_NO_QUEUE; } -- cgit v1.2.3 From 60034d3d146b11922ab1db613bce062dddc0327a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2019 14:42:50 +0100 Subject: rxrpc: Fix potential deadlock There is a potential deadlock in rxrpc_peer_keepalive_dispatch() whereby rxrpc_put_peer() is called with the peer_hash_lock held, but if it reduces the peer's refcount to 0, rxrpc_put_peer() calls __rxrpc_put_peer() - which the tries to take the already held lock. Fix this by providing a version of rxrpc_put_peer() that can be called in situations where the lock is already held. The bug may produce the following lockdep report: ============================================ WARNING: possible recursive locking detected 5.2.0-next-20190718 #41 Not tainted -------------------------------------------- kworker/0:3/21678 is trying to acquire lock: 00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: spin_lock_bh /./include/linux/spinlock.h:343 [inline] 00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: __rxrpc_put_peer /net/rxrpc/peer_object.c:415 [inline] 00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: rxrpc_put_peer+0x2d3/0x6a0 /net/rxrpc/peer_object.c:435 but task is already holding lock: 00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: spin_lock_bh /./include/linux/spinlock.h:343 [inline] 00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: rxrpc_peer_keepalive_dispatch /net/rxrpc/peer_event.c:378 [inline] 00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: rxrpc_peer_keepalive_worker+0x6b3/0xd02 /net/rxrpc/peer_event.c:430 Fixes: 330bdcfadcee ("rxrpc: Fix the keepalive generator [ver #2]") Reported-by: syzbot+72af434e4b3417318f84@syzkaller.appspotmail.com Signed-off-by: David Howells Reviewed-by: Marc Dionne Reviewed-by: Jeffrey Altman --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/peer_event.c | 2 +- net/rxrpc/peer_object.c | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 80335b4ee4fd..822f45386e31 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1061,6 +1061,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); void rxrpc_put_peer(struct rxrpc_peer *); +void rxrpc_put_peer_locked(struct rxrpc_peer *); /* * proc.c diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 9f2f45c09e58..7666ec72d37e 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -378,7 +378,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, spin_lock_bh(&rxnet->peer_hash_lock); list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive[slot & mask]); - rxrpc_put_peer(peer); + rxrpc_put_peer_locked(peer); } spin_unlock_bh(&rxnet->peer_hash_lock); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 9d3ce81cf8ae..9c3ac96f71cb 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -436,6 +436,24 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) } } +/* + * Drop a ref on a peer record where the caller already holds the + * peer_hash_lock. + */ +void rxrpc_put_peer_locked(struct rxrpc_peer *peer) +{ + const void *here = __builtin_return_address(0); + int n; + + n = atomic_dec_return(&peer->usage); + trace_rxrpc_peer(peer, rxrpc_peer_put, n, here); + if (n == 0) { + hash_del_rcu(&peer->hash_link); + list_del_init(&peer->keepalive_link); + kfree_rcu(peer, rcu); + } +} + /* * Make sure all peer records have been discarded. */ -- cgit v1.2.3 From c69565ee6681e151e2bb80502930a16e04b553d1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2019 14:42:50 +0100 Subject: rxrpc: Fix the lack of notification when sendmsg() fails on a DATA packet Fix the fact that a notification isn't sent to the recvmsg side to indicate a call failed when sendmsg() fails to transmit a DATA packet with the error ENETUNREACH, EHOSTUNREACH or ECONNREFUSED. Without this notification, the afs client just sits there waiting for the call to complete in some manner (which it's not now going to do), which also pins the rxrpc call in place. This can be seen if the client has a scope-level IPv6 address, but not a global-level IPv6 address, and we try and transmit an operation to a server's IPv6 address. Looking in /proc/net/rxrpc/calls shows completed calls just sat there with an abort code of RX_USER_ABORT and an error code of -ENETUNREACH. Fixes: c54e43d752c7 ("rxrpc: Fix missing start of call timeout") Signed-off-by: David Howells Reviewed-by: Marc Dionne Reviewed-by: Jeffrey Altman --- net/rxrpc/sendmsg.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 5d3f33ce6d41..bae14438f869 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -226,6 +226,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); + rxrpc_notify_socket(call); goto out; } _debug("need instant resend %d", ret); -- cgit v1.2.3 From 2948a1fcd77a8bb11604387e3fc52f0ebf5729e9 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Tue, 30 Jul 2019 20:19:10 +0200 Subject: tipc: fix unitilized skb list crash Our test suite somtimes provokes the following crash: Description of problem: [ 1092.597234] BUG: unable to handle kernel NULL pointer dereference at 00000000000000e8 [ 1092.605072] PGD 0 P4D 0 [ 1092.607620] Oops: 0000 [#1] SMP PTI [ 1092.611118] CPU: 37 PID: 0 Comm: swapper/37 Kdump: loaded Not tainted 4.18.0-122.el8.x86_64 #1 [ 1092.619724] Hardware name: Dell Inc. PowerEdge R740/08D89F, BIOS 1.3.7 02/08/2018 [ 1092.627215] RIP: 0010:tipc_mcast_filter_msg+0x93/0x2d0 [tipc] [ 1092.632955] Code: 0f 84 aa 01 00 00 89 cf 4d 01 ca 4c 8b 26 c1 ef 19 83 e7 0f 83 ff 0c 4d 0f 45 d1 41 8b 6a 10 0f cd 4c 39 e6 0f 84 81 01 00 00 <4d> 8b 9c 24 e8 00 00 00 45 8b 13 41 0f ca 44 89 d7 c1 ef 13 83 e7 [ 1092.651703] RSP: 0018:ffff929e5fa83a18 EFLAGS: 00010282 [ 1092.656927] RAX: ffff929e3fb38100 RBX: 00000000069f29ee RCX: 00000000416c0045 [ 1092.664058] RDX: ffff929e5fa83a88 RSI: ffff929e31a28420 RDI: 0000000000000000 [ 1092.671209] RBP: 0000000029b11821 R08: 0000000000000000 R09: ffff929e39b4407a [ 1092.678343] R10: ffff929e39b4407a R11: 0000000000000007 R12: 0000000000000000 [ 1092.685475] R13: 0000000000000001 R14: ffff929e3fb38100 R15: ffff929e39b4407a [ 1092.692614] FS: 0000000000000000(0000) GS:ffff929e5fa80000(0000) knlGS:0000000000000000 [ 1092.700702] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1092.706447] CR2: 00000000000000e8 CR3: 000000031300a004 CR4: 00000000007606e0 [ 1092.713579] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1092.720712] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1092.727843] PKRU: 55555554 [ 1092.730556] Call Trace: [ 1092.733010] [ 1092.735034] tipc_sk_filter_rcv+0x7ca/0xb80 [tipc] [ 1092.739828] ? __kmalloc_node_track_caller+0x1cb/0x290 [ 1092.744974] ? dev_hard_start_xmit+0xa5/0x210 [ 1092.749332] tipc_sk_rcv+0x389/0x640 [tipc] [ 1092.753519] tipc_sk_mcast_rcv+0x23c/0x3a0 [tipc] [ 1092.758224] tipc_rcv+0x57a/0xf20 [tipc] [ 1092.762154] ? ktime_get_real_ts64+0x40/0xe0 [ 1092.766432] ? tpacket_rcv+0x50/0x9f0 [ 1092.770098] tipc_l2_rcv_msg+0x4a/0x70 [tipc] [ 1092.774452] __netif_receive_skb_core+0xb62/0xbd0 [ 1092.779164] ? enqueue_entity+0xf6/0x630 [ 1092.783084] ? kmem_cache_alloc+0x158/0x1c0 [ 1092.787272] ? __build_skb+0x25/0xd0 [ 1092.790849] netif_receive_skb_internal+0x42/0xf0 [ 1092.795557] napi_gro_receive+0xba/0xe0 [ 1092.799417] mlx5e_handle_rx_cqe+0x83/0xd0 [mlx5_core] [ 1092.804564] mlx5e_poll_rx_cq+0xd5/0x920 [mlx5_core] [ 1092.809536] mlx5e_napi_poll+0xb2/0xce0 [mlx5_core] [ 1092.814415] ? __wake_up_common_lock+0x89/0xc0 [ 1092.818861] net_rx_action+0x149/0x3b0 [ 1092.822616] __do_softirq+0xe3/0x30a [ 1092.826193] irq_exit+0x100/0x110 [ 1092.829512] do_IRQ+0x85/0xd0 [ 1092.832483] common_interrupt+0xf/0xf [ 1092.836147] [ 1092.838255] RIP: 0010:cpuidle_enter_state+0xb7/0x2a0 [ 1092.843221] Code: e8 3e 79 a5 ff 80 7c 24 03 00 74 17 9c 58 0f 1f 44 00 00 f6 c4 02 0f 85 d7 01 00 00 31 ff e8 a0 6b ab ff fb 66 0f 1f 44 00 00 <48> b8 ff ff ff ff f3 01 00 00 4c 29 f3 ba ff ff ff 7f 48 39 c3 7f [ 1092.861967] RSP: 0018:ffffaa5ec6533e98 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffdd [ 1092.869530] RAX: ffff929e5faa3100 RBX: 000000fe63dd2092 RCX: 000000000000001f [ 1092.876665] RDX: 000000fe63dd2092 RSI: 000000003a518aaa RDI: 0000000000000000 [ 1092.883795] RBP: 0000000000000003 R08: 0000000000000004 R09: 0000000000022940 [ 1092.890929] R10: 0000040cb0666b56 R11: ffff929e5faa20a8 R12: ffff929e5faade78 [ 1092.898060] R13: ffffffffb59258f8 R14: 000000fe60f3228d R15: 0000000000000000 [ 1092.905196] ? cpuidle_enter_state+0x92/0x2a0 [ 1092.909555] do_idle+0x236/0x280 [ 1092.912785] cpu_startup_entry+0x6f/0x80 [ 1092.916715] start_secondary+0x1a7/0x200 [ 1092.920642] secondary_startup_64+0xb7/0xc0 [...] The reason is that the skb list tipc_socket::mc_method.deferredq only is initialized for connectionless sockets, while nothing stops arriving multicast messages from being filtered by connection oriented sockets, with subsequent access to the said list. We fix this by initializing the list unconditionally at socket creation. This eliminates the crash, while the message still is dropped further down in tipc_sk_filter_rcv() as it should be. Reported-by: Li Shuang Signed-off-by: Jon Maloy Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index dd8537f988c4..83ae41d7e554 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -485,9 +485,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) tsk_set_unreliable(tsk, true); - __skb_queue_head_init(&tsk->mc_method.deferredq); } - + __skb_queue_head_init(&tsk->mc_method.deferredq); trace_tipc_sk_create(sk, NULL, TIPC_DUMP_NONE, " "); return 0; } -- cgit v1.2.3 From 055d88242a6046a1ceac3167290f054c72571cd9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 30 Jul 2019 21:25:20 +0200 Subject: compat_ioctl: pppoe: fix PPPOEIOCSFWD handling Support for handling the PPPOEIOCSFWD ioctl in compat mode was added in linux-2.5.69 along with hundreds of other commands, but was always broken sincen only the structure is compatible, but the command number is not, due to the size being sizeof(size_t), or at first sizeof(sizeof((struct sockaddr_pppox)), which is different on 64-bit architectures. Guillaume Nault adds: And the implementation was broken until 2016 (see 29e73269aa4d ("pppoe: fix reference counting in PPPoE proxy")), and nobody ever noticed. I should probably have removed this ioctl entirely instead of fixing it. Clearly, it has never been used. Fix it by adding a compat_ioctl handler for all pppoe variants that translates the command number and then calls the regular ioctl function. All other ioctl commands handled by pppoe are compatible between 32-bit and 64-bit, and require compat_ptr() conversion. This should apply to all stable kernels. Acked-by: Guillaume Nault Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ppp/pppoe.c | 3 +++ drivers/net/ppp/pppox.c | 13 +++++++++++++ drivers/net/ppp/pptp.c | 3 +++ fs/compat_ioctl.c | 3 --- include/linux/if_pppox.h | 3 +++ net/l2tp/l2tp_ppp.c | 3 +++ 6 files changed, 25 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 1d902ecb4aa8..a44dd3c8af63 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -1115,6 +1115,9 @@ static const struct proto_ops pppoe_ops = { .recvmsg = pppoe_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = pppox_compat_ioctl, +#endif }; static const struct pppox_proto pppoe_proto = { diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c index 5ef422a43d70..08364f10a43f 100644 --- a/drivers/net/ppp/pppox.c +++ b/drivers/net/ppp/pppox.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -98,6 +99,18 @@ int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) EXPORT_SYMBOL(pppox_ioctl); +#ifdef CONFIG_COMPAT +int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + if (cmd == PPPOEIOCSFWD32) + cmd = PPPOEIOCSFWD; + + return pppox_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); +} + +EXPORT_SYMBOL(pppox_compat_ioctl); +#endif + static int pppox_create(struct net *net, struct socket *sock, int protocol, int kern) { diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index a8e52c8e4128..734de7de03f7 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -623,6 +623,9 @@ static const struct proto_ops pptp_ops = { .recvmsg = sock_no_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = pppox_compat_ioctl, +#endif }; static const struct pppox_proto pppox_pptp_proto = { diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 6e30949d9f77..a7ec2d3dff92 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -638,9 +638,6 @@ COMPATIBLE_IOCTL(PPPIOCDISCONN) COMPATIBLE_IOCTL(PPPIOCATTCHAN) COMPATIBLE_IOCTL(PPPIOCGCHAN) COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS) -/* PPPOX */ -COMPATIBLE_IOCTL(PPPOEIOCSFWD) -COMPATIBLE_IOCTL(PPPOEIOCDFWD) /* Big A */ /* sparc only */ /* Big Q for sound/OSS */ diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 8b728750a625..69e813bcb947 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -80,6 +80,9 @@ extern int register_pppox_proto(int proto_num, const struct pppox_proto *pp); extern void unregister_pppox_proto(int proto_num); extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */ extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); +extern int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); + +#define PPPOEIOCSFWD32 _IOW(0xB1 ,0, compat_size_t) /* PPPoX socket states */ enum { diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 1d0e5904dedf..c54cb59593ef 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1681,6 +1681,9 @@ static const struct proto_ops pppol2tp_ops = { .recvmsg = pppol2tp_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = pppox_compat_ioctl, +#endif }; static const struct pppox_proto pppol2tp_proto = { -- cgit v1.2.3 From 5c725b6b65067909548ac9ca9bc777098ec9883d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 30 Jul 2019 14:21:00 +0300 Subject: net: bridge: mcast: don't delete permanent entries when fast leave is enabled When permanent entries were introduced by the commit below, they were exempt from timing out and thus igmp leave wouldn't affect them unless fast leave was enabled on the port which was added before permanent entries existed. It shouldn't matter if fast leave is enabled or not if the user added a permanent entry it shouldn't be deleted on igmp leave. Before: $ echo 1 > /sys/class/net/eth4/brport/multicast_fast_leave $ bridge mdb add dev br0 port eth4 grp 229.1.1.1 permanent $ bridge mdb show dev br0 port eth4 grp 229.1.1.1 permanent < join and leave 229.1.1.1 on eth4 > $ bridge mdb show $ After: $ echo 1 > /sys/class/net/eth4/brport/multicast_fast_leave $ bridge mdb add dev br0 port eth4 grp 229.1.1.1 permanent $ bridge mdb show dev br0 port eth4 grp 229.1.1.1 permanent < join and leave 229.1.1.1 on eth4 > $ bridge mdb show dev br0 port eth4 grp 229.1.1.1 permanent Fixes: ccb1c31a7a87 ("bridge: add flags to distinguish permanent mdb entires") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 3d8deac2353d..f8cac3702712 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1388,6 +1388,9 @@ br_multicast_leave_group(struct net_bridge *br, if (!br_port_group_equal(p, port, src)) continue; + if (p->flags & MDB_PG_FLAGS_PERMANENT) + break; + rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); -- cgit v1.2.3 From 4da5f0018eef4c0de31675b670c80e82e13e99d1 Mon Sep 17 00:00:00 2001 From: Taras Kondratiuk Date: Mon, 29 Jul 2019 22:15:07 +0000 Subject: tipc: compat: allow tipc commands without arguments Commit 2753ca5d9009 ("tipc: fix uninit-value in tipc_nl_compat_doit") broke older tipc tools that use compat interface (e.g. tipc-config from tipcutils package): % tipc-config -p operation not supported The commit started to reject TIPC netlink compat messages that do not have attributes. It is too restrictive because some of such messages are valid (they don't need any arguments): % grep 'tx none' include/uapi/linux/tipc_config.h #define TIPC_CMD_NOOP 0x0000 /* tx none, rx none */ #define TIPC_CMD_GET_MEDIA_NAMES 0x0002 /* tx none, rx media_name(s) */ #define TIPC_CMD_GET_BEARER_NAMES 0x0003 /* tx none, rx bearer_name(s) */ #define TIPC_CMD_SHOW_PORTS 0x0006 /* tx none, rx ultra_string */ #define TIPC_CMD_GET_REMOTE_MNG 0x4003 /* tx none, rx unsigned */ #define TIPC_CMD_GET_MAX_PORTS 0x4004 /* tx none, rx unsigned */ #define TIPC_CMD_GET_NETID 0x400B /* tx none, rx unsigned */ #define TIPC_CMD_NOT_NET_ADMIN 0xC001 /* tx none, rx none */ This patch relaxes the original fix and rejects messages without arguments only if such arguments are expected by a command (reg_type is non zero). Fixes: 2753ca5d9009 ("tipc: fix uninit-value in tipc_nl_compat_doit") Cc: stable@vger.kernel.org Signed-off-by: Taras Kondratiuk Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index d86030ef1232..e135d4e11231 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -55,6 +55,7 @@ struct tipc_nl_compat_msg { int rep_type; int rep_size; int req_type; + int req_size; struct net *net; struct sk_buff *rep; struct tlv_desc *req; @@ -257,7 +258,8 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, int err; struct sk_buff *arg; - if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + if (msg->req_type && (!msg->req_size || + !TLV_CHECK_TYPE(msg->req, msg->req_type))) return -EINVAL; msg->rep = tipc_tlv_alloc(msg->rep_size); @@ -354,7 +356,8 @@ static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, { int err; - if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + if (msg->req_type && (!msg->req_size || + !TLV_CHECK_TYPE(msg->req, msg->req_type))) return -EINVAL; err = __tipc_nl_compat_doit(cmd, msg); @@ -1278,8 +1281,8 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) goto send; } - len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); - if (!len || !TLV_OK(msg.req, len)) { + msg.req_size = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); + if (msg.req_size && !TLV_OK(msg.req, msg.req_size)) { msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); err = -EOPNOTSUPP; goto send; -- cgit v1.2.3 From 685703b497bacea8765bb409d6b73455b73c540e Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 31 Jul 2019 01:25:45 +0000 Subject: hv_sock: Fix hang when a connection is closed There is a race condition for an established connection that is being closed by the guest: the refcnt is 4 at the end of hvs_release() (Note: here the 'remove_sock' is false): 1 for the initial value; 1 for the sk being in the bound list; 1 for the sk being in the connected list; 1 for the delayed close_work. After hvs_release() finishes, __vsock_release() -> sock_put(sk) *may* decrease the refcnt to 3. Concurrently, hvs_close_connection() runs in another thread: calls vsock_remove_sock() to decrease the refcnt by 2; call sock_put() to decrease the refcnt to 0, and free the sk; next, the "release_sock(sk)" may hang due to use-after-free. In the above, after hvs_release() finishes, if hvs_close_connection() runs faster than "__vsock_release() -> sock_put(sk)", then there is not any issue, because at the beginning of hvs_close_connection(), the refcnt is still 4. The issue can be resolved if an extra reference is taken when the connection is established. Fixes: a9eeb998c28d ("hv_sock: Add support for delayed close") Signed-off-by: Dexuan Cui Reviewed-by: Sunil Muthuswamy Signed-off-by: David S. Miller --- net/vmw_vsock/hyperv_transport.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index f2084e3f7aa4..9d864ebeb7b3 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -312,6 +312,11 @@ static void hvs_close_connection(struct vmbus_channel *chan) lock_sock(sk); hvs_do_close_lock_held(vsock_sk(sk), true); release_sock(sk); + + /* Release the refcnt for the channel that's opened in + * hvs_open_connection(). + */ + sock_put(sk); } static void hvs_open_connection(struct vmbus_channel *chan) @@ -407,6 +412,9 @@ static void hvs_open_connection(struct vmbus_channel *chan) } set_per_channel_state(chan, conn_from_host ? new : sk); + + /* This reference will be dropped by hvs_close_connection(). */ + sock_hold(conn_from_host ? new : sk); vmbus_set_chn_rescind_callback(chan, hvs_close_connection); /* Set the pending send size to max packet size to always get -- cgit v1.2.3 From 7be8ef2cdbfe41a2e524b7c6cc3f8e6cfaa906e4 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Thu, 1 Aug 2019 13:02:51 +0000 Subject: net: sched: use temporary variable for actions indexes Currently init call of all actions (except ipt) init their 'parm' structure as a direct pointer to nla data in skb. This leads to race condition when some of the filter actions were initialized successfully (and were assigned with idr action index that was written directly into nla data), but then were deleted and retried (due to following action module missing or classifier-initiated retry), in which case action init code tries to insert action to idr with index that was assigned on previous iteration. During retry the index can be reused by another action that was inserted concurrently, which causes unintended action sharing between filters. To fix described race condition, save action idr index to temporary stack-allocated variable instead on nla data. Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action") Signed-off-by: Dmytro Linkin Signed-off-by: Vlad Buslov Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_bpf.c | 9 +++++---- net/sched/act_connmark.c | 9 +++++---- net/sched/act_csum.c | 9 +++++---- net/sched/act_ct.c | 9 +++++---- net/sched/act_ctinfo.c | 9 +++++---- net/sched/act_gact.c | 8 +++++--- net/sched/act_ife.c | 8 +++++--- net/sched/act_mirred.c | 13 +++++++------ net/sched/act_mpls.c | 8 +++++--- net/sched/act_nat.c | 9 +++++---- net/sched/act_pedit.c | 10 ++++++---- net/sched/act_police.c | 8 +++++--- net/sched/act_sample.c | 10 +++++----- net/sched/act_simple.c | 10 ++++++---- net/sched/act_skbedit.c | 11 ++++++----- net/sched/act_skbmod.c | 11 ++++++----- net/sched/act_tunnel_key.c | 8 +++++--- net/sched/act_vlan.c | 16 +++++++++------- 18 files changed, 100 insertions(+), 75 deletions(-) (limited to 'net') diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 8126b26f125e..fd1f7e799e23 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -285,6 +285,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct tcf_bpf *prog; bool is_bpf, is_ebpf; int ret, res = 0; + u32 index; if (!nla) return -EINVAL; @@ -298,13 +299,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_ACT_BPF_PARMS]); - - ret = tcf_idr_check_alloc(tn, &parm->index, act, bind); + index = parm->index; + ret = tcf_idr_check_alloc(tn, &index, act, bind); if (!ret) { - ret = tcf_idr_create(tn, parm->index, est, act, + ret = tcf_idr_create(tn, index, est, act, &act_bpf_ops, bind, true); if (ret < 0) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index ce36b0f7e1dc..32ac04d77a45 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -103,6 +103,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct tcf_connmark_info *ci; struct tc_connmark *parm; int ret = 0, err; + u32 index; if (!nla) return -EINVAL; @@ -116,13 +117,13 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_CONNMARK_PARMS]); - - ret = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + ret = tcf_idr_check_alloc(tn, &index, a, bind); if (!ret) { - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_connmark_ops, bind, false); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 621fb22ce2a9..9b9288267a54 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -52,6 +52,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, struct tc_csum *parm; struct tcf_csum *p; int ret = 0, err; + u32 index; if (nla == NULL) return -EINVAL; @@ -64,13 +65,13 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, if (tb[TCA_CSUM_PARMS] == NULL) return -EINVAL; parm = nla_data(tb[TCA_CSUM_PARMS]); - - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_csum_ops, bind, true); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b501ce0cf116..33a1a7406e87 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -666,6 +666,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, struct tc_ct *parm; struct tcf_ct *c; int err, res = 0; + u32 index; if (!nla) { NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed"); @@ -681,16 +682,16 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, return -EINVAL; } parm = nla_data(tb[TCA_CT_PARMS]); - - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; if (!err) { - err = tcf_idr_create(tn, parm->index, est, a, + err = tcf_idr_create(tn, index, est, a, &act_ct_ops, bind, true); if (err) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return err; } res = ACT_P_CREATED; diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 10eb2bb99861..06ef74b74911 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -157,10 +157,10 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ctinfo_net_id); + u32 dscpmask = 0, dscpstatemask, index; struct nlattr *tb[TCA_CTINFO_MAX + 1]; struct tcf_ctinfo_params *cp_new; struct tcf_chain *goto_ch = NULL; - u32 dscpmask = 0, dscpstatemask; struct tc_ctinfo *actparm; struct tcf_ctinfo *ci; u8 dscpmaskshift; @@ -206,12 +206,13 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, } /* done the validation:now to the actual action allocation */ - err = tcf_idr_check_alloc(tn, &actparm->index, a, bind); + index = actparm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { - ret = tcf_idr_create(tn, actparm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_ctinfo_ops, bind, false); if (ret) { - tcf_idr_cleanup(tn, actparm->index); + tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index b2380c5284e6..8f0140c6ca58 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -61,6 +61,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, struct tc_gact *parm; struct tcf_gact *gact; int ret = 0; + u32 index; int err; #ifdef CONFIG_GACT_PROB struct tc_gact_p *p_parm = NULL; @@ -77,6 +78,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, if (tb[TCA_GACT_PARMS] == NULL) return -EINVAL; parm = nla_data(tb[TCA_GACT_PARMS]); + index = parm->index; #ifndef CONFIG_GACT_PROB if (tb[TCA_GACT_PROB] != NULL) @@ -94,12 +96,12 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } #endif - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_gact_ops, bind, true); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 3578196d1600..92ee853d43e6 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -479,6 +479,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, u8 *saddr = NULL; bool exists = false; int ret = 0; + u32 index; int err; if (!nla) { @@ -507,7 +508,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (!p) return -ENOMEM; - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) { kfree(p); return err; @@ -519,10 +521,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } if (!exists) { - ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops, + ret = tcf_idr_create(tn, index, est, a, &act_ife_ops, bind, true); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); kfree(p); return ret; } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 055faa298c8e..be3f88dfc37e 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -104,6 +104,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct net_device *dev; bool exists = false; int ret, err; + u32 index; if (!nla) { NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed"); @@ -118,8 +119,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return -EINVAL; } parm = nla_data(tb[TCA_MIRRED_PARMS]); - - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; @@ -136,21 +137,21 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (exists) tcf_idr_release(*a, bind); else - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option"); return -EINVAL; } if (!exists) { if (!parm->ifindex) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist"); return -EINVAL; } - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_mirred_ops, bind, true); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index ca2597ce4ac9..0f299e3b618c 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -138,6 +138,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, struct tcf_mpls *m; int ret = 0, err; u8 mpls_ttl = 0; + u32 index; if (!nla) { NL_SET_ERR_MSG_MOD(extack, "Missing netlink attributes"); @@ -153,6 +154,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, return -EINVAL; } parm = nla_data(tb[TCA_MPLS_PARMS]); + index = parm->index; /* Verify parameters against action type. */ switch (parm->m_action) { @@ -209,7 +211,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, return -EINVAL; } - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; @@ -217,10 +219,10 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, return 0; if (!exists) { - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_mpls_ops, bind, true); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 45923ebb7a4f..7b858c11b1b5 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -44,6 +44,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_nat *parm; int ret = 0, err; struct tcf_nat *p; + u32 index; if (nla == NULL) return -EINVAL; @@ -56,13 +57,13 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, if (tb[TCA_NAT_PARMS] == NULL) return -EINVAL; parm = nla_data(tb[TCA_NAT_PARMS]); - - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_nat_ops, bind, false); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 45e9d6bfddb3..17360c6faeaa 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -149,6 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct tcf_pedit *p; int ret = 0, err; int ksize; + u32 index; if (!nla) { NL_SET_ERR_MSG_MOD(extack, "Pedit requires attributes to be passed"); @@ -179,18 +180,19 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (IS_ERR(keys_ex)) return PTR_ERR(keys_ex); - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { if (!parm->nkeys) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed"); ret = -EINVAL; goto out_free; } - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_pedit_ops, bind, false); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); goto out_free; } ret = ACT_P_CREATED; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index a065f62fa79c..49cec3e64a4d 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -57,6 +57,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, police_net_id); struct tcf_police_params *new; bool exists = false; + u32 index; if (nla == NULL) return -EINVAL; @@ -73,7 +74,8 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_POLICE_TBF]); - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; @@ -81,10 +83,10 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, return 0; if (!exists) { - ret = tcf_idr_create(tn, parm->index, NULL, a, + ret = tcf_idr_create(tn, index, NULL, a, &act_police_ops, bind, true); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 274d7a0c0e25..595308d60133 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -41,8 +41,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, sample_net_id); struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; + u32 psample_group_num, rate, index; struct tcf_chain *goto_ch = NULL; - u32 psample_group_num, rate; struct tc_sample *parm; struct tcf_sample *s; bool exists = false; @@ -59,8 +59,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_SAMPLE_PARMS]); - - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; @@ -68,10 +68,10 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, return 0; if (!exists) { - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_sample_ops, bind, true); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } ret = ACT_P_CREATED; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index f28ddbabff76..33aefa25b545 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -95,6 +95,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, struct tcf_defact *d; bool exists = false; int ret = 0, err; + u32 index; if (nla == NULL) return -EINVAL; @@ -108,7 +109,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_DEF_PARMS]); - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; @@ -119,15 +121,15 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, if (exists) tcf_idr_release(*a, bind); else - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return -EINVAL; } if (!exists) { - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_simp_ops, bind, false); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 215a06705cef..b100870f02a6 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -99,6 +99,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, u16 *queue_mapping = NULL, *ptype = NULL; bool exists = false; int ret = 0, err; + u32 index; if (nla == NULL) return -EINVAL; @@ -146,8 +147,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, } parm = nla_data(tb[TCA_SKBEDIT_PARMS]); - - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; @@ -158,15 +159,15 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (exists) tcf_idr_release(*a, bind); else - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return -EINVAL; } if (!exists) { - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_skbedit_ops, bind, true); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 4f07706eff07..7da3518e18ef 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -87,12 +87,12 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct tcf_skbmod_params *p, *p_old; struct tcf_chain *goto_ch = NULL; struct tc_skbmod *parm; + u32 lflags = 0, index; struct tcf_skbmod *d; bool exists = false; u8 *daddr = NULL; u8 *saddr = NULL; u16 eth_type = 0; - u32 lflags = 0; int ret = 0, err; if (!nla) @@ -122,10 +122,11 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, } parm = nla_data(tb[TCA_SKBMOD_PARMS]); + index = parm->index; if (parm->flags & SKBMOD_F_SWAPMAC) lflags = SKBMOD_F_SWAPMAC; - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; @@ -136,15 +137,15 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (exists) tcf_idr_release(*a, bind); else - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return -EINVAL; } if (!exists) { - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_skbmod_ops, bind, true); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 10dffda1d5cc..6d0debdc9b97 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -225,6 +225,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, __be16 flags = 0; u8 tos, ttl; int ret = 0; + u32 index; int err; if (!nla) { @@ -245,7 +246,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, } parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; @@ -345,7 +347,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, } if (!exists) { - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_tunnel_key_ops, bind, true); if (ret) { NL_SET_ERR_MSG(extack, "Cannot create TC IDR"); @@ -403,7 +405,7 @@ err_out: if (exists) tcf_idr_release(*a, bind); else - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 9269d350fb8a..984b05ab0c87 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -116,6 +116,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, u8 push_prio = 0; bool exists = false; int ret = 0, err; + u32 index; if (!nla) return -EINVAL; @@ -128,7 +129,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!tb[TCA_VLAN_PARMS]) return -EINVAL; parm = nla_data(tb[TCA_VLAN_PARMS]); - err = tcf_idr_check_alloc(tn, &parm->index, a, bind); + index = parm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; @@ -144,7 +146,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (exists) tcf_idr_release(*a, bind); else - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return -EINVAL; } push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]); @@ -152,7 +154,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (exists) tcf_idr_release(*a, bind); else - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return -ERANGE; } @@ -166,7 +168,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (exists) tcf_idr_release(*a, bind); else - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return -EPROTONOSUPPORT; } } else { @@ -180,16 +182,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (exists) tcf_idr_release(*a, bind); else - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return -EINVAL; } action = parm->v_action; if (!exists) { - ret = tcf_idr_create(tn, parm->index, est, a, + ret = tcf_idr_create(tn, index, est, a, &act_vlan_ops, bind, true); if (ret) { - tcf_idr_cleanup(tn, parm->index); + tcf_idr_cleanup(tn, index); return ret; } -- cgit v1.2.3 From 065af355470519bd184019a93ac579f22b036045 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 1 Aug 2019 20:00:31 +0200 Subject: net: fix bpf_xdp_adjust_head regression for generic-XDP When generic-XDP was moved to a later processing step by commit 458bf2f224f0 ("net: core: support XDP generic on stacked devices.") a regression was introduced when using bpf_xdp_adjust_head. The issue is that after this commit the skb->network_header is now changed prior to calling generic XDP and not after. Thus, if the header is changed by XDP (via bpf_xdp_adjust_head), then skb->network_header also need to be updated again. Fix by calling skb_reset_network_header(). Fixes: 458bf2f224f0 ("net: core: support XDP generic on stacked devices.") Reported-by: Brandon Cazander Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/core/dev.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2f341b850845..0891f499c1bb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4374,12 +4374,17 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, act = bpf_prog_run_xdp(xdp_prog, xdp); + /* check if bpf_xdp_adjust_head was used */ off = xdp->data - orig_data; - if (off > 0) - __skb_pull(skb, off); - else if (off < 0) - __skb_push(skb, -off); - skb->mac_header += off; + if (off) { + if (off > 0) + __skb_pull(skb, off); + else if (off < 0) + __skb_push(skb, -off); + + skb->mac_header += off; + skb_reset_network_header(skb); + } /* check if bpf_xdp_adjust_tail was used. it can only "shrink" * pckt. -- cgit v1.2.3 From 5d92e631b8be8965a90c144320f06e096081a551 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 1 Aug 2019 14:36:01 -0700 Subject: net/tls: partially revert fix transition through disconnect with close Looks like we were slightly overzealous with the shutdown() cleanup. Even though the sock->sk_state can reach CLOSED again, socket->state will not got back to SS_UNCONNECTED once connections is ESTABLISHED. Meaning we will see EISCONN if we try to reconnect, and EINVAL if we try to listen. Only listen sockets can be shutdown() and reused, but since ESTABLISHED sockets can never be re-connected() or used for listen() we don't need to try to clean up the ULP state early. Fixes: 32857cf57f92 ("net/tls: fix transition through disconnect with close") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/tls-offload.rst | 6 ---- include/net/tls.h | 2 -- net/tls/tls_main.c | 55 -------------------------------- 3 files changed, 63 deletions(-) (limited to 'net') diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index 2d9f9ebf4117..b70b70dc4524 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -524,9 +524,3 @@ Redirects leak clear text In the RX direction, if segment has already been decrypted by the device and it gets redirected or mirrored - clear text will be transmitted out. - -shutdown() doesn't clear TLS state ----------------------------------- - -shutdown() system call allows for a TLS socket to be reused as a different -connection. Offload doesn't currently handle that. diff --git a/include/net/tls.h b/include/net/tls.h index 9e425ac2de45..41b2d41bb1b8 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -290,8 +290,6 @@ struct tls_context { struct list_head list; refcount_t refcount; - - struct work_struct gc; }; enum tls_offload_ctx_dir { diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index f208f8455ef2..9cbbae606ced 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -261,33 +261,6 @@ void tls_ctx_free(struct tls_context *ctx) kfree(ctx); } -static void tls_ctx_free_deferred(struct work_struct *gc) -{ - struct tls_context *ctx = container_of(gc, struct tls_context, gc); - - /* Ensure any remaining work items are completed. The sk will - * already have lost its tls_ctx reference by the time we get - * here so no xmit operation will actually be performed. - */ - if (ctx->tx_conf == TLS_SW) { - tls_sw_cancel_work_tx(ctx); - tls_sw_free_ctx_tx(ctx); - } - - if (ctx->rx_conf == TLS_SW) { - tls_sw_strparser_done(ctx); - tls_sw_free_ctx_rx(ctx); - } - - tls_ctx_free(ctx); -} - -static void tls_ctx_free_wq(struct tls_context *ctx) -{ - INIT_WORK(&ctx->gc, tls_ctx_free_deferred); - schedule_work(&ctx->gc); -} - static void tls_sk_proto_cleanup(struct sock *sk, struct tls_context *ctx, long timeo) { @@ -315,29 +288,6 @@ static void tls_sk_proto_cleanup(struct sock *sk, #endif } -static void tls_sk_proto_unhash(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - long timeo = sock_sndtimeo(sk, 0); - struct tls_context *ctx; - - if (unlikely(!icsk->icsk_ulp_data)) { - if (sk->sk_prot->unhash) - sk->sk_prot->unhash(sk); - } - - ctx = tls_get_ctx(sk); - tls_sk_proto_cleanup(sk, ctx, timeo); - write_lock_bh(&sk->sk_callback_lock); - icsk->icsk_ulp_data = NULL; - sk->sk_prot = ctx->sk_proto; - write_unlock_bh(&sk->sk_callback_lock); - - if (ctx->sk_proto->unhash) - ctx->sk_proto->unhash(sk); - tls_ctx_free_wq(ctx); -} - static void tls_sk_proto_close(struct sock *sk, long timeout) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -786,7 +736,6 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt; prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt; prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close; - prot[TLS_BASE][TLS_BASE].unhash = tls_sk_proto_unhash; prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; @@ -804,20 +753,16 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], #ifdef CONFIG_TLS_DEVICE prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; - prot[TLS_HW][TLS_BASE].unhash = base->unhash; prot[TLS_HW][TLS_BASE].sendmsg = tls_device_sendmsg; prot[TLS_HW][TLS_BASE].sendpage = tls_device_sendpage; prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW]; - prot[TLS_HW][TLS_SW].unhash = base->unhash; prot[TLS_HW][TLS_SW].sendmsg = tls_device_sendmsg; prot[TLS_HW][TLS_SW].sendpage = tls_device_sendpage; prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW]; - prot[TLS_BASE][TLS_HW].unhash = base->unhash; prot[TLS_SW][TLS_HW] = prot[TLS_SW][TLS_SW]; - prot[TLS_SW][TLS_HW].unhash = base->unhash; prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW]; #endif -- cgit v1.2.3 From cff6a327d78b05c98e0d1c4be77225ea2c0bfe8e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 1 Aug 2019 14:36:35 -0700 Subject: ipv6: Fix unbalanced rcu locking in rt6_update_exception_stamp_rt The nexthop path in rt6_update_exception_stamp_rt needs to call rcu_read_unlock if it fails to find a fib6_nh match rather than just returning. Fixes: e659ba31d806 ("ipv6: Handle all fib6_nh in a nexthop in exception handling") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e49fec767a10..fd059e08785a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1951,7 +1951,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg); if (!arg.match) - return; + goto unlock; fib6_nh = arg.match; } else { fib6_nh = from->fib6_nh; -- cgit v1.2.3 From f9cedf1a9b1cdcfb0c52edb391d01771e43994a4 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 2 Aug 2019 10:16:38 +0200 Subject: net/smc: do not schedule tx_work in SMC_CLOSED state The setsockopts options TCP_NODELAY and TCP_CORK may schedule the tx worker. Make sure the socket is not yet moved into SMC_CLOSED state (for instance by a shutdown SHUT_RDWR call). Reported-by: syzbot+92209502e7aab127c75f@syzkaller.appspotmail.com Reported-by: syzbot+b972214bb803a343f4fe@syzkaller.appspotmail.com Fixes: 01d2f7e2cdd31 ("net/smc: sockopts TCP_NODELAY and TCP_CORK") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 302e355f2ebc..f5ea09258ab0 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1732,14 +1732,18 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, } break; case TCP_NODELAY: - if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) { + if (sk->sk_state != SMC_INIT && + sk->sk_state != SMC_LISTEN && + sk->sk_state != SMC_CLOSED) { if (val && !smc->use_fallback) mod_delayed_work(system_wq, &smc->conn.tx_work, 0); } break; case TCP_CORK: - if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) { + if (sk->sk_state != SMC_INIT && + sk->sk_state != SMC_LISTEN && + sk->sk_state != SMC_CLOSED) { if (!val && !smc->use_fallback) mod_delayed_work(system_wq, &smc->conn.tx_work, 0); -- cgit v1.2.3 From cd2063604ea6a8c2683b4eb9b5f4c4da74592d87 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 2 Aug 2019 10:47:50 +0200 Subject: net/smc: avoid fallback in case of non-blocking connect FASTOPEN is not possible with SMC. sendmsg() with msg_flag MSG_FASTOPEN triggers a fallback to TCP if the socket is in state SMC_INIT. But if a nonblocking connect is already started, fallback to TCP is no longer possible, even though the socket may still be in state SMC_INIT. And if a nonblocking connect is already started, a listen() call does not make sense. Reported-by: syzbot+bd8cc73d665590a1fcad@syzkaller.appspotmail.com Fixes: 50717a37db032 ("net/smc: nonblocking connect rework") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index f5ea09258ab0..5b932583e407 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -263,7 +263,7 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr, /* Check if socket is already active */ rc = -EINVAL; - if (sk->sk_state != SMC_INIT) + if (sk->sk_state != SMC_INIT || smc->connect_nonblock) goto out_rel; smc->clcsock->sk->sk_reuse = sk->sk_reuse; @@ -1390,7 +1390,8 @@ static int smc_listen(struct socket *sock, int backlog) lock_sock(sk); rc = -EINVAL; - if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN)) + if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) || + smc->connect_nonblock) goto out; rc = 0; @@ -1518,7 +1519,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) goto out; if (msg->msg_flags & MSG_FASTOPEN) { - if (sk->sk_state == SMC_INIT) { + if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; } else { -- cgit v1.2.3 From 091adf9ba6cdb432cbcc217b47e4ffb8aa0d8865 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 2 Aug 2019 13:57:36 +0300 Subject: net: bridge: move default pvid init/deinit to NETDEV_REGISTER/UNREGISTER Most of the bridge device's vlan init bugs come from the fact that its default pvid is created at the wrong time, way too early in ndo_init() before the device is even assigned an ifindex. It introduces a bug when the bridge's dev_addr is added as fdb during the initial default pvid creation the notification has ifindex/NDA_MASTER both equal to 0 (see example below) which really makes no sense for user-space[0] and is wrong. Usually user-space software would ignore such entries, but they are actually valid and will eventually have all necessary attributes. It makes much more sense to send a notification *after* the device has registered and has a proper ifindex allocated rather than before when there's a chance that the registration might still fail or to receive it with ifindex/NDA_MASTER == 0. Note that we can remove the fdb flush from br_vlan_flush() since that case can no longer happen. At NETDEV_REGISTER br->default_pvid is always == 1 as it's initialized by br_vlan_init() before that and at NETDEV_UNREGISTER it can be anything depending why it was called (if called due to NETDEV_REGISTER error it'll still be == 1, otherwise it could be any value changed during the device life time). For the demonstration below a small change to iproute2 for printing all fdb notifications is added, because it contained a workaround not to show entries with ifindex == 0. Command executed while monitoring: $ ip l add br0 type bridge Before (both ifindex and master == 0): $ bridge monitor fdb 36:7e:8a:b3:56:ba dev * vlan 1 master * permanent After (proper br0 ifindex): $ bridge monitor fdb e6:2a:ae:7a:b7:48 dev br0 vlan 1 master br0 permanent v4: move only the default pvid init/deinit to NETDEV_REGISTER/UNREGISTER v3: send the correct v2 patch with all changes (stub should return 0) v2: on error in br_vlan_init set br->vlgrp to NULL and return 0 in the br_vlan_bridge_event stub when bridge vlans are disabled [0] https://bugzilla.kernel.org/show_bug.cgi?id=204389 Reported-by: michael-dev Fixes: 5be5a2df40f0 ("bridge: Add filtering support for default_pvid") Signed-off-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/bridge/br.c | 5 ++++- net/bridge/br_private.h | 9 +++++---- net/bridge/br_vlan.c | 34 ++++++++++++++++------------------ 3 files changed, 25 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/bridge/br.c b/net/bridge/br.c index d164f63a4345..8a8f9e5f264f 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -37,12 +37,15 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v int err; if (dev->priv_flags & IFF_EBRIDGE) { + err = br_vlan_bridge_event(dev, event, ptr); + if (err) + return notifier_from_errno(err); + if (event == NETDEV_REGISTER) { /* register of bridge completed, add sysfs entries */ br_sysfs_addbr(dev); return NOTIFY_DONE; } - br_vlan_bridge_event(dev, event, ptr); } /* not a port of a bridge */ diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e8cf03b43b7d..646504db0220 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -894,8 +894,8 @@ int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask); void br_vlan_get_stats(const struct net_bridge_vlan *v, struct br_vlan_stats *stats); void br_vlan_port_event(struct net_bridge_port *p, unsigned long event); -void br_vlan_bridge_event(struct net_device *dev, unsigned long event, - void *ptr); +int br_vlan_bridge_event(struct net_device *dev, unsigned long event, + void *ptr); static inline struct net_bridge_vlan_group *br_vlan_group( const struct net_bridge *br) @@ -1085,9 +1085,10 @@ static inline void br_vlan_port_event(struct net_bridge_port *p, { } -static inline void br_vlan_bridge_event(struct net_device *dev, - unsigned long event, void *ptr) +static inline int br_vlan_bridge_event(struct net_device *dev, + unsigned long event, void *ptr) { + return 0; } #endif diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index a544e161c7fa..f5b2aeebbfe9 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -715,11 +715,6 @@ void br_vlan_flush(struct net_bridge *br) ASSERT_RTNL(); - /* delete auto-added default pvid local fdb before flushing vlans - * otherwise it will be leaked on bridge device init failure - */ - br_fdb_delete_by_port(br, NULL, 0, 1); - vg = br_vlan_group(br); __vlan_flush(vg); RCU_INIT_POINTER(br->vlgrp, NULL); @@ -1058,7 +1053,6 @@ int br_vlan_init(struct net_bridge *br) { struct net_bridge_vlan_group *vg; int ret = -ENOMEM; - bool changed; vg = kzalloc(sizeof(*vg), GFP_KERNEL); if (!vg) @@ -1073,17 +1067,10 @@ int br_vlan_init(struct net_bridge *br) br->vlan_proto = htons(ETH_P_8021Q); br->default_pvid = 1; rcu_assign_pointer(br->vlgrp, vg); - ret = br_vlan_add(br, 1, - BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_BRENTRY, &changed, NULL); - if (ret) - goto err_vlan_add; out: return ret; -err_vlan_add: - vlan_tunnel_deinit(vg); err_tunnel_init: rhashtable_destroy(&vg->vlan_hash); err_rhtbl: @@ -1469,13 +1456,23 @@ static void nbp_vlan_set_vlan_dev_state(struct net_bridge_port *p, u16 vid) } /* Must be protected by RTNL. */ -void br_vlan_bridge_event(struct net_device *dev, unsigned long event, - void *ptr) +int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr) { struct netdev_notifier_changeupper_info *info; - struct net_bridge *br; + struct net_bridge *br = netdev_priv(dev); + bool changed; + int ret = 0; switch (event) { + case NETDEV_REGISTER: + ret = br_vlan_add(br, br->default_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_BRENTRY, &changed, NULL); + break; + case NETDEV_UNREGISTER: + br_vlan_delete(br, br->default_pvid); + break; case NETDEV_CHANGEUPPER: info = ptr; br_vlan_upper_change(dev, info->upper_dev, info->linking); @@ -1483,12 +1480,13 @@ void br_vlan_bridge_event(struct net_device *dev, unsigned long event, case NETDEV_CHANGE: case NETDEV_UP: - br = netdev_priv(dev); if (!br_opt_get(br, BROPT_VLAN_BRIDGE_BINDING)) - return; + break; br_vlan_link_state_change(dev, br); break; } + + return ret; } /* Must be protected by RTNL. */ -- cgit v1.2.3 From b35475c5491a14c8ce7a5046ef7bcda8a860581a Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Fri, 2 Aug 2019 15:16:46 -0400 Subject: net sched: update vlan action for batched events operations Add get_fill_size() routine used to calculate the action size when building a batch of events. Fixes: c7e2b9689 ("sched: introduce vlan action") Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- net/sched/act_vlan.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 984b05ab0c87..a3c9eea1ee8a 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -308,6 +308,14 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static size_t tcf_vlan_get_fill_size(const struct tc_action *act) +{ + return nla_total_size(sizeof(struct tc_vlan)) + + nla_total_size(sizeof(u16)) /* TCA_VLAN_PUSH_VLAN_ID */ + + nla_total_size(sizeof(u16)) /* TCA_VLAN_PUSH_VLAN_PROTOCOL */ + + nla_total_size(sizeof(u8)); /* TCA_VLAN_PUSH_VLAN_PRIORITY */ +} + static struct tc_action_ops act_vlan_ops = { .kind = "vlan", .id = TCA_ID_VLAN, @@ -317,6 +325,7 @@ static struct tc_action_ops act_vlan_ops = { .init = tcf_vlan_init, .cleanup = tcf_vlan_cleanup, .walk = tcf_vlan_walker, + .get_fill_size = tcf_vlan_get_fill_size, .lookup = tcf_vlan_search, .size = sizeof(struct tcf_vlan), }; -- cgit v1.2.3 From f163fed2764e66511fb5c489bf87e532ad7606fb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 5 Aug 2019 01:38:47 +0300 Subject: net: dsa: sja1105: Fix memory leak on meta state machine normal path After a meta frame is received, it is associated with the cached sp->data->stampable_skb from the DSA tagger private structure. Cached means its refcount is incremented with skb_get() in order for dsa_switch_rcv() to not free it when the tagger .rcv returns NULL. The mistake is that skb_unref() is not the correct function to use. It will correctly decrement the refcount (which will go back to zero) but the skb memory will not be freed. That is the job of kfree_skb(), which also calls skb_unref(). But it turns out that freeing the cached stampable_skb is in fact not necessary. It is still a perfectly valid skb, and now it is even annotated with the partial RX timestamp. So remove the skb_copy() altogether and simply pass the stampable_skb with a refcount of 1 (incremented by us, decremented by dsa_switch_rcv) up the stack. Fixes: f3097be21bf1 ("net: dsa: sja1105: Add a state machine for RX timestamping") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/tag_sja1105.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'net') diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 26363d72d25b..8fa8dda8a15b 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -211,17 +211,8 @@ static struct sk_buff * for further processing up the network stack. */ kfree_skb(skb); - - skb = skb_copy(stampable_skb, GFP_ATOMIC); - if (!skb) { - dev_err_ratelimited(dp->ds->dev, - "Failed to copy stampable skb\n"); - spin_unlock(&sp->data->meta_lock); - return NULL; - } + skb = stampable_skb; sja1105_transfer_meta(skb, meta); - /* The cached copy will be freed now */ - skb_unref(stampable_skb); spin_unlock(&sp->data->meta_lock); } -- cgit v1.2.3 From 93fa8587b25356382a39f1ca3a81d6c1b42ac731 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 5 Aug 2019 01:38:48 +0300 Subject: net: dsa: sja1105: Fix memory leak on meta state machine error path When RX timestamping is enabled and two link-local (non-meta) frames are received in a row, this constitutes an error. The tagger is always caching the last link-local frame, in an attempt to merge it with the meta follow-up frame when that arrives. To recover from the above error condition, the initial cached link-local frame is dropped and the second frame in a row is cached (in expectance of the second meta frame). However, when dropping the initial link-local frame, its backing memory was being leaked. Fixes: f3097be21bf1 ("net: dsa: sja1105: Add a state machine for RX timestamping") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/tag_sja1105.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 8fa8dda8a15b..47ee88163a9d 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -165,6 +165,7 @@ static struct sk_buff "Expected meta frame, is %12llx " "in the DSA master multicast filter?\n", SJA1105_META_DMAC); + kfree_skb(sp->data->stampable_skb); } /* Hold a reference to avoid dsa_switch_rcv -- cgit v1.2.3