From 36eeee75ef0157e42fb6593dcc65daab289b559e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 20 Jul 2022 09:50:14 -0700 Subject: tcp: Fix a data-race around sysctl_tcp_adv_win_scale. While reading sysctl_tcp_adv_win_scale, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 071735e10872..78a64e1b33a7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1419,7 +1419,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, static inline int tcp_win_from_space(const struct sock *sk, int space) { - int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; + int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale); return tcp_adv_win_scale <= 0 ? (space>>(-tcp_adv_win_scale)) : -- cgit v1.2.3 From 4d8f24eeedc58d5f87b650ddda73c16e8ba56559 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 21 Jul 2022 20:44:04 +0000 Subject: Revert "tcp: change pingpong threshold to 3" This reverts commit 4a41f453bedfd5e9cd040bad509d9da49feb3e2c. This to-be-reverted commit was meant to apply a stricter rule for the stack to enter pingpong mode. However, the condition used to check for interactive session "before(tp->lsndtime, icsk->icsk_ack.lrcvtime)" is jiffy based and might be too coarse, which delays the stack entering pingpong mode. We revert this patch so that we no longer use the above condition to determine interactive session, and also reduce pingpong threshold to 1. Fixes: 4a41f453bedf ("tcp: change pingpong threshold to 3") Reported-by: LemmyHuang Suggested-by: Neal Cardwell Signed-off-by: Wei Wang Acked-by: Neal Cardwell Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220721204404.388396-1-weiwan@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 10 +--------- net/ipv4/tcp_output.c | 15 ++++++--------- 2 files changed, 7 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 85cd695e7fd1..ee88f0f1350f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -321,7 +321,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); -#define TCP_PINGPONG_THRESH 3 +#define TCP_PINGPONG_THRESH 1 static inline void inet_csk_enter_pingpong_mode(struct sock *sk) { @@ -338,14 +338,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk) return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; } -static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (icsk->icsk_ack.pingpong < U8_MAX) - icsk->icsk_ack.pingpong++; -} - static inline bool inet_csk_has_ulp(struct sock *sk) { return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index cf6713c9567e..2efe41c84ee8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp, if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); - /* If this is the first data packet sent in response to the - * previous received data, - * and it is a reply for ato after last received packet, - * increase pingpong count. - */ - if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) && - (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - inet_csk_inc_pingpong_cnt(sk); - tp->lsndtime = now; + + /* If it is a reply for ato after last received + * packet, enter pingpong mode. + */ + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) + inet_csk_enter_pingpong_mode(sk); } /* Account for an ACK we sent. */ -- cgit v1.2.3 From 02739545951ad4c1215160db7fbf9b7a918d3c0b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 22 Jul 2022 11:22:00 -0700 Subject: net: Fix data-races around sysctl_[rw]mem(_offset)?. While reading these sysctl variables, they can be changed concurrently. Thus, we need to add READ_ONCE() to their readers. - .sysctl_rmem - .sysctl_rwmem - .sysctl_rmem_offset - .sysctl_wmem_offset - sysctl_tcp_rmem[1, 2] - sysctl_tcp_wmem[1, 2] - sysctl_decnet_rmem[1] - sysctl_decnet_wmem[1] - sysctl_tipc_rmem[1] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/sock.h | 8 ++++---- net/decnet/af_decnet.c | 4 ++-- net/ipv4/tcp.c | 6 +++--- net/ipv4/tcp_input.c | 13 +++++++------ net/ipv4/tcp_output.c | 2 +- net/mptcp/protocol.c | 6 +++--- net/tipc/socket.c | 2 +- 7 files changed, 21 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 9fa54762e077..7a48991cdb19 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2843,18 +2843,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) { /* Does this proto have per netns sysctl_wmem ? */ if (proto->sysctl_wmem_offset) - return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset); + return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset)); - return *proto->sysctl_wmem; + return READ_ONCE(*proto->sysctl_wmem); } static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) { /* Does this proto have per netns sysctl_rmem ? */ if (proto->sysctl_rmem_offset) - return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset); + return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset)); - return *proto->sysctl_rmem; + return READ_ONCE(*proto->sysctl_rmem); } /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index dc92a67baea3..7d542eb46172 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -480,8 +480,8 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf sk->sk_family = PF_DECnet; sk->sk_protocol = 0; sk->sk_allocation = gfp; - sk->sk_sndbuf = sysctl_decnet_wmem[1]; - sk->sk_rcvbuf = sysctl_decnet_rmem[1]; + sk->sk_sndbuf = READ_ONCE(sysctl_decnet_wmem[1]); + sk->sk_rcvbuf = READ_ONCE(sysctl_decnet_rmem[1]); /* Initialization of DECnet Session Control Port */ scp = DN_SK(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a11e5de3a4c3..002a4a04efbe 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -452,8 +452,8 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); - WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); + WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1])); + WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1])); sk_sockets_allocated_inc(sk); } @@ -1724,7 +1724,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) cap = sk->sk_rcvbuf >> 1; else - cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1; + cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; val = min(val, cap); WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dd05238f79f6..ff2e0d87aee4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk) if (sk->sk_sndbuf < sndmem) WRITE_ONCE(sk->sk_sndbuf, - min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2])); + min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2]))); } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1; - int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; + int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) @@ -574,16 +574,17 @@ static void tcp_clamp_window(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); + int rmem2; icsk->icsk_ack.quick = 0; + rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]); - if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] && + if (sk->sk_rcvbuf < rmem2 && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { WRITE_ONCE(sk->sk_rcvbuf, - min(atomic_read(&sk->sk_rmem_alloc), - net->ipv4.sysctl_tcp_rmem[2])); + min(atomic_read(&sk->sk_rmem_alloc), rmem2)); } if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); @@ -745,7 +746,7 @@ void tcp_rcv_space_adjust(struct sock *sk) do_div(rcvwin, tp->advmss); rcvbuf = min_t(u64, rcvwin * rcvmem, - sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); if (rcvbuf > sk->sk_rcvbuf) { WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2efe41c84ee8..4c376b6d8764 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -238,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, *rcv_wscale = 0; if (wscale_ok) { /* Set window scaling on max possible window */ - space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); *rcv_wscale = clamp_t(int, ilog2(space) - 15, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9bbd8cbe0acb..7e1518bb6115 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1926,7 +1926,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) do_div(rcvwin, advmss); rcvbuf = min_t(u64, rcvwin * rcvmem, - sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); if (rcvbuf > sk->sk_rcvbuf) { u32 window_clamp; @@ -2669,8 +2669,8 @@ static int mptcp_init_sock(struct sock *sk) mptcp_ca_reset(sk); sk_sockets_allocated_inc(sk); - sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; - sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; + sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); + sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); return 0; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 43509c7e90fc..f1c3b8eb4b3d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -517,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, timer_setup(&sk->sk_timer, tipc_sk_timeout, 0); sk->sk_shutdown = 0; sk->sk_backlog_rcv = tipc_sk_backlog_rcv; - sk->sk_rcvbuf = sysctl_tipc_rmem[1]; + sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]); sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; sk->sk_destruct = tipc_sock_destruct; -- cgit v1.2.3 From d0be8347c623e0ac4202a1d4e0373882821f56b0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 21 Jul 2022 09:10:50 -0700 Subject: Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put This fixes the following trace which is caused by hci_rx_work starting up *after* the final channel reference has been put() during sock_close() but *before* the references to the channel have been destroyed, so instead the code now rely on kref_get_unless_zero/l2cap_chan_hold_unless_zero to prevent referencing a channel that is about to be destroyed. refcount_t: increment on 0; use-after-free. BUG: KASAN: use-after-free in refcount_dec_and_test+0x20/0xd0 Read of size 4 at addr ffffffc114f5bf18 by task kworker/u17:14/705 CPU: 4 PID: 705 Comm: kworker/u17:14 Tainted: G S W 4.14.234-00003-g1fb6d0bd49a4-dirty #28 Hardware name: Qualcomm Technologies, Inc. SM8150 V2 PM8150 Google Inc. MSM sm8150 Flame DVT (DT) Workqueue: hci0 hci_rx_work Call trace: dump_backtrace+0x0/0x378 show_stack+0x20/0x2c dump_stack+0x124/0x148 print_address_description+0x80/0x2e8 __kasan_report+0x168/0x188 kasan_report+0x10/0x18 __asan_load4+0x84/0x8c refcount_dec_and_test+0x20/0xd0 l2cap_chan_put+0x48/0x12c l2cap_recv_frame+0x4770/0x6550 l2cap_recv_acldata+0x44c/0x7a4 hci_acldata_packet+0x100/0x188 hci_rx_work+0x178/0x23c process_one_work+0x35c/0x95c worker_thread+0x4cc/0x960 kthread+0x1a8/0x1c4 ret_from_fork+0x10/0x18 Cc: stable@kernel.org Reported-by: Lee Jones Signed-off-by: Luiz Augusto von Dentz Tested-by: Lee Jones Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 61 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 49 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 3c4f550e5a8b..2f766e3437ce 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -847,6 +847,7 @@ enum { }; void l2cap_chan_hold(struct l2cap_chan *c); +struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c); void l2cap_chan_put(struct l2cap_chan *c); static inline void l2cap_chan_lock(struct l2cap_chan *chan) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ae78490ecd3d..52668662ae8d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -111,7 +111,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn, } /* Find channel with given SCID. - * Returns locked channel. */ + * Returns a reference locked channel. + */ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, u16 cid) { @@ -119,15 +120,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_scid(conn, cid); - if (c) - l2cap_chan_lock(c); + if (c) { + /* Only lock if chan reference is not 0 */ + c = l2cap_chan_hold_unless_zero(c); + if (c) + l2cap_chan_lock(c); + } mutex_unlock(&conn->chan_lock); return c; } /* Find channel with given DCID. - * Returns locked channel. + * Returns a reference locked channel. */ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, u16 cid) @@ -136,8 +141,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_dcid(conn, cid); - if (c) - l2cap_chan_lock(c); + if (c) { + /* Only lock if chan reference is not 0 */ + c = l2cap_chan_hold_unless_zero(c); + if (c) + l2cap_chan_lock(c); + } mutex_unlock(&conn->chan_lock); return c; @@ -162,8 +171,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn, mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_ident(conn, ident); - if (c) - l2cap_chan_lock(c); + if (c) { + /* Only lock if chan reference is not 0 */ + c = l2cap_chan_hold_unless_zero(c); + if (c) + l2cap_chan_lock(c); + } mutex_unlock(&conn->chan_lock); return c; @@ -497,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan *c) kref_get(&c->kref); } +struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c) +{ + BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); + + if (!kref_get_unless_zero(&c->kref)) + return NULL; + + return c; +} + void l2cap_chan_put(struct l2cap_chan *c) { BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); @@ -1968,7 +1991,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, src_match = !bacmp(&c->src, src); dst_match = !bacmp(&c->dst, dst); if (src_match && dst_match) { - l2cap_chan_hold(c); + c = l2cap_chan_hold_unless_zero(c); + if (!c) + continue; + read_unlock(&chan_list_lock); return c; } @@ -1983,7 +2009,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, } if (c1) - l2cap_chan_hold(c1); + c1 = l2cap_chan_hold_unless_zero(c1); read_unlock(&chan_list_lock); @@ -4463,6 +4489,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, unlock: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return err; } @@ -4577,6 +4604,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, done: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return err; } @@ -5304,6 +5332,7 @@ send_move_response: l2cap_send_move_chan_rsp(chan, result); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -5396,6 +5425,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result) } l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, @@ -5425,6 +5455,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } static int l2cap_move_channel_rsp(struct l2cap_conn *conn, @@ -5488,6 +5519,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn, l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -5523,6 +5555,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn, } l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -5895,12 +5928,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, if (credits > max_credits) { BT_ERR("LE credits overflow"); l2cap_send_disconn_req(chan, ECONNRESET); - l2cap_chan_unlock(chan); /* Return 0 so that we don't trigger an unnecessary * command reject packet. */ - return 0; + goto unlock; } chan->tx_credits += credits; @@ -5911,7 +5943,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, if (chan->tx_credits) chan->ops->resume(chan); +unlock: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); return 0; } @@ -7597,6 +7631,7 @@ drop: done: l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, @@ -8085,7 +8120,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, if (src_type != c->src_type) continue; - l2cap_chan_hold(c); + c = l2cap_chan_hold_unless_zero(c); read_unlock(&chan_list_lock); return c; } -- cgit v1.2.3 From 85f0173df35e5462d89947135a6a5599c6c3ef6f Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 28 Jul 2022 09:33:07 +0800 Subject: ipv6/addrconf: fix a null-ptr-deref bug for ip6_ptr Change net device's MTU to smaller than IPV6_MIN_MTU or unregister device while matching route. That may trigger null-ptr-deref bug for ip6_ptr probability as following. ========================================================= BUG: KASAN: null-ptr-deref in find_match.part.0+0x70/0x134 Read of size 4 at addr 0000000000000308 by task ping6/263 CPU: 2 PID: 263 Comm: ping6 Not tainted 5.19.0-rc7+ #14 Call trace: dump_backtrace+0x1a8/0x230 show_stack+0x20/0x70 dump_stack_lvl+0x68/0x84 print_report+0xc4/0x120 kasan_report+0x84/0x120 __asan_load4+0x94/0xd0 find_match.part.0+0x70/0x134 __find_rr_leaf+0x408/0x470 fib6_table_lookup+0x264/0x540 ip6_pol_route+0xf4/0x260 ip6_pol_route_output+0x58/0x70 fib6_rule_lookup+0x1a8/0x330 ip6_route_output_flags_noref+0xd8/0x1a0 ip6_route_output_flags+0x58/0x160 ip6_dst_lookup_tail+0x5b4/0x85c ip6_dst_lookup_flow+0x98/0x120 rawv6_sendmsg+0x49c/0xc70 inet_sendmsg+0x68/0x94 Reproducer as following: Firstly, prepare conditions: $ip netns add ns1 $ip netns add ns2 $ip link add veth1 type veth peer name veth2 $ip link set veth1 netns ns1 $ip link set veth2 netns ns2 $ip netns exec ns1 ip -6 addr add 2001:0db8:0:f101::1/64 dev veth1 $ip netns exec ns2 ip -6 addr add 2001:0db8:0:f101::2/64 dev veth2 $ip netns exec ns1 ifconfig veth1 up $ip netns exec ns2 ifconfig veth2 up $ip netns exec ns1 ip -6 route add 2000::/64 dev veth1 metric 1 $ip netns exec ns2 ip -6 route add 2001::/64 dev veth2 metric 1 Secondly, execute the following two commands in two ssh windows respectively: $ip netns exec ns1 sh $while true; do ip -6 addr add 2001:0db8:0:f101::1/64 dev veth1; ip -6 route add 2000::/64 dev veth1 metric 1; ping6 2000::2; done $ip netns exec ns1 sh $while true; do ip link set veth1 mtu 1000; ip link set veth1 mtu 1500; sleep 5; done It is because ip6_ptr has been assigned to NULL in addrconf_ifdown() firstly, then ip6_ignore_linkdown() accesses ip6_ptr directly without NULL check. cpu0 cpu1 fib6_table_lookup __find_rr_leaf addrconf_notify [ NETDEV_CHANGEMTU ] addrconf_ifdown RCU_INIT_POINTER(dev->ip6_ptr, NULL) find_match ip6_ignore_linkdown So we can add NULL check for ip6_ptr before using in ip6_ignore_linkdown() to fix the null-ptr-deref bug. Fixes: dcd1f572954f ("net/ipv6: Remove fib6_idev") Signed-off-by: Ziyang Xuan Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220728013307.656257-1-william.xuanziyang@huawei.com Signed-off-by: Jakub Kicinski --- include/net/addrconf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f7506f08e505..c04f359655b8 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -405,6 +405,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) { const struct inet6_dev *idev = __in6_dev_get(dev); + if (unlikely(!idev)) + return true; + return !!idev->cnf.ignore_routes_with_linkdown; } -- cgit v1.2.3