From 7938cd15436873f649f31cb867bac2d88ca564d0 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Thu, 27 Jul 2023 17:33:56 +0200 Subject: net: gro: fix misuse of CB in udp socket lookup This patch fixes a misuse of IP{6}CB(skb) in GRO, while calling to `udp6_lib_lookup2` when handling udp tunnels. `udp6_lib_lookup2` fetch the device from CB. The fix changes it to fetch the device from `skb->dev`. l3mdev case requires special attention since it has a master and a slave device. Fixes: a6024562ffd7 ("udp: Add GRO functions to UDP socket") Reported-by: Gal Pressman Signed-off-by: Richard Gobert Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/udp.c | 8 ++++++-- net/ipv6/udp_offload.c | 7 +++++-- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b7c972aa09a7..e5da5d1cb215 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -300,10 +301,13 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, { const struct ipv6hdr *iph = ipv6_hdr(skb); struct net *net = dev_net(skb->dev); + int iif, sdif; + + inet6_get_iif_sdif(skb, &iif, &sdif); return __udp6_lib_lookup(net, &iph->saddr, sport, - &iph->daddr, dport, inet6_iif(skb), - inet6_sdif(skb), net->ipv4.udp_table, NULL); + &iph->daddr, dport, iif, + sdif, net->ipv4.udp_table, NULL); } /* Must be called under rcu_read_lock(). diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 09fa7a42cb93..6b95ba241ebe 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -118,10 +118,13 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, { const struct ipv6hdr *iph = skb_gro_network_header(skb); struct net *net = dev_net(skb->dev); + int iif, sdif; + + inet6_get_iif_sdif(skb, &iif, &sdif); return __udp6_lib_lookup(net, &iph->saddr, sport, - &iph->daddr, dport, inet6_iif(skb), - inet6_sdif(skb), net->ipv4.udp_table, NULL); + &iph->daddr, dport, iif, + sdif, net->ipv4.udp_table, NULL); } INDIRECT_CALLABLE_SCOPE -- cgit v1.2.3 From 3c5b4d69c358a9275a8de98f87caf6eda644b086 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jul 2023 15:03:15 +0000 Subject: net: annotate data-races around sk->sk_mark sk->sk_mark is often read while another thread could change the value. Fixes: 4a19ec5800fc ("[NET]: Introducing socket mark socket option.") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 7 ++++--- include/net/ip.h | 2 +- include/net/route.h | 4 ++-- net/can/raw.c | 2 +- net/core/sock.c | 4 ++-- net/dccp/ipv6.c | 4 ++-- net/ipv4/inet_diag.c | 4 ++-- net/ipv4/ip_output.c | 4 ++-- net/ipv4/route.c | 4 ++-- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 4 ++-- net/ipv6/route.c | 7 ++++--- net/ipv6/tcp_ipv6.c | 6 +++--- net/ipv6/udp.c | 4 ++-- net/l2tp/l2tp_ip6.c | 2 +- net/mptcp/sockopt.c | 2 +- net/netfilter/nft_socket.c | 2 +- net/netfilter/xt_socket.c | 4 ++-- net/packet/af_packet.c | 6 +++--- net/smc/af_smc.c | 2 +- net/xdp/xsk.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 23 files changed, 42 insertions(+), 40 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index caa20a905531..0bb32bfc6183 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -107,11 +107,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { - if (!sk->sk_mark && - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) + u32 mark = READ_ONCE(sk->sk_mark); + + if (!mark && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) return skb->mark; - return sk->sk_mark; + return mark; } static inline int inet_request_bound_dev_if(const struct sock *sk, diff --git a/include/net/ip.h b/include/net/ip.h index 50d435855ae2..332521170d9b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -93,7 +93,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, { ipcm_init(ipcm); - ipcm->sockc.mark = inet->sk.sk_mark; + ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark); ipcm->sockc.tsflags = inet->sk.sk_tsflags; ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; diff --git a/include/net/route.h b/include/net/route.h index 5a5c726472bd..8c2a8e7d8f8e 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -168,7 +168,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi __be16 dport, __be16 sport, __u8 proto, __u8 tos, int oif) { - flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, + flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, daddr, saddr, dport, sport, sock_net_uid(net, sk)); @@ -301,7 +301,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk), + flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), protocol, flow_flags, dst, src, dport, sport, sk->sk_uid); } diff --git a/net/can/raw.c b/net/can/raw.c index ba6b52b1d776..e10f59375659 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -865,7 +865,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) skb->dev = dev; skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; skb_setup_tx_timestamp(skb, sockc.tsflags); diff --git a/net/core/sock.c b/net/core/sock.c index 96616eb3869d..d831a3df2cef 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -990,7 +990,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf); static void __sock_set_mark(struct sock *sk, u32 val) { if (val != sk->sk_mark) { - sk->sk_mark = val; + WRITE_ONCE(sk->sk_mark, val); sk_dst_reset(sk); } } @@ -1851,7 +1851,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, optval, optlen, len); case SO_MARK: - v.val = sk->sk_mark; + v.val = READ_ONCE(sk->sk_mark); break; case SO_RCVMARK: diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 7249ef218178..d29d1163203d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -238,8 +238,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); - err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass, - sk->sk_priority); + err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt, + np->tclass, sk->sk_priority); rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index b812eb36f0e3..f7426926a104 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -150,7 +150,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, } #endif - if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark)) + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark))) goto errout; if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || @@ -799,7 +799,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) entry.ifindex = sk->sk_bound_dev_if; entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0; if (sk_fullsock(sk)) - entry.mark = sk->sk_mark; + entry.mark = READ_ONCE(sk->sk_mark); else if (sk->sk_state == TCP_NEW_SYN_RECV) entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; else if (sk->sk_state == TCP_TIME_WAIT) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6e70839257f7..bcdbf448324a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -186,7 +186,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, skb->priority = sk->sk_priority; if (!skb->mark) - skb->mark = sk->sk_mark; + skb->mark = READ_ONCE(sk->sk_mark); /* Send it out. */ return ip_local_out(net, skb->sk, skb); @@ -529,7 +529,7 @@ packet_routed: /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = READ_ONCE(sk->sk_mark); res = ip_local_out(net, sk, skb); rcu_read_unlock(); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 98d7e6ba7493..92fede388d52 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -518,7 +518,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4, const struct inet_sock *inet = inet_sk(sk); oif = sk->sk_bound_dev_if; - mark = sk->sk_mark; + mark = READ_ONCE(sk->sk_mark); tos = ip_sock_rt_tos(sk); scope = ip_sock_rt_scope(sk); prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol; @@ -552,7 +552,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk) & IPTOS_RT_MASK, ip_sock_rt_scope(sk), inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 069642014636..894653be033a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -931,7 +931,7 @@ static void tcp_v4_send_ack(const struct sock *sk, ctl_sk = this_cpu_read(ipv4_tcp_sk); sock_net_set(ctl_sk, net); ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_mark : sk->sk_mark; + inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark); ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_priority : sk->sk_priority; transmit_time = tcp_transmit_time(sk); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index f804c11e2146..c2c291827a2c 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -120,7 +120,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipcm6_init_sk(&ipc6, np); ipc6.sockc.tsflags = sk->sk_tsflags; - ipc6.sockc.mark = sk->sk_mark; + ipc6.sockc.mark = READ_ONCE(sk->sk_mark); fl6.flowi6_oif = oif; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ac1cef094c5f..39b7d727ba40 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -774,12 +774,12 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) */ memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = READ_ONCE(sk->sk_mark); fl6.flowi6_uid = sk->sk_uid; ipcm6_init(&ipc6); ipc6.sockc.tsflags = sk->sk_tsflags; - ipc6.sockc.mark = sk->sk_mark; + ipc6.sockc.mark = fl6.flowi6_mark; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 64e873f5895f..56a55585eb79 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2951,7 +2951,8 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) if (!oif && skb->dev) oif = l3mdev_master_ifindex(skb->dev); - ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid); + ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark), + sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || @@ -3172,8 +3173,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif) void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { - ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, - sk->sk_uid); + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, + READ_ONCE(sk->sk_mark), sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4714eb695913..3ec563742ac4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -564,8 +564,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); - err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt, - tclass, sk->sk_priority); + err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark), + opt, tclass, sk->sk_priority); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -939,7 +939,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (sk->sk_state == TCP_TIME_WAIT) mark = inet_twsk(sk)->tw_mark; else - mark = sk->sk_mark; + mark = READ_ONCE(sk->sk_mark); skb_set_delivery_time(buff, tcp_transmit_time(sk), true); } if (txhash) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e5da5d1cb215..f787e6b8424c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -628,7 +628,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) { if (tunnel) { ip6_redirect(skb, sock_net(sk), inet6_iif(skb), - sk->sk_mark, sk->sk_uid); + READ_ONCE(sk->sk_mark), sk->sk_uid); } else { ip6_sk_redirect(skb, sk); } @@ -1360,7 +1360,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipcm6_init(&ipc6); ipc6.gso_size = READ_ONCE(up->gso_size); ipc6.sockc.tsflags = sk->sk_tsflags; - ipc6.sockc.mark = sk->sk_mark; + ipc6.sockc.mark = READ_ONCE(sk->sk_mark); /* destination address check */ if (sin6) { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index b1623f9c4f92..ff78217f0cb1 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -519,7 +519,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* Get and verify the address */ memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = READ_ONCE(sk->sk_mark); fl6.flowi6_uid = sk->sk_uid; ipcm6_init(&ipc6); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 63f7a09335c5..a3f1fe810cc9 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -103,7 +103,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in break; case SO_MARK: if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { - ssk->sk_mark = sk->sk_mark; + WRITE_ONCE(ssk->sk_mark, sk->sk_mark); sk_dst_reset(ssk); } break; diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 84def74698b7..9ed85be79452 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -107,7 +107,7 @@ static void nft_socket_eval(const struct nft_expr *expr, break; case NFT_SOCKET_MARK: if (sk_fullsock(sk)) { - *dest = sk->sk_mark; + *dest = READ_ONCE(sk->sk_mark); } else { regs->verdict.code = NFT_BREAK; return; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 7013f55f05d1..76e01f292aaf 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -77,7 +77,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent && sk_fullsock(sk)) - pskb->mark = sk->sk_mark; + pskb->mark = READ_ONCE(sk->sk_mark); if (sk != skb->sk) sock_gen_put(sk); @@ -138,7 +138,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent && sk_fullsock(sk)) - pskb->mark = sk->sk_mark; + pskb->mark = READ_ONCE(sk->sk_mark); if (sk != skb->sk) sock_gen_put(sk); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8e3ddec4c3d5..d9aa21a2b3a1 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2051,7 +2051,7 @@ retry: skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; skb_setup_tx_timestamp(skb, sockc.tsflags); @@ -2586,7 +2586,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->protocol = proto; skb->dev = dev; skb->priority = po->sk.sk_priority; - skb->mark = po->sk.sk_mark; + skb->mark = READ_ONCE(po->sk.sk_mark); skb->tstamp = sockc->transmit_time; skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); @@ -2988,7 +2988,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_unlock; sockcm_init(&sockc, sk); - sockc.mark = sk->sk_mark; + sockc.mark = READ_ONCE(sk->sk_mark); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); if (unlikely(err)) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index a7f887d91d89..0c013d2b5d8f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -445,7 +445,7 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, nsk->sk_rcvbuf = osk->sk_rcvbuf; nsk->sk_sndtimeo = osk->sk_sndtimeo; nsk->sk_rcvtimeo = osk->sk_rcvtimeo; - nsk->sk_mark = osk->sk_mark; + nsk->sk_mark = READ_ONCE(osk->sk_mark); nsk->sk_priority = osk->sk_priority; nsk->sk_rcvlowat = osk->sk_rcvlowat; nsk->sk_bound_dev_if = osk->sk_bound_dev_if; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 31dca4ecb2c5..b89adb52a977 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -505,7 +505,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb->dev = dev; skb->priority = xs->sk.sk_priority; - skb->mark = xs->sk.sk_mark; + skb->mark = READ_ONCE(xs->sk.sk_mark); skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr; skb->destructor = xsk_destruct_skb; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e7617c9959c3..d6b405782b63 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2250,7 +2250,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, match = xfrm_selector_match(&pol->selector, fl, family); if (match) { - if ((sk->sk_mark & pol->mark.m) != pol->mark.v || + if ((READ_ONCE(sk->sk_mark) & pol->mark.m) != pol->mark.v || pol->if_id != if_id) { pol = NULL; goto out; -- cgit v1.2.3 From 8bf43be799d4b242ea552a14db10456446be843e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jul 2023 15:03:18 +0000 Subject: net: annotate data-races around sk->sk_priority sk_getsockopt() runs locklessly. This means sk->sk_priority can be read while other threads are changing its value. Other reads also happen without socket lock being held. Add missing annotations where needed. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 6 +++--- net/ipv4/ip_output.c | 4 ++-- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/tcp_ipv6.c | 3 ++- net/packet/af_packet.c | 6 +++--- 8 files changed, 14 insertions(+), 13 deletions(-) (limited to 'net/ipv6') diff --git a/net/core/sock.c b/net/core/sock.c index f11e19c7edfb..6d4f28efe29a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -806,7 +806,7 @@ EXPORT_SYMBOL(sock_no_linger); void sock_set_priority(struct sock *sk, u32 priority) { lock_sock(sk); - sk->sk_priority = priority; + WRITE_ONCE(sk->sk_priority, priority); release_sock(sk); } EXPORT_SYMBOL(sock_set_priority); @@ -1216,7 +1216,7 @@ set_sndbuf: if ((val >= 0 && val <= 6) || sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - sk->sk_priority = val; + WRITE_ONCE(sk->sk_priority, val); else ret = -EPERM; break; @@ -1685,7 +1685,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname, break; case SO_PRIORITY: - v.val = sk->sk_priority; + v.val = READ_ONCE(sk->sk_priority); break; case SO_LINGER: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index bcdbf448324a..54d2d3a2d850 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -184,7 +184,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, ip_options_build(skb, &opt->opt, daddr, rt); } - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); if (!skb->mark) skb->mark = READ_ONCE(sk->sk_mark); @@ -528,7 +528,7 @@ packet_routed: skb_shinfo(skb)->gso_segs ?: 1); /* TODO : should we use skb->sk here instead of sk ? */ - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); res = ip_local_out(net, sk, skb); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8e97d8d4cc9d..d41bce8927b2 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -592,7 +592,7 @@ void __ip_sock_set_tos(struct sock *sk, int val) } if (inet_sk(sk)->tos != val) { inet_sk(sk)->tos = val; - sk->sk_priority = rt_tos2priority(val); + WRITE_ONCE(sk->sk_priority, rt_tos2priority(val)); sk_dst_reset(sk); } } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 7782ff5e6539..cb381f5aa464 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -348,7 +348,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, goto error; skb_reserve(skb, hlen); - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; skb_dst_set(skb, &rt->dst); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 894653be033a..a59cc4b83861 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -933,7 +933,7 @@ static void tcp_v4_send_ack(const struct sock *sk, ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark); ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_priority : sk->sk_priority; + inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority); transmit_time = tcp_transmit_time(sk); ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 39b7d727ba40..49381f35b623 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -614,7 +614,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb_reserve(skb, hlen); skb->protocol = htons(ETH_P_IPV6); - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3ec563742ac4..6e86721e1cdb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1128,7 +1128,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), - ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority, + ipv6_get_dsfield(ipv6_hdr(skb)), 0, + READ_ONCE(sk->sk_priority), READ_ONCE(tcp_rsk(req)->txhash)); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d9aa21a2b3a1..a4631cb457a9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2050,7 +2050,7 @@ retry: skb->protocol = proto; skb->dev = dev; - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; @@ -2585,7 +2585,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->protocol = proto; skb->dev = dev; - skb->priority = po->sk.sk_priority; + skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); skb->tstamp = sockc->transmit_time; skb_setup_tx_timestamp(skb, sockc->tsflags); @@ -3061,7 +3061,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->protocol = proto; skb->dev = dev; - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc.mark; skb->tstamp = sockc.transmit_time; -- cgit v1.2.3 From 30e0191b16e8a58e4620fa3e2839ddc7b9d4281c Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 1 Aug 2023 14:43:18 +0800 Subject: ip6mr: Fix skb_under_panic in ip6mr_cache_report() skbuff: skb_under_panic: text:ffffffff88771f69 len:56 put:-4 head:ffff88805f86a800 data:ffff887f5f86a850 tail:0x88 end:0x2c0 dev:pim6reg ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:192! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 2 PID: 22968 Comm: kworker/2:11 Not tainted 6.5.0-rc3-00044-g0a8db05b571a #236 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:skb_panic+0x152/0x1d0 Call Trace: skb_push+0xc4/0xe0 ip6mr_cache_report+0xd69/0x19b0 reg_vif_xmit+0x406/0x690 dev_hard_start_xmit+0x17e/0x6e0 __dev_queue_xmit+0x2d6a/0x3d20 vlan_dev_hard_start_xmit+0x3ab/0x5c0 dev_hard_start_xmit+0x17e/0x6e0 __dev_queue_xmit+0x2d6a/0x3d20 neigh_connected_output+0x3ed/0x570 ip6_finish_output2+0x5b5/0x1950 ip6_finish_output+0x693/0x11c0 ip6_output+0x24b/0x880 NF_HOOK.constprop.0+0xfd/0x530 ndisc_send_skb+0x9db/0x1400 ndisc_send_rs+0x12a/0x6c0 addrconf_dad_completed+0x3c9/0xea0 addrconf_dad_work+0x849/0x1420 process_one_work+0xa22/0x16e0 worker_thread+0x679/0x10c0 ret_from_fork+0x28/0x60 ret_from_fork_asm+0x11/0x20 When setup a vlan device on dev pim6reg, DAD ns packet may sent on reg_vif_xmit(). reg_vif_xmit() ip6mr_cache_report() skb_push(skb, -skb_network_offset(pkt));//skb_network_offset(pkt) is 4 And skb_push declared as: void *skb_push(struct sk_buff *skb, unsigned int len); skb->data -= len; //0xffff88805f86a84c - 0xfffffffc = 0xffff887f5f86a850 skb->data is set to 0xffff887f5f86a850, which is invalid mem addr, lead to skb_push() fails. Fixes: 14fb64e1f449 ("[IPV6] MROUTE: Support PIM-SM (SSM).") Signed-off-by: Yue Haibing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index cc3d5ad17257..67a3b8f6e72b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1073,7 +1073,7 @@ static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, And all this only to mangle msg->im6_msgtype and to set msg->im6_mbz to "mbz" :-) */ - skb_push(skb, -skb_network_offset(pkt)); + __skb_pull(skb, skb_network_offset(pkt)); skb_push(skb, sizeof(*msg)); skb_reset_transport_header(skb); -- cgit v1.2.3