From 9d2d38c35e7a3de96340c446f3b0fde7b2e7348e Mon Sep 17 00:00:00 2001 From: Zhang Yunkai Date: Mon, 14 Feb 2022 03:27:21 +0000 Subject: ipv4: add description about martian source When multiple containers are running in the environment and multiple macvlan network port are configured in each container, a lot of martian source prints will appear after martian_log is enabled. they are almost the same, and printed by net_warn_ratelimited. Each arp message will trigger this print on each network port. Such as: IPv4: martian source 173.254.95.16 from 173.254.100.109, on dev eth0 ll header: 00000000: ff ff ff ff ff ff 40 00 ad fe 64 6d 08 06 ......@...dm.. IPv4: martian source 173.254.95.16 from 173.254.100.109, on dev eth1 ll header: 00000000: ff ff ff ff ff ff 40 00 ad fe 64 6d 08 06 ......@...dm.. There is no description of this kind of source in the RFC1812. Signed-off-by: Zhang Yunkai Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4d61ddd8a0ec..85117b45216d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -436,6 +436,9 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, if (net->ipv4.fib_has_custom_local_routes || fib4_has_custom_rules(net)) goto full_check; + /* Within the same container, it is regarded as a martian source, + * and the same host but different containers are not. + */ if (inet_lookup_ifaddr_rcu(net, src)) return -EINVAL; -- cgit v1.2.3 From 35a79e64de29e8d57a5989aac57611c0cd29e13e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 16 Feb 2022 00:20:52 -0500 Subject: ping: fix the dif and sdif check in ping_lookup When 'ping' changes to use PING socket instead of RAW socket by: # sysctl -w net.ipv4.ping_group_range="0 100" There is another regression caused when matching sk_bound_dev_if and dif, RAW socket is using inet_iif() while PING socket lookup is using skb->dev->ifindex, the cmd below fails due to this: # ip link add dummy0 type dummy # ip link set dummy0 up # ip addr add 192.168.111.1/24 dev dummy0 # ping -I dummy0 192.168.111.1 -c1 The issue was also reported on: https://github.com/iputils/iputils/issues/104 But fixed in iputils in a wrong way by not binding to device when destination IP is on device, and it will cause some of kselftests to fail, as Jianlin noticed. This patch is to use inet(6)_iif and inet(6)_sdif to get dif and sdif for PING socket, and keep consistent with RAW socket. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ping.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index bcf7bc71cb56..3a5994b50571 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -172,16 +172,23 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) struct sock *sk = NULL; struct inet_sock *isk; struct hlist_nulls_node *hnode; - int dif = skb->dev->ifindex; + int dif, sdif; if (skb->protocol == htons(ETH_P_IP)) { + dif = inet_iif(skb); + sdif = inet_sdif(skb); pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", (int)ident, &ip_hdr(skb)->daddr, dif); #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6)) { + dif = inet6_iif(skb); + sdif = inet6_sdif(skb); pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", (int)ident, &ipv6_hdr(skb)->daddr, dif); #endif + } else { + pr_err("ping: protocol(%x) is not supported\n", ntohs(skb->protocol)); + return NULL; } read_lock_bh(&ping_table.lock); @@ -221,7 +228,7 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) } if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && - sk->sk_bound_dev_if != inet_sdif(skb)) + sk->sk_bound_dev_if != sdif) continue; sock_hold(sk); -- cgit v1.2.3 From 9fcf986cc4bc6a3a39f23fbcbbc3a9e52d3c24fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Feb 2022 09:32:16 -0800 Subject: ipv4: fix data races in fib_alias_hw_flags_set fib_alias_hw_flags_set() can be used by concurrent threads, and is only RCU protected. We need to annotate accesses to following fields of struct fib_alias: offload, trap, offload_failed Because of READ_ONCE()WRITE_ONCE() limitations, make these field u8. BUG: KCSAN: data-race in fib_alias_hw_flags_set / fib_alias_hw_flags_set read to 0xffff888134224a6a of 1 bytes by task 2013 on cpu 1: fib_alias_hw_flags_set+0x28a/0x470 net/ipv4/fib_trie.c:1050 nsim_fib4_rt_hw_flags_set drivers/net/netdevsim/fib.c:350 [inline] nsim_fib4_rt_add drivers/net/netdevsim/fib.c:367 [inline] nsim_fib4_rt_insert drivers/net/netdevsim/fib.c:429 [inline] nsim_fib4_event drivers/net/netdevsim/fib.c:461 [inline] nsim_fib_event drivers/net/netdevsim/fib.c:881 [inline] nsim_fib_event_work+0x1852/0x2cf0 drivers/net/netdevsim/fib.c:1477 process_one_work+0x3f6/0x960 kernel/workqueue.c:2307 process_scheduled_works kernel/workqueue.c:2370 [inline] worker_thread+0x7df/0xa70 kernel/workqueue.c:2456 kthread+0x1bf/0x1e0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 write to 0xffff888134224a6a of 1 bytes by task 4872 on cpu 0: fib_alias_hw_flags_set+0x2d5/0x470 net/ipv4/fib_trie.c:1054 nsim_fib4_rt_hw_flags_set drivers/net/netdevsim/fib.c:350 [inline] nsim_fib4_rt_add drivers/net/netdevsim/fib.c:367 [inline] nsim_fib4_rt_insert drivers/net/netdevsim/fib.c:429 [inline] nsim_fib4_event drivers/net/netdevsim/fib.c:461 [inline] nsim_fib_event drivers/net/netdevsim/fib.c:881 [inline] nsim_fib_event_work+0x1852/0x2cf0 drivers/net/netdevsim/fib.c:1477 process_one_work+0x3f6/0x960 kernel/workqueue.c:2307 process_scheduled_works kernel/workqueue.c:2370 [inline] worker_thread+0x7df/0xa70 kernel/workqueue.c:2456 kthread+0x1bf/0x1e0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 value changed: 0x00 -> 0x02 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 4872 Comm: kworker/0:0 Not tainted 5.17.0-rc3-syzkaller-00188-g1d41d2e82623-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events nsim_fib_event_work Fixes: 90b93f1b31f8 ("ipv4: Add "offload" and "trap" indications to routes") Signed-off-by: Eric Dumazet Reported-by: syzbot Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20220216173217.3792411-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_lookup.h | 7 +++---- net/ipv4/fib_semantics.c | 6 +++--- net/ipv4/fib_trie.c | 22 +++++++++++++--------- net/ipv4/route.c | 4 ++-- 4 files changed, 21 insertions(+), 18 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index e184bcb19943..78e40ea42e58 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -16,10 +16,9 @@ struct fib_alias { u8 fa_slen; u32 tb_id; s16 fa_default; - u8 offload:1, - trap:1, - offload_failed:1, - unused:5; + u8 offload; + u8 trap; + u8 offload_failed; struct rcu_head rcu; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b4589861b84c..2dd375f7407b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -525,9 +525,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, fri.dst_len = dst_len; fri.tos = fa->fa_tos; fri.type = fa->fa_type; - fri.offload = fa->offload; - fri.trap = fa->trap; - fri.offload_failed = fa->offload_failed; + fri.offload = READ_ONCE(fa->offload); + fri.trap = READ_ONCE(fa->trap); + fri.offload_failed = READ_ONCE(fa->offload_failed); err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 8060524f4256..f7f74d5c14da 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1047,19 +1047,23 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) if (!fa_match) goto out; - if (fa_match->offload == fri->offload && fa_match->trap == fri->trap && - fa_match->offload_failed == fri->offload_failed) + /* These are paired with the WRITE_ONCE() happening in this function. + * The reason is that we are only protected by RCU at this point. + */ + if (READ_ONCE(fa_match->offload) == fri->offload && + READ_ONCE(fa_match->trap) == fri->trap && + READ_ONCE(fa_match->offload_failed) == fri->offload_failed) goto out; - fa_match->offload = fri->offload; - fa_match->trap = fri->trap; + WRITE_ONCE(fa_match->offload, fri->offload); + WRITE_ONCE(fa_match->trap, fri->trap); /* 2 means send notifications only if offload_failed was changed. */ if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 && - fa_match->offload_failed == fri->offload_failed) + READ_ONCE(fa_match->offload_failed) == fri->offload_failed) goto out; - fa_match->offload_failed = fri->offload_failed; + WRITE_ONCE(fa_match->offload_failed, fri->offload_failed); if (!net->ipv4.sysctl_fib_notify_on_flag_change) goto out; @@ -2297,9 +2301,9 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, fri.dst_len = KEYLENGTH - fa->fa_slen; fri.tos = fa->fa_tos; fri.type = fa->fa_type; - fri.offload = fa->offload; - fri.trap = fa->trap; - fri.offload_failed = fa->offload_failed; + fri.offload = READ_ONCE(fa->offload); + fri.trap = READ_ONCE(fa->trap); + fri.offload_failed = READ_ONCE(fa->offload_failed); err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ff6f91cdb6c4..f33ad1f383b6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3395,8 +3395,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fa->fa_tos == fri.tos && fa->fa_info == res.fi && fa->fa_type == fri.type) { - fri.offload = fa->offload; - fri.trap = fa->trap; + fri.offload = READ_ONCE(fa->offload); + fri.trap = READ_ONCE(fa->trap); break; } } -- cgit v1.2.3