From 2f1e8ea726e9020e01e9e2ae29c2d5eb11133032 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 8 Sep 2020 02:48:42 +0300 Subject: net: dsa: link interfaces with the DSA master to get rid of lockdep warnings Since commit 845e0ebb4408 ("net: change addr_list_lock back to static key"), cascaded DSA setups (DSA switch port as DSA master for another DSA switch port) are emitting this lockdep warning: ============================================ WARNING: possible recursive locking detected 5.8.0-rc1-00133-g923e4b5032dd-dirty #208 Not tainted -------------------------------------------- dhcpcd/323 is trying to acquire lock: ffff000066dd4268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90 but task is already holding lock: ffff00006608c268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&dsa_master_addr_list_lock_key/1); lock(&dsa_master_addr_list_lock_key/1); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by dhcpcd/323: #0: ffffdbd1381dda18 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x24/0x30 #1: ffff00006614b268 (_xmit_ETHER){+...}-{2:2}, at: dev_set_rx_mode+0x28/0x48 #2: ffff00006608c268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90 stack backtrace: Call trace: dump_backtrace+0x0/0x1e0 show_stack+0x20/0x30 dump_stack+0xec/0x158 __lock_acquire+0xca0/0x2398 lock_acquire+0xe8/0x440 _raw_spin_lock_nested+0x64/0x90 dev_mc_sync+0x44/0x90 dsa_slave_set_rx_mode+0x34/0x50 __dev_set_rx_mode+0x60/0xa0 dev_mc_sync+0x84/0x90 dsa_slave_set_rx_mode+0x34/0x50 __dev_set_rx_mode+0x60/0xa0 dev_set_rx_mode+0x30/0x48 __dev_open+0x10c/0x180 __dev_change_flags+0x170/0x1c8 dev_change_flags+0x2c/0x70 devinet_ioctl+0x774/0x878 inet_ioctl+0x348/0x3b0 sock_do_ioctl+0x50/0x310 sock_ioctl+0x1f8/0x580 ksys_ioctl+0xb0/0xf0 __arm64_sys_ioctl+0x28/0x38 el0_svc_common.constprop.0+0x7c/0x180 do_el0_svc+0x2c/0x98 el0_sync_handler+0x9c/0x1b8 el0_sync+0x158/0x180 Since DSA never made use of the netdev API for describing links between upper devices and lower devices, the dev->lower_level value of a DSA switch interface would be 1, which would warn when it is a DSA master. We can use netdev_upper_dev_link() to describe the relationship between a DSA slave and a DSA master. To be precise, a DSA "slave" (switch port) is an "upper" to a DSA "master" (host port). The relationship is "many uppers to one lower", like in the case of VLAN. So, for that reason, we use the same function as VLAN uses. There might be a chance that somebody will try to take hold of this interface and use it immediately after register_netdev() and before netdev_upper_dev_link(). To avoid that, we do the registration and linkage while holding the RTNL, and we use the RTNL-locked cousin of register_netdev(), which is register_netdevice(). Since this warning was not there when lockdep was using dynamic keys for addr_list_lock, we are blaming the lockdep patch itself. The network stack _has_ been using static lockdep keys before, and it _is_ likely that stacked DSA setups have been triggering these lockdep warnings since forever, however I can't test very old kernels on this particular stacked DSA setup, to ensure I'm not in fact introducing regressions. Fixes: 845e0ebb4408 ("net: change addr_list_lock back to static key") Suggested-by: Cong Wang Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'net/dsa') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9af1a2d0cec4..16e5f98d4882 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1799,15 +1799,27 @@ int dsa_slave_create(struct dsa_port *port) dsa_slave_notify(slave_dev, DSA_PORT_REGISTER); - ret = register_netdev(slave_dev); + rtnl_lock(); + + ret = register_netdevice(slave_dev); if (ret) { netdev_err(master, "error %d registering interface %s\n", ret, slave_dev->name); + rtnl_unlock(); goto out_phy; } + ret = netdev_upper_dev_link(master, slave_dev, NULL); + + rtnl_unlock(); + + if (ret) + goto out_unregister; + return 0; +out_unregister: + unregister_netdev(slave_dev); out_phy: rtnl_lock(); phylink_disconnect_phy(p->dp->pl); @@ -1824,16 +1836,18 @@ out_free: void dsa_slave_destroy(struct net_device *slave_dev) { + struct net_device *master = dsa_slave_to_master(slave_dev); struct dsa_port *dp = dsa_slave_to_port(slave_dev); struct dsa_slave_priv *p = netdev_priv(slave_dev); netif_carrier_off(slave_dev); rtnl_lock(); + netdev_upper_dev_unlink(master, slave_dev); + unregister_netdevice(slave_dev); phylink_disconnect_phy(dp->pl); rtnl_unlock(); dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER); - unregister_netdev(slave_dev); phylink_destroy(dp->pl); gro_cells_destroy(&p->gcells); free_percpu(p->stats64); -- cgit v1.2.3 From 6565243c0677aa2befa5a953cf11bc7b4a6f0a47 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:24 +0300 Subject: net: mscc: ocelot: add locking for the port TX timestamp ID The ocelot_port->ts_id is used to: (a) populate skb->cb[0] for matching the TX timestamp in the PTP IRQ with an skb. (b) populate the REW_OP from the injection header of the ongoing skb. Only then is ocelot_port->ts_id incremented. This is a problem because, at least theoretically, another timestampable skb might use the same ocelot_port->ts_id before that is incremented. Normally all transmit calls are serialized by the netdev transmit spinlock, but in this case, ocelot_port_add_txtstamp_skb() is also called by DSA, which has started declaring the NETIF_F_LLTX feature since commit 2b86cb829976 ("net: dsa: declare lockless TX feature for slave ports"). So the logic of using and incrementing the timestamp id should be atomic per port. The solution is to use the global ocelot_port->ts_id only while protected by the associated ocelot_port->ts_id_lock. That's where we populate skb->cb[0]. Note that for ocelot, ocelot_port_add_txtstamp_skb is called for the actual skb, but for felix, it is called for the skb's clone. That is something which will also be changed in the future. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 8 +++++++- drivers/net/ethernet/mscc/ocelot_net.c | 6 ++---- include/soc/mscc/ocelot.h | 1 + net/dsa/tag_ocelot.c | 11 +++++++---- 4 files changed, 17 insertions(+), 9 deletions(-) (limited to 'net/dsa') diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 5abb7d2b0a9e..83eb7c325061 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -421,10 +421,15 @@ int ocelot_port_add_txtstamp_skb(struct ocelot_port *ocelot_port, if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP && ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { + spin_lock(&ocelot_port->ts_id_lock); + shinfo->tx_flags |= SKBTX_IN_PROGRESS; /* Store timestamp ID in cb[0] of sk_buff */ - skb->cb[0] = ocelot_port->ts_id % 4; + skb->cb[0] = ocelot_port->ts_id; + ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4; skb_queue_tail(&ocelot_port->tx_skbs, skb); + + spin_unlock(&ocelot_port->ts_id_lock); return 0; } return -ENODATA; @@ -1300,6 +1305,7 @@ void ocelot_init_port(struct ocelot *ocelot, int port) struct ocelot_port *ocelot_port = ocelot->ports[port]; skb_queue_head_init(&ocelot_port->tx_skbs); + spin_lock_init(&ocelot_port->ts_id_lock); /* Basic L2 initialization */ diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index cacabc23215a..8490e42e9e2d 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -349,10 +349,8 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP) { info.rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { - info.rew_op |= (ocelot_port->ts_id % 4) << 3; - ocelot_port->ts_id++; - } + if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + info.rew_op |= skb->cb[0] << 3; } ocelot_gen_ifh(ifh, &info); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index da369b12005f..4521dd602ddc 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -566,6 +566,7 @@ struct ocelot_port { u8 ptp_cmd; struct sk_buff_head tx_skbs; u8 ts_id; + spinlock_t ts_id_lock; phy_interface_t phy_mode; diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 42f327c06dca..b4fc05cafaa6 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -160,11 +160,14 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0); if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct sk_buff *clone = DSA_SKB_CB(skb)->clone; + rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { - rew_op |= (ocelot_port->ts_id % 4) << 3; - ocelot_port->ts_id++; - } + /* Retrieve timestamp ID populated inside skb->cb[0] of the + * clone by ocelot_port_add_txtstamp_skb + */ + if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + rew_op |= clone->cb[0] << 3; packing(injection, &rew_op, 125, 117, OCELOT_TAG_LEN, PACK, 0); } -- cgit v1.2.3