From 5038517119d50ed0240059b1d7fc2faa92371c08 Mon Sep 17 00:00:00 2001 From: Kadlecsik József Date: Sat, 25 Jan 2020 20:39:25 +0100 Subject: netfilter: ipset: fix suspicious RCU usage in find_set_and_id find_set_and_id() is called when the NFNL_SUBSYS_IPSET mutex is held. However, in the error path there can be a follow-up recvmsg() without the mutex held. Use the start() function of struct netlink_dump_control instead of dump() to verify and report if the specified set does not exist. Thanks to Pablo Neira Ayuso for helping me to understand the subleties of the netlink protocol. Reported-by: syzbot+fc69d7cb21258ab4ae4d@syzkaller.appspotmail.com Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 41 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index cf895bc80871..69c107f9ba8d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1483,31 +1483,34 @@ ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static int -dump_init(struct netlink_callback *cb, struct ip_set_net *inst) +ip_set_dump_start(struct netlink_callback *cb) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *attr = (void *)nlh + min_len; + struct sk_buff *skb = cb->skb; + struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type; - ip_set_id_t index; int ret; ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_dump_policy, NULL); if (ret) - return ret; + goto error; cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]); if (cda[IPSET_ATTR_SETNAME]) { + ip_set_id_t index; struct ip_set *set; set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); - if (!set) - return -ENOENT; - + if (!set) { + ret = -ENOENT; + goto error; + } dump_type = DUMP_ONE; cb->args[IPSET_CB_INDEX] = index; } else { @@ -1523,10 +1526,17 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) cb->args[IPSET_CB_DUMP] = dump_type; return 0; + +error: + /* We have to create and send the error message manually :-( */ + if (nlh->nlmsg_flags & NLM_F_ACK) { + netlink_ack(cb->skb, nlh, ret, NULL); + } + return ret; } static int -ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) +ip_set_dump_do(struct sk_buff *skb, struct netlink_callback *cb) { ip_set_id_t index = IPSET_INVALID_ID, max; struct ip_set *set = NULL; @@ -1537,18 +1547,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) bool is_destroyed; int ret = 0; - if (!cb->args[IPSET_CB_DUMP]) { - ret = dump_init(cb, inst); - if (ret < 0) { - nlh = nlmsg_hdr(cb->skb); - /* We have to create and send the error message - * manually :-( - */ - if (nlh->nlmsg_flags & NLM_F_ACK) - netlink_ack(cb->skb, nlh, ret, NULL); - return ret; - } - } + if (!cb->args[IPSET_CB_DUMP]) + return -EINVAL; if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) goto out; @@ -1684,7 +1684,8 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, { struct netlink_dump_control c = { - .dump = ip_set_dump_start, + .start = ip_set_dump_start, + .dump = ip_set_dump_do, .done = ip_set_dump_done, }; return netlink_dump_start(ctnl, skb, nlh, &c); -- cgit v1.2.3 From fac20b9e738523fc884ee3ea5be360a321cd8bad Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 30 Jan 2020 21:50:35 +0000 Subject: rxrpc: Fix use-after-free in rxrpc_put_local() Fix rxrpc_put_local() to not access local->debug_id after calling atomic_dec_return() as, unless that returned n==0, we no longer have the right to access the object. Fixes: 06d9532fa6b3 ("rxrpc: Fix read-after-free in rxrpc_queue_local()") Signed-off-by: David Howells --- net/rxrpc/local_object.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 36587260cabd..3aa179efcda4 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -364,11 +364,14 @@ void rxrpc_queue_local(struct rxrpc_local *local) void rxrpc_put_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); + unsigned int debug_id; int n; if (local) { + debug_id = local->debug_id; + n = atomic_dec_return(&local->usage); - trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here); + trace_rxrpc_local(debug_id, rxrpc_local_put, n, here); if (n == 0) call_rcu(&local->rcu, rxrpc_local_rcu); -- cgit v1.2.3 From f71dbf2fb28489a79bde0dca1c8adfb9cdb20a6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 30 Jan 2020 21:50:36 +0000 Subject: rxrpc: Fix insufficient receive notification generation In rxrpc_input_data(), rxrpc_notify_socket() is called if the base sequence number of the packet is immediately following the hard-ack point at the end of the function. However, this isn't sufficient, since the recvmsg side may have been advancing the window and then overrun the position in which we're adding - at which point rx_hard_ack >= seq0 and no notification is generated. Fix this by always generating a notification at the end of the input function. Without this, a long call may stall, possibly indefinitely. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells --- net/rxrpc/input.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 96d54e5bf7bc..ef10fbf71b15 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -599,10 +599,8 @@ ack: false, true, rxrpc_propose_ack_input_data); - if (seq0 == READ_ONCE(call->rx_hard_ack) + 1) { - trace_rxrpc_notify_socket(call->debug_id, serial); - rxrpc_notify_socket(call); - } + trace_rxrpc_notify_socket(call->debug_id, serial); + rxrpc_notify_socket(call); unlock: spin_unlock(&call->input_lock); -- cgit v1.2.3 From 04d36d748fac349b068ef621611f454010054c58 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 30 Jan 2020 21:50:36 +0000 Subject: rxrpc: Fix missing active use pinning of rxrpc_local object The introduction of a split between the reference count on rxrpc_local objects and the usage count didn't quite go far enough. A number of kernel work items need to make use of the socket to perform transmission. These also need to get an active count on the local object to prevent the socket from being closed. Fix this by getting the active count in those places. Also split out the raw active count get/put functions as these places tend to hold refs on the rxrpc_local object already, so getting and putting an extra object ref is just a waste of time. The problem can lead to symptoms like: BUG: kernel NULL pointer dereference, address: 0000000000000018 .. CPU: 2 PID: 818 Comm: kworker/u9:0 Not tainted 5.5.0-fscache+ #51 ... RIP: 0010:selinux_socket_sendmsg+0x5/0x13 ... Call Trace: security_socket_sendmsg+0x2c/0x3e sock_sendmsg+0x1a/0x46 rxrpc_send_keepalive+0x131/0x1ae rxrpc_peer_keepalive_worker+0x219/0x34b process_one_work+0x18e/0x271 worker_thread+0x1a3/0x247 kthread+0xe6/0xeb ret_from_fork+0x1f/0x30 Fixes: 730c5fd42c1e ("rxrpc: Fix local endpoint refcounting") Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 2 ++ net/rxrpc/ar-internal.h | 10 ++++++++++ net/rxrpc/conn_event.c | 30 ++++++++++++++++++++---------- net/rxrpc/local_object.c | 18 +++++++----------- net/rxrpc/peer_event.c | 42 +++++++++++++++++++++++------------------- 5 files changed, 62 insertions(+), 40 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 9d3c4d2d893a..fe42f986cd94 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -194,6 +194,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) service_in_use: write_unlock(&local->services_lock); rxrpc_unuse_local(local); + rxrpc_put_local(local); ret = -EADDRINUSE; error_unlock: release_sock(&rx->sk); @@ -899,6 +900,7 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_purge_queue(&sk->sk_receive_queue); rxrpc_unuse_local(rx->local); + rxrpc_put_local(rx->local); rx->local = NULL; key_put(rx->key); rx->key = NULL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5e99df80e80a..94441fee85bc 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1021,6 +1021,16 @@ void rxrpc_unuse_local(struct rxrpc_local *); void rxrpc_queue_local(struct rxrpc_local *); void rxrpc_destroy_all_locals(struct rxrpc_net *); +static inline bool __rxrpc_unuse_local(struct rxrpc_local *local) +{ + return atomic_dec_return(&local->active_users) == 0; +} + +static inline bool __rxrpc_use_local(struct rxrpc_local *local) +{ + return atomic_fetch_add_unless(&local->active_users, 1, 0) != 0; +} + /* * misc.c */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 808a4723f868..06fcff2ebbba 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -438,16 +438,12 @@ again: /* * connection-level event processor */ -void rxrpc_process_connection(struct work_struct *work) +static void rxrpc_do_process_connection(struct rxrpc_connection *conn) { - struct rxrpc_connection *conn = - container_of(work, struct rxrpc_connection, processor); struct sk_buff *skb; u32 abort_code = RX_PROTOCOL_ERROR; int ret; - rxrpc_see_connection(conn); - if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); @@ -475,18 +471,32 @@ void rxrpc_process_connection(struct work_struct *work) } } -out: - rxrpc_put_connection(conn); - _leave(""); return; requeue_and_leave: skb_queue_head(&conn->rx_queue, skb); - goto out; + return; protocol_error: if (rxrpc_abort_connection(conn, ret, abort_code) < 0) goto requeue_and_leave; rxrpc_free_skb(skb, rxrpc_skb_freed); - goto out; + return; +} + +void rxrpc_process_connection(struct work_struct *work) +{ + struct rxrpc_connection *conn = + container_of(work, struct rxrpc_connection, processor); + + rxrpc_see_connection(conn); + + if (__rxrpc_use_local(conn->params.local)) { + rxrpc_do_process_connection(conn); + rxrpc_unuse_local(conn->params.local); + } + + rxrpc_put_connection(conn); + _leave(""); + return; } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 3aa179efcda4..a6c1349e965d 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -383,14 +383,11 @@ void rxrpc_put_local(struct rxrpc_local *local) */ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) { - unsigned int au; - local = rxrpc_get_local_maybe(local); if (!local) return NULL; - au = atomic_fetch_add_unless(&local->active_users, 1, 0); - if (au == 0) { + if (!__rxrpc_use_local(local)) { rxrpc_put_local(local); return NULL; } @@ -404,14 +401,11 @@ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) */ void rxrpc_unuse_local(struct rxrpc_local *local) { - unsigned int au; - if (local) { - au = atomic_dec_return(&local->active_users); - if (au == 0) + if (__rxrpc_unuse_local(local)) { + rxrpc_get_local(local); rxrpc_queue_local(local); - else - rxrpc_put_local(local); + } } } @@ -468,7 +462,7 @@ static void rxrpc_local_processor(struct work_struct *work) do { again = false; - if (atomic_read(&local->active_users) == 0) { + if (!__rxrpc_use_local(local)) { rxrpc_local_destroyer(local); break; } @@ -482,6 +476,8 @@ static void rxrpc_local_processor(struct work_struct *work) rxrpc_process_local_events(local); again = true; } + + __rxrpc_unuse_local(local); } while (again); rxrpc_put_local(local); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 48f67a9b1037..923b263c401b 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -364,27 +364,31 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, if (!rxrpc_get_peer_maybe(peer)) continue; - spin_unlock_bh(&rxnet->peer_hash_lock); - - keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; - slot = keepalive_at - base; - _debug("%02x peer %u t=%d {%pISp}", - cursor, peer->debug_id, slot, &peer->srx.transport); + if (__rxrpc_use_local(peer->local)) { + spin_unlock_bh(&rxnet->peer_hash_lock); + + keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; + slot = keepalive_at - base; + _debug("%02x peer %u t=%d {%pISp}", + cursor, peer->debug_id, slot, &peer->srx.transport); + + if (keepalive_at <= base || + keepalive_at > base + RXRPC_KEEPALIVE_TIME) { + rxrpc_send_keepalive(peer); + slot = RXRPC_KEEPALIVE_TIME; + } - if (keepalive_at <= base || - keepalive_at > base + RXRPC_KEEPALIVE_TIME) { - rxrpc_send_keepalive(peer); - slot = RXRPC_KEEPALIVE_TIME; + /* A transmission to this peer occurred since last we + * examined it so put it into the appropriate future + * bucket. + */ + slot += cursor; + slot &= mask; + spin_lock_bh(&rxnet->peer_hash_lock); + list_add_tail(&peer->keepalive_link, + &rxnet->peer_keepalive[slot & mask]); + rxrpc_unuse_local(peer->local); } - - /* A transmission to this peer occurred since last we examined - * it so put it into the appropriate future bucket. - */ - slot += cursor; - slot &= mask; - spin_lock_bh(&rxnet->peer_hash_lock); - list_add_tail(&peer->keepalive_link, - &rxnet->peer_keepalive[slot & mask]); rxrpc_put_peer_locked(peer); } -- cgit v1.2.3 From 0e0daf6ac3be70608569262246f6dc33cb3f45fe Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 30 Jan 2020 18:44:50 +0100 Subject: net: mdio: of: fix potential NULL pointer derefernce of_find_mii_timestamper() returns NULL if no timestamper is found. Therefore, guard the unregister_mii_timestamper() calls. Fixes: 1dca22b18421 ("net: mdio: of: Register discovered MII time stampers.") Signed-off-by: Michael Walle Acked-by: Richard Cochran Signed-off-by: Jakub Kicinski --- drivers/of/of_mdio.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index f5c2a5487761..db0ed5879803 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -81,13 +81,15 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, else phy = get_phy_device(mdio, addr, is_c45); if (IS_ERR(phy)) { - unregister_mii_timestamper(mii_ts); + if (mii_ts) + unregister_mii_timestamper(mii_ts); return PTR_ERR(phy); } rc = of_irq_get(child, 0); if (rc == -EPROBE_DEFER) { - unregister_mii_timestamper(mii_ts); + if (mii_ts) + unregister_mii_timestamper(mii_ts); phy_device_free(phy); return rc; } @@ -116,7 +118,8 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, * register it */ rc = phy_device_register(phy); if (rc) { - unregister_mii_timestamper(mii_ts); + if (mii_ts) + unregister_mii_timestamper(mii_ts); phy_device_free(phy); of_node_put(child); return rc; -- cgit v1.2.3 From 2e1bf3a76576eebc787d8882c3f91fa4ec743efc Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 30 Jan 2020 18:44:51 +0100 Subject: net: mii_timestamper: fix static allocation by PHY driver If phydev->mii_ts is set by the PHY driver, it will always be overwritten in of_mdiobus_register_phy(). Fix it. Also make sure, that the unregister() doesn't do anything if the mii_timestamper was provided by the PHY driver. Fixes: 1dca22b18421 ("net: mdio: of: Register discovered MII time stampers.") Signed-off-by: Michael Walle Acked-by: Richard Cochran Signed-off-by: Jakub Kicinski --- drivers/net/phy/mii_timestamper.c | 7 +++++++ drivers/of/of_mdio.c | 8 +++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/mii_timestamper.c b/drivers/net/phy/mii_timestamper.c index 2f12c5d901df..b71b7456462d 100644 --- a/drivers/net/phy/mii_timestamper.c +++ b/drivers/net/phy/mii_timestamper.c @@ -111,6 +111,13 @@ void unregister_mii_timestamper(struct mii_timestamper *mii_ts) struct mii_timestamping_desc *desc; struct list_head *this; + /* mii_timestamper statically registered by the PHY driver won't use the + * register_mii_timestamper() and thus don't have ->device set. Don't + * try to unregister these. + */ + if (!mii_ts->device) + return; + mutex_lock(&tstamping_devices_lock); list_for_each(this, &mii_timestamping_devices) { desc = list_entry(this, struct mii_timestamping_desc, list); diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index db0ed5879803..8270bbf505fb 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -124,7 +124,13 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, of_node_put(child); return rc; } - phy->mii_ts = mii_ts; + + /* phy->mii_ts may already be defined by the PHY driver. A + * mii_timestamper probed via the device tree will still have + * precedence. + */ + if (mii_ts) + phy->mii_ts = mii_ts; dev_dbg(&mdio->dev, "registered phy %pOFn at address %i\n", child, addr); -- cgit v1.2.3 From 2318ca8aef3877da2b16b92edce47a497370a86e Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Thu, 30 Jan 2020 18:54:02 +0100 Subject: net: phy: at803x: disable vddio regulator The probe() might enable a VDDIO regulator, which needs to be disabled again before calling regulator_put(). Add a remove() function. Fixes: 2f664823a470 ("net: phy: at803x: add device tree binding") Signed-off-by: Michael Walle Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/phy/at803x.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index aee62610bade..481cf48c9b9e 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -489,6 +489,14 @@ static int at803x_probe(struct phy_device *phydev) return at803x_parse_dt(phydev); } +static void at803x_remove(struct phy_device *phydev) +{ + struct at803x_priv *priv = phydev->priv; + + if (priv->vddio) + regulator_disable(priv->vddio); +} + static int at803x_clk_out_config(struct phy_device *phydev) { struct at803x_priv *priv = phydev->priv; @@ -711,6 +719,7 @@ static struct phy_driver at803x_driver[] = { .name = "Qualcomm Atheros AR8035", .phy_id_mask = AT803X_PHY_ID_MASK, .probe = at803x_probe, + .remove = at803x_remove, .config_init = at803x_config_init, .set_wol = at803x_set_wol, .get_wol = at803x_get_wol, @@ -726,6 +735,7 @@ static struct phy_driver at803x_driver[] = { .name = "Qualcomm Atheros AR8030", .phy_id_mask = AT803X_PHY_ID_MASK, .probe = at803x_probe, + .remove = at803x_remove, .config_init = at803x_config_init, .link_change_notify = at803x_link_change_notify, .set_wol = at803x_set_wol, @@ -741,6 +751,7 @@ static struct phy_driver at803x_driver[] = { .name = "Qualcomm Atheros AR8031/AR8033", .phy_id_mask = AT803X_PHY_ID_MASK, .probe = at803x_probe, + .remove = at803x_remove, .config_init = at803x_config_init, .set_wol = at803x_set_wol, .get_wol = at803x_get_wol, -- cgit v1.2.3 From b5ce31b5e11b768b7d685b2bab7db09ad5549493 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 30 Jan 2020 10:07:06 -0800 Subject: ionic: fix rxq comp packet type mask Be sure to include all the packet type bits in the mask. Fixes: fbfb8031533c ("ionic: Add hardware init and device commands") Signed-off-by: Shannon Nelson Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index f131adad96e3..ce07c2931a72 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -866,7 +866,7 @@ struct ionic_rxq_comp { #define IONIC_RXQ_COMP_CSUM_F_VLAN 0x40 #define IONIC_RXQ_COMP_CSUM_F_CALC 0x80 u8 pkt_type_color; -#define IONIC_RXQ_COMP_PKT_TYPE_MASK 0x0f +#define IONIC_RXQ_COMP_PKT_TYPE_MASK 0x7f }; enum ionic_pkt_type { -- cgit v1.2.3 From 91a7d4bf3ee3794d4090cedd17b8e865645f5f79 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 30 Jan 2020 18:51:23 -0700 Subject: mlxsw: spectrum_qdisc: Fix 64-bit division error in mlxsw_sp_qdisc_tbf_rate_kbps When building arm32 allmodconfig: ERROR: "__aeabi_uldivmod" [drivers/net/ethernet/mellanox/mlxsw/mlxsw_spectrum.ko] undefined! rate_bytes_ps has type u64, we need to use a 64-bit division helper to avoid a build error. Fixes: a44f58c41bfb ("mlxsw: spectrum_qdisc: Support offloading of TBF Qdisc") Signed-off-by: Nathan Chancellor Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 79a2801d59f6..02526c53d4f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -614,7 +614,7 @@ mlxsw_sp_qdisc_tbf_rate_kbps(struct tc_tbf_qopt_offload_replace_params *p) /* TBF interface is in bytes/s, whereas Spectrum ASIC is configured in * Kbits/s. */ - return p->rate.rate_bytes_ps / 1000 * 8; + return div_u64(p->rate.rate_bytes_ps, 1000) * 8; } static int -- cgit v1.2.3 From b9e0102a57d768bdb99cbbfa01225f73d58e03bc Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 28 Jan 2020 11:07:27 -0800 Subject: netfilter: Use kvcalloc Convert the uses of kvmalloc_array with __GFP_ZERO to the equivalent kvcalloc. Signed-off-by: Joe Perches Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 3 +-- net/netfilter/x_tables.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f4c4b467c87e..d1305423640f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2248,8 +2248,7 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); - hash = kvmalloc_array(nr_slots, sizeof(struct hlist_nulls_head), - GFP_KERNEL | __GFP_ZERO); + hash = kvcalloc(nr_slots, sizeof(struct hlist_nulls_head), GFP_KERNEL); if (hash && nulls) for (i = 0; i < nr_slots; i++) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index ce70c2576bb2..e27c6c5ba9df 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -939,14 +939,14 @@ EXPORT_SYMBOL(xt_check_entry_offsets); * * @size: number of entries * - * Return: NULL or kmalloc'd or vmalloc'd array + * Return: NULL or zeroed kmalloc'd or vmalloc'd array */ unsigned int *xt_alloc_entry_offsets(unsigned int size) { if (size > XT_MAX_TABLE_SIZE / sizeof(unsigned int)) return NULL; - return kvmalloc_array(size, sizeof(unsigned int), GFP_KERNEL | __GFP_ZERO); + return kvcalloc(size, sizeof(unsigned int), GFP_KERNEL); } EXPORT_SYMBOL(xt_alloc_entry_offsets); -- cgit v1.2.3 From 91bfaa15a379e9af24f71fb4ee08d8019b6e8ec7 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 30 Jan 2020 18:04:35 +0200 Subject: netfilter: flowtable: Fix hardware flush order on nf_flow_table_cleanup On netdev down event, nf_flow_table_cleanup() is called for the relevant device and it cleans all the tables that are on that device. If one of those tables has hardware offload flag, nf_flow_table_iterate_cleanup flushes hardware and then runs the gc. But the gc can queue more hardware work, which will take time to execute. Instead first add the work, then flush it, to execute it now. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Signed-off-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 7e91989a1b55..14a069c72bb2 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -529,9 +529,9 @@ static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, struct net_device *dev) { - nf_flow_table_offload_flush(flowtable); nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); flush_delayed_work(&flowtable->gc_work); + nf_flow_table_offload_flush(flowtable); } void nf_flow_table_cleanup(struct net_device *dev) -- cgit v1.2.3 From 0f34f30a1be80f3f59efeaab596396bc698e7337 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 30 Jan 2020 18:04:36 +0200 Subject: netfilter: flowtable: Fix missing flush hardware on table free If entries exist when freeing a hardware offload enabled table, we queue work for hardware while running the gc iteration. Execute it (flush) after queueing. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Signed-off-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 14a069c72bb2..8af28e10b4e6 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -553,6 +553,7 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); + nf_flow_table_offload_flush(flow_table); rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); -- cgit v1.2.3 From c22208b7ce3ef0c2c184ff0d9f6423614b1799d9 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 30 Jan 2020 18:04:37 +0200 Subject: netfilter: flowtable: Fix setting forgotten NF_FLOW_HW_DEAD flag During the refactor this was accidently removed. Fixes: ae29045018c8 ("netfilter: flowtable: add nf_flow_offload_tuple() helper") Signed-off-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index c8b70ffeef0c..83e1db37c3b0 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -675,6 +675,7 @@ static void flow_offload_work_del(struct flow_offload_work *offload) { flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY); + set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags); } static void flow_offload_tuple_stats(struct flow_offload_work *offload, -- cgit v1.2.3 From 78e06cf430934fc3768c342cbebdd1013dcd6fa7 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 30 Jan 2020 20:10:19 +0100 Subject: netfilter: nf_flowtable: fix documentation In the flowtable documentation there is a missing semicolon, the command as is would give this error: nftables.conf:5:27-33: Error: syntax error, unexpected devices, expecting newline or semicolon hook ingress priority 0 devices = { br0, pppoe-data }; ^^^^^^^ nftables.conf:4:12-13: Error: invalid hook (null) flowtable ft { ^^ Fixes: 19b351f16fd9 ("netfilter: add flowtable documentation") Signed-off-by: Matteo Croce Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_flowtable.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/nf_flowtable.txt b/Documentation/networking/nf_flowtable.txt index ca2136c76042..0bf32d1121be 100644 --- a/Documentation/networking/nf_flowtable.txt +++ b/Documentation/networking/nf_flowtable.txt @@ -76,7 +76,7 @@ flowtable and add one rule to your forward chain. table inet x { flowtable f { - hook ingress priority 0 devices = { eth0, eth1 }; + hook ingress priority 0; devices = { eth0, eth1 }; } chain y { type filter hook forward priority 0; policy accept; -- cgit v1.2.3 From c13c48c00a6bc1febc73902505bdec0967bd7095 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 09:14:47 -0800 Subject: tcp: clear tp->total_retrans in tcp_disconnect() total_retrans needs to be cleared in tcp_disconnect(). tcp_disconnect() is rarely used, but it is worth fixing it. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Cc: SeongJae Park Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 484485ae74c2..dd57f1e36181 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2626,6 +2626,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); + tp->total_retrans = 0; inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 * issue in __tcp_select_window() -- cgit v1.2.3 From 2fbdd56251b5c62f96589f39eded277260de7267 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 10:22:47 -0800 Subject: tcp: clear tp->delivered in tcp_disconnect() tp->delivered needs to be cleared in tcp_disconnect(). tcp_disconnect() is rarely used, but it is worth fixing it. Fixes: ddf1af6fa00e ("tcp: new delivery accounting") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dd57f1e36181..a8ffdfb61f42 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2622,6 +2622,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_cwnd = TCP_INIT_CWND; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; + tp->delivered = 0; tp->delivered_ce = 0; tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; -- cgit v1.2.3 From db7ffee6f3eb3683cdcaeddecc0a630a14546fe3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 10:32:41 -0800 Subject: tcp: clear tp->data_segs{in|out} in tcp_disconnect() tp->data_segs_in and tp->data_segs_out need to be cleared in tcp_disconnect(). tcp_disconnect() is rarely used, but it is worth fixing it. Fixes: a44d6eacdaf5 ("tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In") Signed-off-by: Eric Dumazet Cc: Martin KaFai Lau Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a8ffdfb61f42..3b601823f69a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2643,6 +2643,8 @@ int tcp_disconnect(struct sock *sk, int flags) tp->bytes_acked = 0; tp->bytes_received = 0; tp->bytes_retrans = 0; + tp->data_segs_in = 0; + tp->data_segs_out = 0; tp->duplicate_sack[0].start_seq = 0; tp->duplicate_sack[0].end_seq = 0; tp->dsack_dups = 0; -- cgit v1.2.3 From 784f8344de750a41344f4bbbebb8507a730fc99c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 10:44:50 -0800 Subject: tcp: clear tp->segs_{in|out} in tcp_disconnect() tp->segs_in and tp->segs_out need to be cleared in tcp_disconnect(). tcp_disconnect() is rarely used, but it is worth fixing it. Fixes: 2efd055c53c0 ("tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info") Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3b601823f69a..eb2d80519f8e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2639,6 +2639,8 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); tp->compressed_ack = 0; + tp->segs_in = 0; + tp->segs_out = 0; tp->bytes_sent = 0; tp->bytes_acked = 0; tp->bytes_received = 0; -- cgit v1.2.3 From 08ff78182fa170dbbe1ebf31e798e9340d6802a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 31 Jan 2020 08:02:41 +0300 Subject: octeontx2-pf: Fix an IS_ERR() vs NULL bug The otx2_mbox_get_rsp() function never returns NULL, it returns error pointers on error. Fixes: 34bfe0ebedb7 ("octeontx2-pf: MTU, MAC and RX mode config support") Signed-off-by: Dan Carpenter Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 8247d21d0432..b945bd3d5d88 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -171,9 +171,9 @@ static int otx2_hw_get_mac_addr(struct otx2_nic *pfvf, } msghdr = otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr); - if (!msghdr) { + if (IS_ERR(msghdr)) { otx2_mbox_unlock(&pfvf->mbox); - return -ENOMEM; + return PTR_ERR(msghdr); } rsp = (struct nix_get_mac_addr_rsp *)msghdr; ether_addr_copy(netdev->dev_addr, rsp->mac_addr); -- cgit v1.2.3 From d32a06f5434f5e0cdef8e88dc62d079d980088e1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 31 Jan 2020 08:03:26 +0300 Subject: qed: Fix a error code in qed_hw_init() If the qed_fw_overlay_mem_alloc() then we should return -ENOMEM instead of success. Fixes: 30d5f85895fa ("qed: FW 8.42.2.0 Add fw overlay feature") Signed-off-by: Dan Carpenter Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 7912911337d4..03bdd2e26329 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -3114,6 +3114,7 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) if (!p_hwfn->fw_overlay_mem) { DP_NOTICE(p_hwfn, "Failed to allocate fw overlay memory\n"); + rc = -ENOMEM; goto load_err; } -- cgit v1.2.3 From e8d5bb4dfaa7282e6e2672a8369bcbd38e885b4f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 31 Jan 2020 09:59:19 +0100 Subject: MAINTAINERS: Orphan HSR network protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current maintainer Arvid Brodin hasn't contributed to the kernel since 2015-02-27. His company mail address is also bouncing and the company confirmed (2020-01-31) that no Arvid Brodin is working for them: > Vi har dessvärre ingen Arvid Brodin som arbetar på ALTEN. A MIA person cannot be the maintainer. It is better to mark is as orphaned until some other person can jump in and take over the responsibility for HSR. Signed-off-by: Sven Eckelmann Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index dc833b88c8bc..2ce43a06b7b5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7660,9 +7660,8 @@ S: Orphan F: drivers/net/usb/hso.c HSR NETWORK PROTOCOL -M: Arvid Brodin L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: net/hsr/ HT16K33 LED CONTROLLER DRIVER -- cgit v1.2.3 From cb3c0e6bdf64d0d124e94ce43cbe4ccbb9b37f51 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 15:27:04 -0800 Subject: cls_rsvp: fix rsvp_policy NLA_BINARY can be confusing, since .len value represents the max size of the blob. cls_rsvp really wants user space to provide long enough data for TCA_RSVP_DST and TCA_RSVP_SRC attributes. BUG: KMSAN: uninit-value in rsvp_get net/sched/cls_rsvp.h:258 [inline] BUG: KMSAN: uninit-value in gen_handle net/sched/cls_rsvp.h:402 [inline] BUG: KMSAN: uninit-value in rsvp_change+0x1ae9/0x4220 net/sched/cls_rsvp.h:572 CPU: 1 PID: 13228 Comm: syz-executor.1 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 rsvp_get net/sched/cls_rsvp.h:258 [inline] gen_handle net/sched/cls_rsvp.h:402 [inline] rsvp_change+0x1ae9/0x4220 net/sched/cls_rsvp.h:572 tc_new_tfilter+0x31fe/0x5010 net/sched/cls_api.c:2104 rtnetlink_rcv_msg+0xcb7/0x1570 net/core/rtnetlink.c:5415 netlink_rcv_skb+0x451/0x650 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5442 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0xf9e/0x1100 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x1248/0x14d0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45b349 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f269d43dc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f269d43e6d4 RCX: 000000000045b349 RDX: 0000000000000000 RSI: 00000000200001c0 RDI: 0000000000000003 RBP: 000000000075bfc8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000000009c2 R14: 00000000004cb338 R15: 000000000075bfd4 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:82 slab_alloc_node mm/slub.c:2774 [inline] __kmalloc_node_track_caller+0xb40/0x1200 mm/slub.c:4382 __kmalloc_reserve net/core/skbuff.c:141 [inline] __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:209 alloc_skb include/linux/skbuff.h:1049 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1174 [inline] netlink_sendmsg+0x7d3/0x14d0 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 6fa8c0144b77 ("[NET_SCHED]: Use nla_policy for attribute validation in classifiers") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Cong Wang Signed-off-by: Jakub Kicinski --- net/sched/cls_rsvp.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index c22624131949..d36949d9382c 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -463,10 +463,8 @@ static u32 gen_tunnel(struct rsvp_head *data) static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { [TCA_RSVP_CLASSID] = { .type = NLA_U32 }, - [TCA_RSVP_DST] = { .type = NLA_BINARY, - .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_SRC] = { .type = NLA_BINARY, - .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) }, [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, }; -- cgit v1.2.3 From dff6bc1bfd462b76dc13ec19dedc2c134a62ac59 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sat, 1 Feb 2020 13:43:01 +0100 Subject: MAINTAINERS: correct entries for ISDN/mISDN section Commit 6d97985072dc ("isdn: move capi drivers to staging") cleaned up the isdn drivers and split the MAINTAINERS section for ISDN, but missed to add the terminal slash for the two directories mISDN and hardware. Hence, all files in those directories were not part of the new ISDN/mISDN SUBSYSTEM, but were considered to be part of "THE REST". Rectify the situation, and while at it, also complete the section with two further build files that belong to that subsystem. This was identified with a small script that finds all files belonging to "THE REST" according to the current MAINTAINERS file, and I investigated upon its output. Fixes: 6d97985072dc ("isdn: move capi drivers to staging") Signed-off-by: Lukas Bulwahn Acked-by: Arnd Bergmann Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2ce43a06b7b5..0ae68fb8d38f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8911,8 +8911,10 @@ L: isdn4linux@listserv.isdn4linux.de (subscribers-only) L: netdev@vger.kernel.org W: http://www.isdn4linux.de S: Maintained -F: drivers/isdn/mISDN -F: drivers/isdn/hardware +F: drivers/isdn/mISDN/ +F: drivers/isdn/hardware/ +F: drivers/isdn/Kconfig +F: drivers/isdn/Makefile ISDN/CMTP OVER BLUETOOTH M: Karsten Keil -- cgit v1.2.3 From 9603d47bad4642118fa19fd1562569663d9235f6 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 2 Feb 2020 03:38:26 +0000 Subject: tcp: Reduce SYN resend delay if a suspicous ACK is received When closing a connection, the two acks that required to change closing socket's status to FIN_WAIT_2 and then TIME_WAIT could be processed in reverse order. This is possible in RSS disabled environments such as a connection inside a host. For example, expected state transitions and required packets for the disconnection will be similar to below flow. 00 (Process A) (Process B) 01 ESTABLISHED ESTABLISHED 02 close() 03 FIN_WAIT_1 04 ---FIN--> 05 CLOSE_WAIT 06 <--ACK--- 07 FIN_WAIT_2 08 <--FIN/ACK--- 09 TIME_WAIT 10 ---ACK--> 11 LAST_ACK 12 CLOSED CLOSED In some cases such as LINGER option applied socket, the FIN and FIN/ACK will be substituted to RST and RST/ACK, but there is no difference in the main logic. The acks in lines 6 and 8 are the acks. If the line 8 packet is processed before the line 6 packet, it will be just ignored as it is not a expected packet, and the later process of the line 6 packet will change the status of Process A to FIN_WAIT_2, but as it has already handled line 8 packet, it will not go to TIME_WAIT and thus will not send the line 10 packet to Process B. Thus, Process B will left in CLOSE_WAIT status, as below. 00 (Process A) (Process B) 01 ESTABLISHED ESTABLISHED 02 close() 03 FIN_WAIT_1 04 ---FIN--> 05 CLOSE_WAIT 06 (<--ACK---) 07 (<--FIN/ACK---) 08 (fired in right order) 09 <--FIN/ACK--- 10 <--ACK--- 11 (processed in reverse order) 12 FIN_WAIT_2 Later, if the Process B sends SYN to Process A for reconnection using the same port, Process A will responds with an ACK for the last flow, which has no increased sequence number. Thus, Process A will send RST, wait for TIMEOUT_INIT (one second in default), and then try reconnection. If reconnections are frequent, the one second latency spikes can be a big problem. Below is a tcpdump results of the problem: 14.436259 IP 127.0.0.1.45150 > 127.0.0.1.4242: Flags [S], seq 2560603644 14.436266 IP 127.0.0.1.4242 > 127.0.0.1.45150: Flags [.], ack 5, win 512 14.436271 IP 127.0.0.1.45150 > 127.0.0.1.4242: Flags [R], seq 2541101298 /* ONE SECOND DELAY */ 15.464613 IP 127.0.0.1.45150 > 127.0.0.1.4242: Flags [S], seq 2560603644 This commit mitigates the problem by reducing the delay for the next SYN if the suspicous ACK is received while in SYN_SENT state. Following commit will add a selftest, which can be also helpful for understanding of this issue. Signed-off-by: Eric Dumazet Signed-off-by: SeongJae Park Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e325b4506e25..316ebdf8151d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5908,8 +5908,14 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * the segment and return)" */ if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) || - after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) + after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { + /* Previous FIN/ACK or RST/ACK might be ignored. */ + if (icsk->icsk_retransmits == 0) + inet_csk_reset_xmit_timer(sk, + ICSK_TIME_RETRANS, + TCP_TIMEOUT_MIN, TCP_RTO_MAX); goto reset_and_undo; + } if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, -- cgit v1.2.3 From af8c8a450bf4698a8a6a7c68956ea5ccafe4cc88 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 2 Feb 2020 03:38:27 +0000 Subject: selftests: net: Add FIN_ACK processing order related latency spike test This commit adds a test for FIN_ACK process races related reconnection latency spike issues. The issue has described and solved by the previous commit ("tcp: Reduce SYN resend delay if a suspicous ACK is received"). The test program is configured with a server and a client process. The server creates and binds a socket to a port that dynamically allocated, listen on it, and start a infinite loop. Inside the loop, it accepts connection, reads 4 bytes from the socket, and closes the connection. The client is constructed as an infinite loop. Inside the loop, it creates a socket with LINGER and NODELAY option, connect to the server, send 4 bytes data, try read some data from server. After the read() returns, it measure the latency from the beginning of this loop to this point and if the latency is larger than 1 second (spike), print a message. Reviewed-by: Eric Dumazet Signed-off-by: SeongJae Park Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 2 + tools/testing/selftests/net/fin_ack_lat.c | 151 +++++++++++++++++++++++++++++ tools/testing/selftests/net/fin_ack_lat.sh | 35 +++++++ 4 files changed, 189 insertions(+) create mode 100644 tools/testing/selftests/net/fin_ack_lat.c create mode 100755 tools/testing/selftests/net/fin_ack_lat.sh diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 8aefd81fbc86..ecc52d4c034d 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -22,3 +22,4 @@ ipv6_flowlabel_mgr so_txtime tcp_fastopen_backup_key nettest +fin_ack_lat diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index a8e04d665b69..b5694196430a 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -11,6 +11,7 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh +TEST_PROGS += fin_ack_lat.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any @@ -18,6 +19,7 @@ TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr TEST_GEN_FILES += tcp_fastopen_backup_key +TEST_GEN_FILES += fin_ack_lat TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/fin_ack_lat.c b/tools/testing/selftests/net/fin_ack_lat.c new file mode 100644 index 000000000000..70187494b57a --- /dev/null +++ b/tools/testing/selftests/net/fin_ack_lat.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int child_pid; + +static unsigned long timediff(struct timeval s, struct timeval e) +{ + unsigned long s_us, e_us; + + s_us = s.tv_sec * 1000000 + s.tv_usec; + e_us = e.tv_sec * 1000000 + e.tv_usec; + if (s_us > e_us) + return 0; + return e_us - s_us; +} + +static void client(int port) +{ + int sock = 0; + struct sockaddr_in addr, laddr; + socklen_t len = sizeof(laddr); + struct linger sl; + int flag = 1; + int buffer; + struct timeval start, end; + unsigned long lat, sum_lat = 0, nr_lat = 0; + + while (1) { + gettimeofday(&start, NULL); + + sock = socket(AF_INET, SOCK_STREAM, 0); + if (sock < 0) + error(-1, errno, "socket creation"); + + sl.l_onoff = 1; + sl.l_linger = 0; + if (setsockopt(sock, SOL_SOCKET, SO_LINGER, &sl, sizeof(sl))) + error(-1, errno, "setsockopt(linger)"); + + if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, + &flag, sizeof(flag))) + error(-1, errno, "setsockopt(nodelay)"); + + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + + if (inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr) <= 0) + error(-1, errno, "inet_pton"); + + if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) + error(-1, errno, "connect"); + + send(sock, &buffer, sizeof(buffer), 0); + if (read(sock, &buffer, sizeof(buffer)) == -1) + error(-1, errno, "waiting read"); + + gettimeofday(&end, NULL); + lat = timediff(start, end); + sum_lat += lat; + nr_lat++; + if (lat < 100000) + goto close; + + if (getsockname(sock, (struct sockaddr *)&laddr, &len) == -1) + error(-1, errno, "getsockname"); + printf("port: %d, lat: %lu, avg: %lu, nr: %lu\n", + ntohs(laddr.sin_port), lat, + sum_lat / nr_lat, nr_lat); +close: + fflush(stdout); + close(sock); + } +} + +static void server(int sock, struct sockaddr_in address) +{ + int accepted; + int addrlen = sizeof(address); + int buffer; + + while (1) { + accepted = accept(sock, (struct sockaddr *)&address, + (socklen_t *)&addrlen); + if (accepted < 0) + error(-1, errno, "accept"); + + if (read(accepted, &buffer, sizeof(buffer)) == -1) + error(-1, errno, "read"); + close(accepted); + } +} + +static void sig_handler(int signum) +{ + kill(SIGTERM, child_pid); + exit(0); +} + +int main(int argc, char const *argv[]) +{ + int sock; + int opt = 1; + struct sockaddr_in address; + struct sockaddr_in laddr; + socklen_t len = sizeof(laddr); + + if (signal(SIGTERM, sig_handler) == SIG_ERR) + error(-1, errno, "signal"); + + sock = socket(AF_INET, SOCK_STREAM, 0); + if (sock < 0) + error(-1, errno, "socket"); + + if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR | SO_REUSEPORT, + &opt, sizeof(opt)) == -1) + error(-1, errno, "setsockopt"); + + address.sin_family = AF_INET; + address.sin_addr.s_addr = INADDR_ANY; + /* dynamically allocate unused port */ + address.sin_port = 0; + + if (bind(sock, (struct sockaddr *)&address, sizeof(address)) < 0) + error(-1, errno, "bind"); + + if (listen(sock, 3) < 0) + error(-1, errno, "listen"); + + if (getsockname(sock, (struct sockaddr *)&laddr, &len) == -1) + error(-1, errno, "getsockname"); + + fprintf(stderr, "server port: %d\n", ntohs(laddr.sin_port)); + child_pid = fork(); + if (!child_pid) + client(ntohs(laddr.sin_port)); + else + server(sock, laddr); + + return 0; +} diff --git a/tools/testing/selftests/net/fin_ack_lat.sh b/tools/testing/selftests/net/fin_ack_lat.sh new file mode 100755 index 000000000000..a3ff6e0b2c7a --- /dev/null +++ b/tools/testing/selftests/net/fin_ack_lat.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test latency spikes caused by FIN/ACK handling race. + +set +x +set -e + +tmpfile=$(mktemp /tmp/fin_ack_latency.XXXX.log) + +cleanup() { + kill $(pidof fin_ack_lat) + rm -f $tmpfile +} + +trap cleanup EXIT + +do_test() { + RUNTIME=$1 + + ./fin_ack_lat | tee $tmpfile & + PID=$! + + sleep $RUNTIME + NR_SPIKES=$(wc -l $tmpfile | awk '{print $1}') + if [ $NR_SPIKES -gt 0 ] + then + echo "FAIL: $NR_SPIKES spikes detected" + return 1 + fi + return 0 +} + +do_test "30" +echo "test done" -- cgit v1.2.3 From 5273a191dca65a675dc0bcf3909e59c6933e2831 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 30 Jan 2020 21:50:36 +0000 Subject: rxrpc: Fix NULL pointer deref due to call->conn being cleared on disconnect When a call is disconnected, the connection pointer from the call is cleared to make sure it isn't used again and to prevent further attempted transmission for the call. Unfortunately, there might be a daemon trying to use it at the same time to transmit a packet. Fix this by keeping call->conn set, but setting a flag on the call to indicate disconnection instead. Remove also the bits in the transmission functions where the conn pointer is checked and a ref taken under spinlock as this is now redundant. Fixes: 8d94aa381dab ("rxrpc: Calls shouldn't hold socket refs") Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_object.c | 4 ++-- net/rxrpc/conn_client.c | 3 +-- net/rxrpc/conn_object.c | 4 ++-- net/rxrpc/output.c | 27 +++++++++------------------ 5 files changed, 15 insertions(+), 24 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 94441fee85bc..7d730c438404 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -490,6 +490,7 @@ enum rxrpc_call_flag { RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */ RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */ RXRPC_CALL_IS_INTR, /* The call is interruptible */ + RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */ }; /* diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index a31c18c09894..dbdbc4f18b5e 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -493,7 +493,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - if (conn) + if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) rxrpc_disconnect_call(call); if (call->security) call->security->free_call_crypto(call); @@ -569,6 +569,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); struct rxrpc_net *rxnet = call->rxnet; + rxrpc_put_connection(call->conn); rxrpc_put_peer(call->peer); kfree(call->rxtx_buffer); kfree(call->rxtx_annotations); @@ -590,7 +591,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - ASSERTCMP(call->conn, ==, NULL); rxrpc_cleanup_ring(call); rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 376370cd9285..ea7d4c21f889 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -785,6 +785,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) u32 cid; spin_lock(&conn->channel_lock); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); cid = call->cid; if (cid) { @@ -792,7 +793,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) chan = &conn->channels[channel]; } trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); - call->conn = NULL; /* Calls that have never actually been assigned a channel can simply be * discarded. If the conn didn't get used either, it will follow @@ -908,7 +908,6 @@ out: spin_unlock(&rxnet->client_conn_cache_lock); out_2: spin_unlock(&conn->channel_lock); - rxrpc_put_connection(conn); _leave(""); return; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 38d718e90dc6..c0b3154f7a7e 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -171,6 +171,8 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, _enter("%d,%x", conn->debug_id, call->cid); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + if (rcu_access_pointer(chan->call) == call) { /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. @@ -223,9 +225,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) __rxrpc_disconnect_call(conn, call); spin_unlock(&conn->channel_lock); - call->conn = NULL; conn->idle_timestamp = jiffies; - rxrpc_put_connection(conn); } /* diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 935bb60fff56..bad3d2420344 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -129,7 +129,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, rxrpc_serial_t *_serial) { - struct rxrpc_connection *conn = NULL; + struct rxrpc_connection *conn; struct rxrpc_ack_buffer *pkt; struct msghdr msg; struct kvec iov[2]; @@ -139,18 +139,14 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, int ret; u8 reason; - spin_lock_bh(&call->lock); - if (call->conn) - conn = rxrpc_get_connection_maybe(call->conn); - spin_unlock_bh(&call->lock); - if (!conn) + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) { - rxrpc_put_connection(conn); + if (!pkt) return -ENOMEM; - } + + conn = call->conn; msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -244,7 +240,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, } out: - rxrpc_put_connection(conn); kfree(pkt); return ret; } @@ -254,7 +249,7 @@ out: */ int rxrpc_send_abort_packet(struct rxrpc_call *call) { - struct rxrpc_connection *conn = NULL; + struct rxrpc_connection *conn; struct rxrpc_abort_buffer pkt; struct msghdr msg; struct kvec iov[1]; @@ -271,13 +266,11 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) test_bit(RXRPC_CALL_TX_LAST, &call->flags)) return 0; - spin_lock_bh(&call->lock); - if (call->conn) - conn = rxrpc_get_connection_maybe(call->conn); - spin_unlock_bh(&call->lock); - if (!conn) + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; + conn = call->conn; + msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; @@ -312,8 +305,6 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr, rxrpc_tx_point_call_abort); rxrpc_tx_backoff(call, ret); - - rxrpc_put_connection(conn); return ret; } -- cgit v1.2.3 From 14b41a2959fbaa50932699d32ceefd6643abacc6 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 31 Jan 2020 18:01:24 -0800 Subject: net: stmmac: Delete txtimer in suspend() When running v5.5 with a rootfs on NFS, memory abort may happen in the system resume stage: Unable to handle kernel paging request at virtual address dead00000000012a [dead00000000012a] address between user and kernel address ranges pc : run_timer_softirq+0x334/0x3d8 lr : run_timer_softirq+0x244/0x3d8 x1 : ffff800011cafe80 x0 : dead000000000122 Call trace: run_timer_softirq+0x334/0x3d8 efi_header_end+0x114/0x234 irq_exit+0xd0/0xd8 __handle_domain_irq+0x60/0xb0 gic_handle_irq+0x58/0xa8 el1_irq+0xb8/0x180 arch_cpu_idle+0x10/0x18 do_idle+0x1d8/0x2b0 cpu_startup_entry+0x24/0x40 secondary_start_kernel+0x1b4/0x208 Code: f9000693 a9400660 f9000020 b4000040 (f9000401) ---[ end trace bb83ceeb4c482071 ]--- Kernel panic - not syncing: Fatal exception in interrupt SMP: stopping secondary CPUs SMP: failed to stop secondary CPUs 2-3 Kernel Offset: disabled CPU features: 0x00002,2300aa30 Memory Limit: none ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- It's found that stmmac_xmit() and stmmac_resume() sometimes might run concurrently, possibly resulting in a race condition between mod_timer() and setup_timer(), being called by stmmac_xmit() and stmmac_resume() respectively. Since the resume() runs setup_timer() every time, it'd be safer to have del_timer_sync() in the suspend() as the counterpart. Signed-off-by: Nicolin Chen Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ff1cbfc834b0..5836b21edd7e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4974,6 +4974,7 @@ int stmmac_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); + u32 chan; if (!ndev || !netif_running(ndev)) return 0; @@ -4987,6 +4988,9 @@ int stmmac_suspend(struct device *dev) stmmac_disable_all_queues(priv); + for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) + del_timer_sync(&priv->tx_queue[chan].txtimer); + /* Stop TX/RX DMA */ stmmac_stop_all_dma(priv); -- cgit v1.2.3 From c16d4ee0e397163fe7ceac281eaa952e63fadec7 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 2 Feb 2020 02:41:35 -0500 Subject: bnxt_en: Refactor logic to re-enable SRIOV after firmware reset detected. Put the current logic in bnxt_open() to re-enable SRIOV after detecting firmware reset into a new function bnxt_reenable_sriov(). This call needs to be invoked in the firmware reset path also in the next patch. Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 483935b001c8..0253a8f43ac8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9241,6 +9241,17 @@ void bnxt_half_close_nic(struct bnxt *bp) bnxt_free_mem(bp, false); } +static void bnxt_reenable_sriov(struct bnxt *bp) +{ + if (BNXT_PF(bp)) { + struct bnxt_pf_info *pf = &bp->pf; + int n = pf->active_vfs; + + if (n) + bnxt_cfg_hw_sriov(bp, &n, true); + } +} + static int bnxt_open(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); @@ -9259,13 +9270,7 @@ static int bnxt_open(struct net_device *dev) bnxt_hwrm_if_change(bp, false); } else { if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) { - if (BNXT_PF(bp)) { - struct bnxt_pf_info *pf = &bp->pf; - int n = pf->active_vfs; - - if (n) - bnxt_cfg_hw_sriov(bp, &n, true); - } + bnxt_reenable_sriov(bp); if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) bnxt_ulp_start(bp, 0); } -- cgit v1.2.3 From 12de2eadf87825c3990c1aa68b5e93101ca2f043 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 2 Feb 2020 02:41:36 -0500 Subject: bnxt_en: Fix RDMA driver failure with SRIOV after firmware reset. bnxt_ulp_start() needs to be called before SRIOV is re-enabled after firmware reset. Re-enabling SRIOV may consume all the resources and may cause the RDMA driver to fail to get MSIX and other resources. Fix it by calling bnxt_ulp_start() first before calling bnxt_reenable_sriov(). We re-arrange the logic so that we call bnxt_ulp_start() and bnxt_reenable_sriov() in proper sequence in bnxt_fw_reset_task() and bnxt_open(). The former is the normal coordinated firmware reset sequence and the latter is firmware reset while the function is down. This new logic is now more straight forward and will now fix both scenarios. Fixes: f3a6d206c25a ("bnxt_en: Call bnxt_ulp_stop()/bnxt_ulp_start() during error recovery.") Reported-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0253a8f43ac8..a69e46621a04 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9270,9 +9270,10 @@ static int bnxt_open(struct net_device *dev) bnxt_hwrm_if_change(bp, false); } else { if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) { - bnxt_reenable_sriov(bp); - if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) + if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { bnxt_ulp_start(bp, 0); + bnxt_reenable_sriov(bp); + } } bnxt_hwmon_open(bp); } @@ -10836,6 +10837,8 @@ static void bnxt_fw_reset_task(struct work_struct *work) smp_mb__before_atomic(); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bnxt_ulp_start(bp, rc); + if (!rc) + bnxt_reenable_sriov(bp); bnxt_dl_health_recovery_done(bp); bnxt_dl_health_status_update(bp, true); rtnl_unlock(); -- cgit v1.2.3 From d407302895d3f3ca3a333c711744a95e0b1b0150 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 2 Feb 2020 02:41:37 -0500 Subject: bnxt_en: Fix logic that disables Bus Master during firmware reset. The current logic that calls pci_disable_device() in __bnxt_close_nic() during firmware reset is flawed. If firmware is still alive, we're disabling the device too early, causing some firmware commands to not reach the firmware. Fix it by moving the logic to bnxt_reset_close(). If firmware is in fatal condition, we call pci_disable_device() before we free any of the rings to prevent DMA corruption of the freed rings. If firmware is still alive, we call pci_disable_device() after the last firmware message has been sent. Fixes: 3bc7d4a352ef ("bnxt_en: Add BNXT_STATE_IN_FW_RESET state.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a69e46621a04..cea603372cff 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9313,10 +9313,6 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bnxt_debug_dev_exit(bp); bnxt_disable_napi(bp); del_timer_sync(&bp->timer); - if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && - pci_is_enabled(bp->pdev)) - pci_disable_device(bp->pdev); - bnxt_free_skbs(bp); /* Save ring stats before shutdown */ @@ -10102,9 +10098,16 @@ static void bnxt_reset(struct bnxt *bp, bool silent) static void bnxt_fw_reset_close(struct bnxt *bp) { bnxt_ulp_stop(bp); + /* When firmware is fatal state, disable PCI device to prevent + * any potential bad DMAs before freeing kernel memory. + */ + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + pci_disable_device(bp->pdev); __bnxt_close_nic(bp, true, false); bnxt_clear_int_mode(bp); bnxt_hwrm_func_drv_unrgtr(bp); + if (pci_is_enabled(bp->pdev)) + pci_disable_device(bp->pdev); bnxt_free_ctx_mem(bp); kfree(bp->ctx); bp->ctx = NULL; -- cgit v1.2.3 From 18e4960c18f484ac288f41b43d0e6c4c88e6ea78 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 2 Feb 2020 02:41:38 -0500 Subject: bnxt_en: Fix TC queue mapping. The driver currently only calls netdev_set_tc_queue when the number of TCs is greater than 1. Instead, the comparison should be greater than or equal to 1. Even with 1 TC, we need to set the queue mapping. This bug can cause warnings when the number of TCs is changed back to 1. Fixes: 7809592d3e2e ("bnxt_en: Enable MSIX early in bnxt_init_one().") Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cea603372cff..597e6fd5bfea 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7893,7 +7893,7 @@ static void bnxt_setup_msix(struct bnxt *bp) int tcs, i; tcs = netdev_get_num_tc(dev); - if (tcs > 1) { + if (tcs) { int i, off, count; for (i = 0; i < tcs; i++) { -- cgit v1.2.3 From f5cd21605ecd249e5fc715411df22cc1bc877b32 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 1 Feb 2020 16:42:54 +0000 Subject: netdevsim: fix using uninitialized resources When module is being initialized, __init() calls bus_register() and driver_register(). These functions internally create various resources and sysfs files. The sysfs files are used for basic operations(add/del device). /sys/bus/netdevsim/new_device /sys/bus/netdevsim/del_device These sysfs files use netdevsim resources, they are mostly allocated and initialized in ->probe() function, which is nsim_dev_probe(). But, sysfs files could be executed before ->probe() is finished. So, accessing uninitialized data would occur. Another problem is very similar. /sys/bus/netdevsim/new_device internally creates sysfs files. /sys/devices/netdevsim/new_port /sys/devices/netdevsim/del_port These sysfs files also use netdevsim resources, they are mostly allocated and initialized in creating device routine, which is nsim_bus_dev_new(). But they also could be executed before nsim_bus_dev_new() is finished. So, accessing uninitialized data would occur. To fix these problems, this patch adds flags, which means whether the operation is finished or not. The flag variable 'nsim_bus_enable' means whether netdevsim bus was initialized or not. This is protected by nsim_bus_dev_list_lock. The flag variable 'nsim_bus_dev->init' means whether nsim_bus_dev was initialized or not. This could be used in {new/del}_port_store() with no lock. Test commands: #SHELL1 modprobe netdevsim while : do echo "1 1" > /sys/bus/netdevsim/new_device echo "1 1" > /sys/bus/netdevsim/del_device done #SHELL2 while : do echo 1 > /sys/devices/netdevsim1/new_port echo 1 > /sys/devices/netdevsim1/del_port done Splat looks like: [ 47.508954][ T1008] general protection fault, probably for non-canonical address 0xdffffc0000000021: 0000 I [ 47.510793][ T1008] KASAN: null-ptr-deref in range [0x0000000000000108-0x000000000000010f] [ 47.511963][ T1008] CPU: 2 PID: 1008 Comm: bash Not tainted 5.5.0+ #322 [ 47.512823][ T1008] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 47.514041][ T1008] RIP: 0010:__mutex_lock+0x10a/0x14b0 [ 47.514699][ T1008] Code: 08 84 d2 0f 85 7f 12 00 00 44 8b 0d 10 23 65 02 45 85 c9 75 29 49 8d 7f 68 48 b8 00 00 00 0f [ 47.517163][ T1008] RSP: 0018:ffff888059b4fbb0 EFLAGS: 00010206 [ 47.517802][ T1008] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 47.518941][ T1008] RDX: 0000000000000021 RSI: ffffffff85926440 RDI: 0000000000000108 [ 47.519732][ T1008] RBP: ffff888059b4fd30 R08: ffffffffc073fad0 R09: 0000000000000000 [ 47.520729][ T1008] R10: ffff888059b4fd50 R11: ffff88804bb38040 R12: 0000000000000000 [ 47.521702][ T1008] R13: dffffc0000000000 R14: ffffffff871976c0 R15: 00000000000000a0 [ 47.522760][ T1008] FS: 00007fd4be05a740(0000) GS:ffff88806c800000(0000) knlGS:0000000000000000 [ 47.523877][ T1008] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 47.524627][ T1008] CR2: 0000561c82b69cf0 CR3: 0000000065dd6004 CR4: 00000000000606e0 [ 47.527662][ T1008] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 47.528604][ T1008] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 47.529531][ T1008] Call Trace: [ 47.529874][ T1008] ? nsim_dev_port_add+0x50/0x150 [netdevsim] [ 47.530470][ T1008] ? mutex_lock_io_nested+0x1380/0x1380 [ 47.531018][ T1008] ? _kstrtoull+0x76/0x160 [ 47.531449][ T1008] ? _parse_integer+0xf0/0xf0 [ 47.531874][ T1008] ? kernfs_fop_write+0x1cf/0x410 [ 47.532330][ T1008] ? sysfs_file_ops+0x160/0x160 [ 47.532773][ T1008] ? kstrtouint+0x86/0x110 [ 47.533168][ T1008] ? nsim_dev_port_add+0x50/0x150 [netdevsim] [ 47.533721][ T1008] nsim_dev_port_add+0x50/0x150 [netdevsim] [ 47.534336][ T1008] ? sysfs_file_ops+0x160/0x160 [ 47.534858][ T1008] new_port_store+0x99/0xb0 [netdevsim] [ 47.535439][ T1008] ? del_port_store+0xb0/0xb0 [netdevsim] [ 47.536035][ T1008] ? sysfs_file_ops+0x112/0x160 [ 47.536544][ T1008] ? sysfs_kf_write+0x3b/0x180 [ 47.537029][ T1008] kernfs_fop_write+0x276/0x410 [ 47.537548][ T1008] ? __sb_start_write+0x215/0x2e0 [ 47.538110][ T1008] vfs_write+0x197/0x4a0 [ ... ] Fixes: f9d9db47d3ba ("netdevsim: add bus attributes to add new and delete devices") Fixes: 794b2c05ca1c ("netdevsim: extend device attrs to support port addition and deletion") Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/bus.c | 43 ++++++++++++++++++++++++++++++++++++--- drivers/net/netdevsim/netdevsim.h | 1 + 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index 6aeed0c600f8..c086d1e522dc 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -17,6 +17,7 @@ static DEFINE_IDA(nsim_bus_dev_ids); static LIST_HEAD(nsim_bus_dev_list); static DEFINE_MUTEX(nsim_bus_dev_list_lock); +static bool nsim_bus_enable; static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev) { @@ -99,6 +100,9 @@ new_port_store(struct device *dev, struct device_attribute *attr, unsigned int port_index; int ret; + /* Prevent to use nsim_bus_dev before initialization. */ + if (!smp_load_acquire(&nsim_bus_dev->init)) + return -EBUSY; ret = kstrtouint(buf, 0, &port_index); if (ret) return ret; @@ -116,6 +120,9 @@ del_port_store(struct device *dev, struct device_attribute *attr, unsigned int port_index; int ret; + /* Prevent to use nsim_bus_dev before initialization. */ + if (!smp_load_acquire(&nsim_bus_dev->init)) + return -EBUSY; ret = kstrtouint(buf, 0, &port_index); if (ret) return ret; @@ -179,15 +186,30 @@ new_device_store(struct bus_type *bus, const char *buf, size_t count) pr_err("Format for adding new device is \"id port_count\" (uint uint).\n"); return -EINVAL; } - nsim_bus_dev = nsim_bus_dev_new(id, port_count); - if (IS_ERR(nsim_bus_dev)) - return PTR_ERR(nsim_bus_dev); mutex_lock(&nsim_bus_dev_list_lock); + /* Prevent to use resource before initialization. */ + if (!smp_load_acquire(&nsim_bus_enable)) { + err = -EBUSY; + goto err; + } + + nsim_bus_dev = nsim_bus_dev_new(id, port_count); + if (IS_ERR(nsim_bus_dev)) { + err = PTR_ERR(nsim_bus_dev); + goto err; + } + + /* Allow using nsim_bus_dev */ + smp_store_release(&nsim_bus_dev->init, true); + list_add_tail(&nsim_bus_dev->list, &nsim_bus_dev_list); mutex_unlock(&nsim_bus_dev_list_lock); return count; +err: + mutex_unlock(&nsim_bus_dev_list_lock); + return err; } static BUS_ATTR_WO(new_device); @@ -215,6 +237,11 @@ del_device_store(struct bus_type *bus, const char *buf, size_t count) err = -ENOENT; mutex_lock(&nsim_bus_dev_list_lock); + /* Prevent to use resource before initialization. */ + if (!smp_load_acquire(&nsim_bus_enable)) { + mutex_unlock(&nsim_bus_dev_list_lock); + return -EBUSY; + } list_for_each_entry_safe(nsim_bus_dev, tmp, &nsim_bus_dev_list, list) { if (nsim_bus_dev->dev.id != id) continue; @@ -284,6 +311,8 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count) nsim_bus_dev->dev.type = &nsim_bus_dev_type; nsim_bus_dev->port_count = port_count; nsim_bus_dev->initial_net = current->nsproxy->net_ns; + /* Disallow using nsim_bus_dev */ + smp_store_release(&nsim_bus_dev->init, false); err = device_register(&nsim_bus_dev->dev); if (err) @@ -299,6 +328,8 @@ err_nsim_bus_dev_free: static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev) { + /* Disallow using nsim_bus_dev */ + smp_store_release(&nsim_bus_dev->init, false); device_unregister(&nsim_bus_dev->dev); ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id); kfree(nsim_bus_dev); @@ -320,6 +351,8 @@ int nsim_bus_init(void) err = driver_register(&nsim_driver); if (err) goto err_bus_unregister; + /* Allow using resources */ + smp_store_release(&nsim_bus_enable, true); return 0; err_bus_unregister: @@ -331,12 +364,16 @@ void nsim_bus_exit(void) { struct nsim_bus_dev *nsim_bus_dev, *tmp; + /* Disallow using resources */ + smp_store_release(&nsim_bus_enable, false); + mutex_lock(&nsim_bus_dev_list_lock); list_for_each_entry_safe(nsim_bus_dev, tmp, &nsim_bus_dev_list, list) { list_del(&nsim_bus_dev->list); nsim_bus_dev_del(nsim_bus_dev); } mutex_unlock(&nsim_bus_dev_list_lock); + driver_unregister(&nsim_driver); bus_unregister(&nsim_bus); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 94df795ef4d3..ea3931391ce2 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -240,6 +240,7 @@ struct nsim_bus_dev { */ unsigned int num_vfs; struct nsim_vf_config *vfconfigs; + bool init; }; int nsim_bus_init(void); -- cgit v1.2.3 From 6ab63366e1ec4ec1900f253aa64727b4b5f4ee73 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 1 Feb 2020 16:43:04 +0000 Subject: netdevsim: disable devlink reload when resources are being used devlink reload destroys resources and allocates resources again. So, when devices and ports resources are being used, devlink reload function should not be executed. In order to avoid this race, a new lock is added and new_port() and del_port() call devlink_reload_disable() and devlink_reload_enable(). Thread0 Thread1 {new/del}_port() {new/del}_port() devlink_reload_disable() devlink_reload_disable() devlink_reload_enable() //here devlink_reload_enable() Before Thread1's devlink_reload_enable(), the devlink is already allowed to execute reload because Thread0 allows it. devlink reload disable/enable variable type is bool. So the above case would exist. So, disable/enable should be executed atomically. In order to do that, a new lock is used. Test commands: modprobe netdevsim echo 1 > /sys/bus/netdevsim/new_device while : do echo 1 > /sys/devices/netdevsim1/new_port & echo 1 > /sys/devices/netdevsim1/del_port & devlink dev reload netdevsim/netdevsim1 & done Splat looks like: [ 23.342145][ T932] DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock)) [ 23.342159][ T932] WARNING: CPU: 0 PID: 932 at kernel/locking/mutex-debug.c:103 mutex_destroy+0xc7/0xf0 [ 23.344182][ T932] Modules linked in: netdevsim openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_dx [ 23.346485][ T932] CPU: 0 PID: 932 Comm: devlink Not tainted 5.5.0+ #322 [ 23.347696][ T932] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 23.348893][ T932] RIP: 0010:mutex_destroy+0xc7/0xf0 [ 23.349505][ T932] Code: e0 07 83 c0 03 38 d0 7c 04 84 d2 75 2e 8b 05 00 ac b0 02 85 c0 75 8b 48 c7 c6 00 5e 07 96 40 [ 23.351887][ T932] RSP: 0018:ffff88806208f810 EFLAGS: 00010286 [ 23.353963][ T932] RAX: dffffc0000000008 RBX: ffff888067f6f2c0 RCX: ffffffff942c4bd4 [ 23.355222][ T932] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff96dac5b4 [ 23.356169][ T932] RBP: ffff888067f6f000 R08: fffffbfff2d235a5 R09: fffffbfff2d235a5 [ 23.357160][ T932] R10: 0000000000000001 R11: fffffbfff2d235a4 R12: ffff888067f6f208 [ 23.358288][ T932] R13: ffff88806208fa70 R14: ffff888067f6f000 R15: ffff888069ce3800 [ 23.359307][ T932] FS: 00007fe2a3876740(0000) GS:ffff88806c000000(0000) knlGS:0000000000000000 [ 23.360473][ T932] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 23.361319][ T932] CR2: 00005561357aa000 CR3: 000000005227a006 CR4: 00000000000606f0 [ 23.362323][ T932] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 23.363417][ T932] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 23.364414][ T932] Call Trace: [ 23.364828][ T932] nsim_dev_reload_destroy+0x77/0xb0 [netdevsim] [ 23.365655][ T932] nsim_dev_reload_down+0x84/0xb0 [netdevsim] [ 23.366433][ T932] devlink_reload+0xb1/0x350 [ 23.367010][ T932] genl_rcv_msg+0x580/0xe90 [ ...] [ 23.531729][ T1305] kernel BUG at lib/list_debug.c:53! [ 23.532523][ T1305] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 23.533467][ T1305] CPU: 2 PID: 1305 Comm: bash Tainted: G W 5.5.0+ #322 [ 23.534962][ T1305] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 23.536503][ T1305] RIP: 0010:__list_del_entry_valid+0xe6/0x150 [ 23.538346][ T1305] Code: 89 ea 48 c7 c7 00 73 1e 96 e8 df f7 4c ff 0f 0b 48 c7 c7 60 73 1e 96 e8 d1 f7 4c ff 0f 0b 44 [ 23.541068][ T1305] RSP: 0018:ffff888047c27b58 EFLAGS: 00010282 [ 23.542001][ T1305] RAX: 0000000000000054 RBX: ffff888067f6f318 RCX: 0000000000000000 [ 23.543051][ T1305] RDX: 0000000000000054 RSI: 0000000000000008 RDI: ffffed1008f84f61 [ 23.544072][ T1305] RBP: ffff88804aa0fca0 R08: ffffed100d940539 R09: ffffed100d940539 [ 23.545085][ T1305] R10: 0000000000000001 R11: ffffed100d940538 R12: ffff888047c27cb0 [ 23.546422][ T1305] R13: ffff88806208b840 R14: ffffffff981976c0 R15: ffff888067f6f2c0 [ 23.547406][ T1305] FS: 00007f76c0431740(0000) GS:ffff88806c800000(0000) knlGS:0000000000000000 [ 23.548527][ T1305] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 23.549389][ T1305] CR2: 00007f5048f1a2f8 CR3: 000000004b310006 CR4: 00000000000606e0 [ 23.550636][ T1305] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 23.551578][ T1305] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 23.552597][ T1305] Call Trace: [ 23.553004][ T1305] mutex_remove_waiter+0x101/0x520 [ 23.553646][ T1305] __mutex_lock+0xac7/0x14b0 [ 23.554218][ T1305] ? nsim_dev_port_del+0x4e/0x140 [netdevsim] [ 23.554908][ T1305] ? mutex_lock_io_nested+0x1380/0x1380 [ 23.555570][ T1305] ? _parse_integer+0xf0/0xf0 [ 23.556043][ T1305] ? kstrtouint+0x86/0x110 [ 23.556504][ T1305] ? nsim_dev_port_del+0x4e/0x140 [netdevsim] [ 23.557133][ T1305] nsim_dev_port_del+0x4e/0x140 [netdevsim] [ 23.558024][ T1305] del_port_store+0xcc/0xf0 [netdevsim] [ ... ] Fixes: 75ba029f3c07 ("netdevsim: implement proper devlink reload") Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/bus.c | 19 +++++++++++++++++++ drivers/net/netdevsim/netdevsim.h | 2 ++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index c086d1e522dc..e455dd1cf4d0 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -97,6 +97,8 @@ new_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); + struct nsim_dev *nsim_dev = dev_get_drvdata(dev); + struct devlink *devlink; unsigned int port_index; int ret; @@ -106,7 +108,14 @@ new_port_store(struct device *dev, struct device_attribute *attr, ret = kstrtouint(buf, 0, &port_index); if (ret) return ret; + + devlink = priv_to_devlink(nsim_dev); + + mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); + devlink_reload_disable(devlink); ret = nsim_dev_port_add(nsim_bus_dev, port_index); + devlink_reload_enable(devlink); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; } @@ -117,6 +126,8 @@ del_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); + struct nsim_dev *nsim_dev = dev_get_drvdata(dev); + struct devlink *devlink; unsigned int port_index; int ret; @@ -126,7 +137,14 @@ del_port_store(struct device *dev, struct device_attribute *attr, ret = kstrtouint(buf, 0, &port_index); if (ret) return ret; + + devlink = priv_to_devlink(nsim_dev); + + mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); + devlink_reload_disable(devlink); ret = nsim_dev_port_del(nsim_bus_dev, port_index); + devlink_reload_enable(devlink); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; } @@ -311,6 +329,7 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count) nsim_bus_dev->dev.type = &nsim_bus_dev_type; nsim_bus_dev->port_count = port_count; nsim_bus_dev->initial_net = current->nsproxy->net_ns; + mutex_init(&nsim_bus_dev->nsim_bus_reload_lock); /* Disallow using nsim_bus_dev */ smp_store_release(&nsim_bus_dev->init, false); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index ea3931391ce2..be100b11a055 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -240,6 +240,8 @@ struct nsim_bus_dev { */ unsigned int num_vfs; struct nsim_vf_config *vfconfigs; + /* Lock for devlink->reload_enabled in netdevsim module */ + struct mutex nsim_bus_reload_lock; bool init; }; -- cgit v1.2.3 From 8526ad9646b17c59b6d430d8baa8f152a14fe177 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 1 Feb 2020 16:43:13 +0000 Subject: netdevsim: fix panic in nsim_dev_take_snapshot_write() nsim_dev_take_snapshot_write() uses nsim_dev and nsim_dev->dummy_region. So, during this function, these data shouldn't be removed. But there is no protecting stuff in this function. There are two similar cases. 1. reload case reload could be called during nsim_dev_take_snapshot_write(). When reload is being executed, nsim_dev_reload_down() is called and it calls nsim_dev_reload_destroy(). nsim_dev_reload_destroy() calls devlink_region_destroy() to destroy nsim_dev->dummy_region. So, during nsim_dev_take_snapshot_write(), nsim_dev->dummy_region() would be removed. At this point, snapshot_write() would access freed pointer. In order to fix this case, take_snapshot file will be removed before devlink_region_destroy(). The take_snapshot file will be re-created by ->reload_up(). 2. del_device_store case del_device_store() also could call nsim_dev_reload_destroy() during nsim_dev_take_snapshot_write(). If so, panic would occur. This problem is actually the same problem with the first case. So, this problem will be fixed by the first case's solution. Test commands: modprobe netdevsim while : do echo 1 > /sys/bus/netdevsim/new_device & echo 1 > /sys/bus/netdevsim/del_device & devlink dev reload netdevsim/netdevsim1 & echo 1 > /sys/kernel/debug/netdevsim/netdevsim1/take_snapshot & done Splat looks like: [ 45.564513][ T975] general protection fault, probably for non-canonical address 0xdffffc000000003a: 0000 [#1] SMP DEI [ 45.566131][ T975] KASAN: null-ptr-deref in range [0x00000000000001d0-0x00000000000001d7] [ 45.566135][ T975] CPU: 1 PID: 975 Comm: bash Not tainted 5.5.0+ #322 [ 45.569020][ T975] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 45.569026][ T975] RIP: 0010:__mutex_lock+0x10a/0x14b0 [ 45.570518][ T975] Code: 08 84 d2 0f 85 7f 12 00 00 44 8b 0d 10 23 65 02 45 85 c9 75 29 49 8d 7f 68 48 b8 00 00 00 0f [ 45.570522][ T975] RSP: 0018:ffff888046ccfbf0 EFLAGS: 00010206 [ 45.572305][ T975] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 45.572308][ T975] RDX: 000000000000003a RSI: ffffffffac926440 RDI: 00000000000001d0 [ 45.576843][ T975] RBP: ffff888046ccfd70 R08: ffffffffab610645 R09: 0000000000000000 [ 45.576847][ T975] R10: ffff888046ccfd90 R11: ffffed100d6360ad R12: 0000000000000000 [ 45.578471][ T975] R13: dffffc0000000000 R14: ffffffffae1976c0 R15: 0000000000000168 [ 45.578475][ T975] FS: 00007f614d6e7740(0000) GS:ffff88806c400000(0000) knlGS:0000000000000000 [ 45.581492][ T975] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 45.582942][ T975] CR2: 00005618677d1cf0 CR3: 000000005fb9c002 CR4: 00000000000606e0 [ 45.584543][ T975] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 45.586633][ T975] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 45.589889][ T975] Call Trace: [ 45.591445][ T975] ? devlink_region_snapshot_create+0x55/0x4a0 [ 45.601250][ T975] ? mutex_lock_io_nested+0x1380/0x1380 [ 45.602817][ T975] ? mutex_lock_io_nested+0x1380/0x1380 [ 45.603875][ T975] ? mark_held_locks+0xa5/0xe0 [ 45.604769][ T975] ? _raw_spin_unlock_irqrestore+0x2d/0x50 [ 45.606147][ T975] ? __mutex_unlock_slowpath+0xd0/0x670 [ 45.607723][ T975] ? crng_backtrack_protect+0x80/0x80 [ 45.613530][ T975] ? wait_for_completion+0x390/0x390 [ 45.615152][ T975] ? devlink_region_snapshot_create+0x55/0x4a0 [ 45.616834][ T975] devlink_region_snapshot_create+0x55/0x4a0 [ ... ] Fixes: 4418f862d675 ("netdevsim: implement support for devlink region and snapshots") Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 13 +++++++++++-- drivers/net/netdevsim/netdevsim.h | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index b53fbc06e104..06b178be3017 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -88,8 +88,11 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) &nsim_dev->max_macs); debugfs_create_bool("test1", 0600, nsim_dev->ddir, &nsim_dev->test1); - debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev, - &nsim_dev_take_snapshot_fops); + nsim_dev->take_snapshot = debugfs_create_file("take_snapshot", + 0200, + nsim_dev->ddir, + nsim_dev, + &nsim_dev_take_snapshot_fops); debugfs_create_bool("dont_allow_reload", 0600, nsim_dev->ddir, &nsim_dev->dont_allow_reload); debugfs_create_bool("fail_reload", 0600, nsim_dev->ddir, @@ -740,6 +743,11 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, if (err) goto err_health_exit; + nsim_dev->take_snapshot = debugfs_create_file("take_snapshot", + 0200, + nsim_dev->ddir, + nsim_dev, + &nsim_dev_take_snapshot_fops); return 0; err_health_exit: @@ -853,6 +861,7 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) if (devlink_is_reload_failed(devlink)) return; + debugfs_remove(nsim_dev->take_snapshot); nsim_dev_port_del_all(nsim_dev); nsim_dev_health_exit(nsim_dev); nsim_dev_traps_exit(devlink); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index be100b11a055..2eb7b0dc1594 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -160,6 +160,7 @@ struct nsim_dev { struct nsim_trap_data *trap_data; struct dentry *ddir; struct dentry *ports_ddir; + struct dentry *take_snapshot; struct bpf_offload_dev *bpf_dev; bool bpf_bind_accept; u32 bpf_bind_verifier_delay; -- cgit v1.2.3 From 6fb8852b1298200da39bd85788bc5755d1d56f32 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 1 Feb 2020 16:43:22 +0000 Subject: netdevsim: fix stack-out-of-bounds in nsim_dev_debugfs_init() When netdevsim dev is being created, a debugfs directory is created. The variable "dev_ddir_name" is 16bytes device name pointer and device name is "netdevsim". The maximum dev id length is 10. So, 16bytes for device name isn't enough. Test commands: modprobe netdevsim echo "1000000000 0" > /sys/bus/netdevsim/new_device Splat looks like: [ 249.622710][ T900] BUG: KASAN: stack-out-of-bounds in number+0x824/0x880 [ 249.623658][ T900] Write of size 1 at addr ffff88804c527988 by task bash/900 [ 249.624521][ T900] [ 249.624830][ T900] CPU: 1 PID: 900 Comm: bash Not tainted 5.5.0+ #322 [ 249.625691][ T900] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 249.626712][ T900] Call Trace: [ 249.627103][ T900] dump_stack+0x96/0xdb [ 249.627639][ T900] ? number+0x824/0x880 [ 249.628173][ T900] print_address_description.constprop.5+0x1be/0x360 [ 249.629022][ T900] ? number+0x824/0x880 [ 249.629569][ T900] ? number+0x824/0x880 [ 249.630105][ T900] __kasan_report+0x12a/0x170 [ 249.630717][ T900] ? number+0x824/0x880 [ 249.631201][ T900] kasan_report+0xe/0x20 [ 249.631723][ T900] number+0x824/0x880 [ 249.632235][ T900] ? put_dec+0xa0/0xa0 [ 249.632716][ T900] ? rcu_read_lock_sched_held+0x90/0xc0 [ 249.633392][ T900] vsnprintf+0x63c/0x10b0 [ 249.633983][ T900] ? pointer+0x5b0/0x5b0 [ 249.634543][ T900] ? mark_lock+0x11d/0xc40 [ 249.635200][ T900] sprintf+0x9b/0xd0 [ 249.635750][ T900] ? scnprintf+0xe0/0xe0 [ 249.636370][ T900] nsim_dev_probe+0x63c/0xbf0 [netdevsim] [ ... ] Reviewed-by: Jakub Kicinski Fixes: ab1d0cc004d7 ("netdevsim: change debugfs tree topology") Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 06b178be3017..273a24245d0b 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -73,7 +73,7 @@ static const struct file_operations nsim_dev_take_snapshot_fops = { static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) { - char dev_ddir_name[16]; + char dev_ddir_name[sizeof(DRV_NAME) + 10]; sprintf(dev_ddir_name, DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id); nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir); -- cgit v1.2.3 From 6556ff32f12d0a5380dd2fa6bbaa01373925a7d1 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 1 Feb 2020 16:43:30 +0000 Subject: netdevsim: use IS_ERR instead of IS_ERR_OR_NULL for debugfs Debugfs APIs return valid pointer or error pointer. it doesn't return NULL. So, using IS_ERR is enough, not using IS_ERR_OR_NULL. Reviewed-by: Jakub Kicinski Reported-by: kbuild test robot Reported-by: Dan Carpenter Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/bpf.c | 10 ++++++---- drivers/net/netdevsim/dev.c | 16 ++++++++-------- drivers/net/netdevsim/health.c | 4 ++-- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 2b74425822ab..0b362b8dac17 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -218,6 +218,7 @@ static int nsim_bpf_create_prog(struct nsim_dev *nsim_dev, { struct nsim_bpf_bound_prog *state; char name[16]; + int ret; state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) @@ -230,9 +231,10 @@ static int nsim_bpf_create_prog(struct nsim_dev *nsim_dev, /* Program id is not populated yet when we create the state. */ sprintf(name, "%u", nsim_dev->prog_id_gen++); state->ddir = debugfs_create_dir(name, nsim_dev->ddir_bpf_bound_progs); - if (IS_ERR_OR_NULL(state->ddir)) { + if (IS_ERR(state->ddir)) { + ret = PTR_ERR(state->ddir); kfree(state); - return -ENOMEM; + return ret; } debugfs_create_u32("id", 0400, state->ddir, &prog->aux->id); @@ -587,8 +589,8 @@ int nsim_bpf_dev_init(struct nsim_dev *nsim_dev) nsim_dev->ddir_bpf_bound_progs = debugfs_create_dir("bpf_bound_progs", nsim_dev->ddir); - if (IS_ERR_OR_NULL(nsim_dev->ddir_bpf_bound_progs)) - return -ENOMEM; + if (IS_ERR(nsim_dev->ddir_bpf_bound_progs)) + return PTR_ERR(nsim_dev->ddir_bpf_bound_progs); nsim_dev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops, nsim_dev); err = PTR_ERR_OR_ZERO(nsim_dev->bpf_dev); diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 273a24245d0b..5c5427c840b6 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -77,11 +77,11 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) sprintf(dev_ddir_name, DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id); nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir); - if (IS_ERR_OR_NULL(nsim_dev->ddir)) - return PTR_ERR_OR_ZERO(nsim_dev->ddir) ?: -EINVAL; + if (IS_ERR(nsim_dev->ddir)) + return PTR_ERR(nsim_dev->ddir); nsim_dev->ports_ddir = debugfs_create_dir("ports", nsim_dev->ddir); - if (IS_ERR_OR_NULL(nsim_dev->ports_ddir)) - return PTR_ERR_OR_ZERO(nsim_dev->ports_ddir) ?: -EINVAL; + if (IS_ERR(nsim_dev->ports_ddir)) + return PTR_ERR(nsim_dev->ports_ddir); debugfs_create_bool("fw_update_status", 0600, nsim_dev->ddir, &nsim_dev->fw_update_status); debugfs_create_u32("max_macs", 0600, nsim_dev->ddir, @@ -115,8 +115,8 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, sprintf(port_ddir_name, "%u", nsim_dev_port->port_index); nsim_dev_port->ddir = debugfs_create_dir(port_ddir_name, nsim_dev->ports_ddir); - if (IS_ERR_OR_NULL(nsim_dev_port->ddir)) - return -ENOMEM; + if (IS_ERR(nsim_dev_port->ddir)) + return PTR_ERR(nsim_dev_port->ddir); sprintf(dev_link_name, "../../../" DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id); @@ -934,8 +934,8 @@ int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, int nsim_dev_init(void) { nsim_dev_ddir = debugfs_create_dir(DRV_NAME, NULL); - if (IS_ERR_OR_NULL(nsim_dev_ddir)) - return -ENOMEM; + if (IS_ERR(nsim_dev_ddir)) + return PTR_ERR(nsim_dev_ddir); return 0; } diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c index 9aa637d162eb..30595b1299bd 100644 --- a/drivers/net/netdevsim/health.c +++ b/drivers/net/netdevsim/health.c @@ -285,8 +285,8 @@ int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink) } health->ddir = debugfs_create_dir("health", nsim_dev->ddir); - if (IS_ERR_OR_NULL(health->ddir)) { - err = PTR_ERR_OR_ZERO(health->ddir) ?: -EINVAL; + if (IS_ERR(health->ddir)) { + err = PTR_ERR(health->ddir); goto err_dummy_reporter_destroy; } -- cgit v1.2.3 From 83cf4213bafc4e3c747f0a25ad22cfbf55af7e84 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 1 Feb 2020 16:43:39 +0000 Subject: netdevsim: use __GFP_NOWARN to avoid memalloc warning vfnum buffer size and binary_len buffer size is received by user-space. So, this buffer size could be too large. If so, kmalloc will internally print a warning message. This warning message is actually not necessary for the netdevsim module. So, this patch adds __GFP_NOWARN. Test commands: modprobe netdevsim echo 1 > /sys/bus/netdevsim/new_device echo 1000000000 > /sys/devices/netdevsim1/sriov_numvfs Splat looks like: [ 357.847266][ T1000] WARNING: CPU: 0 PID: 1000 at mm/page_alloc.c:4738 __alloc_pages_nodemask+0x2f3/0x740 [ 357.850273][ T1000] Modules linked in: netdevsim veth openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrx [ 357.852989][ T1000] CPU: 0 PID: 1000 Comm: bash Tainted: G B 5.5.0-rc5+ #270 [ 357.854334][ T1000] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 357.855703][ T1000] RIP: 0010:__alloc_pages_nodemask+0x2f3/0x740 [ 357.856669][ T1000] Code: 64 fe ff ff 65 48 8b 04 25 c0 0f 02 00 48 05 f0 12 00 00 41 be 01 00 00 00 49 89 47 0 [ 357.860272][ T1000] RSP: 0018:ffff8880b7f47bd8 EFLAGS: 00010246 [ 357.861009][ T1000] RAX: ffffed1016fe8f80 RBX: 1ffff11016fe8fae RCX: 0000000000000000 [ 357.861843][ T1000] RDX: 0000000000000000 RSI: 0000000000000017 RDI: 0000000000000000 [ 357.862661][ T1000] RBP: 0000000000040dc0 R08: 1ffff11016fe8f67 R09: dffffc0000000000 [ 357.863509][ T1000] R10: ffff8880b7f47d68 R11: fffffbfff2798180 R12: 1ffff11016fe8f80 [ 357.864355][ T1000] R13: 0000000000000017 R14: 0000000000000017 R15: ffff8880c2038d68 [ 357.865178][ T1000] FS: 00007fd9a5b8c740(0000) GS:ffff8880d9c00000(0000) knlGS:0000000000000000 [ 357.866248][ T1000] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 357.867531][ T1000] CR2: 000055ce01ba8100 CR3: 00000000b7dbe005 CR4: 00000000000606f0 [ 357.868972][ T1000] Call Trace: [ 357.869423][ T1000] ? lock_contended+0xcd0/0xcd0 [ 357.870001][ T1000] ? __alloc_pages_slowpath+0x21d0/0x21d0 [ 357.870673][ T1000] ? _kstrtoull+0x76/0x160 [ 357.871148][ T1000] ? alloc_pages_current+0xc1/0x1a0 [ 357.871704][ T1000] kmalloc_order+0x22/0x80 [ 357.872184][ T1000] kmalloc_order_trace+0x1d/0x140 [ 357.872733][ T1000] __kmalloc+0x302/0x3a0 [ 357.873204][ T1000] nsim_bus_dev_numvfs_store+0x1ab/0x260 [netdevsim] [ 357.873919][ T1000] ? kernfs_get_active+0x12c/0x180 [ 357.874459][ T1000] ? new_device_store+0x450/0x450 [netdevsim] [ 357.875111][ T1000] ? kernfs_get_parent+0x70/0x70 [ 357.875632][ T1000] ? sysfs_file_ops+0x160/0x160 [ 357.876152][ T1000] kernfs_fop_write+0x276/0x410 [ 357.876680][ T1000] ? __sb_start_write+0x1ba/0x2e0 [ 357.877225][ T1000] vfs_write+0x197/0x4a0 [ 357.877671][ T1000] ksys_write+0x141/0x1d0 [ ... ] Reviewed-by: Jakub Kicinski Fixes: 79579220566c ("netdevsim: add SR-IOV functionality") Fixes: 82c93a87bf8b ("netdevsim: implement couple of testing devlink health reporters") Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/bus.c | 2 +- drivers/net/netdevsim/health.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index e455dd1cf4d0..7971dc4f54f1 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -29,7 +29,7 @@ static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev, { nsim_bus_dev->vfconfigs = kcalloc(num_vfs, sizeof(struct nsim_vf_config), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!nsim_bus_dev->vfconfigs) return -ENOMEM; nsim_bus_dev->num_vfs = num_vfs; diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c index 30595b1299bd..ba8d9ad60feb 100644 --- a/drivers/net/netdevsim/health.c +++ b/drivers/net/netdevsim/health.c @@ -82,7 +82,7 @@ static int nsim_dev_dummy_fmsg_put(struct devlink_fmsg *fmsg, u32 binary_len) if (err) return err; - binary = kmalloc(binary_len, GFP_KERNEL); + binary = kmalloc(binary_len, GFP_KERNEL | __GFP_NOWARN); if (!binary) return -ENOMEM; get_random_bytes(binary, binary_len); -- cgit v1.2.3 From 245311637fddeca96c1f0758a649eb1fb437978e Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 1 Feb 2020 16:43:48 +0000 Subject: netdevsim: remove unused sdev code sdev.c code is merged into dev.c and is not used anymore. it would be removed. Reviewed-by: Jakub Kicinski Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/sdev.c | 69 -------------------------------------------- 1 file changed, 69 deletions(-) delete mode 100644 drivers/net/netdevsim/sdev.c diff --git a/drivers/net/netdevsim/sdev.c b/drivers/net/netdevsim/sdev.c deleted file mode 100644 index 6712da3340d6..000000000000 --- a/drivers/net/netdevsim/sdev.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ - -#include -#include -#include -#include - -#include "netdevsim.h" - -static struct dentry *nsim_sdev_ddir; - -static u32 nsim_sdev_id; - -struct netdevsim_shared_dev *nsim_sdev_get(struct netdevsim *joinns) -{ - struct netdevsim_shared_dev *sdev; - char sdev_ddir_name[10]; - int err; - - if (joinns) { - if (WARN_ON(!joinns->sdev)) - return ERR_PTR(-EINVAL); - sdev = joinns->sdev; - sdev->refcnt++; - return sdev; - } - - sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); - if (!sdev) - return ERR_PTR(-ENOMEM); - sdev->refcnt = 1; - sdev->switch_id = nsim_sdev_id++; - - sprintf(sdev_ddir_name, "%u", sdev->switch_id); - sdev->ddir = debugfs_create_dir(sdev_ddir_name, nsim_sdev_ddir); - if (IS_ERR_OR_NULL(sdev->ddir)) { - err = PTR_ERR_OR_ZERO(sdev->ddir) ?: -EINVAL; - goto err_sdev_free; - } - - return sdev; - -err_sdev_free: - nsim_sdev_id--; - kfree(sdev); - return ERR_PTR(err); -} - -void nsim_sdev_put(struct netdevsim_shared_dev *sdev) -{ - if (--sdev->refcnt) - return; - debugfs_remove_recursive(sdev->ddir); - kfree(sdev); -} - -int nsim_sdev_init(void) -{ - nsim_sdev_ddir = debugfs_create_dir(DRV_NAME "_sdev", NULL); - if (IS_ERR_OR_NULL(nsim_sdev_ddir)) - return -ENOMEM; - return 0; -} - -void nsim_sdev_exit(void) -{ - debugfs_remove_recursive(nsim_sdev_ddir); -} -- cgit v1.2.3 From 2b5b8251bc9fe2f9118411f037862ee17cf81e97 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Feb 2020 10:15:07 -0800 Subject: net: hsr: fix possible NULL deref in hsr_handle_frame() hsr_port_get_rcu() can return NULL, so we need to be careful. general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] CPU: 1 PID: 10249 Comm: syz-executor.5 Not tainted 5.5.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__read_once_size include/linux/compiler.h:199 [inline] RIP: 0010:hsr_addr_is_self+0x86/0x330 net/hsr/hsr_framereg.c:44 Code: 04 00 f3 f3 f3 65 48 8b 04 25 28 00 00 00 48 89 45 d0 31 c0 e8 6b ff 94 f9 4c 89 f2 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 75 02 00 00 48 8b 43 30 49 39 c6 49 89 47 c0 0f RSP: 0018:ffffc90000da8a90 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff87e0cc33 RDX: 0000000000000006 RSI: ffffffff87e035d5 RDI: 0000000000000000 RBP: ffffc90000da8b20 R08: ffff88808e7de040 R09: ffffed1015d2707c R10: ffffed1015d2707b R11: ffff8880ae9383db R12: ffff8880a689bc5e R13: 1ffff920001b5153 R14: 0000000000000030 R15: ffffc90000da8af8 FS: 00007fd7a42be700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32338000 CR3: 00000000a928c000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: hsr_handle_frame+0x1c5/0x630 net/hsr/hsr_slave.c:31 __netif_receive_skb_core+0xfbc/0x30b0 net/core/dev.c:5099 __netif_receive_skb_one_core+0xa8/0x1a0 net/core/dev.c:5196 __netif_receive_skb+0x2c/0x1d0 net/core/dev.c:5312 process_backlog+0x206/0x750 net/core/dev.c:6144 napi_poll net/core/dev.c:6582 [inline] net_rx_action+0x508/0x1120 net/core/dev.c:6650 __do_softirq+0x262/0x98c kernel/softirq.c:292 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1082 Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/hsr/hsr_slave.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index ee561297d8a7..fbfd0db182b7 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -27,6 +27,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) rcu_read_lock(); /* hsr->node_db, hsr->ports */ port = hsr_port_get_rcu(skb->dev); + if (!port) + goto finish_pass; if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) { /* Directly kill frames sent by ourselves */ -- cgit v1.2.3 From 7145fcfffef1fad4266aaf5ca96727696916edb7 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 3 Feb 2020 16:29:29 +0100 Subject: tc-testing: fix eBPF tests failure on linux fresh clones when the following command is done on a fresh clone of the kernel tree, [root@f31 tc-testing]# ./tdc.py -c bpf test cases that need to build the eBPF sample program fail systematically, because 'buildebpfPlugin' is unable to install the kernel headers (i.e, the 'khdr' target fails). Pass the correct environment to 'make', in place of ENVIR, to allow running these tests. Fixes: 4c2d39bd40c1 ("tc-testing: use a plugin to build eBPF program") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py index e98c36750fae..d34fe06268d2 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py @@ -54,7 +54,7 @@ class SubPlugin(TdcPlugin): shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env=ENVIR) + env=os.environ.copy()) (rawout, serr) = proc.communicate() if proc.returncode != 0 and len(serr) > 0: -- cgit v1.2.3 From e9ed4fa7b4400d7b2cf03108842a30e6c9bd0eb2 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 3 Feb 2020 16:29:30 +0100 Subject: tc-testing: add missing 'nsPlugin' to basic.json since tdc tests for cls_basic need $DEV1, use 'nsPlugin' so that the following command can be run without errors: [root@f31 tc-testing]# ./tdc.py -c basic Fixes: 4717b05328ba ("tc-testing: Introduced tdc tests for basic filter") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/filters/basic.json | 51 ++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json index 2e361cea63bc..98a20faf3198 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json @@ -6,6 +6,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -25,6 +28,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -44,6 +50,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -63,6 +72,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -82,6 +94,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -101,6 +116,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -120,6 +138,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -139,6 +160,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -158,6 +182,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -177,6 +204,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -196,6 +226,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -215,6 +248,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -234,6 +270,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -253,6 +292,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -272,6 +314,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -291,6 +336,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -310,6 +358,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], -- cgit v1.2.3 From 83b43045308ea0600099830292955f18818f8d5e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 4 Feb 2020 02:24:41 +0000 Subject: qed: Remove set but not used variable 'p_link' Fixes gcc '-Wunused-but-set-variable' warning: drivers/net/ethernet/qlogic/qed/qed_cxt.c: In function 'qed_qm_init_pf': drivers/net/ethernet/qlogic/qed/qed_cxt.c:1401:29: warning: variable 'p_link' set but not used [-Wunused-but-set-variable] commit 92fae6fb231f ("qed: FW 8.42.2.0 Queue Manager changes") leave behind this unused variable. Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index fbfff2b1dc93..1a636bad717d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -1398,14 +1398,11 @@ void qed_qm_init_pf(struct qed_hwfn *p_hwfn, { struct qed_qm_info *qm_info = &p_hwfn->qm_info; struct qed_qm_pf_rt_init_params params; - struct qed_mcp_link_state *p_link; struct qed_qm_iids iids; memset(&iids, 0, sizeof(iids)); qed_cxt_qm_iids(p_hwfn, &iids); - p_link = &QED_LEADING_HWFN(p_hwfn->cdev)->mcp_info->link_output; - memset(¶ms, 0, sizeof(params)); params.port_id = p_hwfn->port_id; params.pf_id = p_hwfn->rel_pf_id; -- cgit v1.2.3 From 599be01ee567b61f4471ee8078870847d0a11e8e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Feb 2020 21:14:35 -0800 Subject: net_sched: fix an OOB access in cls_tcindex As Eric noticed, tcindex_alloc_perfect_hash() uses cp->hash to compute the size of memory allocation, but cp->hash is set again after the allocation, this caused an out-of-bound access. So we have to move all cp->hash initialization and computation before the memory allocation. Move cp->mask and cp->shift together as cp->hash may need them for computation too. Reported-and-tested-by: syzbot+35d4dea36c387813ed31@syzkaller.appspotmail.com Fixes: 331b72922c5f ("net: sched: RCU cls_tcindex") Cc: Eric Dumazet Cc: John Fastabend Cc: Jamal Hadi Salim Cc: Jiri Pirko Cc: Jakub Kicinski Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 3d4a1280352f..0323aee03de7 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -333,12 +333,31 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, cp->fall_through = p->fall_through; cp->tp = tp; + if (tb[TCA_TCINDEX_HASH]) + cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); + + if (tb[TCA_TCINDEX_MASK]) + cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); + + if (tb[TCA_TCINDEX_SHIFT]) + cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); + + if (!cp->hash) { + /* Hash not specified, use perfect hash if the upper limit + * of the hashing index is below the threshold. + */ + if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) + cp->hash = (cp->mask >> cp->shift) + 1; + else + cp->hash = DEFAULT_HASH_SIZE; + } + if (p->perfect) { int i; if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout; - for (i = 0; i < cp->hash; i++) + for (i = 0; i < min(cp->hash, p->hash); i++) cp->perfect[i].res = p->perfect[i].res; balloc = 1; } @@ -350,15 +369,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (old_r) cr = r->res; - if (tb[TCA_TCINDEX_HASH]) - cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); - - if (tb[TCA_TCINDEX_MASK]) - cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); - - if (tb[TCA_TCINDEX_SHIFT]) - cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); - err = -EBUSY; /* Hash already allocated, make sure that we still meet the @@ -376,16 +386,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (tb[TCA_TCINDEX_FALL_THROUGH]) cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); - if (!cp->hash) { - /* Hash not specified, use perfect hash if the upper limit - * of the hashing index is below the threshold. - */ - if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) - cp->hash = (cp->mask >> cp->shift) + 1; - else - cp->hash = DEFAULT_HASH_SIZE; - } - if (!cp->perfect && !cp->h) cp->alloc_hash = cp->hash; -- cgit v1.2.3 From b4b771fd51fda70577dc980429347c41e807c64f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 4 Feb 2020 13:33:13 +0800 Subject: r8152: Add MAC passthrough support to new device Device 0xa387 also supports MAC passthrough, therefore add it to the whitelst. BugLink: https://bugs.launchpad.net/bugs/1827961/comments/30 Signed-off-by: Kai-Heng Feng Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e8cd8c05b156..78ddbaf6401b 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -698,6 +698,9 @@ enum rtl8152_flags { #define VENDOR_ID_NVIDIA 0x0955 #define VENDOR_ID_TPLINK 0x2357 +#define DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2 0x3082 +#define DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2 0xa387 + #define MCU_TYPE_PLA 0x0100 #define MCU_TYPE_USB 0x0000 @@ -6759,9 +6762,13 @@ static int rtl8152_probe(struct usb_interface *intf, netdev->hw_features &= ~NETIF_F_RXCSUM; } - if (le16_to_cpu(udev->descriptor.idVendor) == VENDOR_ID_LENOVO && - le16_to_cpu(udev->descriptor.idProduct) == 0x3082) - set_bit(LENOVO_MACPASSTHRU, &tp->flags); + if (le16_to_cpu(udev->descriptor.idVendor) == VENDOR_ID_LENOVO) { + switch (le16_to_cpu(udev->descriptor.idProduct)) { + case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2: + case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2: + set_bit(LENOVO_MACPASSTHRU, &tp->flags); + } + } if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && udev->serial && (!strcmp(udev->serial, "000001000000") || -- cgit v1.2.3 From 0d0d9a388a858e271bb70e71e99e7fe2a6fd6f64 Mon Sep 17 00:00:00 2001 From: Ridge Kennedy Date: Tue, 4 Feb 2020 12:24:00 +1300 Subject: l2tp: Allow duplicate session creation with UDP In the past it was possible to create multiple L2TPv3 sessions with the same session id as long as the sessions belonged to different tunnels. The resulting sessions had issues when used with IP encapsulated tunnels, but worked fine with UDP encapsulated ones. Some applications began to rely on this behaviour to avoid having to negotiate unique session ids. Some time ago a change was made to require session ids to be unique across all tunnels, breaking the applications making use of this "feature". This change relaxes the duplicate session id check to allow duplicates if both of the colliding sessions belong to UDP encapsulated tunnels. Fixes: dbdbc73b4478 ("l2tp: fix duplicate session creation") Signed-off-by: Ridge Kennedy Acked-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c99223cb3338..fcb53ed1c4fb 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -320,8 +320,13 @@ int l2tp_session_register(struct l2tp_session *session, spin_lock_bh(&pn->l2tp_session_hlist_lock); + /* IP encap expects session IDs to be globally unique, while + * UDP encap doesn't. + */ hlist_for_each_entry(session_walk, g_head, global_hlist) - if (session_walk->session_id == session->session_id) { + if (session_walk->session_id == session->session_id && + (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP || + tunnel->encap == L2TP_ENCAPTYPE_IP)) { err = -EEXIST; goto err_tlock_pnlock; } -- cgit v1.2.3 From bd5cd35b782abf5437fbd01dfaee12437d20e832 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 4 Feb 2020 03:24:59 +0000 Subject: gtp: use __GFP_NOWARN to avoid memalloc warning gtp hashtable size is received by user-space. So, this hashtable size could be too large. If so, kmalloc will internally print a warning message. This warning message is actually not necessary for the gtp module. So, this patch adds __GFP_NOWARN to avoid this message. Splat looks like: [ 2171.200049][ T1860] WARNING: CPU: 1 PID: 1860 at mm/page_alloc.c:4713 __alloc_pages_nodemask+0x2f3/0x740 [ 2171.238885][ T1860] Modules linked in: gtp veth openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv] [ 2171.262680][ T1860] CPU: 1 PID: 1860 Comm: gtp-link Not tainted 5.5.0+ #321 [ 2171.263567][ T1860] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 2171.264681][ T1860] RIP: 0010:__alloc_pages_nodemask+0x2f3/0x740 [ 2171.265332][ T1860] Code: 64 fe ff ff 65 48 8b 04 25 c0 0f 02 00 48 05 f0 12 00 00 41 be 01 00 00 00 49 89 47 0 [ 2171.267301][ T1860] RSP: 0018:ffff8880b51af1f0 EFLAGS: 00010246 [ 2171.268320][ T1860] RAX: ffffed1016a35e43 RBX: 0000000000000000 RCX: 0000000000000000 [ 2171.269517][ T1860] RDX: 0000000000000000 RSI: 000000000000000b RDI: 0000000000000000 [ 2171.270305][ T1860] RBP: 0000000000040cc0 R08: ffffed1018893109 R09: dffffc0000000000 [ 2171.275973][ T1860] R10: 0000000000000001 R11: ffffed1018893108 R12: 1ffff11016a35e43 [ 2171.291039][ T1860] R13: 000000000000000b R14: 000000000000000b R15: 00000000000f4240 [ 2171.292328][ T1860] FS: 00007f53cbc83740(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 2171.293409][ T1860] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2171.294586][ T1860] CR2: 000055f540014508 CR3: 00000000b49f2004 CR4: 00000000000606e0 [ 2171.295424][ T1860] Call Trace: [ 2171.295756][ T1860] ? mark_held_locks+0xa5/0xe0 [ 2171.296659][ T1860] ? __alloc_pages_slowpath+0x21b0/0x21b0 [ 2171.298283][ T1860] ? gtp_encap_enable_socket+0x13e/0x400 [gtp] [ 2171.298962][ T1860] ? alloc_pages_current+0xc1/0x1a0 [ 2171.299475][ T1860] kmalloc_order+0x22/0x80 [ 2171.299936][ T1860] kmalloc_order_trace+0x1d/0x140 [ 2171.300437][ T1860] __kmalloc+0x302/0x3a0 [ 2171.300896][ T1860] gtp_newlink+0x293/0xba0 [gtp] [ ... ] Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/gtp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 7032a2405e1a..af07ea760b35 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -767,12 +767,12 @@ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize) int i; gtp->addr_hash = kmalloc_array(hsize, sizeof(struct hlist_head), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (gtp->addr_hash == NULL) return -ENOMEM; gtp->tid_hash = kmalloc_array(hsize, sizeof(struct hlist_head), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (gtp->tid_hash == NULL) goto err1; -- cgit v1.2.3