summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-05-25 20:55:26 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-05-25 20:55:26 +0300
commit50fb587e6a56dba74c3c56a7a09c48bff25cc5fa (patch)
treef645e50353fa7420e235d4128121bc96ec416837
parenteb03e318135419b8b781f83cdfa7dbb9252afad6 (diff)
parentad42a35bdfc6d3c0fc4cb4027d7b2757ce665665 (diff)
downloadlinux-50fb587e6a56dba74c3c56a7a09c48bff25cc5fa.tar.xz
Merge tag 'net-6.4-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni: "Including fixes from bluetooth and bpf. Current release - regressions: - net: fix skb leak in __skb_tstamp_tx() - eth: mtk_eth_soc: fix QoS on DSA MAC on non MTK_NETSYS_V2 SoCs Current release - new code bugs: - handshake: - fix sock->file allocation - fix handshake_dup() ref counting - bluetooth: - fix potential double free caused by hci_conn_unlink - fix UAF in hci_conn_hash_flush Previous releases - regressions: - core: fix stack overflow when LRO is disabled for virtual interfaces - tls: fix strparser rx issues - bpf: - fix many sockmap/TCP related issues - fix a memory leak in the LRU and LRU_PERCPU hash maps - init the offload table earlier - eth: mlx5e: - do as little as possible in napi poll when budget is 0 - fix using eswitch mapping in nic mode - fix deadlock in tc route query code Previous releases - always broken: - udplite: fix NULL pointer dereference in __sk_mem_raise_allocated() - raw: fix output xfrm lookup wrt protocol - smc: reset connection when trying to use SMCRv2 fails - phy: mscc: enable VSC8501/2 RGMII RX clock - eth: octeontx2-pf: fix TSOv6 offload - eth: cdc_ncm: deal with too low values of dwNtbOutMaxSize" * tag 'net-6.4-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (79 commits) udplite: Fix NULL pointer dereference in __sk_mem_raise_allocated(). net: phy: mscc: enable VSC8501/2 RGMII RX clock net: phy: mscc: remove unnecessary phydev locking net: phy: mscc: add support for VSC8501 net: phy: mscc: add VSC8502 to MODULE_DEVICE_TABLE net/handshake: Enable the SNI extension to work properly net/handshake: Unpin sock->file if a handshake is cancelled net/handshake: handshake_genl_notify() shouldn't ignore @flags net/handshake: Fix uninitialized local variable net/handshake: Fix handshake_dup() ref counting net/handshake: Remove unneeded check from handshake_dup() ipv6: Fix out-of-bounds access in ipv6_find_tlv() net: ethernet: mtk_eth_soc: fix QoS on DSA MAC on non MTK_NETSYS_V2 SoCs docs: netdev: document the existence of the mail bot net: fix skb leak in __skb_tstamp_tx() r8169: Use a raw_spinlock_t for the register locks. page_pool: fix inconsistency for page_pool_ring_[un]lock() bpf, sockmap: Test progs verifier error with latest clang bpf, sockmap: Test FIONREAD returns correct bytes in rx buffer with drops bpf, sockmap: Test FIONREAD returns correct bytes in rx buffer ...
-rw-r--r--Documentation/netlink/specs/handshake.yaml4
-rw-r--r--Documentation/networking/tls-handshake.rst5
-rw-r--r--Documentation/process/maintainer-netdev.rst33
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/bluetooth/btnxpuart.c6
-rw-r--r--drivers/net/bonding/bond_main.c8
-rw-r--r--drivers/net/ethernet/3com/3c589_cs.c11
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c17
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c4
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c70
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c40
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c3
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c10
-rw-r--r--drivers/net/ethernet/nvidia/forcedeth.c1
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c44
-rw-r--r--drivers/net/ethernet/sfc/efx_devlink.c95
-rw-r--r--drivers/net/phy/mscc/mscc.h2
-rw-r--r--drivers/net/phy/mscc/mscc_main.c82
-rw-r--r--drivers/net/team/team.c7
-rw-r--r--drivers/net/usb/cdc_ncm.c24
-rw-r--r--include/linux/if_team.h1
-rw-r--r--include/linux/mlx5/mlx5_ifc.h4
-rw-r--r--include/linux/skbuff.h10
-rw-r--r--include/linux/skmsg.h3
-rw-r--r--include/net/bluetooth/hci_core.h2
-rw-r--r--include/net/bonding.h1
-rw-r--r--include/net/handshake.h1
-rw-r--r--include/net/ip.h2
-rw-r--r--include/net/page_pool.h18
-rw-r--r--include/net/tcp.h10
-rw-r--r--include/net/tls.h1
-rw-r--r--include/uapi/linux/handshake.h1
-rw-r--r--include/uapi/linux/in.h1
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/offload.c2
-rw-r--r--kernel/bpf/verifier.c2
-rw-r--r--net/bluetooth/hci_conn.c77
-rw-r--r--net/core/page_pool.c28
-rw-r--r--net/core/skbuff.c4
-rw-r--r--net/core/skmsg.c81
-rw-r--r--net/core/sock_map.c3
-rw-r--r--net/handshake/handshake-test.c44
-rw-r--r--net/handshake/handshake.h1
-rw-r--r--net/handshake/netlink.c12
-rw-r--r--net/handshake/request.c4
-rw-r--r--net/handshake/tlshd.c8
-rw-r--r--net/ipv4/ip_sockglue.c12
-rw-r--r--net/ipv4/raw.c5
-rw-r--r--net/ipv4/tcp.c11
-rw-r--r--net/ipv4/tcp_bpf.c79
-rw-r--r--net/ipv4/udp.c7
-rw-r--r--net/ipv4/udplite.c2
-rw-r--r--net/ipv6/exthdrs_core.c2
-rw-r--r--net/ipv6/raw.c3
-rw-r--r--net/ipv6/udplite.c2
-rw-r--r--net/sctp/transport.c11
-rw-r--r--net/smc/af_smc.c9
-rw-r--r--net/smc/smc_core.c1
-rw-r--r--net/tls/tls.h5
-rw-r--r--net/tls/tls_device.c22
-rw-r--r--net/tls/tls_strp.c185
-rw-r--r--net/tls/tls_sw.c4
-rw-r--r--net/unix/af_unix.c7
-rw-r--r--net/vmw_vsock/virtio_transport_common.c5
-rw-r--r--samples/bpf/hbm.c1
-rw-r--r--tools/testing/selftests/bpf/Makefile2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c131
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h390
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c370
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_kern.h12
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c32
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh2
88 files changed, 1488 insertions, 792 deletions
diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml
index 614f1a585511..6d89e30f5fd5 100644
--- a/Documentation/netlink/specs/handshake.yaml
+++ b/Documentation/netlink/specs/handshake.yaml
@@ -68,6 +68,9 @@ attribute-sets:
type: nest
nested-attributes: x509
multi-attr: true
+ -
+ name: peername
+ type: string
-
name: done
attributes:
@@ -105,6 +108,7 @@ operations:
- auth-mode
- peer-identity
- certificate
+ - peername
-
name: done
doc: Handler reports handshake completion
diff --git a/Documentation/networking/tls-handshake.rst b/Documentation/networking/tls-handshake.rst
index a2817a88e905..6f5ea1646a47 100644
--- a/Documentation/networking/tls-handshake.rst
+++ b/Documentation/networking/tls-handshake.rst
@@ -53,6 +53,7 @@ fills in a structure that contains the parameters of the request:
struct socket *ta_sock;
tls_done_func_t ta_done;
void *ta_data;
+ const char *ta_peername;
unsigned int ta_timeout_ms;
key_serial_t ta_keyring;
key_serial_t ta_my_cert;
@@ -71,6 +72,10 @@ instantiated a struct file in sock->file.
has completed. Further explanation of this function is in the "Handshake
Completion" sesction below.
+The consumer can provide a NUL-terminated hostname in the @ta_peername
+field that is sent as part of ClientHello. If no peername is provided,
+the DNS hostname associated with the server's IP address is used instead.
+
The consumer can fill in the @ta_timeout_ms field to force the servicing
handshake agent to exit after a number of milliseconds. This enables the
socket to be fully closed once both the kernel and the handshake agent
diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst
index f73ac9e175a8..83614cec9328 100644
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -127,13 +127,32 @@ the value of ``Message-ID`` to the URL above.
Updating patch status
~~~~~~~~~~~~~~~~~~~~~
-It may be tempting to help the maintainers and update the state of your
-own patches when you post a new version or spot a bug. Please **do not**
-do that.
-Interfering with the patch status on patchwork will only cause confusion. Leave
-it to the maintainer to figure out what is the most recent and current
-version that should be applied. If there is any doubt, the maintainer
-will reply and ask what should be done.
+Contributors and reviewers do not have the permissions to update patch
+state directly in patchwork. Patchwork doesn't expose much information
+about the history of the state of patches, therefore having multiple
+people update the state leads to confusion.
+
+Instead of delegating patchwork permissions netdev uses a simple mail
+bot which looks for special commands/lines within the emails sent to
+the mailing list. For example to mark a series as Changes Requested
+one needs to send the following line anywhere in the email thread::
+
+ pw-bot: changes-requested
+
+As a result the bot will set the entire series to Changes Requested.
+This may be useful when author discovers a bug in their own series
+and wants to prevent it from getting applied.
+
+The use of the bot is entirely optional, if in doubt ignore its existence
+completely. Maintainers will classify and update the state of the patches
+themselves. No email should ever be sent to the list with the main purpose
+of communicating with the bot, the bot commands should be seen as metadata.
+
+The use of the bot is restricted to authors of the patches (the ``From:``
+header on patch submission and command must match!), maintainers themselves
+and a handful of senior reviewers. Bot records its activity here:
+
+ https://patchwork.hopto.org/pw-bot.html
Review timelines
~~~~~~~~~~~~~~~~
diff --git a/MAINTAINERS b/MAINTAINERS
index bc201627c2e0..d685be5aa25d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8153,6 +8153,7 @@ F: include/linux/spi/spi-fsl-dspi.h
FREESCALE ENETC ETHERNET DRIVERS
M: Claudiu Manoil <claudiu.manoil@nxp.com>
+M: Vladimir Oltean <vladimir.oltean@nxp.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/freescale/enetc/
diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c
index 3a34d7c1475b..52ef44688d38 100644
--- a/drivers/bluetooth/btnxpuart.c
+++ b/drivers/bluetooth/btnxpuart.c
@@ -1319,17 +1319,17 @@ static void nxp_serdev_remove(struct serdev_device *serdev)
hci_free_dev(hdev);
}
-static struct btnxpuart_data w8987_data = {
+static struct btnxpuart_data w8987_data __maybe_unused = {
.helper_fw_name = NULL,
.fw_name = FIRMWARE_W8987,
};
-static struct btnxpuart_data w8997_data = {
+static struct btnxpuart_data w8997_data __maybe_unused = {
.helper_fw_name = FIRMWARE_HELPER,
.fw_name = FIRMWARE_W8997,
};
-static const struct of_device_id nxpuart_of_match_table[] = {
+static const struct of_device_id nxpuart_of_match_table[] __maybe_unused = {
{ .compatible = "nxp,88w8987-bt", .data = &w8987_data },
{ .compatible = "nxp,88w8997-bt", .data = &w8997_data },
{ }
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3fed888629f7..edbaa1444f8e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3947,7 +3947,11 @@ static int bond_slave_netdev_event(unsigned long event,
unblock_netpoll_tx();
break;
case NETDEV_FEAT_CHANGE:
- bond_compute_features(bond);
+ if (!bond->notifier_ctx) {
+ bond->notifier_ctx = true;
+ bond_compute_features(bond);
+ bond->notifier_ctx = false;
+ }
break;
case NETDEV_RESEND_IGMP:
/* Propagate to master device */
@@ -6342,6 +6346,8 @@ static int bond_init(struct net_device *bond_dev)
if (!bond->wq)
return -ENOMEM;
+ bond->notifier_ctx = false;
+
spin_lock_init(&bond->stats_lock);
netdev_lockdep_set_classes(bond_dev);
diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c
index 82f94b1635bf..5267e9dcd87e 100644
--- a/drivers/net/ethernet/3com/3c589_cs.c
+++ b/drivers/net/ethernet/3com/3c589_cs.c
@@ -195,6 +195,7 @@ static int tc589_probe(struct pcmcia_device *link)
{
struct el3_private *lp;
struct net_device *dev;
+ int ret;
dev_dbg(&link->dev, "3c589_attach()\n");
@@ -218,7 +219,15 @@ static int tc589_probe(struct pcmcia_device *link)
dev->ethtool_ops = &netdev_ethtool_ops;
- return tc589_config(link);
+ ret = tc589_config(link);
+ if (ret)
+ goto err_free_netdev;
+
+ return 0;
+
+err_free_netdev:
+ free_netdev(dev);
+ return ret;
}
static void tc589_detach(struct pcmcia_device *link)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 577d94821b3e..38e5b5abe067 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3834,6 +3834,11 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
index = fec_enet_get_bd_index(last_bdp, &txq->bd);
txq->tx_skbuff[index] = NULL;
+ /* Make sure the updates to rest of the descriptor are performed before
+ * transferring ownership.
+ */
+ dma_wmb();
+
/* Send it on its way. Tell FEC it's ready, interrupt when done,
* it's the last BD of the frame, and to put the CRC on the end.
*/
@@ -3843,8 +3848,14 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
/* If this was the last BD in the ring, start at the beginning again. */
bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd);
+ /* Make sure the update to bdp are performed before txq->bd.cur. */
+ dma_wmb();
+
txq->bd.cur = bdp;
+ /* Trigger transmission start */
+ writel(0, txq->bd.reg_desc_active);
+
return 0;
}
@@ -3873,12 +3884,6 @@ static int fec_enet_xdp_xmit(struct net_device *dev,
sent_frames++;
}
- /* Make sure the update to bdp and tx_skbuff are performed. */
- wmb();
-
- /* Trigger transmission start */
- writel(0, txq->bd.reg_desc_active);
-
__netif_tx_unlock(nq);
return sent_frames;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index 7045fedfd73a..7af223b0a37f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -652,9 +652,7 @@ static void otx2_sqe_add_ext(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
htons(ext->lso_sb - skb_network_offset(skb));
} else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
ext->lso_format = pfvf->hw.lso_tsov6_idx;
-
- ipv6_hdr(skb)->payload_len =
- htons(ext->lso_sb - skb_network_offset(skb));
+ ipv6_hdr(skb)->payload_len = htons(tcp_hdrlen(skb));
} else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
__be16 l3_proto = vlan_get_protocol(skb);
struct udphdr *udph = udp_hdr(skb);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index a75fd072082c..834c644b67db 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -3269,18 +3269,14 @@ static int mtk_open(struct net_device *dev)
eth->dsa_meta[i] = md_dst;
}
} else {
- /* Hardware special tag parsing needs to be disabled if at least
- * one MAC does not use DSA.
+ /* Hardware DSA untagging and VLAN RX offloading need to be
+ * disabled if at least one MAC does not use DSA.
*/
u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
val &= ~MTK_CDMP_STAG_EN;
mtk_w32(eth, val, MTK_CDMP_IG_CTRL);
- val = mtk_r32(eth, MTK_CDMQ_IG_CTRL);
- val &= ~MTK_CDMQ_STAG_EN;
- mtk_w32(eth, val, MTK_CDMQ_IG_CTRL);
-
mtk_w32(eth, 0, MTK_CDMP_EG_CTRL);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d53de39539a8..d532883b42d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1920,9 +1920,10 @@ static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod
static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
u32 syndrome, int err)
{
+ const char *namep = mlx5_command_str(opcode);
struct mlx5_cmd_stats *stats;
- if (!err)
+ if (!err || !(strcmp(namep, "unknown command opcode")))
return;
stats = &dev->cmd.stats[opcode];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index eb5abd0e55d9..3cbebfba582b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -175,6 +175,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
/* ensure cq space is freed before enabling more cqes */
wmb();
+ mlx5e_txqsq_wake(&ptpsq->txqsq);
+
return work_done == budget;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 20c2d2ecaf93..6a052c6cfc15 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -1369,11 +1369,13 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow;
list_for_each_entry(flow, encap_flows, tmp_list) {
- struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_attr *attr;
if (!mlx5e_is_offloaded_flow(flow))
continue;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
esw_attr = attr->esw_attr;
if (flow_flag_test(flow, SLOW))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 47381e949f1f..879d698b6119 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -193,6 +193,8 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
return pi;
}
+void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq);
+
static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 728b82ce4031..e95414ef1f04 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1665,11 +1665,9 @@ bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_
int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
{
struct mlx5e_priv *out_priv, *route_priv;
- struct mlx5_devcom *devcom = NULL;
struct mlx5_core_dev *route_mdev;
struct mlx5_eswitch *esw;
u16 vhca_id;
- int err;
out_priv = netdev_priv(out_dev);
esw = out_priv->mdev->priv.eswitch;
@@ -1678,6 +1676,9 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
if (mlx5_lag_is_active(out_priv->mdev)) {
+ struct mlx5_devcom *devcom;
+ int err;
+
/* In lag case we may get devices from different eswitch instances.
* If we failed to get vport num, it means, mostly, that we on the wrong
* eswitch.
@@ -1686,16 +1687,16 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
if (err != -ENOENT)
return err;
+ rcu_read_lock();
devcom = out_priv->mdev->priv.devcom;
- esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
- if (!esw)
- return -ENODEV;
+ esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV;
+ rcu_read_unlock();
+
+ return err;
}
- err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
- if (devcom)
- mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
- return err;
+ return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
}
static int
@@ -5301,6 +5302,8 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
goto err_action_counter;
}
+ mlx5_esw_offloads_devcom_init(esw);
+
return 0;
err_action_counter:
@@ -5329,7 +5332,7 @@ void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
priv = netdev_priv(rpriv->netdev);
esw = priv->mdev->priv.eswitch;
- mlx5e_tc_clean_fdb_peer_flows(esw);
+ mlx5_esw_offloads_devcom_cleanup(esw);
mlx5e_tc_tun_cleanup(uplink_priv->encap);
@@ -5643,22 +5646,43 @@ bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
0, NULL);
}
+static struct mapping_ctx *
+mlx5e_get_priv_obj_mapping(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc;
+ struct mlx5_eswitch *esw;
+ struct mapping_ctx *ctx;
+
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ esw = priv->mdev->priv.eswitch;
+ ctx = esw->offloads.reg_c0_obj_pool;
+ } else {
+ tc = mlx5e_fs_get_tc(priv->fs);
+ ctx = tc->mapping;
+ }
+
+ return ctx;
+}
+
int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
u64 act_miss_cookie, u32 *act_miss_mapping)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_mapped_obj mapped_obj = {};
+ struct mlx5_eswitch *esw;
struct mapping_ctx *ctx;
int err;
- ctx = esw->offloads.reg_c0_obj_pool;
-
+ ctx = mlx5e_get_priv_obj_mapping(priv);
mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS;
mapped_obj.act_miss_cookie = act_miss_cookie;
err = mapping_add(ctx, &mapped_obj, act_miss_mapping);
if (err)
return err;
+ if (!is_mdev_switchdev_mode(priv->mdev))
+ return 0;
+
+ esw = priv->mdev->priv.eswitch;
attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping);
if (IS_ERR(attr->act_id_restore_rule))
goto err_rule;
@@ -5673,10 +5697,9 @@ err_rule:
void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
u32 act_miss_mapping)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mapping_ctx *ctx;
+ struct mapping_ctx *ctx = mlx5e_get_priv_obj_mapping(priv);
- ctx = esw->offloads.reg_c0_obj_pool;
- mlx5_del_flow_rules(attr->act_id_restore_rule);
+ if (is_mdev_switchdev_mode(priv->mdev))
+ mlx5_del_flow_rules(attr->act_id_restore_rule);
mapping_remove(ctx, act_miss_mapping);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index df5e780e8e6a..c7eb6b238c2b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -762,6 +762,17 @@ static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_t
}
}
+void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq)
+{
+ if (netif_tx_queue_stopped(sq->txq) &&
+ mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
+ mlx5e_ptpsq_fifo_has_room(sq) &&
+ !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
+ netif_tx_wake_queue(sq->txq);
+ sq->stats->wake++;
+ }
+}
+
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_sq_stats *stats;
@@ -861,13 +872,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
netdev_tx_completed_queue(sq->txq, npkts, nbytes);
- if (netif_tx_queue_stopped(sq->txq) &&
- mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
- mlx5e_ptpsq_fifo_has_room(sq) &&
- !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
- netif_tx_wake_queue(sq->txq);
- stats->wake++;
- }
+ mlx5e_txqsq_wake(sq);
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index a50bfda18e96..fbb2d963fb7e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -161,20 +161,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
}
}
+ /* budget=0 means we may be in IRQ context, do as little as possible */
+ if (unlikely(!budget))
+ goto out;
+
busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
if (c->xdp)
busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq);
- if (likely(budget)) { /* budget=0 means: don't poll rx rings */
- if (xsk_open)
- work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
+ if (xsk_open)
+ work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
- if (likely(budget - work_done))
- work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
+ if (likely(budget - work_done))
+ work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
- busy |= work_done == budget;
- }
+ busy |= work_done == budget;
mlx5e_poll_ico_cq(&c->icosq.cq);
if (mlx5e_poll_ico_cq(&c->async_icosq.cq))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 1c35d721a31d..fe698c79616c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -1104,7 +1104,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
struct mlx5_eq_table *table = dev->priv.eq_table;
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
- mlx5_irq_table_destroy(dev);
+ mlx5_irq_table_free_irqs(dev);
mutex_unlock(&table->lock);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 1a042c981713..add6cfa432a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -342,6 +342,7 @@ struct mlx5_eswitch {
u32 large_group_num;
} params;
struct blocking_notifier_head n_head;
+ bool paired[MLX5_MAX_PORTS];
};
void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -369,6 +370,8 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf);
void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw);
void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
+void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw);
+void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
u16 vport, const u8 *mac);
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
@@ -767,6 +770,8 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {}
static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
+static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) {}
+static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {}
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
static inline
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 69215ffb9999..8d19c20d3447 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2742,6 +2742,9 @@ static int mlx5_esw_offloads_devcom_event(int event,
mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
break;
+ if (esw->paired[mlx5_get_dev_index(peer_esw->dev)])
+ break;
+
err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true);
if (err)
goto err_out;
@@ -2753,14 +2756,18 @@ static int mlx5_esw_offloads_devcom_event(int event,
if (err)
goto err_pair;
+ esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true;
+ peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true;
mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
break;
case ESW_OFFLOADS_DEVCOM_UNPAIR:
- if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+ if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)])
break;
mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
+ esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false;
+ peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false;
mlx5_esw_offloads_unpair(peer_esw);
mlx5_esw_offloads_unpair(esw);
mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
@@ -2779,7 +2786,7 @@ err_out:
return err;
}
-static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
+void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw)
{
struct mlx5_devcom *devcom = esw->dev->priv.devcom;
@@ -2802,7 +2809,7 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
ESW_OFFLOADS_DEVCOM_PAIR, esw);
}
-static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
+void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
{
struct mlx5_devcom *devcom = esw->dev->priv.devcom;
@@ -3250,8 +3257,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
if (err)
goto err_vports;
- esw_offloads_devcom_init(esw);
-
return 0;
err_vports:
@@ -3292,7 +3297,6 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
void esw_offloads_disable(struct mlx5_eswitch *esw)
{
- esw_offloads_devcom_cleanup(esw);
mlx5_eswitch_disable_pf_vf_vports(esw);
esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
esw_set_passing_vport_metadata(esw, false);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
index adefde3ea941..b7d779d08d83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -3,6 +3,7 @@
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
+#include "mlx5_core.h"
static LIST_HEAD(devcom_list);
@@ -13,7 +14,7 @@ static LIST_HEAD(devcom_list);
struct mlx5_devcom_component {
struct {
- void *data;
+ void __rcu *data;
} device[MLX5_DEVCOM_PORTS_SUPPORTED];
mlx5_devcom_event_handler_t handler;
@@ -77,6 +78,7 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED)
return NULL;
+ mlx5_dev_list_lock();
sguid0 = mlx5_query_nic_system_image_guid(dev);
list_for_each_entry(iter, &devcom_list, list) {
struct mlx5_core_dev *tmp_dev = NULL;
@@ -102,8 +104,10 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
if (!priv) {
priv = mlx5_devcom_list_alloc();
- if (!priv)
- return ERR_PTR(-ENOMEM);
+ if (!priv) {
+ devcom = ERR_PTR(-ENOMEM);
+ goto out;
+ }
idx = 0;
new_priv = true;
@@ -112,13 +116,16 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
priv->devs[idx] = dev;
devcom = mlx5_devcom_alloc(priv, idx);
if (!devcom) {
- kfree(priv);
- return ERR_PTR(-ENOMEM);
+ if (new_priv)
+ kfree(priv);
+ devcom = ERR_PTR(-ENOMEM);
+ goto out;
}
if (new_priv)
list_add(&priv->list, &devcom_list);
-
+out:
+ mlx5_dev_list_unlock();
return devcom;
}
@@ -131,6 +138,7 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
if (IS_ERR_OR_NULL(devcom))
return;
+ mlx5_dev_list_lock();
priv = devcom->priv;
priv->devs[devcom->idx] = NULL;
@@ -141,10 +149,12 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
break;
if (i != MLX5_DEVCOM_PORTS_SUPPORTED)
- return;
+ goto out;
list_del(&priv->list);
kfree(priv);
+out:
+ mlx5_dev_list_unlock();
}
void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
@@ -162,7 +172,7 @@ void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_write(&comp->sem);
comp->handler = handler;
- comp->device[devcom->idx].data = data;
+ rcu_assign_pointer(comp->device[devcom->idx].data, data);
up_write(&comp->sem);
}
@@ -176,8 +186,9 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_write(&comp->sem);
- comp->device[devcom->idx].data = NULL;
+ RCU_INIT_POINTER(comp->device[devcom->idx].data, NULL);
up_write(&comp->sem);
+ synchronize_rcu();
}
int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
@@ -193,12 +204,15 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_write(&comp->sem);
- for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
- if (i != devcom->idx && comp->device[i].data) {
- err = comp->handler(event, comp->device[i].data,
- event_data);
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
+ void *data = rcu_dereference_protected(comp->device[i].data,
+ lockdep_is_held(&comp->sem));
+
+ if (i != devcom->idx && data) {
+ err = comp->handler(event, data, event_data);
break;
}
+ }
up_write(&comp->sem);
return err;
@@ -213,7 +227,7 @@ void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
WARN_ON(!rwsem_is_locked(&comp->sem));
- comp->paired = paired;
+ WRITE_ONCE(comp->paired, paired);
}
bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
@@ -222,7 +236,7 @@ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
if (IS_ERR_OR_NULL(devcom))
return false;
- return devcom->priv->components[id].paired;
+ return READ_ONCE(devcom->priv->components[id].paired);
}
void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
@@ -236,7 +250,7 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_read(&comp->sem);
- if (!comp->paired) {
+ if (!READ_ONCE(comp->paired)) {
up_read(&comp->sem);
return NULL;
}
@@ -245,7 +259,29 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
if (i != devcom->idx)
break;
- return comp->device[i].data;
+ return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem));
+}
+
+void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp;
+ int i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return NULL;
+
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
+ if (i != devcom->idx)
+ break;
+
+ comp = &devcom->priv->components[id];
+ /* This can change concurrently, however 'data' pointer will remain
+ * valid for the duration of RCU read section.
+ */
+ if (!READ_ONCE(comp->paired))
+ return NULL;
+
+ return rcu_dereference(comp->device[i].data);
}
void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
index 94313c18bb64..9a496f4722da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -41,6 +41,7 @@ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
enum mlx5_devcom_components id);
+void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id);
void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
enum mlx5_devcom_components id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 995eb2d5ace0..a7eb65cd0bdd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1049,7 +1049,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
dev->dm = mlx5_dm_create(dev);
if (IS_ERR(dev->dm))
- mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
+ mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm));
dev->tracer = mlx5_fw_tracer_create(dev);
dev->hv_vhca = mlx5_hv_vhca_create(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index efd0c299c5c7..aa403a5ea34e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -15,6 +15,7 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev);
void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
int mlx5_irq_table_create(struct mlx5_core_dev *dev);
void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev);
int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 2245d3b2f393..db5687d9fec9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -32,6 +32,7 @@ struct mlx5_irq {
struct mlx5_irq_pool *pool;
int refcount;
struct msi_map map;
+ u32 pool_index;
};
struct mlx5_irq_table {
@@ -132,7 +133,7 @@ static void irq_release(struct mlx5_irq *irq)
struct cpu_rmap *rmap;
#endif
- xa_erase(&pool->irqs, irq->map.index);
+ xa_erase(&pool->irqs, irq->pool_index);
/* free_irq requires that affinity_hint and rmap will be cleared before
* calling it. To satisfy this requirement, we call
* irq_cpu_rmap_remove() to remove the notifier
@@ -276,11 +277,11 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
}
irq->pool = pool;
irq->refcount = 1;
- irq->map.index = i;
- err = xa_err(xa_store(&pool->irqs, irq->map.index, irq, GFP_KERNEL));
+ irq->pool_index = i;
+ err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
if (err) {
mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
- irq->map.index, err);
+ irq->pool_index, err);
goto err_xa;
}
return irq;
@@ -567,7 +568,7 @@ int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
struct mlx5_irq *irq;
int i;
- af_desc.is_managed = 1;
+ af_desc.is_managed = false;
for (i = 0; i < nirqs; i++) {
cpumask_set_cpu(cpus[i], &af_desc.mask);
irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap);
@@ -691,6 +692,24 @@ static void irq_pools_destroy(struct mlx5_irq_table *table)
irq_pool_free(table->pcif_pool);
}
+static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
+{
+ struct mlx5_irq *irq;
+ unsigned long index;
+
+ xa_for_each(&pool->irqs, index, irq)
+ free_irq(irq->map.virq, &irq->nh);
+}
+
+static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
+{
+ if (table->sf_ctrl_pool) {
+ mlx5_irq_pool_free_irqs(table->sf_comp_pool);
+ mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
+ }
+ mlx5_irq_pool_free_irqs(table->pcif_pool);
+}
+
/* irq_table API */
int mlx5_irq_table_init(struct mlx5_core_dev *dev)
@@ -774,6 +793,17 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
pci_free_irq_vectors(dev->pdev);
}
+void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+
+ if (mlx5_core_is_sf(dev))
+ return;
+
+ mlx5_irq_pools_free_irqs(table);
+ pci_free_irq_vectors(dev->pdev);
+}
+
int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
{
if (table->sf_comp_pool)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 3835ba3f4dda..1aa525e509f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -117,6 +117,8 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id);
caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols);
caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version);
+ caps->roce_caps.fl_rc_qp_when_roce_disabled =
+ MLX5_CAP_GEN(mdev, fl_rc_qp_when_roce_disabled);
if (MLX5_CAP_GEN(mdev, roce)) {
err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en);
@@ -124,7 +126,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
return err;
caps->roce_caps.roce_en = roce_en;
- caps->roce_caps.fl_rc_qp_when_roce_disabled =
+ caps->roce_caps.fl_rc_qp_when_roce_disabled |=
MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled);
caps->roce_caps.fl_rc_qp_when_roce_enabled =
MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 9413aaf51251..e94fbb015efa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -15,7 +15,8 @@ static u32 dr_ste_crc32_calc(const void *input_data, size_t length)
{
u32 crc = crc32(0, input_data, length);
- return (__force u32)htonl(crc);
+ return (__force u32)((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) |
+ ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000);
}
bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 2b6e046e1d10..ee2698698d71 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -1039,6 +1039,16 @@ static int lan966x_reset_switch(struct lan966x *lan966x)
reset_control_reset(switch_reset);
+ /* Don't reinitialize the switch core, if it is already initialized. In
+ * case it is initialized twice, some pointers inside the queue system
+ * in HW will get corrupted and then after a while the queue system gets
+ * full and no traffic is passing through the switch. The issue is seen
+ * when loading and unloading the driver and sending traffic through the
+ * switch.
+ */
+ if (lan_rd(lan966x, SYS_RESET_CFG) & SYS_RESET_CFG_CORE_ENA)
+ return 0;
+
lan_wr(SYS_RESET_CFG_CORE_ENA_SET(0), lan966x, SYS_RESET_CFG);
lan_wr(SYS_RAM_INIT_RAM_INIT_SET(1), lan966x, SYS_RAM_INIT);
ret = readx_poll_timeout(lan966x_ram_init, lan966x,
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 0605d1ee490d..7a549b834e97 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -6138,6 +6138,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
return 0;
out_error:
+ nv_mgmt_release_sema(dev);
if (phystate_orig)
writel(phystate|NVREG_ADAPTCTL_RUNNING, base + NvRegAdapterControl);
out_freering:
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index a7e376e7e689..4b19803a7dd0 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -616,10 +616,10 @@ struct rtl8169_private {
struct work_struct work;
} wk;
- spinlock_t config25_lock;
- spinlock_t mac_ocp_lock;
+ raw_spinlock_t config25_lock;
+ raw_spinlock_t mac_ocp_lock;
- spinlock_t cfg9346_usage_lock;
+ raw_spinlock_t cfg9346_usage_lock;
int cfg9346_usage_count;
unsigned supports_gmii:1;
@@ -671,20 +671,20 @@ static void rtl_lock_config_regs(struct rtl8169_private *tp)
{
unsigned long flags;
- spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
+ raw_spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
if (!--tp->cfg9346_usage_count)
RTL_W8(tp, Cfg9346, Cfg9346_Lock);
- spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
}
static void rtl_unlock_config_regs(struct rtl8169_private *tp)
{
unsigned long flags;
- spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
+ raw_spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
if (!tp->cfg9346_usage_count++)
RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
}
static void rtl_pci_commit(struct rtl8169_private *tp)
@@ -698,10 +698,10 @@ static void rtl_mod_config2(struct rtl8169_private *tp, u8 clear, u8 set)
unsigned long flags;
u8 val;
- spin_lock_irqsave(&tp->config25_lock, flags);
+ raw_spin_lock_irqsave(&tp->config25_lock, flags);
val = RTL_R8(tp, Config2);
RTL_W8(tp, Config2, (val & ~clear) | set);
- spin_unlock_irqrestore(&tp->config25_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->config25_lock, flags);
}
static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set)
@@ -709,10 +709,10 @@ static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set)
unsigned long flags;
u8 val;
- spin_lock_irqsave(&tp->config25_lock, flags);
+ raw_spin_lock_irqsave(&tp->config25_lock, flags);
val = RTL_R8(tp, Config5);
RTL_W8(tp, Config5, (val & ~clear) | set);
- spin_unlock_irqrestore(&tp->config25_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->config25_lock, flags);
}
static bool rtl_is_8125(struct rtl8169_private *tp)
@@ -899,9 +899,9 @@ static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
{
unsigned long flags;
- spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+ raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
__r8168_mac_ocp_write(tp, reg, data);
- spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
}
static u16 __r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
@@ -919,9 +919,9 @@ static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
unsigned long flags;
u16 val;
- spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+ raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
val = __r8168_mac_ocp_read(tp, reg);
- spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
return val;
}
@@ -932,10 +932,10 @@ static void r8168_mac_ocp_modify(struct rtl8169_private *tp, u32 reg, u16 mask,
unsigned long flags;
u16 data;
- spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+ raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
data = __r8168_mac_ocp_read(tp, reg);
__r8168_mac_ocp_write(tp, reg, (data & ~mask) | set);
- spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
}
/* Work around a hw issue with RTL8168g PHY, the quirk disables
@@ -1420,14 +1420,14 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
r8168_mac_ocp_modify(tp, 0xc0b6, BIT(0), 0);
}
- spin_lock_irqsave(&tp->config25_lock, flags);
+ raw_spin_lock_irqsave(&tp->config25_lock, flags);
for (i = 0; i < tmp; i++) {
options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask;
if (wolopts & cfg[i].opt)
options |= cfg[i].mask;
RTL_W8(tp, cfg[i].reg, options);
}
- spin_unlock_irqrestore(&tp->config25_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->config25_lock, flags);
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
@@ -5179,9 +5179,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->eee_adv = -1;
tp->ocp_base = OCP_STD_PHY_BASE;
- spin_lock_init(&tp->cfg9346_usage_lock);
- spin_lock_init(&tp->config25_lock);
- spin_lock_init(&tp->mac_ocp_lock);
+ raw_spin_lock_init(&tp->cfg9346_usage_lock);
+ raw_spin_lock_init(&tp->config25_lock);
+ raw_spin_lock_init(&tp->mac_ocp_lock);
dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev,
struct pcpu_sw_netstats);
diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c
index 381b805659d3..ef9971cbb695 100644
--- a/drivers/net/ethernet/sfc/efx_devlink.c
+++ b/drivers/net/ethernet/sfc/efx_devlink.c
@@ -171,9 +171,14 @@ static int efx_devlink_info_nvram_partition(struct efx_nic *efx,
rc = efx_mcdi_nvram_metadata(efx, partition_type, NULL, version, NULL,
0);
+
+ /* If the partition does not exist, that is not an error. */
+ if (rc == -ENOENT)
+ return 0;
+
if (rc) {
- netif_err(efx, drv, efx->net_dev, "mcdi nvram %s: failed\n",
- version_name);
+ netif_err(efx, drv, efx->net_dev, "mcdi nvram %s: failed (rc=%d)\n",
+ version_name, rc);
return rc;
}
@@ -187,36 +192,33 @@ static int efx_devlink_info_nvram_partition(struct efx_nic *efx,
static int efx_devlink_info_stored_versions(struct efx_nic *efx,
struct devlink_info_req *req)
{
- int rc;
-
- rc = efx_devlink_info_nvram_partition(efx, req,
- NVRAM_PARTITION_TYPE_BUNDLE,
- DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID);
- if (rc)
- return rc;
-
- rc = efx_devlink_info_nvram_partition(efx, req,
- NVRAM_PARTITION_TYPE_MC_FIRMWARE,
- DEVLINK_INFO_VERSION_GENERIC_FW_MGMT);
- if (rc)
- return rc;
-
- rc = efx_devlink_info_nvram_partition(efx, req,
- NVRAM_PARTITION_TYPE_SUC_FIRMWARE,
- EFX_DEVLINK_INFO_VERSION_FW_MGMT_SUC);
- if (rc)
- return rc;
-
- rc = efx_devlink_info_nvram_partition(efx, req,
- NVRAM_PARTITION_TYPE_EXPANSION_ROM,
- EFX_DEVLINK_INFO_VERSION_FW_EXPROM);
- if (rc)
- return rc;
+ int err;
- rc = efx_devlink_info_nvram_partition(efx, req,
- NVRAM_PARTITION_TYPE_EXPANSION_UEFI,
- EFX_DEVLINK_INFO_VERSION_FW_UEFI);
- return rc;
+ /* We do not care here about the specific error but just if an error
+ * happened. The specific error will be reported inside the call
+ * through system messages, and if any error happened in any call
+ * below, we report it through extack.
+ */
+ err = efx_devlink_info_nvram_partition(efx, req,
+ NVRAM_PARTITION_TYPE_BUNDLE,
+ DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID);
+
+ err |= efx_devlink_info_nvram_partition(efx, req,
+ NVRAM_PARTITION_TYPE_MC_FIRMWARE,
+ DEVLINK_INFO_VERSION_GENERIC_FW_MGMT);
+
+ err |= efx_devlink_info_nvram_partition(efx, req,
+ NVRAM_PARTITION_TYPE_SUC_FIRMWARE,
+ EFX_DEVLINK_INFO_VERSION_FW_MGMT_SUC);
+
+ err |= efx_devlink_info_nvram_partition(efx, req,
+ NVRAM_PARTITION_TYPE_EXPANSION_ROM,
+ EFX_DEVLINK_INFO_VERSION_FW_EXPROM);
+
+ err |= efx_devlink_info_nvram_partition(efx, req,
+ NVRAM_PARTITION_TYPE_EXPANSION_UEFI,
+ EFX_DEVLINK_INFO_VERSION_FW_UEFI);
+ return err;
}
#define EFX_VER_FLAG(_f) \
@@ -587,27 +589,20 @@ static int efx_devlink_info_get(struct devlink *devlink,
{
struct efx_devlink *devlink_private = devlink_priv(devlink);
struct efx_nic *efx = devlink_private->efx;
- int rc;
+ int err;
- /* Several different MCDI commands are used. We report first error
- * through extack returning at that point. Specific error
- * information via system messages.
+ /* Several different MCDI commands are used. We report if errors
+ * happened through extack. Specific error information via system
+ * messages inside the calls.
*/
- rc = efx_devlink_info_board_cfg(efx, req);
- if (rc) {
- NL_SET_ERR_MSG_MOD(extack, "Getting board info failed");
- return rc;
- }
- rc = efx_devlink_info_stored_versions(efx, req);
- if (rc) {
- NL_SET_ERR_MSG_MOD(extack, "Getting stored versions failed");
- return rc;
- }
- rc = efx_devlink_info_running_versions(efx, req);
- if (rc) {
- NL_SET_ERR_MSG_MOD(extack, "Getting running versions failed");
- return rc;
- }
+ err = efx_devlink_info_board_cfg(efx, req);
+
+ err |= efx_devlink_info_stored_versions(efx, req);
+
+ err |= efx_devlink_info_running_versions(efx, req);
+
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "Errors when getting device info. Check system messages");
return 0;
}
diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h
index a50235fdf7d9..defe5cc6d4fc 100644
--- a/drivers/net/phy/mscc/mscc.h
+++ b/drivers/net/phy/mscc/mscc.h
@@ -179,6 +179,7 @@ enum rgmii_clock_delay {
#define VSC8502_RGMII_CNTL 20
#define VSC8502_RGMII_RX_DELAY_MASK 0x0070
#define VSC8502_RGMII_TX_DELAY_MASK 0x0007
+#define VSC8502_RGMII_RX_CLK_DISABLE 0x0800
#define MSCC_PHY_WOL_LOWER_MAC_ADDR 21
#define MSCC_PHY_WOL_MID_MAC_ADDR 22
@@ -276,6 +277,7 @@ enum rgmii_clock_delay {
/* Microsemi PHY ID's
* Code assumes lowest nibble is 0
*/
+#define PHY_ID_VSC8501 0x00070530
#define PHY_ID_VSC8502 0x00070630
#define PHY_ID_VSC8504 0x000704c0
#define PHY_ID_VSC8514 0x00070670
diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index 62bf99e45af1..28df8a2e4230 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -519,16 +519,27 @@ out_unlock:
* * 2.0 ns (which causes the data to be sampled at exactly half way between
* clock transitions at 1000 Mbps) if delays should be enabled
*/
-static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
- u16 rgmii_rx_delay_mask,
- u16 rgmii_tx_delay_mask)
+static int vsc85xx_update_rgmii_cntl(struct phy_device *phydev, u32 rgmii_cntl,
+ u16 rgmii_rx_delay_mask,
+ u16 rgmii_tx_delay_mask)
{
u16 rgmii_rx_delay_pos = ffs(rgmii_rx_delay_mask) - 1;
u16 rgmii_tx_delay_pos = ffs(rgmii_tx_delay_mask) - 1;
u16 reg_val = 0;
- int rc;
+ u16 mask = 0;
+ int rc = 0;
- mutex_lock(&phydev->lock);
+ /* For traffic to pass, the VSC8502 family needs the RX_CLK disable bit
+ * to be unset for all PHY modes, so do that as part of the paged
+ * register modification.
+ * For some family members (like VSC8530/31/40/41) this bit is reserved
+ * and read-only, and the RX clock is enabled by default.
+ */
+ if (rgmii_cntl == VSC8502_RGMII_CNTL)
+ mask |= VSC8502_RGMII_RX_CLK_DISABLE;
+
+ if (phy_interface_is_rgmii(phydev))
+ mask |= rgmii_rx_delay_mask | rgmii_tx_delay_mask;
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID ||
phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
@@ -537,31 +548,20 @@ static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
reg_val |= RGMII_CLK_DELAY_2_0_NS << rgmii_tx_delay_pos;
- rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
- rgmii_cntl,
- rgmii_rx_delay_mask | rgmii_tx_delay_mask,
- reg_val);
-
- mutex_unlock(&phydev->lock);
+ if (mask)
+ rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
+ rgmii_cntl, mask, reg_val);
return rc;
}
static int vsc85xx_default_config(struct phy_device *phydev)
{
- int rc;
-
phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
- if (phy_interface_mode_is_rgmii(phydev->interface)) {
- rc = vsc85xx_rgmii_set_skews(phydev, VSC8502_RGMII_CNTL,
- VSC8502_RGMII_RX_DELAY_MASK,
- VSC8502_RGMII_TX_DELAY_MASK);
- if (rc)
- return rc;
- }
-
- return 0;
+ return vsc85xx_update_rgmii_cntl(phydev, VSC8502_RGMII_CNTL,
+ VSC8502_RGMII_RX_DELAY_MASK,
+ VSC8502_RGMII_TX_DELAY_MASK);
}
static int vsc85xx_get_tunable(struct phy_device *phydev,
@@ -1758,13 +1758,11 @@ static int vsc8584_config_init(struct phy_device *phydev)
if (ret)
return ret;
- if (phy_interface_is_rgmii(phydev)) {
- ret = vsc85xx_rgmii_set_skews(phydev, VSC8572_RGMII_CNTL,
- VSC8572_RGMII_RX_DELAY_MASK,
- VSC8572_RGMII_TX_DELAY_MASK);
- if (ret)
- return ret;
- }
+ ret = vsc85xx_update_rgmii_cntl(phydev, VSC8572_RGMII_CNTL,
+ VSC8572_RGMII_RX_DELAY_MASK,
+ VSC8572_RGMII_TX_DELAY_MASK);
+ if (ret)
+ return ret;
ret = genphy_soft_reset(phydev);
if (ret)
@@ -2317,6 +2315,30 @@ static int vsc85xx_probe(struct phy_device *phydev)
/* Microsemi VSC85xx PHYs */
static struct phy_driver vsc85xx_driver[] = {
{
+ .phy_id = PHY_ID_VSC8501,
+ .name = "Microsemi GE VSC8501 SyncE",
+ .phy_id_mask = 0xfffffff0,
+ /* PHY_BASIC_FEATURES */
+ .soft_reset = &genphy_soft_reset,
+ .config_init = &vsc85xx_config_init,
+ .config_aneg = &vsc85xx_config_aneg,
+ .read_status = &vsc85xx_read_status,
+ .handle_interrupt = vsc85xx_handle_interrupt,
+ .config_intr = &vsc85xx_config_intr,
+ .suspend = &genphy_suspend,
+ .resume = &genphy_resume,
+ .probe = &vsc85xx_probe,
+ .set_wol = &vsc85xx_wol_set,
+ .get_wol = &vsc85xx_wol_get,
+ .get_tunable = &vsc85xx_get_tunable,
+ .set_tunable = &vsc85xx_set_tunable,
+ .read_page = &vsc85xx_phy_read_page,
+ .write_page = &vsc85xx_phy_write_page,
+ .get_sset_count = &vsc85xx_get_sset_count,
+ .get_strings = &vsc85xx_get_strings,
+ .get_stats = &vsc85xx_get_stats,
+},
+{
.phy_id = PHY_ID_VSC8502,
.name = "Microsemi GE VSC8502 SyncE",
.phy_id_mask = 0xfffffff0,
@@ -2656,6 +2678,8 @@ static struct phy_driver vsc85xx_driver[] = {
module_phy_driver(vsc85xx_driver);
static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
+ { PHY_ID_VSC8501, 0xfffffff0, },
+ { PHY_ID_VSC8502, 0xfffffff0, },
{ PHY_ID_VSC8504, 0xfffffff0, },
{ PHY_ID_VSC8514, 0xfffffff0, },
{ PHY_ID_VSC8530, 0xfffffff0, },
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index d10606f257c4..555b0b1e9a78 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1629,6 +1629,7 @@ static int team_init(struct net_device *dev)
team->dev = dev;
team_set_no_mode(team);
+ team->notifier_ctx = false;
team->pcpu_stats = netdev_alloc_pcpu_stats(struct team_pcpu_stats);
if (!team->pcpu_stats)
@@ -3022,7 +3023,11 @@ static int team_device_event(struct notifier_block *unused,
team_del_slave(port->team->dev, dev);
break;
case NETDEV_FEAT_CHANGE:
- team_compute_features(port->team);
+ if (!port->team->notifier_ctx) {
+ port->team->notifier_ctx = true;
+ team_compute_features(port->team);
+ port->team->notifier_ctx = false;
+ }
break;
case NETDEV_PRECHANGEMTU:
/* Forbid to change mtu of underlaying device */
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 6ce8f4f0c70e..db05622f1f70 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -181,9 +181,12 @@ static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx)
else
min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth32);
- max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
- if (max == 0)
+ if (le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) == 0)
max = CDC_NCM_NTB_MAX_SIZE_TX; /* dwNtbOutMaxSize not set */
+ else
+ max = clamp_t(u32, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize),
+ USB_CDC_NCM_NTB_MIN_OUT_SIZE,
+ CDC_NCM_NTB_MAX_SIZE_TX);
/* some devices set dwNtbOutMaxSize too low for the above default */
min = min(min, max);
@@ -1244,6 +1247,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
* further.
*/
if (skb_out == NULL) {
+ /* If even the smallest allocation fails, abort. */
+ if (ctx->tx_curr_size == USB_CDC_NCM_NTB_MIN_OUT_SIZE)
+ goto alloc_failed;
ctx->tx_low_mem_max_cnt = min(ctx->tx_low_mem_max_cnt + 1,
(unsigned)CDC_NCM_LOW_MEM_MAX_CNT);
ctx->tx_low_mem_val = ctx->tx_low_mem_max_cnt;
@@ -1262,13 +1268,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);
/* No allocation possible so we will abort */
- if (skb_out == NULL) {
- if (skb != NULL) {
- dev_kfree_skb_any(skb);
- dev->net->stats.tx_dropped++;
- }
- goto exit_no_skb;
- }
+ if (!skb_out)
+ goto alloc_failed;
ctx->tx_low_mem_val--;
}
if (ctx->is_ndp16) {
@@ -1461,6 +1462,11 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
return skb_out;
+alloc_failed:
+ if (skb) {
+ dev_kfree_skb_any(skb);
+ dev->net->stats.tx_dropped++;
+ }
exit_no_skb:
/* Start timer, if there is a remaining non-empty skb */
if (ctx->tx_curr_skb != NULL && n > 0)
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index fc985e5c739d..8de6b6e67829 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -208,6 +208,7 @@ struct team {
bool queue_override_enabled;
struct list_head *qom_lists; /* array of queue override mapping lists */
bool port_mtu_change_allowed;
+ bool notifier_ctx;
struct {
unsigned int count;
unsigned int interval; /* in ms */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index dc5e2cb302a5..b89778d0d326 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1705,7 +1705,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 rc[0x1];
u8 uar_4k[0x1];
- u8 reserved_at_241[0x9];
+ u8 reserved_at_241[0x7];
+ u8 fl_rc_qp_when_roce_disabled[0x1];
+ u8 regexp_params[0x1];
u8 uar_sz[0x6];
u8 port_selection_cap[0x1];
u8 reserved_at_248[0x1];
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 738776ab8838..0b40417457cd 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1587,6 +1587,16 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
to->l4_hash = from->l4_hash;
};
+static inline int skb_cmp_decrypted(const struct sk_buff *skb1,
+ const struct sk_buff *skb2)
+{
+#ifdef CONFIG_TLS_DEVICE
+ return skb2->decrypted - skb1->decrypted;
+#else
+ return 0;
+#endif
+}
+
static inline void skb_copy_decrypted(struct sk_buff *to,
const struct sk_buff *from)
{
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 84f787416a54..054d7911bfc9 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -71,7 +71,6 @@ struct sk_psock_link {
};
struct sk_psock_work_state {
- struct sk_buff *skb;
u32 len;
u32 off;
};
@@ -105,7 +104,7 @@ struct sk_psock {
struct proto *sk_proto;
struct mutex work_mutex;
struct sk_psock_work_state work_state;
- struct work_struct work;
+ struct delayed_work work;
struct rcu_work rwork;
};
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a6c8aee2f256..8baf34639939 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1327,7 +1327,7 @@ int hci_le_create_cis(struct hci_conn *conn);
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
u8 role);
-int hci_conn_del(struct hci_conn *conn);
+void hci_conn_del(struct hci_conn *conn);
void hci_conn_hash_flush(struct hci_dev *hdev);
void hci_conn_check_pending(struct hci_dev *hdev);
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 0efef2a952b7..59955ac33157 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -221,6 +221,7 @@ struct bonding {
struct bond_up_slave __rcu *usable_slaves;
struct bond_up_slave __rcu *all_slaves;
bool force_primary;
+ bool notifier_ctx;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
int (*recv_probe)(const struct sk_buff *, struct bonding *,
struct slave *);
diff --git a/include/net/handshake.h b/include/net/handshake.h
index 3352b1ab43b3..2e26e436e85f 100644
--- a/include/net/handshake.h
+++ b/include/net/handshake.h
@@ -24,6 +24,7 @@ struct tls_handshake_args {
struct socket *ta_sock;
tls_done_func_t ta_done;
void *ta_data;
+ const char *ta_peername;
unsigned int ta_timeout_ms;
key_serial_t ta_keyring;
key_serial_t ta_my_cert;
diff --git a/include/net/ip.h b/include/net/ip.h
index c3fffaa92d6e..acec504c469a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -76,6 +76,7 @@ struct ipcm_cookie {
__be32 addr;
int oif;
struct ip_options_rcu *opt;
+ __u8 protocol;
__u8 ttl;
__s16 tos;
char priority;
@@ -96,6 +97,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
+ ipcm->protocol = inet->inet_num;
}
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index c8ec2f34722b..126f9e294389 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -399,22 +399,4 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
page_pool_update_nid(pool, new_nid);
}
-static inline void page_pool_ring_lock(struct page_pool *pool)
- __acquires(&pool->ring.producer_lock)
-{
- if (in_softirq())
- spin_lock(&pool->ring.producer_lock);
- else
- spin_lock_bh(&pool->ring.producer_lock);
-}
-
-static inline void page_pool_ring_unlock(struct page_pool *pool)
- __releases(&pool->ring.producer_lock)
-{
- if (in_softirq())
- spin_unlock(&pool->ring.producer_lock);
- else
- spin_unlock_bh(&pool->ring.producer_lock);
-}
-
#endif /* _NET_PAGE_POOL_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 04a31643cda3..18a038d16434 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1470,6 +1470,8 @@ static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
}
void tcp_cleanup_rbuf(struct sock *sk, int copied);
+void __tcp_cleanup_rbuf(struct sock *sk, int copied);
+
/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
* If 87.5 % (7/8) of the space has been consumed, we want to override
@@ -2326,6 +2328,14 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#endif /* CONFIG_BPF_SYSCALL */
+#ifdef CONFIG_INET
+void tcp_eat_skb(struct sock *sk, struct sk_buff *skb);
+#else
+static inline void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
+{
+}
+#endif
+
int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
struct sk_msg *msg, u32 bytes, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
diff --git a/include/net/tls.h b/include/net/tls.h
index 6056ce5a2aa5..596595c4b1af 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -126,6 +126,7 @@ struct tls_strparser {
u32 mark : 8;
u32 stopped : 1;
u32 copy_mode : 1;
+ u32 mixed_decrypted : 1;
u32 msg_ready : 1;
struct strp_msg stm;
diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h
index 1de4d0b95325..3d7ea58778c9 100644
--- a/include/uapi/linux/handshake.h
+++ b/include/uapi/linux/handshake.h
@@ -44,6 +44,7 @@ enum {
HANDSHAKE_A_ACCEPT_AUTH_MODE,
HANDSHAKE_A_ACCEPT_PEER_IDENTITY,
HANDSHAKE_A_ACCEPT_CERTIFICATE,
+ HANDSHAKE_A_ACCEPT_PEERNAME,
__HANDSHAKE_A_ACCEPT_MAX,
HANDSHAKE_A_ACCEPT_MAX = (__HANDSHAKE_A_ACCEPT_MAX - 1)
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 4b7f2df66b99..e682ab628dfa 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -163,6 +163,7 @@ struct in_addr {
#define IP_MULTICAST_ALL 49
#define IP_UNICAST_IF 50
#define IP_LOCAL_PORT_RANGE 51
+#define IP_PROTOCOL 52
#define MCAST_EXCLUDE 0
#define MCAST_INCLUDE 1
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 00c253b84bf5..9901efee4339 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1215,7 +1215,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
ret = htab_lock_bucket(htab, b, hash, &flags);
if (ret)
- return ret;
+ goto err_lock_bucket;
l_old = lookup_elem_raw(head, hash, key, key_size);
@@ -1236,6 +1236,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
err:
htab_unlock_bucket(htab, b, hash, flags);
+err_lock_bucket:
if (ret)
htab_lru_push_free(htab, l_new);
else if (l_old)
@@ -1338,7 +1339,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = htab_lock_bucket(htab, b, hash, &flags);
if (ret)
- return ret;
+ goto err_lock_bucket;
l_old = lookup_elem_raw(head, hash, key, key_size);
@@ -1361,6 +1362,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = 0;
err:
htab_unlock_bucket(htab, b, hash, flags);
+err_lock_bucket:
if (l_new)
bpf_lru_push_free(&htab->lru, &l_new->lru_node);
return ret;
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index d9c9f45e3529..8a26cd8814c1 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -859,4 +859,4 @@ static int __init bpf_offload_init(void)
return rhashtable_init(&offdevs, &offdevs_params);
}
-late_initcall(bpf_offload_init);
+core_initcall(bpf_offload_init);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fbcf5a4e2fcd..5871aa78d01a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17033,7 +17033,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
insn->dst_reg,
shift);
- insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
+ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
(1ULL << size * 8) - 1);
}
}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 640b951bf40a..f75ef12f18f7 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1083,8 +1083,28 @@ static void hci_conn_unlink(struct hci_conn *conn)
if (!conn->parent) {
struct hci_link *link, *t;
- list_for_each_entry_safe(link, t, &conn->link_list, list)
- hci_conn_unlink(link->conn);
+ list_for_each_entry_safe(link, t, &conn->link_list, list) {
+ struct hci_conn *child = link->conn;
+
+ hci_conn_unlink(child);
+
+ /* If hdev is down it means
+ * hci_dev_close_sync/hci_conn_hash_flush is in progress
+ * and links don't need to be cleanup as all connections
+ * would be cleanup.
+ */
+ if (!test_bit(HCI_UP, &hdev->flags))
+ continue;
+
+ /* Due to race, SCO connection might be not established
+ * yet at this point. Delete it now, otherwise it is
+ * possible for it to be stuck and can't be deleted.
+ */
+ if ((child->type == SCO_LINK ||
+ child->type == ESCO_LINK) &&
+ child->handle == HCI_CONN_HANDLE_UNSET)
+ hci_conn_del(child);
+ }
return;
}
@@ -1092,35 +1112,30 @@ static void hci_conn_unlink(struct hci_conn *conn)
if (!conn->link)
return;
- hci_conn_put(conn->parent);
- conn->parent = NULL;
-
list_del_rcu(&conn->link->list);
synchronize_rcu();
+ hci_conn_drop(conn->parent);
+ hci_conn_put(conn->parent);
+ conn->parent = NULL;
+
kfree(conn->link);
conn->link = NULL;
-
- /* Due to race, SCO connection might be not established
- * yet at this point. Delete it now, otherwise it is
- * possible for it to be stuck and can't be deleted.
- */
- if (conn->handle == HCI_CONN_HANDLE_UNSET)
- hci_conn_del(conn);
}
-int hci_conn_del(struct hci_conn *conn)
+void hci_conn_del(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
BT_DBG("%s hcon %p handle %d", hdev->name, conn, conn->handle);
+ hci_conn_unlink(conn);
+
cancel_delayed_work_sync(&conn->disc_work);
cancel_delayed_work_sync(&conn->auto_accept_work);
cancel_delayed_work_sync(&conn->idle_work);
if (conn->type == ACL_LINK) {
- hci_conn_unlink(conn);
/* Unacked frames */
hdev->acl_cnt += conn->sent;
} else if (conn->type == LE_LINK) {
@@ -1131,13 +1146,6 @@ int hci_conn_del(struct hci_conn *conn)
else
hdev->acl_cnt += conn->sent;
} else {
- struct hci_conn *acl = conn->parent;
-
- if (acl) {
- hci_conn_unlink(conn);
- hci_conn_drop(acl);
- }
-
/* Unacked ISO frames */
if (conn->type == ISO_LINK) {
if (hdev->iso_pkts)
@@ -1160,8 +1168,6 @@ int hci_conn_del(struct hci_conn *conn)
* rest of hci_conn_del.
*/
hci_conn_cleanup(conn);
-
- return 0;
}
struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
@@ -2462,22 +2468,21 @@ timer:
/* Drop all connection on the device */
void hci_conn_hash_flush(struct hci_dev *hdev)
{
- struct hci_conn_hash *h = &hdev->conn_hash;
- struct hci_conn *c, *n;
+ struct list_head *head = &hdev->conn_hash.list;
+ struct hci_conn *conn;
BT_DBG("hdev %s", hdev->name);
- list_for_each_entry_safe(c, n, &h->list, list) {
- c->state = BT_CLOSED;
-
- hci_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM);
-
- /* Unlink before deleting otherwise it is possible that
- * hci_conn_del removes the link which may cause the list to
- * contain items already freed.
- */
- hci_conn_unlink(c);
- hci_conn_del(c);
+ /* We should not traverse the list here, because hci_conn_del
+ * can remove extra links, which may cause the list traversal
+ * to hit items that have already been released.
+ */
+ while ((conn = list_first_entry_or_null(head,
+ struct hci_conn,
+ list)) != NULL) {
+ conn->state = BT_CLOSED;
+ hci_disconn_cfm(conn, HCI_ERROR_LOCAL_HOST_TERM);
+ hci_conn_del(conn);
}
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index e212e9d7edcb..a3e12a61d456 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -134,6 +134,29 @@ EXPORT_SYMBOL(page_pool_ethtool_stats_get);
#define recycle_stat_add(pool, __stat, val)
#endif
+static bool page_pool_producer_lock(struct page_pool *pool)
+ __acquires(&pool->ring.producer_lock)
+{
+ bool in_softirq = in_softirq();
+
+ if (in_softirq)
+ spin_lock(&pool->ring.producer_lock);
+ else
+ spin_lock_bh(&pool->ring.producer_lock);
+
+ return in_softirq;
+}
+
+static void page_pool_producer_unlock(struct page_pool *pool,
+ bool in_softirq)
+ __releases(&pool->ring.producer_lock)
+{
+ if (in_softirq)
+ spin_unlock(&pool->ring.producer_lock);
+ else
+ spin_unlock_bh(&pool->ring.producer_lock);
+}
+
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
@@ -617,6 +640,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count)
{
int i, bulk_len = 0;
+ bool in_softirq;
for (i = 0; i < count; i++) {
struct page *page = virt_to_head_page(data[i]);
@@ -635,7 +659,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
return;
/* Bulk producer into ptr_ring page_pool cache */
- page_pool_ring_lock(pool);
+ in_softirq = page_pool_producer_lock(pool);
for (i = 0; i < bulk_len; i++) {
if (__ptr_ring_produce(&pool->ring, data[i])) {
/* ring full */
@@ -644,7 +668,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
}
}
recycle_stat_add(pool, ring, i);
- page_pool_ring_unlock(pool);
+ page_pool_producer_unlock(pool, in_softirq);
/* Hopefully all pages was return into ptr_ring */
if (likely(i == bulk_len))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 515ec5cdc79c..cea28d30abb5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5224,8 +5224,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
} else {
skb = skb_clone(orig_skb, GFP_ATOMIC);
- if (skb_orphan_frags_rx(skb, GFP_ATOMIC))
+ if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) {
+ kfree_skb(skb);
return;
+ }
}
if (!skb)
return;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index f81883759d38..a9060e1f0e43 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -481,8 +481,6 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
msg_rx = sk_psock_peek_msg(psock);
}
out:
- if (psock->work_state.skb && copied > 0)
- schedule_work(&psock->work);
return copied;
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
@@ -624,42 +622,33 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
static void sk_psock_skb_state(struct sk_psock *psock,
struct sk_psock_work_state *state,
- struct sk_buff *skb,
int len, int off)
{
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
- state->skb = skb;
state->len = len;
state->off = off;
- } else {
- sock_drop(psock->sk, skb);
}
spin_unlock_bh(&psock->ingress_lock);
}
static void sk_psock_backlog(struct work_struct *work)
{
- struct sk_psock *psock = container_of(work, struct sk_psock, work);
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct sk_psock *psock = container_of(dwork, struct sk_psock, work);
struct sk_psock_work_state *state = &psock->work_state;
struct sk_buff *skb = NULL;
+ u32 len = 0, off = 0;
bool ingress;
- u32 len, off;
int ret;
mutex_lock(&psock->work_mutex);
- if (unlikely(state->skb)) {
- spin_lock_bh(&psock->ingress_lock);
- skb = state->skb;
+ if (unlikely(state->len)) {
len = state->len;
off = state->off;
- state->skb = NULL;
- spin_unlock_bh(&psock->ingress_lock);
}
- if (skb)
- goto start;
- while ((skb = skb_dequeue(&psock->ingress_skb))) {
+ while ((skb = skb_peek(&psock->ingress_skb))) {
len = skb->len;
off = 0;
if (skb_bpf_strparser(skb)) {
@@ -668,7 +657,6 @@ static void sk_psock_backlog(struct work_struct *work)
off = stm->offset;
len = stm->full_len;
}
-start:
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
do {
@@ -678,22 +666,28 @@ start:
len, ingress);
if (ret <= 0) {
if (ret == -EAGAIN) {
- sk_psock_skb_state(psock, state, skb,
- len, off);
+ sk_psock_skb_state(psock, state, len, off);
+
+ /* Delay slightly to prioritize any
+ * other work that might be here.
+ */
+ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+ schedule_delayed_work(&psock->work, 1);
goto end;
}
/* Hard errors break pipe and stop xmit. */
sk_psock_report_error(psock, ret ? -ret : EPIPE);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
- sock_drop(psock->sk, skb);
goto end;
}
off += ret;
len -= ret;
} while (len);
- if (!ingress)
+ skb = skb_dequeue(&psock->ingress_skb);
+ if (!ingress) {
kfree_skb(skb);
+ }
}
end:
mutex_unlock(&psock->work_mutex);
@@ -734,7 +728,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
INIT_LIST_HEAD(&psock->link);
spin_lock_init(&psock->link_lock);
- INIT_WORK(&psock->work, sk_psock_backlog);
+ INIT_DELAYED_WORK(&psock->work, sk_psock_backlog);
mutex_init(&psock->work_mutex);
INIT_LIST_HEAD(&psock->ingress_msg);
spin_lock_init(&psock->ingress_lock);
@@ -786,11 +780,6 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
skb_bpf_redirect_clear(skb);
sock_drop(psock->sk, skb);
}
- kfree_skb(psock->work_state.skb);
- /* We null the skb here to ensure that calls to sk_psock_backlog
- * do not pick up the free'd skb.
- */
- psock->work_state.skb = NULL;
__sk_psock_purge_ingress_msg(psock);
}
@@ -809,7 +798,6 @@ void sk_psock_stop(struct sk_psock *psock)
spin_lock_bh(&psock->ingress_lock);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
sk_psock_cork_free(psock);
- __sk_psock_zap_ingress(psock);
spin_unlock_bh(&psock->ingress_lock);
}
@@ -823,7 +811,8 @@ static void sk_psock_destroy(struct work_struct *work)
sk_psock_done_strp(psock);
- cancel_work_sync(&psock->work);
+ cancel_delayed_work_sync(&psock->work);
+ __sk_psock_zap_ingress(psock);
mutex_destroy(&psock->work_mutex);
psock_progs_drop(&psock->progs);
@@ -938,7 +927,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
}
skb_queue_tail(&psock_other->ingress_skb, skb);
- schedule_work(&psock_other->work);
+ schedule_delayed_work(&psock_other->work, 0);
spin_unlock_bh(&psock_other->ingress_lock);
return 0;
}
@@ -990,10 +979,8 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
err = -EIO;
sk_other = psock->sk;
if (sock_flag(sk_other, SOCK_DEAD) ||
- !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
- skb_bpf_redirect_clear(skb);
+ !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
goto out_free;
- }
skb_bpf_set_ingress(skb);
@@ -1018,22 +1005,23 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
skb_queue_tail(&psock->ingress_skb, skb);
- schedule_work(&psock->work);
+ schedule_delayed_work(&psock->work, 0);
err = 0;
}
spin_unlock_bh(&psock->ingress_lock);
- if (err < 0) {
- skb_bpf_redirect_clear(skb);
+ if (err < 0)
goto out_free;
- }
}
break;
case __SK_REDIRECT:
+ tcp_eat_skb(psock->sk, skb);
err = sk_psock_skb_redirect(psock, skb);
break;
case __SK_DROP:
default:
out_free:
+ skb_bpf_redirect_clear(skb);
+ tcp_eat_skb(psock->sk, skb);
sock_drop(psock->sk, skb);
}
@@ -1049,7 +1037,7 @@ static void sk_psock_write_space(struct sock *sk)
psock = sk_psock(sk);
if (likely(psock)) {
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
- schedule_work(&psock->work);
+ schedule_delayed_work(&psock->work, 0);
write_space = psock->saved_write_space;
}
rcu_read_unlock();
@@ -1078,8 +1066,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
- if (ret == SK_PASS)
- skb_bpf_set_strparser(skb);
+ skb_bpf_set_strparser(skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
skb->sk = NULL;
}
@@ -1183,12 +1170,11 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
int ret = __SK_DROP;
int len = skb->len;
- skb_get(skb);
-
rcu_read_lock();
psock = sk_psock(sk);
if (unlikely(!psock)) {
len = 0;
+ tcp_eat_skb(sk, skb);
sock_drop(sk, skb);
goto out;
}
@@ -1212,12 +1198,21 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
+ int copied;
trace_sk_data_ready(sk);
if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
return;
- sock->ops->read_skb(sk, sk_psock_verdict_recv);
+ copied = sock->ops->read_skb(sk, sk_psock_verdict_recv);
+ if (copied >= 0) {
+ struct sk_psock *psock;
+
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ psock->saved_data_ready(sk);
+ rcu_read_unlock();
+ }
}
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 7c189c2e2fbf..00afb66cd095 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1644,9 +1644,10 @@ void sock_map_close(struct sock *sk, long timeout)
rcu_read_unlock();
sk_psock_stop(psock);
release_sock(sk);
- cancel_work_sync(&psock->work);
+ cancel_delayed_work_sync(&psock->work);
sk_psock_put(sk, psock);
}
+
/* Make sure we do not recurse. This is a bug.
* Leak the socket instead of crashing on a stack overflow.
*/
diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c
index e6adc5dec11a..6d37bab35c8f 100644
--- a/net/handshake/handshake-test.c
+++ b/net/handshake/handshake-test.c
@@ -102,7 +102,7 @@ struct handshake_req_alloc_test_param handshake_req_alloc_params[] = {
{
.desc = "handshake_req_alloc excessive privsize",
.proto = &handshake_req_alloc_proto_6,
- .gfp = GFP_KERNEL,
+ .gfp = GFP_KERNEL | __GFP_NOWARN,
.expect_success = false,
},
{
@@ -209,6 +209,7 @@ static void handshake_req_submit_test4(struct kunit *test)
{
struct handshake_req *req, *result;
struct socket *sock;
+ struct file *filp;
int err;
/* Arrange */
@@ -218,9 +219,10 @@ static void handshake_req_submit_test4(struct kunit *test)
err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
KUNIT_ASSERT_NOT_NULL(test, sock->sk);
+ sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
@@ -241,6 +243,7 @@ static void handshake_req_submit_test5(struct kunit *test)
struct handshake_req *req;
struct handshake_net *hn;
struct socket *sock;
+ struct file *filp;
struct net *net;
int saved, err;
@@ -251,9 +254,10 @@ static void handshake_req_submit_test5(struct kunit *test)
err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
KUNIT_ASSERT_NOT_NULL(test, sock->sk);
+ sock->file = filp;
net = sock_net(sock->sk);
hn = handshake_pernet(net);
@@ -276,6 +280,7 @@ static void handshake_req_submit_test6(struct kunit *test)
{
struct handshake_req *req1, *req2;
struct socket *sock;
+ struct file *filp;
int err;
/* Arrange */
@@ -287,9 +292,10 @@ static void handshake_req_submit_test6(struct kunit *test)
err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
KUNIT_ASSERT_NOT_NULL(test, sock->sk);
+ sock->file = filp;
/* Act */
err = handshake_req_submit(sock, req1, GFP_KERNEL);
@@ -307,6 +313,7 @@ static void handshake_req_cancel_test1(struct kunit *test)
{
struct handshake_req *req;
struct socket *sock;
+ struct file *filp;
bool result;
int err;
@@ -318,8 +325,9 @@ static void handshake_req_cancel_test1(struct kunit *test)
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
+ sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
@@ -340,6 +348,7 @@ static void handshake_req_cancel_test2(struct kunit *test)
struct handshake_req *req, *next;
struct handshake_net *hn;
struct socket *sock;
+ struct file *filp;
struct net *net;
bool result;
int err;
@@ -352,8 +361,9 @@ static void handshake_req_cancel_test2(struct kunit *test)
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
+ sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
@@ -380,6 +390,7 @@ static void handshake_req_cancel_test3(struct kunit *test)
struct handshake_req *req, *next;
struct handshake_net *hn;
struct socket *sock;
+ struct file *filp;
struct net *net;
bool result;
int err;
@@ -392,8 +403,9 @@ static void handshake_req_cancel_test3(struct kunit *test)
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
+ sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
@@ -436,6 +448,7 @@ static void handshake_req_destroy_test1(struct kunit *test)
{
struct handshake_req *req;
struct socket *sock;
+ struct file *filp;
int err;
/* Arrange */
@@ -448,8 +461,9 @@ static void handshake_req_destroy_test1(struct kunit *test)
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
+ sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h
index 4dac965c99df..8aeaadca844f 100644
--- a/net/handshake/handshake.h
+++ b/net/handshake/handshake.h
@@ -31,6 +31,7 @@ struct handshake_req {
struct list_head hr_list;
struct rhash_head hr_rhash;
unsigned long hr_flags;
+ struct file *hr_file;
const struct handshake_proto *hr_proto;
struct sock *hr_sk;
void (*hr_odestruct)(struct sock *sk);
diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c
index 35c9c445e0b8..1086653e1fad 100644
--- a/net/handshake/netlink.c
+++ b/net/handshake/netlink.c
@@ -48,7 +48,7 @@ int handshake_genl_notify(struct net *net, const struct handshake_proto *proto,
proto->hp_handler_class))
return -ESRCH;
- msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, flags);
if (!msg)
return -ENOMEM;
@@ -99,9 +99,6 @@ static int handshake_dup(struct socket *sock)
struct file *file;
int newfd;
- if (!sock->file)
- return -EBADF;
-
file = get_file(sock->file);
newfd = get_unused_fd_flags(O_CLOEXEC);
if (newfd < 0) {
@@ -142,15 +139,16 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info)
goto out_complete;
}
err = req->hr_proto->hp_accept(req, info, fd);
- if (err)
+ if (err) {
+ fput(sock->file);
goto out_complete;
+ }
trace_handshake_cmd_accept(net, req, req->hr_sk, fd);
return 0;
out_complete:
handshake_complete(req, -EIO, NULL);
- fput(sock->file);
out_status:
trace_handshake_cmd_accept_err(net, req, NULL, err);
return err;
@@ -159,8 +157,8 @@ out_status:
int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
+ struct handshake_req *req = NULL;
struct socket *sock = NULL;
- struct handshake_req *req;
int fd, status, err;
if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))
diff --git a/net/handshake/request.c b/net/handshake/request.c
index 94d5cef3e048..d78d41abb3d9 100644
--- a/net/handshake/request.c
+++ b/net/handshake/request.c
@@ -239,6 +239,7 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req,
}
req->hr_odestruct = req->hr_sk->sk_destruct;
req->hr_sk->sk_destruct = handshake_sk_destruct;
+ req->hr_file = sock->file;
ret = -EOPNOTSUPP;
net = sock_net(req->hr_sk);
@@ -334,6 +335,9 @@ bool handshake_req_cancel(struct sock *sk)
return false;
}
+ /* Request accepted and waiting for DONE */
+ fput(req->hr_file);
+
out_true:
trace_handshake_cancel(net, req, sk);
diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c
index fcbeb63b4eb1..b735f5cced2f 100644
--- a/net/handshake/tlshd.c
+++ b/net/handshake/tlshd.c
@@ -31,6 +31,7 @@ struct tls_handshake_req {
int th_type;
unsigned int th_timeout_ms;
int th_auth_mode;
+ const char *th_peername;
key_serial_t th_keyring;
key_serial_t th_certificate;
key_serial_t th_privkey;
@@ -48,6 +49,7 @@ tls_handshake_req_init(struct handshake_req *req,
treq->th_timeout_ms = args->ta_timeout_ms;
treq->th_consumer_done = args->ta_done;
treq->th_consumer_data = args->ta_data;
+ treq->th_peername = args->ta_peername;
treq->th_keyring = args->ta_keyring;
treq->th_num_peerids = 0;
treq->th_certificate = TLS_NO_CERT;
@@ -214,6 +216,12 @@ static int tls_handshake_accept(struct handshake_req *req,
ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_MESSAGE_TYPE, treq->th_type);
if (ret < 0)
goto out_cancel;
+ if (treq->th_peername) {
+ ret = nla_put_string(msg, HANDSHAKE_A_ACCEPT_PEERNAME,
+ treq->th_peername);
+ if (ret < 0)
+ goto out_cancel;
+ }
if (treq->th_timeout_ms) {
ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_TIMEOUT, treq->th_timeout_ms);
if (ret < 0)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b511ff0adc0a..8e97d8d4cc9d 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -317,7 +317,14 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
ipc->tos = val;
ipc->priority = rt_tos2priority(ipc->tos);
break;
-
+ case IP_PROTOCOL:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+ return -EINVAL;
+ val = *(int *)CMSG_DATA(cmsg);
+ if (val < 1 || val > 255)
+ return -EINVAL;
+ ipc->protocol = val;
+ break;
default:
return -EINVAL;
}
@@ -1761,6 +1768,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_LOCAL_PORT_RANGE:
val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
break;
+ case IP_PROTOCOL:
+ val = inet_sk(sk)->inet_num;
+ break;
default:
sockopt_release_sock(sk);
return -ENOPROTOOPT;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ff712bf2a98d..eadf1c9ef7e4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -532,6 +532,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
ipcm_init_sk(&ipc, inet);
+ /* Keep backward compat */
+ if (hdrincl)
+ ipc.protocol = IPPROTO_RAW;
if (msg->msg_controllen) {
err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -599,7 +602,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
RT_SCOPE_UNIVERSE,
- hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ hdrincl ? ipc.protocol : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
(hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4d6392c16b7a..a60f6f4e7cd9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1571,7 +1571,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
* calculation of whether or not we must ACK for the sake of
* a window update.
*/
-static void __tcp_cleanup_rbuf(struct sock *sk, int copied)
+void __tcp_cleanup_rbuf(struct sock *sk, int copied)
{
struct tcp_sock *tp = tcp_sk(sk);
bool time_to_ack = false;
@@ -1773,7 +1773,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
tcp_flags = TCP_SKB_CB(skb)->tcp_flags;
used = recv_actor(sk, skb);
- consume_skb(skb);
if (used < 0) {
if (!copied)
copied = used;
@@ -1787,14 +1786,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
break;
}
}
- WRITE_ONCE(tp->copied_seq, seq);
-
- tcp_rcv_space_adjust(sk);
-
- /* Clean up data we have read: This will do ACK frames. */
- if (copied > 0)
- __tcp_cleanup_rbuf(sk, copied);
-
return copied;
}
EXPORT_SYMBOL(tcp_read_skb);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 2e9547467edb..5f93918c063c 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -11,6 +11,24 @@
#include <net/inet_common.h>
#include <net/tls.h>
+void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tcp;
+ int copied;
+
+ if (!skb || !skb->len || !sk_is_tcp(sk))
+ return;
+
+ if (skb_bpf_strparser(skb))
+ return;
+
+ tcp = tcp_sk(sk);
+ copied = tcp->copied_seq + skb->len;
+ WRITE_ONCE(tcp->copied_seq, copied);
+ tcp_rcv_space_adjust(sk);
+ __tcp_cleanup_rbuf(sk, skb->len);
+}
+
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg, u32 apply_bytes, int flags)
{
@@ -174,14 +192,34 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
return ret;
}
+static bool is_next_msg_fin(struct sk_psock *psock)
+{
+ struct scatterlist *sge;
+ struct sk_msg *msg_rx;
+ int i;
+
+ msg_rx = sk_psock_peek_msg(psock);
+ i = msg_rx->sg.start;
+ sge = sk_msg_elem(msg_rx, i);
+ if (!sge->length) {
+ struct sk_buff *skb = msg_rx->skb;
+
+ if (skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ return true;
+ }
+ return false;
+}
+
static int tcp_bpf_recvmsg_parser(struct sock *sk,
struct msghdr *msg,
size_t len,
int flags,
int *addr_len)
{
+ struct tcp_sock *tcp = tcp_sk(sk);
+ u32 seq = tcp->copied_seq;
struct sk_psock *psock;
- int copied;
+ int copied = 0;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
@@ -194,8 +232,43 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
return tcp_recvmsg(sk, msg, len, flags, addr_len);
lock_sock(sk);
+
+ /* We may have received data on the sk_receive_queue pre-accept and
+ * then we can not use read_skb in this context because we haven't
+ * assigned a sk_socket yet so have no link to the ops. The work-around
+ * is to check the sk_receive_queue and in these cases read skbs off
+ * queue again. The read_skb hook is not running at this point because
+ * of lock_sock so we avoid having multiple runners in read_skb.
+ */
+ if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
+ tcp_data_ready(sk);
+ /* This handles the ENOMEM errors if we both receive data
+ * pre accept and are already under memory pressure. At least
+ * let user know to retry.
+ */
+ if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
+ copied = -EAGAIN;
+ goto out;
+ }
+ }
+
msg_bytes_ready:
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+ /* The typical case for EFAULT is the socket was gracefully
+ * shutdown with a FIN pkt. So check here the other case is
+ * some error on copy_page_to_iter which would be unexpected.
+ * On fin return correct return code to zero.
+ */
+ if (copied == -EFAULT) {
+ bool is_fin = is_next_msg_fin(psock);
+
+ if (is_fin) {
+ copied = 0;
+ seq++;
+ goto out;
+ }
+ }
+ seq += copied;
if (!copied) {
long timeo;
int data;
@@ -233,6 +306,10 @@ msg_bytes_ready:
copied = -EAGAIN;
}
out:
+ WRITE_ONCE(tcp->copied_seq, seq);
+ tcp_rcv_space_adjust(sk);
+ if (copied > 0)
+ __tcp_cleanup_rbuf(sk, copied);
release_sock(sk);
sk_psock_put(sk, psock);
return copied;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index aa32afd871ee..9482def1f310 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1818,7 +1818,7 @@ EXPORT_SYMBOL(__skb_recv_udp);
int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
struct sk_buff *skb;
- int err, copied;
+ int err;
try_again:
skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
@@ -1837,10 +1837,7 @@ try_again:
}
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
- copied = recv_actor(sk, skb);
- kfree_skb(skb);
-
- return copied;
+ return recv_actor(sk, skb);
}
EXPORT_SYMBOL(udp_read_skb);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index e0c9cc39b81e..56d94d23b9e0 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -64,6 +64,8 @@ struct proto udplite_prot = {
.per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
.sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp_sock),
.h.udp_table = &udplite_table,
};
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index da46c4284676..49e31e4ae7b7 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -143,6 +143,8 @@ int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type)
optlen = 1;
break;
default:
+ if (len < 2)
+ goto bad;
optlen = nh[offset + 1] + 2;
if (optlen > len)
goto bad;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 7d0adb612bdd..44ee7a2e72ac 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -793,7 +793,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!proto)
proto = inet->inet_num;
- else if (proto != inet->inet_num)
+ else if (proto != inet->inet_num &&
+ inet->inet_num != IPPROTO_RAW)
return -EINVAL;
if (proto > 255)
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 67eaf3ca14ce..3bab0cc13697 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -60,6 +60,8 @@ struct proto udplitev6_prot = {
.per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
.sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
.h.udp_table = &udplite_table,
};
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 2f66a2006517..2abe45af98e7 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -324,9 +324,12 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
t->pl.probe_size += SCTP_PL_BIG_STEP;
} else if (t->pl.state == SCTP_PL_SEARCH) {
if (!t->pl.probe_high) {
- t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
- SCTP_MAX_PLPMTU);
- return false;
+ if (t->pl.probe_size < SCTP_MAX_PLPMTU) {
+ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
+ SCTP_MAX_PLPMTU);
+ return false;
+ }
+ t->pl.probe_high = SCTP_MAX_PLPMTU;
}
t->pl.probe_size += SCTP_PL_MIN_STEP;
if (t->pl.probe_size >= t->pl.probe_high) {
@@ -341,7 +344,7 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
} else if (t->pl.state == SCTP_PL_COMPLETE) {
/* Raise probe_size again after 30 * interval in Search Complete */
t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
- t->pl.probe_size += SCTP_PL_MIN_STEP;
+ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_MIN_STEP, SCTP_MAX_PLPMTU);
}
return t->pl.state == SCTP_PL_COMPLETE;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 50c38b624f77..538e9c6ec8c9 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2000,8 +2000,10 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
return rc;
/* create send buffer and rmb */
- if (smc_buf_create(new_smc, false))
+ if (smc_buf_create(new_smc, false)) {
+ smc_conn_abort(new_smc, ini->first_contact_local);
return SMC_CLC_DECL_MEM;
+ }
return 0;
}
@@ -2217,8 +2219,11 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
smcr_version = ini->smcr_version;
ini->smcr_version = SMC_V2;
rc = smc_listen_rdma_init(new_smc, ini);
- if (!rc)
+ if (!rc) {
rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
+ if (rc)
+ smc_conn_abort(new_smc, ini->first_contact_local);
+ }
if (!rc)
return;
ini->smcr_version = smcr_version;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 454356771cda..3f465faf2b68 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -127,6 +127,7 @@ static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
int i, j;
/* do link balancing */
+ conn->lnk = NULL; /* reset conn->lnk first */
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &conn->lgr->lnk[i];
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 804c3880d028..0672acab2773 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -167,6 +167,11 @@ static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx)
return ctx->strp.msg_ready;
}
+static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx)
+{
+ return ctx->strp.mixed_decrypted;
+}
+
#ifdef CONFIG_TLS_DEVICE
int tls_device_init(void);
void tls_device_cleanup(void);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index a7cc4f9faac2..bf69c9d6d06c 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -1007,20 +1007,14 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx)
struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_buff *skb = tls_strp_msg(sw_ctx);
struct strp_msg *rxm = strp_msg(skb);
- int is_decrypted = skb->decrypted;
- int is_encrypted = !is_decrypted;
- struct sk_buff *skb_iter;
- int left;
-
- left = rxm->full_len - skb->len;
- /* Check if all the data is decrypted already */
- skb_iter = skb_shinfo(skb)->frag_list;
- while (skb_iter && left > 0) {
- is_decrypted &= skb_iter->decrypted;
- is_encrypted &= !skb_iter->decrypted;
-
- left -= skb_iter->len;
- skb_iter = skb_iter->next;
+ int is_decrypted, is_encrypted;
+
+ if (!tls_strp_msg_mixed_decrypted(sw_ctx)) {
+ is_decrypted = skb->decrypted;
+ is_encrypted = !is_decrypted;
+ } else {
+ is_decrypted = 0;
+ is_encrypted = 0;
}
trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len,
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index 955ac3e0bf4d..da95abbb7ea3 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -29,34 +29,50 @@ static void tls_strp_anchor_free(struct tls_strparser *strp)
struct skb_shared_info *shinfo = skb_shinfo(strp->anchor);
DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1);
- shinfo->frag_list = NULL;
+ if (!strp->copy_mode)
+ shinfo->frag_list = NULL;
consume_skb(strp->anchor);
strp->anchor = NULL;
}
-/* Create a new skb with the contents of input copied to its page frags */
-static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
+static struct sk_buff *
+tls_strp_skb_copy(struct tls_strparser *strp, struct sk_buff *in_skb,
+ int offset, int len)
{
- struct strp_msg *rxm;
struct sk_buff *skb;
- int i, err, offset;
+ int i, err;
- skb = alloc_skb_with_frags(0, strp->stm.full_len, TLS_PAGE_ORDER,
+ skb = alloc_skb_with_frags(0, len, TLS_PAGE_ORDER,
&err, strp->sk->sk_allocation);
if (!skb)
return NULL;
- offset = strp->stm.offset;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- WARN_ON_ONCE(skb_copy_bits(strp->anchor, offset,
+ WARN_ON_ONCE(skb_copy_bits(in_skb, offset,
skb_frag_address(frag),
skb_frag_size(frag)));
offset += skb_frag_size(frag);
}
- skb_copy_header(skb, strp->anchor);
+ skb->len = len;
+ skb->data_len = len;
+ skb_copy_header(skb, in_skb);
+ return skb;
+}
+
+/* Create a new skb with the contents of input copied to its page frags */
+static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
+{
+ struct strp_msg *rxm;
+ struct sk_buff *skb;
+
+ skb = tls_strp_skb_copy(strp, strp->anchor, strp->stm.offset,
+ strp->stm.full_len);
+ if (!skb)
+ return NULL;
+
rxm = strp_msg(skb);
rxm->offset = 0;
return skb;
@@ -180,22 +196,22 @@ static void tls_strp_flush_anchor_copy(struct tls_strparser *strp)
for (i = 0; i < shinfo->nr_frags; i++)
__skb_frag_unref(&shinfo->frags[i], false);
shinfo->nr_frags = 0;
+ if (strp->copy_mode) {
+ kfree_skb_list(shinfo->frag_list);
+ shinfo->frag_list = NULL;
+ }
strp->copy_mode = 0;
+ strp->mixed_decrypted = 0;
}
-static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
- unsigned int offset, size_t in_len)
+static int tls_strp_copyin_frag(struct tls_strparser *strp, struct sk_buff *skb,
+ struct sk_buff *in_skb, unsigned int offset,
+ size_t in_len)
{
- struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
- struct sk_buff *skb;
- skb_frag_t *frag;
size_t len, chunk;
+ skb_frag_t *frag;
int sz;
- if (strp->msg_ready)
- return 0;
-
- skb = strp->anchor;
frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE];
len = in_len;
@@ -208,19 +224,26 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
skb_frag_size(frag),
chunk));
- sz = tls_rx_msg_size(strp, strp->anchor);
- if (sz < 0) {
- desc->error = sz;
- return 0;
- }
-
- /* We may have over-read, sz == 0 is guaranteed under-read */
- if (sz > 0)
- chunk = min_t(size_t, chunk, sz - skb->len);
-
skb->len += chunk;
skb->data_len += chunk;
skb_frag_size_add(frag, chunk);
+
+ sz = tls_rx_msg_size(strp, skb);
+ if (sz < 0)
+ return sz;
+
+ /* We may have over-read, sz == 0 is guaranteed under-read */
+ if (unlikely(sz && sz < skb->len)) {
+ int over = skb->len - sz;
+
+ WARN_ON_ONCE(over > chunk);
+ skb->len -= over;
+ skb->data_len -= over;
+ skb_frag_size_add(frag, -over);
+
+ chunk -= over;
+ }
+
frag++;
len -= chunk;
offset += chunk;
@@ -247,15 +270,99 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
offset += chunk;
}
- if (strp->stm.full_len == skb->len) {
+read_done:
+ return in_len - len;
+}
+
+static int tls_strp_copyin_skb(struct tls_strparser *strp, struct sk_buff *skb,
+ struct sk_buff *in_skb, unsigned int offset,
+ size_t in_len)
+{
+ struct sk_buff *nskb, *first, *last;
+ struct skb_shared_info *shinfo;
+ size_t chunk;
+ int sz;
+
+ if (strp->stm.full_len)
+ chunk = strp->stm.full_len - skb->len;
+ else
+ chunk = TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE;
+ chunk = min(chunk, in_len);
+
+ nskb = tls_strp_skb_copy(strp, in_skb, offset, chunk);
+ if (!nskb)
+ return -ENOMEM;
+
+ shinfo = skb_shinfo(skb);
+ if (!shinfo->frag_list) {
+ shinfo->frag_list = nskb;
+ nskb->prev = nskb;
+ } else {
+ first = shinfo->frag_list;
+ last = first->prev;
+ last->next = nskb;
+ first->prev = nskb;
+ }
+
+ skb->len += chunk;
+ skb->data_len += chunk;
+
+ if (!strp->stm.full_len) {
+ sz = tls_rx_msg_size(strp, skb);
+ if (sz < 0)
+ return sz;
+
+ /* We may have over-read, sz == 0 is guaranteed under-read */
+ if (unlikely(sz && sz < skb->len)) {
+ int over = skb->len - sz;
+
+ WARN_ON_ONCE(over > chunk);
+ skb->len -= over;
+ skb->data_len -= over;
+ __pskb_trim(nskb, nskb->len - over);
+
+ chunk -= over;
+ }
+
+ strp->stm.full_len = sz;
+ }
+
+ return chunk;
+}
+
+static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
+ unsigned int offset, size_t in_len)
+{
+ struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
+ struct sk_buff *skb;
+ int ret;
+
+ if (strp->msg_ready)
+ return 0;
+
+ skb = strp->anchor;
+ if (!skb->len)
+ skb_copy_decrypted(skb, in_skb);
+ else
+ strp->mixed_decrypted |= !!skb_cmp_decrypted(skb, in_skb);
+
+ if (IS_ENABLED(CONFIG_TLS_DEVICE) && strp->mixed_decrypted)
+ ret = tls_strp_copyin_skb(strp, skb, in_skb, offset, in_len);
+ else
+ ret = tls_strp_copyin_frag(strp, skb, in_skb, offset, in_len);
+ if (ret < 0) {
+ desc->error = ret;
+ ret = 0;
+ }
+
+ if (strp->stm.full_len && strp->stm.full_len == skb->len) {
desc->count = 0;
strp->msg_ready = 1;
tls_rx_msg_ready(strp);
}
-read_done:
- return in_len - len;
+ return ret;
}
static int tls_strp_read_copyin(struct tls_strparser *strp)
@@ -315,15 +422,19 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
return 0;
}
-static bool tls_strp_check_no_dup(struct tls_strparser *strp)
+static bool tls_strp_check_queue_ok(struct tls_strparser *strp)
{
unsigned int len = strp->stm.offset + strp->stm.full_len;
- struct sk_buff *skb;
+ struct sk_buff *first, *skb;
u32 seq;
- skb = skb_shinfo(strp->anchor)->frag_list;
- seq = TCP_SKB_CB(skb)->seq;
+ first = skb_shinfo(strp->anchor)->frag_list;
+ skb = first;
+ seq = TCP_SKB_CB(first)->seq;
+ /* Make sure there's no duplicate data in the queue,
+ * and the decrypted status matches.
+ */
while (skb->len < len) {
seq += skb->len;
len -= skb->len;
@@ -331,6 +442,8 @@ static bool tls_strp_check_no_dup(struct tls_strparser *strp)
if (TCP_SKB_CB(skb)->seq != seq)
return false;
+ if (skb_cmp_decrypted(first, skb))
+ return false;
}
return true;
@@ -411,7 +524,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
return tls_strp_read_copy(strp, true);
}
- if (!tls_strp_check_no_dup(strp))
+ if (!tls_strp_check_queue_ok(strp))
return tls_strp_read_copy(strp, false);
strp->msg_ready = 1;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 635b8bf6b937..6e6a7c37d685 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2304,10 +2304,14 @@ static void tls_data_ready(struct sock *sk)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_psock *psock;
+ gfp_t alloc_save;
trace_sk_data_ready(sk);
+ alloc_save = sk->sk_allocation;
+ sk->sk_allocation = GFP_ATOMIC;
tls_strp_data_ready(&ctx->strp);
+ sk->sk_allocation = alloc_save;
psock = sk_psock_get(sk);
if (psock) {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index cc695c9f09ec..e7728b57a8c7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2553,7 +2553,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb;
- int err, copied;
+ int err;
mutex_lock(&u->iolock);
skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
@@ -2561,10 +2561,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
if (!skb)
return err;
- copied = recv_actor(sk, skb);
- kfree_skb(skb);
-
- return copied;
+ return recv_actor(sk, skb);
}
/*
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index e4878551f140..b769fc258931 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1441,7 +1441,6 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
struct sock *sk = sk_vsock(vsk);
struct sk_buff *skb;
int off = 0;
- int copied;
int err;
spin_lock_bh(&vvs->rx_lock);
@@ -1454,9 +1453,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
if (!skb)
return err;
- copied = recv_actor(sk, skb);
- kfree_skb(skb);
- return copied;
+ return recv_actor(sk, skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);
diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
index 6448b7826107..bf66277115e2 100644
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -498,7 +498,6 @@ int main(int argc, char **argv)
"Option -%c requires an argument.\n\n",
optopt);
case 'h':
- __fallthrough;
default:
Usage();
return 0;
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c49e5403ad0e..28d2c77262be 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -197,7 +197,7 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r
$(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
$(call msg,SIGN-FILE,,$@)
- $(Q)$(CC) $(shell $(HOSTPKG_CONFIG)--cflags libcrypto 2> /dev/null) \
+ $(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \
$< -o $@ \
$(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 0ce25a967481..064cc5e8d9ad 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -2,6 +2,7 @@
// Copyright (c) 2020 Cloudflare
#include <error.h>
#include <netinet/tcp.h>
+#include <sys/epoll.h>
#include "test_progs.h"
#include "test_skmsg_load_helpers.skel.h"
@@ -9,8 +10,12 @@
#include "test_sockmap_invalid_update.skel.h"
#include "test_sockmap_skb_verdict_attach.skel.h"
#include "test_sockmap_progs_query.skel.h"
+#include "test_sockmap_pass_prog.skel.h"
+#include "test_sockmap_drop_prog.skel.h"
#include "bpf_iter_sockmap.skel.h"
+#include "sockmap_helpers.h"
+
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
#define TCP_REPAIR_ON 1
@@ -350,6 +355,126 @@ out:
test_sockmap_progs_query__destroy(skel);
}
+#define MAX_EVENTS 10
+static void test_sockmap_skb_verdict_shutdown(void)
+{
+ struct epoll_event ev, events[MAX_EVENTS];
+ int n, err, map, verdict, s, c1, p1;
+ struct test_sockmap_pass_prog *skel;
+ int epollfd;
+ int zero = 0;
+ char b;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ s = socket_loopback(AF_INET, SOCK_STREAM);
+ if (s < 0)
+ goto out;
+ err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
+ if (err < 0)
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (err < 0)
+ goto out_close;
+
+ shutdown(p1, SHUT_WR);
+
+ ev.events = EPOLLIN;
+ ev.data.fd = c1;
+
+ epollfd = epoll_create1(0);
+ if (!ASSERT_GT(epollfd, -1, "epoll_create(0)"))
+ goto out_close;
+ err = epoll_ctl(epollfd, EPOLL_CTL_ADD, c1, &ev);
+ if (!ASSERT_OK(err, "epoll_ctl(EPOLL_CTL_ADD)"))
+ goto out_close;
+ err = epoll_wait(epollfd, events, MAX_EVENTS, -1);
+ if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
+ goto out_close;
+
+ n = recv(c1, &b, 1, SOCK_NONBLOCK);
+ ASSERT_EQ(n, 0, "recv_timeout(fin)");
+out_close:
+ close(c1);
+ close(p1);
+out:
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+static void test_sockmap_skb_verdict_fionread(bool pass_prog)
+{
+ int expected, zero = 0, sent, recvd, avail;
+ int err, map, verdict, s, c0, c1, p0, p1;
+ struct test_sockmap_pass_prog *pass;
+ struct test_sockmap_drop_prog *drop;
+ char buf[256] = "0123456789";
+
+ if (pass_prog) {
+ pass = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(pass, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
+ map = bpf_map__fd(pass->maps.sock_map_rx);
+ expected = sizeof(buf);
+ } else {
+ drop = test_sockmap_drop_prog__open_and_load();
+ if (!ASSERT_OK_PTR(drop, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(drop->progs.prog_skb_verdict);
+ map = bpf_map__fd(drop->maps.sock_map_rx);
+ /* On drop data is consumed immediately and copied_seq inc'd */
+ expected = 0;
+ }
+
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ s = socket_loopback(AF_INET, SOCK_STREAM);
+ if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
+ goto out;
+ err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs(s)"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+ goto out_close;
+
+ sent = xsend(p1, &buf, sizeof(buf), 0);
+ ASSERT_EQ(sent, sizeof(buf), "xsend(p0)");
+ err = ioctl(c1, FIONREAD, &avail);
+ ASSERT_OK(err, "ioctl(FIONREAD) error");
+ ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
+ /* On DROP test there will be no data to read */
+ if (pass_prog) {
+ recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
+ ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
+ }
+
+out_close:
+ close(c0);
+ close(p0);
+ close(c1);
+ close(p1);
+out:
+ if (pass_prog)
+ test_sockmap_pass_prog__destroy(pass);
+ else
+ test_sockmap_drop_prog__destroy(drop);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -384,4 +509,10 @@ void test_sockmap_basic(void)
test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
if (test__start_subtest("sockmap skb_verdict progs query"))
test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
+ if (test__start_subtest("sockmap skb_verdict shutdown"))
+ test_sockmap_skb_verdict_shutdown();
+ if (test__start_subtest("sockmap skb_verdict fionread"))
+ test_sockmap_skb_verdict_fionread(true);
+ if (test__start_subtest("sockmap skb_verdict fionread on drop"))
+ test_sockmap_skb_verdict_fionread(false);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
new file mode 100644
index 000000000000..d12665490a90
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -0,0 +1,390 @@
+#ifndef __SOCKMAP_HELPERS__
+#define __SOCKMAP_HELPERS__
+
+#include <linux/vm_sockets.h>
+
+#define IO_TIMEOUT_SEC 30
+#define MAX_STRERR_LEN 256
+#define MAX_TEST_NAME 80
+
+/* workaround for older vm_sockets.h */
+#ifndef VMADDR_CID_LOCAL
+#define VMADDR_CID_LOCAL 1
+#endif
+
+#define __always_unused __attribute__((__unused__))
+
+#define _FAIL(errnum, fmt...) \
+ ({ \
+ error_at_line(0, (errnum), __func__, __LINE__, fmt); \
+ CHECK_FAIL(true); \
+ })
+#define FAIL(fmt...) _FAIL(0, fmt)
+#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
+#define FAIL_LIBBPF(err, msg) \
+ ({ \
+ char __buf[MAX_STRERR_LEN]; \
+ libbpf_strerror((err), __buf, sizeof(__buf)); \
+ FAIL("%s: %s", (msg), __buf); \
+ })
+
+/* Wrappers that fail the test on error and report it. */
+
+#define xaccept_nonblock(fd, addr, len) \
+ ({ \
+ int __ret = \
+ accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("accept"); \
+ __ret; \
+ })
+
+#define xbind(fd, addr, len) \
+ ({ \
+ int __ret = bind((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("bind"); \
+ __ret; \
+ })
+
+#define xclose(fd) \
+ ({ \
+ int __ret = close((fd)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("close"); \
+ __ret; \
+ })
+
+#define xconnect(fd, addr, len) \
+ ({ \
+ int __ret = connect((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("connect"); \
+ __ret; \
+ })
+
+#define xgetsockname(fd, addr, len) \
+ ({ \
+ int __ret = getsockname((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockname"); \
+ __ret; \
+ })
+
+#define xgetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = getsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xlisten(fd, backlog) \
+ ({ \
+ int __ret = listen((fd), (backlog)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("listen"); \
+ __ret; \
+ })
+
+#define xsetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = setsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("setsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xsend(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = send((fd), (buf), (len), (flags)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("send"); \
+ __ret; \
+ })
+
+#define xrecv_nonblock(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
+ IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("recv"); \
+ __ret; \
+ })
+
+#define xsocket(family, sotype, flags) \
+ ({ \
+ int __ret = socket(family, sotype, flags); \
+ if (__ret == -1) \
+ FAIL_ERRNO("socket"); \
+ __ret; \
+ })
+
+#define xbpf_map_delete_elem(fd, key) \
+ ({ \
+ int __ret = bpf_map_delete_elem((fd), (key)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("map_delete"); \
+ __ret; \
+ })
+
+#define xbpf_map_lookup_elem(fd, key, val) \
+ ({ \
+ int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("map_lookup"); \
+ __ret; \
+ })
+
+#define xbpf_map_update_elem(fd, key, val, flags) \
+ ({ \
+ int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("map_update"); \
+ __ret; \
+ })
+
+#define xbpf_prog_attach(prog, target, type, flags) \
+ ({ \
+ int __ret = \
+ bpf_prog_attach((prog), (target), (type), (flags)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("prog_attach(" #type ")"); \
+ __ret; \
+ })
+
+#define xbpf_prog_detach2(prog, target, type) \
+ ({ \
+ int __ret = bpf_prog_detach2((prog), (target), (type)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("prog_detach2(" #type ")"); \
+ __ret; \
+ })
+
+#define xpthread_create(thread, attr, func, arg) \
+ ({ \
+ int __ret = pthread_create((thread), (attr), (func), (arg)); \
+ errno = __ret; \
+ if (__ret) \
+ FAIL_ERRNO("pthread_create"); \
+ __ret; \
+ })
+
+#define xpthread_join(thread, retval) \
+ ({ \
+ int __ret = pthread_join((thread), (retval)); \
+ errno = __ret; \
+ if (__ret) \
+ FAIL_ERRNO("pthread_join"); \
+ __ret; \
+ })
+
+static inline int poll_read(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set rfds;
+ int r;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+
+ r = select(fd + 1, &rfds, NULL, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+
+ return r == 1 ? 0 : -1;
+}
+
+static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return accept(fd, addr, len);
+}
+
+static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return recv(fd, buf, len, flags);
+}
+
+static inline void init_addr_loopback4(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static inline void init_addr_loopback6(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = 0;
+ addr6->sin6_addr = in6addr_loopback;
+ *len = sizeof(*addr6);
+}
+
+static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->svm_family = AF_VSOCK;
+ addr->svm_port = VMADDR_PORT_ANY;
+ addr->svm_cid = VMADDR_CID_LOCAL;
+ *len = sizeof(*addr);
+}
+
+static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ switch (family) {
+ case AF_INET:
+ init_addr_loopback4(ss, len);
+ return;
+ case AF_INET6:
+ init_addr_loopback6(ss, len);
+ return;
+ case AF_VSOCK:
+ init_addr_loopback_vsock(ss, len);
+ return;
+ default:
+ FAIL("unsupported address family %d", family);
+ }
+}
+
+static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
+{
+ return (struct sockaddr *)ss;
+}
+
+static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+{
+ u64 value;
+ u32 key;
+ int err;
+
+ key = 0;
+ value = fd1;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ return err;
+
+ key = 1;
+ value = fd2;
+ return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+}
+
+static inline int create_pair(int s, int family, int sotype, int *c, int *p)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err = 0;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
+ *c = xsocket(family, sotype, 0);
+ if (*c < 0)
+ return errno;
+ err = xconnect(*c, sockaddr(&addr), len);
+ if (err) {
+ err = errno;
+ goto close_cli0;
+ }
+
+ *p = xaccept_nonblock(s, NULL, NULL);
+ if (*p < 0) {
+ err = errno;
+ goto close_cli0;
+ }
+ return err;
+close_cli0:
+ close(*c);
+ return err;
+}
+
+static inline int create_socket_pairs(int s, int family, int sotype,
+ int *c0, int *c1, int *p0, int *p1)
+{
+ int err;
+
+ err = create_pair(s, family, sotype, c0, p0);
+ if (err)
+ return err;
+
+ err = create_pair(s, family, sotype, c1, p1);
+ if (err) {
+ close(*c0);
+ close(*p0);
+ }
+ return err;
+}
+
+static inline int enable_reuseport(int s, int progfd)
+{
+ int err, one = 1;
+
+ err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err)
+ return -1;
+ err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
+ sizeof(progfd));
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err, s;
+
+ init_addr_loopback(family, &addr, &len);
+
+ s = xsocket(family, sotype, 0);
+ if (s == -1)
+ return -1;
+
+ if (progfd >= 0)
+ enable_reuseport(s, progfd);
+
+ err = xbind(s, sockaddr(&addr), len);
+ if (err)
+ goto close;
+
+ if (sotype & SOCK_DGRAM)
+ return s;
+
+ err = xlisten(s, SOMAXCONN);
+ if (err)
+ goto close;
+
+ return s;
+close:
+ xclose(s);
+ return -1;
+}
+
+static inline int socket_loopback(int family, int sotype)
+{
+ return socket_loopback_reuseport(family, sotype, -1);
+}
+
+
+#endif // __SOCKMAP_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 141c1e5944ee..b4f6f3a50ae5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -20,11 +20,6 @@
#include <unistd.h>
#include <linux/vm_sockets.h>
-/* workaround for older vm_sockets.h */
-#ifndef VMADDR_CID_LOCAL
-#define VMADDR_CID_LOCAL 1
-#endif
-
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
@@ -32,315 +27,7 @@
#include "test_progs.h"
#include "test_sockmap_listen.skel.h"
-#define IO_TIMEOUT_SEC 30
-#define MAX_STRERR_LEN 256
-#define MAX_TEST_NAME 80
-
-#define __always_unused __attribute__((__unused__))
-
-#define _FAIL(errnum, fmt...) \
- ({ \
- error_at_line(0, (errnum), __func__, __LINE__, fmt); \
- CHECK_FAIL(true); \
- })
-#define FAIL(fmt...) _FAIL(0, fmt)
-#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
-#define FAIL_LIBBPF(err, msg) \
- ({ \
- char __buf[MAX_STRERR_LEN]; \
- libbpf_strerror((err), __buf, sizeof(__buf)); \
- FAIL("%s: %s", (msg), __buf); \
- })
-
-/* Wrappers that fail the test on error and report it. */
-
-#define xaccept_nonblock(fd, addr, len) \
- ({ \
- int __ret = \
- accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
- if (__ret == -1) \
- FAIL_ERRNO("accept"); \
- __ret; \
- })
-
-#define xbind(fd, addr, len) \
- ({ \
- int __ret = bind((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("bind"); \
- __ret; \
- })
-
-#define xclose(fd) \
- ({ \
- int __ret = close((fd)); \
- if (__ret == -1) \
- FAIL_ERRNO("close"); \
- __ret; \
- })
-
-#define xconnect(fd, addr, len) \
- ({ \
- int __ret = connect((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("connect"); \
- __ret; \
- })
-
-#define xgetsockname(fd, addr, len) \
- ({ \
- int __ret = getsockname((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("getsockname"); \
- __ret; \
- })
-
-#define xgetsockopt(fd, level, name, val, len) \
- ({ \
- int __ret = getsockopt((fd), (level), (name), (val), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("getsockopt(" #name ")"); \
- __ret; \
- })
-
-#define xlisten(fd, backlog) \
- ({ \
- int __ret = listen((fd), (backlog)); \
- if (__ret == -1) \
- FAIL_ERRNO("listen"); \
- __ret; \
- })
-
-#define xsetsockopt(fd, level, name, val, len) \
- ({ \
- int __ret = setsockopt((fd), (level), (name), (val), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("setsockopt(" #name ")"); \
- __ret; \
- })
-
-#define xsend(fd, buf, len, flags) \
- ({ \
- ssize_t __ret = send((fd), (buf), (len), (flags)); \
- if (__ret == -1) \
- FAIL_ERRNO("send"); \
- __ret; \
- })
-
-#define xrecv_nonblock(fd, buf, len, flags) \
- ({ \
- ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
- IO_TIMEOUT_SEC); \
- if (__ret == -1) \
- FAIL_ERRNO("recv"); \
- __ret; \
- })
-
-#define xsocket(family, sotype, flags) \
- ({ \
- int __ret = socket(family, sotype, flags); \
- if (__ret == -1) \
- FAIL_ERRNO("socket"); \
- __ret; \
- })
-
-#define xbpf_map_delete_elem(fd, key) \
- ({ \
- int __ret = bpf_map_delete_elem((fd), (key)); \
- if (__ret < 0) \
- FAIL_ERRNO("map_delete"); \
- __ret; \
- })
-
-#define xbpf_map_lookup_elem(fd, key, val) \
- ({ \
- int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
- if (__ret < 0) \
- FAIL_ERRNO("map_lookup"); \
- __ret; \
- })
-
-#define xbpf_map_update_elem(fd, key, val, flags) \
- ({ \
- int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
- if (__ret < 0) \
- FAIL_ERRNO("map_update"); \
- __ret; \
- })
-
-#define xbpf_prog_attach(prog, target, type, flags) \
- ({ \
- int __ret = \
- bpf_prog_attach((prog), (target), (type), (flags)); \
- if (__ret < 0) \
- FAIL_ERRNO("prog_attach(" #type ")"); \
- __ret; \
- })
-
-#define xbpf_prog_detach2(prog, target, type) \
- ({ \
- int __ret = bpf_prog_detach2((prog), (target), (type)); \
- if (__ret < 0) \
- FAIL_ERRNO("prog_detach2(" #type ")"); \
- __ret; \
- })
-
-#define xpthread_create(thread, attr, func, arg) \
- ({ \
- int __ret = pthread_create((thread), (attr), (func), (arg)); \
- errno = __ret; \
- if (__ret) \
- FAIL_ERRNO("pthread_create"); \
- __ret; \
- })
-
-#define xpthread_join(thread, retval) \
- ({ \
- int __ret = pthread_join((thread), (retval)); \
- errno = __ret; \
- if (__ret) \
- FAIL_ERRNO("pthread_join"); \
- __ret; \
- })
-
-static int poll_read(int fd, unsigned int timeout_sec)
-{
- struct timeval timeout = { .tv_sec = timeout_sec };
- fd_set rfds;
- int r;
-
- FD_ZERO(&rfds);
- FD_SET(fd, &rfds);
-
- r = select(fd + 1, &rfds, NULL, NULL, &timeout);
- if (r == 0)
- errno = ETIME;
-
- return r == 1 ? 0 : -1;
-}
-
-static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return accept(fd, addr, len);
-}
-
-static int recv_timeout(int fd, void *buf, size_t len, int flags,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return recv(fd, buf, len, flags);
-}
-
-static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
-{
- struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
-
- addr4->sin_family = AF_INET;
- addr4->sin_port = 0;
- addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- *len = sizeof(*addr4);
-}
-
-static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
-{
- struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
-
- addr6->sin6_family = AF_INET6;
- addr6->sin6_port = 0;
- addr6->sin6_addr = in6addr_loopback;
- *len = sizeof(*addr6);
-}
-
-static void init_addr_loopback_vsock(struct sockaddr_storage *ss, socklen_t *len)
-{
- struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
-
- addr->svm_family = AF_VSOCK;
- addr->svm_port = VMADDR_PORT_ANY;
- addr->svm_cid = VMADDR_CID_LOCAL;
- *len = sizeof(*addr);
-}
-
-static void init_addr_loopback(int family, struct sockaddr_storage *ss,
- socklen_t *len)
-{
- switch (family) {
- case AF_INET:
- init_addr_loopback4(ss, len);
- return;
- case AF_INET6:
- init_addr_loopback6(ss, len);
- return;
- case AF_VSOCK:
- init_addr_loopback_vsock(ss, len);
- return;
- default:
- FAIL("unsupported address family %d", family);
- }
-}
-
-static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
-{
- return (struct sockaddr *)ss;
-}
-
-static int enable_reuseport(int s, int progfd)
-{
- int err, one = 1;
-
- err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
- if (err)
- return -1;
- err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
- sizeof(progfd));
- if (err)
- return -1;
-
- return 0;
-}
-
-static int socket_loopback_reuseport(int family, int sotype, int progfd)
-{
- struct sockaddr_storage addr;
- socklen_t len;
- int err, s;
-
- init_addr_loopback(family, &addr, &len);
-
- s = xsocket(family, sotype, 0);
- if (s == -1)
- return -1;
-
- if (progfd >= 0)
- enable_reuseport(s, progfd);
-
- err = xbind(s, sockaddr(&addr), len);
- if (err)
- goto close;
-
- if (sotype & SOCK_DGRAM)
- return s;
-
- err = xlisten(s, SOMAXCONN);
- if (err)
- goto close;
-
- return s;
-close:
- xclose(s);
- return -1;
-}
-
-static int socket_loopback(int family, int sotype)
-{
- return socket_loopback_reuseport(family, sotype, -1);
-}
+#include "sockmap_helpers.h"
static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
int family, int sotype, int mapfd)
@@ -984,31 +671,12 @@ static const char *redir_mode_str(enum redir_mode mode)
}
}
-static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
-{
- u64 value;
- u32 key;
- int err;
-
- key = 0;
- value = fd1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- return err;
-
- key = 1;
- value = fd2;
- return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
-}
-
static void redir_to_connected(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
const char *log_prefix = redir_mode_str(mode);
- struct sockaddr_storage addr;
int s, c0, c1, p0, p1;
unsigned int pass;
- socklen_t len;
int err, n;
u32 key;
char b;
@@ -1019,36 +687,13 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (s < 0)
return;
- len = sizeof(addr);
- err = xgetsockname(s, sockaddr(&addr), &len);
+ err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
if (err)
goto close_srv;
- c0 = xsocket(family, sotype, 0);
- if (c0 < 0)
- goto close_srv;
- err = xconnect(c0, sockaddr(&addr), len);
- if (err)
- goto close_cli0;
-
- p0 = xaccept_nonblock(s, NULL, NULL);
- if (p0 < 0)
- goto close_cli0;
-
- c1 = xsocket(family, sotype, 0);
- if (c1 < 0)
- goto close_peer0;
- err = xconnect(c1, sockaddr(&addr), len);
- if (err)
- goto close_cli1;
-
- p1 = xaccept_nonblock(s, NULL, NULL);
- if (p1 < 0)
- goto close_cli1;
-
err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
- goto close_peer1;
+ goto close;
n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
if (n < 0)
@@ -1056,12 +701,12 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (n == 0)
FAIL("%s: incomplete write", log_prefix);
if (n < 1)
- goto close_peer1;
+ goto close;
key = SK_PASS;
err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
if (err)
- goto close_peer1;
+ goto close;
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
@@ -1070,13 +715,10 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (n == 0)
FAIL("%s: incomplete recv", log_prefix);
-close_peer1:
+close:
xclose(p1);
-close_cli1:
xclose(c1);
-close_peer0:
xclose(p0);
-close_cli0:
xclose(c0);
close_srv:
xclose(s);
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c
new file mode 100644
index 000000000000..29314805ce42
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c
@@ -0,0 +1,32 @@
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_msg SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index baf9ebc6d903..99d2ea9fb658 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -191,7 +191,7 @@ SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
__u32 lport, rport;
- int op, err, ret;
+ int op, ret;
op = (int) skops->op;
@@ -203,10 +203,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (lport == 10000) {
ret = 1;
#ifdef SOCKMAP
- err = bpf_sock_map_update(skops, &sock_map, &ret,
+ bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#else
- err = bpf_sock_hash_update(skops, &sock_map, &ret,
+ bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
}
@@ -218,10 +218,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (bpf_ntohl(rport) == 10001) {
ret = 10;
#ifdef SOCKMAP
- err = bpf_sock_map_update(skops, &sock_map, &ret,
+ bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#else
- err = bpf_sock_hash_update(skops, &sock_map, &ret,
+ bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
}
@@ -230,8 +230,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
break;
}
- __sink(err);
-
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
new file mode 100644
index 000000000000..1d86a717a290
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
@@ -0,0 +1,32 @@
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_msg SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 7da8ec838c63..35d89dfa6f11 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -68,7 +68,7 @@ setup()
cleanup()
{
$IP link del dev dummy0 &> /dev/null
- ip netns del ns1
+ ip netns del ns1 &> /dev/null
ip netns del ns2 &> /dev/null
}