summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/ax25/Kconfig16
-rw-r--r--net/bluetooth/hci_conn.c63
-rw-r--r--net/bluetooth/hci_core.c14
-rw-r--r--net/bluetooth/hci_event.c1
-rw-r--r--net/bluetooth/hci_request.h2
-rw-r--r--net/bluetooth/hci_sync.c14
-rw-r--r--net/bluetooth/iso.c9
-rw-r--r--net/bpf/test_run.c1
-rw-r--r--net/bridge/br_forward.c4
-rw-r--r--net/bridge/br_input.c4
-rw-r--r--net/bridge/br_netfilter_hooks.c2
-rw-r--r--net/can/j1939/socket.c10
-rw-r--r--net/ceph/messenger.c78
-rw-r--r--net/ceph/messenger_v1.c98
-rw-r--r--net/ceph/messenger_v2.c289
-rw-r--r--net/ceph/osd_client.c334
-rw-r--r--net/core/dev.c6
-rw-r--r--net/core/flow_dissector.c5
-rw-r--r--net/core/neighbour.c14
-rw-r--r--net/core/skbuff.c54
-rw-r--r--net/core/skmsg.c12
-rw-r--r--net/core/sock.c27
-rw-r--r--net/core/sock_map.c40
-rw-r--r--net/dccp/ipv4.c9
-rw-r--r--net/dccp/ipv6.c9
-rw-r--r--net/ethtool/plca.c45
-rw-r--r--net/handshake/handshake-test.c14
-rw-r--r--net/handshake/netlink.c18
-rw-r--r--net/hsr/hsr_forward.c1
-rw-r--r--net/hsr/hsr_framereg.c4
-rw-r--r--net/hsr/hsr_main.h2
-rw-r--r--net/ipv4/devinet.c10
-rw-r--r--net/ipv4/fib_semantics.c6
-rw-r--r--net/ipv4/fib_trie.c7
-rw-r--r--net/ipv4/igmp.c3
-rw-r--r--net/ipv4/inet_hashtables.c36
-rw-r--r--net/ipv4/ip_forward.c1
-rw-r--r--net/ipv4/ip_input.c3
-rw-r--r--net/ipv4/ip_output.c9
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ipmr.c1
-rw-r--r--net/ipv4/route.c7
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--net/ipv4/tcp_bpf.c4
-rw-r--r--net/ipv4/tcp_input.c13
-rw-r--r--net/ipv4/tcp_output.c9
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/ip6_input.c3
-rw-r--r--net/ipv6/ip6_output.c3
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/route.c3
-rw-r--r--net/ipv6/tcp_ipv6.c10
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/kcm/kcmsock.c15
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/mac80211/cfg.c6
-rw-r--r--net/mac80211/ibss.c2
-rw-r--r--net/mac80211/ieee80211_i.h3
-rw-r--r--net/mac80211/key.c22
-rw-r--r--net/mac80211/mesh.c8
-rw-r--r--net/mac80211/mesh_plink.c2
-rw-r--r--net/mac80211/mlme.c45
-rw-r--r--net/mac80211/tx.c3
-rw-r--r--net/mac80211/vht.c16
-rw-r--r--net/mptcp/options.c5
-rw-r--r--net/mptcp/pm_userspace.c6
-rw-r--r--net/mptcp/protocol.c216
-rw-r--r--net/mptcp/protocol.h59
-rw-r--r--net/mptcp/subflow.c49
-rw-r--r--net/ncsi/ncsi-aen.c5
-rw-r--r--net/netfilter/ipset/ip_set_core.c12
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c8
-rw-r--r--net/netfilter/nf_conntrack_bpf.c2
-rw-r--r--net/netfilter/nf_conntrack_extend.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c43
-rw-r--r--net/netfilter/nf_tables_api.c173
-rw-r--r--net/netfilter/nfnetlink_osf.c8
-rw-r--r--net/netfilter/nft_exthdr.c42
-rw-r--r--net/netfilter/nft_payload.c13
-rw-r--r--net/netfilter/nft_set_hash.c11
-rw-r--r--net/netfilter/nft_set_pipapo.c4
-rw-r--r--net/netfilter/nft_set_rbtree.c62
-rw-r--r--net/netfilter/xt_sctp.c2
-rw-r--r--net/netfilter/xt_u32.c21
-rw-r--r--net/netlink/af_netlink.c8
-rw-r--r--net/nfc/llcp_core.c2
-rw-r--r--net/rds/rdma_transport.c12
-rw-r--r--net/rds/tcp_connect.c4
-rw-r--r--net/rds/tcp_listen.c2
-rw-r--r--net/rfkill/core.c32
-rw-r--r--net/sched/sch_fq_pie.c27
-rw-r--r--net/sched/sch_plug.c2
-rw-r--r--net/sched/sch_qfq.c22
-rw-r--r--net/sctp/associola.c3
-rw-r--r--net/sctp/proc.c2
-rw-r--r--net/sctp/socket.c11
-rw-r--r--net/smc/smc_core.c2
-rw-r--r--net/smc/smc_stats.h3
-rw-r--r--net/socket.c51
-rw-r--r--net/sunrpc/auth.c11
-rw-r--r--net/sunrpc/auth_tls.c4
-rw-r--r--net/sunrpc/clnt.c25
-rw-r--r--net/sunrpc/xprtsock.c6
-rw-r--r--net/tipc/crypto.c4
-rw-r--r--net/tls/tls_sw.c4
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/unix/scm.c6
-rw-r--r--net/wireless/core.c14
-rw-r--r--net/wireless/core.h7
-rw-r--r--net/wireless/mlme.c3
-rw-r--r--net/wireless/nl80211.c114
-rw-r--r--net/wireless/scan.c4
-rw-r--r--net/xdp/xsk.c22
-rw-r--r--net/xdp/xsk_diag.c3
118 files changed, 1930 insertions, 711 deletions
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index d3a9843a043d..fdb666607f10 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -10,7 +10,7 @@ menuconfig HAMRADIO
If you want to connect your Linux box to an amateur radio, answer Y
here. You want to read <https://www.tapr.org/>
and more specifically about AX.25 on Linux
- <http://www.linux-ax25.org/>.
+ <https://linux-ax25.in-berlin.de>.
Note that the answer to this question won't directly affect the
kernel: saying N will just cause the configurator to skip all
@@ -61,7 +61,7 @@ config AX25_DAMA_SLAVE
configuration. Linux cannot yet act as a DAMA server. This option
only compiles DAMA slave support into the kernel. It still needs to
be enabled at runtime. For more about DAMA see
- <http://www.linux-ax25.org>. If unsure, say Y.
+ <https://linux-ax25.in-berlin.de>. If unsure, say Y.
# placeholder until implemented
config AX25_DAMA_MASTER
@@ -87,9 +87,9 @@ config NETROM
A comprehensive listing of all the software for Linux amateur radio
users as well as information about how to configure an AX.25 port is
contained in the Linux Ham Wiki, available from
- <http://www.linux-ax25.org>. You also might want to check out the
- file <file:Documentation/networking/ax25.rst>. More information about
- digital amateur radio in general is on the WWW at
+ <https://linux-ax25.in-berlin.de>. You also might want to check out
+ the file <file:Documentation/networking/ax25.rst>. More information
+ about digital amateur radio in general is on the WWW at
<https://www.tapr.org/>.
To compile this driver as a module, choose M here: the
@@ -106,9 +106,9 @@ config ROSE
A comprehensive listing of all the software for Linux amateur radio
users as well as information about how to configure an AX.25 port is
contained in the Linux Ham Wiki, available from
- <http://www.linux-ax25.org>. You also might want to check out the
- file <file:Documentation/networking/ax25.rst>. More information about
- digital amateur radio in general is on the WWW at
+ <https://linux-ax25.in-berlin.de>. You also might want to check out
+ the file <file:Documentation/networking/ax25.rst>. More information
+ about digital amateur radio in general is on the WWW at
<https://www.tapr.org/>.
To compile this driver as a module, choose M here: the
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 9d5057cef30a..7a6f20338db8 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -2413,34 +2413,41 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type,
if (!test_bit(HCI_CONN_AUTH, &conn->flags))
goto auth;
- /* An authenticated FIPS approved combination key has sufficient
- * security for security level 4. */
- if (conn->key_type == HCI_LK_AUTH_COMBINATION_P256 &&
- sec_level == BT_SECURITY_FIPS)
- goto encrypt;
-
- /* An authenticated combination key has sufficient security for
- security level 3. */
- if ((conn->key_type == HCI_LK_AUTH_COMBINATION_P192 ||
- conn->key_type == HCI_LK_AUTH_COMBINATION_P256) &&
- sec_level == BT_SECURITY_HIGH)
- goto encrypt;
-
- /* An unauthenticated combination key has sufficient security for
- security level 1 and 2. */
- if ((conn->key_type == HCI_LK_UNAUTH_COMBINATION_P192 ||
- conn->key_type == HCI_LK_UNAUTH_COMBINATION_P256) &&
- (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW))
- goto encrypt;
-
- /* A combination key has always sufficient security for the security
- levels 1 or 2. High security level requires the combination key
- is generated using maximum PIN code length (16).
- For pre 2.1 units. */
- if (conn->key_type == HCI_LK_COMBINATION &&
- (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW ||
- conn->pin_length == 16))
- goto encrypt;
+ switch (conn->key_type) {
+ case HCI_LK_AUTH_COMBINATION_P256:
+ /* An authenticated FIPS approved combination key has
+ * sufficient security for security level 4 or lower.
+ */
+ if (sec_level <= BT_SECURITY_FIPS)
+ goto encrypt;
+ break;
+ case HCI_LK_AUTH_COMBINATION_P192:
+ /* An authenticated combination key has sufficient security for
+ * security level 3 or lower.
+ */
+ if (sec_level <= BT_SECURITY_HIGH)
+ goto encrypt;
+ break;
+ case HCI_LK_UNAUTH_COMBINATION_P192:
+ case HCI_LK_UNAUTH_COMBINATION_P256:
+ /* An unauthenticated combination key has sufficient security
+ * for security level 2 or lower.
+ */
+ if (sec_level <= BT_SECURITY_MEDIUM)
+ goto encrypt;
+ break;
+ case HCI_LK_COMBINATION:
+ /* A combination key has always sufficient security for the
+ * security levels 2 or lower. High security level requires the
+ * combination key is generated using maximum PIN code length
+ * (16). For pre 2.1 units.
+ */
+ if (sec_level <= BT_SECURITY_MEDIUM || conn->pin_length == 16)
+ goto encrypt;
+ break;
+ default:
+ break;
+ }
auth:
if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags))
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index a5992f1b3c9b..195aea2198a9 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2617,7 +2617,11 @@ int hci_register_dev(struct hci_dev *hdev)
if (id < 0)
return id;
- snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
+ error = dev_set_name(&hdev->dev, "hci%u", id);
+ if (error)
+ return error;
+
+ hdev->name = dev_name(&hdev->dev);
hdev->id = id;
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
@@ -2639,8 +2643,6 @@ int hci_register_dev(struct hci_dev *hdev)
if (!IS_ERR_OR_NULL(bt_debugfs))
hdev->debugfs = debugfs_create_dir(hdev->name, bt_debugfs);
- dev_set_name(&hdev->dev, "%s", hdev->name);
-
error = device_add(&hdev->dev);
if (error < 0)
goto err_wqueue;
@@ -2784,6 +2786,7 @@ void hci_release_dev(struct hci_dev *hdev)
hci_conn_params_clear_all(hdev);
hci_discovery_filter_clear(hdev);
hci_blocked_keys_clear(hdev);
+ hci_codec_list_clear(&hdev->local_codecs);
hci_dev_unlock(hdev);
ida_simple_remove(&hci_index_ida, hdev->id);
@@ -3418,7 +3421,12 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
if (c->type == type && c->sent) {
bt_dev_err(hdev, "killing stalled connection %pMR",
&c->dst);
+ /* hci_disconnect might sleep, so, we have to release
+ * the RCU read lock before calling it.
+ */
+ rcu_read_unlock();
hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
+ rcu_read_lock();
}
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 35f251041eeb..31d02b54eea1 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -33,6 +33,7 @@
#include "hci_request.h"
#include "hci_debugfs.h"
+#include "hci_codec.h"
#include "a2mp.h"
#include "amp.h"
#include "smp.h"
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index b9c5a9823837..0be75cf0efed 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -71,7 +71,5 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn);
void hci_req_add_le_passive_scan(struct hci_request *req);
-void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next);
-
void hci_request_setup(struct hci_dev *hdev);
void hci_request_cancel_all(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 9b93653c6197..d06e07a0ea5a 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -413,11 +413,6 @@ static int hci_le_scan_restart_sync(struct hci_dev *hdev)
LE_SCAN_FILTER_DUP_ENABLE);
}
-static int le_scan_restart_sync(struct hci_dev *hdev, void *data)
-{
- return hci_le_scan_restart_sync(hdev);
-}
-
static void le_scan_restart(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev,
@@ -427,15 +422,15 @@ static void le_scan_restart(struct work_struct *work)
bt_dev_dbg(hdev, "");
- hci_dev_lock(hdev);
-
- status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL);
+ status = hci_le_scan_restart_sync(hdev);
if (status) {
bt_dev_err(hdev, "failed to restart LE scan: status %d",
status);
- goto unlock;
+ return;
}
+ hci_dev_lock(hdev);
+
if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
!hdev->discovery.scan_start)
goto unlock;
@@ -5079,6 +5074,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
memset(hdev->eir, 0, sizeof(hdev->eir));
memset(hdev->dev_class, 0, sizeof(hdev->dev_class));
bacpy(&hdev->random_addr, BDADDR_ANY);
+ hci_codec_list_clear(&hdev->local_codecs);
hci_dev_put(hdev);
return err;
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 16da946f5881..71248163ce9a 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -502,7 +502,7 @@ drop:
}
/* -------- Socket interface ---------- */
-static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *ba)
+static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *src, bdaddr_t *dst)
{
struct sock *sk;
@@ -510,7 +510,10 @@ static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *ba)
if (sk->sk_state != BT_LISTEN)
continue;
- if (!bacmp(&iso_pi(sk)->src, ba))
+ if (bacmp(&iso_pi(sk)->dst, dst))
+ continue;
+
+ if (!bacmp(&iso_pi(sk)->src, src))
return sk;
}
@@ -952,7 +955,7 @@ static int iso_listen_cis(struct sock *sk)
write_lock(&iso_sk_list.lock);
- if (__iso_get_sock_listen_by_addr(&iso_pi(sk)->src))
+ if (__iso_get_sock_listen_by_addr(&iso_pi(sk)->src, &iso_pi(sk)->dst))
err = -EADDRINUSE;
write_unlock(&iso_sk_list.lock);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 57a7a64b84ed..0841f8d82419 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -543,6 +543,7 @@ struct bpf_fentry_test_t {
int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
{
+ asm volatile ("");
return (long)arg;
}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 9d7bc8b96b53..7431f89e897b 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -124,7 +124,7 @@ static int deliver_clone(const struct net_bridge_port *prev,
skb = skb_clone(skb, GFP_ATOMIC);
if (!skb) {
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
return -ENOMEM;
}
@@ -268,7 +268,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
skb = skb_copy(skb, GFP_ATOMIC);
if (!skb) {
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
return;
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index c34a0b0901b0..c729528b5e85 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -181,12 +181,12 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if ((mdst && mdst->host_joined) ||
br_multicast_is_router(brmctx, skb)) {
local_rcv = true;
- br->dev->stats.multicast++;
+ DEV_STATS_INC(br->dev, multicast);
}
mcast_hit = true;
} else {
local_rcv = true;
- br->dev->stats.multicast++;
+ DEV_STATS_INC(br->dev, multicast);
}
break;
case BR_PKT_UNICAST:
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 15186247b59a..033034d68f1f 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -294,7 +294,7 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_
/* tell br_dev_xmit to continue with forwarding */
nf_bridge->bridged_dnat = 1;
/* FIXME Need to refragment */
- ret = neigh->output(neigh, skb);
+ ret = READ_ONCE(neigh->output)(neigh, skb);
}
neigh_release(neigh);
return ret;
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index feaec4ad6d16..b28c976f52a0 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -974,6 +974,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
struct sock_exterr_skb *serr;
struct sk_buff *skb;
char *state = "UNK";
+ u32 tsflags;
int err;
jsk = j1939_sk(sk);
@@ -981,13 +982,14 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
if (!(jsk->state & J1939_SOCK_ERRQUEUE))
return;
+ tsflags = READ_ONCE(sk->sk_tsflags);
switch (type) {
case J1939_ERRQUEUE_TX_ACK:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+ if (!(tsflags & SOF_TIMESTAMPING_TX_ACK))
return;
break;
case J1939_ERRQUEUE_TX_SCHED:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+ if (!(tsflags & SOF_TIMESTAMPING_TX_SCHED))
return;
break;
case J1939_ERRQUEUE_TX_ABORT:
@@ -997,7 +999,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
case J1939_ERRQUEUE_RX_DPO:
fallthrough;
case J1939_ERRQUEUE_RX_ABORT:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+ if (!(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
return;
break;
default:
@@ -1054,7 +1056,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
}
serr->opt_stats = true;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ if (tsflags & SOF_TIMESTAMPING_OPT_ID)
serr->ee.ee_data = session->tskey;
netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 5eb4898cccd4..10a41cd9c523 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -969,6 +969,62 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
return true;
}
+static void ceph_msg_data_iter_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+
+ cursor->iov_iter = data->iter;
+ cursor->lastlen = 0;
+ iov_iter_truncate(&cursor->iov_iter, length);
+ cursor->resid = iov_iter_count(&cursor->iov_iter);
+}
+
+static struct page *ceph_msg_data_iter_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length)
+{
+ struct page *page;
+ ssize_t len;
+
+ if (cursor->lastlen)
+ iov_iter_revert(&cursor->iov_iter, cursor->lastlen);
+
+ len = iov_iter_get_pages2(&cursor->iov_iter, &page, PAGE_SIZE,
+ 1, page_offset);
+ BUG_ON(len < 0);
+
+ cursor->lastlen = len;
+
+ /*
+ * FIXME: The assumption is that the pages represented by the iov_iter
+ * are pinned, with the references held by the upper-level
+ * callers, or by virtue of being under writeback. Eventually,
+ * we'll get an iov_iter_get_pages2 variant that doesn't take
+ * page refs. Until then, just put the page ref.
+ */
+ VM_BUG_ON_PAGE(!PageWriteback(page) && page_count(page) < 2, page);
+ put_page(page);
+
+ *length = min_t(size_t, len, cursor->resid);
+ return page;
+}
+
+static bool ceph_msg_data_iter_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ BUG_ON(bytes > cursor->resid);
+ cursor->resid -= bytes;
+
+ if (bytes < cursor->lastlen) {
+ cursor->lastlen -= bytes;
+ } else {
+ iov_iter_advance(&cursor->iov_iter, bytes - cursor->lastlen);
+ cursor->lastlen = 0;
+ }
+
+ return cursor->resid;
+}
+
/*
* Message data is handled (sent or received) in pieces, where each
* piece resides on a single page. The network layer might not
@@ -996,6 +1052,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
case CEPH_MSG_DATA_BVECS:
ceph_msg_data_bvecs_cursor_init(cursor, length);
break;
+ case CEPH_MSG_DATA_ITER:
+ ceph_msg_data_iter_cursor_init(cursor, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
/* BUG(); */
@@ -1013,6 +1072,7 @@ void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
cursor->total_resid = length;
cursor->data = msg->data;
+ cursor->sr_resid = 0;
__ceph_msg_data_cursor_init(cursor);
}
@@ -1042,6 +1102,9 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
case CEPH_MSG_DATA_BVECS:
page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
break;
+ case CEPH_MSG_DATA_ITER:
+ page = ceph_msg_data_iter_next(cursor, page_offset, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
page = NULL;
@@ -1080,6 +1143,9 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes)
case CEPH_MSG_DATA_BVECS:
new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
break;
+ case CEPH_MSG_DATA_ITER:
+ new_piece = ceph_msg_data_iter_advance(cursor, bytes);
+ break;
case CEPH_MSG_DATA_NONE:
default:
BUG();
@@ -1879,6 +1945,18 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
}
EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+ struct iov_iter *iter)
+{
+ struct ceph_msg_data *data;
+
+ data = ceph_msg_data_add(msg);
+ data->type = CEPH_MSG_DATA_ITER;
+ data->iter = *iter;
+
+ msg->data_length += iov_iter_count(&data->iter);
+}
+
/*
* construct a new message with given type, size
* the new msg has a ref count of 1.
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index 3d57bb48a2b4..f9a50d7f0d20 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -159,9 +159,9 @@ static size_t sizeof_footer(struct ceph_connection *con)
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
- /* Initialize data cursor */
-
- ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
+ /* Initialize data cursor if it's not a sparse read */
+ if (!msg->sparse_read)
+ ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
}
/*
@@ -960,9 +960,9 @@ static void process_ack(struct ceph_connection *con)
prepare_read_tag(con);
}
-static int read_partial_message_section(struct ceph_connection *con,
- struct kvec *section,
- unsigned int sec_len, u32 *crc)
+static int read_partial_message_chunk(struct ceph_connection *con,
+ struct kvec *section,
+ unsigned int sec_len, u32 *crc)
{
int ret, left;
@@ -978,11 +978,91 @@ static int read_partial_message_section(struct ceph_connection *con,
section->iov_len += ret;
}
if (section->iov_len == sec_len)
- *crc = crc32c(0, section->iov_base, section->iov_len);
+ *crc = crc32c(*crc, section->iov_base, section->iov_len);
return 1;
}
+static inline int read_partial_message_section(struct ceph_connection *con,
+ struct kvec *section,
+ unsigned int sec_len, u32 *crc)
+{
+ *crc = 0;
+ return read_partial_message_chunk(con, section, sec_len, crc);
+}
+
+static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
+
+ if (do_bounce && unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ while (cursor->sr_resid > 0) {
+ struct page *page, *rpage;
+ size_t off, len;
+ int ret;
+
+ page = ceph_msg_data_next(cursor, &off, &len);
+ rpage = do_bounce ? con->bounce_page : page;
+
+ /* clamp to what remains in extent */
+ len = min_t(int, len, cursor->sr_resid);
+ ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len);
+ if (ret <= 0)
+ return ret;
+ *crc = ceph_crc32c_page(*crc, rpage, off, ret);
+ ceph_msg_data_advance(cursor, (size_t)ret);
+ cursor->sr_resid -= ret;
+ if (do_bounce)
+ memcpy_page(page, off, rpage, off, ret);
+ }
+ return 1;
+}
+
+static int read_sparse_msg_data(struct ceph_connection *con)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
+ u32 crc = 0;
+ int ret = 1;
+
+ if (do_datacrc)
+ crc = con->in_data_crc;
+
+ do {
+ if (con->v1.in_sr_kvec.iov_base)
+ ret = read_partial_message_chunk(con,
+ &con->v1.in_sr_kvec,
+ con->v1.in_sr_len,
+ &crc);
+ else if (cursor->sr_resid > 0)
+ ret = read_sparse_msg_extent(con, &crc);
+
+ if (ret <= 0) {
+ if (do_datacrc)
+ con->in_data_crc = crc;
+ return ret;
+ }
+
+ memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
+ ret = con->ops->sparse_read(con, cursor,
+ (char **)&con->v1.in_sr_kvec.iov_base);
+ con->v1.in_sr_len = ret;
+ } while (ret > 0);
+
+ if (do_datacrc)
+ con->in_data_crc = crc;
+
+ return ret < 0 ? ret : 1; /* must return > 0 to indicate success */
+}
+
static int read_partial_msg_data(struct ceph_connection *con)
{
struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
@@ -1173,7 +1253,9 @@ static int read_partial_message(struct ceph_connection *con)
if (!m->num_data_items)
return -EIO;
- if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
+ if (m->sparse_read)
+ ret = read_sparse_msg_data(con);
+ else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
ret = read_partial_msg_data_bounce(con);
else
ret = read_partial_msg_data(con);
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index 1df1d29dee92..d09a39ff2cf0 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -8,9 +8,9 @@
#include <linux/ceph/ceph_debug.h>
#include <crypto/aead.h>
-#include <crypto/algapi.h> /* for crypto_memneq() */
#include <crypto/hash.h>
#include <crypto/sha2.h>
+#include <crypto/utils.h>
#include <linux/bvec.h>
#include <linux/crc32c.h>
#include <linux/net.h>
@@ -52,14 +52,16 @@
#define FRAME_LATE_STATUS_COMPLETE 0xe
#define FRAME_LATE_STATUS_ABORTED_MASK 0xf
-#define IN_S_HANDLE_PREAMBLE 1
-#define IN_S_HANDLE_CONTROL 2
-#define IN_S_HANDLE_CONTROL_REMAINDER 3
-#define IN_S_PREPARE_READ_DATA 4
-#define IN_S_PREPARE_READ_DATA_CONT 5
-#define IN_S_PREPARE_READ_ENC_PAGE 6
-#define IN_S_HANDLE_EPILOGUE 7
-#define IN_S_FINISH_SKIP 8
+#define IN_S_HANDLE_PREAMBLE 1
+#define IN_S_HANDLE_CONTROL 2
+#define IN_S_HANDLE_CONTROL_REMAINDER 3
+#define IN_S_PREPARE_READ_DATA 4
+#define IN_S_PREPARE_READ_DATA_CONT 5
+#define IN_S_PREPARE_READ_ENC_PAGE 6
+#define IN_S_PREPARE_SPARSE_DATA 7
+#define IN_S_PREPARE_SPARSE_DATA_CONT 8
+#define IN_S_HANDLE_EPILOGUE 9
+#define IN_S_FINISH_SKIP 10
#define OUT_S_QUEUE_DATA 1
#define OUT_S_QUEUE_DATA_CONT 2
@@ -967,12 +969,48 @@ static void init_sgs_cursor(struct scatterlist **sg,
}
}
+/**
+ * init_sgs_pages: set up scatterlist on an array of page pointers
+ * @sg: scatterlist to populate
+ * @pages: pointer to page array
+ * @dpos: position in the array to start (bytes)
+ * @dlen: len to add to sg (bytes)
+ * @pad: pointer to pad destination (if any)
+ *
+ * Populate the scatterlist from the page array, starting at an arbitrary
+ * byte in the array and running for a specified length.
+ */
+static void init_sgs_pages(struct scatterlist **sg, struct page **pages,
+ int dpos, int dlen, u8 *pad)
+{
+ int idx = dpos >> PAGE_SHIFT;
+ int off = offset_in_page(dpos);
+ int resid = dlen;
+
+ do {
+ int len = min(resid, (int)PAGE_SIZE - off);
+
+ sg_set_page(*sg, pages[idx], len, off);
+ *sg = sg_next(*sg);
+ off = 0;
+ ++idx;
+ resid -= len;
+ } while (resid);
+
+ if (need_padding(dlen)) {
+ sg_set_buf(*sg, pad, padding_len(dlen));
+ *sg = sg_next(*sg);
+ }
+}
+
static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
u8 *front_pad, u8 *middle_pad, u8 *data_pad,
- void *epilogue, bool add_tag)
+ void *epilogue, struct page **pages, int dpos,
+ bool add_tag)
{
struct ceph_msg_data_cursor cursor;
struct scatterlist *cur_sg;
+ int dlen = data_len(msg);
int sg_cnt;
int ret;
@@ -986,9 +1024,15 @@ static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
if (middle_len(msg))
sg_cnt += calc_sg_cnt(msg->middle->vec.iov_base,
middle_len(msg));
- if (data_len(msg)) {
- ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
- sg_cnt += calc_sg_cnt_cursor(&cursor);
+ if (dlen) {
+ if (pages) {
+ sg_cnt += calc_pages_for(dpos, dlen);
+ if (need_padding(dlen))
+ sg_cnt++;
+ } else {
+ ceph_msg_data_cursor_init(&cursor, msg, dlen);
+ sg_cnt += calc_sg_cnt_cursor(&cursor);
+ }
}
ret = sg_alloc_table(sgt, sg_cnt, GFP_NOIO);
@@ -1002,9 +1046,13 @@ static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
if (middle_len(msg))
init_sgs(&cur_sg, msg->middle->vec.iov_base, middle_len(msg),
middle_pad);
- if (data_len(msg)) {
- ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
- init_sgs_cursor(&cur_sg, &cursor, data_pad);
+ if (dlen) {
+ if (pages) {
+ init_sgs_pages(&cur_sg, pages, dpos, dlen, data_pad);
+ } else {
+ ceph_msg_data_cursor_init(&cursor, msg, dlen);
+ init_sgs_cursor(&cur_sg, &cursor, data_pad);
+ }
}
WARN_ON(!sg_is_last(cur_sg));
@@ -1039,10 +1087,53 @@ static int decrypt_control_remainder(struct ceph_connection *con)
padded_len(rem_len) + CEPH_GCM_TAG_LEN);
}
+/* Process sparse read data that lives in a buffer */
+static int process_v2_sparse_read(struct ceph_connection *con,
+ struct page **pages, int spos)
+{
+ struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
+ int ret;
+
+ for (;;) {
+ char *buf = NULL;
+
+ ret = con->ops->sparse_read(con, cursor, &buf);
+ if (ret <= 0)
+ return ret;
+
+ dout("%s: sparse_read return %x buf %p\n", __func__, ret, buf);
+
+ do {
+ int idx = spos >> PAGE_SHIFT;
+ int soff = offset_in_page(spos);
+ struct page *spage = con->v2.in_enc_pages[idx];
+ int len = min_t(int, ret, PAGE_SIZE - soff);
+
+ if (buf) {
+ memcpy_from_page(buf, spage, soff, len);
+ buf += len;
+ } else {
+ struct bio_vec bv;
+
+ get_bvec_at(cursor, &bv);
+ len = min_t(int, len, bv.bv_len);
+ memcpy_page(bv.bv_page, bv.bv_offset,
+ spage, soff, len);
+ ceph_msg_data_advance(cursor, len);
+ }
+ spos += len;
+ ret -= len;
+ } while (ret);
+ }
+}
+
static int decrypt_tail(struct ceph_connection *con)
{
struct sg_table enc_sgt = {};
struct sg_table sgt = {};
+ struct page **pages = NULL;
+ bool sparse = con->in_msg->sparse_read;
+ int dpos = 0;
int tail_len;
int ret;
@@ -1053,9 +1144,14 @@ static int decrypt_tail(struct ceph_connection *con)
if (ret)
goto out;
+ if (sparse) {
+ dpos = padded_len(front_len(con->in_msg) + padded_len(middle_len(con->in_msg)));
+ pages = con->v2.in_enc_pages;
+ }
+
ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
- MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
- con->v2.in_buf, true);
+ MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
+ con->v2.in_buf, pages, dpos, true);
if (ret)
goto out;
@@ -1065,6 +1161,12 @@ static int decrypt_tail(struct ceph_connection *con)
if (ret)
goto out;
+ if (sparse && data_len(con->in_msg)) {
+ ret = process_v2_sparse_read(con, con->v2.in_enc_pages, dpos);
+ if (ret)
+ goto out;
+ }
+
WARN_ON(!con->v2.in_enc_page_cnt);
ceph_release_page_vector(con->v2.in_enc_pages,
con->v2.in_enc_page_cnt);
@@ -1588,7 +1690,7 @@ static int prepare_message_secure(struct ceph_connection *con)
encode_epilogue_secure(con, false);
ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop,
- &con->v2.out_epil, false);
+ &con->v2.out_epil, NULL, 0, false);
if (ret)
goto out;
@@ -1825,6 +1927,123 @@ static void prepare_read_data_cont(struct ceph_connection *con)
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+static int prepare_sparse_read_cont(struct ceph_connection *con)
+{
+ int ret;
+ struct bio_vec bv;
+ char *buf = NULL;
+ struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
+
+ WARN_ON(con->v2.in_state != IN_S_PREPARE_SPARSE_DATA_CONT);
+
+ if (iov_iter_is_bvec(&con->v2.in_iter)) {
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ con->in_data_crc = crc32c(con->in_data_crc,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ get_bvec_at(cursor, &bv);
+ memcpy_to_page(bv.bv_page, bv.bv_offset,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ } else {
+ con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
+ con->v2.in_bvec.bv_page,
+ con->v2.in_bvec.bv_offset,
+ con->v2.in_bvec.bv_len);
+ }
+
+ ceph_msg_data_advance(cursor, con->v2.in_bvec.bv_len);
+ cursor->sr_resid -= con->v2.in_bvec.bv_len;
+ dout("%s: advance by 0x%x sr_resid 0x%x\n", __func__,
+ con->v2.in_bvec.bv_len, cursor->sr_resid);
+ WARN_ON_ONCE(cursor->sr_resid > cursor->total_resid);
+ if (cursor->sr_resid) {
+ get_bvec_at(cursor, &bv);
+ if (bv.bv_len > cursor->sr_resid)
+ bv.bv_len = cursor->sr_resid;
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
+ set_in_bvec(con, &bv);
+ con->v2.data_len_remain -= bv.bv_len;
+ return 0;
+ }
+ } else if (iov_iter_is_kvec(&con->v2.in_iter)) {
+ /* On first call, we have no kvec so don't compute crc */
+ if (con->v2.in_kvec_cnt) {
+ WARN_ON_ONCE(con->v2.in_kvec_cnt > 1);
+ con->in_data_crc = crc32c(con->in_data_crc,
+ con->v2.in_kvecs[0].iov_base,
+ con->v2.in_kvecs[0].iov_len);
+ }
+ } else {
+ return -EIO;
+ }
+
+ /* get next extent */
+ ret = con->ops->sparse_read(con, cursor, &buf);
+ if (ret <= 0) {
+ if (ret < 0)
+ return ret;
+
+ reset_in_kvecs(con);
+ add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
+ return 0;
+ }
+
+ if (buf) {
+ /* receive into buffer */
+ reset_in_kvecs(con);
+ add_in_kvec(con, buf, ret);
+ con->v2.data_len_remain -= ret;
+ return 0;
+ }
+
+ if (ret > cursor->total_resid) {
+ pr_warn("%s: ret 0x%x total_resid 0x%zx resid 0x%zx\n",
+ __func__, ret, cursor->total_resid, cursor->resid);
+ return -EIO;
+ }
+ get_bvec_at(cursor, &bv);
+ if (bv.bv_len > cursor->sr_resid)
+ bv.bv_len = cursor->sr_resid;
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
+ set_in_bvec(con, &bv);
+ con->v2.data_len_remain -= ret;
+ return ret;
+}
+
+static int prepare_sparse_read_data(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->in_msg;
+
+ dout("%s: starting sparse read\n", __func__);
+
+ if (WARN_ON_ONCE(!con->ops->sparse_read))
+ return -EOPNOTSUPP;
+
+ if (!con_secure(con))
+ con->in_data_crc = -1;
+
+ reset_in_kvecs(con);
+ con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
+ con->v2.data_len_remain = data_len(msg);
+ return prepare_sparse_read_cont(con);
+}
+
static int prepare_read_tail_plain(struct ceph_connection *con)
{
struct ceph_msg *msg = con->in_msg;
@@ -1845,7 +2064,10 @@ static int prepare_read_tail_plain(struct ceph_connection *con)
}
if (data_len(msg)) {
- con->v2.in_state = IN_S_PREPARE_READ_DATA;
+ if (msg->sparse_read)
+ con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
+ else
+ con->v2.in_state = IN_S_PREPARE_READ_DATA;
} else {
add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
@@ -2898,6 +3120,12 @@ static int populate_in_iter(struct ceph_connection *con)
prepare_read_enc_page(con);
ret = 0;
break;
+ case IN_S_PREPARE_SPARSE_DATA:
+ ret = prepare_sparse_read_data(con);
+ break;
+ case IN_S_PREPARE_SPARSE_DATA_CONT:
+ ret = prepare_sparse_read_cont(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
ret = handle_epilogue(con);
break;
@@ -3489,6 +3717,23 @@ static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
con->v2.in_state = IN_S_FINISH_SKIP;
}
+static void revoke_at_prepare_sparse_data(struct ceph_connection *con)
+{
+ int resid; /* current piece of data */
+ int remaining;
+
+ WARN_ON(con_secure(con));
+ WARN_ON(!data_len(con->in_msg));
+ WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
+ resid = iov_iter_count(&con->v2.in_iter);
+ dout("%s con %p resid %d\n", __func__, con, resid);
+
+ remaining = CEPH_EPILOGUE_PLAIN_LEN + con->v2.data_len_remain;
+ con->v2.in_iter.count -= resid;
+ set_in_skip(con, resid + remaining);
+ con->v2.in_state = IN_S_FINISH_SKIP;
+}
+
static void revoke_at_handle_epilogue(struct ceph_connection *con)
{
int resid;
@@ -3505,6 +3750,7 @@ static void revoke_at_handle_epilogue(struct ceph_connection *con)
void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
{
switch (con->v2.in_state) {
+ case IN_S_PREPARE_SPARSE_DATA:
case IN_S_PREPARE_READ_DATA:
revoke_at_prepare_read_data(con);
break;
@@ -3514,6 +3760,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
case IN_S_PREPARE_READ_ENC_PAGE:
revoke_at_prepare_read_enc_page(con);
break;
+ case IN_S_PREPARE_SPARSE_DATA_CONT:
+ revoke_at_prepare_sparse_data(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
revoke_at_handle_epilogue(con);
break;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 658a6f2320cf..d3a759e052c8 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -171,6 +171,13 @@ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
osd_data->num_bvecs = num_bvecs;
}
+static void ceph_osd_iter_init(struct ceph_osd_data *osd_data,
+ struct iov_iter *iter)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_ITER;
+ osd_data->iter = *iter;
+}
+
static struct ceph_osd_data *
osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
{
@@ -264,6 +271,22 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
+/**
+ * osd_req_op_extent_osd_iter - Set up an operation with an iterator buffer
+ * @osd_req: The request to set up
+ * @which: Index of the operation in which to set the iter
+ * @iter: The buffer iterator
+ */
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+ unsigned int which, struct iov_iter *iter)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_iter_init(osd_data, iter);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_iter);
+
static void osd_req_op_cls_request_info_pagelist(
struct ceph_osd_request *osd_req,
unsigned int which, struct ceph_pagelist *pagelist)
@@ -346,6 +369,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
#endif /* CONFIG_BLOCK */
case CEPH_OSD_DATA_TYPE_BVECS:
return osd_data->bvec_pos.iter.bi_size;
+ case CEPH_OSD_DATA_TYPE_ITER:
+ return iov_iter_count(&osd_data->iter);
default:
WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
return 0;
@@ -376,8 +401,10 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
switch (op->op) {
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_WRITE:
case CEPH_OSD_OP_WRITEFULL:
+ kfree(op->extent.sparse_ext);
ceph_osd_data_release(&op->extent.osd_data);
break;
case CEPH_OSD_OP_CALL:
@@ -669,6 +696,7 @@ static void get_num_data_items(struct ceph_osd_request *req,
/* reply */
case CEPH_OSD_OP_STAT:
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_LIST_WATCHERS:
*num_reply_data_items += 1;
break;
@@ -738,7 +766,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
opcode != CEPH_OSD_OP_WRITEFULL && opcode != CEPH_OSD_OP_ZERO &&
- opcode != CEPH_OSD_OP_TRUNCATE);
+ opcode != CEPH_OSD_OP_TRUNCATE && opcode != CEPH_OSD_OP_SPARSE_READ);
op->extent.offset = offset;
op->extent.length = length;
@@ -951,6 +979,8 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
#endif
} else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) {
ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos);
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_ITER) {
+ ceph_msg_data_add_iter(msg, &osd_data->iter);
} else {
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
}
@@ -963,6 +993,7 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
case CEPH_OSD_OP_STAT:
break;
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_WRITE:
case CEPH_OSD_OP_WRITEFULL:
case CEPH_OSD_OP_ZERO:
@@ -1017,6 +1048,10 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
dst->copy_from.src_fadvise_flags =
cpu_to_le32(src->copy_from.src_fadvise_flags);
break;
+ case CEPH_OSD_OP_ASSERT_VER:
+ dst->assert_ver.unused = cpu_to_le64(0);
+ dst->assert_ver.ver = cpu_to_le64(src->assert_ver.ver);
+ break;
default:
pr_err("unsupported osd opcode %s\n",
ceph_osd_op_name(src->op));
@@ -1059,7 +1094,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE &&
- opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE);
+ opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE &&
+ opcode != CEPH_OSD_OP_SPARSE_READ);
req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
GFP_NOFS);
@@ -1100,15 +1136,30 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
if (flags & CEPH_OSD_FLAG_WRITE)
req->r_data_offset = off;
- if (num_ops > 1)
+ if (num_ops > 1) {
+ int num_req_ops, num_rep_ops;
+
/*
- * This is a special case for ceph_writepages_start(), but it
- * also covers ceph_uninline_data(). If more multi-op request
- * use cases emerge, we will need a separate helper.
+ * If this is a multi-op write request, assume that we'll need
+ * request ops. If it's a multi-op read then assume we'll need
+ * reply ops. Anything else and call it -EINVAL.
*/
- r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0);
- else
+ if (flags & CEPH_OSD_FLAG_WRITE) {
+ num_req_ops = num_ops;
+ num_rep_ops = 0;
+ } else if (flags & CEPH_OSD_FLAG_READ) {
+ num_req_ops = 0;
+ num_rep_ops = num_ops;
+ } else {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_req_ops,
+ num_rep_ops);
+ } else {
r = ceph_osdc_alloc_messages(req, GFP_NOFS);
+ }
if (r)
goto fail;
@@ -1120,6 +1171,18 @@ fail:
}
EXPORT_SYMBOL(ceph_osdc_new_request);
+int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
+{
+ op->extent.sparse_ext_cnt = cnt;
+ op->extent.sparse_ext = kmalloc_array(cnt,
+ sizeof(*op->extent.sparse_ext),
+ GFP_NOFS);
+ if (!op->extent.sparse_ext)
+ return -ENOMEM;
+ return 0;
+}
+EXPORT_SYMBOL(__ceph_alloc_sparse_ext_map);
+
/*
* We keep osd requests in an rbtree, sorted by ->r_tid.
*/
@@ -1177,6 +1240,7 @@ static void osd_init(struct ceph_osd *osd)
{
refcount_set(&osd->o_ref, 1);
RB_CLEAR_NODE(&osd->o_node);
+ spin_lock_init(&osd->o_requests_lock);
osd->o_requests = RB_ROOT;
osd->o_linger_requests = RB_ROOT;
osd->o_backoff_mappings = RB_ROOT;
@@ -1187,6 +1251,13 @@ static void osd_init(struct ceph_osd *osd)
mutex_init(&osd->lock);
}
+static void ceph_init_sparse_read(struct ceph_sparse_read *sr)
+{
+ kfree(sr->sr_extent);
+ memset(sr, '\0', sizeof(*sr));
+ sr->sr_state = CEPH_SPARSE_READ_HDR;
+}
+
static void osd_cleanup(struct ceph_osd *osd)
{
WARN_ON(!RB_EMPTY_NODE(&osd->o_node));
@@ -1197,6 +1268,8 @@ static void osd_cleanup(struct ceph_osd *osd)
WARN_ON(!list_empty(&osd->o_osd_lru));
WARN_ON(!list_empty(&osd->o_keepalive_item));
+ ceph_init_sparse_read(&osd->o_sparse_read);
+
if (osd->o_auth.authorizer) {
WARN_ON(osd_homeless(osd));
ceph_auth_destroy_authorizer(osd->o_auth.authorizer);
@@ -1216,6 +1289,9 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
osd_init(osd);
osd->o_osdc = osdc;
osd->o_osd = onum;
+ osd->o_sparse_op_idx = -1;
+
+ ceph_init_sparse_read(&osd->o_sparse_read);
ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
@@ -1406,7 +1482,9 @@ static void link_request(struct ceph_osd *osd, struct ceph_osd_request *req)
atomic_inc(&osd->o_osdc->num_homeless);
get_osd(osd);
+ spin_lock(&osd->o_requests_lock);
insert_request(&osd->o_requests, req);
+ spin_unlock(&osd->o_requests_lock);
req->r_osd = osd;
}
@@ -1418,7 +1496,9 @@ static void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req)
req, req->r_tid);
req->r_osd = NULL;
+ spin_lock(&osd->o_requests_lock);
erase_request(&osd->o_requests, req);
+ spin_unlock(&osd->o_requests_lock);
put_osd(osd);
if (!osd_homeless(osd))
@@ -2016,6 +2096,7 @@ static void setup_request_data(struct ceph_osd_request *req)
&op->raw_data_in);
break;
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
ceph_osdc_msg_data_add(reply_msg,
&op->extent.osd_data);
break;
@@ -2435,8 +2516,10 @@ static void finish_request(struct ceph_osd_request *req)
req->r_end_latency = ktime_get();
- if (req->r_osd)
+ if (req->r_osd) {
+ ceph_init_sparse_read(&req->r_osd->o_sparse_read);
unlink_request(req->r_osd, req);
+ }
atomic_dec(&osdc->num_requests);
/*
@@ -3795,6 +3878,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
* one (type of) reply back.
*/
WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK));
+ req->r_version = m.user_version;
req->r_result = m.result ?: data_len;
finish_request(req);
mutex_unlock(&osd->lock);
@@ -5348,6 +5432,24 @@ static void osd_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
ceph_msg_put(msg);
}
+/* How much sparse data was requested? */
+static u64 sparse_data_requested(struct ceph_osd_request *req)
+{
+ u64 len = 0;
+
+ if (req->r_flags & CEPH_OSD_FLAG_READ) {
+ int i;
+
+ for (i = 0; i < req->r_num_ops; ++i) {
+ struct ceph_osd_req_op *op = &req->r_ops[i];
+
+ if (op->op == CEPH_OSD_OP_SPARSE_READ)
+ len += op->extent.length;
+ }
+ }
+ return len;
+}
+
/*
* Lookup and return message for incoming reply. Don't try to do
* anything about a larger than preallocated data portion of the
@@ -5364,6 +5466,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
int front_len = le32_to_cpu(hdr->front_len);
int data_len = le32_to_cpu(hdr->data_len);
u64 tid = le64_to_cpu(hdr->tid);
+ u64 srlen;
down_read(&osdc->lock);
if (!osd_registered(osd)) {
@@ -5396,7 +5499,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
req->r_reply = m;
}
- if (data_len > req->r_reply->data_length) {
+ srlen = sparse_data_requested(req);
+ if (!srlen && data_len > req->r_reply->data_length) {
pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n",
__func__, osd->o_osd, req->r_tid, data_len,
req->r_reply->data_length);
@@ -5406,6 +5510,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
}
m = ceph_msg_get(req->r_reply);
+ m->sparse_read = (bool)srlen;
+
dout("get_reply tid %lld %p\n", tid, m);
out_unlock_session:
@@ -5638,9 +5744,217 @@ static int osd_check_message_signature(struct ceph_msg *msg)
return ceph_auth_check_message_signature(auth, msg);
}
+static void advance_cursor(struct ceph_msg_data_cursor *cursor, size_t len,
+ bool zero)
+{
+ while (len) {
+ struct page *page;
+ size_t poff, plen;
+
+ page = ceph_msg_data_next(cursor, &poff, &plen);
+ if (plen > len)
+ plen = len;
+ if (zero)
+ zero_user_segment(page, poff, poff + plen);
+ len -= plen;
+ ceph_msg_data_advance(cursor, plen);
+ }
+}
+
+static int prep_next_sparse_read(struct ceph_connection *con,
+ struct ceph_msg_data_cursor *cursor)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_sparse_read *sr = &o->o_sparse_read;
+ struct ceph_osd_request *req;
+ struct ceph_osd_req_op *op;
+
+ spin_lock(&o->o_requests_lock);
+ req = lookup_request(&o->o_requests, le64_to_cpu(con->in_msg->hdr.tid));
+ if (!req) {
+ spin_unlock(&o->o_requests_lock);
+ return -EBADR;
+ }
+
+ if (o->o_sparse_op_idx < 0) {
+ u64 srlen = sparse_data_requested(req);
+
+ dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n",
+ __func__, o->o_osd, srlen);
+ ceph_msg_data_cursor_init(cursor, con->in_msg, srlen);
+ } else {
+ u64 end;
+
+ op = &req->r_ops[o->o_sparse_op_idx];
+
+ WARN_ON_ONCE(op->extent.sparse_ext);
+
+ /* hand back buffer we took earlier */
+ op->extent.sparse_ext = sr->sr_extent;
+ sr->sr_extent = NULL;
+ op->extent.sparse_ext_cnt = sr->sr_count;
+ sr->sr_ext_len = 0;
+ dout("%s: [%d] completed extent array len %d cursor->resid %zd\n",
+ __func__, o->o_osd, op->extent.sparse_ext_cnt, cursor->resid);
+ /* Advance to end of data for this operation */
+ end = ceph_sparse_ext_map_end(op);
+ if (end < sr->sr_req_len)
+ advance_cursor(cursor, sr->sr_req_len - end, false);
+ }
+
+ ceph_init_sparse_read(sr);
+
+ /* find next op in this request (if any) */
+ while (++o->o_sparse_op_idx < req->r_num_ops) {
+ op = &req->r_ops[o->o_sparse_op_idx];
+ if (op->op == CEPH_OSD_OP_SPARSE_READ)
+ goto found;
+ }
+
+ /* reset for next sparse read request */
+ spin_unlock(&o->o_requests_lock);
+ o->o_sparse_op_idx = -1;
+ return 0;
+found:
+ sr->sr_req_off = op->extent.offset;
+ sr->sr_req_len = op->extent.length;
+ sr->sr_pos = sr->sr_req_off;
+ dout("%s: [%d] new sparse read op at idx %d 0x%llx~0x%llx\n", __func__,
+ o->o_osd, o->o_sparse_op_idx, sr->sr_req_off, sr->sr_req_len);
+
+ /* hand off request's sparse extent map buffer */
+ sr->sr_ext_len = op->extent.sparse_ext_cnt;
+ op->extent.sparse_ext_cnt = 0;
+ sr->sr_extent = op->extent.sparse_ext;
+ op->extent.sparse_ext = NULL;
+
+ spin_unlock(&o->o_requests_lock);
+ return 1;
+}
+
+#ifdef __BIG_ENDIAN
+static inline void convert_extent_map(struct ceph_sparse_read *sr)
+{
+ int i;
+
+ for (i = 0; i < sr->sr_count; i++) {
+ struct ceph_sparse_extent *ext = &sr->sr_extent[i];
+
+ ext->off = le64_to_cpu((__force __le64)ext->off);
+ ext->len = le64_to_cpu((__force __le64)ext->len);
+ }
+}
+#else
+static inline void convert_extent_map(struct ceph_sparse_read *sr)
+{
+}
+#endif
+
+#define MAX_EXTENTS 4096
+
+static int osd_sparse_read(struct ceph_connection *con,
+ struct ceph_msg_data_cursor *cursor,
+ char **pbuf)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_sparse_read *sr = &o->o_sparse_read;
+ u32 count = sr->sr_count;
+ u64 eoff, elen;
+ int ret;
+
+ switch (sr->sr_state) {
+ case CEPH_SPARSE_READ_HDR:
+next_op:
+ ret = prep_next_sparse_read(con, cursor);
+ if (ret <= 0)
+ return ret;
+
+ /* number of extents */
+ ret = sizeof(sr->sr_count);
+ *pbuf = (char *)&sr->sr_count;
+ sr->sr_state = CEPH_SPARSE_READ_EXTENTS;
+ break;
+ case CEPH_SPARSE_READ_EXTENTS:
+ /* Convert sr_count to host-endian */
+ count = le32_to_cpu((__force __le32)sr->sr_count);
+ sr->sr_count = count;
+ dout("[%d] got %u extents\n", o->o_osd, count);
+
+ if (count > 0) {
+ if (!sr->sr_extent || count > sr->sr_ext_len) {
+ /*
+ * Apply a hard cap to the number of extents.
+ * If we have more, assume something is wrong.
+ */
+ if (count > MAX_EXTENTS) {
+ dout("%s: OSD returned 0x%x extents in a single reply!\n",
+ __func__, count);
+ return -EREMOTEIO;
+ }
+
+ /* no extent array provided, or too short */
+ kfree(sr->sr_extent);
+ sr->sr_extent = kmalloc_array(count,
+ sizeof(*sr->sr_extent),
+ GFP_NOIO);
+ if (!sr->sr_extent)
+ return -ENOMEM;
+ sr->sr_ext_len = count;
+ }
+ ret = count * sizeof(*sr->sr_extent);
+ *pbuf = (char *)sr->sr_extent;
+ sr->sr_state = CEPH_SPARSE_READ_DATA_LEN;
+ break;
+ }
+ /* No extents? Read data len */
+ fallthrough;
+ case CEPH_SPARSE_READ_DATA_LEN:
+ convert_extent_map(sr);
+ ret = sizeof(sr->sr_datalen);
+ *pbuf = (char *)&sr->sr_datalen;
+ sr->sr_state = CEPH_SPARSE_READ_DATA;
+ break;
+ case CEPH_SPARSE_READ_DATA:
+ if (sr->sr_index >= count) {
+ sr->sr_state = CEPH_SPARSE_READ_HDR;
+ goto next_op;
+ }
+
+ eoff = sr->sr_extent[sr->sr_index].off;
+ elen = sr->sr_extent[sr->sr_index].len;
+
+ dout("[%d] ext %d off 0x%llx len 0x%llx\n",
+ o->o_osd, sr->sr_index, eoff, elen);
+
+ if (elen > INT_MAX) {
+ dout("Sparse read extent length too long (0x%llx)\n",
+ elen);
+ return -EREMOTEIO;
+ }
+
+ /* zero out anything from sr_pos to start of extent */
+ if (sr->sr_pos < eoff)
+ advance_cursor(cursor, eoff - sr->sr_pos, true);
+
+ /* Set position to end of extent */
+ sr->sr_pos = eoff + elen;
+
+ /* send back the new length and nullify the ptr */
+ cursor->sr_resid = elen;
+ ret = elen;
+ *pbuf = NULL;
+
+ /* Bump the array index */
+ ++sr->sr_index;
+ break;
+ }
+ return ret;
+}
+
static const struct ceph_connection_operations osd_con_ops = {
.get = osd_get_con,
.put = osd_put_con,
+ .sparse_read = osd_sparse_read,
.alloc_msg = osd_alloc_msg,
.dispatch = osd_dispatch,
.fault = osd_fault,
diff --git a/net/core/dev.c b/net/core/dev.c
index ccff2b6ef958..85df22f05c38 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -69,7 +69,7 @@
*/
#include <linux/uaccess.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
@@ -1080,7 +1080,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
return -EINVAL;
/* Use one page as a bit array of possible slots */
- inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
+ inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC);
if (!inuse)
return -ENOMEM;
@@ -1109,7 +1109,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
}
i = find_first_zero_bit(inuse, max_netdevices);
- free_page((unsigned long) inuse);
+ bitmap_free(inuse);
}
snprintf(buf, IFNAMSIZ, name, i);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 89d15ceaf9af..272f09251343 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1446,7 +1446,7 @@ proto_again:
break;
}
- nhoff += ntohs(hdr->message_length);
+ nhoff += sizeof(struct ptp_header);
fdret = FLOW_DISSECT_RET_OUT_GOOD;
break;
}
@@ -1831,8 +1831,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
memset(&keys, 0, sizeof(keys));
__skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
- &keys, NULL, 0, 0, 0,
- FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+ &keys, NULL, 0, 0, 0, 0);
return __flow_hash_from_keys(&keys, &hashrnd);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6b76cd103195..9c09f091cbff 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -410,7 +410,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
*/
__skb_queue_purge(&n->arp_queue);
n->arp_queue_len_bytes = 0;
- n->output = neigh_blackhole;
+ WRITE_ONCE(n->output, neigh_blackhole);
if (n->nud_state & NUD_VALID)
n->nud_state = NUD_NOARP;
else
@@ -920,7 +920,7 @@ static void neigh_suspect(struct neighbour *neigh)
{
neigh_dbg(2, "neigh %p is suspected\n", neigh);
- neigh->output = neigh->ops->output;
+ WRITE_ONCE(neigh->output, neigh->ops->output);
}
/* Neighbour state is OK;
@@ -932,7 +932,7 @@ static void neigh_connect(struct neighbour *neigh)
{
neigh_dbg(2, "neigh %p is connected\n", neigh);
- neigh->output = neigh->ops->connected_output;
+ WRITE_ONCE(neigh->output, neigh->ops->connected_output);
}
static void neigh_periodic_work(struct work_struct *work)
@@ -988,7 +988,9 @@ static void neigh_periodic_work(struct work_struct *work)
(state == NUD_FAILED ||
!time_in_range_open(jiffies, n->used,
n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
- *np = n->next;
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock)));
neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
@@ -1447,7 +1449,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
if (n2)
n1 = n2;
}
- n1->output(n1, skb);
+ READ_ONCE(n1->output)(n1, skb);
if (n2)
neigh_release(n2);
rcu_read_unlock();
@@ -3153,7 +3155,7 @@ int neigh_xmit(int index, struct net_device *dev,
rcu_read_unlock();
goto out_kfree_skb;
}
- err = neigh->output(neigh, skb);
+ err = READ_ONCE(neigh->output)(neigh, skb);
rcu_read_unlock();
}
else if (index == NEIGH_LINK_TABLE) {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 45707059082f..4eaf7ed0d1f4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -550,7 +550,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
bool *pfmemalloc)
{
bool ret_pfmemalloc = false;
- unsigned int obj_size;
+ size_t obj_size;
void *obj;
obj_size = SKB_HEAD_ALIGN(*size);
@@ -567,7 +567,13 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
goto out;
}
- *size = obj_size = kmalloc_size_roundup(obj_size);
+
+ obj_size = kmalloc_size_roundup(obj_size);
+ /* The following cast might truncate high-order bits of obj_size, this
+ * is harmless because kmalloc(obj_size >= 2^32) will fail anyway.
+ */
+ *size = (unsigned int)obj_size;
+
/*
* Try a regular allocation, when that fails and we're not entitled
* to the reserves, fail.
@@ -4423,21 +4429,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
- skb_frag_t *frag = skb_shinfo(head_skb)->frags;
unsigned int mss = skb_shinfo(head_skb)->gso_size;
unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
- struct sk_buff *frag_skb = head_skb;
unsigned int offset = doffset;
unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
unsigned int partial_segs = 0;
unsigned int headroom;
unsigned int len = head_skb->len;
+ struct sk_buff *frag_skb;
+ skb_frag_t *frag;
__be16 proto;
bool csum, sg;
- int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
- int pos;
+ int nfrags, pos;
if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
@@ -4514,6 +4519,13 @@ normal:
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
+ if (skb_orphan_frags(head_skb, GFP_ATOMIC))
+ return ERR_PTR(-ENOMEM);
+
+ nfrags = skb_shinfo(head_skb)->nr_frags;
+ frag = skb_shinfo(head_skb)->frags;
+ frag_skb = head_skb;
+
do {
struct sk_buff *nskb;
skb_frag_t *nskb_frag;
@@ -4534,6 +4546,10 @@ normal:
(skb_headlen(list_skb) == len || sg)) {
BUG_ON(skb_headlen(list_skb) > len);
+ nskb = skb_clone(list_skb, GFP_ATOMIC);
+ if (unlikely(!nskb))
+ goto err;
+
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
@@ -4552,12 +4568,8 @@ normal:
frag++;
}
- nskb = skb_clone(list_skb, GFP_ATOMIC);
list_skb = list_skb->next;
- if (unlikely(!nskb))
- goto err;
-
if (unlikely(pskb_trim(nskb, len))) {
kfree_skb(nskb);
goto err;
@@ -4633,12 +4645,16 @@ normal:
skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
SKBFL_SHARED_FRAG;
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+ if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
goto err;
while (pos < offset + len) {
if (i >= nfrags) {
+ if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
+ skb_zerocopy_clone(nskb, list_skb,
+ GFP_ATOMIC))
+ goto err;
+
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
@@ -4652,10 +4668,6 @@ normal:
i--;
frag--;
}
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb,
- GFP_ATOMIC))
- goto err;
list_skb = list_skb->next;
}
@@ -5207,7 +5219,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_info = tstype;
serr->opt_stats = opt_stats;
serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk_is_tcp(sk))
serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
@@ -5263,21 +5275,23 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
{
struct sk_buff *skb;
bool tsonly, opt_stats = false;
+ u32 tsflags;
if (!sk)
return;
- if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+ tsflags = READ_ONCE(sk->sk_tsflags);
+ if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
return;
- tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+ tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
if (!skb_may_tx_timestamp(sk, tsonly))
return;
if (tsonly) {
#ifdef CONFIG_INET
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
+ if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
sk_is_tcp(sk)) {
skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
ack_skb);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index a0659fc29bcc..6c31eefbd777 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -612,12 +612,18 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
+ int err = 0;
+
if (!ingress) {
if (!sock_writeable(psock->sk))
return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len);
}
- return sk_psock_skb_ingress(psock, skb, off, len);
+ skb_get(skb);
+ err = sk_psock_skb_ingress(psock, skb, off, len);
+ if (err < 0)
+ kfree_skb(skb);
+ return err;
}
static void sk_psock_skb_state(struct sk_psock *psock,
@@ -685,9 +691,7 @@ static void sk_psock_backlog(struct work_struct *work)
} while (len);
skb = skb_dequeue(&psock->ingress_skb);
- if (!ingress) {
- kfree_skb(skb);
- }
+ kfree_skb(skb);
}
end:
mutex_unlock(&psock->work_mutex);
diff --git a/net/core/sock.c b/net/core/sock.c
index 666a17cab4f5..16584e2dd648 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -765,7 +765,8 @@ bool sk_mc_loop(struct sock *sk)
return false;
if (!sk)
return true;
- switch (sk->sk_family) {
+ /* IPV6_ADDRFORM can change sk->sk_family under us. */
+ switch (READ_ONCE(sk->sk_family)) {
case AF_INET:
return inet_test_bit(MC_LOOP, sk);
#if IS_ENABLED(CONFIG_IPV6)
@@ -893,7 +894,7 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
if (!match)
return -EINVAL;
- sk->sk_bind_phc = phc_index;
+ WRITE_ONCE(sk->sk_bind_phc, phc_index);
return 0;
}
@@ -936,7 +937,7 @@ int sock_set_timestamping(struct sock *sk, int optname,
return ret;
}
- sk->sk_tsflags = val;
+ WRITE_ONCE(sk->sk_tsflags, val);
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
@@ -1044,7 +1045,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
return -ENOMEM;
}
- sk->sk_forward_alloc += pages << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
WRITE_ONCE(sk->sk_reserved_mem,
sk->sk_reserved_mem + (pages << PAGE_SHIFT));
@@ -1718,8 +1719,8 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
case SO_TIMESTAMPING_OLD:
lv = sizeof(v.timestamping);
- v.timestamping.flags = sk->sk_tsflags;
- v.timestamping.bind_phc = sk->sk_bind_phc;
+ v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+ v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
break;
case SO_RCVTIMEO_OLD:
@@ -2746,9 +2747,9 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
break;
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
break;
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
break;
timeo = schedule_timeout(timeo);
}
@@ -2776,7 +2777,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
goto failure;
err = -EPIPE;
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
goto failure;
if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
@@ -3138,10 +3139,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
int ret, amt = sk_mem_pages(size);
- sk->sk_forward_alloc += amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
ret = __sk_mem_raise_allocated(sk, size, amt, kind);
if (!ret)
- sk->sk_forward_alloc -= amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
return ret;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -3173,7 +3174,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
void __sk_mem_reclaim(struct sock *sk, int amount)
{
amount >>= PAGE_SHIFT;
- sk->sk_forward_alloc -= amount << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
__sk_mem_reduce_allocated(sk, amount);
}
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -3742,7 +3743,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
- mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+ mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 8f07fea39d9e..4292c2ed1828 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -18,7 +18,7 @@ struct bpf_stab {
struct bpf_map map;
struct sock **sks;
struct sk_psock_progs progs;
- raw_spinlock_t lock;
+ spinlock_t lock;
};
#define SOCK_CREATE_FLAG_MASK \
@@ -44,7 +44,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&stab->map, attr);
- raw_spin_lock_init(&stab->lock);
+ spin_lock_init(&stab->lock);
stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
sizeof(struct sock *),
@@ -411,7 +411,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock *sk;
int err = 0;
- raw_spin_lock_bh(&stab->lock);
+ spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
sk = xchg(psk, NULL);
@@ -421,7 +421,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
else
err = -EINVAL;
- raw_spin_unlock_bh(&stab->lock);
+ spin_unlock_bh(&stab->lock);
return err;
}
@@ -487,7 +487,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
psock = sk_psock(sk);
WARN_ON_ONCE(!psock);
- raw_spin_lock_bh(&stab->lock);
+ spin_lock_bh(&stab->lock);
osk = stab->sks[idx];
if (osk && flags == BPF_NOEXIST) {
ret = -EEXIST;
@@ -501,10 +501,10 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
stab->sks[idx] = sk;
if (osk)
sock_map_unref(osk, &stab->sks[idx]);
- raw_spin_unlock_bh(&stab->lock);
+ spin_unlock_bh(&stab->lock);
return 0;
out_unlock:
- raw_spin_unlock_bh(&stab->lock);
+ spin_unlock_bh(&stab->lock);
if (psock)
sk_psock_put(sk, psock);
out_free:
@@ -668,6 +668,8 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
sk = __sock_map_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
+ if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk))
+ return SK_DROP;
msg->flags = flags;
msg->sk_redir = sk;
@@ -835,7 +837,7 @@ struct bpf_shtab_elem {
struct bpf_shtab_bucket {
struct hlist_head head;
- raw_spinlock_t lock;
+ spinlock_t lock;
};
struct bpf_shtab {
@@ -910,7 +912,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
* is okay since it's going away only after RCU grace period.
* However, we need to check whether it's still present.
*/
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
elem_probe = sock_hash_lookup_elem_raw(&bucket->head, elem->hash,
elem->key, map->key_size);
if (elem_probe && elem_probe == elem) {
@@ -918,7 +920,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
sock_map_unref(elem->sk, elem);
sock_hash_free_elem(htab, elem);
}
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
}
static long sock_hash_delete_elem(struct bpf_map *map, void *key)
@@ -932,7 +934,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
if (elem) {
hlist_del_rcu(&elem->node);
@@ -940,7 +942,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
sock_hash_free_elem(htab, elem);
ret = 0;
}
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
return ret;
}
@@ -1000,7 +1002,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
if (elem && flags == BPF_NOEXIST) {
ret = -EEXIST;
@@ -1026,10 +1028,10 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
sock_map_unref(elem->sk, elem);
sock_hash_free_elem(htab, elem);
}
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
return 0;
out_unlock:
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
sk_psock_put(sk, psock);
out_free:
sk_psock_free_link(link);
@@ -1115,7 +1117,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
for (i = 0; i < htab->buckets_num; i++) {
INIT_HLIST_HEAD(&htab->buckets[i].head);
- raw_spin_lock_init(&htab->buckets[i].lock);
+ spin_lock_init(&htab->buckets[i].lock);
}
return &htab->map;
@@ -1147,11 +1149,11 @@ static void sock_hash_free(struct bpf_map *map)
* exists, psock exists and holds a ref to socket. That
* lets us to grab a socket ref too.
*/
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
hlist_for_each_entry(elem, &bucket->head, node)
sock_hold(elem->sk);
hlist_move_list(&bucket->head, &unlink_list);
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
/* Process removed entries out of atomic context to
* block for socket lock before deleting the psock's
@@ -1267,6 +1269,8 @@ BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg,
sk = __sock_hash_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
+ if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk))
+ return SK_DROP;
msg->flags = flags;
msg->sk_redir = sk;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 8f56e8723c73..69453b936bd5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -254,13 +254,8 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
int err;
struct net *net = dev_net(skb->dev);
- /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
- * which is in byte 7 of the dccp header.
- * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
- *
- * Later on, we want to access the sequence number fields, which are
- * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
- */
+ if (!pskb_may_pull(skb, offset + sizeof(*dh)))
+ return -EINVAL;
dh = (struct dccp_hdr *)(skb->data + offset);
if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
return -EINVAL;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 33f6ccf6ba77..c693a570682f 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -83,13 +83,8 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
__u64 seq;
struct net *net = dev_net(skb->dev);
- /* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
- * which is in byte 7 of the dccp header.
- * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
- *
- * Later on, we want to access the sequence number fields, which are
- * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
- */
+ if (!pskb_may_pull(skb, offset + sizeof(*dh)))
+ return -EINVAL;
dh = (struct dccp_hdr *)(skb->data + offset);
if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
return -EINVAL;
diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c
index b238a1afe9ae..b1e2e3b5027f 100644
--- a/net/ethtool/plca.c
+++ b/net/ethtool/plca.c
@@ -21,16 +21,6 @@ struct plca_reply_data {
#define PLCA_REPDATA(__reply_base) \
container_of(__reply_base, struct plca_reply_data, base)
-static void plca_update_sint(int *dst, const struct nlattr *attr,
- bool *mod)
-{
- if (!attr)
- return;
-
- *dst = nla_get_u32(attr);
- *mod = true;
-}
-
// PLCA get configuration message ------------------------------------------- //
const struct nla_policy ethnl_plca_get_cfg_policy[] = {
@@ -38,6 +28,29 @@ const struct nla_policy ethnl_plca_get_cfg_policy[] = {
NLA_POLICY_NESTED(ethnl_header_policy),
};
+static void plca_update_sint(int *dst, struct nlattr **tb, u32 attrid,
+ bool *mod)
+{
+ const struct nlattr *attr = tb[attrid];
+
+ if (!attr ||
+ WARN_ON_ONCE(attrid >= ARRAY_SIZE(ethnl_plca_set_cfg_policy)))
+ return;
+
+ switch (ethnl_plca_set_cfg_policy[attrid].type) {
+ case NLA_U8:
+ *dst = nla_get_u8(attr);
+ break;
+ case NLA_U32:
+ *dst = nla_get_u32(attr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ *mod = true;
+}
+
static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
const struct genl_info *info)
@@ -144,13 +157,13 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info)
return -EOPNOTSUPP;
memset(&plca_cfg, 0xff, sizeof(plca_cfg));
- plca_update_sint(&plca_cfg.enabled, tb[ETHTOOL_A_PLCA_ENABLED], &mod);
- plca_update_sint(&plca_cfg.node_id, tb[ETHTOOL_A_PLCA_NODE_ID], &mod);
- plca_update_sint(&plca_cfg.node_cnt, tb[ETHTOOL_A_PLCA_NODE_CNT], &mod);
- plca_update_sint(&plca_cfg.to_tmr, tb[ETHTOOL_A_PLCA_TO_TMR], &mod);
- plca_update_sint(&plca_cfg.burst_cnt, tb[ETHTOOL_A_PLCA_BURST_CNT],
+ plca_update_sint(&plca_cfg.enabled, tb, ETHTOOL_A_PLCA_ENABLED, &mod);
+ plca_update_sint(&plca_cfg.node_id, tb, ETHTOOL_A_PLCA_NODE_ID, &mod);
+ plca_update_sint(&plca_cfg.node_cnt, tb, ETHTOOL_A_PLCA_NODE_CNT, &mod);
+ plca_update_sint(&plca_cfg.to_tmr, tb, ETHTOOL_A_PLCA_TO_TMR, &mod);
+ plca_update_sint(&plca_cfg.burst_cnt, tb, ETHTOOL_A_PLCA_BURST_CNT,
&mod);
- plca_update_sint(&plca_cfg.burst_tmr, tb[ETHTOOL_A_PLCA_BURST_TMR],
+ plca_update_sint(&plca_cfg.burst_tmr, tb, ETHTOOL_A_PLCA_BURST_TMR,
&mod);
if (!mod)
return 0;
diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c
index 6d37bab35c8f..16ed7bfd29e4 100644
--- a/net/handshake/handshake-test.c
+++ b/net/handshake/handshake-test.c
@@ -235,7 +235,7 @@ static void handshake_req_submit_test4(struct kunit *test)
KUNIT_EXPECT_PTR_EQ(test, req, result);
handshake_req_cancel(sock->sk);
- sock_release(sock);
+ fput(filp);
}
static void handshake_req_submit_test5(struct kunit *test)
@@ -272,7 +272,7 @@ static void handshake_req_submit_test5(struct kunit *test)
/* Assert */
KUNIT_EXPECT_EQ(test, err, -EAGAIN);
- sock_release(sock);
+ fput(filp);
hn->hn_pending = saved;
}
@@ -306,7 +306,7 @@ static void handshake_req_submit_test6(struct kunit *test)
KUNIT_EXPECT_EQ(test, err, -EBUSY);
handshake_req_cancel(sock->sk);
- sock_release(sock);
+ fput(filp);
}
static void handshake_req_cancel_test1(struct kunit *test)
@@ -340,7 +340,7 @@ static void handshake_req_cancel_test1(struct kunit *test)
/* Assert */
KUNIT_EXPECT_TRUE(test, result);
- sock_release(sock);
+ fput(filp);
}
static void handshake_req_cancel_test2(struct kunit *test)
@@ -382,7 +382,7 @@ static void handshake_req_cancel_test2(struct kunit *test)
/* Assert */
KUNIT_EXPECT_TRUE(test, result);
- sock_release(sock);
+ fput(filp);
}
static void handshake_req_cancel_test3(struct kunit *test)
@@ -427,7 +427,7 @@ static void handshake_req_cancel_test3(struct kunit *test)
/* Assert */
KUNIT_EXPECT_FALSE(test, result);
- sock_release(sock);
+ fput(filp);
}
static struct handshake_req *handshake_req_destroy_test;
@@ -471,7 +471,7 @@ static void handshake_req_destroy_test1(struct kunit *test)
handshake_req_cancel(sock->sk);
/* Act */
- sock_release(sock);
+ fput(filp);
/* Assert */
KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req);
diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c
index 1086653e1fad..d0bc1dd8e65a 100644
--- a/net/handshake/netlink.c
+++ b/net/handshake/netlink.c
@@ -157,26 +157,24 @@ out_status:
int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
- struct handshake_req *req = NULL;
- struct socket *sock = NULL;
+ struct handshake_req *req;
+ struct socket *sock;
int fd, status, err;
if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))
return -EINVAL;
fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]);
- err = 0;
sock = sockfd_lookup(fd, &err);
- if (err) {
- err = -EBADF;
- goto out_status;
- }
+ if (!sock)
+ return err;
req = handshake_req_hash_lookup(sock->sk);
if (!req) {
err = -EBUSY;
+ trace_handshake_cmd_done_err(net, req, sock->sk, err);
fput(sock->file);
- goto out_status;
+ return err;
}
trace_handshake_cmd_done(net, req, sock->sk, fd);
@@ -188,10 +186,6 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
handshake_complete(req, status, info);
fput(sock->file);
return 0;
-
-out_status:
- trace_handshake_cmd_done_err(net, req, sock->sk, err);
- return err;
}
static unsigned int handshake_net_id;
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 629daacc9607..b71dab630a87 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -594,6 +594,7 @@ static int fill_frame_info(struct hsr_frame_info *frame,
proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto;
/* FIXME: */
netdev_warn_once(skb->dev, "VLAN not yet supported");
+ return -EINVAL;
}
frame->is_from_san = false;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index b77f1189d19d..6d14d935ee82 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -288,13 +288,13 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
/* And leave the HSR tag. */
if (ethhdr->h_proto == htons(ETH_P_HSR)) {
- pull_size = sizeof(struct ethhdr);
+ pull_size = sizeof(struct hsr_tag);
skb_pull(skb, pull_size);
total_pull_size += pull_size;
}
/* And leave the HSR sup tag. */
- pull_size = sizeof(struct hsr_tag);
+ pull_size = sizeof(struct hsr_sup_tag);
skb_pull(skb, pull_size);
total_pull_size += pull_size;
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 6851e33df7d1..18e01791ad79 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -83,7 +83,7 @@ struct hsr_vlan_ethhdr {
struct hsr_sup_tlv {
u8 HSR_TLV_type;
u8 HSR_TLV_length;
-};
+} __packed;
/* HSR/PRP Supervision Frame data types.
* Field names as defined in the IEC:2010 standard for HSR.
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9cf64ee47dd2..ca0ff15dc8fa 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -355,14 +355,14 @@ static void __inet_del_ifa(struct in_device *in_dev,
{
struct in_ifaddr *promote = NULL;
struct in_ifaddr *ifa, *ifa1;
- struct in_ifaddr *last_prim;
+ struct in_ifaddr __rcu **last_prim;
struct in_ifaddr *prev_prom = NULL;
int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
ASSERT_RTNL();
ifa1 = rtnl_dereference(*ifap);
- last_prim = rtnl_dereference(in_dev->ifa_list);
+ last_prim = ifap;
if (in_dev->dead)
goto no_promotions;
@@ -376,7 +376,7 @@ static void __inet_del_ifa(struct in_device *in_dev,
while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
ifa1->ifa_scope <= ifa->ifa_scope)
- last_prim = ifa;
+ last_prim = &ifa->ifa_next;
if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
ifa1->ifa_mask != ifa->ifa_mask ||
@@ -440,9 +440,9 @@ no_promotions:
rcu_assign_pointer(prev_prom->ifa_next, next_sec);
- last_sec = rtnl_dereference(last_prim->ifa_next);
+ last_sec = rtnl_dereference(*last_prim);
rcu_assign_pointer(promote->ifa_next, last_sec);
- rcu_assign_pointer(last_prim->ifa_next, promote);
+ rcu_assign_pointer(*last_prim, promote);
}
promote->ifa_flags &= ~IFA_F_SECONDARY;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 65ba18a91865..1ea82bc33ef1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -278,7 +278,8 @@ void fib_release_info(struct fib_info *fi)
hlist_del(&nexthop_nh->nh_hash);
} endfor_nexthops(fi)
}
- fi->fib_dead = 1;
+ /* Paired with READ_ONCE() from fib_table_lookup() */
+ WRITE_ONCE(fi->fib_dead, 1);
fib_info_put(fi);
}
spin_unlock_bh(&fib_info_lock);
@@ -1581,6 +1582,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
link_it:
ofi = fib_find_info(fi);
if (ofi) {
+ /* fib_table_lookup() should not see @fi yet. */
fi->fib_dead = 1;
free_fib_info(fi);
refcount_inc(&ofi->fib_treeref);
@@ -1619,6 +1621,7 @@ err_inval:
failure:
if (fi) {
+ /* fib_table_lookup() should not see @fi yet. */
fi->fib_dead = 1;
free_fib_info(fi);
}
@@ -1884,6 +1887,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local)
continue;
if (fi->fib_prefsrc == local) {
fi->fib_flags |= RTNH_F_DEAD;
+ fi->pfsrc_removed = true;
ret++;
}
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 74d403dbd2b4..9bdfdab906fe 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1582,7 +1582,8 @@ found:
if (fa->fa_dscp &&
inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
continue;
- if (fi->fib_dead)
+ /* Paired with WRITE_ONCE() in fib_release_info() */
+ if (READ_ONCE(fi->fib_dead))
continue;
if (fa->fa_info->fib_scope < flp->flowi4_scope)
continue;
@@ -2026,6 +2027,7 @@ void fib_table_flush_external(struct fib_table *tb)
int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
{
struct trie *t = (struct trie *)tb->tb_data;
+ struct nl_info info = { .nl_net = net };
struct key_vector *pn = t->kv;
unsigned long cindex = 1;
struct hlist_node *tmp;
@@ -2088,6 +2090,9 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
fib_notify_alias_delete(net, n->key, &n->leaf, fa,
NULL);
+ if (fi->pfsrc_removed)
+ rtmsg_fib(RTM_DELROUTE, htonl(n->key), fa,
+ KEYLENGTH - fa->fa_slen, tb->tb_id, &info, 0);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 0c9e768e5628..418e5fb58fd3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -353,8 +353,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
struct flowi4 fl4;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- unsigned int size = mtu;
+ unsigned int size;
+ size = min(mtu, IP_MAX_MTU);
while (1) {
skb = alloc_skb(size + hlen + tlen,
GFP_ATOMIC | __GFP_NOWARN);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 7876b7d703cb..c32f5e28758b 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -815,41 +815,45 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
const struct net *net, unsigned short port,
int l3mdev, const struct sock *sk)
{
+ if (!net_eq(ib2_net(tb), net) || tb->port != port ||
+ tb->l3mdev != l3mdev)
+ return false;
+
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family != tb->family)
+ if (sk->sk_family != tb->family) {
+ if (sk->sk_family == AF_INET)
+ return ipv6_addr_v4mapped(&tb->v6_rcv_saddr) &&
+ tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
+
return false;
+ }
if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
- else
+ return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
+ return tb->rcv_saddr == sk->sk_rcv_saddr;
}
bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net,
unsigned short port, int l3mdev, const struct sock *sk)
{
+ if (!net_eq(ib2_net(tb), net) || tb->port != port ||
+ tb->l3mdev != l3mdev)
+ return false;
+
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family != tb->family) {
if (sk->sk_family == AF_INET)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_any(&tb->v6_rcv_saddr);
+ return ipv6_addr_any(&tb->v6_rcv_saddr) ||
+ ipv6_addr_v4mapped_any(&tb->v6_rcv_saddr);
return false;
}
if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_any(&tb->v6_rcv_saddr);
- else
+ return ipv6_addr_any(&tb->v6_rcv_saddr);
#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
+ return tb->rcv_saddr == 0;
}
/* The socket's bhash2 hashbucket spinlock must be held when this is called */
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e18931a6d153..66fac1216d46 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,7 +67,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
struct ip_options *opt = &(IPCB(skb)->opt);
__IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
- __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
#ifdef CONFIG_NET_SWITCHDEV
if (skb->offload_l3_fwd_mark) {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index fe9ead9ee863..5e9c8156656a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -584,7 +584,8 @@ static void ip_sublist_rcv_finish(struct list_head *head)
static struct sk_buff *ip_extract_route_hint(const struct net *net,
struct sk_buff *skb, int rt_type)
{
- if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
+ if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
+ IPCB(skb)->flags & IPSKB_MULTIPATH)
return NULL;
return skb;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 43ba4b77b248..4ab877cf6d35 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -207,6 +207,9 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
} else if (rt->rt_type == RTN_BROADCAST)
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
+ /* OUTOCTETS should be counted after fragment */
+ IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
+
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
if (!skb)
@@ -366,8 +369,6 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
/*
* If the indicated interface is up and running, send the packet.
*/
- IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -424,8 +425,6 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
- IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -982,7 +981,7 @@ static int __ip_append_data(struct sock *sk,
paged = !!cork->gso_size;
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d1c73660b844..cce9cb25f3b3 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -511,7 +511,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
* or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
*/
info = PKTINFO_SKB_CB(skb);
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
+ if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
!info->ipi_ifindex)
return false;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3f0c6d602fb7..9e222a57bc2b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1804,7 +1804,6 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
struct ip_options *opt = &(IPCB(skb)->opt);
IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
if (unlikely(opt->optlen))
ip_forward_options(skb);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d8c99bdc6170..b214b5a2e045 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1213,6 +1213,7 @@ EXPORT_INDIRECT_CALLABLE(ipv4_dst_check);
static void ipv4_send_dest_unreach(struct sk_buff *skb)
{
+ struct net_device *dev;
struct ip_options opt;
int res;
@@ -1230,7 +1231,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb)
opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
rcu_read_lock();
- res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+ dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev;
+ res = __ip_options_compile(dev_net(dev), &opt, skb, NULL);
rcu_read_unlock();
if (res)
@@ -2144,6 +2146,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
+ IPCB(skb)->flags |= IPSKB_MULTIPATH;
}
#endif
@@ -3414,6 +3417,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fa->fa_type == fri.type) {
fri.offload = READ_ONCE(fa->offload);
fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed =
+ READ_ONCE(fa->offload_failed);
break;
}
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b1559481898d..3f66cdeef7de 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1621,16 +1621,13 @@ EXPORT_SYMBOL(tcp_read_sock);
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
- struct tcp_sock *tp = tcp_sk(sk);
- u32 seq = tp->copied_seq;
struct sk_buff *skb;
int copied = 0;
- u32 offset;
if (sk->sk_state == TCP_LISTEN)
return -ENOTCONN;
- while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+ while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
u8 tcp_flags;
int used;
@@ -1643,13 +1640,10 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
copied = used;
break;
}
- seq += used;
copied += used;
- if (tcp_flags & TCPHDR_FIN) {
- ++seq;
+ if (tcp_flags & TCPHDR_FIN)
break;
- }
}
return copied;
}
@@ -2256,14 +2250,14 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
}
}
- if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
has_timestamping = true;
else
tss->ts[0] = (struct timespec64) {0};
}
if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
- if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
has_timestamping = true;
else
tss->ts[2] = (struct timespec64) {0};
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 81f0dff69e0b..327268203001 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -222,6 +222,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
int *addr_len)
{
struct tcp_sock *tcp = tcp_sk(sk);
+ int peek = flags & MSG_PEEK;
u32 seq = tcp->copied_seq;
struct sk_psock *psock;
int copied = 0;
@@ -311,7 +312,8 @@ msg_bytes_ready:
copied = -EAGAIN;
}
out:
- WRITE_ONCE(tcp->copied_seq, seq);
+ if (!peek)
+ WRITE_ONCE(tcp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
if (copied > 0)
__tcp_cleanup_rbuf(sk, copied);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 06fe1cf645d5..8afb0950a697 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -253,6 +253,19 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
if (unlikely(len > icsk->icsk_ack.rcv_mss +
MAX_TCP_OPTION_SPACE))
tcp_gro_dev_warn(sk, skb, len);
+ /* If the skb has a len of exactly 1*MSS and has the PSH bit
+ * set then it is likely the end of an application write. So
+ * more data may not be arriving soon, and yet the data sender
+ * may be waiting for an ACK if cwnd-bound or using TX zero
+ * copy. So we set ICSK_ACK_PUSHED here so that
+ * tcp_cleanup_rbuf() will send an ACK immediately if the app
+ * reads all of the data and is not ping-pong. If len > MSS
+ * then this logic does not matter (and does not hurt) because
+ * tcp_cleanup_rbuf() will always ACK immediately if the app
+ * reads data and there is more than an MSS of unACKed data.
+ */
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH)
+ icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
} else {
/* Otherwise, we make more careful check taking into account,
* that SACKs block is variable.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e6b4fbd642f7..aa0fc8c766e5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -177,8 +177,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
}
/* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
- u32 rcv_nxt)
+static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -192,7 +191,7 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
if (unlikely(rcv_nxt != tp->rcv_nxt))
return; /* Special ACK sent by DCTCP to reflect ECN */
- tcp_dec_quickack_mode(sk, pkts);
+ tcp_dec_quickack_mode(sk);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
@@ -1387,7 +1386,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK))
- tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
+ tcp_event_ack_sent(sk, rcv_nxt);
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
@@ -3474,7 +3473,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
if (delta <= 0)
return;
amt = sk_mem_pages(delta);
- sk->sk_forward_alloc += amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0794a2c46a56..f39b9c844580 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1414,9 +1414,9 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
spin_lock(&sk_queue->lock);
- sk->sk_forward_alloc += size;
+ sk_forward_alloc_add(sk, size);
amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
- sk->sk_forward_alloc -= amt;
+ sk_forward_alloc_add(sk, -amt);
if (amt)
__sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
@@ -1527,7 +1527,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
goto uncharge_drop;
}
- sk->sk_forward_alloc -= size;
+ sk_forward_alloc_add(sk, -size);
/* no need to setup a destructor, we will explicitly release the
* forward allocated memory on dequeue
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 967913ad65e5..0b6ee962c84e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1378,7 +1378,7 @@ retry:
* idev->desync_factor if it's larger
*/
cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
- max_desync_factor = min_t(__u32,
+ max_desync_factor = min_t(long,
idev->cnf.max_desync_factor,
cnf_temp_preferred_lft - regen_advance);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index d94041bb4287..b8378814532c 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -99,7 +99,8 @@ static bool ip6_can_use_hint(const struct sk_buff *skb,
static struct sk_buff *ip6_extract_route_hint(const struct net *net,
struct sk_buff *skb)
{
- if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
+ if (fib6_routes_require_src(net) || fib6_has_custom_rules(net) ||
+ IP6CB(skb)->flags & IP6SKB_MULTIPATH)
return NULL;
return skb;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0665e8b09968..54fc4c711f2c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -451,7 +451,6 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
struct dst_entry *dst = skb_dst(skb);
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
- __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
#ifdef CONFIG_NET_SWITCHDEV
if (skb->offload_l3_fwd_mark) {
@@ -1502,7 +1501,7 @@ static int __ip6_append_data(struct sock *sk,
orig_mtu = mtu;
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 67a3b8f6e72b..30ca064b76ef 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2010,8 +2010,6 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
{
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTOCTETS, skb->len);
return dst_output(net, sk, skb);
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 1b2772834972..5831aaa53d75 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -119,7 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EINVAL;
ipcm6_init_sk(&ipc6, np);
- ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_oif = oif;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0eae7661a85c..42fcec3ecf5e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -772,7 +772,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_uid = sk->sk_uid;
ipcm6_init(&ipc6);
- ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = fl6.flowi6_mark;
if (sin6) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d15a9e3aa24a..9c687b357e6a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -423,6 +423,9 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
if (match->nh && have_oif_match && res->nh)
return;
+ if (skb)
+ IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
+
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
*/
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3a88545a265d..44b6949d72b2 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1640,9 +1640,12 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- drop_reason = tcp_inbound_md5_hash(sk, skb,
- &hdr->saddr, &hdr->daddr,
- AF_INET6, dif, sdif);
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
+ else
+ drop_reason = tcp_inbound_md5_hash(sk, skb,
+ &hdr->saddr, &hdr->daddr,
+ AF_INET6, dif, sdif);
if (drop_reason) {
sk_drops_add(sk, skb);
reqsk_put(req);
@@ -1689,6 +1692,7 @@ process:
}
goto discard_and_relse;
}
+ nf_reset_ct(skb);
if (nsk == sk) {
reqsk_put(req);
tcp_v6_restore_cb(skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ebc6ae47cfea..86b5d509a468 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1339,7 +1339,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.gso_size = READ_ONCE(up->gso_size);
- ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
/* destination address check */
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 393f01b2a7e6..dd1d8ffd5f59 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -930,15 +930,18 @@ partial_message:
out_error:
kcm_push(kcm);
- if (copied && sock->type == SOCK_SEQPACKET) {
+ if (sock->type == SOCK_SEQPACKET) {
/* Wrote some bytes before encountering an
* error, return partial success.
*/
- goto partial_message;
- }
-
- if (head != kcm->seq_skb)
+ if (copied)
+ goto partial_message;
+ if (head != kcm->seq_skb)
+ kfree_skb(head);
+ } else {
kfree_skb(head);
+ kcm->seq_skb = NULL;
+ }
err = sk_stream_error(sk, msg->msg_flags, err);
@@ -1859,6 +1862,8 @@ static __net_exit void kcm_exit_net(struct net *net)
* that all multiplexors and psocks have been destroyed.
*/
WARN_ON(!list_empty(&knet->mux_list));
+
+ mutex_destroy(&knet->mutex);
}
static struct pernet_operations kcm_net_ops = {
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index ed8ebb6f5909..11f3d375cec0 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -507,7 +507,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
*/
if (len > INT_MAX - transhdrlen)
return -EMSGSIZE;
- ulen = len + transhdrlen;
/* Mirror BSD error message compatibility */
if (msg->msg_flags & MSG_OOB)
@@ -628,6 +627,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
back_from_confirm:
lock_sock(sk);
+ ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0;
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, &ipc6,
&fl6, (struct rt6_info *)dst,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 45e7a5d9c7d9..0e3a1753a51c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -566,6 +566,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
}
err = ieee80211_key_link(key, link, sta);
+ /* KRACK protection, shouldn't happen but just silently accept key */
+ if (err == -EALREADY)
+ err = 0;
out_unlock:
mutex_unlock(&local->sta_mtx);
@@ -1857,7 +1860,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
/* VHT can override some HT caps such as the A-MSDU max length */
if (params->vht_capa)
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
- params->vht_capa, link_sta);
+ params->vht_capa, NULL,
+ link_sta);
if (params->he_capa)
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index e1900077bc4b..5542c93edfba 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1072,7 +1072,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
&chandef);
memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie));
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
- &cap_ie,
+ &cap_ie, NULL,
&sta->deflink);
if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap)))
rates_updated |= true;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 06bd406846d2..98ef1fe1226e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -676,7 +676,7 @@ struct ieee80211_if_mesh {
struct timer_list mesh_path_root_timer;
unsigned long wrkq_flags;
- unsigned long mbss_changed;
+ unsigned long mbss_changed[64 / BITS_PER_LONG];
bool userspace_handles_dfs;
@@ -2141,6 +2141,7 @@ void
ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const struct ieee80211_vht_cap *vht_cap_ie,
+ const struct ieee80211_vht_cap *vht_cap_ie2,
struct link_sta_info *link_sta);
enum ieee80211_sta_rx_bandwidth
ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 13050dc9321f..0665ff5e456e 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -802,6 +802,9 @@ static void ieee80211_key_destroy(struct ieee80211_key *key,
void ieee80211_key_free_unused(struct ieee80211_key *key)
{
+ if (!key)
+ return;
+
WARN_ON(key->sdata || key->local);
ieee80211_key_free_common(key);
}
@@ -854,7 +857,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
* can cause warnings to appear.
*/
bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION;
- int ret = -EOPNOTSUPP;
+ int ret;
mutex_lock(&sdata->local->key_mtx);
@@ -868,8 +871,10 @@ int ieee80211_key_link(struct ieee80211_key *key,
* the same cipher. Enforce the assumption for pairwise keys.
*/
if ((alt_key && alt_key->conf.cipher != key->conf.cipher) ||
- (old_key && old_key->conf.cipher != key->conf.cipher))
+ (old_key && old_key->conf.cipher != key->conf.cipher)) {
+ ret = -EOPNOTSUPP;
goto out;
+ }
} else if (sta) {
struct link_sta_info *link_sta = &sta->deflink;
int link_id = key->conf.link_id;
@@ -895,8 +900,10 @@ int ieee80211_key_link(struct ieee80211_key *key,
/* Non-pairwise keys must also not switch the cipher on rekey */
if (!pairwise) {
- if (old_key && old_key->conf.cipher != key->conf.cipher)
+ if (old_key && old_key->conf.cipher != key->conf.cipher) {
+ ret = -EOPNOTSUPP;
goto out;
+ }
}
/*
@@ -904,9 +911,8 @@ int ieee80211_key_link(struct ieee80211_key *key,
* new version of the key to avoid nonce reuse or replay issues.
*/
if (ieee80211_key_identical(sdata, old_key, key)) {
- ieee80211_key_free_unused(key);
- ret = 0;
- goto out;
+ ret = -EALREADY;
+ goto unlock;
}
key->local = sdata->local;
@@ -930,7 +936,11 @@ int ieee80211_key_link(struct ieee80211_key *key,
ieee80211_key_free(key, delay_tailroom);
}
+ key = NULL;
+
out:
+ ieee80211_key_free_unused(key);
+ unlock:
mutex_unlock(&sdata->local->key_mtx);
return ret;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index af8c5fc2db14..e31c312c124a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1175,7 +1175,7 @@ void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata,
/* if we race with running work, worst case this work becomes a noop */
for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE)
- set_bit(bit, &ifmsh->mbss_changed);
+ set_bit(bit, ifmsh->mbss_changed);
set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags);
wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
}
@@ -1257,7 +1257,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
/* clear any mesh work (for next join) we may have accrued */
ifmsh->wrkq_flags = 0;
- ifmsh->mbss_changed = 0;
+ memset(ifmsh->mbss_changed, 0, sizeof(ifmsh->mbss_changed));
local->fif_other_bss--;
atomic_dec(&local->iff_allmultis);
@@ -1724,9 +1724,9 @@ static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata)
u32 bit;
u64 changed = 0;
- for_each_set_bit(bit, &ifmsh->mbss_changed,
+ for_each_set_bit(bit, ifmsh->mbss_changed,
sizeof(changed) * BITS_PER_BYTE) {
- clear_bit(bit, &ifmsh->mbss_changed);
+ clear_bit(bit, ifmsh->mbss_changed);
changed |= BIT(bit);
}
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index f3d5bb0a59f1..a1e526419e9d 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -451,7 +451,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
changed |= IEEE80211_RC_BW_CHANGED;
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
- elems->vht_cap_elem,
+ elems->vht_cap_elem, NULL,
&sta->deflink);
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f93eb38ae0b8..0c9198997482 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4202,10 +4202,33 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
elems->ht_cap_elem,
link_sta);
- if (elems->vht_cap_elem && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT))
+ if (elems->vht_cap_elem &&
+ !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
+ const struct ieee80211_vht_cap *bss_vht_cap = NULL;
+ const struct cfg80211_bss_ies *ies;
+
+ /*
+ * Cisco AP module 9115 with FW 17.3 has a bug and sends a
+ * too large maximum MPDU length in the association response
+ * (indicating 12k) that it cannot actually process ...
+ * Work around that.
+ */
+ rcu_read_lock();
+ ies = rcu_dereference(cbss->ies);
+ if (ies) {
+ const struct element *elem;
+
+ elem = cfg80211_find_elem(WLAN_EID_VHT_CAPABILITY,
+ ies->data, ies->len);
+ if (elem && elem->datalen >= sizeof(*bss_vht_cap))
+ bss_vht_cap = (const void *)elem->data;
+ }
+
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
elems->vht_cap_elem,
- link_sta);
+ bss_vht_cap, link_sta);
+ rcu_read_unlock();
+ }
if (elems->he_operation && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
elems->he_cap) {
@@ -5107,9 +5130,10 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
continue;
valid_links |= BIT(link_id);
- if (assoc_data->link[link_id].disabled) {
+ if (assoc_data->link[link_id].disabled)
dormant_links |= BIT(link_id);
- } else if (link_id != assoc_data->assoc_link_id) {
+
+ if (link_id != assoc_data->assoc_link_id) {
err = ieee80211_sta_allocate_link(sta, link_id);
if (err)
goto out_err;
@@ -5124,7 +5148,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
struct ieee80211_link_data *link;
struct link_sta_info *link_sta;
- if (!cbss || assoc_data->link[link_id].disabled)
+ if (!cbss)
continue;
link = sdata_dereference(sdata->link[link_id], sdata);
@@ -5429,17 +5453,18 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
struct ieee80211_link_data *link;
- link = sdata_dereference(sdata->link[link_id], sdata);
- if (!link)
- continue;
-
if (!assoc_data->link[link_id].bss)
continue;
resp.links[link_id].bss = assoc_data->link[link_id].bss;
- resp.links[link_id].addr = link->conf->addr;
+ ether_addr_copy(resp.links[link_id].addr,
+ assoc_data->link[link_id].addr);
resp.links[link_id].status = assoc_data->link[link_id].status;
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ continue;
+
/* get uapsd queues configuration - same for all links */
resp.uapsd_queues = 0;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7fe7280e8437..d45d4be63dd8 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -665,7 +665,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
}
if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED &&
- !ieee80211_is_deauth(hdr->frame_control)))
+ !ieee80211_is_deauth(hdr->frame_control)) &&
+ tx->skb->protocol != tx->sdata->control_port_protocol)
return TX_DROP;
if (!skip_hw && tx->key &&
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index c1250aa47808..b3a5c3e96a72 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -4,7 +4,7 @@
*
* Portions of this file
* Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2022 Intel Corporation
+ * Copyright (C) 2018 - 2023 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -116,12 +116,14 @@ void
ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const struct ieee80211_vht_cap *vht_cap_ie,
+ const struct ieee80211_vht_cap *vht_cap_ie2,
struct link_sta_info *link_sta)
{
struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap;
struct ieee80211_sta_vht_cap own_cap;
u32 cap_info, i;
bool have_80mhz;
+ u32 mpdu_len;
memset(vht_cap, 0, sizeof(*vht_cap));
@@ -318,10 +320,20 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta);
/*
+ * Work around the Cisco 9115 FW 17.3 bug by taking the min of
+ * both reported MPDU lengths.
+ */
+ mpdu_len = vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK;
+ if (vht_cap_ie2)
+ mpdu_len = min_t(u32, mpdu_len,
+ le32_get_bits(vht_cap_ie2->vht_cap_info,
+ IEEE80211_VHT_CAP_MAX_MPDU_MASK));
+
+ /*
* FIXME - should the amsdu len be per link? store per link
* and maintain a minimum?
*/
- switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) {
+ switch (mpdu_len) {
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
break;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index c254accb14de..cd15ec73073e 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1269,12 +1269,13 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
if (rcv_wnd == rcv_wnd_old)
break;
- if (before64(rcv_wnd_new, rcv_wnd)) {
+
+ rcv_wnd_old = rcv_wnd;
+ if (before64(rcv_wnd_new, rcv_wnd_old)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE);
goto raise_win;
}
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
- rcv_wnd_old = rcv_wnd;
}
return;
}
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index b5a8aa4c1ebd..d042d32beb4d 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -307,12 +307,6 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- if (addr_l.id == 0) {
- NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id");
- err = -EINVAL;
- goto create_err;
- }
-
err = mptcp_pm_parse_addr(raddr, info, &addr_r);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 933b257eee02..c3b83cb390d9 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -134,9 +134,15 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
+static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size)
+{
+ WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc,
+ mptcp_sk(sk)->rmem_fwd_alloc + size);
+}
+
static void mptcp_rmem_charge(struct sock *sk, int size)
{
- mptcp_sk(sk)->rmem_fwd_alloc -= size;
+ mptcp_rmem_fwd_alloc_add(sk, -size);
}
static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
@@ -177,7 +183,7 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
{
amount >>= PAGE_SHIFT;
- mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT;
+ mptcp_rmem_charge(sk, amount << PAGE_SHIFT);
__sk_mem_reduce_allocated(sk, amount);
}
@@ -186,7 +192,7 @@ static void mptcp_rmem_uncharge(struct sock *sk, int size)
struct mptcp_sock *msk = mptcp_sk(sk);
int reclaimable;
- msk->rmem_fwd_alloc += size;
+ mptcp_rmem_fwd_alloc_add(sk, size);
reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
/* see sk_mem_uncharge() for the rationale behind the following schema */
@@ -341,7 +347,7 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
return false;
- msk->rmem_fwd_alloc += amount;
+ mptcp_rmem_fwd_alloc_add(sk, amount);
return true;
}
@@ -399,7 +405,7 @@ drop:
return false;
}
-static void mptcp_stop_timer(struct sock *sk)
+static void mptcp_stop_rtx_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -764,6 +770,46 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
return moved;
}
+static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
+{
+ int err = sock_error(ssk);
+ int ssk_state;
+
+ if (!err)
+ return false;
+
+ /* only propagate errors on fallen-back sockets or
+ * on MPC connect
+ */
+ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk)))
+ return false;
+
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ inet_sk_state_store(sk, ssk_state);
+ WRITE_ONCE(sk->sk_err, -err);
+
+ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+ smp_wmb();
+ sk_error_report(sk);
+ return true;
+}
+
+void __mptcp_error_report(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow(msk, subflow)
+ if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow)))
+ break;
+}
+
/* In most cases we will be able to lock the mptcp socket. If its already
* owned, we need to defer to the work queue to avoid ABBA deadlock.
*/
@@ -846,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk);
mptcp_subflow_joined(msk, ssk);
+ mptcp_stop_tout_timer(sk);
return true;
}
@@ -865,12 +912,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list
}
}
-static bool mptcp_timer_pending(struct sock *sk)
+static bool mptcp_rtx_timer_pending(struct sock *sk)
{
return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
}
-static void mptcp_reset_timer(struct sock *sk)
+static void mptcp_reset_rtx_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
unsigned long tout;
@@ -1004,10 +1051,10 @@ static void __mptcp_clean_una(struct sock *sk)
out:
if (snd_una == READ_ONCE(msk->snd_nxt) &&
snd_una == READ_ONCE(msk->write_seq)) {
- if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
- mptcp_stop_timer(sk);
+ if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
+ mptcp_stop_rtx_timer(sk);
} else {
- mptcp_reset_timer(sk);
+ mptcp_reset_rtx_timer(sk);
}
}
@@ -1580,8 +1627,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
mptcp_push_release(ssk, &info);
/* ensure the rtx timer is running */
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
if (do_check_data_fin)
mptcp_check_send_data_fin(sk);
}
@@ -1644,8 +1691,8 @@ out:
if (copied) {
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
if (msk->snd_data_fin_enable &&
msk->snd_nxt + 1 == msk->write_seq)
@@ -1800,7 +1847,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
/* data successfully copied into the write queue */
- sk->sk_forward_alloc -= total_ts;
+ sk_forward_alloc_add(sk, -total_ts);
copied += psize;
dfrag->data_len += psize;
frag_truesize += psize;
@@ -2214,7 +2261,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
sock_put(sk);
}
-static void mptcp_timeout_timer(struct timer_list *t)
+static void mptcp_tout_timer(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
@@ -2323,18 +2370,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
bool dispose_it, need_push = false;
/* If the first subflow moved to a close state before accept, e.g. due
- * to an incoming reset, mptcp either:
- * - if either the subflow or the msk are dead, destroy the context
- * (the subflow socket is deleted by inet_child_forget) and the msk
- * - otherwise do nothing at the moment and take action at accept and/or
- * listener shutdown - user-space must be able to accept() the closed
- * socket.
+ * to an incoming reset or listener shutdown, the subflow socket is
+ * already deleted by inet_child_forget() and the mptcp socket can't
+ * survive too.
*/
- if (msk->in_accept_queue && msk->first == ssk) {
- if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
- return;
-
+ if (msk->in_accept_queue && msk->first == ssk &&
+ (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
/* ensure later check in mptcp_worker() will dispose the msk */
+ mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1));
sock_set_flag(sk, SOCK_DEAD);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
mptcp_subflow_drop_ctx(ssk);
@@ -2386,6 +2429,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
}
out_release:
+ __mptcp_subflow_error_report(sk, ssk);
release_sock(ssk);
sock_put(ssk);
@@ -2396,6 +2440,22 @@ out_release:
out:
if (need_push)
__mptcp_push_pending(sk, 0);
+
+ /* Catch every 'all subflows closed' scenario, including peers silently
+ * closing them, e.g. due to timeout.
+ * For established sockets, allow an additional timeout before closing,
+ * as the protocol can still create more subflows.
+ */
+ if (list_is_singular(&msk->conn_list) && msk->first &&
+ inet_sk_state_load(msk->first) == TCP_CLOSE) {
+ if (sk->sk_state != TCP_ESTABLISHED ||
+ msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
+ inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_close_wake_up(sk);
+ } else {
+ mptcp_start_tout_timer(sk);
+ }
+ }
}
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2439,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk)
}
-static bool mptcp_should_close(const struct sock *sk)
+static bool mptcp_close_tout_expired(const struct sock *sk)
{
- s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
- struct mptcp_subflow_context *subflow;
-
- if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
- return true;
+ if (!inet_csk(sk)->icsk_mtup.probe_timestamp ||
+ sk->sk_state == TCP_CLOSE)
+ return false;
- /* if all subflows are in closed status don't bother with additional
- * timeout
- */
- mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
- if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) !=
- TCP_CLOSE)
- return false;
- }
- return true;
+ return time_after32(tcp_jiffies32,
+ inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN);
}
static void mptcp_check_fastclose(struct mptcp_sock *msk)
@@ -2582,27 +2633,28 @@ static void __mptcp_retrans(struct sock *sk)
reset_timer:
mptcp_check_and_set_pending(sk);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
}
/* schedule the timeout timer for the relevant event: either close timeout
* or mp_fail timeout. The close timeout takes precedence on the mp_fail one
*/
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout)
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
{
struct sock *sk = (struct sock *)msk;
unsigned long timeout, close_timeout;
- if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
+ if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
return;
- close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
+ close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
+ TCP_TIMEWAIT_LEN;
/* the close timeout takes precedence on the fail one, and here at least one of
* them is active
*/
- timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
+ timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
sk_reset_timer(sk, &sk->sk_timer, timeout);
}
@@ -2621,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
mptcp_subflow_reset(ssk);
WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
unlock_sock_fast(ssk, slow);
-
- mptcp_reset_timeout(msk, 0);
}
static void mptcp_do_fastclose(struct sock *sk)
@@ -2659,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(sk);
- /* There is no point in keeping around an orphaned sk timedout or
- * closed, but we need the msk around to reply to incoming DATA_FIN,
- * even if it is orphaned and in FIN_WAIT2 state
- */
- if (sock_flag(sk, SOCK_DEAD)) {
- if (mptcp_should_close(sk))
- mptcp_do_fastclose(sk);
+ if (mptcp_close_tout_expired(sk)) {
+ mptcp_do_fastclose(sk);
+ mptcp_close_wake_up(sk);
+ }
- if (sk->sk_state == TCP_CLOSE) {
- __mptcp_destroy_sock(sk);
- goto unlock;
- }
+ if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) {
+ __mptcp_destroy_sock(sk);
+ goto unlock;
}
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
@@ -2711,7 +2757,7 @@ static void __mptcp_init_sock(struct sock *sk)
/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
- timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
+ timer_setup(&sk->sk_timer, mptcp_tout_timer, 0);
}
static void mptcp_ca_reset(struct sock *sk)
@@ -2802,8 +2848,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
} else {
pr_debug("Sending DATA_FIN on subflow %p", ssk);
tcp_send_ack(ssk);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
}
break;
}
@@ -2886,7 +2932,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
might_sleep();
- mptcp_stop_timer(sk);
+ mptcp_stop_rtx_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
mptcp_release_sched(msk);
@@ -2969,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
cleanup:
/* orphan all the subflows */
- inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
@@ -3006,7 +3051,7 @@ cleanup:
__mptcp_destroy_sock(sk);
do_cancel_work = true;
} else {
- mptcp_reset_timeout(msk, 0);
+ mptcp_start_tout_timer(sk);
}
return do_cancel_work;
@@ -3069,8 +3114,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
- mptcp_stop_timer(sk);
- sk_stop_timer(sk, &sk->sk_timer);
+ mptcp_stop_rtx_timer(sk);
+ mptcp_stop_tout_timer(sk);
if (msk->token)
mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
@@ -3257,8 +3302,8 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
/* move all the rx fwd alloc into the sk_mem_reclaim_final in
* inet_sock_destruct() will dispose it
*/
- sk->sk_forward_alloc += msk->rmem_fwd_alloc;
- msk->rmem_fwd_alloc = 0;
+ sk_forward_alloc_add(sk, msk->rmem_fwd_alloc);
+ WRITE_ONCE(msk->rmem_fwd_alloc, 0);
mptcp_token_destroy(msk);
mptcp_pm_free_anno_list(msk);
mptcp_free_local_addr_list(msk);
@@ -3380,24 +3425,21 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
}
-void mptcp_subflow_process_delegated(struct sock *ssk)
+void mptcp_subflow_process_delegated(struct sock *ssk, long status)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn;
- if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+ if (status & BIT(MPTCP_DELEGATE_SEND)) {
mptcp_data_lock(sk);
if (!sock_owned_by_user(sk))
__mptcp_subflow_push_pending(sk, ssk, true);
else
__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
- mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
}
- if (test_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status)) {
+ if (status & BIT(MPTCP_DELEGATE_ACK))
schedule_3rdack_retransmission(ssk);
- mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_ACK);
- }
}
static int mptcp_hash(struct sock *sk)
@@ -3522,7 +3564,8 @@ static void mptcp_shutdown(struct sock *sk, int how)
static int mptcp_forward_alloc_get(const struct sock *sk)
{
- return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
+ return READ_ONCE(sk->sk_forward_alloc) +
+ READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc);
}
static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
@@ -3922,14 +3965,17 @@ static int mptcp_napi_poll(struct napi_struct *napi, int budget)
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bh_lock_sock_nested(ssk);
- if (!sock_owned_by_user(ssk) &&
- mptcp_subflow_has_delegated_action(subflow))
- mptcp_subflow_process_delegated(ssk);
- /* ... elsewhere tcp_release_cb_override already processed
- * the action or will do at next release_sock().
- * In both case must dequeue the subflow here - on the same
- * CPU that scheduled it.
- */
+ if (!sock_owned_by_user(ssk)) {
+ mptcp_subflow_process_delegated(ssk, xchg(&subflow->delegated_status, 0));
+ } else {
+ /* tcp_release_cb_override already processed
+ * the action or will do at next release_sock().
+ * In both case must dequeue the subflow here - on the same
+ * CPU that scheduled it.
+ */
+ smp_wmb();
+ clear_bit(MPTCP_DELEGATE_SCHEDULED, &subflow->delegated_status);
+ }
bh_unlock_sock(ssk);
sock_put(ssk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7254b3562575..3612545fa62e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -444,9 +444,11 @@ struct mptcp_delegated_action {
DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
-#define MPTCP_DELEGATE_SEND 0
-#define MPTCP_DELEGATE_ACK 1
+#define MPTCP_DELEGATE_SCHEDULED 0
+#define MPTCP_DELEGATE_SEND 1
+#define MPTCP_DELEGATE_ACK 2
+#define MPTCP_DELEGATE_ACTIONS_MASK (~BIT(MPTCP_DELEGATE_SCHEDULED))
/* MPTCP subflow context */
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
@@ -564,23 +566,24 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
}
-void mptcp_subflow_process_delegated(struct sock *ssk);
+void mptcp_subflow_process_delegated(struct sock *ssk, long actions);
static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
{
+ long old, set_bits = BIT(MPTCP_DELEGATE_SCHEDULED) | BIT(action);
struct mptcp_delegated_action *delegated;
bool schedule;
/* the caller held the subflow bh socket lock */
lockdep_assert_in_softirq();
- /* The implied barrier pairs with mptcp_subflow_delegated_done(), and
- * ensures the below list check sees list updates done prior to status
- * bit changes
+ /* The implied barrier pairs with tcp_release_cb_override()
+ * mptcp_napi_poll(), and ensures the below list check sees list
+ * updates done prior to delegated status bits changes
*/
- if (!test_and_set_bit(action, &subflow->delegated_status)) {
- /* still on delegated list from previous scheduling */
- if (!list_empty(&subflow->delegated_node))
+ old = set_mask_bits(&subflow->delegated_status, 0, set_bits);
+ if (!(old & BIT(MPTCP_DELEGATE_SCHEDULED))) {
+ if (WARN_ON_ONCE(!list_empty(&subflow->delegated_node)))
return;
delegated = this_cpu_ptr(&mptcp_delegated_actions);
@@ -605,20 +608,6 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
return ret;
}
-static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
-{
- return !!READ_ONCE(subflow->delegated_status);
-}
-
-static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action)
-{
- /* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before
- * touching the status bit
- */
- smp_wmb();
- clear_bit(action, &subflow->delegated_status);
-}
-
int mptcp_is_enabled(const struct net *net);
unsigned int mptcp_get_add_addr_timeout(const struct net *net);
int mptcp_is_checksum_enabled(const struct net *net);
@@ -718,7 +707,29 @@ void mptcp_get_options(const struct sk_buff *skb,
void mptcp_finish_connect(struct sock *sk);
void __mptcp_set_connected(struct sock *sk);
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout);
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);
+
+static inline void mptcp_stop_tout_timer(struct sock *sk)
+{
+ if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
+ return;
+
+ sk_stop_timer(sk, &sk->sk_timer);
+ inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
+}
+
+static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout)
+{
+ /* avoid 0 timestamp, as that means no close timeout */
+ inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1;
+}
+
+static inline void mptcp_start_tout_timer(struct sock *sk)
+{
+ mptcp_set_close_tout(sk, tcp_jiffies32);
+ mptcp_reset_tout_timer(mptcp_sk(sk), 0);
+}
+
static inline bool mptcp_is_fully_established(struct sock *sk)
{
return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9bf3c7bc1762..9c1f8d1d63d2 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1226,7 +1226,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
WRITE_ONCE(subflow->fail_tout, fail_tout);
tcp_send_ack(ssk);
- mptcp_reset_timeout(msk, subflow->fail_tout);
+ mptcp_reset_tout_timer(msk, subflow->fail_tout);
}
static bool subflow_check_data_avail(struct sock *ssk)
@@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
*full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
-void __mptcp_error_report(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow;
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- int err = sock_error(ssk);
- int ssk_state;
-
- if (!err)
- continue;
-
- /* only propagate errors on fallen-back sockets or
- * on MPC connect
- */
- if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
- continue;
-
- /* We need to propagate only transition to CLOSE state.
- * Orphaned socket will see such state change via
- * subflow_sched_work_if_closed() and that path will properly
- * destroy the msk as needed.
- */
- ssk_state = inet_sk_state_load(ssk);
- if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
- inet_sk_state_store(sk, ssk_state);
- WRITE_ONCE(sk->sk_err, -err);
-
- /* This barrier is coupled with smp_rmb() in mptcp_poll() */
- smp_wmb();
- sk_error_report(sk);
- break;
- }
-}
-
static void subflow_error_report(struct sock *ssk)
{
struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
@@ -1588,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
WRITE_ONCE(msk->allow_infinite_fallback, false);
+ mptcp_stop_tout_timer(sk);
return 0;
failed_unlink:
@@ -1991,9 +1956,15 @@ static void subflow_ulp_clone(const struct request_sock *req,
static void tcp_release_cb_override(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ long status;
- if (mptcp_subflow_has_delegated_action(subflow))
- mptcp_subflow_process_delegated(ssk);
+ /* process and clear all the pending actions, but leave the subflow into
+ * the napi queue. To respect locking, only the same CPU that originated
+ * the action can touch the list. mptcp_napi_poll will take care of it.
+ */
+ status = set_mask_bits(&subflow->delegated_status, MPTCP_DELEGATE_ACTIONS_MASK, 0);
+ if (status)
+ mptcp_subflow_process_delegated(ssk, status);
tcp_release_cb(ssk);
}
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 62fb1031763d..f8854bff286c 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -89,6 +89,11 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
if ((had_link == has_link) || chained)
return 0;
+ if (had_link)
+ netif_carrier_off(ndp->ndev.dev);
+ else
+ netif_carrier_on(ndp->ndev.dev);
+
if (!ndp->multi_package && !nc->package->multi_channel) {
if (had_link) {
ndp->flags |= NCSI_DEV_RESHUFFLE;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e564b5174261..35d2f9c9ada0 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -683,6 +683,14 @@ __ip_set_put(struct ip_set *set)
* a separate reference counter
*/
static void
+__ip_set_get_netlink(struct ip_set *set)
+{
+ write_lock_bh(&ip_set_ref_lock);
+ set->ref_netlink++;
+ write_unlock_bh(&ip_set_ref_lock);
+}
+
+static void
__ip_set_put_netlink(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -1693,11 +1701,11 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
do {
if (retried) {
- __ip_set_get(set);
+ __ip_set_get_netlink(set);
nfnl_unlock(NFNL_SUBSYS_IPSET);
cond_resched();
nfnl_lock(NFNL_SUBSYS_IPSET);
- __ip_set_put(set);
+ __ip_set_put_netlink(set);
}
ip_set_lock(set);
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 005a7ce87217..bf4f91b78e1d 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -36,6 +36,7 @@ MODULE_ALIAS("ip_set_hash:net,port,net");
#define IP_SET_HASH_WITH_PROTO
#define IP_SET_HASH_WITH_NETS
#define IPSET_NET_COUNT 2
+#define IP_SET_HASH_WITH_NET0
/* IPv4 variant */
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index da5af28ff57b..4174076c66fa 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1439,7 +1439,7 @@ static int bind_mcastif_addr(struct socket *sock, struct net_device *dev)
sin.sin_addr.s_addr = addr;
sin.sin_port = 0;
- return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
+ return kernel_bind(sock, (struct sockaddr *)&sin, sizeof(sin));
}
static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
@@ -1505,8 +1505,8 @@ static int make_send_sock(struct netns_ipvs *ipvs, int id,
}
get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
- result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
- salen, 0);
+ result = kernel_connect(sock, (struct sockaddr *)&mcast_addr,
+ salen, 0);
if (result < 0) {
pr_err("Error connecting to the multicast addr\n");
goto error;
@@ -1546,7 +1546,7 @@ static int make_receive_sock(struct netns_ipvs *ipvs, int id,
get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
sock->sk->sk_bound_dev_if = dev->ifindex;
- result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
+ result = kernel_bind(sock, (struct sockaddr *)&mcast_addr, salen);
if (result < 0) {
pr_err("Error binding to the multicast addr\n");
goto error;
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index c7a6114091ae..b21799d468d2 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -381,6 +381,8 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
struct nf_conn *nfct = (struct nf_conn *)nfct_i;
int err;
+ if (!nf_ct_is_confirmed(nfct))
+ nfct->timeout += nfct_time_stamp;
nfct->status |= IPS_CONFIRMED;
err = nf_conntrack_hash_check_insert(nfct);
if (err < 0) {
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 0b513f7bf9f3..dd62cc12e775 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -40,10 +40,10 @@ static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = {
[NF_CT_EXT_ECACHE] = sizeof(struct nf_conntrack_ecache),
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
- [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_acct),
+ [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_tstamp),
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_tstamp),
+ [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_timeout),
#endif
#ifdef CONFIG_NF_CONNTRACK_LABELS
[NF_CT_EXT_LABELS] = sizeof(struct nf_conn_labels),
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index b6bcc8f2f46b..c6bd533983c1 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -112,7 +112,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA},
/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/
/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */
-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
+/* cookie_ack */ {sCL, sCL, sCW, sES, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL},
/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
@@ -126,7 +126,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV},
/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV},
/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV},
-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
+/* cookie_echo */ {sIV, sCL, sCE, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV},
/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV},
/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
@@ -412,6 +412,9 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
/* (D) vtag must be same as init_vtag as found in INIT_ACK */
if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock;
+ } else if (sch->type == SCTP_CID_COOKIE_ACK) {
+ ct->proto.sctp.init[dir] = 0;
+ ct->proto.sctp.init[!dir] = 0;
} else if (sch->type == SCTP_CID_HEARTBEAT) {
if (ct->proto.sctp.vtag[dir] == 0) {
pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir);
@@ -461,16 +464,18 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
}
/* If it is an INIT or an INIT ACK note down the vtag */
- if (sch->type == SCTP_CID_INIT ||
- sch->type == SCTP_CID_INIT_ACK) {
- struct sctp_inithdr _inithdr, *ih;
+ if (sch->type == SCTP_CID_INIT) {
+ struct sctp_inithdr _ih, *ih;
- ih = skb_header_pointer(skb, offset + sizeof(_sch),
- sizeof(_inithdr), &_inithdr);
- if (ih == NULL)
+ ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih);
+ if (!ih)
goto out_unlock;
- pr_debug("Setting vtag %x for dir %d\n",
- ih->init_tag, !dir);
+
+ if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir])
+ ct->proto.sctp.init[!dir] = 0;
+ ct->proto.sctp.init[dir] = 1;
+
+ pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
ct->proto.sctp.vtag[!dir] = ih->init_tag;
/* don't renew timeout on init retransmit so
@@ -481,6 +486,24 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
old_state == SCTP_CONNTRACK_CLOSED &&
nf_ct_is_confirmed(ct))
ignore = true;
+ } else if (sch->type == SCTP_CID_INIT_ACK) {
+ struct sctp_inithdr _ih, *ih;
+ __be32 vtag;
+
+ ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih);
+ if (!ih)
+ goto out_unlock;
+
+ vtag = ct->proto.sctp.vtag[!dir];
+ if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag)
+ goto out_unlock;
+ /* collision */
+ if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] &&
+ vtag != ih->init_tag)
+ goto out_unlock;
+
+ pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
+ ct->proto.sctp.vtag[!dir] = ih->init_tag;
}
ct->proto.sctp.state = new_state;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 41b826dff6f5..a72b6aeefb1b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -102,6 +102,7 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
[NFT_MSG_NEWFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_REGISTER,
[NFT_MSG_GETFLOWTABLE] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_DELFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+ [NFT_MSG_GETSETELEM_RESET] = AUDIT_NFT_OP_SETELEM_RESET,
};
static void nft_validate_state_update(struct nft_table *table, u8 new_validate_state)
@@ -1218,6 +1219,10 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
flags & NFT_TABLE_F_OWNER))
return -EOPNOTSUPP;
+ /* No dormant off/on/off/on games in single transaction */
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return -EINVAL;
+
trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
sizeof(struct nft_trans_table));
if (trans == NULL)
@@ -1431,7 +1436,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, chain))
continue;
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
continue;
ctx->chain = chain;
@@ -1445,8 +1450,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, set))
continue;
- if (nft_set_is_anonymous(set) &&
- !list_empty(&set->bindings))
+ if (nft_set_is_anonymous(set))
continue;
err = nft_delset(ctx, set);
@@ -1476,7 +1480,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, chain))
continue;
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
continue;
ctx->chain = chain;
@@ -2909,6 +2913,9 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
return PTR_ERR(chain);
}
+ if (nft_chain_binding(chain))
+ return -EOPNOTSUPP;
+
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (nla[NFTA_CHAIN_HOOK]) {
@@ -3421,6 +3428,18 @@ err:
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
+static void audit_log_rule_reset(const struct nft_table *table,
+ unsigned int base_seq,
+ unsigned int nentries)
+{
+ char *buf = kasprintf(GFP_ATOMIC, "%s:%u",
+ table->name, base_seq);
+
+ audit_log_nfcfg(buf, table->family, nentries,
+ AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
+ kfree(buf);
+}
+
struct nft_rule_dump_ctx {
char *table;
char *chain;
@@ -3436,6 +3455,8 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
const struct nft_rule *rule, *prule;
unsigned int s_idx = cb->args[0];
+ unsigned int entries = 0;
+ int ret = 0;
u64 handle;
prule = NULL;
@@ -3458,16 +3479,22 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
table->family,
- table, chain, rule, handle, reset) < 0)
- return 1;
-
+ table, chain, rule, handle, reset) < 0) {
+ ret = 1;
+ break;
+ }
+ entries++;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
prule = rule;
cont_skip:
(*idx)++;
}
- return 0;
+
+ if (reset && entries)
+ audit_log_rule_reset(table, cb->seq, entries);
+
+ return ret;
}
static int nf_tables_dump_rules(struct sk_buff *skb,
@@ -3634,6 +3661,9 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
if (err < 0)
goto err_fill_rule_info;
+ if (reset)
+ audit_log_rule_reset(table, nft_pernet(net)->base_seq, 1);
+
return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
err_fill_rule_info:
@@ -3951,6 +3981,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
+ if (nft_chain_binding(chain)) {
+ err = -EOPNOTSUPP;
+ goto err_destroy_flow_rule;
+ }
+
err = nft_delrule(&ctx, old_rule);
if (err < 0)
goto err_destroy_flow_rule;
@@ -4058,7 +4093,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
}
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
return -EOPNOTSUPP;
}
@@ -4092,7 +4127,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
continue;
ctx.chain = chain;
@@ -5624,13 +5659,25 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
}
+static void audit_log_nft_set_reset(const struct nft_table *table,
+ unsigned int base_seq,
+ unsigned int nentries)
+{
+ char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq);
+
+ audit_log_nfcfg(buf, table->family, nentries,
+ AUDIT_NFT_OP_SETELEM_RESET, GFP_ATOMIC);
+ kfree(buf);
+}
+
struct nft_set_dump_ctx {
const struct nft_set *set;
struct nft_ctx ctx;
};
static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
- const struct nft_set *set, bool reset)
+ const struct nft_set *set, bool reset,
+ unsigned int base_seq)
{
struct nft_set_elem_catchall *catchall;
u8 genmask = nft_genmask_cur(net);
@@ -5646,6 +5693,8 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
elem.priv = catchall->elem;
ret = nf_tables_fill_setelem(skb, set, &elem, reset);
+ if (reset && !ret)
+ audit_log_nft_set_reset(set->table, base_seq, 1);
break;
}
@@ -5725,12 +5774,17 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
set->ops->walk(&dump_ctx->ctx, set, &args.iter);
if (!args.iter.err && args.iter.count == cb->args[0])
- args.iter.err = nft_set_catchall_dump(net, skb, set, reset);
- rcu_read_unlock();
-
+ args.iter.err = nft_set_catchall_dump(net, skb, set,
+ reset, cb->seq);
nla_nest_end(skb, nest);
nlmsg_end(skb, nlh);
+ if (reset && args.iter.count > args.iter.skip)
+ audit_log_nft_set_reset(table, cb->seq,
+ args.iter.count - args.iter.skip);
+
+ rcu_read_unlock();
+
if (args.iter.err && args.iter.err != -EMSGSIZE)
return args.iter.err;
if (args.iter.count == cb->args[0])
@@ -5955,13 +6009,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
u8 family = info->nfmsg->nfgen_family;
+ int rem, err = 0, nelems = 0;
struct net *net = info->net;
struct nft_table *table;
struct nft_set *set;
struct nlattr *attr;
struct nft_ctx ctx;
bool reset = false;
- int rem, err = 0;
table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
genmask, 0);
@@ -6004,8 +6058,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
NL_SET_BAD_ATTR(extack, attr);
break;
}
+ nelems++;
}
+ if (reset)
+ audit_log_nft_set_reset(table, nft_pernet(net)->base_seq,
+ nelems);
+
return err;
}
@@ -7139,8 +7198,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings) &&
- (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
+ if (nft_set_is_anonymous(set))
+ return -EOPNOTSUPP;
+
+ if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT))
return -EBUSY;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@@ -7810,24 +7871,14 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
return nft_delobj(&ctx, obj);
}
-void nft_obj_notify(struct net *net, const struct nft_table *table,
- struct nft_object *obj, u32 portid, u32 seq, int event,
- u16 flags, int family, int report, gfp_t gfp)
+static void
+__nft_obj_notify(struct net *net, const struct nft_table *table,
+ struct nft_object *obj, u32 portid, u32 seq, int event,
+ u16 flags, int family, int report, gfp_t gfp)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct sk_buff *skb;
int err;
- char *buf = kasprintf(gfp, "%s:%u",
- table->name, nft_net->base_seq);
-
- audit_log_nfcfg(buf,
- family,
- obj->handle,
- event == NFT_MSG_NEWOBJ ?
- AUDIT_NFT_OP_OBJ_REGISTER :
- AUDIT_NFT_OP_OBJ_UNREGISTER,
- gfp);
- kfree(buf);
if (!report &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
@@ -7850,13 +7901,35 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
err:
nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
+
+void nft_obj_notify(struct net *net, const struct nft_table *table,
+ struct nft_object *obj, u32 portid, u32 seq, int event,
+ u16 flags, int family, int report, gfp_t gfp)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ char *buf = kasprintf(gfp, "%s:%u",
+ table->name, nft_net->base_seq);
+
+ audit_log_nfcfg(buf,
+ family,
+ obj->handle,
+ event == NFT_MSG_NEWOBJ ?
+ AUDIT_NFT_OP_OBJ_REGISTER :
+ AUDIT_NFT_OP_OBJ_UNREGISTER,
+ gfp);
+ kfree(buf);
+
+ __nft_obj_notify(net, table, obj, portid, seq, event,
+ flags, family, report, gfp);
+}
EXPORT_SYMBOL_GPL(nft_obj_notify);
static void nf_tables_obj_notify(const struct nft_ctx *ctx,
struct nft_object *obj, int event)
{
- nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
- ctx->flags, ctx->family, ctx->report, GFP_KERNEL);
+ __nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid,
+ ctx->seq, event, ctx->flags, ctx->family,
+ ctx->report, GFP_KERNEL);
}
/*
@@ -9518,12 +9591,15 @@ static int nft_trans_gc_space(struct nft_trans_gc *trans)
struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
unsigned int gc_seq, gfp_t gfp)
{
+ struct nft_set *set;
+
if (nft_trans_gc_space(gc))
return gc;
+ set = gc->set;
nft_trans_gc_queue_work(gc);
- return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+ return nft_trans_gc_alloc(set, gc_seq, gfp);
}
void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
@@ -9538,15 +9614,18 @@ void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
{
+ struct nft_set *set;
+
if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
return NULL;
if (nft_trans_gc_space(gc))
return gc;
+ set = gc->set;
call_rcu(&gc->rcu, nft_trans_gc_trans_free);
- return nft_trans_gc_alloc(gc->set, 0, gfp);
+ return nft_trans_gc_alloc(set, 0, gfp);
}
void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
@@ -9561,8 +9640,9 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
call_rcu(&trans->rcu, nft_trans_gc_trans_free);
}
-struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
- unsigned int gc_seq)
+static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+ unsigned int gc_seq,
+ bool sync)
{
struct nft_set_elem_catchall *catchall;
const struct nft_set *set = gc->set;
@@ -9578,7 +9658,11 @@ struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
nft_set_elem_dead(ext);
dead_elem:
- gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (sync)
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ else
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+
if (!gc)
return NULL;
@@ -9588,6 +9672,17 @@ dead_elem:
return gc;
}
+struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq)
+{
+ return nft_trans_gc_catchall(gc, gc_seq, false);
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
+{
+ return nft_trans_gc_catchall(gc, 0, true);
+}
+
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -11010,7 +11105,7 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
ctx.family = table->family;
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
continue;
ctx.chain = chain;
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 8f1bfa6ccc2d..50723ba08289 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -315,6 +315,14 @@ static int nfnl_osf_add_callback(struct sk_buff *skb,
f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+ if (f->opt_num > ARRAY_SIZE(f->opt))
+ return -EINVAL;
+
+ if (!memchr(f->genre, 0, MAXGENRELEN) ||
+ !memchr(f->subtype, 0, MAXGENRELEN) ||
+ !memchr(f->version, 0, MAXGENRELEN))
+ return -EINVAL;
+
kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL);
if (!kf)
return -ENOMEM;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 7f856ceb3a66..3fbaa7bf41f9 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -35,6 +35,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset)
return opt[offset + 1];
}
+static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
+{
+ if (len % NFT_REG32_SIZE)
+ dest[len / NFT_REG32_SIZE] = 0;
+
+ return skb_copy_bits(skb, offset, dest, len);
+}
+
static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -56,8 +64,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
@@ -153,8 +160,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
@@ -210,7 +216,8 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
*dest = 1;
} else {
- dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
memcpy(dest, opt + offset, priv->len);
}
@@ -238,7 +245,12 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
if (!tcph)
goto err;
+ if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
+ goto err;
+
+ tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
opt = (u8 *)tcph;
+
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
union {
__be16 v16;
@@ -253,15 +265,6 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
goto err;
- if (skb_ensure_writable(pkt->skb,
- nft_thoff(pkt) + i + priv->len))
- goto err;
-
- tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
- &tcphdr_len);
- if (!tcph)
- goto err;
-
offset = i + priv->offset;
switch (priv->len) {
@@ -325,9 +328,9 @@ static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr,
if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
goto drop;
- opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
- if (!opt)
- goto err;
+ tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
+ opt = (u8 *)tcph;
+
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
unsigned int j;
@@ -392,9 +395,8 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
offset + ntohs(sch->length) > pkt->skb->len)
break;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset + priv->offset,
- dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset,
+ dest, priv->len) < 0)
break;
return;
}
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 8cb800989947..120f6d395b98 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -154,6 +154,17 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
return pkt->inneroff;
}
+static bool nft_payload_need_vlan_copy(const struct nft_payload *priv)
+{
+ unsigned int len = priv->offset + priv->len;
+
+ /* data past ether src/dst requested, copy needed */
+ if (len > offsetof(struct ethhdr, h_proto))
+ return true;
+
+ return false;
+}
+
void nft_payload_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -172,7 +183,7 @@ void nft_payload_eval(const struct nft_expr *expr,
goto err;
if (skb_vlan_tag_present(skb) &&
- priv->offset >= offsetof(struct ethhdr, h_proto)) {
+ nft_payload_need_vlan_copy(priv)) {
if (!nft_payload_copy_vlan(dest, skb,
priv->offset, priv->len))
goto err;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 524763659f25..2013de934cef 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -338,12 +338,9 @@ static void nft_rhash_gc(struct work_struct *work)
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN) {
- nft_trans_gc_destroy(gc);
- gc = NULL;
- goto try_later;
- }
- continue;
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
}
/* Ruleset has been updated, try later. */
@@ -372,7 +369,7 @@ dead_elem:
nft_trans_gc_elem_add(gc, he);
}
- gc = nft_trans_gc_catchall(gc, gc_seq);
+ gc = nft_trans_gc_catchall_async(gc, gc_seq);
try_later:
/* catchall list iteration requires rcu read side lock. */
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 6af9c9ed4b5c..c0dcc40de358 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1596,7 +1596,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (!gc)
- break;
+ return;
nft_pipapo_gc_deactivate(net, set, e);
pipapo_drop(m, rulemap);
@@ -1610,7 +1610,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
}
}
- gc = nft_trans_gc_catchall(gc, 0);
+ gc = nft_trans_gc_catchall_sync(gc);
if (gc) {
nft_trans_gc_queue_sync_done(gc);
priv->last_gc = jiffies;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index c6435e709231..2660ceab3759 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -233,10 +233,9 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
rb_erase(&rbe->node, &priv->root);
}
-static int nft_rbtree_gc_elem(const struct nft_set *__set,
- struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe,
- u8 genmask)
+static const struct nft_rbtree_elem *
+nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe, u8 genmask)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
@@ -246,7 +245,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
if (!gc)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
/* search for end interval coming before this element.
* end intervals don't carry a timeout extension, they
@@ -261,6 +260,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
prev = rb_prev(prev);
}
+ rbe_prev = NULL;
if (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
nft_rbtree_gc_remove(net, set, priv, rbe_prev);
@@ -272,7 +272,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
*/
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc))
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
nft_trans_gc_elem_add(gc, rbe_prev);
}
@@ -280,13 +280,13 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
nft_rbtree_gc_remove(net, set, priv, rbe);
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc))
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
nft_trans_gc_elem_add(gc, rbe);
nft_trans_gc_queue_sync_done(gc);
- return 0;
+ return rbe_prev;
}
static bool nft_rbtree_update_first(const struct nft_set *set,
@@ -312,8 +312,9 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
struct rb_node *node, *next, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
+ u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
- int d, err;
+ int d;
/* Descend the tree to search for an existing element greater than the
* key value to insert that is greater than the new element. This is the
@@ -357,11 +358,19 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
if (!nft_set_elem_active(&rbe->ext, genmask))
continue;
- /* perform garbage collection to avoid bogus overlap reports. */
- if (nft_set_elem_expired(&rbe->ext)) {
- err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
- if (err < 0)
- return err;
+ /* perform garbage collection to avoid bogus overlap reports
+ * but skip new elements in this transaction.
+ */
+ if (nft_set_elem_expired(&rbe->ext) &&
+ nft_set_elem_active(&rbe->ext, cur_genmask)) {
+ const struct nft_rbtree_elem *removed_end;
+
+ removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+ if (IS_ERR(removed_end))
+ return PTR_ERR(removed_end);
+
+ if (removed_end == rbe_le || removed_end == rbe_ge)
+ return -EAGAIN;
continue;
}
@@ -482,11 +491,18 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *rbe = elem->priv;
int err;
- write_lock_bh(&priv->lock);
- write_seqcount_begin(&priv->count);
- err = __nft_rbtree_insert(net, set, rbe, ext);
- write_seqcount_end(&priv->count);
- write_unlock_bh(&priv->lock);
+ do {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
+ cond_resched();
+
+ write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
+ err = __nft_rbtree_insert(net, set, rbe, ext);
+ write_seqcount_end(&priv->count);
+ write_unlock_bh(&priv->lock);
+ } while (err == -EAGAIN);
return err;
}
@@ -618,8 +634,7 @@ static void nft_rbtree_gc(struct work_struct *work)
if (!gc)
goto done;
- write_lock_bh(&priv->lock);
- write_seqcount_begin(&priv->count);
+ read_lock_bh(&priv->lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
/* Ruleset has been updated, try later. */
@@ -666,11 +681,10 @@ dead_elem:
nft_trans_gc_elem_add(gc, rbe);
}
- gc = nft_trans_gc_catchall(gc, gc_seq);
+ gc = nft_trans_gc_catchall_async(gc, gc_seq);
try_later:
- write_seqcount_end(&priv->count);
- write_unlock_bh(&priv->lock);
+ read_unlock_bh(&priv->lock);
if (gc)
nft_trans_gc_queue_async_done(gc);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index e8961094a282..b46a6a512058 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -149,6 +149,8 @@ static int sctp_mt_check(const struct xt_mtchk_param *par)
{
const struct xt_sctp_info *info = par->matchinfo;
+ if (info->flag_count > ARRAY_SIZE(info->flag_info))
+ return -EINVAL;
if (info->flags & ~XT_SCTP_VALID_FLAGS)
return -EINVAL;
if (info->invflags & ~XT_SCTP_VALID_FLAGS)
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index 177b40d08098..117d4615d668 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -96,11 +96,32 @@ static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par)
return ret ^ data->invert;
}
+static int u32_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ const struct xt_u32 *data = par->matchinfo;
+ const struct xt_u32_test *ct;
+ unsigned int i;
+
+ if (data->ntests > ARRAY_SIZE(data->tests))
+ return -EINVAL;
+
+ for (i = 0; i < data->ntests; ++i) {
+ ct = &data->tests[i];
+
+ if (ct->nnums > ARRAY_SIZE(ct->location) ||
+ ct->nvalues > ARRAY_SIZE(ct->value))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static struct xt_match xt_u32_mt_reg __read_mostly = {
.name = "u32",
.revision = 0,
.family = NFPROTO_UNSPEC,
.match = u32_mt,
+ .checkentry = u32_mt_checkentry,
.matchsize = sizeof(struct xt_u32),
.me = THIS_MODULE,
};
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 642b9d382fb4..eb086b06d60d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -352,7 +352,7 @@ static void netlink_overrun(struct sock *sk)
if (!nlk_test_bit(RECV_NO_ENOBUFS, sk)) {
if (!test_and_set_bit(NETLINK_S_CONGESTED,
&nlk_sk(sk)->state)) {
- sk->sk_err = ENOBUFS;
+ WRITE_ONCE(sk->sk_err, ENOBUFS);
sk_error_report(sk);
}
}
@@ -1605,7 +1605,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
goto out;
}
- sk->sk_err = p->code;
+ WRITE_ONCE(sk->sk_err, p->code);
sk_error_report(sk);
out:
return ret;
@@ -1991,7 +1991,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
ret = netlink_dump(sk);
if (ret) {
- sk->sk_err = -ret;
+ WRITE_ONCE(sk->sk_err, -ret);
sk_error_report(sk);
}
}
@@ -2511,7 +2511,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
err_bad_put:
nlmsg_free(skb);
err_skb:
- NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
+ WRITE_ONCE(NETLINK_CB(in_skb).sk->sk_err, ENOBUFS);
sk_error_report(NETLINK_CB(in_skb).sk);
}
EXPORT_SYMBOL(netlink_ack);
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index f60e424e0607..6705bb895e23 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -1636,7 +1636,9 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0);
INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
+ spin_lock(&llcp_devices_lock);
list_add(&local->list, &llcp_devices);
+ spin_unlock(&llcp_devices_lock);
return 0;
}
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index d36f3f6b4351..b15cf316b23a 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -86,11 +86,13 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_ADDR_RESOLVED:
- rdma_set_service_type(cm_id, conn->c_tos);
- rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32);
- /* XXX do we need to clean up if this fails? */
- ret = rdma_resolve_route(cm_id,
- RDS_RDMA_RESOLVE_TIMEOUT_MS);
+ if (conn) {
+ rdma_set_service_type(cm_id, conn->c_tos);
+ rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32);
+ /* XXX do we need to clean up if this fails? */
+ ret = rdma_resolve_route(cm_id,
+ RDS_RDMA_RESOLVE_TIMEOUT_MS);
+ }
break;
case RDMA_CM_EVENT_ROUTE_RESOLVED:
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index f0c477c5d1db..a0046e99d6df 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -145,7 +145,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
addrlen = sizeof(sin);
}
- ret = sock->ops->bind(sock, addr, addrlen);
+ ret = kernel_bind(sock, addr, addrlen);
if (ret) {
rdsdebug("bind failed with %d at address %pI6c\n",
ret, &conn->c_laddr);
@@ -173,7 +173,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
* own the socket
*/
rds_tcp_set_callbacks(sock, cp);
- ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK);
+ ret = kernel_connect(sock, addr, addrlen, O_NONBLOCK);
rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret);
if (ret == -EINPROGRESS)
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 014fa24418c1..53b3535a1e4a 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -306,7 +306,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
addr_len = sizeof(*sin);
}
- ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len);
+ ret = kernel_bind(sock, (struct sockaddr *)&ss, addr_len);
if (ret < 0) {
rdsdebug("could not bind %s listener socket: %d\n",
isv6 ? "IPv6" : "IPv4", ret);
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 01fca7a10b4b..08630896b6c8 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -48,6 +48,7 @@ struct rfkill {
bool persistent;
bool polling_paused;
bool suspended;
+ bool need_sync;
const struct rfkill_ops *ops;
void *data;
@@ -368,6 +369,17 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
rfkill_event(rfkill);
}
+static void rfkill_sync(struct rfkill *rfkill)
+{
+ lockdep_assert_held(&rfkill_global_mutex);
+
+ if (!rfkill->need_sync)
+ return;
+
+ rfkill_set_block(rfkill, rfkill_global_states[rfkill->type].cur);
+ rfkill->need_sync = false;
+}
+
static void rfkill_update_global_state(enum rfkill_type type, bool blocked)
{
int i;
@@ -730,6 +742,10 @@ static ssize_t soft_show(struct device *dev, struct device_attribute *attr,
{
struct rfkill *rfkill = to_rfkill(dev);
+ mutex_lock(&rfkill_global_mutex);
+ rfkill_sync(rfkill);
+ mutex_unlock(&rfkill_global_mutex);
+
return sysfs_emit(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0);
}
@@ -751,6 +767,7 @@ static ssize_t soft_store(struct device *dev, struct device_attribute *attr,
return -EINVAL;
mutex_lock(&rfkill_global_mutex);
+ rfkill_sync(rfkill);
rfkill_set_block(rfkill, state);
mutex_unlock(&rfkill_global_mutex);
@@ -783,6 +800,10 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr,
{
struct rfkill *rfkill = to_rfkill(dev);
+ mutex_lock(&rfkill_global_mutex);
+ rfkill_sync(rfkill);
+ mutex_unlock(&rfkill_global_mutex);
+
return sysfs_emit(buf, "%d\n", user_state_from_blocked(rfkill->state));
}
@@ -805,6 +826,7 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr,
return -EINVAL;
mutex_lock(&rfkill_global_mutex);
+ rfkill_sync(rfkill);
rfkill_set_block(rfkill, state == RFKILL_USER_STATE_SOFT_BLOCKED);
mutex_unlock(&rfkill_global_mutex);
@@ -1032,14 +1054,10 @@ static void rfkill_uevent_work(struct work_struct *work)
static void rfkill_sync_work(struct work_struct *work)
{
- struct rfkill *rfkill;
- bool cur;
-
- rfkill = container_of(work, struct rfkill, sync_work);
+ struct rfkill *rfkill = container_of(work, struct rfkill, sync_work);
mutex_lock(&rfkill_global_mutex);
- cur = rfkill_global_states[rfkill->type].cur;
- rfkill_set_block(rfkill, cur);
+ rfkill_sync(rfkill);
mutex_unlock(&rfkill_global_mutex);
}
@@ -1087,6 +1105,7 @@ int __must_check rfkill_register(struct rfkill *rfkill)
round_jiffies_relative(POLL_INTERVAL));
if (!rfkill->persistent || rfkill_epo_lock_active) {
+ rfkill->need_sync = true;
schedule_work(&rfkill->sync_work);
} else {
#ifdef CONFIG_RFKILL_INPUT
@@ -1171,6 +1190,7 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
if (!ev)
goto free;
+ rfkill_sync(rfkill);
rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD);
list_add_tail(&ev->list, &data->events);
}
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 591d87d5e5c0..68e6acd0f130 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -61,6 +61,7 @@ struct fq_pie_sched_data {
struct pie_params p_params;
u32 ecn_prob;
u32 flows_cnt;
+ u32 flows_cursor;
u32 quantum;
u32 memory_limit;
u32 new_flow_count;
@@ -375,22 +376,32 @@ flow_error:
static void fq_pie_timer(struct timer_list *t)
{
struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
+ unsigned long next, tupdate;
struct Qdisc *sch = q->sch;
spinlock_t *root_lock; /* to lock qdisc for probability calculations */
- u32 idx;
+ int max_cnt, i;
rcu_read_lock();
root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
- for (idx = 0; idx < q->flows_cnt; idx++)
- pie_calculate_probability(&q->p_params, &q->flows[idx].vars,
- q->flows[idx].backlog);
-
- /* reset the timer to fire after 'tupdate' jiffies. */
- if (q->p_params.tupdate)
- mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);
+ /* Limit this expensive loop to 2048 flows per round. */
+ max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048);
+ for (i = 0; i < max_cnt; i++) {
+ pie_calculate_probability(&q->p_params,
+ &q->flows[q->flows_cursor].vars,
+ q->flows[q->flows_cursor].backlog);
+ q->flows_cursor++;
+ }
+ tupdate = q->p_params.tupdate;
+ next = 0;
+ if (q->flows_cursor >= q->flows_cnt) {
+ q->flows_cursor = 0;
+ next = tupdate;
+ }
+ if (tupdate)
+ mod_timer(&q->adapt_timer, jiffies + next);
spin_unlock(root_lock);
rcu_read_unlock();
}
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index ea8c4a7174bb..35f49edf63db 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -207,7 +207,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct plug_sched_data),
.enqueue = plug_enqueue,
.dequeue = plug_dequeue,
- .peek = qdisc_peek_head,
+ .peek = qdisc_peek_dequeued,
.init = plug_init,
.change = plug_change,
.reset = qdisc_reset_queue,
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 1a25752f1a9a..546c10adcacd 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -974,10 +974,13 @@ static void qfq_update_eligible(struct qfq_sched *q)
}
/* Dequeue head packet of the head class in the DRR queue of the aggregate. */
-static void agg_dequeue(struct qfq_aggregate *agg,
- struct qfq_class *cl, unsigned int len)
+static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
+ struct qfq_class *cl, unsigned int len)
{
- qdisc_dequeue_peeked(cl->qdisc);
+ struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc);
+
+ if (!skb)
+ return NULL;
cl->deficit -= (int) len;
@@ -987,6 +990,8 @@ static void agg_dequeue(struct qfq_aggregate *agg,
cl->deficit += agg->lmax;
list_move_tail(&cl->alist, &agg->active);
}
+
+ return skb;
}
static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
@@ -1132,11 +1137,18 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
if (!skb)
return NULL;
- qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
+
+ skb = agg_dequeue(in_serv_agg, cl, len);
+
+ if (!skb) {
+ sch->q.qlen++;
+ return NULL;
+ }
+
+ qdisc_qstats_backlog_dec(sch, skb);
qdisc_bstats_update(sch, skb);
- agg_dequeue(in_serv_agg, cl, len);
/* If lmax is lowered, through qfq_change_class, for a class
* owning pending packets with larger size than the new value
* of lmax, then the following condition may hold.
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 796529167e8d..c45c192b7878 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1159,8 +1159,7 @@ int sctp_assoc_update(struct sctp_association *asoc,
/* Add any peer addresses from the new association. */
list_for_each_entry(trans, &new->peer.transport_addr_list,
transports)
- if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr) &&
- !sctp_assoc_add_peer(asoc, &trans->ipaddr,
+ if (!sctp_assoc_add_peer(asoc, &trans->ipaddr,
GFP_ATOMIC, trans->state))
return -ENOMEM;
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index f13d6a34f32f..ec00ee75d59a 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -282,7 +282,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
assoc->init_retries, assoc->shutdown_retries,
assoc->rtx_data_chunks,
refcount_read(&sk->sk_wmem_alloc),
- sk->sk_wmem_queued,
+ READ_ONCE(sk->sk_wmem_queued),
sk->sk_sndbuf,
sk->sk_rcvbuf);
seq_printf(seq, "\n");
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fd0631e70d46..7f89e43154c0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -69,7 +69,7 @@
#include <net/sctp/stream_sched.h>
/* Forward declarations for internal helper functions. */
-static bool sctp_writeable(struct sock *sk);
+static bool sctp_writeable(const struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
size_t msg_len);
@@ -140,7 +140,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk);
- sk->sk_wmem_queued += chunk->skb->truesize + sizeof(struct sctp_chunk);
+ sk_wmem_queued_add(sk, chunk->skb->truesize + sizeof(struct sctp_chunk));
sk_mem_charge(sk, chunk->skb->truesize);
}
@@ -2450,6 +2450,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
if (trans) {
trans->hbinterval =
msecs_to_jiffies(params->spp_hbinterval);
+ sctp_transport_reset_hb_timer(trans);
} else if (asoc) {
asoc->hbinterval =
msecs_to_jiffies(params->spp_hbinterval);
@@ -9144,7 +9145,7 @@ static void sctp_wfree(struct sk_buff *skb)
struct sock *sk = asoc->base.sk;
sk_mem_uncharge(sk, skb->truesize);
- sk->sk_wmem_queued -= skb->truesize + sizeof(struct sctp_chunk);
+ sk_wmem_queued_add(sk, -(skb->truesize + sizeof(struct sctp_chunk)));
asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk);
WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk),
&sk->sk_wmem_alloc));
@@ -9299,9 +9300,9 @@ void sctp_write_space(struct sock *sk)
* UDP-style sockets or TCP-style sockets, this code should work.
* - Daisy
*/
-static bool sctp_writeable(struct sock *sk)
+static bool sctp_writeable(const struct sock *sk)
{
- return sk->sk_sndbuf > sk->sk_wmem_queued;
+ return READ_ONCE(sk->sk_sndbuf) > READ_ONCE(sk->sk_wmem_queued);
}
/* Wait for an association to go into ESTABLISHED state. If timeout is 0,
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index bd01dd31e4bd..d520ee62c8ec 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1662,6 +1662,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link_group *lgr, *n;
+ spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
struct smc_link *link;
@@ -1680,6 +1681,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
if (link)
smc_llc_add_link_local(link);
}
+ spin_unlock_bh(&smc_lgr_list.lock);
}
/* link is down - switch connections to alternate link,
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
index b60fe1eb37ab..aa8928975cc6 100644
--- a/net/smc/smc_stats.h
+++ b/net/smc/smc_stats.h
@@ -243,8 +243,9 @@ while (0)
#define SMC_STAT_SERV_SUCC_INC(net, _ini) \
do { \
typeof(_ini) i = (_ini); \
- bool is_v2 = (i->smcd_version & SMC_V2); \
bool is_smcd = (i->is_smcd); \
+ u8 version = is_smcd ? i->smcd_version : i->smcr_version; \
+ bool is_v2 = (version & SMC_V2); \
typeof(net->smc.smc_stats) smc_stats = (net)->smc.smc_stats; \
if (is_v2 && is_smcd) \
this_cpu_inc(smc_stats->smc[SMC_TYPE_D].srv_v2_succ_cnt); \
diff --git a/net/socket.c b/net/socket.c
index 77f28328e387..c4a6f5532955 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -737,6 +737,14 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
return ret;
}
+static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
+{
+ int err = security_socket_sendmsg(sock, msg,
+ msg_data_left(msg));
+
+ return err ?: sock_sendmsg_nosec(sock, msg);
+}
+
/**
* sock_sendmsg - send a message through @sock
* @sock: socket
@@ -747,10 +755,19 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
*/
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
- int err = security_socket_sendmsg(sock, msg,
- msg_data_left(msg));
+ struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name;
+ struct sockaddr_storage address;
+ int ret;
- return err ?: sock_sendmsg_nosec(sock, msg);
+ if (msg->msg_name) {
+ memcpy(&address, msg->msg_name, msg->msg_namelen);
+ msg->msg_name = &address;
+ }
+
+ ret = __sock_sendmsg(sock, msg);
+ msg->msg_name = save_addr;
+
+ return ret;
}
EXPORT_SYMBOL(sock_sendmsg);
@@ -827,7 +844,7 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
{
- bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
+ bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
struct net_device *orig_dev;
ktime_t hwtstamp;
@@ -879,12 +896,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
struct scm_timestamping_internal tss;
-
int empty = 1, false_tstamp = 0;
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
int if_index;
ktime_t hwtstamp;
+ u32 tsflags;
/* Race occurred between timestamp enabling and packet
receiving. Fill in the current time for now. */
@@ -926,11 +943,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
}
memset(&tss, 0, sizeof(tss));
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+ tsflags = READ_ONCE(sk->sk_tsflags);
+ if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
empty = 0;
if (shhwtstamps &&
- (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+ (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
!skb_is_swtx_tstamp(skb, false_tstamp)) {
if_index = 0;
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
@@ -938,14 +956,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
else
hwtstamp = shhwtstamps->hwtstamp;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
hwtstamp = ptp_convert_timestamp(&hwtstamp,
- sk->sk_bind_phc);
+ READ_ONCE(sk->sk_bind_phc));
if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
empty = 0;
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+ if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
!skb_is_err_queue(skb))
put_ts_pktinfo(msg, skb, if_index);
}
@@ -1137,7 +1155,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (sock->type == SOCK_SEQPACKET)
msg.msg_flags |= MSG_EOR;
- res = sock_sendmsg(sock, &msg);
+ res = __sock_sendmsg(sock, &msg);
*from = msg.msg_iter;
return res;
}
@@ -2173,7 +2191,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
msg.msg_flags = flags;
- err = sock_sendmsg(sock, &msg);
+ err = __sock_sendmsg(sock, &msg);
out_put:
fput_light(sock->file, fput_needed);
@@ -2537,7 +2555,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
err = sock_sendmsg_nosec(sock, msg_sys);
goto out_freectl;
}
- err = sock_sendmsg(sock, msg_sys);
+ err = __sock_sendmsg(sock, msg_sys);
/*
* If this is sendmmsg() and sending to current destination address was
* successful, remember it.
@@ -3498,7 +3516,12 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
- return READ_ONCE(sock->ops)->bind(sock, addr, addrlen);
+ struct sockaddr_storage address;
+
+ memcpy(&address, addr, addrlen);
+
+ return READ_ONCE(sock->ops)->bind(sock, (struct sockaddr *)&address,
+ addrlen);
}
EXPORT_SYMBOL(kernel_bind);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 2f16f9d17966..814b0169f972 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -769,9 +769,14 @@ int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
* @task: controlling RPC task
* @xdr: xdr_stream containing RPC Reply header
*
- * On success, @xdr is updated to point past the verifier and
- * zero is returned. Otherwise, @xdr is in an undefined state
- * and a negative errno is returned.
+ * Return values:
+ * %0: Verifier is valid. @xdr now points past the verifier.
+ * %-EIO: Verifier is corrupted or message ended early.
+ * %-EACCES: Verifier is intact but not valid.
+ * %-EPROTONOSUPPORT: Server does not support the requested auth type.
+ *
+ * When a negative errno is returned, @xdr is left in an undefined
+ * state.
*/
int
rpcauth_checkverf(struct rpc_task *task, struct xdr_stream *xdr)
diff --git a/net/sunrpc/auth_tls.c b/net/sunrpc/auth_tls.c
index de7678f8a23d..87f570fd3b00 100644
--- a/net/sunrpc/auth_tls.c
+++ b/net/sunrpc/auth_tls.c
@@ -129,9 +129,9 @@ static int tls_validate(struct rpc_task *task, struct xdr_stream *xdr)
if (*p != rpc_auth_null)
return -EIO;
if (xdr_stream_decode_opaque_inline(xdr, &str, starttls_len) != starttls_len)
- return -EIO;
+ return -EPROTONOSUPPORT;
if (memcmp(str, starttls_token, starttls_len))
- return -EIO;
+ return -EPROTONOSUPPORT;
return 0;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8d75290f1a31..9c210273d06b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2476,8 +2476,7 @@ call_status(struct rpc_task *task)
goto out_exit;
}
task->tk_action = call_encode;
- if (status != -ECONNRESET && status != -ECONNABORTED)
- rpc_check_timeout(task);
+ rpc_check_timeout(task);
return;
out_exit:
rpc_call_rpcerror(task, status);
@@ -2725,7 +2724,15 @@ out_unparsable:
out_verifier:
trace_rpc_bad_verifier(task);
- goto out_err;
+ switch (error) {
+ case -EPROTONOSUPPORT:
+ goto out_err;
+ case -EACCES:
+ /* Re-encode with a fresh cred */
+ fallthrough;
+ default:
+ goto out_garbage;
+ }
out_msg_denied:
error = -EACCES;
@@ -2751,6 +2758,7 @@ out_msg_denied:
case rpc_autherr_rejectedverf:
case rpcsec_gsserr_credproblem:
case rpcsec_gsserr_ctxproblem:
+ rpcauth_invalcred(task);
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
@@ -2907,19 +2915,22 @@ static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
* @clnt: pointer to struct rpc_clnt
* @xps: pointer to struct rpc_xprt_switch,
* @xprt: pointer struct rpc_xprt
- * @dummy: unused
+ * @in_max_connect: pointer to the max_connect value for the passed in xprt transport
*/
int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
- void *dummy)
+ void *in_max_connect)
{
struct rpc_cb_add_xprt_calldata *data;
struct rpc_task *task;
+ int max_connect = clnt->cl_max_connect;
- if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) {
+ if (in_max_connect)
+ max_connect = *(int *)in_max_connect;
+ if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) {
rcu_read_lock();
pr_warn("SUNRPC: reached max allowed number (%d) did not add "
- "transport to server: %s\n", clnt->cl_max_connect,
+ "transport to server: %s\n", max_connect,
rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
rcu_read_unlock();
return -EINVAL;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 71cd916e384f..a15bf2ede89b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2672,6 +2672,10 @@ static void xs_tcp_tls_setup_socket(struct work_struct *work)
rcu_read_lock();
lower_xprt = rcu_dereference(lower_clnt->cl_xprt);
rcu_read_unlock();
+
+ if (wait_on_bit_lock(&lower_xprt->state, XPRT_LOCKED, TASK_KILLABLE))
+ goto out_unlock;
+
status = xs_tls_handshake_sync(lower_xprt, &upper_xprt->xprtsec);
if (status) {
trace_rpc_tls_not_started(upper_clnt, upper_xprt);
@@ -2681,6 +2685,7 @@ static void xs_tcp_tls_setup_socket(struct work_struct *work)
status = xs_tcp_tls_finish_connecting(lower_xprt, upper_transport);
if (status)
goto out_close;
+ xprt_release_write(lower_xprt, NULL);
trace_rpc_socket_connect(upper_xprt, upper_transport->sock, 0);
if (!xprt_test_and_set_connected(upper_xprt)) {
@@ -2702,6 +2707,7 @@ out_unlock:
return;
out_close:
+ xprt_release_write(lower_xprt, NULL);
rpc_shutdown_client(lower_clnt);
/* xprt_force_disconnect() wakes tasks with a fixed tk_status code.
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 302fd749c424..43c3f1c971b8 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1441,14 +1441,14 @@ static int tipc_crypto_key_revoke(struct net *net, u8 tx_key)
struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
struct tipc_key key;
- spin_lock(&tx->lock);
+ spin_lock_bh(&tx->lock);
key = tx->key;
WARN_ON(!key.active || tx_key != key.active);
/* Free the active key */
tipc_crypto_key_set_state(tx, key.passive, 0, key.pending);
tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
- spin_unlock(&tx->lock);
+ spin_unlock_bh(&tx->lock);
pr_warn("%s: key is revoked\n", tx->name);
return -EKEYREVOKED;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1ed4a611631f..d1fc295b83b5 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -817,7 +817,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
psock = sk_psock_get(sk);
if (!psock || !policy) {
err = tls_push_record(sk, flags, record_type);
- if (err && sk->sk_err == EBADMSG) {
+ if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
err = -sk->sk_err;
@@ -846,7 +846,7 @@ more_data:
switch (psock->eval) {
case __SK_PASS:
err = tls_push_record(sk, flags, record_type);
- if (err && sk->sk_err == EBADMSG) {
+ if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
err = -sk->sk_err;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 86930a8ed012..3e8a04a13668 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -680,7 +680,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
* What the above comment does talk about? --ANK(980817)
*/
- if (unix_tot_inflight)
+ if (READ_ONCE(unix_tot_inflight))
unix_gc(); /* Garbage collect fds */
}
diff --git a/net/unix/scm.c b/net/unix/scm.c
index e9dde7176c8a..6ff628f2349f 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -64,7 +64,7 @@ void unix_inflight(struct user_struct *user, struct file *fp)
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
}
- user->unix_inflight++;
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
spin_unlock(&unix_gc_lock);
}
@@ -85,7 +85,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
}
- user->unix_inflight--;
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
spin_unlock(&unix_gc_lock);
}
@@ -99,7 +99,7 @@ static inline bool too_many_unix_fds(struct task_struct *p)
{
struct user_struct *user = current_user();
- if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+ if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
return false;
}
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 25bc2e50a061..64e861617110 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1181,16 +1181,11 @@ void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked,
}
EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason);
-void cfg80211_cqm_config_free(struct wireless_dev *wdev)
-{
- kfree(wdev->cqm_config);
- wdev->cqm_config = NULL;
-}
-
static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
bool unregister_netdev)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct cfg80211_cqm_config *cqm_config;
unsigned int link_id;
ASSERT_RTNL();
@@ -1227,7 +1222,10 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
kfree_sensitive(wdev->wext.keys);
wdev->wext.keys = NULL;
#endif
- cfg80211_cqm_config_free(wdev);
+ wiphy_work_cancel(wdev->wiphy, &wdev->cqm_rssi_work);
+ /* deleted from the list, so can't be found from nl80211 any more */
+ cqm_config = rcu_access_pointer(wdev->cqm_config);
+ kfree_rcu(cqm_config, rcu_head);
/*
* Ensure that all events have been processed and
@@ -1379,6 +1377,8 @@ void cfg80211_init_wdev(struct wireless_dev *wdev)
wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
#endif
+ wiphy_work_init(&wdev->cqm_rssi_work, cfg80211_cqm_rssi_notify_work);
+
if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT)
wdev->ps = true;
else
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 507d184b8b40..ba9c7170afa4 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -295,12 +295,17 @@ struct cfg80211_beacon_registration {
};
struct cfg80211_cqm_config {
+ struct rcu_head rcu_head;
u32 rssi_hyst;
s32 last_rssi_event_value;
+ enum nl80211_cqm_rssi_threshold_event last_rssi_event_type;
int n_rssi_thresholds;
s32 rssi_thresholds[] __counted_by(n_rssi_thresholds);
};
+void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy,
+ struct wiphy_work *work);
+
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);
/* free object */
@@ -566,8 +571,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
#define CFG80211_DEV_WARN_ON(cond) ({bool __r = (cond); __r; })
#endif
-void cfg80211_cqm_config_free(struct wireless_dev *wdev);
-
void cfg80211_release_pmsr(struct wireless_dev *wdev, u32 portid);
void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev);
void cfg80211_pmsr_free_wk(struct work_struct *work);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 775cac4d6100..3e2c398abddc 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -52,7 +52,8 @@ void cfg80211_rx_assoc_resp(struct net_device *dev,
cr.links[link_id].bssid = data->links[link_id].bss->bssid;
cr.links[link_id].addr = data->links[link_id].addr;
/* need to have local link addresses for MLO connections */
- WARN_ON(cr.ap_mld_addr && !cr.links[link_id].addr);
+ WARN_ON(cr.ap_mld_addr &&
+ !is_valid_ether_addr(cr.links[link_id].addr));
BUG_ON(!cr.links[link_id].bss->channel);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index de47838aca4f..931a03f4549c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5909,6 +5909,21 @@ out:
nlmsg_free(msg);
}
+static int nl80211_validate_ap_phy_operation(struct cfg80211_ap_settings *params)
+{
+ struct ieee80211_channel *channel = params->chandef.chan;
+
+ if ((params->he_cap || params->he_oper) &&
+ (channel->flags & IEEE80211_CHAN_NO_HE))
+ return -EOPNOTSUPP;
+
+ if ((params->eht_cap || params->eht_oper) &&
+ (channel->flags & IEEE80211_CHAN_NO_EHT))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -6178,6 +6193,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (err)
goto out_unlock;
+ err = nl80211_validate_ap_phy_operation(params);
+ if (err)
+ goto out_unlock;
+
if (info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS])
params->flags = nla_get_u32(
info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]);
@@ -8482,7 +8501,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb,
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct mesh_config cfg;
+ struct mesh_config cfg = {};
u32 mask;
int err;
@@ -12796,7 +12815,8 @@ static int nl80211_set_cqm_txe(struct genl_info *info,
}
static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+ struct net_device *dev,
+ struct cfg80211_cqm_config *cqm_config)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
s32 last, low, high;
@@ -12805,7 +12825,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
int err;
/* RSSI reporting disabled? */
- if (!wdev->cqm_config)
+ if (!cqm_config)
return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);
/*
@@ -12814,7 +12834,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
* connection is established and enough beacons received to calculate
* the average.
*/
- if (!wdev->cqm_config->last_rssi_event_value &&
+ if (!cqm_config->last_rssi_event_value &&
wdev->links[0].client.current_bss &&
rdev->ops->get_station) {
struct station_info sinfo = {};
@@ -12828,30 +12848,30 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
cfg80211_sinfo_release_content(&sinfo);
if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG))
- wdev->cqm_config->last_rssi_event_value =
+ cqm_config->last_rssi_event_value =
(s8) sinfo.rx_beacon_signal_avg;
}
- last = wdev->cqm_config->last_rssi_event_value;
- hyst = wdev->cqm_config->rssi_hyst;
- n = wdev->cqm_config->n_rssi_thresholds;
+ last = cqm_config->last_rssi_event_value;
+ hyst = cqm_config->rssi_hyst;
+ n = cqm_config->n_rssi_thresholds;
for (i = 0; i < n; i++) {
i = array_index_nospec(i, n);
- if (last < wdev->cqm_config->rssi_thresholds[i])
+ if (last < cqm_config->rssi_thresholds[i])
break;
}
low_index = i - 1;
if (low_index >= 0) {
low_index = array_index_nospec(low_index, n);
- low = wdev->cqm_config->rssi_thresholds[low_index] - hyst;
+ low = cqm_config->rssi_thresholds[low_index] - hyst;
} else {
low = S32_MIN;
}
if (i < n) {
i = array_index_nospec(i, n);
- high = wdev->cqm_config->rssi_thresholds[i] + hyst - 1;
+ high = cqm_config->rssi_thresholds[i] + hyst - 1;
} else {
high = S32_MAX;
}
@@ -12864,6 +12884,7 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
u32 hysteresis)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct cfg80211_cqm_config *cqm_config = NULL, *old;
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
int i, err;
@@ -12881,10 +12902,6 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
return -EOPNOTSUPP;
- wdev_lock(wdev);
- cfg80211_cqm_config_free(wdev);
- wdev_unlock(wdev);
-
if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) {
if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */
return rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
@@ -12901,9 +12918,10 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
n_thresholds = 0;
wdev_lock(wdev);
- if (n_thresholds) {
- struct cfg80211_cqm_config *cqm_config;
+ old = rcu_dereference_protected(wdev->cqm_config,
+ lockdep_is_held(&wdev->mtx));
+ if (n_thresholds) {
cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds,
n_thresholds),
GFP_KERNEL);
@@ -12918,11 +12936,18 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
flex_array_size(cqm_config, rssi_thresholds,
n_thresholds));
- wdev->cqm_config = cqm_config;
+ rcu_assign_pointer(wdev->cqm_config, cqm_config);
+ } else {
+ RCU_INIT_POINTER(wdev->cqm_config, NULL);
}
- err = cfg80211_cqm_rssi_update(rdev, dev);
-
+ err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config);
+ if (err) {
+ rcu_assign_pointer(wdev->cqm_config, old);
+ kfree_rcu(cqm_config, rcu_head);
+ } else {
+ kfree_rcu(old, rcu_head);
+ }
unlock:
wdev_unlock(wdev);
@@ -19073,9 +19098,8 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
enum nl80211_cqm_rssi_threshold_event rssi_event,
s32 rssi_level, gfp_t gfp)
{
- struct sk_buff *msg;
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct cfg80211_cqm_config *cqm_config;
trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level);
@@ -19083,18 +19107,41 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH))
return;
- if (wdev->cqm_config) {
- wdev->cqm_config->last_rssi_event_value = rssi_level;
+ rcu_read_lock();
+ cqm_config = rcu_dereference(wdev->cqm_config);
+ if (cqm_config) {
+ cqm_config->last_rssi_event_value = rssi_level;
+ cqm_config->last_rssi_event_type = rssi_event;
+ wiphy_work_queue(wdev->wiphy, &wdev->cqm_rssi_work);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
- cfg80211_cqm_rssi_update(rdev, dev);
+void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work)
+{
+ struct wireless_dev *wdev = container_of(work, struct wireless_dev,
+ cqm_rssi_work);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ enum nl80211_cqm_rssi_threshold_event rssi_event;
+ struct cfg80211_cqm_config *cqm_config;
+ struct sk_buff *msg;
+ s32 rssi_level;
- if (rssi_level == 0)
- rssi_level = wdev->cqm_config->last_rssi_event_value;
- }
+ wdev_lock(wdev);
+ cqm_config = rcu_dereference_protected(wdev->cqm_config,
+ lockdep_is_held(&wdev->mtx));
+ if (!wdev->cqm_config)
+ goto unlock;
- msg = cfg80211_prepare_cqm(dev, NULL, gfp);
+ cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config);
+
+ rssi_level = cqm_config->last_rssi_event_value;
+ rssi_event = cqm_config->last_rssi_event_type;
+
+ msg = cfg80211_prepare_cqm(wdev->netdev, NULL, GFP_KERNEL);
if (!msg)
- return;
+ goto unlock;
if (nla_put_u32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
rssi_event))
@@ -19104,14 +19151,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
rssi_level))
goto nla_put_failure;
- cfg80211_send_cqm(msg, gfp);
+ cfg80211_send_cqm(msg, GFP_KERNEL);
- return;
+ goto unlock;
nla_put_failure:
nlmsg_free(msg);
+ unlock:
+ wdev_unlock(wdev);
}
-EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
void cfg80211_cqm_txe_notify(struct net_device *dev,
const u8 *peer, u32 num_packets,
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 0cf1ce7b6934..939deecf0bbe 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -908,6 +908,10 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
!cfg80211_find_ssid_match(ap, request))
continue;
+ if (!is_broadcast_ether_addr(request->bssid) &&
+ !ether_addr_equal(request->bssid, ap->bssid))
+ continue;
+
if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid)
continue;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index fcfc8472f73d..55f8b9b0e06d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -602,7 +602,7 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
if (unlikely(i >= MAX_SKB_FRAGS))
- return ERR_PTR(-EFAULT);
+ return ERR_PTR(-EOVERFLOW);
page = pool->umem->pgs[addr >> PAGE_SHIFT];
get_page(page);
@@ -655,15 +655,17 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb_put(skb, len);
err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err))
+ if (unlikely(err)) {
+ kfree_skb(skb);
goto free_err;
+ }
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
struct page *page;
u8 *vaddr;
if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
- err = -EFAULT;
+ err = -EOVERFLOW;
goto free_err;
}
@@ -690,12 +692,14 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
return skb;
free_err:
- if (err == -EAGAIN) {
- xsk_cq_cancel_locked(xs, 1);
- } else {
- xsk_set_destructor_arg(skb);
- xsk_drop_skb(skb);
+ if (err == -EOVERFLOW) {
+ /* Drop the packet */
+ xsk_set_destructor_arg(xs->skb);
+ xsk_drop_skb(xs->skb);
xskq_cons_release(xs->tx);
+ } else {
+ /* Let application retry */
+ xsk_cq_cancel_locked(xs, 1);
}
return ERR_PTR(err);
@@ -738,7 +742,7 @@ static int __xsk_generic_xmit(struct sock *sk)
skb = xsk_build_skb(xs, &desc);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
- if (err == -EAGAIN)
+ if (err != -EOVERFLOW)
goto out;
err = 0;
continue;
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index c014217f5fa7..22b36c8143cf 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -111,6 +111,9 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
sock_diag_save_cookie(sk, msg->xdiag_cookie);
mutex_lock(&xs->mutex);
+ if (READ_ONCE(xs->state) == XSK_UNBOUND)
+ goto out_nlmsg_trim;
+
if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
goto out_nlmsg_trim;