author    Joel Stanley <joel@jms.id.au>  2022-04-14 11:19:26 +0300
committer Joel Stanley <joel@jms.id.au>  2022-04-14 11:19:30 +0300
commit    de814262c8d79706d05d4660c56fbcaf7d8be133 (patch)
tree      427833c411c3cf40c44ba5221922a11adf98d99a /net
parent    986e1d447f78b3e82da2080b044536545f7b6617 (diff)
parent    1b86fc15ba6d04e393d6e65753f2013963d407f3 (diff)
Merge tag 'v5.15.34' into dev-5.15

This is the 5.15.34 stable release

Signed-off-by: Joel Stanley <joel@jms.id.au>
Diffstat (limited to 'net')
-rw-r--r--  net/batman-adv/multicast.c | 2
-rw-r--r--  net/bluetooth/hci_conn.c | 2
-rw-r--r--  net/bluetooth/hci_event.c | 3
-rw-r--r--  net/bluetooth/l2cap_core.c | 1
-rw-r--r--  net/bpf/test_run.c | 4
-rw-r--r--  net/can/isotp.c | 81
-rw-r--r--  net/core/dev.c | 3
-rw-r--r--  net/core/filter.c | 30
-rw-r--r--  net/core/net_namespace.c | 17
-rw-r--r--  net/core/rtnetlink.c | 13
-rw-r--r--  net/core/skbuff.c | 66
-rw-r--r--  net/core/skmsg.c | 17
-rw-r--r--  net/dsa/dsa2.c | 6
-rw-r--r--  net/ipv4/arp.c | 9
-rw-r--r--  net/ipv4/fib_frontend.c | 5
-rw-r--r--  net/ipv4/fib_semantics.c | 7
-rw-r--r--  net/ipv4/inet_hashtables.c | 53
-rw-r--r--  net/ipv4/route.c | 18
-rw-r--r--  net/ipv4/tcp_bpf.c | 14
-rw-r--r--  net/ipv4/tcp_output.c | 5
-rw-r--r--  net/ipv6/addrconf.c | 4
-rw-r--r--  net/ipv6/esp6.c | 3
-rw-r--r--  net/ipv6/inet6_hashtables.c | 5
-rw-r--r--  net/ipv6/ip6_input.c | 2
-rw-r--r--  net/ipv6/ip6_output.c | 4
-rw-r--r--  net/ipv6/ip6mr.c | 8
-rw-r--r--  net/ipv6/route.c | 2
-rw-r--r--  net/ipv6/xfrm6_output.c | 16
-rw-r--r--  net/key/af_key.c | 2
-rw-r--r--  net/llc/af_llc.c | 48
-rw-r--r--  net/mac80211/cfg.c | 3
-rw-r--r--  net/mac80211/ieee80211_i.h | 2
-rw-r--r--  net/mac80211/main.c | 13
-rw-r--r--  net/mac80211/mesh.c | 2
-rw-r--r--  net/mac80211/mlme.c | 15
-rw-r--r--  net/mac80211/util.c | 27
-rw-r--r--  net/mctp/route.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 89
-rw-r--r--  net/netfilter/nf_conntrack_helper.c | 6
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c | 17
-rw-r--r--  net/netfilter/nf_flow_table_inet.c | 17
-rw-r--r--  net/netfilter/nf_flow_table_ip.c | 18
-rw-r--r--  net/netfilter/nf_tables_api.c | 22
-rw-r--r--  net/netfilter/nf_tables_core.c | 2
-rw-r--r--  net/netfilter/nft_ct.c | 3
-rw-r--r--  net/netlabel/netlabel_kapi.c | 2
-rw-r--r--  net/netlink/af_netlink.c | 2
-rw-r--r--  net/openvswitch/actions.c | 2
-rw-r--r--  net/openvswitch/conntrack.c | 132
-rw-r--r--  net/openvswitch/flow_netlink.c | 103
-rw-r--r--  net/packet/af_packet.c | 11
-rw-r--r--  net/rfkill/core.c | 48
-rw-r--r--  net/rxrpc/ar-internal.h | 15
-rw-r--r--  net/rxrpc/call_event.c | 2
-rw-r--r--  net/rxrpc/call_object.c | 40
-rw-r--r--  net/rxrpc/net_ns.c | 2
-rw-r--r--  net/rxrpc/server_key.c | 7
-rw-r--r--  net/sched/act_ct.c | 19
-rw-r--r--  net/sctp/outqueue.c | 6
-rw-r--r--  net/smc/af_smc.c | 8
-rw-r--r--  net/smc/smc_core.c | 2
-rw-r--r--  net/smc/smc_tx.c | 25
-rw-r--r--  net/smc/smc_tx.h | 1
-rw-r--r--  net/sunrpc/clnt.c | 11
-rw-r--r--  net/sunrpc/sched.c | 11
-rw-r--r--  net/sunrpc/svcsock.c | 4
-rw-r--r--  net/sunrpc/xprt.c | 26
-rw-r--r--  net/sunrpc/xprtrdma/transport.c | 6
-rw-r--r--  net/sunrpc/xprtsock.c | 73
-rw-r--r--  net/tipc/socket.c | 3
-rw-r--r--  net/tls/tls_sw.c | 2
-rw-r--r--  net/unix/af_unix.c | 16
-rw-r--r--  net/vmw_vsock/af_vsock.c | 9
-rw-r--r--  net/vmw_vsock/virtio_transport.c | 18
-rw-r--r--  net/vmw_vsock/vmci_transport.c | 5
-rw-r--r--  net/wireless/scan.c | 9
-rw-r--r--  net/x25/af_x25.c | 11
-rw-r--r--  net/xdp/xsk.c | 69
-rw-r--r--  net/xfrm/xfrm_interface.c | 5
79 files changed, 954 insertions, 439 deletions
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 6e3419beca09..2853634a3979 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -134,7 +134,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev)
{
struct inet6_dev *in6_dev = __in6_dev_get(dev);
- if (in6_dev && in6_dev->cnf.mc_forwarding)
+ if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding))
return BATADV_NO_FLAGS;
else
return BATADV_MCAST_WANT_NO_RTR6;
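
This batman-adv hunk is one of several in the merge (see also addrconf.c, ip6_input.c and ip6mr.c below) that convert cnf.mc_forwarding to atomic accesses, so lockless readers cannot race the writers in ip6mr. A minimal userspace sketch of the read side, using C11 atomics and invented names rather than the kernel's inet6_dev/ipv6_devconf:

#include <stdatomic.h>
#include <stdio.h>

struct fake_devconf {
    atomic_int mc_forwarding;           /* was a plain int before the fix */
};

/* lockless reader, analogous to the hook patched above */
static int rtr_flags(struct fake_devconf *cnf)
{
    return atomic_load(&cnf->mc_forwarding) ? 0 /* no flags */ : 1;
}

int main(void)
{
    struct fake_devconf cnf = { .mc_forwarding = 0 };

    atomic_fetch_add(&cnf.mc_forwarding, 1);    /* the mif6_add() side */
    printf("%d\n", rtr_flags(&cnf));            /* prints 0 */
    return 0;
}
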
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 2b5059a56cda..7a7e92be1652 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -541,7 +541,9 @@ static void le_conn_timeout(struct work_struct *work)
if (conn->role == HCI_ROLE_SLAVE) {
/* Disable LE Advertising */
le_disable_advertising(hdev);
+ hci_dev_lock(hdev);
hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+ hci_dev_unlock(hdev);
return;
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 868a22df3285..e984a8b4b914 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5153,8 +5153,9 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev,
hci_dev_lock(hdev);
hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (hcon) {
+ if (hcon && hcon->type == AMP_LINK) {
hcon->state = BT_CLOSED;
+ hci_disconn_cfm(hcon, ev->reason);
hci_conn_del(hcon);
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 77ba68209dbd..c57a45df7a26 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1436,6 +1436,7 @@ static void l2cap_ecred_connect(struct l2cap_chan *chan)
l2cap_ecred_init(chan, 0);
+ memset(&data, 0, sizeof(data));
data.pdu.req.psm = chan->psm;
data.pdu.req.mtu = cpu_to_le16(chan->imtu);
data.pdu.req.mps = cpu_to_le16(chan->mps);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index b5f4ef35357c..655ee0e2de86 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -954,7 +954,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
goto out;
- if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
+ if (user_ctx->local_port > U16_MAX) {
ret = -ERANGE;
goto out;
}
@@ -962,7 +962,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
ctx.family = (u16)user_ctx->family;
ctx.protocol = (u16)user_ctx->protocol;
ctx.dport = (u16)user_ctx->local_port;
- ctx.sport = (__force __be16)user_ctx->remote_port;
+ ctx.sport = user_ctx->remote_port;
switch (ctx.family) {
case AF_INET:
diff --git a/net/can/isotp.c b/net/can/isotp.c
index d2a430b6a13b..5bce7c66c121 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -141,6 +141,7 @@ struct isotp_sock {
struct can_isotp_options opt;
struct can_isotp_fc_options rxfc, txfc;
struct can_isotp_ll_options ll;
+ u32 frame_txtime;
u32 force_tx_stmin;
u32 force_rx_stmin;
struct tpcon rx, tx;
@@ -360,7 +361,7 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
so->tx_gap = ktime_set(0, 0);
/* add transmission time for CAN frame N_As */
- so->tx_gap = ktime_add_ns(so->tx_gap, so->opt.frame_txtime);
+ so->tx_gap = ktime_add_ns(so->tx_gap, so->frame_txtime);
/* add waiting time for consecutive frames N_Cs */
if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN)
so->tx_gap = ktime_add_ns(so->tx_gap,
@@ -1005,26 +1006,29 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
- int err = 0;
- int noblock;
+ struct isotp_sock *so = isotp_sk(sk);
+ int noblock = flags & MSG_DONTWAIT;
+ int ret = 0;
- noblock = flags & MSG_DONTWAIT;
- flags &= ~MSG_DONTWAIT;
+ if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
+ return -EINVAL;
+
+ if (!so->bound)
+ return -EADDRNOTAVAIL;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ flags &= ~MSG_DONTWAIT;
+ skb = skb_recv_datagram(sk, flags, noblock, &ret);
if (!skb)
- return err;
+ return ret;
if (size < skb->len)
msg->msg_flags |= MSG_TRUNC;
else
size = skb->len;
- err = memcpy_to_msg(msg, skb->data, size);
- if (err < 0) {
- skb_free_datagram(sk, skb);
- return err;
- }
+ ret = memcpy_to_msg(msg, skb->data, size);
+ if (ret < 0)
+ goto out_err;
sock_recv_timestamp(msg, sk, skb);
@@ -1034,9 +1038,13 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
}
+ /* set length of return value */
+ ret = (flags & MSG_TRUNC) ? skb->len : size;
+
+out_err:
skb_free_datagram(sk, skb);
- return size;
+ return ret;
}
static int isotp_release(struct socket *sock)
@@ -1104,6 +1112,7 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
struct net *net = sock_net(sk);
int ifindex;
struct net_device *dev;
+ canid_t tx_id, rx_id;
int err = 0;
int notify_enetdown = 0;
int do_rx_reg = 1;
@@ -1111,8 +1120,18 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (len < ISOTP_MIN_NAMELEN)
return -EINVAL;
- if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
- return -EADDRNOTAVAIL;
+ /* sanitize tx/rx CAN identifiers */
+ tx_id = addr->can_addr.tp.tx_id;
+ if (tx_id & CAN_EFF_FLAG)
+ tx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
+ else
+ tx_id &= CAN_SFF_MASK;
+
+ rx_id = addr->can_addr.tp.rx_id;
+ if (rx_id & CAN_EFF_FLAG)
+ rx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
+ else
+ rx_id &= CAN_SFF_MASK;
if (!addr->can_ifindex)
return -ENODEV;
@@ -1124,21 +1143,13 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
do_rx_reg = 0;
/* do not validate rx address for functional addressing */
- if (do_rx_reg) {
- if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) {
- err = -EADDRNOTAVAIL;
- goto out;
- }
-
- if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) {
- err = -EADDRNOTAVAIL;
- goto out;
- }
+ if (do_rx_reg && rx_id == tx_id) {
+ err = -EADDRNOTAVAIL;
+ goto out;
}
if (so->bound && addr->can_ifindex == so->ifindex &&
- addr->can_addr.tp.rx_id == so->rxid &&
- addr->can_addr.tp.tx_id == so->txid)
+ rx_id == so->rxid && tx_id == so->txid)
goto out;
dev = dev_get_by_index(net, addr->can_ifindex);
@@ -1162,8 +1173,7 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
ifindex = dev->ifindex;
if (do_rx_reg)
- can_rx_register(net, dev, addr->can_addr.tp.rx_id,
- SINGLE_MASK(addr->can_addr.tp.rx_id),
+ can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id),
isotp_rcv, sk, "isotp", sk);
dev_put(dev);
@@ -1183,8 +1193,8 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
/* switch to new settings */
so->ifindex = ifindex;
- so->rxid = addr->can_addr.tp.rx_id;
- so->txid = addr->can_addr.tp.tx_id;
+ so->rxid = rx_id;
+ so->txid = tx_id;
so->bound = 1;
out:
@@ -1238,6 +1248,14 @@ static int isotp_setsockopt_locked(struct socket *sock, int level, int optname,
/* no separate rx_ext_address is given => use ext_address */
if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR))
so->opt.rx_ext_address = so->opt.ext_address;
+
+ /* check for frame_txtime changes (0 => no changes) */
+ if (so->opt.frame_txtime) {
+ if (so->opt.frame_txtime == CAN_ISOTP_FRAME_TXTIME_ZERO)
+ so->frame_txtime = 0;
+ else
+ so->frame_txtime = so->opt.frame_txtime;
+ }
break;
case CAN_ISOTP_RECV_FC:
@@ -1439,6 +1457,7 @@ static int isotp_init(struct sock *sk)
so->opt.rxpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
so->opt.txpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
so->opt.frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
+ so->frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
so->rxfc.bs = CAN_ISOTP_DEFAULT_RECV_BS;
so->rxfc.stmin = CAN_ISOTP_DEFAULT_RECV_STMIN;
so->rxfc.wftmax = CAN_ISOTP_DEFAULT_RECV_WFTMAX;
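
The isotp_bind() rework above stops rejecting addresses that carry stray flag bits and instead masks tx_id/rx_id down to a valid standard (SFF) or extended (EFF) identifier. The masking logic in isolation, as a userspace sketch built against the CAN UAPI header (the sample IDs are arbitrary):

#include <linux/can.h>
#include <stdio.h>

static canid_t sanitize_can_id(canid_t id)
{
    if (id & CAN_EFF_FLAG)                  /* 29-bit extended frame */
        id &= CAN_EFF_FLAG | CAN_EFF_MASK;
    else                                    /* 11-bit standard frame */
        id &= CAN_SFF_MASK;
    return id;
}

int main(void)
{
    /* RTR/ERR bits and out-of-range ID bits are silently dropped */
    printf("%08x\n", sanitize_can_id(0x123 | CAN_RTR_FLAG));      /* 00000123 */
    printf("%08x\n", sanitize_can_id(0x18da10f1 | CAN_EFF_FLAG)); /* 98da10f1 */
    return 0;
}
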
diff --git a/net/core/dev.c b/net/core/dev.c
index 33dc2a3ff7d7..804aba2228c2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11378,8 +11378,7 @@ static int __net_init netdev_init(struct net *net)
BUILD_BUG_ON(GRO_HASH_BUCKETS >
8 * sizeof_field(struct napi_struct, gro_bitmask));
- if (net != &init_net)
- INIT_LIST_HEAD(&net->dev_base_head);
+ INIT_LIST_HEAD(&net->dev_base_head);
net->dev_name_head = netdev_create_hash();
if (net->dev_name_head == NULL)
diff --git a/net/core/filter.c b/net/core/filter.c
index 76e406965b6f..cdd7e92db303 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6719,24 +6719,33 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (!th->ack || th->rst || th->syn)
return -ENOENT;
+ if (unlikely(iph_len < sizeof(struct iphdr)))
+ return -EINVAL;
+
if (tcp_synq_no_recent_overflow(sk))
return -ENOENT;
cookie = ntohl(th->ack_seq) - 1;
- switch (sk->sk_family) {
- case AF_INET:
- if (unlikely(iph_len < sizeof(struct iphdr)))
+ /* Both struct iphdr and struct ipv6hdr have the version field at the
+ * same offset so we can cast to the shorter header (struct iphdr).
+ */
+ switch (((struct iphdr *)iph)->version) {
+ case 4:
+ if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
return -EINVAL;
ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
break;
#if IS_BUILTIN(CONFIG_IPV6)
- case AF_INET6:
+ case 6:
if (unlikely(iph_len < sizeof(struct ipv6hdr)))
return -EINVAL;
+ if (sk->sk_family != AF_INET6)
+ return -EINVAL;
+
ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
break;
#endif /* CONFIG_IPV6 */
@@ -7966,6 +7975,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
+ int field_size;
if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
@@ -7977,7 +7987,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case offsetof(struct bpf_sock, family):
case offsetof(struct bpf_sock, type):
case offsetof(struct bpf_sock, protocol):
- case offsetof(struct bpf_sock, dst_port):
case offsetof(struct bpf_sock, src_port):
case offsetof(struct bpf_sock, rx_queue_mapping):
case bpf_ctx_range(struct bpf_sock, src_ip4):
@@ -7986,6 +7995,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
+ case bpf_ctx_range(struct bpf_sock, dst_port):
+ field_size = size == size_default ?
+ size_default : sizeof_field(struct bpf_sock, dst_port);
+ bpf_ctx_record_field_size(info, field_size);
+ return bpf_ctx_narrow_access_ok(off, size, field_size);
+ case offsetofend(struct bpf_sock, dst_port) ...
+ offsetof(struct bpf_sock, dst_ip4) - 1:
+ return false;
}
return size == size_default;
@@ -10523,7 +10540,8 @@ static bool sk_lookup_is_valid_access(int off, int size,
case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
- case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
+ case offsetof(struct bpf_sk_lookup, remote_port) ...
+ offsetof(struct bpf_sk_lookup, local_ip4) - 1:
case bpf_ctx_range(struct bpf_sk_lookup, local_port):
bpf_ctx_record_field_size(info, sizeof(__u32));
return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
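
The bpf_tcp_check_syncookie() hunk above switches the dispatch from sk->sk_family to the version field of the packet header itself, since a dual-stack AF_INET6 socket can legitimately receive IPv4 packets. Both header types keep the version in the top four bits of the first byte, which is what makes the cast safe; a standalone sketch of the probe (hand-rolled buffers instead of struct iphdr/ipv6hdr):

#include <stdint.h>
#include <stdio.h>

static int ip_header_version(const void *iph)
{
    /* IPv4 and IPv6 both store the version in the high nibble of byte 0 */
    return *(const uint8_t *)iph >> 4;
}

int main(void)
{
    uint8_t v4_pkt[20] = { 0x45 };    /* version 4, IHL 5 */
    uint8_t v6_pkt[40] = { 0x60 };    /* version 6 */

    printf("%d %d\n", ip_header_version(v4_pkt), ip_header_version(v6_pkt));
    return 0;
}
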
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 9702d2b0d920..9745cb6fdf51 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -44,13 +44,7 @@ EXPORT_SYMBOL_GPL(net_rwsem);
static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
#endif
-struct net init_net = {
- .ns.count = REFCOUNT_INIT(1),
- .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
-#ifdef CONFIG_KEYS
- .key_domain = &init_net_key_domain,
-#endif
-};
+struct net init_net;
EXPORT_SYMBOL(init_net);
static bool init_net_initialized;
@@ -1081,7 +1075,7 @@ out:
rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}
-static int __init net_ns_init(void)
+void __init net_ns_init(void)
{
struct net_generic *ng;
@@ -1102,6 +1096,9 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
+#ifdef CONFIG_KEYS
+ init_net.key_domain = &init_net_key_domain;
+#endif
down_write(&pernet_ops_rwsem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
@@ -1116,12 +1113,8 @@ static int __init net_ns_init(void)
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
RTNL_FLAG_DOIT_UNLOCKED);
-
- return 0;
}
-pure_initcall(net_ns_init);
-
static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
{
ops_pre_exit_list(ops, net_exit_list);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 91d7a5a5a08d..9c0e8ccf9bc5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3631,13 +3631,24 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
bool *changed, struct netlink_ext_ack *extack)
{
char *alt_ifname;
+ size_t size;
int err;
err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack);
if (err)
return err;
- alt_ifname = nla_strdup(attr, GFP_KERNEL);
+ if (cmd == RTM_NEWLINKPROP) {
+ size = rtnl_prop_list_size(dev);
+ size += nla_total_size(ALTIFNAMSIZ);
+ if (size >= U16_MAX) {
+ NL_SET_ERR_MSG(extack,
+ "effective property list too long");
+ return -EINVAL;
+ }
+ }
+
+ alt_ifname = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
if (!alt_ifname)
return -ENOMEM;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5861e52da647..0db93122adeb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -204,7 +204,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data,
skb->head = data;
skb->data = data;
skb_reset_tail_pointer(skb);
- skb->end = skb->tail + size;
+ skb_set_end_offset(skb, size);
skb->mac_header = (typeof(skb->mac_header))~0U;
skb->transport_header = (typeof(skb->transport_header))~0U;
@@ -1739,11 +1739,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->head = data;
skb->head_frag = 0;
skb->data += off;
+
+ skb_set_end_offset(skb, size);
#ifdef NET_SKBUFF_DATA_USES_OFFSET
- skb->end = size;
off = nhead;
-#else
- skb->end = skb->head + size;
#endif
skb->tail += off;
skb_headers_offset_update(skb, nhead);
@@ -1791,6 +1790,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
}
EXPORT_SYMBOL(skb_realloc_headroom);
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
+{
+ unsigned int saved_end_offset, saved_truesize;
+ struct skb_shared_info *shinfo;
+ int res;
+
+ saved_end_offset = skb_end_offset(skb);
+ saved_truesize = skb->truesize;
+
+ res = pskb_expand_head(skb, 0, 0, pri);
+ if (res)
+ return res;
+
+ skb->truesize = saved_truesize;
+
+ if (likely(skb_end_offset(skb) == saved_end_offset))
+ return 0;
+
+ shinfo = skb_shinfo(skb);
+
+ /* We are about to change back skb->end,
+ * we need to move skb_shinfo() to its new location.
+ */
+ memmove(skb->head + saved_end_offset,
+ shinfo,
+ offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
+
+ skb_set_end_offset(skb, saved_end_offset);
+
+ return 0;
+}
+
/**
* skb_expand_head - reallocate header of &sk_buff
* @skb: buffer to reallocate
@@ -5366,11 +5397,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (skb_cloned(to))
return false;
- /* The page pool signature of struct page will eventually figure out
- * which pages can be recycled or not but for now let's prohibit slab
- * allocated and page_pool allocated SKBs from being coalesced.
+ /* In general, avoid mixing slab allocated and page_pool allocated
+ * pages within the same SKB. However when @to is not pp_recycle and
+ * @from is cloned, we can transition frag pages from page_pool to
+ * reference counted.
+ *
+ * On the other hand, don't allow coalescing two pp_recycle SKBs if
+ * @from is cloned, in case the SKB is using page_pool fragment
+ * references (PP_FLAG_PAGE_FRAG). Since we only take full page
+ * references for cloned SKBs at the moment that would result in
+ * inconsistent reference counts.
*/
- if (to->pp_recycle != from->pp_recycle)
+ if (to->pp_recycle != (from->pp_recycle && !skb_cloned(from)))
return false;
if (len <= skb_tailroom(to)) {
@@ -6166,11 +6204,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
skb->head = data;
skb->data = data;
skb->head_frag = 0;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- skb->end = size;
-#else
- skb->end = skb->head + size;
-#endif
+ skb_set_end_offset(skb, size);
skb_set_tail_pointer(skb, skb_headlen(skb));
skb_headers_offset_update(skb, 0);
skb->cloned = 0;
@@ -6308,11 +6342,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
skb->head = data;
skb->head_frag = 0;
skb->data = data;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- skb->end = size;
-#else
- skb->end = skb->head + size;
-#endif
+ skb_set_end_offset(skb, size);
skb_reset_tail_pointer(skb);
skb_headers_offset_update(skb, 0);
skb->cloned = 0;
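
All of the skbuff.c hunks above funnel assignments of skb->end through a single skb_set_end_offset() helper, replacing the #ifdef NET_SKBUFF_DATA_USES_OFFSET blocks that were previously open-coded at every call site. A reduced standalone analog of that accessor pattern (toy buffer struct, not the real sk_buff):

#include <stdint.h>

#define BUF_DATA_USES_OFFSET 1    /* flip to 0 for the pointer variant */

struct buf {
    uint8_t *head;
#if BUF_DATA_USES_OFFSET
    unsigned int end;             /* offset from head */
#else
    uint8_t *end;                 /* absolute pointer */
#endif
};

static void buf_set_end_offset(struct buf *b, unsigned int offset)
{
#if BUF_DATA_USES_OFFSET
    b->end = offset;
#else
    b->end = b->head + offset;
#endif
}

static unsigned int buf_end_offset(const struct buf *b)
{
#if BUF_DATA_USES_OFFSET
    return b->end;
#else
    return b->end - b->head;
#endif
}

int main(void)
{
    uint8_t storage[256];
    struct buf b = { .head = storage };

    buf_set_end_offset(&b, 128);
    return buf_end_offset(&b) == 128 ? 0 : 1;
}
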
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 929a2b096b04..cc381165ea08 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -27,6 +27,7 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
int elem_first_coalesce)
{
struct page_frag *pfrag = sk_page_frag(sk);
+ u32 osize = msg->sg.size;
int ret = 0;
len -= msg->sg.size;
@@ -35,13 +36,17 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
u32 orig_offset;
int use, i;
- if (!sk_page_frag_refill(sk, pfrag))
- return -ENOMEM;
+ if (!sk_page_frag_refill(sk, pfrag)) {
+ ret = -ENOMEM;
+ goto msg_trim;
+ }
orig_offset = pfrag->offset;
use = min_t(int, len, pfrag->size - orig_offset);
- if (!sk_wmem_schedule(sk, use))
- return -ENOMEM;
+ if (!sk_wmem_schedule(sk, use)) {
+ ret = -ENOMEM;
+ goto msg_trim;
+ }
i = msg->sg.end;
sk_msg_iter_var_prev(i);
@@ -71,6 +76,10 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
}
return ret;
+
+msg_trim:
+ sk_msg_trim(sk, msg, osize);
+ return ret;
}
EXPORT_SYMBOL_GPL(sk_msg_alloc);
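
sk_msg_alloc() now snapshots the original size and trims back to it on any mid-loop failure, so callers never observe a half-grown message. The same grow-or-roll-back shape in plain C, with a toy dynamic buffer and invented helper names:

#include <stdlib.h>
#include <string.h>

struct dynbuf {
    char *data;
    size_t size;
};

static int dynbuf_push(struct dynbuf *b, const char *s, size_t len)
{
    char *p = realloc(b->data, b->size + len);

    if (!p)
        return -1;
    memcpy(p + b->size, s, len);
    b->data = p;
    b->size += len;
    return 0;
}

/* append several chunks; on any failure, trim back so the caller never
 * sees partial growth (the sk_msg_trim() role in the hunk above)
 */
static int dynbuf_push_all(struct dynbuf *b, const char **chunks,
                           const size_t *lens, size_t n)
{
    size_t osize = b->size;    /* remembered like 'osize' above */

    for (size_t i = 0; i < n; i++) {
        if (dynbuf_push(b, chunks[i], lens[i]) < 0) {
            b->size = osize;   /* rollback */
            return -1;
        }
    }
    return 0;
}

int main(void)
{
    struct dynbuf b = { 0 };
    const char *chunks[] = { "ab", "cd" };
    const size_t lens[] = { 2, 2 };

    return dynbuf_push_all(&b, chunks, lens, 2);
}
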
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index e9911b18bdbf..64a56db3de58 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -1341,6 +1341,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
const char *user_protocol;
master = of_find_net_device_by_node(ethernet);
+ of_node_put(ethernet);
if (!master)
return -EPROBE_DEFER;
@@ -1630,6 +1631,10 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
struct dsa_port *dp;
mutex_lock(&dsa2_mutex);
+
+ if (!ds->setup)
+ goto out;
+
rtnl_lock();
list_for_each_entry(dp, &ds->dst->ports, list) {
@@ -1664,6 +1669,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
unregister_netdevice_many(&unregister_list);
rtnl_unlock();
+out:
mutex_unlock(&dsa2_mutex);
}
EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 922dd73e5740..83a47998c4b1 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1116,13 +1116,18 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
return err;
}
-static int arp_invalidate(struct net_device *dev, __be32 ip)
+int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
{
struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
int err = -ENXIO;
struct neigh_table *tbl = &arp_tbl;
if (neigh) {
+ if ((neigh->nud_state & NUD_VALID) && !force) {
+ neigh_release(neigh);
+ return 0;
+ }
+
if (neigh->nud_state & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
@@ -1169,7 +1174,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
if (!dev)
return -EINVAL;
}
- return arp_invalidate(dev, ip);
+ return arp_invalidate(dev, ip, true);
}
/*
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4d61ddd8a0ec..1eb7795edb9d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1112,9 +1112,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
return;
/* Add broadcast address, if it is explicitly assigned. */
- if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
+ if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) {
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
prim, 0);
+ arp_invalidate(dev, ifa->ifa_broadcast, false);
+ }
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
@@ -1128,6 +1130,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
if (ifa->ifa_prefixlen < 31) {
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
32, prim, 0);
+ arp_invalidate(dev, prefix | ~mask, false);
}
}
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d244c57b7303..b5563f5ff176 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -887,8 +887,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
}
if (cfg->fc_oif || cfg->fc_gw_family) {
- struct fib_nh *nh = fib_info_nh(fi, 0);
+ struct fib_nh *nh;
+
+ /* cannot match on nexthop object attributes */
+ if (fi->nh)
+ return 1;
+ nh = fib_info_nh(fi, 0);
if (cfg->fc_encap) {
if (fib_encap_match(net, cfg->fc_encap_type,
cfg->fc_encap, nh, cfg, extack))
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 75737267746f..7bd1e10086f0 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk)
int err = 0;
if (sk->sk_state != TCP_LISTEN) {
+ local_bh_disable();
inet_ehash_nolisten(sk, osk, NULL);
+ local_bh_enable();
return 0;
}
WARN_ON(!sk_unhashed(sk));
@@ -669,45 +671,54 @@ int inet_hash(struct sock *sk)
{
int err = 0;
- if (sk->sk_state != TCP_CLOSE) {
- local_bh_disable();
+ if (sk->sk_state != TCP_CLOSE)
err = __inet_hash(sk, NULL);
- local_bh_enable();
- }
return err;
}
EXPORT_SYMBOL_GPL(inet_hash);
-void inet_unhash(struct sock *sk)
+static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- struct inet_listen_hashbucket *ilb = NULL;
- spinlock_t *lock;
-
if (sk_unhashed(sk))
return;
- if (sk->sk_state == TCP_LISTEN) {
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
- lock = &ilb->lock;
- } else {
- lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
- }
- spin_lock_bh(lock);
- if (sk_unhashed(sk))
- goto unlock;
-
if (rcu_access_pointer(sk->sk_reuseport_cb))
reuseport_stop_listen_sock(sk);
if (ilb) {
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
inet_unhash2(hashinfo, sk);
ilb->count--;
}
__sk_nulls_del_node_init_rcu(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-unlock:
- spin_unlock_bh(lock);
+}
+
+void inet_unhash(struct sock *sk)
+{
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
+ if (sk_unhashed(sk))
+ return;
+
+ if (sk->sk_state == TCP_LISTEN) {
+ struct inet_listen_hashbucket *ilb;
+
+ ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ /* Don't disable bottom halves while acquiring the lock to
+ * avoid circular locking dependency on PREEMPT_RT.
+ */
+ spin_lock(&ilb->lock);
+ __inet_unhash(sk, ilb);
+ spin_unlock(&ilb->lock);
+ } else {
+ spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+
+ spin_lock_bh(lock);
+ __inet_unhash(sk, NULL);
+ spin_unlock_bh(lock);
+ }
}
EXPORT_SYMBOL_GPL(inet_unhash);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 23833660584d..ed9b6842a9a0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -506,6 +506,15 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
}
EXPORT_SYMBOL(__ip_select_ident);
+static void ip_rt_fix_tos(struct flowi4 *fl4)
+{
+ __u8 tos = RT_FL_TOS(fl4);
+
+ fl4->flowi4_tos = tos & IPTOS_RT_MASK;
+ fl4->flowi4_scope = tos & RTO_ONLINK ?
+ RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
+}
+
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
const struct sock *sk,
const struct iphdr *iph,
@@ -831,6 +840,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
rt = (struct rtable *) dst;
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
+ ip_rt_fix_tos(&fl4);
__ip_do_redirect(rt, skb, &fl4, true);
}
@@ -1055,6 +1065,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
+ ip_rt_fix_tos(&fl4);
/* Don't make lookup fail for bridged encapsulations */
if (skb && netif_is_any_bridge_port(skb->dev))
@@ -1129,6 +1140,8 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
goto out;
new = true;
+ } else {
+ ip_rt_fix_tos(&fl4);
}
__ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
@@ -2609,7 +2622,6 @@ add:
struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
const struct sk_buff *skb)
{
- __u8 tos = RT_FL_TOS(fl4);
struct fib_result res = {
.type = RTN_UNSPEC,
.fi = NULL,
@@ -2619,9 +2631,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
struct rtable *rth;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
- fl4->flowi4_tos = tos & IPTOS_RT_MASK;
- fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+ ip_rt_fix_tos(fl4);
rcu_read_lock();
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 9b9b02052fd3..1cdcb4df0eb7 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -138,10 +138,9 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
struct sk_psock *psock = sk_psock_get(sk);
int ret;
- if (unlikely(!psock)) {
- sk_msg_free(sk, msg);
- return 0;
- }
+ if (unlikely(!psock))
+ return -EPIPE;
+
ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
tcp_bpf_push_locked(sk, msg, bytes, flags, false);
sk_psock_put(sk, psock);
@@ -335,7 +334,7 @@ more_data:
cork = true;
psock->cork = NULL;
}
- sk_msg_return(sk, msg, tosend);
+ sk_msg_return(sk, msg, msg->sg.size);
release_sock(sk);
ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
@@ -375,8 +374,11 @@ more_data:
}
if (msg &&
msg->sg.data[msg->sg.start].page_link &&
- msg->sg.data[msg->sg.start].length)
+ msg->sg.data[msg->sg.start].length) {
+ if (eval == __SK_REDIRECT)
+ sk_mem_charge(sk, msg->sg.size);
goto more_data;
+ }
}
return ret;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0492f6942778..369752f5f676 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3734,6 +3734,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
*/
static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
int space, err = 0;
@@ -3748,8 +3749,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
* private TCP options. The cost is reduced data space in SYN :(
*/
tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
+ /* Sync mss_cache after updating the mss_clamp */
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
+ space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
space = min_t(size_t, space, fo->size);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1fe27807e471..3a8838b79bb6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -552,7 +552,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
#ifdef CONFIG_IPV6_MROUTE
if ((all || type == NETCONFA_MC_FORWARDING) &&
nla_put_s32(skb, NETCONFA_MC_FORWARDING,
- devconf->mc_forwarding) < 0)
+ atomic_read(&devconf->mc_forwarding)) < 0)
goto nla_put_failure;
#endif
if ((all || type == NETCONFA_PROXY_NEIGH) &&
@@ -5537,7 +5537,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
#endif
#ifdef CONFIG_IPV6_MROUTE
- array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
+ array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding);
#endif
array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index b7b573085bd5..5023f59a5b96 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -813,8 +813,7 @@ int esp6_input_done2(struct sk_buff *skb, int err)
struct tcphdr *th;
offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
-
- if (offset < 0) {
+ if (offset == -1) {
err = -EINVAL;
goto out;
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 67c9114835c8..0a2e7f228391 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk)
{
int err = 0;
- if (sk->sk_state != TCP_CLOSE) {
- local_bh_disable();
+ if (sk->sk_state != TCP_CLOSE)
err = __inet_hash(sk, NULL);
- local_bh_enable();
- }
return err;
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 80256717868e..d4b1e2c5aa76 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -508,7 +508,7 @@ int ip6_mc_input(struct sk_buff *skb)
/*
* IPv6 multicast router mode is now supported ;)
*/
- if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
+ if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) &&
!(ipv6_addr_type(&hdr->daddr) &
(IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 61970fd839c3..8aaf9cf3d74a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1476,8 +1476,8 @@ static int __ip6_append_data(struct sock *sk,
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
- if (mtu < fragheaderlen ||
- ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr))
+ if (mtu <= fragheaderlen ||
+ ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
goto emsgsize;
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 6a4065d81aa9..91f1c5f56d5f 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -739,7 +739,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
- in6_dev->cnf.mc_forwarding--;
+ atomic_dec(&in6_dev->cnf.mc_forwarding);
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
@@ -907,7 +907,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt,
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
- in6_dev->cnf.mc_forwarding++;
+ atomic_inc(&in6_dev->cnf.mc_forwarding);
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
@@ -1557,7 +1557,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
} else {
rcu_assign_pointer(mrt->mroute_sk, sk);
sock_set_flag(sk, SOCK_RCU_FREE);
- net->ipv6.devconf_all->mc_forwarding++;
+ atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
}
write_unlock_bh(&mrt_lock);
@@ -1590,7 +1590,7 @@ int ip6mr_sk_done(struct sock *sk)
* so the RCU grace period before sk freeing
* is guaranteed by sk_destruct()
*/
- net->ipv6.devconf_all->mc_forwarding--;
+ atomic_dec(&net->ipv6.devconf_all->mc_forwarding);
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e0766bdf20e7..6b269595efaa 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4509,7 +4509,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
struct inet6_dev *idev;
int type;
- if (netif_is_l3_master(skb->dev) &&
+ if (netif_is_l3_master(skb->dev) ||
dst->dev == net->loopback_dev)
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
else
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index d0d280077721..ad07904642ca 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -45,6 +45,19 @@ static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buf
return xfrm_output(sk, skb);
}
+static int xfrm6_noneed_fragment(struct sk_buff *skb)
+{
+ struct frag_hdr *fh;
+ u8 prevhdr = ipv6_hdr(skb)->nexthdr;
+
+ if (prevhdr != NEXTHDR_FRAGMENT)
+ return 0;
+ fh = (struct frag_hdr *)(skb->data + sizeof(struct ipv6hdr));
+ if (fh->nexthdr == NEXTHDR_ESP || fh->nexthdr == NEXTHDR_AUTH)
+ return 1;
+ return 0;
+}
+
static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -73,6 +86,9 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
xfrm6_local_rxpmtu(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
+ } else if (toobig && xfrm6_noneed_fragment(skb)) {
+ skb->ignore_df = 1;
+ goto skip_frag;
} else if (!skb->ignore_df && toobig && skb->sk) {
xfrm_local_error(skb, mtu);
kfree_skb(skb);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9bf52a09b5ff..fd51db3be91c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1699,7 +1699,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
xfrm_probe_algs();
- supp_skb = compose_sadb_supported(hdr, GFP_KERNEL);
+ supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO);
if (!supp_skb) {
if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
pfk->registered &= ~(1<<hdr->sadb_msg_satype);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 3086f4a6ae68..99305aadaa08 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -275,6 +275,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
{
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
+ struct net_device *dev = NULL;
struct llc_sap *sap;
int rc = -EINVAL;
@@ -286,14 +287,14 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
goto out;
rc = -ENODEV;
if (sk->sk_bound_dev_if) {
- llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
- if (llc->dev && addr->sllc_arphrd != llc->dev->type) {
- dev_put(llc->dev);
- llc->dev = NULL;
+ dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+ if (dev && addr->sllc_arphrd != dev->type) {
+ dev_put(dev);
+ dev = NULL;
}
} else
- llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
- if (!llc->dev)
+ dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
+ if (!dev)
goto out;
rc = -EUSERS;
llc->laddr.lsap = llc_ui_autoport();
@@ -303,6 +304,11 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
sap = llc_sap_open(llc->laddr.lsap, NULL);
if (!sap)
goto out;
+
+ /* Note: We do not expect errors from this point. */
+ llc->dev = dev;
+ dev = NULL;
+
memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN);
memcpy(&llc->addr, addr, sizeof(llc->addr));
/* assign new connection to its SAP */
@@ -310,6 +316,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
sock_reset_flag(sk, SOCK_ZAPPED);
rc = 0;
out:
+ dev_put(dev);
return rc;
}
@@ -332,6 +339,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr;
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
+ struct net_device *dev = NULL;
struct llc_sap *sap;
int rc = -EINVAL;
@@ -347,25 +355,27 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
rc = -ENODEV;
rcu_read_lock();
if (sk->sk_bound_dev_if) {
- llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
- if (llc->dev) {
+ dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
+ if (dev) {
if (is_zero_ether_addr(addr->sllc_mac))
- memcpy(addr->sllc_mac, llc->dev->dev_addr,
+ memcpy(addr->sllc_mac, dev->dev_addr,
IFHWADDRLEN);
- if (addr->sllc_arphrd != llc->dev->type ||
+ if (addr->sllc_arphrd != dev->type ||
!ether_addr_equal(addr->sllc_mac,
- llc->dev->dev_addr)) {
+ dev->dev_addr)) {
rc = -EINVAL;
- llc->dev = NULL;
+ dev = NULL;
}
}
- } else
- llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
+ } else {
+ dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
addr->sllc_mac);
- dev_hold(llc->dev);
+ }
+ dev_hold(dev);
rcu_read_unlock();
- if (!llc->dev)
+ if (!dev)
goto out;
+
if (!addr->sllc_sap) {
rc = -EUSERS;
addr->sllc_sap = llc_ui_autoport();
@@ -397,6 +407,11 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
goto out_put;
}
}
+
+ /* Note: We do not expect errors from this point. */
+ llc->dev = dev;
+ dev = NULL;
+
llc->laddr.lsap = addr->sllc_sap;
memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN);
memcpy(&llc->addr, addr, sizeof(llc->addr));
@@ -407,6 +422,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
out_put:
llc_sap_put(sap);
out:
+ dev_put(dev);
release_sock(sk);
return rc;
}
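
Both llc_ui_autobind() and llc_ui_bind() above are restructured around a commit point: they work on a local dev reference, publish it into llc->dev only once no further failure is possible, and drop the local reference on every exit path (dev_put() tolerates NULL). A skeletal sketch of that ownership-transfer pattern, with an invented refcounted resource in place of net_device:

#include <stdlib.h>

struct res {
    int refs;
};

static void res_put(struct res *r)    /* NULL-safe, like dev_put() */
{
    if (r && --r->refs == 0)
        free(r);
}

struct owner {
    struct res *res;
};

static int owner_bind(struct owner *o, struct res *res /* arrives with +1 ref */)
{
    int rc = -1;

    if (!res)
        goto out;
    /* ... validation that may still fail would go here ... */

    /* commit point: no errors are possible past this line */
    o->res = res;
    res = NULL;       /* ownership transferred; keep the reference */
    rc = 0;
out:
    res_put(res);     /* drops the ref only if we did not commit */
    return rc;
}

int main(void)
{
    struct res *r = calloc(1, sizeof(*r));
    struct owner o = { 0 };

    if (r)
        r->refs = 1;
    return owner_bind(&o, r);
}
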
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 1bf83b8d8402..3f625e836a03 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2110,14 +2110,12 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
const struct mesh_setup *setup)
{
u8 *new_ie;
- const u8 *old_ie;
struct ieee80211_sub_if_data *sdata = container_of(ifmsh,
struct ieee80211_sub_if_data, u.mesh);
int i;
/* allocate information elements */
new_ie = NULL;
- old_ie = ifmsh->ie;
if (setup->ie_len) {
new_ie = kmemdup(setup->ie, setup->ie_len,
@@ -2127,7 +2125,6 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
}
ifmsh->ie_len = setup->ie_len;
ifmsh->ie = new_ie;
- kfree(old_ie);
/* now copy the rest of the setup parameters */
ifmsh->mesh_id_len = setup->mesh_id_len;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 6a88195e5abe..d30bd21697a3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2379,7 +2379,7 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
const struct cfg80211_chan_def *chandef);
u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype);
-u8 *ieee80211_ie_build_he_cap(u8 *pos,
+u8 *ieee80211_ie_build_he_cap(u32 disable_flags, u8 *pos,
const struct ieee80211_sta_he_cap *he_cap,
u8 *end);
void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 45fb517591ee..5311c3cd3050 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1131,17 +1131,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
local->scan_ies_len +=
2 + sizeof(struct ieee80211_vht_cap);
- /* HE cap element is variable in size - set len to allow max size */
/*
- * TODO: 1 is added at the end of the calculation to accommodate for
- * the temporary placing of the HE capabilities IE under EXT.
- * Remove it once it is placed in the final place.
- */
- if (supp_he)
+ * HE cap element is variable in size - set len to allow max size */
+ if (supp_he) {
local->scan_ies_len +=
- 2 + sizeof(struct ieee80211_he_cap_elem) +
+ 3 + sizeof(struct ieee80211_he_cap_elem) +
sizeof(struct ieee80211_he_mcs_nss_supp) +
- IEEE80211_HE_PPE_THRES_MAX_LEN + 1;
+ IEEE80211_HE_PPE_THRES_MAX_LEN;
+ }
if (!local->ops->hw_scan) {
/* For hw_scan, driver needs to set these up. */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 5dcfd53a4ab6..42bd81a30310 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -580,7 +580,7 @@ int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata,
return -ENOMEM;
pos = skb_put(skb, ie_len);
- ieee80211_ie_build_he_cap(pos, he_cap, pos + ie_len);
+ ieee80211_ie_build_he_cap(0, pos, he_cap, pos + ie_len);
return 0;
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 6c8505edce75..c8332452c118 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -630,7 +630,7 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
struct ieee80211_supported_band *sband)
{
- u8 *pos;
+ u8 *pos, *pre_he_pos;
const struct ieee80211_sta_he_cap *he_cap = NULL;
struct ieee80211_chanctx_conf *chanctx_conf;
u8 he_cap_size;
@@ -647,20 +647,21 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
he_cap = ieee80211_get_he_iftype_cap(sband,
ieee80211_vif_type_p2p(&sdata->vif));
- if (!he_cap || !reg_cap)
+ if (!he_cap || !chanctx_conf || !reg_cap)
return;
- /*
- * TODO: the 1 added is because this temporarily is under the EXTENSION
- * IE. Get rid of it when it moves.
- */
+ /* get a max size estimate */
he_cap_size =
2 + 1 + sizeof(he_cap->he_cap_elem) +
ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) +
ieee80211_he_ppe_size(he_cap->ppe_thres[0],
he_cap->he_cap_elem.phy_cap_info);
pos = skb_put(skb, he_cap_size);
- ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size);
+ pre_he_pos = pos;
+ pos = ieee80211_ie_build_he_cap(sdata->u.mgd.flags,
+ pos, he_cap, pos + he_cap_size);
+ /* trim excess if any */
+ skb_trim(skb, skb->len - (pre_he_pos + he_cap_size - pos));
ieee80211_ie_build_he_6ghz_cap(sdata, skb);
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 2fe71ed9137b..be1911d8089f 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1959,7 +1959,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
if (he_cap &&
cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
IEEE80211_CHAN_NO_HE)) {
- pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
+ pos = ieee80211_ie_build_he_cap(0, pos, he_cap, end);
if (!pos)
goto out_err;
}
@@ -2903,10 +2903,11 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype)
he_cap->he_cap_elem.phy_cap_info);
}
-u8 *ieee80211_ie_build_he_cap(u8 *pos,
+u8 *ieee80211_ie_build_he_cap(u32 disable_flags, u8 *pos,
const struct ieee80211_sta_he_cap *he_cap,
u8 *end)
{
+ struct ieee80211_he_cap_elem elem;
u8 n;
u8 ie_len;
u8 *orig_pos = pos;
@@ -2919,7 +2920,23 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos,
if (!he_cap)
return orig_pos;
- n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem);
+ /* modify on stack first to calculate 'n' and 'ie_len' correctly */
+ elem = he_cap->he_cap_elem;
+
+ if (disable_flags & IEEE80211_STA_DISABLE_40MHZ)
+ elem.phy_cap_info[0] &=
+ ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G);
+
+ if (disable_flags & IEEE80211_STA_DISABLE_160MHZ)
+ elem.phy_cap_info[0] &=
+ ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+
+ if (disable_flags & IEEE80211_STA_DISABLE_80P80MHZ)
+ elem.phy_cap_info[0] &=
+ ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
+
+ n = ieee80211_he_mcs_nss_size(&elem);
ie_len = 2 + 1 +
sizeof(he_cap->he_cap_elem) + n +
ieee80211_he_ppe_size(he_cap->ppe_thres[0],
@@ -2933,8 +2950,8 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos,
*pos++ = WLAN_EID_EXT_HE_CAPABILITY;
/* Fixed data */
- memcpy(pos, &he_cap->he_cap_elem, sizeof(he_cap->he_cap_elem));
- pos += sizeof(he_cap->he_cap_elem);
+ memcpy(pos, &elem, sizeof(elem));
+ pos += sizeof(elem);
memcpy(pos, &he_cap->he_mcs_nss_supp, n);
pos += n;
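
ieee80211_ie_build_he_cap() now edits a stack copy of the capability element before computing n and ie_len, so the sizes always describe the (possibly width-restricted) element that is actually emitted. A reduced copy-then-mask sketch, with invented flag values and a fake size rule standing in for ieee80211_he_mcs_nss_size():

#include <stdint.h>
#include <string.h>

#define DISABLE_40MHZ  0x1u    /* invented stand-ins for the kernel flags */
#define DISABLE_160MHZ 0x2u

struct cap_elem {
    uint8_t phy_cap[4];
};

/* fake size rule: the payload size depends on surviving width bits */
static size_t cap_payload_size(const struct cap_elem *e)
{
    return (e->phy_cap[0] & 0x08) ? 8 : 4;
}

static size_t build_cap_ie(uint32_t disable_flags, uint8_t *out,
                           const struct cap_elem *src)
{
    struct cap_elem elem = *src;    /* modify on stack first */

    if (disable_flags & DISABLE_40MHZ)
        elem.phy_cap[0] &= (uint8_t)~0x02u;
    if (disable_flags & DISABLE_160MHZ)
        elem.phy_cap[0] &= (uint8_t)~0x08u;

    /* sizes come from the edited copy, and the edited copy is emitted */
    memcpy(out, &elem, sizeof(elem));
    return sizeof(elem) + cap_payload_size(&elem);
}

int main(void)
{
    struct cap_elem src = { .phy_cap = { 0x0e, 0, 0, 0 } };
    uint8_t out[sizeof(struct cap_elem)];

    /* with 160 MHz disabled the reported size shrinks accordingly */
    return build_cap_ie(DISABLE_160MHZ, out, &src) == sizeof(src) + 4 ? 0 : 1;
}
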
diff --git a/net/mctp/route.c b/net/mctp/route.c
index d5e7db83fe9d..ee548c46c78f 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -512,7 +512,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
daddr, skb->dev->dev_addr, skb->len);
- if (rc) {
+ if (rc < 0) {
kfree_skb(skb);
return -EHOSTUNREACH;
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 7f7997460764..3a98a1316307 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,6 +66,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
struct conntrack_gc_work {
struct delayed_work dwork;
u32 next_bucket;
+ u32 avg_timeout;
+ u32 start_time;
bool exiting;
bool early_drop;
};
@@ -77,8 +79,19 @@ static __read_mostly bool nf_conntrack_locks_all;
/* serialize hash resizes and nf_ct_iterate_cleanup */
static DEFINE_MUTEX(nf_conntrack_mutex);
-#define GC_SCAN_INTERVAL (120u * HZ)
+#define GC_SCAN_INTERVAL_MAX (60ul * HZ)
+#define GC_SCAN_INTERVAL_MIN (1ul * HZ)
+
+/* clamp timeouts to this value (TCP unacked) */
+#define GC_SCAN_INTERVAL_CLAMP (300ul * HZ)
+
+/* large initial bias so that we don't scan often just because we have
+ * three entries with a 1s timeout.
+ */
+#define GC_SCAN_INTERVAL_INIT INT_MAX
+
#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
+#define GC_SCAN_EXPIRED_MAX (64000u / HZ)
#define MIN_CHAINLEN 8u
#define MAX_CHAINLEN (32u - MIN_CHAINLEN)
@@ -989,7 +1002,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
nf_ct_acct_merge(ct, ctinfo, loser_ct);
nf_ct_add_to_dying_list(loser_ct);
- nf_conntrack_put(&loser_ct->ct_general);
+ nf_ct_put(loser_ct);
nf_ct_set(skb, ct, ctinfo);
NF_CT_STAT_INC(net, clash_resolve);
@@ -1420,16 +1433,28 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
static void gc_worker(struct work_struct *work)
{
- unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
unsigned int i, hashsz, nf_conntrack_max95 = 0;
- unsigned long next_run = GC_SCAN_INTERVAL;
+ u32 end_time, start_time = nfct_time_stamp;
struct conntrack_gc_work *gc_work;
+ unsigned int expired_count = 0;
+ unsigned long next_run;
+ s32 delta_time;
+
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
i = gc_work->next_bucket;
if (gc_work->early_drop)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
+ if (i == 0) {
+ gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+ gc_work->start_time = start_time;
+ }
+
+ next_run = gc_work->avg_timeout;
+
+ end_time = start_time + GC_SCAN_MAX_DURATION;
+
do {
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
@@ -1446,6 +1471,7 @@ static void gc_worker(struct work_struct *work)
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
struct nf_conntrack_net *cnet;
+ unsigned long expires;
struct net *net;
tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -1455,11 +1481,29 @@ static void gc_worker(struct work_struct *work)
continue;
}
+ if (expired_count > GC_SCAN_EXPIRED_MAX) {
+ rcu_read_unlock();
+
+ gc_work->next_bucket = i;
+ gc_work->avg_timeout = next_run;
+
+ delta_time = nfct_time_stamp - gc_work->start_time;
+
+ /* re-sched immediately if total cycle time is exceeded */
+ next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX;
+ goto early_exit;
+ }
+
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
+ expired_count++;
continue;
}
+ expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
+ next_run += expires;
+ next_run /= 2u;
+
if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
continue;
@@ -1477,8 +1521,10 @@ static void gc_worker(struct work_struct *work)
continue;
}
- if (gc_worker_can_early_drop(tmp))
+ if (gc_worker_can_early_drop(tmp)) {
nf_ct_kill(tmp);
+ expired_count++;
+ }
nf_ct_put(tmp);
}
@@ -1491,33 +1537,38 @@ static void gc_worker(struct work_struct *work)
cond_resched();
i++;
- if (time_after(jiffies, end_time) && i < hashsz) {
+ delta_time = nfct_time_stamp - end_time;
+ if (delta_time > 0 && i < hashsz) {
+ gc_work->avg_timeout = next_run;
gc_work->next_bucket = i;
next_run = 0;
- break;
+ goto early_exit;
}
} while (i < hashsz);
+ gc_work->next_bucket = 0;
+
+ next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX);
+
+ delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1);
+ if (next_run > (unsigned long)delta_time)
+ next_run -= delta_time;
+ else
+ next_run = 1;
+
+early_exit:
if (gc_work->exiting)
return;
- /*
- * Eviction will normally happen from the packet path, and not
- * from this gc worker.
- *
- * This worker is only here to reap expired entries when system went
- * idle after a busy period.
- */
- if (next_run) {
+ if (next_run)
gc_work->early_drop = false;
- gc_work->next_bucket = 0;
- }
+
queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
- INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
+ INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
gc_work->exiting = false;
}
@@ -1920,7 +1971,7 @@ repeat:
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
pr_debug("nf_conntrack_in: Can't track with proto module\n");
- nf_conntrack_put(&ct->ct_general);
+ nf_ct_put(ct);
skb->_nfct = 0;
/* Special case: TCP tracker reports an attempt to reopen a
* closed/aborted connection. We have to go back and create a
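
The rescheduling logic added to gc_worker() above is a halving running average: each scanned entry folds its clamped expiry into next_run (add, then divide by two), seeded with the large GC_SCAN_INTERVAL_INIT bias so a handful of short-lived entries cannot by themselves force frequent rescans; the result is then clamped to [GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX]. The arithmetic in isolation, with made-up expiry values:

#include <stdio.h>

int main(void)
{
    unsigned long next_run = 1UL << 30;    /* large initial bias */
    unsigned long expiries[] = { 300, 300, 5, 300 };

    for (int i = 0; i < 4; i++) {
        next_run += expiries[i];    /* fold in one entry's clamped expiry */
        next_run /= 2;              /* older samples lose half their weight */
        printf("after %3lu -> next_run=%lu\n", expiries[i], next_run);
    }
    return 0;
}
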
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index ae4488a13c70..ceb38a7b37cb 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -556,6 +556,12 @@ static const struct nf_ct_ext_type helper_extend = {
.id = NF_CT_EXT_HELPER,
};
+void nf_ct_set_auto_assign_helper_warned(struct net *net)
+{
+ nf_ct_pernet(net)->auto_assign_helper_warned = true;
+}
+EXPORT_SYMBOL_GPL(nf_ct_set_auto_assign_helper_warned);
+
void nf_conntrack_helper_pernet_init(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index af5115e127cf..3cee5d8ee702 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -341,8 +341,8 @@ static void tcp_options(const struct sk_buff *skb,
if (!ptr)
return;
- state->td_scale =
- state->flags = 0;
+ state->td_scale = 0;
+ state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;
while (length > 0) {
int opcode=*ptr++;
@@ -839,6 +839,16 @@ static bool tcp_can_early_drop(const struct nf_conn *ct)
return false;
}
+static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state)
+{
+ state->td_end = 0;
+ state->td_maxend = 0;
+ state->td_maxwin = 0;
+ state->td_maxack = 0;
+ state->td_scale = 0;
+ state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;
+}
+
/* Returns verdict for packet, or -1 for invalid. */
int nf_conntrack_tcp_packet(struct nf_conn *ct,
struct sk_buff *skb,
@@ -945,8 +955,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
ct->proto.tcp.last_flags;
- memset(&ct->proto.tcp.seen[dir], 0,
- sizeof(struct ip_ct_tcp_state));
+ nf_ct_tcp_state_reset(&ct->proto.tcp.seen[dir]);
break;
}
ct->proto.tcp.last_index = index;
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index bc4126d8ef65..280fdd32965f 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -6,12 +6,29 @@
#include <linux/rhashtable.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
+#include <linux/if_vlan.h>
static unsigned int
nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
+ struct vlan_ethhdr *veth;
+ __be16 proto;
+
switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+ proto = veth->h_vlan_encapsulated_proto;
+ break;
+ case htons(ETH_P_PPP_SES):
+ proto = nf_flow_pppoe_proto(skb);
+ break;
+ default:
+ proto = skb->protocol;
+ break;
+ }
+
+ switch (proto) {
case htons(ETH_P_IP):
return nf_flow_offload_ip_hook(priv, skb, state);
case htons(ETH_P_IPV6):
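
nf_flow_offload_inet_hook() above now resolves the inner protocol of VLAN- and PPPoE-encapsulated frames before dispatching to the IPv4/IPv6 hooks. The two-step dispatch in a toy standalone form (invented frame struct; the real code reads the encapsulated protocol out of the packet headers):

enum proto { P_IP, P_IPV6, P_VLAN, P_PPPOE, P_OTHER };

struct frame {
    enum proto outer;
    enum proto encapsulated;    /* only meaningful for VLAN/PPPoE */
};

static enum proto inner_proto(const struct frame *f)
{
    switch (f->outer) {
    case P_VLAN:
    case P_PPPOE:
        return f->encapsulated;    /* peel one encapsulation layer */
    default:
        return f->outer;
    }
}

static int offload_hook(const struct frame *f)
{
    switch (inner_proto(f)) {
    case P_IP:    return 1;    /* ~ nf_flow_offload_ip_hook() */
    case P_IPV6:  return 2;    /* ~ nf_flow_offload_ipv6_hook() */
    default:      return 0;    /* not offloadable here */
    }
}

int main(void)
{
    struct frame f = { .outer = P_PPPOE, .encapsulated = P_IPV6 };

    return offload_hook(&f) == 2 ? 0 : 1;
}
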
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 889cf88d3dba..6257d87c3a56 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -8,8 +8,6 @@
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
-#include <linux/if_pppox.h>
-#include <linux/ppp_defs.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -239,22 +237,6 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
-{
- __be16 proto;
-
- proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
- sizeof(struct pppoe_hdr)));
- switch (proto) {
- case htons(PPP_IP):
- return htons(ETH_P_IP);
- case htons(PPP_IPV6):
- return htons(ETH_P_IPV6);
- }
-
- return 0;
-}
-
static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
u32 *offset)
{
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2b2e0210a7f9..3e7f97a70721 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -9208,17 +9208,23 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
}
EXPORT_SYMBOL_GPL(nft_parse_u32_check);
-static unsigned int nft_parse_register(const struct nlattr *attr)
+static unsigned int nft_parse_register(const struct nlattr *attr, u32 *preg)
{
unsigned int reg;
reg = ntohl(nla_get_be32(attr));
switch (reg) {
case NFT_REG_VERDICT...NFT_REG_4:
- return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+ *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+ break;
+ case NFT_REG32_00...NFT_REG32_15:
+ *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+ break;
default:
- return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+ return -ERANGE;
}
+
+ return 0;
}
/**
@@ -9260,7 +9266,10 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
u32 reg;
int err;
- reg = nft_parse_register(attr);
+ err = nft_parse_register(attr, &reg);
+ if (err < 0)
+ return err;
+
err = nft_validate_register_load(reg, len);
if (err < 0)
return err;
@@ -9315,7 +9324,10 @@ int nft_parse_register_store(const struct nft_ctx *ctx,
int err;
u32 reg;
- reg = nft_parse_register(attr);
+ err = nft_parse_register(attr, &reg);
+ if (err < 0)
+ return err;
+
err = nft_validate_register_store(ctx, reg, data, type, len);
if (err < 0)
return err;
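
The reworked nft_parse_register() maps two register namespaces onto 32-bit slot indices and, crucially, now rejects values that fall in neither range instead of blindly translating them. A standalone sketch of that mapping, using the 5.15-era uapi constants (slot 4 shows that NFT_REG_1 and NFT_REG32_00 alias the same storage):

#include <errno.h>
#include <stdio.h>

/* Register numbers from include/uapi/linux/netfilter/nf_tables.h (5.15). */
enum {
	NFT_REG_VERDICT	= 0,
	NFT_REG_1	= 1,
	NFT_REG_4	= 4,
	NFT_REG32_00	= 8,
	NFT_REG32_15	= 23,
};
#define NFT_REG_SIZE	16
#define NFT_REG32_SIZE	4

/* Map both register namespaces onto 32-bit slot indices; anything in
 * neither range (5..7, or above 23) is rejected rather than translated. */
static int parse_register(unsigned int reg, unsigned int *slot)
{
	if (reg <= NFT_REG_4)
		*slot = reg * NFT_REG_SIZE / NFT_REG32_SIZE;	/* 0,4,8,12,16 */
	else if (reg >= NFT_REG32_00 && reg <= NFT_REG32_15)
		*slot = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; /* 4..19 */
	else
		return -ERANGE;
	return 0;
}

int main(void)
{
	unsigned int slot;

	if (!parse_register(NFT_REG_1, &slot))
		printf("NFT_REG_1    -> slot %u\n", slot);	/* 4 */
	if (!parse_register(NFT_REG32_00, &slot))
		printf("NFT_REG32_00 -> slot %u\n", slot);	/* 4: same storage */
	return parse_register(5, &slot) == -ERANGE ? 0 : 1;	/* gap rejected */
}
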
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 866cfba04d6c..907e848dbc17 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -162,7 +162,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
struct nft_rule *const *rules;
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
- struct nft_regs regs;
+ struct nft_regs regs = {};
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
bool genbit = READ_ONCE(net->nft.gencursor);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 99b1de14ff7e..54ecb9fbf2de 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1040,6 +1040,9 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
if (err < 0)
goto err_put_helper;
+	/* Avoid the bogus warning; the helper will be assigned after CT init */
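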
+ nf_ct_set_auto_assign_helper_warned(ctx->net);
+
return 0;
err_put_helper:
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index beb0e573266d..54c083003947 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -885,6 +885,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len,
unsigned char bitmask;
unsigned char byte;
+ if (offset >= bitmap_len)
+ return -1;
byte_offset = offset / 8;
byte = bitmap[byte_offset];
bit_spot = offset;
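
The netlabel fix is a plain bounds check: walking from an offset at or past the end of the bitmap must fail instead of indexing bitmap[offset / 8] out of bounds. A self-contained sketch of an MSB-first bitmap walk with the same guard:

#include <stddef.h>

/* Return the index of the next bit equal to `state` at or after `offset`,
 * or -1. The guard mirrors the fix: an offset at or beyond the bitmap's
 * length must fail instead of reading bitmap[offset / 8] out of bounds. */
static int bitmap_walk(const unsigned char *bitmap, size_t len_bits,
		       size_t offset, int state)
{
	size_t i;

	if (offset >= len_bits)
		return -1;
	for (i = offset; i < len_bits; i++) {
		int bit = (bitmap[i / 8] >> (7 - (i % 8))) & 1; /* MSB first */

		if (bit == state)
			return (int)i;
	}
	return -1;
}

int main(void)
{
	const unsigned char map[] = { 0x40 };	/* only bit 1 set (MSB first) */

	return bitmap_walk(map, 8, 0, 1) == 1 &&
	       bitmap_walk(map, 8, 99, 1) == -1 ? 0 : 1;
}
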
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 81ba8e51e01f..83ca93b32f5f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -157,6 +157,8 @@ EXPORT_SYMBOL(do_trace_netlink_extack);
static inline u32 netlink_group_mask(u32 group)
{
+ if (group > 32)
+ return 0;
return group ? 1 << (group - 1) : 0;
}
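
netlink_group_mask() turns a 1-based group number into a bit in a 32-bit mask; shifting by 32 or more is undefined behaviour in C, so out-of-range groups have to yield an empty mask, which is exactly what the added check does. A compact sketch:

#include <stdint.h>
#include <stdio.h>

/* A 1-based group number selects a bit in a 32-bit mask. `1 << (group - 1)`
 * with group > 32 shifts by >= 32 bits, which is undefined behaviour, so
 * out-of-range groups must map to the empty mask. */
static uint32_t group_mask(uint32_t group)
{
	if (group == 0 || group > 32)
		return 0;
	return UINT32_C(1) << (group - 1);
}

int main(void)
{
	printf("%#x %#x %#x\n", group_mask(1), group_mask(32), group_mask(33));
	return 0;	/* prints 0x1 0x80000000 0 */
}
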
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 780d9e2246f3..8955f31fa47e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1051,7 +1051,7 @@ static int clone(struct datapath *dp, struct sk_buff *skb,
int rem = nla_len(attr);
bool dont_clone_flow_key;
- /* The first action is always 'OVS_CLONE_ATTR_ARG'. */
+ /* The first action is always 'OVS_CLONE_ATTR_EXEC'. */
clone_arg = nla_data(attr);
dont_clone_flow_key = nla_get_u32(clone_arg);
actions = nla_next(clone_arg, &rem);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 1b5eae57bc90..f2b64cab9af7 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -574,7 +574,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
nf_ct_delete(ct, 0, 0);
- nf_conntrack_put(&ct->ct_general);
+ nf_ct_put(ct);
}
}
@@ -723,7 +723,7 @@ static bool skb_nfct_cached(struct net *net,
if (nf_ct_is_confirmed(ct))
nf_ct_delete(ct, 0, 0);
- nf_conntrack_put(&ct->ct_general);
+ nf_ct_put(ct);
nf_ct_set(skb, NULL, 0);
return false;
}
@@ -732,6 +732,57 @@ static bool skb_nfct_cached(struct net *net,
}
#if IS_ENABLED(CONFIG_NF_NAT)
+static void ovs_nat_update_key(struct sw_flow_key *key,
+ const struct sk_buff *skb,
+ enum nf_nat_manip_type maniptype)
+{
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ __be16 src;
+
+ key->ct_state |= OVS_CS_F_SRC_NAT;
+ if (key->eth.type == htons(ETH_P_IP))
+ key->ipv4.addr.src = ip_hdr(skb)->saddr;
+ else if (key->eth.type == htons(ETH_P_IPV6))
+ memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
+ sizeof(key->ipv6.addr.src));
+ else
+ return;
+
+ if (key->ip.proto == IPPROTO_UDP)
+ src = udp_hdr(skb)->source;
+ else if (key->ip.proto == IPPROTO_TCP)
+ src = tcp_hdr(skb)->source;
+ else if (key->ip.proto == IPPROTO_SCTP)
+ src = sctp_hdr(skb)->source;
+ else
+ return;
+
+ key->tp.src = src;
+ } else {
+ __be16 dst;
+
+ key->ct_state |= OVS_CS_F_DST_NAT;
+ if (key->eth.type == htons(ETH_P_IP))
+ key->ipv4.addr.dst = ip_hdr(skb)->daddr;
+ else if (key->eth.type == htons(ETH_P_IPV6))
+ memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
+ sizeof(key->ipv6.addr.dst));
+ else
+ return;
+
+ if (key->ip.proto == IPPROTO_UDP)
+ dst = udp_hdr(skb)->dest;
+ else if (key->ip.proto == IPPROTO_TCP)
+ dst = tcp_hdr(skb)->dest;
+ else if (key->ip.proto == IPPROTO_SCTP)
+ dst = sctp_hdr(skb)->dest;
+ else
+ return;
+
+ key->tp.dst = dst;
+ }
+}
+
/* Modelled after nf_nat_ipv[46]_fn().
* range is only used for new, uninitialized NAT state.
* Returns either NF_ACCEPT or NF_DROP.
@@ -739,7 +790,7 @@ static bool skb_nfct_cached(struct net *net,
static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype)
+ enum nf_nat_manip_type maniptype, struct sw_flow_key *key)
{
int hooknum, nh_off, err = NF_ACCEPT;
@@ -811,58 +862,11 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
push:
skb_push_rcsum(skb, nh_off);
- return err;
-}
-
-static void ovs_nat_update_key(struct sw_flow_key *key,
- const struct sk_buff *skb,
- enum nf_nat_manip_type maniptype)
-{
- if (maniptype == NF_NAT_MANIP_SRC) {
- __be16 src;
-
- key->ct_state |= OVS_CS_F_SRC_NAT;
- if (key->eth.type == htons(ETH_P_IP))
- key->ipv4.addr.src = ip_hdr(skb)->saddr;
- else if (key->eth.type == htons(ETH_P_IPV6))
- memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
- sizeof(key->ipv6.addr.src));
- else
- return;
-
- if (key->ip.proto == IPPROTO_UDP)
- src = udp_hdr(skb)->source;
- else if (key->ip.proto == IPPROTO_TCP)
- src = tcp_hdr(skb)->source;
- else if (key->ip.proto == IPPROTO_SCTP)
- src = sctp_hdr(skb)->source;
- else
- return;
-
- key->tp.src = src;
- } else {
- __be16 dst;
-
- key->ct_state |= OVS_CS_F_DST_NAT;
- if (key->eth.type == htons(ETH_P_IP))
- key->ipv4.addr.dst = ip_hdr(skb)->daddr;
- else if (key->eth.type == htons(ETH_P_IPV6))
- memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
- sizeof(key->ipv6.addr.dst));
- else
- return;
-
- if (key->ip.proto == IPPROTO_UDP)
- dst = udp_hdr(skb)->dest;
- else if (key->ip.proto == IPPROTO_TCP)
- dst = tcp_hdr(skb)->dest;
- else if (key->ip.proto == IPPROTO_SCTP)
- dst = sctp_hdr(skb)->dest;
- else
- return;
+	/* Update the flow key if NAT was successful. */
+ if (err == NF_ACCEPT)
+ ovs_nat_update_key(key, skb, maniptype);
- key->tp.dst = dst;
- }
+ return err;
}
/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
@@ -904,7 +908,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
} else {
return NF_ACCEPT; /* Connection is not NATed. */
}
- err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
+ err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key);
if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
if (ct->status & IPS_SRC_NAT) {
@@ -914,17 +918,13 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
maniptype = NF_NAT_MANIP_SRC;
err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
- maniptype);
+ maniptype, key);
} else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL,
- NF_NAT_MANIP_SRC);
+ NF_NAT_MANIP_SRC, key);
}
}
- /* Mark NAT done if successful and update the flow key. */
- if (err == NF_ACCEPT)
- ovs_nat_update_key(key, skb, maniptype);
-
return err;
}
#else /* !CONFIG_NF_NAT */
@@ -967,7 +967,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
/* Associate skb with specified zone. */
if (tmpl) {
- nf_conntrack_put(skb_nfct(skb));
+ ct = nf_ct_get(skb, &ctinfo);
+ nf_ct_put(ct);
nf_conntrack_get(&tmpl->ct_general);
nf_ct_set(skb, tmpl, IP_CT_NEW);
}
@@ -1328,7 +1329,12 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
{
- nf_conntrack_put(skb_nfct(skb));
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+
+ nf_ct_put(ct);
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
ovs_ct_fill_key(skb, key, false);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index fd1f809e9bc1..c591b923016a 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2201,8 +2201,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
icmpv6_key->icmpv6_type = ntohs(output->tp.src);
icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
- if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
- icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+ if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+ swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
struct ovs_key_nd *nd_key;
nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
@@ -2288,6 +2288,62 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size)
return sfa;
}
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len);
+
+static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action)
+{
+ const struct nlattr *a;
+ int rem;
+
+ nla_for_each_nested(a, action, rem) {
+ switch (nla_type(a)) {
+ case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL:
+ case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER:
+ ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
+ break;
+ }
+ }
+}
+
+static void ovs_nla_free_clone_action(const struct nlattr *action)
+{
+ const struct nlattr *a = nla_data(action);
+ int rem = nla_len(action);
+
+ switch (nla_type(a)) {
+ case OVS_CLONE_ATTR_EXEC:
+ /* The real list of actions follows this attribute. */
+ a = nla_next(a, &rem);
+ ovs_nla_free_nested_actions(a, rem);
+ break;
+ }
+}
+
+static void ovs_nla_free_dec_ttl_action(const struct nlattr *action)
+{
+ const struct nlattr *a = nla_data(action);
+
+ switch (nla_type(a)) {
+ case OVS_DEC_TTL_ATTR_ACTION:
+ ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
+ break;
+ }
+}
+
+static void ovs_nla_free_sample_action(const struct nlattr *action)
+{
+ const struct nlattr *a = nla_data(action);
+ int rem = nla_len(action);
+
+ switch (nla_type(a)) {
+ case OVS_SAMPLE_ATTR_ARG:
+ /* The real list of actions follows this attribute. */
+ a = nla_next(a, &rem);
+ ovs_nla_free_nested_actions(a, rem);
+ break;
+ }
+}
+
static void ovs_nla_free_set_action(const struct nlattr *a)
{
const struct nlattr *ovs_key = nla_data(a);
@@ -2301,25 +2357,54 @@ static void ovs_nla_free_set_action(const struct nlattr *a)
}
}
-void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
{
const struct nlattr *a;
int rem;
- if (!sf_acts)
+	/* Whenever a new action is added, consider whether this
+	 * function needs updating as well.
+ */
+ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
+
+ if (!actions)
return;
- nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
+ nla_for_each_attr(a, actions, len, rem) {
switch (nla_type(a)) {
- case OVS_ACTION_ATTR_SET:
- ovs_nla_free_set_action(a);
+ case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+ ovs_nla_free_check_pkt_len_action(a);
+ break;
+
+ case OVS_ACTION_ATTR_CLONE:
+ ovs_nla_free_clone_action(a);
break;
+
case OVS_ACTION_ATTR_CT:
ovs_ct_free_action(a);
break;
+
+ case OVS_ACTION_ATTR_DEC_TTL:
+ ovs_nla_free_dec_ttl_action(a);
+ break;
+
+ case OVS_ACTION_ATTR_SAMPLE:
+ ovs_nla_free_sample_action(a);
+ break;
+
+ case OVS_ACTION_ATTR_SET:
+ ovs_nla_free_set_action(a);
+ break;
}
}
+}
+
+void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+{
+ if (!sf_acts)
+ return;
+ ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len);
kfree(sf_acts);
}
@@ -3429,7 +3514,9 @@ static int clone_action_to_attr(const struct nlattr *attr,
if (!start)
return -EMSGSIZE;
- err = ovs_nla_put_actions(nla_data(attr), rem, skb);
+ /* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */
+ attr = nla_next(nla_data(attr), &rem);
+ err = ovs_nla_put_actions(attr, rem, skb);
if (err)
nla_nest_cancel(skb, start);
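
The leak fix above teaches the free path to recurse into every action that nests other actions (sample, clone, check_pkt_len, dec_ttl), with a BUILD_BUG_ON on OVS_ACTION_ATTR_MAX forcing a review whenever a new action type appears. The shape of that pattern, sketched on a hypothetical node type rather than real netlink attributes:

#include <stdlib.h>

/* Hypothetical action node standing in for a netlink attribute: some
 * action types own a nested list of further actions, so the free path
 * must recurse depth-first before freeing the node itself. */
struct action {
	int type;
	void *priv;		/* per-action allocation, may be NULL */
	struct action *nested;	/* nested action list, may be NULL */
	struct action *next;	/* next sibling */
};

static void free_actions(struct action *a)
{
	while (a) {
		struct action *next = a->next;

		free_actions(a->nested);	/* like ovs_nla_free_nested_actions() */
		free(a->priv);
		free(a);
		a = next;
	}
}

int main(void)
{
	struct action *inner = calloc(1, sizeof(*inner));
	struct action *outer = calloc(1, sizeof(*outer));

	if (!inner || !outer)
		return 1;
	outer->nested = inner;	/* e.g. a clone wrapping one child action */
	free_actions(outer);
	return 0;
}
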
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e00c38f242c3..c0d4a65931de 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2281,8 +2281,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
copy_skb = skb_get(skb);
skb_head = skb->data;
}
- if (copy_skb)
+ if (copy_skb) {
+ memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
+ sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
skb_set_owner_r(copy_skb, sk);
+ }
}
snaplen = po->rx_ring.frame_size - macoff;
if ((int)snaplen < 0) {
@@ -3434,6 +3437,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
sock_recv_ts_and_drops(msg, sk, skb);
if (msg->msg_name) {
+ const size_t max_len = min(sizeof(skb->cb),
+ sizeof(struct sockaddr_storage));
int copy_len;
/* If the address length field is there to be filled
@@ -3456,6 +3461,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
msg->msg_namelen = sizeof(struct sockaddr_ll);
}
}
+ if (WARN_ON_ONCE(copy_len > max_len)) {
+ copy_len = max_len;
+ msg->msg_namelen = copy_len;
+ }
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
}
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index ac15a944573f..068c7bcd30c9 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -78,6 +78,7 @@ struct rfkill_data {
struct mutex mtx;
wait_queue_head_t read_wait;
bool input_handler;
+ u8 max_size;
};
@@ -1141,6 +1142,8 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
if (!data)
return -ENOMEM;
+ data->max_size = RFKILL_EVENT_SIZE_V1;
+
INIT_LIST_HEAD(&data->events);
mutex_init(&data->mtx);
init_waitqueue_head(&data->read_wait);
@@ -1223,6 +1226,7 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
list);
sz = min_t(unsigned long, sizeof(ev->ev), count);
+ sz = min_t(unsigned long, sz, data->max_size);
ret = sz;
if (copy_to_user(buf, &ev->ev, sz))
ret = -EFAULT;
@@ -1237,6 +1241,7 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
static ssize_t rfkill_fop_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
+ struct rfkill_data *data = file->private_data;
struct rfkill *rfkill;
struct rfkill_event_ext ev;
int ret;
@@ -1251,6 +1256,7 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf,
* our API version even in a write() call, if it cares.
*/
count = min(count, sizeof(ev));
+ count = min_t(size_t, count, data->max_size);
if (copy_from_user(&ev, buf, count))
return -EFAULT;
@@ -1310,31 +1316,47 @@ static int rfkill_fop_release(struct inode *inode, struct file *file)
return 0;
}
-#ifdef CONFIG_RFKILL_INPUT
static long rfkill_fop_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct rfkill_data *data = file->private_data;
+ int ret = -ENOSYS;
+ u32 size;
if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC)
return -ENOSYS;
- if (_IOC_NR(cmd) != RFKILL_IOC_NOINPUT)
- return -ENOSYS;
-
mutex_lock(&data->mtx);
-
- if (!data->input_handler) {
- if (atomic_inc_return(&rfkill_input_disabled) == 1)
- printk(KERN_DEBUG "rfkill: input handler disabled\n");
- data->input_handler = true;
+ switch (_IOC_NR(cmd)) {
+#ifdef CONFIG_RFKILL_INPUT
+ case RFKILL_IOC_NOINPUT:
+ if (!data->input_handler) {
+ if (atomic_inc_return(&rfkill_input_disabled) == 1)
+ printk(KERN_DEBUG "rfkill: input handler disabled\n");
+ data->input_handler = true;
+ }
+ ret = 0;
+ break;
+#endif
+ case RFKILL_IOC_MAX_SIZE:
+ if (get_user(size, (__u32 __user *)arg)) {
+ ret = -EFAULT;
+ break;
+ }
+ if (size < RFKILL_EVENT_SIZE_V1 || size > U8_MAX) {
+ ret = -EINVAL;
+ break;
+ }
+ data->max_size = size;
+ ret = 0;
+ break;
+ default:
+ break;
}
-
mutex_unlock(&data->mtx);
- return 0;
+ return ret;
}
-#endif
static const struct file_operations rfkill_fops = {
.owner = THIS_MODULE,
@@ -1343,10 +1365,8 @@ static const struct file_operations rfkill_fops = {
.write = rfkill_fop_write,
.poll = rfkill_fop_poll,
.release = rfkill_fop_release,
-#ifdef CONFIG_RFKILL_INPUT
.unlocked_ioctl = rfkill_fop_ioctl,
.compat_ioctl = compat_ptr_ioctl,
-#endif
.llseek = no_llseek,
};
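
With the ioctl handler above, a reader defaults to RFKILL_EVENT_SIZE_V1 events and must opt in to larger ones. A userspace sketch; struct rfkill_event_ext and RFKILL_IOCTL_MAX_SIZE are the uapi names expected to accompany this interface (present since 5.11-era headers), so treat them as assumptions if your headers are older:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/rfkill.h>

int main(void)
{
	struct rfkill_event_ext ev;
	__u32 size = sizeof(ev);
	int fd = open("/dev/rfkill", O_RDONLY);

	if (fd < 0)
		return 1;
#ifdef RFKILL_IOCTL_MAX_SIZE
	/* Opt in to events larger than RFKILL_EVENT_SIZE_V1. */
	if (ioctl(fd, RFKILL_IOCTL_MAX_SIZE, &size))
		perror("RFKILL_IOCTL_MAX_SIZE");	/* old kernel: V1-sized reads */
#endif
	while (read(fd, &ev, sizeof(ev)) > 0)	/* blocks; interrupt to stop */
		printf("idx=%u type=%u soft=%u hard=%u\n",
		       ev.idx, ev.type, ev.soft, ev.hard);
	close(fd);
	return 0;
}
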
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7bd6f8a66a3e..969e532f77a9 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -777,14 +777,12 @@ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool,
enum rxrpc_propose_ack_trace);
void rxrpc_process_call(struct work_struct *);
-static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call,
- unsigned long expire_at,
- unsigned long now,
- enum rxrpc_timer_trace why)
-{
- trace_rxrpc_timer(call, why, now);
- timer_reduce(&call->timer, expire_at);
-}
+void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+ unsigned long expire_at,
+ unsigned long now,
+ enum rxrpc_timer_trace why);
+
+void rxrpc_delete_call_timer(struct rxrpc_call *call);
/*
* call_object.c
@@ -808,6 +806,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
bool __rxrpc_queue_call(struct rxrpc_call *);
bool rxrpc_queue_call(struct rxrpc_call *);
void rxrpc_see_call(struct rxrpc_call *);
+bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op);
void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
void rxrpc_cleanup_call(struct rxrpc_call *);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index df864e692267..22e05de5d1ca 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -310,7 +310,7 @@ recheck_state:
}
if (call->state == RXRPC_CALL_COMPLETE) {
- del_timer_sync(&call->timer);
+ rxrpc_delete_call_timer(call);
goto out_put;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 4eb91d958a48..043508fd8d8a 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -53,10 +53,30 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
if (call->state < RXRPC_CALL_COMPLETE) {
trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies);
- rxrpc_queue_call(call);
+ __rxrpc_queue_call(call);
+ } else {
+ rxrpc_put_call(call, rxrpc_call_put);
+ }
+}
+
+void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+ unsigned long expire_at,
+ unsigned long now,
+ enum rxrpc_timer_trace why)
+{
+ if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) {
+ trace_rxrpc_timer(call, why, now);
+ if (timer_reduce(&call->timer, expire_at))
+ rxrpc_put_call(call, rxrpc_call_put_notimer);
}
}
+void rxrpc_delete_call_timer(struct rxrpc_call *call)
+{
+ if (del_timer_sync(&call->timer))
+ rxrpc_put_call(call, rxrpc_call_put_timer);
+}
+
static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
/*
@@ -463,6 +483,17 @@ void rxrpc_see_call(struct rxrpc_call *call)
}
}
+bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+{
+ const void *here = __builtin_return_address(0);
+ int n = atomic_fetch_add_unless(&call->usage, 1, 0);
+
+ if (n == 0)
+ return false;
+ trace_rxrpc_call(call->debug_id, op, n, here, NULL);
+ return true;
+}
+
/*
* Note the addition of a ref on a call.
*/
@@ -510,8 +541,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
spin_unlock_bh(&call->lock);
rxrpc_put_call_slot(call);
-
- del_timer_sync(&call->timer);
+ rxrpc_delete_call_timer(call);
/* Make sure we don't get any more notifications */
write_lock_bh(&rx->recvmsg_lock);
@@ -618,6 +648,8 @@ static void rxrpc_destroy_call(struct work_struct *work)
struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor);
struct rxrpc_net *rxnet = call->rxnet;
+ rxrpc_delete_call_timer(call);
+
rxrpc_put_connection(call->conn);
rxrpc_put_peer(call->peer);
kfree(call->rxtx_buffer);
@@ -652,8 +684,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
- del_timer_sync(&call->timer);
-
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
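
rxrpc_try_get_call() above is atomic_fetch_add_unless(&call->usage, 1, 0): the timer may only take a reference while the call is still live, and whoever removes a pending timer drops that reference. A C11 rendition of the get-unless-zero core:

#include <stdatomic.h>
#include <stdbool.h>

struct obj { atomic_int usage; };

/* get-unless-zero: take a reference only if at least one is still held. */
static bool try_get(struct obj *o)
{
	int n = atomic_load(&o->usage);

	while (n != 0)
		if (atomic_compare_exchange_weak(&o->usage, &n, n + 1))
			return true;
	return false;	/* object already dying; caller must not arm the timer */
}

int main(void)
{
	struct obj o;

	atomic_init(&o.usage, 1);
	if (!try_get(&o))
		return 1;
	/* ... timer armed; del_timer_sync()/expiry paths do the matching put. */
	return atomic_load(&o.usage) == 2 ? 0 : 1;
}
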
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 25bbc4cc8b13..f15d6942da45 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -113,8 +113,8 @@ static __net_exit void rxrpc_exit_net(struct net *net)
struct rxrpc_net *rxnet = rxrpc_net(net);
rxnet->live = false;
- del_timer_sync(&rxnet->peer_keepalive_timer);
cancel_work_sync(&rxnet->peer_keepalive_work);
+ del_timer_sync(&rxnet->peer_keepalive_timer);
rxrpc_destroy_all_calls(rxnet);
rxrpc_destroy_all_connections(rxnet);
rxrpc_destroy_all_peers(rxnet);
diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c
index ead3471307ee..ee269e0e6ee8 100644
--- a/net/rxrpc/server_key.c
+++ b/net/rxrpc/server_key.c
@@ -84,6 +84,9 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
prep->payload.data[1] = (struct rxrpc_security *)sec;
+ if (!sec->preparse_server_key)
+ return -EINVAL;
+
return sec->preparse_server_key(prep);
}
@@ -91,7 +94,7 @@ static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep)
{
const struct rxrpc_security *sec = prep->payload.data[1];
- if (sec)
+ if (sec && sec->free_preparse_server_key)
sec->free_preparse_server_key(prep);
}
@@ -99,7 +102,7 @@ static void rxrpc_destroy_s(struct key *key)
{
const struct rxrpc_security *sec = key->payload.data[1];
- if (sec)
+ if (sec && sec->destroy_server_key)
sec->destroy_server_key(key);
}
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 4ffea1290ce1..553bf41671a6 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -583,22 +583,25 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
if (!ct)
return false;
if (!net_eq(net, read_pnet(&ct->ct_net)))
- return false;
+ goto drop_ct;
if (nf_ct_zone(ct)->id != zone_id)
- return false;
+ goto drop_ct;
/* Force conntrack entry direction. */
if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
if (nf_ct_is_confirmed(ct))
nf_ct_kill(ct);
- nf_conntrack_put(&ct->ct_general);
- nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
-
- return false;
+ goto drop_ct;
}
return true;
+
+drop_ct:
+ nf_ct_put(ct);
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+
+ return false;
}
/* Trim the skb to the length specified by the IP/IPv6 header,
@@ -757,7 +760,7 @@ static void tcf_ct_params_free(struct rcu_head *head)
tcf_ct_flow_table_put(params);
if (params->tmpl)
- nf_conntrack_put(&params->tmpl->ct_general);
+ nf_ct_put(params->tmpl);
kfree(params);
}
@@ -967,7 +970,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
tc_skb_cb(skb)->post_ct = false;
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
- nf_conntrack_put(&ct->ct_general);
+ nf_ct_put(ct);
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index ff47091c385e..b3950963fc8f 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -911,6 +911,7 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
ctx->asoc->base.sk->sk_err = -error;
return;
}
+ ctx->asoc->stats.octrlchunks++;
break;
case SCTP_CID_ABORT:
@@ -935,7 +936,10 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
case SCTP_CID_HEARTBEAT:
if (chunk->pmtu_probe) {
- sctp_packet_singleton(ctx->transport, chunk, ctx->gfp);
+ error = sctp_packet_singleton(ctx->transport,
+ chunk, ctx->gfp);
+ if (!error)
+ ctx->asoc->stats.octrlchunks++;
break;
}
fallthrough;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5c4c0320e822..fa8897497dcc 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2419,8 +2419,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_state != SMC_CLOSED) {
if (val) {
SMC_STAT_INC(smc, ndly_cnt);
- mod_delayed_work(smc->conn.lgr->tx_wq,
- &smc->conn.tx_work, 0);
+ smc_tx_pending(&smc->conn);
+ cancel_delayed_work(&smc->conn.tx_work);
}
}
break;
@@ -2430,8 +2430,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_state != SMC_CLOSED) {
if (!val) {
SMC_STAT_INC(smc, cork_cnt);
- mod_delayed_work(smc->conn.lgr->tx_wq,
- &smc->conn.tx_work, 0);
+ smc_tx_pending(&smc->conn);
+ cancel_delayed_work(&smc->conn.tx_work);
}
}
break;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index dee336eef6d2..7401ec67ebcf 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1822,7 +1822,7 @@ static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
*/
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
- return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
+ return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
/* map an rmb buf to a link */
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 738a4a99c827..31ee76131a79 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -594,27 +594,32 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
return rc;
}
-/* Wakeup sndbuf consumers from process context
- * since there is more data to transmit
- */
-void smc_tx_work(struct work_struct *work)
+void smc_tx_pending(struct smc_connection *conn)
{
- struct smc_connection *conn = container_of(to_delayed_work(work),
- struct smc_connection,
- tx_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
int rc;
- lock_sock(&smc->sk);
if (smc->sk.sk_err)
- goto out;
+ return;
rc = smc_tx_sndbuf_nonempty(conn);
if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
!atomic_read(&conn->bytes_to_rcv))
conn->local_rx_ctrl.prod_flags.write_blocked = 0;
+}
+
+/* Wakeup sndbuf consumers from process context
+ * since there is more data to transmit
+ */
+void smc_tx_work(struct work_struct *work)
+{
+ struct smc_connection *conn = container_of(to_delayed_work(work),
+ struct smc_connection,
+ tx_work);
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
-out:
+ lock_sock(&smc->sk);
+ smc_tx_pending(conn);
release_sock(&smc->sk);
}
diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h
index 07e6ad76224a..a59f370b8b43 100644
--- a/net/smc/smc_tx.h
+++ b/net/smc/smc_tx.h
@@ -27,6 +27,7 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn)
return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
}
+void smc_tx_pending(struct smc_connection *conn);
void smc_tx_work(struct work_struct *work);
void smc_tx_init(struct smc_sock *smc);
int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 5da1d7e8468a..3286add1a958 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1065,7 +1065,9 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt)
static
void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
{
- if (task->tk_xprt)
+ if (task->tk_xprt &&
+ !(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) &&
+ (task->tk_flags & RPC_TASK_MOVEABLE)))
return;
if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
task->tk_xprt = rpc_task_get_first_xprt(clnt);
@@ -2200,6 +2202,7 @@ call_transmit_status(struct rpc_task *task)
* socket just returned a connection error,
* then hold onto the transport lock.
*/
+ case -ENOMEM:
case -ENOBUFS:
rpc_delay(task, HZ>>2);
fallthrough;
@@ -2283,6 +2286,7 @@ call_bc_transmit_status(struct rpc_task *task)
case -ENOTCONN:
case -EPIPE:
break;
+ case -ENOMEM:
case -ENOBUFS:
rpc_delay(task, HZ>>2);
fallthrough;
@@ -2365,6 +2369,11 @@ call_status(struct rpc_task *task)
case -EPIPE:
case -EAGAIN:
break;
+ case -ENFILE:
+ case -ENOBUFS:
+ case -ENOMEM:
+ rpc_delay(task, HZ>>2);
+ break;
case -EIO:
/* shutdown or soft timeout */
goto out_exit;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c045f63d11fa..f0f55fbd1375 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -186,11 +186,6 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
/*
* Add new request to wait queue.
- *
- * Swapper tasks always get inserted at the head of the queue.
- * This should avoid many nasty memory deadlocks and hopefully
- * improve overall performance.
- * Everyone else gets appended to the queue to ensure proper FIFO behavior.
*/
static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
struct rpc_task *task,
@@ -199,8 +194,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
if (RPC_IS_PRIORITY(queue))
__rpc_add_wait_queue_priority(queue, task, queue_priority);
- else if (RPC_IS_SWAPPER(task))
- list_add(&task->u.tk_wait.list, &queue->tasks[0]);
else
list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
task->tk_waitqueue = queue;
@@ -1012,8 +1005,10 @@ int rpc_malloc(struct rpc_task *task)
struct rpc_buffer *buf;
gfp_t gfp = GFP_NOFS;
+ if (RPC_IS_ASYNC(task))
+ gfp = GFP_NOWAIT | __GFP_NOWARN;
if (RPC_IS_SWAPPER(task))
- gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+ gfp |= __GFP_MEMALLOC;
size += sizeof(struct rpc_buffer);
if (size <= RPC_BUFFER_MAXSIZE)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 478f857cdaed..6ea3d87e1147 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1096,7 +1096,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
int ret;
*sentp = 0;
- xdr_alloc_bvec(xdr, GFP_KERNEL);
+ ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
if (ret < 0)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d4aeee83763e..e4adb780b69e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -767,7 +767,8 @@ EXPORT_SYMBOL_GPL(xprt_disconnect_done);
*/
static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt)
{
- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ if (test_and_set_bit(XPRT_CLOSE_WAIT, &xprt->state))
+ return;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
@@ -1353,17 +1354,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
INIT_LIST_HEAD(&req->rq_xmit2);
goto out;
}
- } else if (RPC_IS_SWAPPER(task)) {
- list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
- if (pos->rq_cong || pos->rq_bytes_sent)
- continue;
- if (RPC_IS_SWAPPER(pos->rq_task))
- continue;
- /* Note: req is added _before_ pos */
- list_add_tail(&req->rq_xmit, &pos->rq_xmit);
- INIT_LIST_HEAD(&req->rq_xmit2);
- goto out;
- }
} else if (!req->rq_seqno) {
list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
if (pos->rq_task->tk_owner != task->tk_owner)
@@ -1686,12 +1676,15 @@ out:
static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
{
struct rpc_rqst *req = ERR_PTR(-EAGAIN);
+ gfp_t gfp_mask = GFP_KERNEL;
if (xprt->num_reqs >= xprt->max_reqs)
goto out;
++xprt->num_reqs;
spin_unlock(&xprt->reserve_lock);
- req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
+ if (current->flags & PF_WQ_WORKER)
+ gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;
+ req = kzalloc(sizeof(*req), gfp_mask);
spin_lock(&xprt->reserve_lock);
if (req != NULL)
goto out;
@@ -2111,7 +2104,14 @@ static void xprt_destroy(struct rpc_xprt *xprt)
*/
wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);
+ /*
+ * xprt_schedule_autodisconnect() can run after XPRT_LOCKED
+ * is cleared. We use ->transport_lock to ensure the mod_timer()
+	 * can only run *before* del_timer_sync(), never after.
+ */
+ spin_lock(&xprt->transport_lock);
del_timer_sync(&xprt->timer);
+ spin_unlock(&xprt->transport_lock);
/*
* Destroy sockets etc from the system workqueue so they can
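
The idiom applied here: serialize the late re-arm and the final deletion with one lock, so the re-arm can only land before the teardown. A userspace sketch with POSIX threads; the `dying` flag is a device of this sketch (the kernel fix gets the same effect from ->transport_lock and the surrounding state machine):

#include <pthread.h>
#include <stdbool.h>

struct guarded_timer {
	pthread_mutex_t lock;
	bool dying;
	bool armed;	/* stands in for a pending timer */
};

/* Autodisconnect-style path: may race with destruction. */
static void rearm(struct guarded_timer *t)
{
	pthread_mutex_lock(&t->lock);
	if (!t->dying)
		t->armed = true;	/* mod_timer() equivalent */
	pthread_mutex_unlock(&t->lock);
}

/* Destruction path: once `dying` is set under the lock, no re-arm
 * can slip in after the final cancellation. */
static void destroy(struct guarded_timer *t)
{
	pthread_mutex_lock(&t->lock);
	t->dying = true;
	t->armed = false;	/* del_timer_sync() equivalent */
	pthread_mutex_unlock(&t->lock);
}

int main(void)
{
	struct guarded_timer t = { PTHREAD_MUTEX_INITIALIZER, false, false };

	rearm(&t);
	destroy(&t);
	rearm(&t);		/* late re-arm is now a no-op */
	return t.armed ? 1 : 0;
}
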
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 16e5696314a4..32df23796747 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -521,7 +521,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
return;
out_sleep:
- task->tk_status = -EAGAIN;
+ task->tk_status = -ENOMEM;
xprt_add_backlog(xprt, task);
}
@@ -574,8 +574,10 @@ xprt_rdma_allocate(struct rpc_task *task)
gfp_t flags;
flags = RPCRDMA_DEF_GFP;
+ if (RPC_IS_ASYNC(task))
+ flags = GFP_NOWAIT | __GFP_NOWARN;
if (RPC_IS_SWAPPER(task))
- flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+ flags |= __GFP_MEMALLOC;
if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
flags))
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 04f1b78bcbca..c2f7819827b6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -763,12 +763,12 @@ xs_stream_start_connect(struct sock_xprt *transport)
/**
* xs_nospace - handle transmit was incomplete
* @req: pointer to RPC request
+ * @transport: pointer to struct sock_xprt
*
*/
-static int xs_nospace(struct rpc_rqst *req)
+static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport)
{
- struct rpc_xprt *xprt = req->rq_xprt;
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct rpc_xprt *xprt = &transport->xprt;
struct sock *sk = transport->inet;
int ret = -EAGAIN;
@@ -779,25 +779,49 @@ static int xs_nospace(struct rpc_rqst *req)
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
+ struct socket_wq *wq;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
+ rcu_read_unlock();
+
/* wait for more buffer space */
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
sk->sk_write_pending++;
xprt_wait_for_buffer_space(xprt);
} else
ret = -ENOTCONN;
spin_unlock(&xprt->transport_lock);
+ return ret;
+}
- /* Race breaker in case memory is freed before above code is called */
- if (ret == -EAGAIN) {
- struct socket_wq *wq;
+static int xs_sock_nospace(struct rpc_rqst *req)
+{
+ struct sock_xprt *transport =
+ container_of(req->rq_xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
+ int ret = -EAGAIN;
- rcu_read_lock();
- wq = rcu_dereference(sk->sk_wq);
- set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
- rcu_read_unlock();
+ lock_sock(sk);
+ if (!sock_writeable(sk))
+ ret = xs_nospace(req, transport);
+ release_sock(sk);
+ return ret;
+}
- sk->sk_write_space(sk);
- }
+static int xs_stream_nospace(struct rpc_rqst *req)
+{
+ struct sock_xprt *transport =
+ container_of(req->rq_xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
+ int ret = -EAGAIN;
+
+ lock_sock(sk);
+ if (!sk_stream_memory_free(sk))
+ ret = xs_nospace(req, transport);
+ release_sock(sk);
return ret;
}
@@ -887,7 +911,7 @@ static int xs_local_send_request(struct rpc_rqst *req)
case -ENOBUFS:
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_stream_nospace(req);
break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
@@ -963,7 +987,7 @@ process_status:
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_sock_nospace(req);
break;
case -ENETUNREACH:
case -ENOBUFS:
@@ -1083,7 +1107,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_stream_nospace(req);
break;
case -ECONNRESET:
case -ECONNREFUSED:
@@ -2233,6 +2257,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
fallthrough;
case -EINPROGRESS:
/* SYN_SENT! */
+ set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state);
if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
break;
@@ -2258,10 +2283,14 @@ static void xs_tcp_setup_socket(struct work_struct *work)
struct rpc_xprt *xprt = &transport->xprt;
int status = -EIO;
- if (!sock) {
- sock = xs_create_sock(xprt, transport,
- xs_addr(xprt)->sa_family, SOCK_STREAM,
- IPPROTO_TCP, true);
+ if (xprt_connected(xprt))
+ goto out;
+ if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT,
+ &transport->sock_state) ||
+ !sock) {
+ xs_reset_transport(transport);
+ sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family,
+ SOCK_STREAM, IPPROTO_TCP, true);
if (IS_ERR(sock)) {
status = PTR_ERR(sock);
goto out;
@@ -2343,11 +2372,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
if (transport->sock != NULL) {
dprintk("RPC: xs_connect delayed xprt %p for %lu "
- "seconds\n",
- xprt, xprt->reestablish_timeout / HZ);
-
- /* Start by resetting any existing state */
- xs_reset_transport(transport);
+ "seconds\n", xprt, xprt->reestablish_timeout / HZ);
delay = xprt_reconnect_delay(xprt);
xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 7545321c3440..17f8c523e33b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2852,7 +2852,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list)
/* Try again later if dest link is congested */
if (tsk->cong_link_cnt) {
- sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
+ sk_reset_timer(sk, &sk->sk_timer,
+ jiffies + msecs_to_jiffies(100));
return;
}
/* Prepare SYN for retransmit */
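
sk_reset_timer() takes an absolute expiry in jiffies, so passing a bare msecs_to_jiffies(100) delay is a deadline in the distant past that fires immediately; the fix adds the `jiffies +` base. The same absolute-deadline discipline in portable C:

#define _POSIX_C_SOURCE 199309L
#include <time.h>

/* Build an absolute deadline `ms` milliseconds from now. Returning the
 * offset alone (the pre-fix bug) would be a deadline near time zero,
 * i.e. always already expired. */
static struct timespec deadline_ms(long ms)
{
	struct timespec t;

	clock_gettime(CLOCK_MONOTONIC, &t);	/* base at "now", like jiffies + ... */
	t.tv_sec += ms / 1000;
	t.tv_nsec += (ms % 1000) * 1000000L;
	if (t.tv_nsec >= 1000000000L) {
		t.tv_sec++;
		t.tv_nsec -= 1000000000L;
	}
	return t;	/* feed to e.g. pthread_cond_timedwait() */
}

int main(void)
{
	struct timespec d = deadline_ms(100);

	return d.tv_sec == 0;	/* non-zero base shows it is absolute */
}
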
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index bd96ec26f4f9..794ef3b3d7d4 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1483,7 +1483,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
if (prot->version == TLS_1_3_VERSION ||
prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305)
memcpy(iv + iv_offset, tls_ctx->rx.iv,
- crypto_aead_ivsize(ctx->aead_recv));
+ prot->iv_size + prot->salt_size);
else
memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index b0bfc78e421c..62f47821d783 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1996,7 +1996,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
if (ousk->oob_skb)
consume_skb(ousk->oob_skb);
- ousk->oob_skb = skb;
+ WRITE_ONCE(ousk->oob_skb, skb);
scm_stat_add(other, skb);
skb_queue_tail(&other->sk_receive_queue, skb);
@@ -2514,9 +2514,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
oob_skb = u->oob_skb;
- if (!(state->flags & MSG_PEEK)) {
- u->oob_skb = NULL;
- }
+ if (!(state->flags & MSG_PEEK))
+ WRITE_ONCE(u->oob_skb, NULL);
unix_state_unlock(sk);
@@ -2551,7 +2550,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
skb = NULL;
} else if (sock_flag(sk, SOCK_URGINLINE)) {
if (!(flags & MSG_PEEK)) {
- u->oob_skb = NULL;
+ WRITE_ONCE(u->oob_skb, NULL);
consume_skb(skb);
}
} else if (!(flags & MSG_PEEK)) {
@@ -3006,11 +3005,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCATMARK:
{
struct sk_buff *skb;
- struct unix_sock *u = unix_sk(sk);
int answ = 0;
skb = skb_peek(&sk->sk_receive_queue);
- if (skb && skb == u->oob_skb)
+ if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
answ = 1;
err = put_user(answ, (int __user *)arg);
}
@@ -3051,6 +3049,10 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
mask |= EPOLLIN | EPOLLRDNORM;
if (sk_is_readable(sk))
mask |= EPOLLIN | EPOLLRDNORM;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (READ_ONCE(unix_sk(sk)->oob_skb))
+ mask |= EPOLLPRI;
+#endif
/* Connection-based need to check for termination and startup */
if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
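
The af_unix change pairs WRITE_ONCE() on oob_skb (performed under the state lock) with READ_ONCE() at the lockless readers in poll and SIOCATMARK. In C11 terms these are relaxed atomic accesses: they impose no ordering, they only make the racy load and store well defined and tear-free. A sketch:

#include <stdatomic.h>
#include <stddef.h>

struct peer {
	_Atomic(void *) oob_skb;
};

/* Writer side: runs with the peer's state lock held in af_unix;
 * relaxed store ~ WRITE_ONCE(). */
static void set_oob(struct peer *p, void *skb)
{
	atomic_store_explicit(&p->oob_skb, skb, memory_order_relaxed);
}

/* Lockless reader (poll/SIOCATMARK); relaxed load ~ READ_ONCE(). */
static int have_oob(struct peer *p)
{
	return atomic_load_explicit(&p->oob_skb, memory_order_relaxed) != NULL;
}

int main(void)
{
	struct peer p = { NULL };
	int dummy;

	set_oob(&p, &dummy);
	return have_oob(&p) ? 0 : 1;
}
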
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 91a5c65707ba..5df530e89e5a 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -333,7 +333,8 @@ void vsock_remove_sock(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);
-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+ void (*fn)(struct sock *sk))
{
int i;
@@ -342,8 +343,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
struct vsock_sock *vsk;
list_for_each_entry(vsk, &vsock_connected_table[i],
- connected_table)
+ connected_table) {
+ if (vsk->transport != transport)
+ continue;
+
fn(sk_vsock(vsk));
+ }
}
spin_unlock_bh(&vsock_table_lock);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 4f7c99dfd16c..c5f936fbf876 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -24,6 +24,7 @@
static struct workqueue_struct *virtio_vsock_workqueue;
static struct virtio_vsock __rcu *the_virtio_vsock;
static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
+static struct virtio_transport virtio_transport; /* forward declaration */
struct virtio_vsock {
struct virtio_device *vdev;
@@ -384,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
switch (le32_to_cpu(event->id)) {
case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
virtio_vsock_update_guest_cid(vsock);
- vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+ vsock_for_each_connected_socket(&virtio_transport.transport,
+ virtio_vsock_reset_sock);
break;
}
}
@@ -620,6 +622,13 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
INIT_WORK(&vsock->event_work, virtio_transport_event_work);
INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
+ if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
+ vsock->seqpacket_allow = true;
+
+ vdev->priv = vsock;
+
+ virtio_device_ready(vdev);
+
mutex_lock(&vsock->tx_lock);
vsock->tx_run = true;
mutex_unlock(&vsock->tx_lock);
@@ -634,10 +643,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->event_run = true;
mutex_unlock(&vsock->event_lock);
- if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
- vsock->seqpacket_allow = true;
-
- vdev->priv = vsock;
rcu_assign_pointer(the_virtio_vsock, vsock);
mutex_unlock(&the_virtio_vsock_mutex);
@@ -662,7 +667,8 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
synchronize_rcu();
/* Reset all connected sockets when the device disappears */
- vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+ vsock_for_each_connected_socket(&virtio_transport.transport,
+ virtio_vsock_reset_sock);
/* Stop all work handlers to make sure no one is accessing the device,
* so we can safely call vdev->config->reset().
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 7aef34e32bdf..b17dc9745188 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -75,6 +75,8 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
static int PROTOCOL_OVERRIDE = -1;
+static struct vsock_transport vmci_transport; /* forward declaration */
+
/* Helper function to convert from a VMCI error code to a VSock error code. */
static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
@@ -882,7 +884,8 @@ static void vmci_transport_qp_resumed_cb(u32 sub_id,
const struct vmci_event_data *e_data,
void *client_data)
{
- vsock_for_each_connected_socket(vmci_transport_handle_detach);
+ vsock_for_each_connected_socket(&vmci_transport,
+ vmci_transport_handle_detach);
}
static void vmci_transport_recv_pkt_work(struct work_struct *work)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index adc0d14cfd86..8e1e578d64bc 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -702,8 +702,12 @@ static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap,
for (i = 0; i < request->n_ssids; i++) {
/* wildcard ssid in the scan request */
- if (!request->ssids[i].ssid_len)
+ if (!request->ssids[i].ssid_len) {
+ if (ap->multi_bss && !ap->transmitted_bssid)
+ continue;
+
return true;
+ }
if (ap->ssid_len &&
ap->ssid_len == request->ssids[i].ssid_len) {
@@ -829,6 +833,9 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
!cfg80211_find_ssid_match(ap, request))
continue;
+ if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid)
+ continue;
+
cfg80211_scan_req_add_chan(request, chan, true);
memcpy(scan_6ghz_params->bssid, ap->bssid, ETH_ALEN);
scan_6ghz_params->short_ssid = ap->short_ssid;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 3583354a7d7f..3a171828638b 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1765,10 +1765,15 @@ void x25_kill_by_neigh(struct x25_neigh *nb)
write_lock_bh(&x25_list_lock);
- sk_for_each(s, &x25_list)
- if (x25_sk(s)->neighbour == nb)
+ sk_for_each(s, &x25_list) {
+ if (x25_sk(s)->neighbour == nb) {
+ write_unlock_bh(&x25_list_lock);
+ lock_sock(s);
x25_disconnect(s, ENETUNREACH, 0, 0);
-
+ release_sock(s);
+ write_lock_bh(&x25_list_lock);
+ }
+ }
write_unlock_bh(&x25_list_lock);
/* Remove any related forwards */
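
x25_disconnect() needs lock_sock(), which cannot be taken while holding the list write lock, so the fix drops and retakes the list lock around each matching socket. The generic shape of that walk; note the stated assumption that the current node stays valid across the unlock, which socket refcounting provides in the real code:

#include <pthread.h>
#include <stddef.h>

struct node {
	struct node *next;
	int match;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Walk under the list lock, but run the slow per-node operation
 * (lock_sock() + x25_disconnect() in the patch) with the lock dropped. */
static void kill_matching(struct node *head, void (*slow_op)(struct node *))
{
	struct node *n;

	pthread_mutex_lock(&list_lock);
	for (n = head; n; n = n->next) {
		if (!n->match)
			continue;
		pthread_mutex_unlock(&list_lock);
		slow_op(n);			/* may block or sleep */
		pthread_mutex_lock(&list_lock);	/* assumes `n` stays valid */
	}
	pthread_mutex_unlock(&list_lock);
}

static void noop(struct node *n) { (void)n; }

int main(void)
{
	struct node b = { NULL, 1 }, a = { &b, 0 };

	kill_matching(&a, noop);
	return 0;
}
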
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index d6b500dc4208..426e287431d2 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -418,18 +418,8 @@ EXPORT_SYMBOL(xsk_tx_peek_release_desc_batch);
static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{
struct net_device *dev = xs->dev;
- int err;
-
- rcu_read_lock();
- err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
- rcu_read_unlock();
-
- return err;
-}
-static int xsk_zc_xmit(struct xdp_sock *xs)
-{
- return xsk_wakeup(xs, XDP_WAKEUP_TX);
+ return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
static void xsk_destruct_skb(struct sk_buff *skb)
@@ -548,6 +538,12 @@ static int xsk_generic_xmit(struct sock *sk)
mutex_lock(&xs->mutex);
+ /* Since we dropped the RCU read lock, the socket state might have changed. */
+ if (unlikely(!xsk_is_bound(xs))) {
+ err = -ENXIO;
+ goto out;
+ }
+
if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
@@ -611,16 +607,26 @@ out:
return err;
}
-static int __xsk_sendmsg(struct sock *sk)
+static int xsk_xmit(struct sock *sk)
{
struct xdp_sock *xs = xdp_sk(sk);
+ int ret;
if (unlikely(!(xs->dev->flags & IFF_UP)))
return -ENETDOWN;
if (unlikely(!xs->tx))
return -ENOBUFS;
- return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
+ if (xs->zc)
+ return xsk_wakeup(xs, XDP_WAKEUP_TX);
+
+ /* Drop the RCU lock since the SKB path might sleep. */
+ rcu_read_unlock();
+ ret = xsk_generic_xmit(sk);
+	/* Reacquire RCU lock before going into common code. */
+ rcu_read_lock();
+
+ return ret;
}
static bool xsk_no_wakeup(struct sock *sk)
@@ -634,7 +640,7 @@ static bool xsk_no_wakeup(struct sock *sk)
#endif
}
-static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
+static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
struct sock *sk = sock->sk;
@@ -654,11 +660,22 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
pool = xs->pool;
if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
- return __xsk_sendmsg(sk);
+ return xsk_xmit(sk);
return 0;
}
-static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
+static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __xsk_sendmsg(sock, m, total_len);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
{
bool need_wait = !(flags & MSG_DONTWAIT);
struct sock *sk = sock->sk;
@@ -684,6 +701,17 @@ static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int fl
return 0;
}
+static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __xsk_recvmsg(sock, m, len, flags);
+ rcu_read_unlock();
+
+ return ret;
+}
+
static __poll_t xsk_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
@@ -694,8 +722,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
sock_poll_wait(file, sock, wait);
- if (unlikely(!xsk_is_bound(xs)))
+ rcu_read_lock();
+ if (unlikely(!xsk_is_bound(xs))) {
+ rcu_read_unlock();
return mask;
+ }
pool = xs->pool;
@@ -704,7 +735,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
xsk_wakeup(xs, pool->cached_need_wakeup);
else
/* Poll needs to drive Tx also in copy mode */
- __xsk_sendmsg(sk);
+ xsk_xmit(sk);
}
if (xs->rx && !xskq_prod_is_empty(xs->rx))
@@ -712,6 +743,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
if (xs->tx && xsk_tx_writeable(xs))
mask |= EPOLLOUT | EPOLLWRNORM;
+ rcu_read_unlock();
return mask;
}
@@ -743,7 +775,6 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
/* Wait for driver to stop using the xdp socket. */
xp_del_xsk(xs->pool, xs);
- xs->dev = NULL;
synchronize_net();
dev_put(dev);
}
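
The xsk rework wraps sendmsg/recvmsg/poll in rcu_read_lock(), and the copy-mode transmit path, which can sleep, drops the RCU lock, does its work, reacquires it, and revalidates the binding it checked earlier. A control-flow sketch; the rcu stubs are placeholders for the kernel primitives:

#include <errno.h>
#include <stdbool.h>

/* Stubs for the kernel primitives; real code uses rcu_read_lock()/unlock(). */
static void rcu_read_lock(void)   { }
static void rcu_read_unlock(void) { }

struct xsk { bool bound; bool zero_copy; };

/* May sleep, so it must not run under rcu_read_lock(). */
static int sleeping_xmit(struct xsk *xs)
{
	return xs->bound ? 0 : -ENXIO;	/* revalidate: state may have changed */
}

static int sendmsg_like(struct xsk *xs)
{
	int ret = 0;

	rcu_read_lock();
	if (!xs->bound) {
		rcu_read_unlock();
		return -ENXIO;
	}
	if (xs->zero_copy) {
		/* driver wakeup: safe under RCU, never sleeps */
	} else {
		rcu_read_unlock();	/* drop RCU before sleeping */
		ret = sleeping_xmit(xs);
		rcu_read_lock();	/* reacquire for the common exit */
	}
	rcu_read_unlock();
	return ret;
}

int main(void)
{
	struct xsk xs = { true, false };

	return sendmsg_like(&xs);
}
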
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 4e3c62d1ad9e..1e8b26eecb3f 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -304,7 +304,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ if (skb->len > 1280)
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ else
+ goto xmit;
} else {
if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
goto xmit;