Diffstat (limited to 'net')
-rw-r--r--  net/bridge/br_input.c | 7
-rw-r--r--  net/bridge/br_multicast.c | 2
-rw-r--r--  net/bridge/br_stp_if.c | 43
-rw-r--r--  net/bridge/netfilter/ebtables.c | 2
-rw-r--r--  net/bridge/netfilter/nft_meta_bridge.c | 1
-rw-r--r--  net/core/dev.c | 16
-rw-r--r--  net/core/flow_dissector.c | 6
-rw-r--r--  net/dsa/Kconfig | 3
-rw-r--r--  net/dsa/Makefile | 1
-rw-r--r--  net/dsa/dsa.c | 3
-rw-r--r--  net/dsa/dsa_priv.h | 2
-rw-r--r--  net/dsa/tag_qca.c | 138
-rw-r--r--  net/ipv4/devinet.c | 11
-rw-r--r--  net/ipv4/fib_frontend.c | 3
-rw-r--r--  net/ipv4/fib_semantics.c | 8
-rw-r--r--  net/ipv4/ip_tunnel.c | 76
-rw-r--r--  net/ipv4/ipip.c | 35
-rw-r--r--  net/ipv4/netfilter/nft_reject_ipv4.c | 1
-rw-r--r--  net/ipv4/tcp.c | 31
-rw-r--r--  net/ipv4/tcp_fastopen.c | 2
-rw-r--r--  net/ipv4/tcp_input.c | 3
-rw-r--r--  net/ipv4/tcp_yeah.c | 2
-rw-r--r--  net/ipv4/xfrm4_policy.c | 2
-rw-r--r--  net/ipv6/addrconf.c | 18
-rw-r--r--  net/ipv6/ip6_tunnel.c | 176
-rw-r--r--  net/ipv6/netfilter/nft_reject_ipv6.c | 1
-rw-r--r--  net/ipv6/ping.c | 9
-rw-r--r--  net/ipv6/xfrm6_input.c | 1
-rw-r--r--  net/ipv6/xfrm6_policy.c | 2
-rw-r--r--  net/kcm/kcmsock.c | 3
-rw-r--r--  net/l2tp/l2tp_core.c | 3
-rw-r--r--  net/l2tp/l2tp_eth.c | 2
-rw-r--r--  net/llc/af_llc.c | 4
-rw-r--r--  net/mac80211/tdls.c | 7
-rw-r--r--  net/netfilter/nf_tables_netdev.c | 1
-rw-r--r--  net/netfilter/nfnetlink_acct.c | 6
-rw-r--r--  net/netfilter/nfnetlink_cttimeout.c | 49
-rw-r--r--  net/netfilter/nft_meta.c | 17
-rw-r--r--  net/netfilter/nft_reject.c | 16
-rw-r--r--  net/netfilter/nft_reject_inet.c | 7
-rw-r--r--  net/openvswitch/actions.c | 30
-rw-r--r--  net/openvswitch/flow.c | 40
-rw-r--r--  net/openvswitch/flow.h | 4
-rw-r--r--  net/openvswitch/flow_table.c | 25
-rw-r--r--  net/rxrpc/Kconfig | 7
-rw-r--r--  net/rxrpc/af_rxrpc.c | 34
-rw-r--r--  net/rxrpc/ar-internal.h | 5
-rw-r--r--  net/rxrpc/call_accept.c | 20
-rw-r--r--  net/rxrpc/call_event.c | 2
-rw-r--r--  net/rxrpc/call_object.c | 7
-rw-r--r--  net/rxrpc/conn_object.c | 10
-rw-r--r--  net/rxrpc/input.c | 41
-rw-r--r--  net/rxrpc/local_event.c | 13
-rw-r--r--  net/rxrpc/local_object.c | 41
-rw-r--r--  net/rxrpc/misc.c | 5
-rw-r--r--  net/rxrpc/output.c | 54
-rw-r--r--  net/rxrpc/peer_event.c | 26
-rw-r--r--  net/rxrpc/peer_object.c | 121
-rw-r--r--  net/rxrpc/proc.c | 30
-rw-r--r--  net/rxrpc/recvmsg.c | 5
-rw-r--r--  net/rxrpc/sysctl.c | 2
-rw-r--r--  net/rxrpc/utils.c | 2
-rw-r--r--  net/sched/Kconfig | 11
-rw-r--r--  net/sched/Makefile | 1
-rw-r--r--  net/sched/act_bpf.c | 5
-rw-r--r--  net/sched/act_skbmod.c | 301
-rw-r--r--  net/sched/act_tunnel_key.c | 17
-rw-r--r--  net/sched/cls_bpf.c | 3
-rw-r--r--  net/sched/cls_flower.c | 21
-rw-r--r--  net/sched/sch_fq.c | 32
-rw-r--r--  net/sctp/chunk.c | 13
-rw-r--r--  net/sctp/output.c | 63
-rw-r--r--  net/sctp/outqueue.c | 88
-rw-r--r--  net/sctp/sm_sideeffect.c | 25
-rw-r--r--  net/sctp/socket.c | 8
-rw-r--r--  net/sunrpc/clnt.c | 4
-rw-r--r--  net/tipc/name_distr.c | 8
-rw-r--r--  net/tipc/udp_media.c | 3
-rw-r--r--  net/unix/af_unix.c | 111
-rw-r--r--  net/wireless/wext-core.c | 25
-rw-r--r--  net/xfrm/xfrm_input.c | 14
-rw-r--r--  net/xfrm/xfrm_policy.c | 4
-rw-r--r--  net/xfrm/xfrm_user.c | 13
83 files changed, 1446 insertions(+), 568 deletions(-)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 8a4368461fb0..855b72fbe1da 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -80,13 +80,10 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
- if (dev->flags & IFF_NOARP)
+ if ((dev->flags & IFF_NOARP) ||
+ !pskb_may_pull(skb, arp_hdr_len(dev)))
return;
- if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
- dev->stats.tx_dropped++;
- return;
- }
parp = arp_hdr(skb);
if (parp->ar_pro != htons(ETH_P_IP) ||
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index a5423a1eec05..c5fea9393946 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1138,7 +1138,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
} else {
err = br_ip6_multicast_add_group(br, port,
&grec->grec_mca, vid);
- if (!err)
+ if (err)
break;
}
}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 341caa0ca63a..d8ad73b38de2 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -134,17 +134,36 @@ void br_stp_disable_port(struct net_bridge_port *p)
br_become_root_bridge(br);
}
-static void br_stp_start(struct net_bridge *br)
+static int br_stp_call_user(struct net_bridge *br, char *arg)
{
- int r;
- char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
+ char *argv[] = { BR_STP_PROG, br->dev->name, arg, NULL };
char *envp[] = { NULL };
+ int rc;
+
+ /* call userspace STP and report program errors */
+ rc = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ if (rc > 0) {
+ if (rc & 0xff)
+ br_debug(br, BR_STP_PROG " received signal %d\n",
+ rc & 0x7f);
+ else
+ br_debug(br, BR_STP_PROG " exited with code %d\n",
+ (rc >> 8) & 0xff);
+ }
+
+ return rc;
+}
+
+static void br_stp_start(struct net_bridge *br)
+{
struct net_bridge_port *p;
+ int err = -ENOENT;
if (net_eq(dev_net(br->dev), &init_net))
- r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
- else
- r = -ENOENT;
+ err = br_stp_call_user(br, "start");
+
+ if (err && err != -ENOENT)
+ br_err(br, "failed to start userspace STP (%d)\n", err);
spin_lock_bh(&br->lock);
@@ -153,9 +172,10 @@ static void br_stp_start(struct net_bridge *br)
else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY)
__br_set_forward_delay(br, BR_MAX_FORWARD_DELAY);
- if (r == 0) {
+ if (!err) {
br->stp_enabled = BR_USER_STP;
br_debug(br, "userspace STP started\n");
+
/* Stop hello and hold timers */
del_timer(&br->hello_timer);
list_for_each_entry(p, &br->port_list, list)
@@ -173,14 +193,13 @@ static void br_stp_start(struct net_bridge *br)
static void br_stp_stop(struct net_bridge *br)
{
- int r;
- char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL };
- char *envp[] = { NULL };
struct net_bridge_port *p;
+ int err;
if (br->stp_enabled == BR_USER_STP) {
- r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
- br_info(br, "userspace STP stopped, return code %d\n", r);
+ err = br_stp_call_user(br, "stop");
+ if (err)
+ br_err(br, "failed to stop userspace STP (%d)\n", err);
/* To start timers on any ports left in blocking */
mod_timer(&br->hello_timer, jiffies + br->hello_time);
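The return-code handling above follows the wait(2)-style status word that call_usermodehelper() yields under UMH_WAIT_PROC: a nonzero low byte means the helper died on a signal, otherwise bits 8-15 carry its exit code. A minimal userspace sketch of the same decoding (illustrative values, not part of the patch):

#include <stdio.h>

/* Decode a wait(2)-style status word the way br_stp_call_user() does. */
static void decode_umh_status(int rc)
{
	if (rc & 0xff)
		printf("helper received signal %d\n", rc & 0x7f);
	else
		printf("helper exited with code %d\n", (rc >> 8) & 0xff);
}

int main(void)
{
	decode_umh_status(1 << 8);	/* exit(1)  -> exited with code 1 */
	decode_umh_status(9);		/* SIGKILL  -> received signal 9 */
	return 0;
}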
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index cceac5bb658f..0833c251aef7 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -368,6 +368,8 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) {
+ if (!IS_ERR(match))
+ module_put(match->me);
request_module("ebt_%s", m->u.name);
match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
}
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index 4b901d9f2e7c..ad47a921b701 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -86,6 +86,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = {
.init = nft_meta_set_init,
.destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
+ .validate = nft_meta_set_validate,
};
static const struct nft_expr_ops *
diff --git a/net/core/dev.c b/net/core/dev.c
index b0d307b6af19..9dbece2f1296 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3964,6 +3964,22 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
}
/**
+ * netdev_is_rx_handler_busy - check if receive handler is registered
+ * @dev: device to check
+ *
+ * Check if a receive handler is already registered for a given device.
+ * Return true if there is one.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+bool netdev_is_rx_handler_busy(struct net_device *dev)
+{
+ ASSERT_RTNL();
+ return dev && rtnl_dereference(dev->rx_handler);
+}
+EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
+
+/**
* netdev_rx_handler_register - register receive handler
* @dev: device to register a handler for
* @rx_handler: receive handler to register
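A usage sketch for the new helper (hypothetical driver code, not from this patch): a caller that wants to claim a device's rx path can probe for an existing handler under rtnl_lock() before registering its own, since only one rx_handler may be attached per device.

static rx_handler_result_t my_rx_handler(struct sk_buff **pskb);

/* Hypothetical: attach our handler only if nobody (bridge, bonding,
 * macvlan, ...) already owns the device's rx path. The caller must
 * hold rtnl_lock(), which both helpers assert.
 */
static int my_attach_port(struct net_device *dev, void *priv)
{
	if (netdev_is_rx_handler_busy(dev))
		return -EBUSY;

	return netdev_rx_handler_register(dev, my_rx_handler, priv);
}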
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index a2879c0f6c4c..1a7b80f73376 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -750,11 +750,13 @@ EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
void __skb_get_hash(struct sk_buff *skb)
{
struct flow_keys keys;
+ u32 hash;
__flow_hash_secret_init();
- __skb_set_sw_hash(skb, ___skb_get_hash(skb, &keys, hashrnd),
- flow_keys_have_l4(&keys));
+ hash = ___skb_get_hash(skb, &keys, hashrnd);
+
+ __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
}
EXPORT_SYMBOL(__skb_get_hash);
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index ff7736f7ff42..96e47c539bee 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -38,4 +38,7 @@ config NET_DSA_TAG_EDSA
config NET_DSA_TAG_TRAILER
bool
+config NET_DSA_TAG_QCA
+ bool
+
endif
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 8af4ded70f1c..a3380ed0e0be 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -7,3 +7,4 @@ dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
+dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index d8d267e9a872..66e31acfcad8 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -54,6 +54,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
#ifdef CONFIG_NET_DSA_TAG_BRCM
[DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
#endif
+#ifdef CONFIG_NET_DSA_TAG_QCA
+ [DSA_TAG_PROTO_QCA] = &qca_netdev_ops,
+#endif
[DSA_TAG_PROTO_NONE] = &none_ops,
};
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 00077a9c97f4..6cfd7388834e 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -81,5 +81,7 @@ extern const struct dsa_device_ops trailer_netdev_ops;
/* tag_brcm.c */
extern const struct dsa_device_ops brcm_netdev_ops;
+/* tag_qca.c */
+extern const struct dsa_device_ops qca_netdev_ops;
#endif
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
new file mode 100644
index 000000000000..0c90cacee7aa
--- /dev/null
+++ b/net/dsa/tag_qca.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/etherdevice.h>
+#include "dsa_priv.h"
+
+#define QCA_HDR_LEN 2
+#define QCA_HDR_VERSION 0x2
+
+#define QCA_HDR_RECV_VERSION_MASK GENMASK(15, 14)
+#define QCA_HDR_RECV_VERSION_S 14
+#define QCA_HDR_RECV_PRIORITY_MASK GENMASK(13, 11)
+#define QCA_HDR_RECV_PRIORITY_S 11
+#define QCA_HDR_RECV_TYPE_MASK GENMASK(10, 6)
+#define QCA_HDR_RECV_TYPE_S 6
+#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3)
+#define QCA_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
+
+#define QCA_HDR_XMIT_VERSION_MASK GENMASK(15, 14)
+#define QCA_HDR_XMIT_VERSION_S 14
+#define QCA_HDR_XMIT_PRIORITY_MASK GENMASK(13, 11)
+#define QCA_HDR_XMIT_PRIORITY_S 11
+#define QCA_HDR_XMIT_CONTROL_MASK GENMASK(10, 8)
+#define QCA_HDR_XMIT_CONTROL_S 8
+#define QCA_HDR_XMIT_FROM_CPU BIT(7)
+#define QCA_HDR_XMIT_DP_BIT_MASK GENMASK(6, 0)
+
+static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ u16 *phdr, hdr;
+
+ dev->stats.tx_packets++;
+ dev->stats.tx_bytes += skb->len;
+
+ if (skb_cow_head(skb, 0) < 0)
+ goto out_free;
+
+ skb_push(skb, QCA_HDR_LEN);
+
+ memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN);
+ phdr = (u16 *)(skb->data + 2 * ETH_ALEN);
+
+ /* Set the version field, and set destination port information */
+ hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
+ QCA_HDR_XMIT_FROM_CPU |
+ BIT(p->port);
+
+ *phdr = htons(hdr);
+
+ return skb;
+
+out_free:
+ kfree_skb(skb);
+ return NULL;
+}
+
+static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_switch *ds;
+ u8 ver;
+ int port;
+ __be16 *phdr, hdr;
+
+ if (unlikely(!dst))
+ goto out_drop;
+
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb)
+ goto out;
+
+ if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
+ goto out_drop;
+
+ /* The QCA header is added by the switch between src addr and Ethertype.
+ * At this point, skb->data points to the ethertype, so the header should
+ * be right before it.
+ */
+ phdr = (__be16 *)(skb->data - 2);
+ hdr = ntohs(*phdr);
+
+ /* Make sure the version is correct */
+ ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S;
+ if (unlikely(ver != QCA_HDR_VERSION))
+ goto out_drop;
+
+ /* Remove QCA tag and recalculate checksum */
+ skb_pull_rcsum(skb, QCA_HDR_LEN);
+ memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN,
+ ETH_HLEN - QCA_HDR_LEN);
+
+ /* This protocol doesn't support cascading multiple switches so it's
+ * safe to assume the switch is first in the tree
+ */
+ ds = dst->ds[0];
+ if (!ds)
+ goto out_drop;
+
+ /* Get source port information */
+ port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
+ if (!ds->ports[port].netdev)
+ goto out_drop;
+
+ /* Update skb & forward the frame accordingly */
+ skb_push(skb, ETH_HLEN);
+ skb->pkt_type = PACKET_HOST;
+ skb->dev = ds->ports[port].netdev;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ skb->dev->stats.rx_packets++;
+ skb->dev->stats.rx_bytes += skb->len;
+
+ netif_receive_skb(skb);
+
+ return 0;
+
+out_drop:
+ kfree_skb(skb);
+out:
+ return 0;
+}
+
+const struct dsa_device_ops qca_netdev_ops = {
+ .xmit = qca_tag_xmit,
+ .rcv = qca_tag_rcv,
+};
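As a worked example (not part of the file): the 16-bit tag that qca_tag_xmit() builds for a CPU-originated frame bound for switch port 3 is

u16 hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S	/* 0x2 << 14 = 0x8000 */
	| QCA_HDR_XMIT_FROM_CPU				/* BIT(7)    = 0x0080 */
	| BIT(3);					/* port 3    = 0x0008 */
/* hdr == 0x8088, written after the MAC addresses in big-endian order
 * via htons(), exactly where qca_tag_rcv() later reads it back.
 */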
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 415e117967c7..062a67ca9a21 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2232,7 +2232,7 @@ static struct devinet_sysctl_table {
};
static int __devinet_sysctl_register(struct net *net, char *dev_name,
- struct ipv4_devconf *p)
+ int ifindex, struct ipv4_devconf *p)
{
int i;
struct devinet_sysctl_table *t;
@@ -2255,6 +2255,8 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
goto free;
p->sysctl = t;
+
+ inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
return 0;
free:
@@ -2286,7 +2288,7 @@ static int devinet_sysctl_register(struct in_device *idev)
if (err)
return err;
err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
- &idev->cnf);
+ idev->dev->ifindex, &idev->cnf);
if (err)
neigh_sysctl_unregister(idev->arp_parms);
return err;
@@ -2347,11 +2349,12 @@ static __net_init int devinet_init_net(struct net *net)
}
#ifdef CONFIG_SYSCTL
- err = __devinet_sysctl_register(net, "all", all);
+ err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
if (err < 0)
goto err_reg_all;
- err = __devinet_sysctl_register(net, "default", dflt);
+ err = __devinet_sysctl_register(net, "default",
+ NETCONFA_IFINDEX_DEFAULT, dflt);
if (err < 0)
goto err_reg_dflt;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 317c31939732..4e56a4c20a3c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -503,6 +503,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
if (!dev)
return -ENODEV;
cfg->fc_oif = dev->ifindex;
+ cfg->fc_table = l3mdev_fib_table(dev);
if (colon) {
struct in_ifaddr *ifa;
struct in_device *in_dev = __in_dev_get_rtnl(dev);
@@ -1021,7 +1022,7 @@ no_promotions:
* First of all, we scan fib_info list searching
* for stray nexthop entries, then ignite fib_flush.
*/
- if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
+ if (fib_sync_down_addr(dev, ifa->ifa_local))
fib_flush(dev_net(dev));
}
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8066ccc48a17..388d3e21629b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1057,6 +1057,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi->fib_priority = cfg->fc_priority;
fi->fib_prefsrc = cfg->fc_prefsrc;
fi->fib_type = cfg->fc_type;
+ fi->fib_tb_id = cfg->fc_table;
fi->fib_nhs = nhs;
change_nexthops(fi) {
@@ -1337,18 +1338,21 @@ nla_put_failure:
* referring to it.
* - device went down -> we must shutdown all nexthops going via it.
*/
-int fib_sync_down_addr(struct net *net, __be32 local)
+int fib_sync_down_addr(struct net_device *dev, __be32 local)
{
int ret = 0;
unsigned int hash = fib_laddr_hashfn(local);
struct hlist_head *head = &fib_info_laddrhash[hash];
+ struct net *net = dev_net(dev);
+ int tb_id = l3mdev_fib_table(dev);
struct fib_info *fi;
if (!fib_info_laddrhash || local == 0)
return 0;
hlist_for_each_entry(fi, head, fib_lhash) {
- if (!net_eq(fi->fib_net, net))
+ if (!net_eq(fi->fib_net, net) ||
+ fi->fib_tb_id != tb_id)
continue;
if (fi->fib_prefsrc == local) {
fi->fib_flags |= RTNH_F_DEAD;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 95649ebd2874..5719d6ba0824 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -55,6 +55,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
+#include <net/dst_metadata.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -546,6 +547,81 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
return 0;
}
+void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ u32 headroom = sizeof(struct iphdr);
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ const struct iphdr *inner_iph;
+ struct rtable *rt;
+ struct flowi4 fl4;
+ __be16 df = 0;
+ u8 tos, ttl;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET))
+ goto tx_error;
+ key = &tun_info->key;
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+ tos = key->tos;
+ if (tos == 1) {
+ if (skb->protocol == htons(ETH_P_IP))
+ tos = inner_iph->tos;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
+ }
+ init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+ RT_TOS(tos), tunnel->parms.link);
+ if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
+ goto tx_error;
+ rt = ip_route_output_key(tunnel->net, &fl4);
+ if (IS_ERR(rt)) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error;
+ }
+ if (rt->dst.dev == dev) {
+ ip_rt_put(rt);
+ dev->stats.collisions++;
+ goto tx_error;
+ }
+ tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
+ ttl = key->ttl;
+ if (ttl == 0) {
+ if (skb->protocol == htons(ETH_P_IP))
+ ttl = inner_iph->ttl;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
+ else
+ ttl = ip4_dst_hoplimit(&rt->dst);
+ }
+ if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
+ df = htons(IP_DF);
+ else if (skb->protocol == htons(ETH_P_IP))
+ df = inner_iph->frag_off & htons(IP_DF);
+ headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
+ if (headroom > dev->needed_headroom)
+ dev->needed_headroom = headroom;
+
+ if (skb_cow_head(skb, dev->needed_headroom)) {
+ ip_rt_put(rt);
+ goto tx_dropped;
+ }
+ iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos,
+ key->ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+ return;
+tx_error:
+ dev->stats.tx_errors++;
+ goto kfree;
+tx_dropped:
+ dev->stats.tx_dropped++;
+kfree:
+ kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
+
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, u8 protocol)
{
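The metadata key uses in-band sentinels here: key->tos == 1 means "inherit the inner TOS/dsfield" and key->ttl == 0 means "inherit the inner TTL/hop limit, falling back to ip4_dst_hoplimit()". A standalone restatement of the TOS rule (a sketch under those assumptions, not kernel code; the constants are the standard EtherType values):

#include <stdint.h>

/* tos == 1 is the "inherit" sentinel used by ip_md_tunnel_xmit(). */
static uint8_t md_tunnel_tos(uint8_t key_tos, uint16_t inner_proto,
			     uint8_t inner_tos_or_dsfield)
{
	if (key_tos != 1)
		return key_tos;			/* explicit value wins */
	if (inner_proto == 0x0800 ||		/* ETH_P_IP */
	    inner_proto == 0x86DD)		/* ETH_P_IPV6 */
		return inner_tos_or_dsfield;	/* inherit from inner pkt */
	return key_tos;				/* non-IP: left as-is */
}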
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 4ae3f8e6c6cc..c9392589c415 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -115,6 +115,7 @@
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/dst_metadata.h>
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
@@ -193,6 +194,7 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+ struct metadata_dst *tun_dst = NULL;
struct ip_tunnel *tunnel;
const struct iphdr *iph;
@@ -216,7 +218,12 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
tpi = &ipip_tpi;
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ if (tunnel->collect_md) {
+ tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
+ if (!tun_dst)
+ return 0;
+ }
+ return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
}
return -1;
@@ -270,7 +277,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
skb_set_inner_ipproto(skb, ipproto);
- ip_tunnel_xmit(skb, dev, tiph, ipproto);
+ if (tunnel->collect_md)
+ ip_md_tunnel_xmit(skb, dev, ipproto);
+ else
+ ip_tunnel_xmit(skb, dev, tiph, ipproto);
return NETDEV_TX_OK;
tx_error:
@@ -380,13 +390,14 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
}
static void ipip_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm *parms, bool *collect_md)
{
memset(parms, 0, sizeof(*parms));
parms->iph.version = 4;
parms->iph.protocol = IPPROTO_IPIP;
parms->iph.ihl = 5;
+ *collect_md = false;
if (!data)
return;
@@ -414,6 +425,9 @@ static void ipip_netlink_parms(struct nlattr *data[],
if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
parms->iph.frag_off = htons(IP_DF);
+
+ if (data[IFLA_IPTUN_COLLECT_METADATA])
+ *collect_md = true;
}
/* This function returns true when ENCAP attributes are present in the nl msg */
@@ -453,18 +467,18 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[],
static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
+ struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
if (ipip_netlink_encap_parms(data, &ipencap)) {
- struct ip_tunnel *t = netdev_priv(dev);
int err = ip_tunnel_encap_setup(t, &ipencap);
if (err < 0)
return err;
}
- ipip_netlink_parms(data, &p);
+ ipip_netlink_parms(data, &p, &t->collect_md);
return ip_tunnel_newlink(dev, tb, &p);
}
@@ -473,6 +487,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
{
struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ bool collect_md;
if (ipip_netlink_encap_parms(data, &ipencap)) {
struct ip_tunnel *t = netdev_priv(dev);
@@ -482,7 +497,9 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
return err;
}
- ipip_netlink_parms(data, &p);
+ ipip_netlink_parms(data, &p, &collect_md);
+ if (collect_md)
+ return -EINVAL;
if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
(!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
@@ -516,6 +533,8 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_IPTUN_COLLECT_METADATA */
+ nla_total_size(0) +
0;
}
@@ -544,6 +563,9 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
tunnel->encap.flags))
goto nla_put_failure;
+ if (tunnel->collect_md)
+ if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -562,6 +584,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
};
static struct rtnl_link_ops ipip_link_ops __read_mostly = {
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index c24f41c816b3..2c2553b9026c 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -46,6 +46,7 @@ static const struct nft_expr_ops nft_reject_ipv4_ops = {
.eval = nft_reject_ipv4_eval,
.init = nft_reject_init,
.dump = nft_reject_dump,
+ .validate = nft_reject_validate,
};
static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a13fcb369f52..7dae800092e6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1020,17 +1020,31 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(tcp_sendpage);
-static inline int select_size(const struct sock *sk, bool sg)
+/* Do not bother using a page frag for very small frames.
+ * But use this heuristic only for the first skb in write queue.
+ *
+ * Having no payload in skb->head allows better SACK shifting
+ * in tcp_shift_skb_data(), reducing sack/rack overhead, because
+ * the write queue has fewer skbs.
+ * Each skb can hold up to MAX_SKB_FRAGS * 32Kbytes, or ~0.5 MB.
+ * This also speeds up tso_fragment(), since it won't fall back
+ * to tcp_fragment().
+ */
+static int linear_payload_sz(bool first_skb)
+{
+ if (first_skb)
+ return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
+ return 0;
+}
+
+static int select_size(const struct sock *sk, bool sg, bool first_skb)
{
const struct tcp_sock *tp = tcp_sk(sk);
int tmp = tp->mss_cache;
if (sg) {
if (sk_can_gso(sk)) {
- /* Small frames wont use a full page:
- * Payload will immediately follow tcp header.
- */
- tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
+ tmp = linear_payload_sz(first_skb);
} else {
int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
@@ -1161,6 +1175,8 @@ restart:
}
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
+ bool first_skb;
+
new_segment:
/* Allocate new segment. If the interface is SG,
* allocate skb fitting to single page.
@@ -1172,10 +1188,11 @@ new_segment:
process_backlog = false;
goto restart;
}
+ first_skb = skb_queue_empty(&sk->sk_write_queue);
skb = sk_stream_alloc_skb(sk,
- select_size(sk, sg),
+ select_size(sk, sg, first_skb),
sk->sk_allocation,
- skb_queue_empty(&sk->sk_write_queue));
+ first_skb);
if (!skb)
goto wait_for_memory;
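A rough worked example of linear_payload_sz() (the figures below are config-dependent assumptions, not taken from the patch): SKB_WITH_OVERHEAD(X) subtracts the aligned size of struct skb_shared_info from X, so with MAX_TCP_HEADER near 320 bytes and a shared-info overhead near 320 bytes on x86_64, the first skb in the write queue gets roughly 1.4 KB of linear payload out of its 2 KB allocation, while every later skb gets 0 and its payload goes straight to page frags.

/* Illustrative arithmetic only; both constants are assumptions that
 * vary by kernel config.
 */
#define ASSUMED_MAX_TCP_HEADER	320
#define ASSUMED_SHINFO_OVERHEAD	320

static int sketch_linear_payload_sz(int first_skb)
{
	if (first_skb)
		return (2048 - ASSUMED_MAX_TCP_HEADER)
			- ASSUMED_SHINFO_OVERHEAD;	/* ~1408 bytes */
	return 0;					/* frags only */
}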
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 54d9f9b0120f..4e777a3243f9 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -150,6 +150,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
tp->segs_in = 0;
tcp_segs_in(tp, skb);
__skb_pull(skb, tcp_hdrlen(skb));
+ sk_forced_mem_schedule(sk, skb->truesize);
skb_set_owner_r(skb, sk);
TCP_SKB_CB(skb)->seq++;
@@ -226,6 +227,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
tcp_fastopen_add_skb(child, skb);
tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
+ tp->rcv_wup = tp->rcv_nxt;
/* tcp_conn_request() is sending the SYNACK,
* and queues the child into listener accept queue.
*/
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 70b892db9901..dad3e7eeed94 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4502,7 +4502,7 @@ coalesce_done:
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
__kfree_skb(skb1);
- goto add_sack;
+ goto merge_right;
}
} else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
goto coalesce_done;
@@ -4514,6 +4514,7 @@ insert:
rb_link_node(&skb->rbnode, parent, p);
rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
+merge_right:
/* Remove other segments covered by skb. */
while ((q = rb_next(&skb->rbnode)) != NULL) {
skb1 = rb_entry(q, struct sk_buff, rbnode);
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 028eb046ea40..9c5fc973267f 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -76,7 +76,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else if (!yeah->doing_reno_now) {
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 3155ed73d3b3..6a7ff6957535 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -29,7 +29,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
memset(fl4, 0, sizeof(*fl4));
fl4->daddr = daddr->a4;
fl4->flowi4_tos = tos;
- fl4->flowi4_oif = oif;
+ fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
if (saddr)
fl4->saddr = saddr->a4;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f418d2eaeddd..2f1f5d439788 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -778,7 +778,14 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
}
if (p == &net->ipv6.devconf_all->forwarding) {
+ int old_dflt = net->ipv6.devconf_dflt->forwarding;
+
net->ipv6.devconf_dflt->forwarding = newf;
+ if ((!newf) ^ (!old_dflt))
+ inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt);
+
addrconf_forward_change(net, newf);
if ((!newf) ^ (!old))
inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
@@ -1941,6 +1948,7 @@ errdad:
spin_unlock_bh(&ifp->lock);
addrconf_mod_dad_work(ifp, 0);
+ in6_ifa_put(ifp);
}
/* Join to solicited addr multicast group.
@@ -3850,6 +3858,7 @@ static void addrconf_dad_work(struct work_struct *w)
addrconf_dad_begin(ifp);
goto out;
} else if (action == DAD_ABORT) {
+ in6_ifa_hold(ifp);
addrconf_dad_stop(ifp, 1);
if (disable_ipv6)
addrconf_ifdown(idev->dev, 0);
@@ -6025,7 +6034,7 @@ static const struct ctl_table addrconf_sysctl[] = {
static int __addrconf_sysctl_register(struct net *net, char *dev_name,
struct inet6_dev *idev, struct ipv6_devconf *p)
{
- int i;
+ int i, ifindex;
struct ctl_table *table;
char path[sizeof("net/ipv6/conf/") + IFNAMSIZ];
@@ -6045,6 +6054,13 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
if (!p->sysctl_header)
goto free;
+ if (!strcmp(dev_name, "all"))
+ ifindex = NETCONFA_IFINDEX_ALL;
+ else if (!strcmp(dev_name, "default"))
+ ifindex = NETCONFA_IFINDEX_DEFAULT;
+ else
+ ifindex = idev->dev->ifindex;
+ inet6_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
return 0;
free:
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2050217df565..6a66adba0c22 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -57,6 +57,7 @@
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/dst_metadata.h>
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
@@ -90,6 +91,7 @@ struct ip6_tnl_net {
struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
struct ip6_tnl __rcu *tnls_wc[1];
struct ip6_tnl __rcu **tnls[2];
+ struct ip6_tnl __rcu *collect_md_tun;
};
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
@@ -166,6 +168,10 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
return t;
}
+ t = rcu_dereference(ip6n->collect_md_tun);
+ if (t)
+ return t;
+
t = rcu_dereference(ip6n->tnls_wc[0]);
if (t && (t->dev->flags & IFF_UP))
return t;
@@ -209,6 +215,8 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
{
struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ip6n->collect_md_tun, t);
rcu_assign_pointer(t->next , rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t);
}
@@ -224,6 +232,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
struct ip6_tnl __rcu **tp;
struct ip6_tnl *iter;
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ip6n->collect_md_tun, NULL);
+
for (tp = ip6_tnl_bucket(ip6n, &t->parms);
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
@@ -829,6 +840,9 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
+ if (tun_dst)
+ skb_dst_set(skb, (struct dst_entry *)tun_dst);
+
gro_cells_receive(&tunnel->gro_cells, skb);
return 0;
@@ -865,6 +879,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
{
struct ip6_tnl *t;
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct metadata_dst *tun_dst = NULL;
int ret = -1;
rcu_read_lock();
@@ -881,7 +896,12 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
goto drop;
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
- ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate,
+ if (t->parms.collect_md) {
+ tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
+ if (!tun_dst)
+ return 0;
+ }
+ ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
log_ecn_error);
}
@@ -1012,8 +1032,16 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
int mtu;
unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
unsigned int max_headroom = psh_hlen;
+ u8 hop_limit;
int err = -1;
+ if (t->parms.collect_md) {
+ hop_limit = skb_tunnel_info(skb)->key.ttl;
+ goto route_lookup;
+ } else {
+ hop_limit = t->parms.hop_limit;
+ }
+
/* NBMA tunnel */
if (ipv6_addr_any(&t->parms.raddr)) {
struct in6_addr *addr6;
@@ -1043,6 +1071,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
goto tx_err_link_failure;
if (!dst) {
+route_lookup:
dst = ip6_route_output(net, NULL, fl6);
if (dst->error)
@@ -1053,6 +1082,10 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
dst = NULL;
goto tx_err_link_failure;
}
+ if (t->parms.collect_md &&
+ ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
+ &fl6->daddr, 0, &fl6->saddr))
+ goto tx_err_link_failure;
ndst = dst;
}
@@ -1071,7 +1104,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
}
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if (skb_dst(skb))
+ if (skb_dst(skb) && !t->parms.collect_md)
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
if (skb->len > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
@@ -1111,8 +1144,13 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
skb = new_skb;
}
- if (!fl6->flowi6_mark && ndst)
- dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
+ if (t->parms.collect_md) {
+ if (t->encap.type != TUNNEL_ENCAP_NONE)
+ goto tx_err_dst_release;
+ } else {
+ if (!fl6->flowi6_mark && ndst)
+ dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
+ }
skb_dst_set(skb, dst);
if (encap_limit >= 0) {
@@ -1137,7 +1175,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
ipv6h = ipv6_hdr(skb);
ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
- ipv6h->hop_limit = t->parms.hop_limit;
+ ipv6h->hop_limit = hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
ipv6h->daddr = fl6->daddr;
@@ -1170,18 +1208,34 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (tproto != IPPROTO_IPIP && tproto != 0)
return -1;
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
+ dsfield = ipv4_get_dsfield(iph);
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ if (t->parms.collect_md) {
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
- dsfield = ipv4_get_dsfield(iph);
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET6))
+ return -1;
+ key = &tun_info->key;
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPIP;
+ fl6.daddr = key->u.ipv6.dst;
+ fl6.flowlabel = key->label;
+ } else {
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
- & IPV6_TCLASS_MASK;
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6.flowi6_mark = skb->mark;
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPIP;
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+ & IPV6_TCLASS_MASK;
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+ }
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
@@ -1219,28 +1273,47 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
ip6_tnl_addr_conflict(t, ipv6h))
return -1;
- offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
- if (offset > 0) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
- if (tel->encap_limit == 0) {
- icmpv6_send(skb, ICMPV6_PARAMPROB,
- ICMPV6_HDR_FIELD, offset + 2);
+ dsfield = ipv6_get_dsfield(ipv6h);
+
+ if (t->parms.collect_md) {
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET6))
return -1;
+ key = &tun_info->key;
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPV6;
+ fl6.daddr = key->u.ipv6.dst;
+ fl6.flowlabel = key->label;
+ } else {
+ offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ if (offset > 0) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+
+ tel = (void *)&skb_network_header(skb)[offset];
+ if (tel->encap_limit == 0) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2);
+ return -1;
+ }
+ encap_limit = tel->encap_limit - 1;
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+ encap_limit = t->parms.encap_limit;
}
- encap_limit = tel->encap_limit - 1;
- } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPV6;
- dsfield = ipv6_get_dsfield(ipv6h);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
- fl6.flowlabel |= ip6_flowlabel(ipv6h);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6.flowi6_mark = skb->mark;
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ fl6.flowlabel |= ip6_flowlabel(ipv6h);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+ }
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
@@ -1739,6 +1812,10 @@ static int ip6_tnl_dev_init(struct net_device *dev)
if (err)
return err;
ip6_tnl_link_config(t);
+ if (t->parms.collect_md) {
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ netif_keep_dst(dev);
+ }
return 0;
}
@@ -1809,6 +1886,9 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[],
if (data[IFLA_IPTUN_PROTO])
parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
+ if (data[IFLA_IPTUN_COLLECT_METADATA])
+ parms->collect_md = true;
}
static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
@@ -1848,6 +1928,7 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct ip6_tnl *nt, *t;
struct ip_tunnel_encap ipencap;
@@ -1862,9 +1943,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
ip6_tnl_netlink_parms(data, &nt->parms);
- t = ip6_tnl_locate(net, &nt->parms, 0);
- if (!IS_ERR(t))
- return -EEXIST;
+ if (nt->parms.collect_md) {
+ if (rtnl_dereference(ip6n->collect_md_tun))
+ return -EEXIST;
+ } else {
+ t = ip6_tnl_locate(net, &nt->parms, 0);
+ if (!IS_ERR(t))
+ return -EEXIST;
+ }
return ip6_tnl_create2(dev);
}
@@ -1888,6 +1974,8 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
return err;
}
ip6_tnl_netlink_parms(data, &p);
+ if (p.collect_md)
+ return -EINVAL;
t = ip6_tnl_locate(net, &p, 0);
if (!IS_ERR(t)) {
@@ -1935,6 +2023,8 @@ static size_t ip6_tnl_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_IPTUN_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_IPTUN_COLLECT_METADATA */
+ nla_total_size(0) +
0;
}
@@ -1953,16 +2043,15 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
goto nla_put_failure;
- if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
- tunnel->encap.type) ||
- nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
- tunnel->encap.sport) ||
- nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
- tunnel->encap.dport) ||
- nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
- tunnel->encap.flags))
+ if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
+ nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
+ nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags))
goto nla_put_failure;
+ if (parm->collect_md)
+ if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -1990,6 +2079,7 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
};
static struct rtnl_link_ops ip6_link_ops __read_mostly = {
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index 533cd5719c59..92bda9908bb9 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -47,6 +47,7 @@ static const struct nft_expr_ops nft_reject_ipv6_ops = {
.eval = nft_reject_ipv6_eval,
.init = nft_reject_init,
.dump = nft_reject_dump,
+ .validate = nft_reject_validate,
};
static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 0900352c924c..0e983b694ee8 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -126,8 +126,10 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rt = (struct rt6_info *) dst;
np = inet6_sk(sk);
- if (!np)
- return -EBADF;
+ if (!np) {
+ err = -EBADF;
+ goto dst_err_out;
+ }
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
fl6.flowi6_oif = np->mcast_oif;
@@ -163,6 +165,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
release_sock(sk);
+dst_err_out:
+ dst_release(dst);
+
if (err)
return err;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 0eaab1fa6be5..00a2d40677d6 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -23,6 +23,7 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
{
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
return xfrm_input(skb, nexthdr, spi, 0);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index b7b7e863a2bb..e0f71c01d728 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -36,7 +36,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
int err;
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_oif = oif;
+ fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
if (saddr)
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 2632ac748371..b7f869a85ab7 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -23,6 +23,7 @@
#include <linux/socket.h>
#include <linux/uaccess.h>
#include <linux/workqueue.h>
+#include <linux/syscalls.h>
#include <net/kcm.h>
#include <net/netns/generic.h>
#include <net/sock.h>
@@ -1721,7 +1722,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
if (copy_to_user((void __user *)arg, &info,
sizeof(info))) {
err = -EFAULT;
- sock_release(newsock);
+ sys_close(info.fd);
}
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1e40dacaa137..a2ed3bda4ddc 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1855,6 +1855,9 @@ static __net_exit void l2tp_exit_net(struct net *net)
(void)l2tp_tunnel_delete(tunnel);
}
rcu_read_unlock_bh();
+
+ flush_workqueue(l2tp_wq);
+ rcu_barrier();
}
static struct pernet_operations l2tp_net_ops = {
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index ef2cd30ca06e..965f7e344cef 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -121,7 +121,7 @@ static struct rtnl_link_stats64 *l2tp_eth_get_stats64(struct net_device *dev,
}
-static struct net_device_ops l2tp_eth_netdev_ops = {
+static const struct net_device_ops l2tp_eth_netdev_ops = {
.ndo_init = l2tp_eth_dev_init,
.ndo_uninit = l2tp_eth_dev_uninit,
.ndo_start_xmit = l2tp_eth_dev_xmit,
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 8ae3ed97d95c..db916cf51ffe 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -38,7 +38,7 @@ static u16 llc_ui_sap_link_no_max[256];
static struct sockaddr_llc llc_ui_addrnull;
static const struct proto_ops llc_ui_ops;
-static long llc_ui_wait_for_conn(struct sock *sk, long timeout);
+static bool llc_ui_wait_for_conn(struct sock *sk, long timeout);
static int llc_ui_wait_for_disc(struct sock *sk, long timeout);
static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout);
@@ -551,7 +551,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
return rc;
}
-static long llc_ui_wait_for_conn(struct sock *sk, long timeout)
+static bool llc_ui_wait_for_conn(struct sock *sk, long timeout)
{
DEFINE_WAIT(wait);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index b5d28f14b9cf..afca7d103684 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -333,10 +333,11 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
if (!uc.center_freq1)
return;
- /* proceed to downgrade the chandef until usable or the same */
+ /* proceed to downgrade the chandef until usable or the same as AP BW */
while (uc.width > max_width ||
- !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc,
- sdata->wdev.iftype))
+ (uc.width > sta->tdls_chandef.width &&
+ !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc,
+ sdata->wdev.iftype)))
ieee80211_chandef_downgrade(&uc);
if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) {
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 5eefe4a355c6..75d696f11045 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -30,7 +30,6 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
if (!iph)
return;
- iph = ip_hdr(skb);
if (iph->ihl < 5 || iph->version != 4)
return;
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 70eb2f6a3b01..d44d89b56127 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -343,12 +343,12 @@ static int nfnl_acct_del(struct net *net, struct sock *nfnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const tb[])
{
- char *acct_name;
- struct nf_acct *cur;
+ struct nf_acct *cur, *tmp;
int ret = -ENOENT;
+ char *acct_name;
if (!tb[NFACCT_NAME]) {
- list_for_each_entry(cur, &net->nfnl_acct_list, head)
+ list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head)
nfnl_acct_try_del(cur);
return 0;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 68216cdc7083..139e0867e56e 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -98,31 +98,28 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
break;
}
- l4proto = nf_ct_l4proto_find_get(l3num, l4num);
-
- /* This protocol is not supported, skip. */
- if (l4proto->l4proto != l4num) {
- ret = -EOPNOTSUPP;
- goto err_proto_put;
- }
-
if (matching) {
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
/* You cannot replace one timeout policy by another of
* different kind, sorry.
*/
if (matching->l3num != l3num ||
- matching->l4proto->l4proto != l4num) {
- ret = -EINVAL;
- goto err_proto_put;
- }
-
- ret = ctnl_timeout_parse_policy(&matching->data,
- l4proto, net,
- cda[CTA_TIMEOUT_DATA]);
- return ret;
+ matching->l4proto->l4proto != l4num)
+ return -EINVAL;
+
+ return ctnl_timeout_parse_policy(&matching->data,
+ matching->l4proto, net,
+ cda[CTA_TIMEOUT_DATA]);
}
- ret = -EBUSY;
+
+ return -EBUSY;
+ }
+
+ l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+
+ /* This protocol is not supported, skip. */
+ if (l4proto->l4proto != l4num) {
+ ret = -EOPNOTSUPP;
goto err_proto_put;
}
@@ -305,7 +302,16 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
const struct hlist_nulls_node *nn;
unsigned int last_hsize;
spinlock_t *lock;
- int i;
+ int i, cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_bh(&pcpu->lock);
+ hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
+ untimeout(h, timeout);
+ spin_unlock_bh(&pcpu->lock);
+ }
local_bh_disable();
restart:
@@ -350,12 +356,13 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
- struct ctnl_timeout *cur;
+ struct ctnl_timeout *cur, *tmp;
int ret = -ENOENT;
char *name;
if (!cda[CTA_TIMEOUT_NAME]) {
- list_for_each_entry(cur, &net->nfct_timeout_list, head)
+ list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list,
+ head)
ctnl_timeout_try_del(net, cur);
return 0;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 2863f3493038..8a6bc7630912 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -291,10 +291,16 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
-static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx)
+int nft_meta_set_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
{
+ struct nft_meta *priv = nft_expr_priv(expr);
unsigned int hooks;
+ if (priv->key != NFT_META_PKTTYPE)
+ return 0;
+
switch (ctx->afi->family) {
case NFPROTO_BRIDGE:
hooks = 1 << NF_BR_PRE_ROUTING;
@@ -308,6 +314,7 @@ static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx)
return nft_chain_validate_hooks(ctx->chain, hooks);
}
+EXPORT_SYMBOL_GPL(nft_meta_set_validate);
int nft_meta_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
@@ -327,15 +334,16 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
len = sizeof(u8);
break;
case NFT_META_PKTTYPE:
- err = nft_meta_set_init_pkttype(ctx);
- if (err)
- return err;
len = sizeof(u8);
break;
default:
return -EOPNOTSUPP;
}
+ err = nft_meta_set_validate(ctx, expr, NULL);
+ if (err < 0)
+ return err;
+
priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
@@ -407,6 +415,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
.init = nft_meta_set_init,
.destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
+ .validate = nft_meta_set_validate,
};
static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 0522fc9bfb0a..c64de3f7379d 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -26,11 +26,27 @@ const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
};
EXPORT_SYMBOL_GPL(nft_reject_policy);
+int nft_reject_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT));
+}
+EXPORT_SYMBOL_GPL(nft_reject_validate);
+
int nft_reject_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
+ int err;
+
+ err = nft_reject_validate(ctx, expr, NULL);
+ if (err < 0)
+ return err;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
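For reference, with the uapi netfilter hook numbering (NF_INET_PRE_ROUTING = 0, LOCAL_IN = 1, FORWARD = 2, LOCAL_OUT = 3, POST_ROUTING = 4), the mask nft_reject_validate() passes works out to:

/* Worked value of the allowed-hooks mask: reject is only valid in the
 * input, forward and output hooks, never in pre/postrouting.
 */
#define REJECT_ALLOWED_HOOKS \
	((1 << 1 /* NF_INET_LOCAL_IN  */) | \
	 (1 << 2 /* NF_INET_FORWARD   */) | \
	 (1 << 3 /* NF_INET_LOCAL_OUT */))	/* == 0x0e */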
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 759ca5248a3d..e79d9ca2ffee 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -66,7 +66,11 @@ static int nft_reject_inet_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
- int icmp_code;
+ int icmp_code, err;
+
+ err = nft_reject_validate(ctx, expr, NULL);
+ if (err < 0)
+ return err;
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
@@ -124,6 +128,7 @@ static const struct nft_expr_ops nft_reject_inet_ops = {
.eval = nft_reject_inet_eval,
.init = nft_reject_inet_init,
.dump = nft_reject_inet_dump,
+ .validate = nft_reject_validate,
};
static struct nft_expr_type nft_reject_inet_type __read_mostly = {
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 4fe9032b1160..863e992dfbc0 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -71,6 +71,8 @@ struct ovs_frag_data {
static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
#define DEFERRED_ACTION_FIFO_SIZE 10
+#define OVS_RECURSION_LIMIT 5
+#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
struct action_fifo {
int head;
int tail;
@@ -78,7 +80,12 @@ struct action_fifo {
struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};
+struct recirc_keys {
+ struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
+};
+
static struct action_fifo __percpu *action_fifos;
+static struct recirc_keys __percpu *recirc_keys;
static DEFINE_PER_CPU(int, exec_actions_level);
static void action_fifo_init(struct action_fifo *fifo)
@@ -1020,6 +1027,7 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *a, int rem)
{
struct deferred_action *da;
+ int level;
if (!is_flow_key_valid(key)) {
int err;
@@ -1043,6 +1051,18 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
return 0;
}
+ level = this_cpu_read(exec_actions_level);
+ if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
+ struct recirc_keys *rks = this_cpu_ptr(recirc_keys);
+ struct sw_flow_key *recirc_key = &rks->key[level - 1];
+
+ *recirc_key = *key;
+ recirc_key->recirc_id = nla_get_u32(a);
+ ovs_dp_process_packet(skb, recirc_key);
+
+ return 0;
+ }
+
da = add_deferred_actions(skb, key, NULL);
if (da) {
da->pkt_key.recirc_id = nla_get_u32(a);
@@ -1209,11 +1229,10 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_actions *acts,
struct sw_flow_key *key)
{
- static const int ovs_recursion_limit = 5;
int err, level;
level = __this_cpu_inc_return(exec_actions_level);
- if (unlikely(level > ovs_recursion_limit)) {
+ if (unlikely(level > OVS_RECURSION_LIMIT)) {
net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
ovs_dp_name(dp));
kfree_skb(skb);
@@ -1238,10 +1257,17 @@ int action_fifos_init(void)
if (!action_fifos)
return -ENOMEM;
+ recirc_keys = alloc_percpu(struct recirc_keys);
+ if (!recirc_keys) {
+ free_percpu(action_fifos);
+ return -ENOMEM;
+ }
+
return 0;
}
void action_fifos_exit(void)
{
free_percpu(action_fifos);
+ free_percpu(recirc_keys);
}
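The depth bookkeeping can be summarized as follows (a sketch of the dispatch rule, not kernel code): levels 1-3 recurse directly using one of the three per-CPU scratch keys, levels 4-5 fall back to the deferred-action FIFO as before, and anything past 5 is dropped by ovs_execute_actions().

enum recirc_path { RECURSE_DIRECT, DEFER_FIFO, DROP };

/* Sketch of the recirculation dispatch introduced above. */
static enum recirc_path classify_recirc(int level)
{
	if (level > 5)			/* > OVS_RECURSION_LIMIT */
		return DROP;
	if (level <= 3)			/* <= OVS_DEFERRED_ACTION_THRESHOLD;
					 * uses recirc_keys->key[level - 1] */
		return RECURSE_DIRECT;
	return DEFER_FIFO;		/* levels 4 and 5 */
}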
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 1240ae3b88d2..0fa45439def1 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
+#include <linux/cpumask.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
@@ -72,32 +73,33 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
{
struct flow_stats *stats;
int node = numa_node_id();
+ int cpu = smp_processor_id();
int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
- stats = rcu_dereference(flow->stats[node]);
+ stats = rcu_dereference(flow->stats[cpu]);
- /* Check if already have node-specific stats. */
+ /* Check if we already have CPU-specific stats. */
if (likely(stats)) {
spin_lock(&stats->lock);
/* Mark if we write on the pre-allocated stats. */
- if (node == 0 && unlikely(flow->stats_last_writer != node))
- flow->stats_last_writer = node;
+ if (cpu == 0 && unlikely(flow->stats_last_writer != cpu))
+ flow->stats_last_writer = cpu;
} else {
stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
spin_lock(&stats->lock);
- /* If the current NUMA-node is the only writer on the
+ /* If the current CPU is the only writer on the
* pre-allocated stats keep using them.
*/
- if (unlikely(flow->stats_last_writer != node)) {
+ if (unlikely(flow->stats_last_writer != cpu)) {
/* A previous locker may have already allocated the
- * stats, so we need to check again. If node-specific
+ * stats, so we need to check again. If CPU-specific
* stats were already allocated, we update the pre-
* allocated stats as we have already locked them.
*/
- if (likely(flow->stats_last_writer != NUMA_NO_NODE)
- && likely(!rcu_access_pointer(flow->stats[node]))) {
- /* Try to allocate node-specific stats. */
+ if (likely(flow->stats_last_writer != -1) &&
+ likely(!rcu_access_pointer(flow->stats[cpu]))) {
+ /* Try to allocate CPU-specific stats. */
struct flow_stats *new_stats;
new_stats =
@@ -114,12 +116,12 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
new_stats->tcp_flags = tcp_flags;
spin_lock_init(&new_stats->lock);
- rcu_assign_pointer(flow->stats[node],
+ rcu_assign_pointer(flow->stats[cpu],
new_stats);
goto unlock;
}
}
- flow->stats_last_writer = node;
+ flow->stats_last_writer = cpu;
}
}
@@ -136,14 +138,15 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
struct ovs_flow_stats *ovs_stats,
unsigned long *used, __be16 *tcp_flags)
{
- int node;
+ int cpu;
*used = 0;
*tcp_flags = 0;
memset(ovs_stats, 0, sizeof(*ovs_stats));
- for_each_node(node) {
- struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]);
+ /* We open code this to make sure cpu 0 is always considered */
+ for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+ struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
if (stats) {
/* Local CPU may write on non-local stats, so we must
@@ -163,10 +166,11 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
/* Called with ovs_mutex. */
void ovs_flow_stats_clear(struct sw_flow *flow)
{
- int node;
+ int cpu;
- for_each_node(node) {
- struct flow_stats *stats = ovsl_dereference(flow->stats[node]);
+ /* We open code this to make sure cpu 0 is always considered */
+ for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+ struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
if (stats) {
spin_lock_bh(&stats->lock);
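The new statistics loops deliberately avoid for_each_possible_cpu(): that iterator starts at the first bit of cpu_possible_mask, but slot 0 holds the pre-allocated stats entry and must be visited whether or not CPU 0 is in the mask. A sketch of the open-coded walk, with get_stats_slot() as a hypothetical accessor:

/* Sketch: start the induction at 0 so stats[0] is always visited,
 * then continue through cpu_possible_mask like the loops above.
 */
static u64 sum_flow_bytes(const struct flow *flow)
{
	u64 total = 0;
	int cpu;

	for (cpu = 0; cpu < nr_cpu_ids;
	     cpu = cpumask_next(cpu, cpu_possible_mask)) {
		const struct flow_stats *s = get_stats_slot(flow, cpu);

		if (s)
			total += s->byte_count;
	}
	return total;
}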
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 156a3029c17b..ae783f5c6695 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -178,14 +178,14 @@ struct sw_flow {
struct hlist_node node[2];
u32 hash;
} flow_table, ufid_table;
- int stats_last_writer; /* NUMA-node id of the last writer on
+ int stats_last_writer; /* CPU id of the last writer on
* 'stats[0]'.
*/
struct sw_flow_key key;
struct sw_flow_id id;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
- struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one
+ struct flow_stats __rcu *stats[]; /* One for each CPU. First one
* is allocated at flow creation time,
* the rest are allocated on demand
* while holding the 'stats[0].lock'.
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index d073fff82fdb..ea7a8073fa02 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
+#include <linux/cpumask.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
@@ -79,17 +80,12 @@ struct sw_flow *ovs_flow_alloc(void)
{
struct sw_flow *flow;
struct flow_stats *stats;
- int node;
- flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+ flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
if (!flow)
return ERR_PTR(-ENOMEM);
- flow->sf_acts = NULL;
- flow->mask = NULL;
- flow->id.unmasked_key = NULL;
- flow->id.ufid_len = 0;
- flow->stats_last_writer = NUMA_NO_NODE;
+ flow->stats_last_writer = -1;
/* Initialize the default stat node. */
stats = kmem_cache_alloc_node(flow_stats_cache,
@@ -102,10 +98,6 @@ struct sw_flow *ovs_flow_alloc(void)
RCU_INIT_POINTER(flow->stats[0], stats);
- for_each_node(node)
- if (node != 0)
- RCU_INIT_POINTER(flow->stats[node], NULL);
-
return flow;
err:
kmem_cache_free(flow_cache, flow);
@@ -142,16 +134,17 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)
static void flow_free(struct sw_flow *flow)
{
- int node;
+ int cpu;
if (ovs_identifier_is_key(&flow->id))
kfree(flow->id.unmasked_key);
if (flow->sf_acts)
ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
- for_each_node(node)
- if (flow->stats[node])
+ /* We open code this to make sure cpu 0 is always considered */
+ for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask))
+ if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
- (struct flow_stats __force *)flow->stats[node]);
+ (struct flow_stats __force *)flow->stats[cpu]);
kmem_cache_free(flow_cache, flow);
}
@@ -756,7 +749,7 @@ int ovs_flow_init(void)
BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
- + (nr_node_ids
+ + (nr_cpu_ids
* sizeof(struct flow_stats *)),
0, 0, NULL);
if (flow_cache == NULL)
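ovs_flow_init() sizes the slab object as the fixed struct plus one trailing pointer per possible CPU; combined with the switch to kmem_cache_zalloc() earlier in this patch, every stats[] slot other than the pre-allocated slot 0 starts out NULL and is filled on demand. A reduced sketch of that layout, with illustrative names:

/* Sketch: flexible-array tail sized once at cache creation.
 * nr_cpu_ids is fixed before this init runs, so the object size
 * never changes afterwards.
 */
struct flow {
	/* ... fixed fields ... */
	struct flow_stats __rcu *stats[];	/* nr_cpu_ids entries */
};

static struct kmem_cache *flow_cache;

static int flow_cache_init(void)
{
	flow_cache = kmem_cache_create("flow",
				       sizeof(struct flow) +
				       nr_cpu_ids * sizeof(struct flow_stats *),
				       0, 0, NULL);
	return flow_cache ? 0 : -ENOMEM;
}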
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 784c53163b7b..13396c74b5c1 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -19,6 +19,13 @@ config AF_RXRPC
See Documentation/networking/rxrpc.txt.
+config AF_RXRPC_IPV6
+ bool "IPv6 support for RxRPC"
+ depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC)
+ help
+ Say Y here to allow AF_RXRPC to use IPv6 UDP as well as IPv4 UDP as
+ its network transport.
+
config AF_RXRPC_DEBUG
bool "RxRPC dynamic debugging"
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index caa226dd436e..09f81befc705 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -106,19 +106,25 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
case AF_INET:
if (srx->transport_len < sizeof(struct sockaddr_in))
return -EINVAL;
- _debug("INET: %x @ %pI4",
- ntohs(srx->transport.sin.sin_port),
- &srx->transport.sin.sin_addr);
tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad);
break;
+#ifdef CONFIG_AF_RXRPC_IPV6
case AF_INET6:
+ if (srx->transport_len < sizeof(struct sockaddr_in6))
+ return -EINVAL;
+ tail = offsetof(struct sockaddr_rxrpc, transport) +
+ sizeof(struct sockaddr_in6);
+ break;
+#endif
+
default:
return -EAFNOSUPPORT;
}
if (tail < len)
memset((void *)srx + tail, 0, len - tail);
+ _debug("INET: %pISp", &srx->transport);
return 0;
}
@@ -299,7 +305,7 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
{
_enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
rxrpc_release_call(rxrpc_sk(sock->sk), call);
- rxrpc_put_call(call, rxrpc_call_put);
+ rxrpc_put_call(call, rxrpc_call_put_kernel);
}
EXPORT_SYMBOL(rxrpc_kernel_end_call);
@@ -401,6 +407,23 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
switch (rx->sk.sk_state) {
case RXRPC_UNBOUND:
+ rx->srx.srx_family = AF_RXRPC;
+ rx->srx.srx_service = 0;
+ rx->srx.transport_type = SOCK_DGRAM;
+ rx->srx.transport.family = rx->family;
+ switch (rx->family) {
+ case AF_INET:
+ rx->srx.transport_len = sizeof(struct sockaddr_in);
+ break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ rx->srx.transport_len = sizeof(struct sockaddr_in6);
+ break;
+#endif
+ default:
+ ret = -EAFNOSUPPORT;
+ goto error_unlock;
+ }
local = rxrpc_lookup_local(&rx->srx);
if (IS_ERR(local)) {
ret = PTR_ERR(local);
@@ -551,7 +574,8 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
return -EAFNOSUPPORT;
/* we support transport protocol UDP/UDP6 only */
- if (protocol != PF_INET)
+ if (protocol != PF_INET &&
+ !(IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol == PF_INET6))
return -EPROTONOSUPPORT;
if (sock->type != SOCK_DGRAM)
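Read as a whole, the protocol check above rejects anything that is neither PF_INET nor, when CONFIG_AF_RXRPC_IPV6 is compiled in, PF_INET6. Because IS_ENABLED() folds to a compile-time 0 or 1, the IPv6 arm disappears entirely on IPv4-only builds:

/* With CONFIG_AF_RXRPC_IPV6 unset, IS_ENABLED(...) is 0, the inner
 * conjunction is constant-false, and the test reduces to
 * "protocol != PF_INET" -- so PF_INET6 is refused like anything else.
 */
if (protocol != PF_INET &&
    !(IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol == PF_INET6))
	return -EPROTONOSUPPORT;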
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index b1cb79ec4e96..e78c40b37db5 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -498,6 +498,7 @@ struct rxrpc_call {
*/
#define RXRPC_RXTX_BUFF_SIZE 64
#define RXRPC_RXTX_BUFF_MASK (RXRPC_RXTX_BUFF_SIZE - 1)
+#define RXRPC_INIT_RX_WINDOW_SIZE 32
struct sk_buff **rxtx_buffer;
u8 *rxtx_annotations;
#define RXRPC_TX_ANNO_ACK 0
@@ -518,7 +519,7 @@ struct rxrpc_call {
rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */
u8 rx_winsize; /* Size of Rx window */
u8 tx_winsize; /* Maximum size of Tx window */
- u8 nr_jumbo_dup; /* Number of jumbo duplicates */
+ u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */
/* receive-phase ACK management */
u8 ackr_reason; /* reason to ACK */
@@ -540,8 +541,10 @@ enum rxrpc_call_trace {
rxrpc_call_seen,
rxrpc_call_got,
rxrpc_call_got_userid,
+ rxrpc_call_got_kernel,
rxrpc_call_put,
rxrpc_call_put_userid,
+ rxrpc_call_put_kernel,
rxrpc_call_put_noqueue,
rxrpc_call__nr_trace
};
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index b8acec0d596e..26c293ef98eb 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -121,7 +121,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
call->user_call_ID = user_call_ID;
call->notify_rx = notify_rx;
- rxrpc_get_call(call, rxrpc_call_got);
+ rxrpc_get_call(call, rxrpc_call_got_kernel);
user_attach_call(call, user_call_ID);
rxrpc_get_call(call, rxrpc_call_got_userid);
rb_link_node(&call->sock_node, parent, pp);
@@ -221,6 +221,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
if (rx->discard_new_call) {
_debug("discard %lx", call->user_call_ID);
rx->discard_new_call(call, call->user_call_ID);
+ rxrpc_put_call(call, rxrpc_call_put_kernel);
}
rxrpc_call_completed(call);
rxrpc_release_call(rx, call);
@@ -300,6 +301,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
smp_store_release(&b->call_backlog_tail,
(call_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
+ rxrpc_see_call(call);
call->conn = conn;
call->peer = rxrpc_get_peer(conn->params.peer);
return call;
@@ -401,6 +403,13 @@ found_service:
if (call->state == RXRPC_CALL_SERVER_ACCEPTING)
rxrpc_notify_socket(call);
+ /* We have to discard the prealloc queue's ref here and rely on a
+ * combination of the RCU read lock and refs held either by the socket
+ * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel
+ * service to prevent the call from being deallocated too early.
+ */
+ rxrpc_put_call(call, rxrpc_call_put);
+
_leave(" = %p{%d}", call, call->debug_id);
out:
spin_unlock(&rx->incoming_lock);
@@ -425,9 +434,11 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
write_lock(&rx->call_lock);
- ret = -ENODATA;
- if (list_empty(&rx->to_be_accepted))
- goto out;
+ if (list_empty(&rx->to_be_accepted)) {
+ write_unlock(&rx->call_lock);
+ kleave(" = -ENODATA [empty]");
+ return ERR_PTR(-ENODATA);
+ }
/* check the user ID isn't already in use */
pp = &rx->calls.rb_node;
@@ -466,7 +477,6 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
}
/* formalise the acceptance */
- rxrpc_get_call(call, rxrpc_call_got);
call->notify_rx = notify_rx;
call->user_call_ID = user_call_ID;
rxrpc_get_call(call, rxrpc_call_got_userid);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 2b976e789562..61432049869b 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -95,7 +95,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
break;
case RXRPC_ACK_IDLE:
- if (rxrpc_soft_ack_delay < expiry)
+ if (rxrpc_idle_ack_delay < expiry)
expiry = rxrpc_idle_ack_delay;
break;
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 18ab13f82f6e..22f9b0d1a138 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -56,8 +56,10 @@ const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = {
[rxrpc_call_seen] = "SEE",
[rxrpc_call_got] = "GOT",
[rxrpc_call_got_userid] = "Gus",
+ [rxrpc_call_got_kernel] = "Gke",
[rxrpc_call_put] = "PUT",
[rxrpc_call_put_userid] = "Pus",
+ [rxrpc_call_put_kernel] = "Pke",
[rxrpc_call_put_noqueue] = "PNQ",
};
@@ -150,7 +152,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
/* Leave space in the ring to handle a maxed-out jumbo packet */
- call->rx_winsize = RXRPC_RXTX_BUFF_SIZE - 1 - 46;
+ call->rx_winsize = rxrpc_rx_window_size;
call->tx_winsize = 16;
call->rx_expect_next = 1;
return call;
@@ -462,9 +464,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
call->rxtx_buffer[i] = NULL;
}
- /* We have to release the prealloc backlog ref */
- if (rxrpc_is_service_call(call))
- rxrpc_put_call(call, rxrpc_call_put);
_leave("");
}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index ffa9addb97b2..bb1f29280aea 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -134,6 +134,16 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
srx.transport.sin.sin_addr.s_addr)
goto not_found;
break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ if (peer->srx.transport.sin6.sin6_port !=
+ srx.transport.sin6.sin6_port ||
+ memcmp(&peer->srx.transport.sin6.sin6_addr,
+ &srx.transport.sin6.sin6_addr,
+ sizeof(struct in6_addr)) != 0)
+ goto not_found;
+ break;
+#endif
default:
BUG();
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index afeba98004b1..75af0bd316c7 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -59,6 +59,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to)
spin_unlock(&call->lock);
+ wake_up(&call->waitq);
+
while (list) {
skb = list;
list = skb->next;
@@ -125,7 +127,7 @@ static bool rxrpc_validate_jumbo(struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned int offset = sp->offset;
- unsigned int len = skb->data_len;
+ unsigned int len = skb->len;
int nr_jumbo = 1;
u8 flags = sp->hdr.flags;
@@ -162,7 +164,7 @@ protocol_error:
* (that information is encoded in the ACK packet).
*/
static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq,
- u8 annotation, bool *_jumbo_dup)
+ u8 annotation, bool *_jumbo_bad)
{
/* Discard normal packets that are duplicates. */
if (annotation == 0)
@@ -172,9 +174,9 @@ static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq,
* more partially duplicate jumbo packets, we refuse to take any more
* jumbos for this call.
*/
- if (!*_jumbo_dup) {
- call->nr_jumbo_dup++;
- *_jumbo_dup = true;
+ if (!*_jumbo_bad) {
+ call->nr_jumbo_bad++;
+ *_jumbo_bad = true;
}
}
@@ -189,12 +191,12 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
unsigned int ix;
rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
rxrpc_seq_t seq = sp->hdr.seq, hard_ack;
- bool immediate_ack = false, jumbo_dup = false, queued;
+ bool immediate_ack = false, jumbo_bad = false, queued;
u16 len;
u8 ack = 0, flags, annotation = 0;
_enter("{%u,%u},{%u,%u}",
- call->rx_hard_ack, call->rx_top, skb->data_len, seq);
+ call->rx_hard_ack, call->rx_top, skb->len, seq);
_proto("Rx DATA %%%u { #%u f=%02x }",
sp->hdr.serial, seq, sp->hdr.flags);
@@ -220,7 +222,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
flags = sp->hdr.flags;
if (flags & RXRPC_JUMBO_PACKET) {
- if (call->nr_jumbo_dup > 3) {
+ if (call->nr_jumbo_bad > 3) {
ack = RXRPC_ACK_NOSPACE;
ack_serial = serial;
goto ack;
@@ -231,7 +233,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
next_subpacket:
queued = false;
ix = seq & RXRPC_RXTX_BUFF_MASK;
- len = skb->data_len;
+ len = skb->len;
if (flags & RXRPC_JUMBO_PACKET)
len = RXRPC_JUMBO_DATALEN;
@@ -257,7 +259,7 @@ next_subpacket:
}
if (call->rxtx_buffer[ix]) {
- rxrpc_input_dup_data(call, seq, annotation, &jumbo_dup);
+ rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad);
if (ack != RXRPC_ACK_DUPLICATE) {
ack = RXRPC_ACK_DUPLICATE;
ack_serial = serial;
@@ -302,6 +304,15 @@ skip:
annotation++;
if (flags & RXRPC_JUMBO_PACKET)
annotation |= RXRPC_RX_ANNO_JLAST;
+ if (after(seq, hard_ack + call->rx_winsize)) {
+ ack = RXRPC_ACK_EXCEEDS_WINDOW;
+ ack_serial = serial;
+ if (!jumbo_bad) {
+ call->nr_jumbo_bad++;
+ jumbo_bad = true;
+ }
+ goto ack;
+ }
_proto("Rx DATA Jumbo %%%u", serial);
goto next_subpacket;
@@ -331,14 +342,16 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_peer *peer;
unsigned int mtu;
+ u32 rwind = ntohl(ackinfo->rwind);
_proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
sp->hdr.serial,
ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
- ntohl(ackinfo->rwind), ntohl(ackinfo->jumbo_max));
+ rwind, ntohl(ackinfo->jumbo_max));
- if (call->tx_winsize > ntohl(ackinfo->rwind))
- call->tx_winsize = ntohl(ackinfo->rwind);
+ if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+ rwind = RXRPC_RXTX_BUFF_SIZE - 1;
+ call->tx_winsize = rwind;
mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU));
@@ -442,7 +455,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
}
offset = sp->offset + nr_acks + 3;
- if (skb->data_len >= offset + sizeof(buf.info)) {
+ if (skb->len >= offset + sizeof(buf.info)) {
if (skb_copy_bits(skb, offset, &buf.info, sizeof(buf.info)) < 0)
return rxrpc_proto_abort("XAI", call, 0);
rxrpc_input_ackinfo(call, skb, &buf.info);
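Two idioms in these input.c hunks are worth spelling out: after() compares 32-bit sequence numbers in wrap-safe serial arithmetic (used above for the seq > hard_ack + rx_winsize check), and the peer-advertised receive window is clamped so the transmitter never believes it can have more packets in flight than the local ring holds. A standalone sketch of both, assuming the usual serial-number semantics:

#include <stdbool.h>
#include <stdint.h>

/* Wrap-safe "a comes after b" over a 32-bit sequence space; the
 * signed subtraction keeps working across the 2^32 wrap.
 */
static bool seq_after(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

/* Clamp an advertised window to the local ring capacity, as the
 * rwind handling above does with RXRPC_RXTX_BUFF_SIZE - 1.
 */
static uint32_t clamp_rwind(uint32_t rwind, uint32_t ring_size)
{
	return rwind > ring_size - 1 ? ring_size - 1 : rwind;
}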
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index cdd58e6e9fbd..f073e932500e 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -15,8 +15,6 @@
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <generated/utsrelease.h>
@@ -33,7 +31,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
{
struct rxrpc_wire_header whdr;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct sockaddr_in sin;
+ struct sockaddr_rxrpc srx;
struct msghdr msg;
struct kvec iov[2];
size_t len;
@@ -41,12 +39,11 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
_enter("");
- sin.sin_family = AF_INET;
- sin.sin_port = udp_hdr(skb)->source;
- sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ return;
- msg.msg_name = &sin;
- msg.msg_namelen = sizeof(sin);
+ msg.msg_name = &srx.transport;
+ msg.msg_namelen = srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 782b9adf67cb..e3fad80b0795 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -58,6 +58,17 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
memcmp(&local->srx.transport.sin.sin_addr,
&srx->transport.sin.sin_addr,
sizeof(struct in_addr));
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ /* If the choice of UDP6 port is left up to the transport, then
+ * the endpoint record doesn't match.
+ */
+ return ((u16 __force)local->srx.transport.sin6.sin6_port -
+ (u16 __force)srx->transport.sin6.sin6_port) ?:
+ memcmp(&local->srx.transport.sin6.sin6_addr,
+ &srx->transport.sin6.sin6_addr,
+ sizeof(struct in6_addr));
+#endif
default:
BUG();
}
@@ -100,11 +111,12 @@ static int rxrpc_open_socket(struct rxrpc_local *local)
struct sock *sock;
int ret, opt;
- _enter("%p{%d}", local, local->srx.transport_type);
+ _enter("%p{%d,%d}",
+ local, local->srx.transport_type, local->srx.transport.family);
/* create a socket to represent the local endpoint */
- ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type,
- IPPROTO_UDP, &local->socket);
+ ret = sock_create_kern(&init_net, local->srx.transport.family,
+ local->srx.transport_type, 0, &local->socket);
if (ret < 0) {
_leave(" = %d [socket]", ret);
return ret;
@@ -169,18 +181,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
long diff;
int ret;
- if (srx->transport.family == AF_INET) {
- _enter("{%d,%u,%pI4+%hu}",
- srx->transport_type,
- srx->transport.family,
- &srx->transport.sin.sin_addr,
- ntohs(srx->transport.sin.sin_port));
- } else {
- _enter("{%d,%u}",
- srx->transport_type,
- srx->transport.family);
- return ERR_PTR(-EAFNOSUPPORT);
- }
+ _enter("{%d,%d,%pISp}",
+ srx->transport_type, srx->transport.family, &srx->transport);
mutex_lock(&rxrpc_local_mutex);
@@ -233,13 +235,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
found:
mutex_unlock(&rxrpc_local_mutex);
- _net("LOCAL %s %d {%d,%u,%pI4+%hu}",
- age,
- local->debug_id,
- local->srx.transport_type,
- local->srx.transport.family,
- &local->srx.transport.sin.sin_addr,
- ntohs(local->srx.transport.sin.sin_port));
+ _net("LOCAL %s %d {%pISp}",
+ age, local->debug_id, &local->srx.transport);
_leave(" = %p", local);
return local;
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index fd096f742e4b..8b910780f1ac 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -50,7 +50,10 @@ unsigned int rxrpc_idle_ack_delay = 0.5 * HZ;
* limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
* packets.
*/
-unsigned int rxrpc_rx_window_size = RXRPC_RXTX_BUFF_SIZE - 46;
+unsigned int rxrpc_rx_window_size = RXRPC_INIT_RX_WINDOW_SIZE;
+#if (RXRPC_RXTX_BUFF_SIZE - 1) < RXRPC_INIT_RX_WINDOW_SIZE
+#error Need to reduce RXRPC_INIT_RX_WINDOW_SIZE
+#endif
/*
* Maximum Rx MTU size. This indicates to the sender the size of jumbo packet
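The #error above makes a bad combination of the two constants fail the build rather than overflow the Rx ring at run time. The same invariant could equally be asserted next to the code that relies on it; a sketch, not what the patch does, using BUILD_BUG_ON():

/* Sketch: compile-time guard placed in code instead of at file
 * scope; fails the build if the default window cannot fit the ring.
 */
static int __init rxrpc_check_window(void)
{
	BUILD_BUG_ON(RXRPC_INIT_RX_WINDOW_SIZE > RXRPC_RXTX_BUFF_SIZE - 1);
	return 0;
}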
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 719a4c23f09d..06a9aca739d1 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -15,8 +15,6 @@
#include <linux/gfp.h>
#include <linux/skbuff.h>
#include <linux/export.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"
@@ -71,10 +69,10 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
mtu = call->conn->params.peer->if_mtu;
mtu -= call->conn->params.peer->hdrsize;
- jmax = (call->nr_jumbo_dup > 3) ? 1 : rxrpc_rx_jumbo_max;
+ jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
pkt->ackinfo.maxMTU = htonl(mtu);
- pkt->ackinfo.rwind = htonl(rxrpc_rx_window_size);
+ pkt->ackinfo.rwind = htonl(call->rx_winsize);
pkt->ackinfo.jumbo_max = htonl(jmax);
*ackp++ = 0;
@@ -260,6 +258,24 @@ send_fragmentable:
(char *)&opt, sizeof(opt));
}
break;
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ opt = IPV6_PMTUDISC_DONT;
+ ret = kernel_setsockopt(conn->params.local->socket,
+ SOL_IPV6, IPV6_MTU_DISCOVER,
+ (char *)&opt, sizeof(opt));
+ if (ret == 0) {
+ ret = kernel_sendmsg(conn->params.local->socket, &msg,
+ iov, 1, iov[0].iov_len);
+
+ opt = IPV6_PMTUDISC_DO;
+ kernel_setsockopt(conn->params.local->socket,
+ SOL_IPV6, IPV6_MTU_DISCOVER,
+ (char *)&opt, sizeof(opt));
+ }
+ break;
+#endif
}
up_write(&conn->params.local->defrag_sem);
@@ -272,10 +288,7 @@ send_fragmentable:
*/
void rxrpc_reject_packets(struct rxrpc_local *local)
{
- union {
- struct sockaddr sa;
- struct sockaddr_in sin;
- } sa;
+ struct sockaddr_rxrpc srx;
struct rxrpc_skb_priv *sp;
struct rxrpc_wire_header whdr;
struct sk_buff *skb;
@@ -292,32 +305,21 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
iov[1].iov_len = sizeof(code);
size = sizeof(whdr) + sizeof(code);
- msg.msg_name = &sa;
+ msg.msg_name = &srx.transport;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
- memset(&sa, 0, sizeof(sa));
- sa.sa.sa_family = local->srx.transport.family;
- switch (sa.sa.sa_family) {
- case AF_INET:
- msg.msg_namelen = sizeof(sa.sin);
- break;
- default:
- msg.msg_namelen = 0;
- break;
- }
-
memset(&whdr, 0, sizeof(whdr));
whdr.type = RXRPC_PACKET_TYPE_ABORT;
while ((skb = skb_dequeue(&local->reject_queue))) {
rxrpc_see_skb(skb);
sp = rxrpc_skb(skb);
- switch (sa.sa.sa_family) {
- case AF_INET:
- sa.sin.sin_port = udp_hdr(skb)->source;
- sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+
+ if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
+ msg.msg_namelen = srx.transport_len;
+
code = htonl(skb->priority);
whdr.epoch = htonl(sp->hdr.epoch);
@@ -329,10 +331,6 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
whdr.flags &= RXRPC_CLIENT_INITIATED;
kernel_sendmsg(local->socket, &msg, iov, 2, size);
- break;
-
- default:
- break;
}
rxrpc_free_skb(skb);
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index c8948936c6fc..9e0725f5652b 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -66,6 +66,32 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
}
break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ srx.transport.sin6.sin6_port = serr->port;
+ srx.transport_len = sizeof(struct sockaddr_in6);
+ switch (serr->ee.ee_origin) {
+ case SO_EE_ORIGIN_ICMP6:
+ _net("Rx ICMP6");
+ memcpy(&srx.transport.sin6.sin6_addr,
+ skb_network_header(skb) + serr->addr_offset,
+ sizeof(struct in6_addr));
+ break;
+ case SO_EE_ORIGIN_ICMP:
+ _net("Rx ICMP on v6 sock");
+ memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12,
+ skb_network_header(skb) + serr->addr_offset,
+ sizeof(struct in_addr));
+ break;
+ default:
+ memcpy(&srx.transport.sin6.sin6_addr,
+ &ipv6_hdr(skb)->saddr,
+ sizeof(struct in6_addr));
+ break;
+ }
+ break;
+#endif
+
default:
BUG();
}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 2efe29a4c232..f3e5766910fd 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -16,12 +16,14 @@
#include <linux/skbuff.h>
#include <linux/udp.h>
#include <linux/in.h>
+#include <linux/in6.h>
#include <linux/slab.h>
#include <linux/hashtable.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include <net/route.h>
+#include <net/ip6_route.h>
#include "ar-internal.h"
static DEFINE_HASHTABLE(rxrpc_peer_hash, 10);
@@ -50,6 +52,13 @@ static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local,
size = sizeof(srx->transport.sin.sin_addr);
p = (u16 *)&srx->transport.sin.sin_addr;
break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ hash_key += (u16 __force)srx->transport.sin.sin_port;
+ size = sizeof(srx->transport.sin6.sin6_addr);
+ p = (u16 *)&srx->transport.sin6.sin6_addr;
+ break;
+#endif
default:
WARN(1, "AF_RXRPC: Unsupported transport address family\n");
return 0;
@@ -93,6 +102,14 @@ static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer,
memcmp(&peer->srx.transport.sin.sin_addr,
&srx->transport.sin.sin_addr,
sizeof(struct in_addr));
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ return ((u16 __force)peer->srx.transport.sin6.sin6_port -
+ (u16 __force)srx->transport.sin6.sin6_port) ?:
+ memcmp(&peer->srx.transport.sin6.sin6_addr,
+ &srx->transport.sin6.sin6_addr,
+ sizeof(struct in6_addr));
+#endif
default:
BUG();
}
@@ -130,17 +147,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
if (peer) {
- switch (srx->transport.family) {
- case AF_INET:
- _net("PEER %d {%d,%u,%pI4+%hu}",
- peer->debug_id,
- peer->srx.transport_type,
- peer->srx.transport.family,
- &peer->srx.transport.sin.sin_addr,
- ntohs(peer->srx.transport.sin.sin_port));
- break;
- }
-
+ _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
_leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
}
return peer;
@@ -152,22 +159,53 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
*/
static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
{
+ struct dst_entry *dst;
struct rtable *rt;
- struct flowi4 fl4;
+ struct flowi fl;
+ struct flowi4 *fl4 = &fl.u.ip4;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ struct flowi6 *fl6 = &fl.u.ip6;
+#endif
peer->if_mtu = 1500;
- rt = ip_route_output_ports(&init_net, &fl4, NULL,
- peer->srx.transport.sin.sin_addr.s_addr, 0,
- htons(7000), htons(7001),
- IPPROTO_UDP, 0, 0);
- if (IS_ERR(rt)) {
- _leave(" [route err %ld]", PTR_ERR(rt));
- return;
+ memset(&fl, 0, sizeof(fl));
+ switch (peer->srx.transport.family) {
+ case AF_INET:
+ rt = ip_route_output_ports(
+ &init_net, fl4, NULL,
+ peer->srx.transport.sin.sin_addr.s_addr, 0,
+ htons(7000), htons(7001), IPPROTO_UDP, 0, 0);
+ if (IS_ERR(rt)) {
+ _leave(" [route err %ld]", PTR_ERR(rt));
+ return;
+ }
+ dst = &rt->dst;
+ break;
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ fl6->flowi6_iif = LOOPBACK_IFINDEX;
+ fl6->flowi6_scope = RT_SCOPE_UNIVERSE;
+ fl6->flowi6_proto = IPPROTO_UDP;
+ memcpy(&fl6->daddr, &peer->srx.transport.sin6.sin6_addr,
+ sizeof(struct in6_addr));
+ fl6->fl6_dport = htons(7001);
+ fl6->fl6_sport = htons(7000);
+ dst = ip6_route_output(&init_net, NULL, fl6);
+ if (IS_ERR(dst)) {
+ _leave(" [route err %ld]", PTR_ERR(dst));
+ return;
+ }
+ break;
+#endif
+
+ default:
+ BUG();
}
- peer->if_mtu = dst_mtu(&rt->dst);
- dst_release(&rt->dst);
+ peer->if_mtu = dst_mtu(dst);
+ dst_release(dst);
_leave(" [if_mtu %u]", peer->if_mtu);
}
@@ -203,20 +241,28 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
*/
static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key)
{
+ peer->hash_key = hash_key;
rxrpc_assess_MTU_size(peer);
peer->mtu = peer->if_mtu;
- if (peer->srx.transport.family == AF_INET) {
+ switch (peer->srx.transport.family) {
+ case AF_INET:
peer->hdrsize = sizeof(struct iphdr);
- switch (peer->srx.transport_type) {
- case SOCK_DGRAM:
- peer->hdrsize += sizeof(struct udphdr);
- break;
- default:
- BUG();
- break;
- }
- } else {
+ break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ peer->hdrsize = sizeof(struct ipv6hdr);
+ break;
+#endif
+ default:
+ BUG();
+ }
+
+ switch (peer->srx.transport_type) {
+ case SOCK_DGRAM:
+ peer->hdrsize += sizeof(struct udphdr);
+ break;
+ default:
BUG();
}
@@ -238,7 +284,6 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
peer = rxrpc_alloc_peer(local, gfp);
if (peer) {
- peer->hash_key = hash_key;
memcpy(&peer->srx, srx, sizeof(*srx));
rxrpc_init_peer(peer, hash_key);
}
@@ -285,11 +330,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
struct rxrpc_peer *peer, *candidate;
unsigned long hash_key = rxrpc_peer_hash_key(local, srx);
- _enter("{%d,%d,%pI4+%hu}",
- srx->transport_type,
- srx->transport_len,
- &srx->transport.sin.sin_addr,
- ntohs(srx->transport.sin.sin_port));
+ _enter("{%pISp}", &srx->transport);
/* search the peer list first */
rcu_read_lock();
@@ -326,11 +367,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
peer = candidate;
}
- _net("PEER %d {%d,%pI4+%hu}",
- peer->debug_id,
- peer->srx.transport_type,
- &peer->srx.transport.sin.sin_addr,
- ntohs(peer->srx.transport.sin.sin_port));
+ _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
_leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
return peer;
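The peer hash key used by the lookups above is built by folding the transport address into an accumulator 16 bits at a time, which is why the IPv6 case only had to widen the span being folded. A standalone sketch of the fold, ignoring the port and family mixing that the real rxrpc_peer_hash_key() also stirs in:

#include <stddef.h>
#include <stdint.h>

/* Fold an address into a hash key one 16-bit word at a time; works
 * unchanged for 4-byte IPv4 and 16-byte IPv6 addresses.
 */
static unsigned long fold_addr(const void *addr, size_t size)
{
	const uint16_t *p = addr;
	unsigned long key = 0;

	for (; size >= sizeof(*p); size -= sizeof(*p))
		key += *p++;

	return key;
}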
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index d529d1b4021c..65cd980767fa 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -52,11 +52,12 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
struct rxrpc_sock *rx;
struct rxrpc_peer *peer;
struct rxrpc_call *call;
- char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+ char lbuff[50], rbuff[50];
if (v == &rxrpc_calls) {
seq_puts(seq,
- "Proto Local Remote "
+ "Proto Local "
+ " Remote "
" SvID ConnID CallID End Use State Abort "
" UserID\n");
return 0;
@@ -68,9 +69,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
if (rx) {
local = READ_ONCE(rx->local);
if (local)
- sprintf(lbuff, "%pI4:%u",
- &local->srx.transport.sin.sin_addr,
- ntohs(local->srx.transport.sin.sin_port));
+ sprintf(lbuff, "%pISpc", &local->srx.transport);
else
strcpy(lbuff, "no_local");
} else {
@@ -79,14 +78,12 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
peer = call->peer;
if (peer)
- sprintf(rbuff, "%pI4:%u",
- &peer->srx.transport.sin.sin_addr,
- ntohs(peer->srx.transport.sin.sin_port));
+ sprintf(rbuff, "%pISpc", &peer->srx.transport);
else
strcpy(rbuff, "no_connection");
seq_printf(seq,
- "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
+ "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
" %-8.8s %08x %lx\n",
lbuff,
rbuff,
@@ -145,11 +142,12 @@ static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_connection *conn;
- char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+ char lbuff[50], rbuff[50];
if (v == &rxrpc_connection_proc_list) {
seq_puts(seq,
- "Proto Local Remote "
+ "Proto Local "
+ " Remote "
" SvID ConnID End Use State Key "
" Serial ISerial\n"
);
@@ -163,16 +161,12 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
goto print;
}
- sprintf(lbuff, "%pI4:%u",
- &conn->params.local->srx.transport.sin.sin_addr,
- ntohs(conn->params.local->srx.transport.sin.sin_port));
+ sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport);
- sprintf(rbuff, "%pI4:%u",
- &conn->params.peer->srx.transport.sin.sin_addr,
- ntohs(conn->params.peer->srx.transport.sin.sin_port));
+ sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport);
print:
seq_printf(seq,
- "UDP %-22.22s %-22.22s %4x %08x %s %3u"
+ "UDP %-47.47s %-47.47s %4x %08x %s %3u"
" %s %08x %08x %08x\n",
lbuff,
rbuff,
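The proc buffers grow from 22 bytes to 50 because %pISpc can now print an IPv6 endpoint. Rough worst-case arithmetic, assuming the compressed form the 'c' flag requests: 8 groups of 4 hex digits plus 7 colons is 39 characters, and '[', ']', ':' plus a 5-digit port brings it to 47, so 50 leaves headroom; the old size only covered dotted-quad plus port. A userspace sketch of the same sizing:

#include <stdio.h>

int main(void)
{
	/* 39-char address + "[" + "]" + ":" + 5-digit port = 47 chars,
	 * 48 with the terminating NUL -- inside the 50-byte buffer.
	 */
	char buf[50];

	snprintf(buf, sizeof(buf), "[%s]:%u",
		 "2001:0db8:85a3:08d3:1319:8a2e:0370:7344", 65535);
	puts(buf);
	return 0;
}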
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 20d0b5c6f81b..a284205b8ecf 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -118,6 +118,7 @@ static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx,
list_del_init(&call->recvmsg_link);
write_unlock_bh(&rx->recvmsg_lock);
+ rxrpc_get_call(call, rxrpc_call_got);
write_lock(&rx->call_lock);
list_add_tail(&call->accept_link, &rx->to_be_accepted);
write_unlock(&rx->call_lock);
@@ -463,6 +464,10 @@ try_again:
flags, &copied);
if (ret == -EAGAIN)
ret = 0;
+
+ if (after(call->rx_top, call->rx_hard_ack) &&
+ call->rxtx_buffer[(call->rx_hard_ack + 1) & RXRPC_RXTX_BUFF_MASK])
+ rxrpc_notify_socket(call);
break;
default:
ret = 0;
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index b7ca8cf13c84..a03c61c672f5 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -20,7 +20,7 @@ static const unsigned int one = 1;
static const unsigned int four = 4;
static const unsigned int thirtytwo = 32;
static const unsigned int n_65535 = 65535;
-static const unsigned int n_max_acks = RXRPC_MAXACKS;
+static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
/*
* RxRPC operating parameters.
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index b88914d53ca5..ff7af71c4b49 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -30,6 +30,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
return 0;
+#ifdef CONFIG_AF_RXRPC_IPV6
case ETH_P_IPV6:
srx->transport_type = SOCK_DGRAM;
srx->transport_len = sizeof(srx->transport.sin6);
@@ -37,6 +38,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
return 0;
+#endif
default:
pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n",
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 72e3426fa48f..7795d5a3f79a 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -749,6 +749,17 @@ config NET_ACT_CONNMARK
To compile this code as a module, choose M here: the
module will be called act_connmark.
+config NET_ACT_SKBMOD
+ tristate "skb data modification action"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to allow modification of skb data.
+
+ If unsure, say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called act_skbmod.
+
config NET_ACT_IFE
tristate "Inter-FE action based on IETF ForCES InterFE LFB"
depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index b9d046b9535a..148ae0d5ac2c 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o
obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o
obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o
+obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o
obj-$(CONFIG_NET_ACT_IFE) += act_ife.o
obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o
obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index bfa870731e74..1d3960033f61 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -39,13 +39,10 @@ static struct tc_action_ops act_bpf_ops;
static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
struct tcf_result *res)
{
+ bool at_ingress = skb_at_tc_ingress(skb);
struct tcf_bpf *prog = to_bpf(act);
struct bpf_prog *filter;
int action, filter_res;
- bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
-
- if (unlikely(!skb_mac_header_was_set(skb)))
- return TC_ACT_UNSPEC;
tcf_lastuse_update(&prog->tcf_tm);
bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
new file mode 100644
index 000000000000..e7d96381c908
--- /dev/null
+++ b/net/sched/act_skbmod.c
@@ -0,0 +1,301 @@
+/*
+ * net/sched/act_skbmod.c skb data modifier
+ *
+ * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_skbmod.h>
+#include <net/tc_act/tc_skbmod.h>
+
+#define SKBMOD_TAB_MASK 15
+
+static int skbmod_net_id;
+static struct tc_action_ops act_skbmod_ops;
+
+#define MAX_EDIT_LEN ETH_HLEN
+static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_skbmod *d = to_skbmod(a);
+ int action;
+ struct tcf_skbmod_params *p;
+ u64 flags;
+ int err;
+
+ tcf_lastuse_update(&d->tcf_tm);
+ bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+
+ /* XXX: if you are going to edit more fields beyond ethernet header
+ * (example when you add IP header replacement or vlan swap)
+ * then MAX_EDIT_LEN needs to change appropriately
+ */
+ err = skb_ensure_writable(skb, MAX_EDIT_LEN);
+ if (unlikely(err)) { /* best policy is to drop on the floor */
+ qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+ return TC_ACT_SHOT;
+ }
+
+ rcu_read_lock();
+ action = READ_ONCE(d->tcf_action);
+ if (unlikely(action == TC_ACT_SHOT)) {
+ qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+ rcu_read_unlock();
+ return action;
+ }
+
+ p = rcu_dereference(d->skbmod_p);
+ flags = p->flags;
+ if (flags & SKBMOD_F_DMAC)
+ ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
+ if (flags & SKBMOD_F_SMAC)
+ ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
+ if (flags & SKBMOD_F_ETYPE)
+ eth_hdr(skb)->h_proto = p->eth_type;
+ rcu_read_unlock();
+
+ if (flags & SKBMOD_F_SWAPMAC) {
+ u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */
+ /*XXX: I am sure we can come up with more efficient swapping*/
+ ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest);
+ ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source);
+ ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr);
+ }
+
+ return action;
+}
+
+static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
+ [TCA_SKBMOD_PARMS] = { .len = sizeof(struct tc_skbmod) },
+ [TCA_SKBMOD_DMAC] = { .len = ETH_ALEN },
+ [TCA_SKBMOD_SMAC] = { .len = ETH_ALEN },
+ [TCA_SKBMOD_ETYPE] = { .type = NLA_U16 },
+};
+
+static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action **a,
+ int ovr, int bind)
+{
+ struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+ struct nlattr *tb[TCA_SKBMOD_MAX + 1];
+ struct tcf_skbmod_params *p, *p_old;
+ struct tc_skbmod *parm;
+ struct tcf_skbmod *d;
+ bool exists = false;
+ u8 *daddr = NULL;
+ u8 *saddr = NULL;
+ u16 eth_type = 0;
+ u32 lflags = 0;
+ int ret = 0, err;
+
+ if (!nla)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_SKBMOD_PARMS])
+ return -EINVAL;
+
+ if (tb[TCA_SKBMOD_DMAC]) {
+ daddr = nla_data(tb[TCA_SKBMOD_DMAC]);
+ lflags |= SKBMOD_F_DMAC;
+ }
+
+ if (tb[TCA_SKBMOD_SMAC]) {
+ saddr = nla_data(tb[TCA_SKBMOD_SMAC]);
+ lflags |= SKBMOD_F_SMAC;
+ }
+
+ if (tb[TCA_SKBMOD_ETYPE]) {
+ eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]);
+ lflags |= SKBMOD_F_ETYPE;
+ }
+
+ parm = nla_data(tb[TCA_SKBMOD_PARMS]);
+ if (parm->flags & SKBMOD_F_SWAPMAC)
+ lflags = SKBMOD_F_SWAPMAC;
+
+ exists = tcf_hash_check(tn, parm->index, a, bind);
+ if (exists && bind)
+ return 0;
+
+ if (!lflags)
+ return -EINVAL;
+
+ if (!exists) {
+ ret = tcf_hash_create(tn, parm->index, est, a,
+ &act_skbmod_ops, bind, true);
+ if (ret)
+ return ret;
+
+ ret = ACT_P_CREATED;
+ } else {
+ tcf_hash_release(*a, bind);
+ if (!ovr)
+ return -EEXIST;
+ }
+
+ d = to_skbmod(*a);
+
+ ASSERT_RTNL();
+ p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
+ if (unlikely(!p)) {
+ if (ovr)
+ tcf_hash_release(*a, bind);
+ return -ENOMEM;
+ }
+
+ p->flags = lflags;
+ d->tcf_action = parm->action;
+
+ p_old = rtnl_dereference(d->skbmod_p);
+
+ if (ovr)
+ spin_lock_bh(&d->tcf_lock);
+
+ if (lflags & SKBMOD_F_DMAC)
+ ether_addr_copy(p->eth_dst, daddr);
+ if (lflags & SKBMOD_F_SMAC)
+ ether_addr_copy(p->eth_src, saddr);
+ if (lflags & SKBMOD_F_ETYPE)
+ p->eth_type = htons(eth_type);
+
+ rcu_assign_pointer(d->skbmod_p, p);
+ if (ovr)
+ spin_unlock_bh(&d->tcf_lock);
+
+ if (p_old)
+ kfree_rcu(p_old, rcu);
+
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(tn, *a);
+ return ret;
+}
+
+static void tcf_skbmod_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_skbmod *d = to_skbmod(a);
+ struct tcf_skbmod_params *p;
+
+ p = rcu_dereference_protected(d->skbmod_p, 1);
+ kfree_rcu(p, rcu);
+}
+
+static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ struct tcf_skbmod *d = to_skbmod(a);
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_skbmod_params *p = rtnl_dereference(d->skbmod_p);
+ struct tc_skbmod opt = {
+ .index = d->tcf_index,
+ .refcnt = d->tcf_refcnt - ref,
+ .bindcnt = d->tcf_bindcnt - bind,
+ .action = d->tcf_action,
+ };
+ struct tcf_t t;
+
+ opt.flags = p->flags;
+ if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
+ if ((p->flags & SKBMOD_F_DMAC) &&
+ nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst))
+ goto nla_put_failure;
+ if ((p->flags & SKBMOD_F_SMAC) &&
+ nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src))
+ goto nla_put_failure;
+ if ((p->flags & SKBMOD_F_ETYPE) &&
+ nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type)))
+ goto nla_put_failure;
+
+ tcf_tm_dump(&t, &d->tcf_tm);
+ if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD))
+ goto nla_put_failure;
+
+ return skb->len;
+nla_put_failure:
+ rcu_read_unlock();
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ const struct tc_action_ops *ops)
+{
+ struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, ops);
+}
+
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
+static struct tc_action_ops act_skbmod_ops = {
+ .kind = "skbmod",
+ .type = TCA_ACT_SKBMOD,
+ .owner = THIS_MODULE,
+ .act = tcf_skbmod_run,
+ .dump = tcf_skbmod_dump,
+ .init = tcf_skbmod_init,
+ .cleanup = tcf_skbmod_cleanup,
+ .walk = tcf_skbmod_walker,
+ .lookup = tcf_skbmod_search,
+ .size = sizeof(struct tcf_skbmod),
+};
+
+static __net_init int skbmod_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+ return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK);
+}
+
+static void __net_exit skbmod_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations skbmod_net_ops = {
+ .init = skbmod_init_net,
+ .exit = skbmod_exit_net,
+ .id = &skbmod_net_id,
+ .size = sizeof(struct tc_action_net),
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
+MODULE_DESCRIPTION("SKB data mod-ing");
+MODULE_LICENSE("GPL");
+
+static int __init skbmod_init_module(void)
+{
+ return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+static void __exit skbmod_cleanup_module(void)
+{
+ tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+module_init(skbmod_init_module);
+module_exit(skbmod_cleanup_module);
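act_skbmod keeps its rewrite configuration behind an RCU pointer: tcf_skbmod_run() reads it under rcu_read_lock(), while tcf_skbmod_init() publishes a replacement with rcu_assign_pointer() and retires the old block via kfree_rcu(). A reduced sketch of that update pattern; struct act, struct params and update_params() are illustrative, not the module's exact types:

/* Sketch: RCU pointer swap of a config block, control path running
 * under RTNL.  Readers that raced with the swap keep using the old
 * block; kfree_rcu() frees it only after a grace period.
 */
struct params {
	struct rcu_head rcu;
	u64 flags;
	/* ... rewrite fields ... */
};

struct act {
	struct params __rcu *params;
};

static int update_params(struct act *act, const struct params *tmpl)
{
	struct params *newp, *oldp;

	newp = kmemdup(tmpl, sizeof(*newp), GFP_KERNEL);
	if (!newp)
		return -ENOMEM;

	oldp = rtnl_dereference(act->params);
	rcu_assign_pointer(act->params, newp);
	if (oldp)
		kfree_rcu(oldp, rcu);
	return 0;
}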
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index dceff7412dc3..af47bdf2f483 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -194,15 +194,12 @@ static void tunnel_key_release(struct tc_action *a, int bind)
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
- rcu_read_lock();
- params = rcu_dereference(t->params);
+ params = rcu_dereference_protected(t->params, 1);
if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
dst_release(&params->tcft_enc_metadata->dst);
kfree_rcu(params, rcu);
-
- rcu_read_unlock();
}
static int tunnel_key_dump_addresses(struct sk_buff *skb,
@@ -245,10 +242,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
.bindcnt = t->tcf_bindcnt - bind,
};
struct tcf_t tm;
- int ret = -1;
- rcu_read_lock();
- params = rcu_dereference(t->params);
+ params = rtnl_dereference(t->params);
opt.t_action = params->tcft_action;
opt.action = params->action;
@@ -272,15 +267,11 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
&tm, TCA_TUNNEL_KEY_PAD))
goto nla_put_failure;
- ret = skb->len;
- goto out;
+ return skb->len;
nla_put_failure:
nlmsg_trim(skb, b);
-out:
- rcu_read_unlock();
-
- return ret;
+ return -1;
}
static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
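The tunnel_key change above is about picking the accessor that matches the caller's guarantees: release() runs once the last reference is gone, so rcu_dereference_protected(ptr, 1) asserts update-side safety instead of taking a pointless read lock, while dump() runs under RTNL, which rtnl_dereference() encodes. A sketch of the distinction with illustrative names (struct obj, struct cfg, emit_cfg()):

/* Sketch: pick the RCU accessor that states why the access is safe. */
static void obj_release(struct obj *o)
{
	/* Last ref is gone: no readers can exist; '1' asserts that. */
	struct cfg *c = rcu_dereference_protected(o->cfg, 1);

	kfree_rcu(c, rcu);
}

static int obj_dump(struct obj *o, struct sk_buff *skb)
{
	/* Dump paths hold RTNL, the same lock the writers take. */
	struct cfg *c = rtnl_dereference(o->cfg);

	return emit_cfg(skb, c);
}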
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 4742f415ee5b..1d92d4d3f222 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -83,9 +83,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_bpf_prog *prog;
int ret = -1;
- if (unlikely(!skb_mac_header_was_set(skb)))
- return -1;
-
/* Needed here for accessing maps. */
rcu_read_lock();
list_for_each_entry_rcu(prog, &head->plist, link) {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index b084b2aab2d7..a3f4c706dfaa 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -33,7 +33,6 @@ struct fl_flow_key {
struct flow_dissector_key_basic basic;
struct flow_dissector_key_eth_addrs eth;
struct flow_dissector_key_vlan vlan;
- struct flow_dissector_key_addrs ipaddrs;
union {
struct flow_dissector_key_ipv4_addrs ipv4;
struct flow_dissector_key_ipv6_addrs ipv6;
@@ -335,6 +334,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
[TCA_FLOWER_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) },
[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -432,17 +435,17 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
if (key->basic.ip_proto == IPPROTO_TCP) {
fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
- &mask->tp.src, TCA_FLOWER_UNSPEC,
+ &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
sizeof(key->tp.src));
fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
- &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
sizeof(key->tp.dst));
} else if (key->basic.ip_proto == IPPROTO_UDP) {
fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
- &mask->tp.src, TCA_FLOWER_UNSPEC,
+ &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
sizeof(key->tp.src));
fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
- &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
sizeof(key->tp.dst));
}
@@ -877,18 +880,18 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (key->basic.ip_proto == IPPROTO_TCP &&
(fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
- &mask->tp.src, TCA_FLOWER_UNSPEC,
+ &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
sizeof(key->tp.src)) ||
fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
- &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
sizeof(key->tp.dst))))
goto nla_put_failure;
else if (key->basic.ip_proto == IPPROTO_UDP &&
(fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
- &mask->tp.src, TCA_FLOWER_UNSPEC,
+ &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
sizeof(key->tp.src)) ||
fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
- &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
sizeof(key->tp.dst))))
goto nla_put_failure;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index e5458b99e09c..dc52cc10d6ed 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -823,20 +823,24 @@ nla_put_failure:
static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct fq_sched_data *q = qdisc_priv(sch);
- u64 now = ktime_get_ns();
- struct tc_fq_qd_stats st = {
- .gc_flows = q->stat_gc_flows,
- .highprio_packets = q->stat_internal_packets,
- .tcp_retrans = q->stat_tcp_retrans,
- .throttled = q->stat_throttled,
- .flows_plimit = q->stat_flows_plimit,
- .pkts_too_long = q->stat_pkts_too_long,
- .allocation_errors = q->stat_allocation_errors,
- .flows = q->flows,
- .inactive_flows = q->inactive_flows,
- .throttled_flows = q->throttled_flows,
- .time_next_delayed_flow = q->time_next_delayed_flow - now,
- };
+ struct tc_fq_qd_stats st;
+
+ sch_tree_lock(sch);
+
+ st.gc_flows = q->stat_gc_flows;
+ st.highprio_packets = q->stat_internal_packets;
+ st.tcp_retrans = q->stat_tcp_retrans;
+ st.throttled = q->stat_throttled;
+ st.flows_plimit = q->stat_flows_plimit;
+ st.pkts_too_long = q->stat_pkts_too_long;
+ st.allocation_errors = q->stat_allocation_errors;
+ st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns();
+ st.flows = q->flows;
+ st.inactive_flows = q->inactive_flows;
+ st.throttled_flows = q->throttled_flows;
+ st.pad = 0;
+
+ sch_tree_unlock(sch);
return gnet_stats_copy_app(d, &st, sizeof(st));
}
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index a55e54738b81..af9cc8055465 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -70,6 +70,19 @@ static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp)
return msg;
}
+void sctp_datamsg_free(struct sctp_datamsg *msg)
+{
+ struct sctp_chunk *chunk;
+
+ /* This doesn't have to be a _safe variant because
+ * sctp_chunk_free() only drops the refs.
+ */
+ list_for_each_entry(chunk, &msg->chunks, frag_list)
+ sctp_chunk_free(chunk);
+
+ sctp_datamsg_put(msg);
+}
+
/* Final destruction of datamsg memory. */
static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
{
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 1f1682b9a6a8..0c605ec74dc4 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -180,7 +180,6 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
int one_packet, gfp_t gfp)
{
sctp_xmit_t retval;
- int error = 0;
pr_debug("%s: packet:%p size:%Zu chunk:%p size:%d\n", __func__,
packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1);
@@ -188,6 +187,8 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) {
case SCTP_XMIT_PMTU_FULL:
if (!packet->has_cookie_echo) {
+ int error = 0;
+
error = sctp_packet_transmit(packet, gfp);
if (error < 0)
chunk->skb->sk->sk_err = -error;
@@ -441,14 +442,14 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
* time. Application may notice this error.
*/
pr_err_once("Trying to GSO but underlying device doesn't support it.");
- goto nomem;
+ goto err;
}
} else {
pkt_size = packet->size;
}
head = alloc_skb(pkt_size + MAX_HEADER, gfp);
if (!head)
- goto nomem;
+ goto err;
if (gso) {
NAPI_GRO_CB(head)->last = head;
skb_shinfo(head)->gso_type = sk->sk_gso_type;
@@ -469,8 +470,12 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
}
}
dst = dst_clone(tp->dst);
- if (!dst)
- goto no_route;
+ if (!dst) {
+ if (asoc)
+ IP_INC_STATS(sock_net(asoc->base.sk),
+ IPSTATS_MIB_OUTNOROUTES);
+ goto nodst;
+ }
skb_dst_set(head, dst);
/* Build the SCTP header. */
@@ -621,8 +626,10 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
if (!gso)
break;
- if (skb_gro_receive(&head, nskb))
+ if (skb_gro_receive(&head, nskb)) {
+ kfree_skb(nskb);
goto nomem;
+ }
nskb = NULL;
if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
sk->sk_gso_max_segs))
@@ -716,18 +723,13 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
}
head->ignore_df = packet->ipfragok;
tp->af_specific->sctp_xmit(head, tp);
+ goto out;
-out:
- sctp_packet_reset(packet);
- return err;
-no_route:
- kfree_skb(head);
- if (nskb != head)
- kfree_skb(nskb);
-
- if (asoc)
- IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
+nomem:
+ if (packet->auth && list_empty(&packet->auth->list))
+ sctp_chunk_free(packet->auth);
+nodst:
/* FIXME: Returning the 'err' will affect all the associations
* associated with a socket, although only one of the paths of the
* association is unreachable.
@@ -736,22 +738,18 @@ no_route:
* required.
*/
/* err = -EHOSTUNREACH; */
-err:
- /* Control chunks are unreliable so just drop them. DATA chunks
- * will get resent or dropped later.
- */
+ kfree_skb(head);
+err:
list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
list_del_init(&chunk->list);
if (!sctp_chunk_is_data(chunk))
sctp_chunk_free(chunk);
}
- goto out;
-nomem:
- if (packet->auth && list_empty(&packet->auth->list))
- sctp_chunk_free(packet->auth);
- err = -ENOMEM;
- goto err;
+
+out:
+ sctp_packet_reset(packet);
+ return err;
}
/********************************************************************
@@ -878,7 +876,7 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
struct sctp_chunk *chunk,
u16 chunk_len)
{
- size_t psize, pmtu;
+ size_t psize, pmtu, maxsize;
sctp_xmit_t retval = SCTP_XMIT_OK;
psize = packet->size;
@@ -906,6 +904,17 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
goto out;
}
+ /* Similarly, if this chunk was built before a PMTU
+ * reduction, we have to fragment it at IP level now. So
+ * if the packet already contains something, we need to
+ * flush.
+ */
+ maxsize = pmtu - packet->overhead;
+ if (packet->auth)
+ maxsize -= WORD_ROUND(packet->auth->skb->len);
+ if (chunk_len > maxsize)
+ retval = SCTP_XMIT_PMTU_FULL;
+
/* It is also okay to fragment if the chunk we are
* adding is a control chunk, but only if current packet
* is not a GSO one otherwise it causes fragmentation of
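The maxsize computation above is simple bookkeeping: the largest chunk a packet can still carry is the PMTU minus the fixed packet overhead, minus the padded AUTH chunk if one is already queued; anything larger must be flushed and fragmented at the IP layer. A standalone sketch of the test, with WORD_ROUND as the usual round-up-to-4 padding and the inputs as illustrative scalars rather than the kernel's packet structures:

#include <stdbool.h>
#include <stddef.h>

#define WORD_ROUND(s)	(((s) + 3UL) & ~3UL)	/* SCTP 4-byte padding */

static bool needs_flush_for_pmtu(size_t pmtu, size_t overhead,
				 size_t auth_len, size_t chunk_len)
{
	size_t maxsize = pmtu - overhead;

	if (auth_len)
		maxsize -= WORD_ROUND(auth_len);

	return chunk_len > maxsize;
}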
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 72e54a416af6..8c3f446d965c 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -68,7 +68,7 @@ static void sctp_mark_missing(struct sctp_outq *q,
static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
-static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
+static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
/* Add data to the front of the queue. */
static inline void sctp_outq_head_data(struct sctp_outq *q,
@@ -285,10 +285,9 @@ void sctp_outq_free(struct sctp_outq *q)
}
/* Put a new chunk in an sctp_outq. */
-int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
+void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
{
struct net *net = sock_net(q->asoc->base.sk);
- int error = 0;
pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk,
chunk && chunk->chunk_hdr ?
@@ -299,54 +298,26 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
* immediately.
*/
if (sctp_chunk_is_data(chunk)) {
- /* Is it OK to queue data chunks? */
- /* From 9. Termination of Association
- *
- * When either endpoint performs a shutdown, the
- * association on each peer will stop accepting new
- * data from its user and only deliver data in queue
- * at the time of sending or receiving the SHUTDOWN
- * chunk.
- */
- switch (q->asoc->state) {
- case SCTP_STATE_CLOSED:
- case SCTP_STATE_SHUTDOWN_PENDING:
- case SCTP_STATE_SHUTDOWN_SENT:
- case SCTP_STATE_SHUTDOWN_RECEIVED:
- case SCTP_STATE_SHUTDOWN_ACK_SENT:
- /* Cannot send after transport endpoint shutdown */
- error = -ESHUTDOWN;
- break;
-
- default:
- pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n",
- __func__, q, chunk, chunk && chunk->chunk_hdr ?
- sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
- "illegal chunk");
-
- sctp_chunk_hold(chunk);
- sctp_outq_tail_data(q, chunk);
- if (chunk->asoc->prsctp_enable &&
- SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
- chunk->asoc->sent_cnt_removable++;
- if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
- SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
- else
- SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
- break;
- }
+ pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n",
+ __func__, q, chunk, chunk && chunk->chunk_hdr ?
+ sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
+ "illegal chunk");
+
+ sctp_outq_tail_data(q, chunk);
+ if (chunk->asoc->prsctp_enable &&
+ SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+ chunk->asoc->sent_cnt_removable++;
+ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+ SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
+ else
+ SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
} else {
list_add_tail(&chunk->list, &q->control_chunk_list);
SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
}
- if (error < 0)
- return error;
-
if (!q->cork)
- error = sctp_outq_flush(q, 0, gfp);
-
- return error;
+ sctp_outq_flush(q, 0, gfp);
}
/* Insert a chunk into the sorted list based on the TSNs. The retransmit list
@@ -559,7 +530,6 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
sctp_retransmit_reason_t reason)
{
struct net *net = sock_net(q->asoc->base.sk);
- int error = 0;
switch (reason) {
case SCTP_RTXR_T3_RTX:
@@ -603,10 +573,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
* will be flushed at the end.
*/
if (reason != SCTP_RTXR_FAST_RTX)
- error = sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC);
-
- if (error)
- q->asoc->base.sk->sk_err = -error;
+ sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC);
}
/*
@@ -778,12 +745,12 @@ redo:
}
/* Uncork the outqueue: flush any chunks queued while it was corked. */
-int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
+void sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
{
if (q->cork)
q->cork = 0;
- return sctp_outq_flush(q, 0, gfp);
+ sctp_outq_flush(q, 0, gfp);
}
@@ -796,7 +763,7 @@ int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
* locking concerns must be made. Today we use the sock lock to protect
* this function.
*/
-static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
+static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
{
struct sctp_packet *packet;
struct sctp_packet singleton;
@@ -919,8 +886,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
sctp_packet_config(&singleton, vtag, 0);
sctp_packet_append_chunk(&singleton, chunk);
error = sctp_packet_transmit(&singleton, gfp);
- if (error < 0)
- return error;
+ if (error < 0) {
+ asoc->base.sk->sk_err = -error;
+ return;
+ }
break;
case SCTP_CID_ABORT:
@@ -1018,6 +987,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
retran:
error = sctp_outq_flush_rtx(q, packet,
rtx_timeout, &start_timer);
+ if (error < 0)
+ asoc->base.sk->sk_err = -error;
if (start_timer) {
sctp_transport_reset_t3_rtx(transport);
@@ -1192,14 +1163,15 @@ sctp_flush_out:
struct sctp_transport,
send_ready);
packet = &t->packet;
- if (!sctp_packet_empty(packet))
+ if (!sctp_packet_empty(packet)) {
error = sctp_packet_transmit(packet, gfp);
+ if (error < 0)
+ asoc->base.sk->sk_err = -error;
+ }
/* Clear the burst limited state, if any */
sctp_transport_burst_reset(t);
}
-
- return error;
}
/* Update unack_data based on the incoming SACK chunk */
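With sctp_outq_flush() now returning void, transmit errors are latched into sk_err at the point of failure rather than threaded back through every caller. A minimal sketch of that pattern, with a stand-in socket struct:

#include <errno.h>
#include <stdio.h>

struct fake_sock { int sk_err; };

static int packet_transmit(void)
{
	return -ENOMEM;			/* pretend the allocation failed */
}

static void outq_flush(struct fake_sock *sk)
{
	int error = packet_transmit();

	if (error < 0)
		sk->sk_err = -error;	/* latch it; the caller is void */
}

int main(void)
{
	struct fake_sock sk = { 0 };

	outq_flush(&sk);
	printf("sk_err=%d\n", sk.sk_err);	/* 12 (ENOMEM) */
	return 0;
}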
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 12d45193357c..c345bf153bed 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1020,19 +1020,13 @@ static void sctp_cmd_t1_timer_update(struct sctp_association *asoc,
 * This way the whole message is queued up and bundling is
* encouraged for small fragments.
*/
-static int sctp_cmd_send_msg(struct sctp_association *asoc,
- struct sctp_datamsg *msg, gfp_t gfp)
+static void sctp_cmd_send_msg(struct sctp_association *asoc,
+ struct sctp_datamsg *msg, gfp_t gfp)
{
struct sctp_chunk *chunk;
- int error = 0;
-
- list_for_each_entry(chunk, &msg->chunks, frag_list) {
- error = sctp_outq_tail(&asoc->outqueue, chunk, gfp);
- if (error)
- break;
- }
- return error;
+ list_for_each_entry(chunk, &msg->chunks, frag_list)
+ sctp_outq_tail(&asoc->outqueue, chunk, gfp);
}
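The corking discipline sctp_cmd_send_msg() relies on, sketched in a standalone harness (types and names are stand-ins): while corked, each fragment is only queued; the single flush at uncork time is what lets small fragments bundle into one packet.

#include <stdio.h>

struct fake_outq { int cork; int queued; };

static void outq_flush(struct fake_outq *q)
{
	printf("flushing %d chunk(s) in one pass\n", q->queued);
	q->queued = 0;
}

static void outq_tail(struct fake_outq *q)
{
	q->queued++;
	if (!q->cork)			/* uncorked: send immediately */
		outq_flush(q);
}

static void outq_uncork(struct fake_outq *q)
{
	q->cork = 0;
	outq_flush(q);
}

int main(void)
{
	struct fake_outq q = { .cork = 1, .queued = 0 };

	for (int i = 0; i < 3; i++)	/* three fragments of one message */
		outq_tail(&q);
	outq_uncork(&q);		/* a single flush bundles all three */
	return 0;
}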
@@ -1427,8 +1421,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
local_cork = 1;
}
/* Send a chunk to our peer. */
- error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk,
- gfp);
+ sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk, gfp);
break;
case SCTP_CMD_SEND_PKT:
@@ -1682,7 +1675,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
case SCTP_CMD_FORCE_PRIM_RETRAN:
t = asoc->peer.retran_path;
asoc->peer.retran_path = asoc->peer.primary_path;
- error = sctp_outq_uncork(&asoc->outqueue, gfp);
+ sctp_outq_uncork(&asoc->outqueue, gfp);
local_cork = 0;
asoc->peer.retran_path = t;
break;
@@ -1709,7 +1702,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
sctp_outq_cork(&asoc->outqueue);
local_cork = 1;
}
- error = sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp);
+ sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp);
break;
case SCTP_CMD_SEND_NEXT_ASCONF:
sctp_cmd_send_asconf(asoc);
@@ -1739,9 +1732,9 @@ out:
*/
if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) {
if (chunk->end_of_packet || chunk->singleton)
- error = sctp_outq_uncork(&asoc->outqueue, gfp);
+ sctp_outq_uncork(&asoc->outqueue, gfp);
} else if (local_cork)
- error = sctp_outq_uncork(&asoc->outqueue, gfp);
+ sctp_outq_uncork(&asoc->outqueue, gfp);
if (sp->data_ready_signalled)
sp->data_ready_signalled = 0;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9fc417a8b476..6cdc61c21438 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1958,6 +1958,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
/* Now send the (possibly) fragmented message. */
list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
+ sctp_chunk_hold(chunk);
+
/* Do accounting for the write space. */
sctp_set_owner_w(chunk);
@@ -1970,13 +1972,15 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
* breaks.
*/
err = sctp_primitive_SEND(net, asoc, datamsg);
- sctp_datamsg_put(datamsg);
/* Did the lower layer accept the chunk? */
- if (err)
+ if (err) {
+ sctp_datamsg_free(datamsg);
goto out_free;
+ }
pr_debug("%s: we sent primitively\n", __func__);
+ sctp_datamsg_put(datamsg);
err = msg_len;
if (unlikely(wait_connect)) {
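The hold/free/put dance above follows the usual refcount pairing: the sender takes its own hold on each chunk so the transmit path's release cannot free it mid-iteration, then drops the datamsg reference on success or frees it outright on failure. A toy sketch of that pairing (illustrative types):

#include <stdio.h>

struct ref { const char *name; int cnt; };

static void hold(struct ref *r) { r->cnt++; }

static void put(struct ref *r)
{
	if (--r->cnt == 0)
		printf("%s freed\n", r->name);
}

int main(void)
{
	struct ref chunk = { "chunk", 1 };

	hold(&chunk);	/* sender's own hold, taken before SEND */
	put(&chunk);	/* transmit path drops its reference... */
	put(&chunk);	/* ...the sender's put is what finally frees it */
	return 0;
}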
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 7f79fb7dc6a0..66f23b376fa0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -453,7 +453,7 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
struct rpc_xprt_switch *xps;
if (args->bc_xprt && args->bc_xprt->xpt_bc_xps) {
- WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
+ WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
xps = args->bc_xprt->xpt_bc_xps;
xprt_switch_get(xps);
} else {
@@ -520,7 +520,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
char servername[48];
if (args->bc_xprt) {
- WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
+ WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
xprt = args->bc_xprt->xpt_bc_xprt;
if (xprt) {
xprt_get(xprt);
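The relaxed WARN tests a transport-class bit instead of one exact enum value, so any backchannel transport (not just TCP) passes. Schematically, with made-up flag values standing in for the real XPRT_TRANSPORT_* constants:

#include <stdio.h>

#define XPRT_CLASS_BC	0x80000000u	/* illustrative class bit */
#define XPRT_BC_TCP	(XPRT_CLASS_BC | 1)
#define XPRT_BC_RDMA	(XPRT_CLASS_BC | 2)

int main(void)
{
	unsigned int proto = XPRT_BC_RDMA;

	/* old check: rejects every backchannel transport but TCP */
	printf("%d\n", proto != XPRT_BC_TCP);		/* 1: would warn */
	/* new check: accepts the whole class */
	printf("%d\n", !(proto & XPRT_CLASS_BC));	/* 0: no warning */
	return 0;
}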
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 6b626a64b517..a04fe9be1c60 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -62,6 +62,8 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
/**
* named_prepare_buf - allocate & initialize a publication message
+ *
+ * The buffer returned is of size INT_H_SIZE + payload size
*/
static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
u32 dest)
@@ -141,9 +143,9 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
struct publication *publ;
struct sk_buff *skb = NULL;
struct distr_item *item = NULL;
- uint msg_dsz = (tipc_node_get_mtu(net, dnode, 0) / ITEM_SIZE) *
- ITEM_SIZE;
- uint msg_rem = msg_dsz;
+ u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0) - INT_H_SIZE) /
+ ITEM_SIZE) * ITEM_SIZE;
+ u32 msg_rem = msg_dsz;
list_for_each_entry(publ, pls, local_list) {
/* Prepare next buffer: */
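Worked through with illustrative numbers (a 1500-byte MTU, a 40-byte INT_H_SIZE header, 20-byte distribution items; the real values come from the TIPC headers), the old formula filled the whole MTU with items and left no room for the header the buffer also carries:

#include <stdio.h>

int main(void)
{
	unsigned int mtu = 1500, hdr = 40, item = 20;

	/* old: (1500 / 20) * 20 = 1500 payload bytes -> 1540 > MTU */
	printf("old msg_dsz = %u\n", (mtu / item) * item);
	/* new: ((1500 - 40) / 20) * 20 = 1460 -> 1500 == MTU */
	printf("new msg_dsz = %u\n", ((mtu - hdr) / item) * item);
	return 0;
}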
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index dd274687a53d..d80cd3f7503f 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -665,7 +665,8 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) {
pr_err("Invalid UDP bearer configuration");
- return -EINVAL;
+ err = -EINVAL;
+ goto err;
}
err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f1dffe84f0d5..8309687a56b0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock *sk, int val)
{
struct unix_sock *u = unix_sk(sk);
- if (mutex_lock_interruptible(&u->readlock))
+ if (mutex_lock_interruptible(&u->iolock))
return -EINTR;
sk->sk_peek_off = val;
- mutex_unlock(&u->readlock);
+ mutex_unlock(&u->iolock);
return 0;
}
@@ -779,7 +779,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
spin_lock_init(&u->lock);
atomic_long_set(&u->inflight, 0);
INIT_LIST_HEAD(&u->link);
- mutex_init(&u->readlock); /* single task reading lock */
+ mutex_init(&u->iolock); /* single task reading lock */
+ mutex_init(&u->bindlock); /* single task binding lock */
init_waitqueue_head(&u->peer_wait);
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
unix_insert_socket(unix_sockets_unbound(sk), sk);
@@ -848,7 +849,7 @@ static int unix_autobind(struct socket *sock)
int err;
unsigned int retries = 0;
- err = mutex_lock_interruptible(&u->readlock);
+ err = mutex_lock_interruptible(&u->bindlock);
if (err)
return err;
@@ -895,7 +896,7 @@ retry:
spin_unlock(&unix_table_lock);
err = 0;
-out: mutex_unlock(&u->readlock);
+out: mutex_unlock(&u->bindlock);
return err;
}
@@ -954,20 +955,32 @@ fail:
return NULL;
}
-static int unix_mknod(struct dentry *dentry, const struct path *path, umode_t mode,
- struct path *res)
+static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
- int err;
+ struct dentry *dentry;
+ struct path path;
+ int err = 0;
+ /*
+	 * Get the parent directory, calculate the hash for the last
+ * component.
+ */
+ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ return err;
- err = security_path_mknod(path, dentry, mode, 0);
+ /*
+ * All right, let's create it.
+ */
+ err = security_path_mknod(&path, dentry, mode, 0);
if (!err) {
- err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
+ err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
if (!err) {
- res->mnt = mntget(path->mnt);
+ res->mnt = mntget(path.mnt);
res->dentry = dget(dentry);
}
}
-
+ done_path_create(&path, dentry);
return err;
}
@@ -978,12 +991,10 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
- int err, name_err;
+ int err;
unsigned int hash;
struct unix_address *addr;
struct hlist_head *list;
- struct path path;
- struct dentry *dentry;
err = -EINVAL;
if (sunaddr->sun_family != AF_UNIX)
@@ -999,34 +1010,14 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
addr_len = err;
- name_err = 0;
- dentry = NULL;
- if (sun_path[0]) {
- /* Get the parent directory, calculate the hash for last
- * component.
- */
- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
-
- if (IS_ERR(dentry)) {
- /* delay report until after 'already bound' check */
- name_err = PTR_ERR(dentry);
- dentry = NULL;
- }
- }
-
- err = mutex_lock_interruptible(&u->readlock);
+ err = mutex_lock_interruptible(&u->bindlock);
if (err)
- goto out_path;
+ goto out;
err = -EINVAL;
if (u->addr)
goto out_up;
- if (name_err) {
- err = name_err == -EEXIST ? -EADDRINUSE : name_err;
- goto out_up;
- }
-
err = -ENOMEM;
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
if (!addr)
@@ -1037,11 +1028,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
addr->hash = hash ^ sk->sk_type;
atomic_set(&addr->refcnt, 1);
- if (dentry) {
- struct path u_path;
+ if (sun_path[0]) {
+ struct path path;
umode_t mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current_umask());
- err = unix_mknod(dentry, &path, mode, &u_path);
+ err = unix_mknod(sun_path, mode, &path);
if (err) {
if (err == -EEXIST)
err = -EADDRINUSE;
@@ -1049,9 +1040,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out_up;
}
addr->hash = UNIX_HASH_SIZE;
- hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+ hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
spin_lock(&unix_table_lock);
- u->path = u_path;
+ u->path = path;
list = &unix_socket_table[hash];
} else {
spin_lock(&unix_table_lock);
@@ -1073,11 +1064,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
out_unlock:
spin_unlock(&unix_table_lock);
out_up:
- mutex_unlock(&u->readlock);
-out_path:
- if (dentry)
- done_path_create(&path, dentry);
-
+ mutex_unlock(&u->bindlock);
out:
return err;
}
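The point of splitting readlock is lock ordering: bind() now nests the filesystem locks taken by kern_path_create() inside bindlock, while the data paths take only iolock, so the two families of lock orderings can no longer interleave into a deadlock. A compressed userspace sketch of the resulting discipline (pthread mutexes standing in for the kernel mutexes):

#include <pthread.h>
#include <stdio.h>

/* one lock serializes binds (and nests the fs locks inside it), a
 * separate one serializes reads/writes, so a sleeping reader can no
 * longer hold the lock a bind needs while the bind sits in fs code */
static pthread_mutex_t bindlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t iolock   = PTHREAD_MUTEX_INITIALIZER;

static void bind_path(void)
{
	pthread_mutex_lock(&bindlock);
	puts("bindlock held: create fs node, publish address");
	pthread_mutex_unlock(&bindlock);
}

static void recv_path(void)
{
	pthread_mutex_lock(&iolock);
	puts("iolock held: dequeue skbs");
	pthread_mutex_unlock(&iolock);
}

int main(void)
{
	bind_path();
	recv_path();
	return 0;
}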
@@ -1969,17 +1956,17 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
if (false) {
alloc_skb:
unix_state_unlock(other);
- mutex_unlock(&unix_sk(other)->readlock);
+ mutex_unlock(&unix_sk(other)->iolock);
newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
&err, 0);
if (!newskb)
goto err;
}
- /* we must acquire readlock as we modify already present
+ /* we must acquire iolock as we modify already present
* skbs in the sk_receive_queue and mess with skb->len
*/
- err = mutex_lock_interruptible(&unix_sk(other)->readlock);
+ err = mutex_lock_interruptible(&unix_sk(other)->iolock);
if (err) {
err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
goto err;
@@ -2046,7 +2033,7 @@ alloc_skb:
}
unix_state_unlock(other);
- mutex_unlock(&unix_sk(other)->readlock);
+ mutex_unlock(&unix_sk(other)->iolock);
other->sk_data_ready(other);
scm_destroy(&scm);
@@ -2055,7 +2042,7 @@ alloc_skb:
err_state_unlock:
unix_state_unlock(other);
err_unlock:
- mutex_unlock(&unix_sk(other)->readlock);
+ mutex_unlock(&unix_sk(other)->iolock);
err:
kfree_skb(newskb);
if (send_sigpipe && !(flags & MSG_NOSIGNAL))
@@ -2123,7 +2110,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
- mutex_lock(&u->readlock);
+ mutex_lock(&u->iolock);
skip = sk_peek_offset(sk, flags);
skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
@@ -2131,14 +2118,14 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
if (skb)
break;
- mutex_unlock(&u->readlock);
+ mutex_unlock(&u->iolock);
if (err != -EAGAIN)
break;
} while (timeo &&
!__skb_wait_for_more_packets(sk, &err, &timeo, last));
- if (!skb) { /* implies readlock unlocked */
+ if (!skb) { /* implies iolock unlocked */
unix_state_lock(sk);
/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
@@ -2203,7 +2190,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
out_free:
skb_free_datagram(sk, skb);
- mutex_unlock(&u->readlock);
+ mutex_unlock(&u->iolock);
out:
return err;
}
@@ -2298,7 +2285,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
/* Lock the socket to prevent queue disordering
 * while we sleep in memcpy_tomsg
*/
- mutex_lock(&u->readlock);
+ mutex_lock(&u->iolock);
if (flags & MSG_PEEK)
skip = sk_peek_offset(sk, flags);
@@ -2340,7 +2327,7 @@ again:
break;
}
- mutex_unlock(&u->readlock);
+ mutex_unlock(&u->iolock);
timeo = unix_stream_data_wait(sk, timeo, last,
last_len);
@@ -2351,7 +2338,7 @@ again:
goto out;
}
- mutex_lock(&u->readlock);
+ mutex_lock(&u->iolock);
goto redo;
unlock:
unix_state_unlock(sk);
@@ -2454,7 +2441,7 @@ unlock:
}
} while (size);
- mutex_unlock(&u->readlock);
+ mutex_unlock(&u->iolock);
if (state->msg)
scm_recv(sock, state->msg, &scm, flags);
else
@@ -2495,9 +2482,9 @@ static ssize_t skb_unix_socket_splice(struct sock *sk,
int ret;
struct unix_sock *u = unix_sk(sk);
- mutex_unlock(&u->readlock);
+ mutex_unlock(&u->iolock);
ret = splice_to_pipe(pipe, spd);
- mutex_lock(&u->readlock);
+ mutex_lock(&u->iolock);
return ret;
}
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index dbb2738e356a..6250b1cfcde5 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -958,29 +958,8 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
return private(dev, iwr, cmd, info, handler);
}
/* Old driver API: call driver ioctl handler */
- if (dev->netdev_ops->ndo_do_ioctl) {
-#ifdef CONFIG_COMPAT
- if (info->flags & IW_REQUEST_FLAG_COMPAT) {
- int ret = 0;
- struct iwreq iwr_lcl;
- struct compat_iw_point *iwp_compat = (void *) &iwr->u.data;
-
- memcpy(&iwr_lcl, iwr, sizeof(struct iwreq));
- iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer);
- iwr_lcl.u.data.length = iwp_compat->length;
- iwr_lcl.u.data.flags = iwp_compat->flags;
-
- ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd);
-
- iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer);
- iwp_compat->length = iwr_lcl.u.data.length;
- iwp_compat->flags = iwr_lcl.u.data.flags;
-
- return ret;
- } else
-#endif
- return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
- }
+ if (dev->netdev_ops->ndo_do_ioctl)
+ return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
return -EOPNOTSUPP;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 1c4ad477ce93..6e3f0254d8a1 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -207,15 +207,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
family = XFRM_SPI_SKB_CB(skb)->family;
/* if tunnel is present override skb->mark value with tunnel i_key */
- if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4) {
- switch (family) {
- case AF_INET:
+ switch (family) {
+ case AF_INET:
+ if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
- break;
- case AF_INET6:
+ break;
+ case AF_INET6:
+ if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
- break;
- }
+ break;
}
/* Allocate new secpath or COW existing one. */
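Since tunnel.ip4 and tunnel.ip6 overlay each other in the control block's union, the rewritten code dispatches on the address family first and only then tests the union member that is actually valid for that family. A standalone sketch with stand-in types:

#include <stdio.h>

struct ip4_tun { unsigned int i_key; };
struct ip6_tun { unsigned int i_key; };

union tun {				/* the members overlay, as in the cb */
	struct ip4_tun *ip4;
	struct ip6_tun *ip6;
};

static unsigned int tunnel_mark(int family, union tun t)
{
	unsigned int mark = 0;

	switch (family) {		/* dispatch on the family first... */
	case 4:
		if (t.ip4)		/* ...then test the matching member */
			mark = t.ip4->i_key;
		break;
	case 6:
		if (t.ip6)
			mark = t.ip6->i_key;
		break;
	}
	return mark;
}

int main(void)
{
	struct ip6_tun six = { .i_key = 42 };
	union tun t = { .ip6 = &six };

	printf("mark=%u\n", tunnel_mark(6, t));	/* 42 */
	return 0;
}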
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f7ce6265961a..fd6986634e6f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -643,6 +643,10 @@ static void xfrm_hash_rebuild(struct work_struct *work)
/* re-insert all policies by order of creation */
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+ if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
+ /* skip socket policies */
+ continue;
+ }
newpos = NULL;
chain = policy_hash_bysel(net, &policy->selector,
policy->family,
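The skip works because of how policy indices encode direction: per-socket policies carry a direction at or above XFRM_POLICY_MAX, which xfrm_policy_id2dir() recovers from the low bits of the index. A small sketch of that convention (the encoding shown follows the kernel's, but the harness and sample indices are illustrative):

#include <stdio.h>

#define XFRM_POLICY_MAX 3	/* IN, OUT, FWD; socket policies use >= 3 */

static unsigned int policy_id2dir(unsigned int index)
{
	return index & 7;	/* direction lives in the low bits */
}

int main(void)
{
	unsigned int idx[] = { 8, 17, 3, 20 };

	for (int i = 0; i < 4; i++)
		printf("index %u -> dir %u (%s)\n", idx[i],
		       policy_id2dir(idx[i]),
		       policy_id2dir(idx[i]) >= XFRM_POLICY_MAX ?
		       "socket policy, skipped" : "rehashed");
	return 0;
}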
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d516845e16e3..cb65d916a345 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -896,7 +896,8 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb)
struct sock *sk = cb->skb->sk;
struct net *net = sock_net(sk);
- xfrm_state_walk_done(walk, net);
+ if (cb->args[0])
+ xfrm_state_walk_done(walk, net);
return 0;
}
@@ -921,8 +922,6 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
u8 proto = 0;
int err;
- cb->args[0] = 1;
-
err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX,
xfrma_policy);
if (err < 0)
@@ -939,6 +938,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
proto = nla_get_u8(attrs[XFRMA_PROTO]);
xfrm_state_walk_init(walk, proto, filter);
+ cb->args[0] = 1;
}
(void) xfrm_state_walk(net, walk, dump_one_state, &info);
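Moving the cb->args[0] assignment after xfrm_state_walk_init() turns it into a reliable was-the-walk-initialized flag, so a parse failure on the first dump pass no longer lets the done() callback tear down a walk that was never set up. The pattern in miniature (stand-in types):

#include <stdio.h>

struct cb_ctx { long args[1]; };

static void walk_init(void) { puts("walk_init"); }
static void walk_done(void) { puts("walk_done"); }

static int dump(struct cb_ctx *cb, int parse_fails)
{
	if (!cb->args[0]) {
		if (parse_fails)	/* bail before the walk exists */
			return -1;
		walk_init();
		cb->args[0] = 1;	/* mark it initialized only now */
	}
	return 0;
}

static void done(struct cb_ctx *cb)
{
	if (cb->args[0])		/* tear down only what was set up */
		walk_done();
}

int main(void)
{
	struct cb_ctx cb = { { 0 } };

	dump(&cb, 1);	/* parse error: done() must not touch the walk */
	done(&cb);
	dump(&cb, 0);
	done(&cb);	/* prints walk_init then walk_done */
	return 0;
}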
@@ -2051,9 +2051,6 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (up->hard) {
xfrm_policy_delete(xp, p->dir);
xfrm_audit_policy_delete(xp, 1, true);
- } else {
- // reset the timers here?
- WARN(1, "Don't know what to do with soft policy expire\n");
}
km_policy_expired(xp, p->dir, up->hard, nlh->nlmsg_pid);
@@ -2117,7 +2114,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
err = verify_newpolicy_info(&ua->policy);
if (err)
- goto bad_policy;
+ goto free_state;
/* build an XP */
xp = xfrm_policy_construct(net, &ua->policy, attrs, &err);
@@ -2149,8 +2146,6 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
-bad_policy:
- WARN(1, "BAD policy passed\n");
free_state:
kfree(x);
nomem: