summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwmac.yaml14
-rw-r--r--drivers/infiniband/core/addr.c12
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_common.c41
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_ethtool.c93
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_prueth.c18
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_prueth.h11
-rw-r--r--drivers/net/loopback.c5
-rw-r--r--drivers/net/vrf.c2
-rw-r--r--drivers/s390/net/qeth_core.h5
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/linux/skbuff.h11
-rw-r--r--include/net/dsa.h11
-rw-r--r--include/net/hotdata.h3
-rw-r--r--include/net/ip.h4
-rw-r--r--include/net/proto_memory.h83
-rw-r--r--include/net/route.h11
-rw-r--r--include/net/sock.h78
-rw-r--r--include/net/tcp.h10
-rw-r--r--net/atm/clip.c2
-rw-r--r--net/core/dev.c28
-rw-r--r--net/core/dev.h1
-rw-r--r--net/core/dst_cache.c2
-rw-r--r--net/core/filter.c3
-rw-r--r--net/core/hotdata.c7
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/skbuff.c7
-rw-r--r--net/core/sock.c2
-rw-r--r--net/core/sysctl_net_core.c7
-rw-r--r--net/dsa/dsa.c3
-rw-r--r--net/dsa/port.c139
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/arp.c203
-rw-r--r--net/ipv4/icmp.c26
-rw-r--r--net/ipv4/ip_input.c2
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/route.c24
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--net/ipv4/tcp_input.c1
-rw-r--r--net/ipv4/tcp_output.c1
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/xfrm4_policy.c2
-rw-r--r--net/ipv6/anycast.c5
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/mpls/mpls_iptunnel.c2
-rw-r--r--net/mptcp/protocol.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c2
-rw-r--r--net/netfilter/nf_flow_table_ip.c4
-rw-r--r--net/netfilter/nft_rt.c2
-rw-r--r--net/sched/sch_api.c1
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sctp/protocol.c4
-rw-r--r--net/sctp/sm_statefuns.c1
-rw-r--r--net/tipc/udp_media.c2
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile2
-rw-r--r--tools/testing/selftests/net/netfilter/config1
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh28
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range_perf.sh9
58 files changed, 535 insertions, 436 deletions
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 15073627c53a..21cc27e75f50 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -328,9 +328,6 @@ properties:
snps,tx-sched-dwrr:
type: boolean
description: Deficit Weighted Round Robin
- snps,tx-sched-sp:
- type: boolean
- description: Strict priority
allOf:
- if:
required:
@@ -339,7 +336,6 @@ properties:
properties:
snps,tx-sched-wfq: false
snps,tx-sched-dwrr: false
- snps,tx-sched-sp: false
- if:
required:
- snps,tx-sched-wfq
@@ -347,7 +343,6 @@ properties:
properties:
snps,tx-sched-wrr: false
snps,tx-sched-dwrr: false
- snps,tx-sched-sp: false
- if:
required:
- snps,tx-sched-dwrr
@@ -355,15 +350,6 @@ properties:
properties:
snps,tx-sched-wrr: false
snps,tx-sched-wfq: false
- snps,tx-sched-sp: false
- - if:
- required:
- - snps,tx-sched-sp
- then:
- properties:
- snps,tx-sched-wrr: false
- snps,tx-sched-wfq: false
- snps,tx-sched-dwrr: false
patternProperties:
"^queue[0-9]$":
description: Each subnode represents a queue.
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index f20dfe70fa0e..be0743dac3ff 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -348,16 +348,10 @@ static int dst_fetch_ha(const struct dst_entry *dst,
static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
{
- const struct rtable *rt;
- const struct rt6_info *rt6;
+ if (family == AF_INET)
+ return dst_rtable(dst)->rt_uses_gateway;
- if (family == AF_INET) {
- rt = container_of(dst, struct rtable, dst);
- return rt->rt_uses_gateway;
- }
-
- rt6 = dst_rt6_info(dst);
- return rt6->rt6i_flags & RTF_GATEWAY;
+ return dst_rt6_info(dst)->rt6i_flags & RTF_GATEWAY;
}
static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c
index 284f97876054..088ab8076db4 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_common.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_common.c
@@ -122,7 +122,7 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
}
int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
- int budget)
+ int budget, bool *tdown)
{
struct net_device *ndev = emac->ndev;
struct cppi5_host_desc_t *desc_tx;
@@ -145,6 +145,7 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
if (cppi5_desc_is_tdcm(desc_dma)) {
if (atomic_dec_and_test(&emac->tdown_cnt))
complete(&emac->tdown_complete);
+ *tdown = true;
break;
}
@@ -190,19 +191,37 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
return num_tx;
}
+static enum hrtimer_restart emac_tx_timer_callback(struct hrtimer *timer)
+{
+ struct prueth_tx_chn *tx_chns =
+ container_of(timer, struct prueth_tx_chn, tx_hrtimer);
+
+ enable_irq(tx_chns->irq);
+ return HRTIMER_NORESTART;
+}
+
static int emac_napi_tx_poll(struct napi_struct *napi_tx, int budget)
{
struct prueth_tx_chn *tx_chn = prueth_napi_to_tx_chn(napi_tx);
struct prueth_emac *emac = tx_chn->emac;
+ bool tdown = false;
int num_tx_packets;
- num_tx_packets = emac_tx_complete_packets(emac, tx_chn->id, budget);
+ num_tx_packets = emac_tx_complete_packets(emac, tx_chn->id, budget,
+ &tdown);
if (num_tx_packets >= budget)
return budget;
- if (napi_complete_done(napi_tx, num_tx_packets))
- enable_irq(tx_chn->irq);
+ if (napi_complete_done(napi_tx, num_tx_packets)) {
+ if (unlikely(tx_chn->tx_pace_timeout_ns && !tdown)) {
+ hrtimer_start(&tx_chn->tx_hrtimer,
+ ns_to_ktime(tx_chn->tx_pace_timeout_ns),
+ HRTIMER_MODE_REL_PINNED);
+ } else {
+ enable_irq(tx_chn->irq);
+ }
+ }
return num_tx_packets;
}
@@ -226,6 +245,9 @@ int prueth_ndev_add_tx_napi(struct prueth_emac *emac)
struct prueth_tx_chn *tx_chn = &emac->tx_chns[i];
netif_napi_add_tx(emac->ndev, &tx_chn->napi_tx, emac_napi_tx_poll);
+ hrtimer_init(&tx_chn->tx_hrtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_PINNED);
+ tx_chn->tx_hrtimer.function = &emac_tx_timer_callback;
ret = request_irq(tx_chn->irq, prueth_tx_irq,
IRQF_TRIGGER_HIGH, tx_chn->name,
tx_chn);
@@ -871,8 +893,15 @@ int emac_napi_rx_poll(struct napi_struct *napi_rx, int budget)
break;
}
- if (num_rx < budget && napi_complete_done(napi_rx, num_rx))
- enable_irq(emac->rx_chns.irq[rx_flow]);
+ if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
+ if (unlikely(emac->rx_pace_timeout_ns)) {
+ hrtimer_start(&emac->rx_hrtimer,
+ ns_to_ktime(emac->rx_pace_timeout_ns),
+ HRTIMER_MODE_REL_PINNED);
+ } else {
+ enable_irq(emac->rx_chns.irq[rx_flow]);
+ }
+ }
return num_rx;
}
diff --git a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c
index ca20325d4d3e..c8d0f45cc5b1 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c
@@ -201,6 +201,93 @@ static void emac_get_rmon_stats(struct net_device *ndev,
rmon_stats->hist_tx[4] = emac_get_stat_by_name(emac, "tx_bucket5_frames");
}
+static int emac_get_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct prueth_emac *emac = netdev_priv(ndev);
+ struct prueth_tx_chn *tx_chn;
+
+ tx_chn = &emac->tx_chns[0];
+
+ coal->rx_coalesce_usecs = emac->rx_pace_timeout_ns / 1000;
+ coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout_ns / 1000;
+
+ return 0;
+}
+
+static int emac_get_per_queue_coalesce(struct net_device *ndev, u32 queue,
+ struct ethtool_coalesce *coal)
+{
+ struct prueth_emac *emac = netdev_priv(ndev);
+ struct prueth_tx_chn *tx_chn;
+
+ if (queue >= PRUETH_MAX_TX_QUEUES)
+ return -EINVAL;
+
+ tx_chn = &emac->tx_chns[queue];
+
+ coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout_ns / 1000;
+
+ return 0;
+}
+
+static int emac_set_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct prueth_emac *emac = netdev_priv(ndev);
+ struct prueth *prueth = emac->prueth;
+ struct prueth_tx_chn *tx_chn;
+
+ tx_chn = &emac->tx_chns[0];
+
+ if (coal->rx_coalesce_usecs &&
+ coal->rx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) {
+ dev_info(prueth->dev, "defaulting to min value of %dus for rx-usecs\n",
+ ICSSG_MIN_COALESCE_USECS);
+ coal->rx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS;
+ }
+
+ if (coal->tx_coalesce_usecs &&
+ coal->tx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) {
+ dev_info(prueth->dev, "defaulting to min value of %dus for tx-usecs\n",
+ ICSSG_MIN_COALESCE_USECS);
+ coal->tx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS;
+ }
+
+ emac->rx_pace_timeout_ns = coal->rx_coalesce_usecs * 1000;
+ tx_chn->tx_pace_timeout_ns = coal->tx_coalesce_usecs * 1000;
+
+ return 0;
+}
+
+static int emac_set_per_queue_coalesce(struct net_device *ndev, u32 queue,
+ struct ethtool_coalesce *coal)
+{
+ struct prueth_emac *emac = netdev_priv(ndev);
+ struct prueth *prueth = emac->prueth;
+ struct prueth_tx_chn *tx_chn;
+
+ if (queue >= PRUETH_MAX_TX_QUEUES)
+ return -EINVAL;
+
+ tx_chn = &emac->tx_chns[queue];
+
+ if (coal->tx_coalesce_usecs &&
+ coal->tx_coalesce_usecs < ICSSG_MIN_COALESCE_USECS) {
+ dev_info(prueth->dev, "defaulting to min value of %dus for tx-usecs for tx-%u\n",
+ ICSSG_MIN_COALESCE_USECS, queue);
+ coal->tx_coalesce_usecs = ICSSG_MIN_COALESCE_USECS;
+ }
+
+ tx_chn->tx_pace_timeout_ns = coal->tx_coalesce_usecs * 1000;
+
+ return 0;
+}
+
const struct ethtool_ops icssg_ethtool_ops = {
.get_drvinfo = emac_get_drvinfo,
.get_msglevel = emac_get_msglevel,
@@ -209,6 +296,12 @@ const struct ethtool_ops icssg_ethtool_ops = {
.get_ethtool_stats = emac_get_ethtool_stats,
.get_strings = emac_get_strings,
.get_ts_info = emac_get_ts_info,
+ .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS |
+ ETHTOOL_COALESCE_TX_USECS,
+ .get_coalesce = emac_get_coalesce,
+ .set_coalesce = emac_set_coalesce,
+ .get_per_queue_coalesce = emac_get_per_queue_coalesce,
+ .set_per_queue_coalesce = emac_set_per_queue_coalesce,
.get_channels = emac_get_channels,
.set_channels = emac_set_channels,
.get_link_ksettings = emac_get_link_ksettings,
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index 186b0365c2e5..7c9e9518f555 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -243,6 +243,16 @@ static void emac_adjust_link(struct net_device *ndev)
}
}
+static enum hrtimer_restart emac_rx_timer_callback(struct hrtimer *timer)
+{
+ struct prueth_emac *emac =
+ container_of(timer, struct prueth_emac, rx_hrtimer);
+ int rx_flow = PRUETH_RX_FLOW_DATA;
+
+ enable_irq(emac->rx_chns.irq[rx_flow]);
+ return HRTIMER_NORESTART;
+}
+
static int emac_phy_connect(struct prueth_emac *emac)
{
struct prueth *prueth = emac->prueth;
@@ -582,8 +592,10 @@ static int emac_ndo_stop(struct net_device *ndev)
netdev_err(ndev, "tx teardown timeout\n");
prueth_reset_tx_chan(emac, emac->tx_ch_num, true);
- for (i = 0; i < emac->tx_ch_num; i++)
+ for (i = 0; i < emac->tx_ch_num; i++) {
napi_disable(&emac->tx_chns[i].napi_tx);
+ hrtimer_cancel(&emac->tx_chns[i].tx_hrtimer);
+ }
max_rx_flows = PRUETH_MAX_RX_FLOWS;
k3_udma_glue_tdown_rx_chn(emac->rx_chns.rx_chn, true);
@@ -591,6 +603,7 @@ static int emac_ndo_stop(struct net_device *ndev)
prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, true);
napi_disable(&emac->napi_rx);
+ hrtimer_cancel(&emac->rx_hrtimer);
cancel_work_sync(&emac->rx_mode_work);
@@ -801,6 +814,9 @@ static int prueth_netdev_init(struct prueth *prueth,
ndev->features = ndev->hw_features;
netif_napi_add(ndev, &emac->napi_rx, emac_napi_rx_poll);
+ hrtimer_init(&emac->rx_hrtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_PINNED);
+ emac->rx_hrtimer.function = &emac_rx_timer_callback;
prueth->emac[mac] = emac;
return 0;
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
index 82e38ef5635b..a78c5eb75fb8 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
@@ -108,6 +108,8 @@ struct prueth_tx_chn {
u32 descs_num;
unsigned int irq;
char name[32];
+ struct hrtimer tx_hrtimer;
+ unsigned long tx_pace_timeout_ns;
};
struct prueth_rx_chn {
@@ -127,6 +129,9 @@ struct prueth_rx_chn {
#define PRUETH_MAX_TX_TS_REQUESTS 50 /* Max simultaneous TX_TS requests */
+/* Minimum coalesce time in usecs for both Tx and Rx */
+#define ICSSG_MIN_COALESCE_USECS 20
+
/* data for each emac port */
struct prueth_emac {
bool is_sr1;
@@ -183,6 +188,10 @@ struct prueth_emac {
struct delayed_work stats_work;
u64 stats[ICSSG_NUM_STATS];
+
+ /* RX IRQ Coalescing Related */
+ struct hrtimer rx_hrtimer;
+ unsigned long rx_pace_timeout_ns;
};
/**
@@ -320,7 +329,7 @@ void prueth_ndev_del_tx_napi(struct prueth_emac *emac, int num);
void prueth_xmit_free(struct prueth_tx_chn *tx_chn,
struct cppi5_host_desc_t *desc);
int emac_tx_complete_packets(struct prueth_emac *emac, int chn,
- int budget);
+ int budget, bool *tdown);
int prueth_ndev_add_tx_napi(struct prueth_emac *emac);
int prueth_init_tx_chns(struct prueth_emac *emac);
int prueth_init_rx_chns(struct prueth_emac *emac,
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index f6eab66c2660..2b486e7c749c 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -141,9 +141,6 @@ static const struct ethtool_ops loopback_ethtool_ops = {
static int loopback_dev_init(struct net_device *dev)
{
- dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
- if (!dev->lstats)
- return -ENOMEM;
netdev_lockdep_set_classes(dev);
return 0;
}
@@ -151,7 +148,6 @@ static int loopback_dev_init(struct net_device *dev)
static void loopback_dev_free(struct net_device *dev)
{
dev_net(dev)->loopback_dev = NULL;
- free_percpu(dev->lstats);
}
static const struct net_device_ops loopback_ops = {
@@ -191,6 +187,7 @@ static void gen_lo_setup(struct net_device *dev,
dev->header_ops = hdr_ops;
dev->netdev_ops = dev_ops;
dev->needs_free_netdev = true;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
dev->priv_destructor = dev_destructor;
netif_set_tso_max_size(dev, GSO_MAX_SIZE);
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 784b9b2d275e..3a252ac5dd28 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -860,7 +860,7 @@ static int vrf_rt6_create(struct net_device *dev)
static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct rtable *rt = (struct rtable *)dst;
+ struct rtable *rt = dst_rtable(dst);
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 5f17a2a5d0e3..41fe8a043d61 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -970,9 +970,8 @@ static inline struct dst_entry *qeth_dst_check_rcu(struct sk_buff *skb,
static inline __be32 qeth_next_hop_v4_rcu(struct sk_buff *skb,
struct dst_entry *dst)
{
- struct rtable *rt = (struct rtable *) dst;
-
- return (rt) ? rt_nexthop(rt, ip_hdr(skb)->daddr) : ip_hdr(skb)->daddr;
+ return (dst) ? rt_nexthop(dst_rtable(dst), ip_hdr(skb)->daddr) :
+ ip_hdr(skb)->daddr;
}
static inline struct in6_addr *qeth_next_hop_v6_rcu(struct sk_buff *skb,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f849e7d110ed..41853424b41d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3136,6 +3136,7 @@ struct net_device *netdev_get_by_name(struct net *net, const char *name,
netdevice_tracker *tracker, gfp_t gfp);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
+void netdev_copy_name(struct net_device *dev, char *name);
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f76825e5b92a..36b133f04d30 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -353,8 +353,6 @@ struct sk_buff;
#define MAX_SKB_FRAGS CONFIG_MAX_SKB_FRAGS
-extern int sysctl_max_skb_frags;
-
/* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to
* segment using its current segmentation instead.
*/
@@ -1180,15 +1178,6 @@ static inline bool skb_dst_is_noref(const struct sk_buff *skb)
return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
}
-/**
- * skb_rtable - Returns the skb &rtable
- * @skb: buffer
- */
-static inline struct rtable *skb_rtable(const struct sk_buff *skb)
-{
- return (struct rtable *)skb_dst(skb);
-}
-
/* For mangling skb->pkt_type from user space side from applications
* such as nft, tc, etc, we only allow a conservative subset of
* possible pkt_types to be set.
diff --git a/include/net/dsa.h b/include/net/dsa.h
index a6ef7e4c503f..eef702dbea78 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -24,9 +24,6 @@
struct dsa_8021q_context;
struct tc_action;
-struct phy_device;
-struct fixed_phy_status;
-struct phylink_link_state;
#define DSA_TAG_PROTO_NONE_VALUE 0
#define DSA_TAG_PROTO_BRCM_VALUE 1
@@ -869,14 +866,6 @@ struct dsa_switch_ops {
int regnum, u16 val);
/*
- * Link state adjustment (called from libphy)
- */
- void (*adjust_link)(struct dsa_switch *ds, int port,
- struct phy_device *phydev);
- void (*fixed_link_update)(struct dsa_switch *ds, int port,
- struct fixed_phy_status *st);
-
- /*
* PHYLINK integration
*/
void (*phylink_get_caps)(struct dsa_switch *ds, int port,
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
index 003667a1efd6..30e9570beb2a 100644
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -38,6 +38,9 @@ struct net_hotdata {
int max_backlog;
int dev_tx_weight;
int dev_rx_weight;
+ int sysctl_max_skb_frags;
+ int sysctl_skb_defer_max;
+ int sysctl_mem_pcpu_rsv;
};
#define inet_ehash_secret net_hotdata.tcp_protocol.secret
diff --git a/include/net/ip.h b/include/net/ip.h
index 25cb688bdc62..6d735e00d3f3 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -423,7 +423,7 @@ int ip_decrease_ttl(struct iphdr *iph)
static inline int ip_mtu_locked(const struct dst_entry *dst)
{
- const struct rtable *rt = (const struct rtable *)dst;
+ const struct rtable *rt = dst_rtable(dst);
return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU);
}
@@ -461,7 +461,7 @@ static inline bool ip_sk_ignore_df(const struct sock *sk)
static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
bool forwarding)
{
- const struct rtable *rt = container_of(dst, struct rtable, dst);
+ const struct rtable *rt = dst_rtable(dst);
struct net *net = dev_net(dst->dev);
unsigned int mtu;
diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h
new file mode 100644
index 000000000000..a6ab2f4f5e28
--- /dev/null
+++ b/include/net/proto_memory.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PROTO_MEMORY_H
+#define _PROTO_MEMORY_H
+
+#include <net/sock.h>
+#include <net/hotdata.h>
+
+/* 1 MB per cpu, in page units */
+#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+
+static inline bool sk_has_memory_pressure(const struct sock *sk)
+{
+ return sk->sk_prot->memory_pressure != NULL;
+}
+
+static inline bool
+proto_memory_pressure(const struct proto *prot)
+{
+ if (!prot->memory_pressure)
+ return false;
+ return !!READ_ONCE(*prot->memory_pressure);
+}
+
+static inline bool sk_under_global_memory_pressure(const struct sock *sk)
+{
+ return proto_memory_pressure(sk->sk_prot);
+}
+
+static inline bool sk_under_memory_pressure(const struct sock *sk)
+{
+ if (!sk->sk_prot->memory_pressure)
+ return false;
+
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
+ mem_cgroup_under_socket_pressure(sk->sk_memcg))
+ return true;
+
+ return !!READ_ONCE(*sk->sk_prot->memory_pressure);
+}
+
+static inline long
+proto_memory_allocated(const struct proto *prot)
+{
+ return max(0L, atomic_long_read(prot->memory_allocated));
+}
+
+static inline long
+sk_memory_allocated(const struct sock *sk)
+{
+ return proto_memory_allocated(sk->sk_prot);
+}
+
+static inline void proto_memory_pcpu_drain(struct proto *proto)
+{
+ int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0);
+
+ if (val)
+ atomic_long_add(val, proto->memory_allocated);
+}
+
+static inline void
+sk_memory_allocated_add(const struct sock *sk, int val)
+{
+ struct proto *proto = sk->sk_prot;
+
+ val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val);
+
+ if (unlikely(val >= READ_ONCE(net_hotdata.sysctl_mem_pcpu_rsv)))
+ proto_memory_pcpu_drain(proto);
+}
+
+static inline void
+sk_memory_allocated_sub(const struct sock *sk, int val)
+{
+ struct proto *proto = sk->sk_prot;
+
+ val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val);
+
+ if (unlikely(val <= -READ_ONCE(net_hotdata.sysctl_mem_pcpu_rsv)))
+ proto_memory_pcpu_drain(proto);
+}
+
+#endif /* _PROTO_MEMORY_H */
diff --git a/include/net/route.h b/include/net/route.h
index 630d1ef6868a..93833cfe9c96 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -75,6 +75,17 @@ struct rtable {
rt_pmtu:31;
};
+#define dst_rtable(_ptr) container_of_const(_ptr, struct rtable, dst)
+
+/**
+ * skb_rtable - Returns the skb &rtable
+ * @skb: buffer
+ */
+static inline struct rtable *skb_rtable(const struct sk_buff *skb)
+{
+ return dst_rtable(skb_dst(skb));
+}
+
static inline bool rt_is_input_route(const struct rtable *rt)
{
return rt->rt_is_input != 0;
diff --git a/include/net/sock.h b/include/net/sock.h
index 48bcc845202f..0450494a1766 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1371,75 +1371,6 @@ static inline int sk_under_cgroup_hierarchy(struct sock *sk,
#endif
}
-static inline bool sk_has_memory_pressure(const struct sock *sk)
-{
- return sk->sk_prot->memory_pressure != NULL;
-}
-
-static inline bool sk_under_global_memory_pressure(const struct sock *sk)
-{
- return sk->sk_prot->memory_pressure &&
- !!READ_ONCE(*sk->sk_prot->memory_pressure);
-}
-
-static inline bool sk_under_memory_pressure(const struct sock *sk)
-{
- if (!sk->sk_prot->memory_pressure)
- return false;
-
- if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
- mem_cgroup_under_socket_pressure(sk->sk_memcg))
- return true;
-
- return !!READ_ONCE(*sk->sk_prot->memory_pressure);
-}
-
-static inline long
-proto_memory_allocated(const struct proto *prot)
-{
- return max(0L, atomic_long_read(prot->memory_allocated));
-}
-
-static inline long
-sk_memory_allocated(const struct sock *sk)
-{
- return proto_memory_allocated(sk->sk_prot);
-}
-
-/* 1 MB per cpu, in page units */
-#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
-extern int sysctl_mem_pcpu_rsv;
-
-static inline void proto_memory_pcpu_drain(struct proto *proto)
-{
- int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0);
-
- if (val)
- atomic_long_add(val, proto->memory_allocated);
-}
-
-static inline void
-sk_memory_allocated_add(const struct sock *sk, int val)
-{
- struct proto *proto = sk->sk_prot;
-
- val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val);
-
- if (unlikely(val >= READ_ONCE(sysctl_mem_pcpu_rsv)))
- proto_memory_pcpu_drain(proto);
-}
-
-static inline void
-sk_memory_allocated_sub(const struct sock *sk, int val)
-{
- struct proto *proto = sk->sk_prot;
-
- val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val);
-
- if (unlikely(val <= -READ_ONCE(sysctl_mem_pcpu_rsv)))
- proto_memory_pcpu_drain(proto);
-}
-
#define SK_ALLOC_PERCPU_COUNTER_BATCH 16
static inline void sk_sockets_allocated_dec(struct sock *sk)
@@ -1466,15 +1397,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
return percpu_counter_sum_positive(prot->sockets_allocated);
}
-static inline bool
-proto_memory_pressure(struct proto *prot)
-{
- if (!prot->memory_pressure)
- return false;
- return !!READ_ONCE(*prot->memory_pressure);
-}
-
-
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR 64 /* should be enough for the first time */
struct prot_inuse {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index fe98fb01879b..0a51e6a45bce 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -296,14 +296,6 @@ static inline bool between(__u32 seq1, __u32 seq2, __u32 seq3)
return seq3 - seq2 >= seq1 - seq2;
}
-static inline bool tcp_out_of_memory(struct sock *sk)
-{
- if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
- sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
- return true;
- return false;
-}
-
static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
sk_wmem_queued_add(sk, -skb->truesize);
@@ -316,7 +308,7 @@ static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
void sk_forced_mem_schedule(struct sock *sk, int size);
-bool tcp_check_oom(struct sock *sk, int shift);
+bool tcp_check_oom(const struct sock *sk, int shift);
extern struct proto tcp_prot;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 362e8d25a79e..42b910cb4e8e 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -345,7 +345,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
- rt = (struct rtable *) dst;
+ rt = dst_rtable(dst);
if (rt->rt_gw_family == AF_INET)
daddr = &rt->rt_gw4;
else
diff --git a/net/core/dev.c b/net/core/dev.c
index c9e59eff8ec8..e02d2363347e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -940,6 +940,18 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
}
EXPORT_SYMBOL(dev_get_by_napi_id);
+static DEFINE_SEQLOCK(netdev_rename_lock);
+
+void netdev_copy_name(struct net_device *dev, char *name)
+{
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&netdev_rename_lock);
+ strscpy(name, dev->name, IFNAMSIZ);
+ } while (read_seqretry(&netdev_rename_lock, seq));
+}
+
/**
* netdev_get_name - get a netdevice name, knowing its ifindex.
* @net: network namespace
@@ -951,7 +963,6 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
struct net_device *dev;
int ret;
- down_read(&devnet_rename_sem);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, ifindex);
@@ -960,12 +971,11 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
goto out;
}
- strcpy(name, dev->name);
+ netdev_copy_name(dev, name);
ret = 0;
out:
rcu_read_unlock();
- up_read(&devnet_rename_sem);
return ret;
}
@@ -1217,7 +1227,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
memcpy(oldname, dev->name, IFNAMSIZ);
+ write_seqlock(&netdev_rename_lock);
err = dev_get_valid_name(net, dev, newname);
+ write_sequnlock(&netdev_rename_lock);
+
if (err < 0) {
up_write(&devnet_rename_sem);
return err;
@@ -1257,7 +1270,9 @@ rollback:
if (err >= 0) {
err = ret;
down_write(&devnet_rename_sem);
+ write_seqlock(&netdev_rename_lock);
memcpy(dev->name, oldname, IFNAMSIZ);
+ write_sequnlock(&netdev_rename_lock);
memcpy(oldname, newname, IFNAMSIZ);
WRITE_ONCE(dev->name_assign_type, old_assign_type);
old_assign_type = NET_NAME_RENAMED;
@@ -4450,7 +4465,6 @@ EXPORT_SYMBOL(__dev_direct_xmit);
*************************************************************************/
static DEFINE_PER_CPU(struct task_struct *, backlog_napi);
-unsigned int sysctl_skb_defer_max __read_mostly = 64;
int weight_p __read_mostly = 64; /* old backlog weight */
int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
@@ -11404,8 +11418,12 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_net_set(dev, net);
dev->ifindex = new_ifindex;
- if (new_name[0]) /* Rename the netdev to prepared name */
+ if (new_name[0]) {
+ /* Rename the netdev to prepared name */
+ write_seqlock(&netdev_rename_lock);
strscpy(dev->name, new_name, IFNAMSIZ);
+ write_sequnlock(&netdev_rename_lock);
+ }
/* Fixup kobjects */
dev_set_uevent_suppress(&dev->dev, 1);
diff --git a/net/core/dev.h b/net/core/dev.h
index 8572d2c8dc4a..b7b518bc2be5 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -36,7 +36,6 @@ int dev_addr_init(struct net_device *dev);
void dev_addr_check(struct net_device *dev);
/* sysctls not referred to from outside net/core/ */
-extern unsigned int sysctl_skb_defer_max;
extern int netdev_unregister_timeout_secs;
extern int weight_p;
extern int dev_weight_rx_bias;
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index b17171345d64..0c0bdb058c5b 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -83,7 +83,7 @@ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
return NULL;
*saddr = idst->in_saddr.s_addr;
- return container_of(dst, struct rtable, dst);
+ return dst_rtable(dst);
}
EXPORT_SYMBOL_GPL(dst_cache_get_ip4);
diff --git a/net/core/filter.c b/net/core/filter.c
index 6d319c76188b..29165744c505 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2317,8 +2317,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
rcu_read_lock();
if (!nh) {
- struct dst_entry *dst = skb_dst(skb);
- struct rtable *rt = container_of(dst, struct rtable, dst);
+ struct rtable *rt = skb_rtable(skb);
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
} else if (nh->nh_family == AF_INET6) {
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index c8a7a451c18a..d0aaaaa556f2 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-#include <net/hotdata.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/list.h>
-
+#include <net/hotdata.h>
+#include <net/proto_memory.h>
struct net_hotdata net_hotdata __cacheline_aligned = {
.offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
@@ -18,5 +18,8 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
.max_backlog = 1000,
.dev_tx_weight = 64,
.dev_rx_weight = 64,
+ .sysctl_max_skb_frags = MAX_SKB_FRAGS,
+ .sysctl_skb_defer_max = 64,
+ .sysctl_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE
};
EXPORT_SYMBOL(net_hotdata);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 543007f159f9..55bcacf67df3 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -316,7 +316,7 @@ static int netpoll_owner_active(struct net_device *dev)
struct napi_struct *napi;
list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
- if (napi->poll_owner == smp_processor_id())
+ if (READ_ONCE(napi->poll_owner) == smp_processor_id())
return 1;
}
return 0;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0c8b82750000..5f382e94b4d1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -109,9 +109,6 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#define SKB_SMALL_HEAD_HEADROOM \
SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
-int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
-EXPORT_SYMBOL(sysctl_max_skb_frags);
-
/* kcm_write_msgs() relies on casting paged frags to bio_vec to use
* iov_iter_bvec(). These static asserts ensure the cast is valid is long as the
* netmem is a page.
@@ -6988,7 +6985,7 @@ nodefer: kfree_skb_napi_cache(skb);
DEBUG_NET_WARN_ON_ONCE(skb->destructor);
sd = &per_cpu(softnet_data, cpu);
- defer_max = READ_ONCE(sysctl_skb_defer_max);
+ defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max);
if (READ_ONCE(sd->defer_count) >= defer_max)
goto nodefer;
@@ -7040,7 +7037,7 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
ssize_t maxsize, gfp_t gfp)
{
- size_t frag_limit = READ_ONCE(sysctl_max_skb_frags);
+ size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags);
struct page *pages[8], **ppages = pages;
ssize_t spliced = 0, ret = 0;
unsigned int i;
diff --git a/net/core/sock.c b/net/core/sock.c
index fe9195186c13..8d6e638b5426 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -127,6 +127,7 @@
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
+#include <net/proto_memory.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
@@ -283,7 +284,6 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
-int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE;
int sysctl_tstamp_allow_data __read_mostly = 1;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 903ab4a51c17..6da5995ac86a 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -24,6 +24,7 @@
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
#include <net/hotdata.h>
+#include <net/proto_memory.h>
#include <net/rps.h>
#include "dev.h"
@@ -415,7 +416,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "mem_pcpu_rsv",
- .data = &sysctl_mem_pcpu_rsv,
+ .data = &net_hotdata.sysctl_mem_pcpu_rsv,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -595,7 +596,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "max_skb_frags",
- .data = &sysctl_max_skb_frags,
+ .data = &net_hotdata.sysctl_max_skb_frags,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -654,7 +655,7 @@ static struct ctl_table net_core_table[] = {
},
{
.procname = "skb_defer_max",
- .data = &sysctl_skb_defer_max,
+ .data = &net_hotdata.sysctl_skb_defer_max,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 2f347cd37316..12521a7d4048 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1511,8 +1511,7 @@ static int dsa_switch_probe(struct dsa_switch *ds)
ds->ops->phylink_mac_config ||
ds->ops->phylink_mac_finish ||
ds->ops->phylink_mac_link_down ||
- ds->ops->phylink_mac_link_up ||
- ds->ops->adjust_link)
+ ds->ops->phylink_mac_link_up)
return -EINVAL;
}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index c6febc3d96d9..9a249d4ac3a5 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1535,25 +1535,6 @@ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
cpu_dp->tag_ops = tag_ops;
}
-static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
-{
- struct device_node *phy_dn;
- struct phy_device *phydev;
-
- phy_dn = of_parse_phandle(dp->dn, "phy-handle", 0);
- if (!phy_dn)
- return NULL;
-
- phydev = of_phy_find_device(phy_dn);
- if (!phydev) {
- of_node_put(phy_dn);
- return ERR_PTR(-EPROBE_DEFER);
- }
-
- of_node_put(phy_dn);
- return phydev;
-}
-
static struct phylink_pcs *
dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
phy_interface_t interface)
@@ -1616,17 +1597,10 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
phy_interface_t interface)
{
struct dsa_port *dp = dsa_phylink_to_port(config);
- struct phy_device *phydev = NULL;
struct dsa_switch *ds = dp->ds;
- if (dsa_port_is_user(dp))
- phydev = dp->user->phydev;
-
- if (!ds->ops->phylink_mac_link_down) {
- if (ds->ops->adjust_link && phydev)
- ds->ops->adjust_link(ds, dp->index, phydev);
+ if (!ds->ops->phylink_mac_link_down)
return;
- }
ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
}
@@ -1641,11 +1615,8 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
struct dsa_port *dp = dsa_phylink_to_port(config);
struct dsa_switch *ds = dp->ds;
- if (!ds->ops->phylink_mac_link_up) {
- if (ds->ops->adjust_link && phydev)
- ds->ops->adjust_link(ds, dp->index, phydev);
+ if (!ds->ops->phylink_mac_link_up)
return;
- }
ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev,
speed, duplex, tx_pause, rx_pause);
@@ -1708,78 +1679,6 @@ void dsa_port_phylink_destroy(struct dsa_port *dp)
dp->pl = NULL;
}
-static int dsa_shared_port_setup_phy_of(struct dsa_port *dp, bool enable)
-{
- struct dsa_switch *ds = dp->ds;
- struct phy_device *phydev;
- int port = dp->index;
- int err = 0;
-
- phydev = dsa_port_get_phy_device(dp);
- if (!phydev)
- return 0;
-
- if (IS_ERR(phydev))
- return PTR_ERR(phydev);
-
- if (enable) {
- err = genphy_resume(phydev);
- if (err < 0)
- goto err_put_dev;
-
- err = genphy_read_status(phydev);
- if (err < 0)
- goto err_put_dev;
- } else {
- err = genphy_suspend(phydev);
- if (err < 0)
- goto err_put_dev;
- }
-
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
-
- dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev));
-
-err_put_dev:
- put_device(&phydev->mdio.dev);
- return err;
-}
-
-static int dsa_shared_port_fixed_link_register_of(struct dsa_port *dp)
-{
- struct device_node *dn = dp->dn;
- struct dsa_switch *ds = dp->ds;
- struct phy_device *phydev;
- int port = dp->index;
- phy_interface_t mode;
- int err;
-
- err = of_phy_register_fixed_link(dn);
- if (err) {
- dev_err(ds->dev,
- "failed to register the fixed PHY of port %d\n",
- port);
- return err;
- }
-
- phydev = of_phy_find_device(dn);
-
- err = of_get_phy_mode(dn, &mode);
- if (err)
- mode = PHY_INTERFACE_MODE_NA;
- phydev->interface = mode;
-
- genphy_read_status(phydev);
-
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
-
- put_device(&phydev->mdio.dev);
-
- return 0;
-}
-
static int dsa_shared_port_phylink_register(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
@@ -1983,44 +1882,28 @@ int dsa_shared_port_link_register_of(struct dsa_port *dp)
dsa_switches_apply_workarounds))
return -EINVAL;
- if (!ds->ops->adjust_link) {
- if (missing_link_description) {
- dev_warn(ds->dev,
- "Skipping phylink registration for %s port %d\n",
- dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
- } else {
- dsa_shared_port_link_down(dp);
+ if (missing_link_description) {
+ dev_warn(ds->dev,
+ "Skipping phylink registration for %s port %d\n",
+ dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index);
+ } else {
+ dsa_shared_port_link_down(dp);
- return dsa_shared_port_phylink_register(dp);
- }
- return 0;
+ return dsa_shared_port_phylink_register(dp);
}
- dev_warn(ds->dev,
- "Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
-
- if (of_phy_is_fixed_link(dp->dn))
- return dsa_shared_port_fixed_link_register_of(dp);
- else
- return dsa_shared_port_setup_phy_of(dp, true);
+ return 0;
}
void dsa_shared_port_link_unregister_of(struct dsa_port *dp)
{
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->adjust_link && dp->pl) {
+ if (dp->pl) {
rtnl_lock();
phylink_disconnect_phy(dp->pl);
rtnl_unlock();
dsa_port_phylink_destroy(dp);
return;
}
-
- if (of_phy_is_fixed_link(dp->dn))
- of_phy_deregister_fixed_link(dp->dn);
- else
- dsa_shared_port_setup_phy_of(dp, false);
}
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a7cfeda28bb2..486a8d4f53b1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1307,8 +1307,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
int inet_sk_rebuild_header(struct sock *sk)
{
+ struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0));
struct inet_sock *inet = inet_sk(sk);
- struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
__be32 daddr;
struct ip_options_rcu *inet_opt;
struct flowi4 *fl4;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index ab82ca104496..11c1519b3699 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1003,6 +1003,55 @@ out_of_mem:
* User level interface (ioctl)
*/
+static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r,
+ bool getarp)
+{
+ struct net_device *dev;
+
+ if (getarp)
+ dev = dev_get_by_name_rcu(net, r->arp_dev);
+ else
+ dev = __dev_get_by_name(net, r->arp_dev);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ /* Mmmm... It is wrong... ARPHRD_NETROM == 0 */
+ if (!r->arp_ha.sa_family)
+ r->arp_ha.sa_family = dev->type;
+
+ if ((r->arp_flags & ATF_COM) && r->arp_ha.sa_family != dev->type)
+ return ERR_PTR(-EINVAL);
+
+ return dev;
+}
+
+static struct net_device *arp_req_dev(struct net *net, struct arpreq *r)
+{
+ struct net_device *dev;
+ struct rtable *rt;
+ __be32 ip;
+
+ if (r->arp_dev[0])
+ return arp_req_dev_by_name(net, r, false);
+
+ if (r->arp_flags & ATF_PUBL)
+ return NULL;
+
+ ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
+ rt = ip_route_output(net, ip, 0, 0, 0, RT_SCOPE_LINK);
+ if (IS_ERR(rt))
+ return ERR_CAST(rt);
+
+ dev = rt->dst.dev;
+ ip_rt_put(rt);
+
+ if (!dev)
+ return ERR_PTR(-EINVAL);
+
+ return dev;
+}
+
/*
* Set (create) an ARP cache entry.
*/
@@ -1023,11 +1072,8 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
static int arp_req_set_public(struct net *net, struct arpreq *r,
struct net_device *dev)
{
- __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
- if (mask && mask != htonl(0xFFFFFFFF))
- return -EINVAL;
if (!dev && (r->arp_flags & ATF_COM)) {
dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
r->arp_ha.sa_data);
@@ -1035,6 +1081,8 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
return -ENODEV;
}
if (mask) {
+ __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
if (!pneigh_lookup(&arp_tbl, net, &ip, dev, 1))
return -ENOBUFS;
return 0;
@@ -1043,30 +1091,20 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
return arp_req_set_proxy(net, dev, 1);
}
-static int arp_req_set(struct net *net, struct arpreq *r,
- struct net_device *dev)
+static int arp_req_set(struct net *net, struct arpreq *r)
{
- __be32 ip;
struct neighbour *neigh;
+ struct net_device *dev;
+ __be32 ip;
int err;
+ dev = arp_req_dev(net, r);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
if (r->arp_flags & ATF_PUBL)
return arp_req_set_public(net, r, dev);
- ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (r->arp_flags & ATF_PERM)
- r->arp_flags |= ATF_COM;
- if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, 0, 0,
- RT_SCOPE_LINK);
-
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- dev = rt->dst.dev;
- ip_rt_put(rt);
- if (!dev)
- return -EINVAL;
- }
switch (dev->type) {
#if IS_ENABLED(CONFIG_FDDI)
case ARPHRD_FDDI:
@@ -1088,12 +1126,18 @@ static int arp_req_set(struct net *net, struct arpreq *r,
break;
}
+ ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev);
err = PTR_ERR(neigh);
if (!IS_ERR(neigh)) {
unsigned int state = NUD_STALE;
- if (r->arp_flags & ATF_PERM)
+
+ if (r->arp_flags & ATF_PERM) {
+ r->arp_flags |= ATF_COM;
state = NUD_PERMANENT;
+ }
+
err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
r->arp_ha.sa_data : NULL, state,
NEIGH_UPDATE_F_OVERRIDE |
@@ -1117,27 +1161,40 @@ static unsigned int arp_state_to_flags(struct neighbour *neigh)
* Get an ARP cache entry.
*/
-static int arp_req_get(struct arpreq *r, struct net_device *dev)
+static int arp_req_get(struct net *net, struct arpreq *r)
{
__be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
struct neighbour *neigh;
- int err = -ENXIO;
+ struct net_device *dev;
+
+ if (!r->arp_dev[0])
+ return -ENODEV;
+
+ dev = arp_req_dev_by_name(net, r, true);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
neigh = neigh_lookup(&arp_tbl, &ip, dev);
- if (neigh) {
- if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) {
- read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha,
- min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
- r->arp_flags = arp_state_to_flags(neigh);
- read_unlock_bh(&neigh->lock);
- r->arp_ha.sa_family = dev->type;
- strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
- err = 0;
- }
+ if (!neigh)
+ return -ENXIO;
+
+ if (READ_ONCE(neigh->nud_state) & NUD_NOARP) {
neigh_release(neigh);
+ return -ENXIO;
}
- return err;
+
+ read_lock_bh(&neigh->lock);
+ memcpy(r->arp_ha.sa_data, neigh->ha,
+ min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
+ r->arp_flags = arp_state_to_flags(neigh);
+ read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
+
+ r->arp_ha.sa_family = dev->type;
+ netdev_copy_name(dev, r->arp_dev);
+
+ return 0;
}
int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
@@ -1168,37 +1225,31 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
static int arp_req_delete_public(struct net *net, struct arpreq *r,
struct net_device *dev)
{
- __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
- if (mask == htonl(0xFFFFFFFF))
- return pneigh_delete(&arp_tbl, net, &ip, dev);
+ if (mask) {
+ __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (mask)
- return -EINVAL;
+ return pneigh_delete(&arp_tbl, net, &ip, dev);
+ }
return arp_req_set_proxy(net, dev, 0);
}
-static int arp_req_delete(struct net *net, struct arpreq *r,
- struct net_device *dev)
+static int arp_req_delete(struct net *net, struct arpreq *r)
{
+ struct net_device *dev;
__be32 ip;
+ dev = arp_req_dev(net, r);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
if (r->arp_flags & ATF_PUBL)
return arp_req_delete_public(net, r, dev);
ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, 0, 0,
- RT_SCOPE_LINK);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- dev = rt->dst.dev;
- ip_rt_put(rt);
- if (!dev)
- return -EINVAL;
- }
+
return arp_invalidate(dev, ip, true);
}
@@ -1208,9 +1259,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
- int err;
struct arpreq r;
- struct net_device *dev = NULL;
+ __be32 *netmask;
+ int err;
switch (cmd) {
case SIOCDARP:
@@ -1233,42 +1284,34 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!(r.arp_flags & ATF_PUBL) &&
(r.arp_flags & (ATF_NETMASK | ATF_DONTPUB)))
return -EINVAL;
+
+ netmask = &((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr;
if (!(r.arp_flags & ATF_NETMASK))
- ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
- htonl(0xFFFFFFFFUL);
- rtnl_lock();
- if (r.arp_dev[0]) {
- err = -ENODEV;
- dev = __dev_get_by_name(net, r.arp_dev);
- if (!dev)
- goto out;
-
- /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
- if (!r.arp_ha.sa_family)
- r.arp_ha.sa_family = dev->type;
- err = -EINVAL;
- if ((r.arp_flags & ATF_COM) && r.arp_ha.sa_family != dev->type)
- goto out;
- } else if (cmd == SIOCGARP) {
- err = -ENODEV;
- goto out;
- }
+ *netmask = htonl(0xFFFFFFFFUL);
+ else if (*netmask && *netmask != htonl(0xFFFFFFFFUL))
+ return -EINVAL;
switch (cmd) {
case SIOCDARP:
- err = arp_req_delete(net, &r, dev);
+ rtnl_lock();
+ err = arp_req_delete(net, &r);
+ rtnl_unlock();
break;
case SIOCSARP:
- err = arp_req_set(net, &r, dev);
+ rtnl_lock();
+ err = arp_req_set(net, &r);
+ rtnl_unlock();
break;
case SIOCGARP:
- err = arp_req_get(&r, dev);
+ rcu_read_lock();
+ err = arp_req_get(net, &r);
+ rcu_read_unlock();
+
+ if (!err && copy_to_user(arg, &r, sizeof(r)))
+ err = -EFAULT;
break;
}
-out:
- rtnl_unlock();
- if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
- err = -EFAULT;
+
return err;
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 437e782b9663..207482d30dc7 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -483,6 +483,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
struct icmp_bxm *param)
{
struct net_device *route_lookup_dev;
+ struct dst_entry *dst, *dst2;
struct rtable *rt, *rt2;
struct flowi4 fl4_dec;
int err;
@@ -508,16 +509,17 @@ static struct rtable *icmp_route_lookup(struct net *net,
/* No need to clone since we're just using its address. */
rt2 = rt;
- rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
- flowi4_to_flowi(fl4), NULL, 0);
- if (!IS_ERR(rt)) {
+ dst = xfrm_lookup(net, &rt->dst,
+ flowi4_to_flowi(fl4), NULL, 0);
+ rt = dst_rtable(dst);
+ if (!IS_ERR(dst)) {
if (rt != rt2)
return rt;
- } else if (PTR_ERR(rt) == -EPERM) {
+ } else if (PTR_ERR(dst) == -EPERM) {
rt = NULL;
- } else
+ } else {
return rt;
-
+ }
err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
if (err)
goto relookup_failed;
@@ -551,19 +553,19 @@ static struct rtable *icmp_route_lookup(struct net *net,
if (err)
goto relookup_failed;
- rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
- flowi4_to_flowi(&fl4_dec), NULL,
- XFRM_LOOKUP_ICMP);
- if (!IS_ERR(rt2)) {
+ dst2 = xfrm_lookup(net, &rt2->dst, flowi4_to_flowi(&fl4_dec), NULL,
+ XFRM_LOOKUP_ICMP);
+ rt2 = dst_rtable(dst2);
+ if (!IS_ERR(dst2)) {
dst_release(&rt->dst);
memcpy(fl4, &fl4_dec, sizeof(*fl4));
rt = rt2;
- } else if (PTR_ERR(rt2) == -EPERM) {
+ } else if (PTR_ERR(dst2) == -EPERM) {
if (rt)
dst_release(&rt->dst);
return rt2;
} else {
- err = PTR_ERR(rt2);
+ err = PTR_ERR(dst2);
goto relookup_failed;
}
return rt;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 5e9c8156656a..d6fbcbd2358a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -616,7 +616,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
dst = skb_dst(skb);
if (curr_dst != dst) {
hint = ip_extract_route_hint(net, skb,
- ((struct rtable *)dst)->rt_type);
+ dst_rtable(dst)->rt_type);
/* dispatch old sublist */
if (!list_empty(&sublist))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1fe794967211..b455bd05a7d5 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -198,7 +198,7 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct rtable *rt = (struct rtable *)dst;
+ struct rtable *rt = dst_rtable(dst);
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
@@ -475,7 +475,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
goto packet_routed;
/* Make sure we can route this packet. */
- rt = (struct rtable *)__sk_dst_check(sk, 0);
+ rt = dst_rtable(__sk_dst_check(sk, 0));
if (!rt) {
__be32 daddr;
@@ -971,7 +971,7 @@ static int __ip_append_data(struct sock *sk,
bool zc = false;
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
- struct rtable *rt = (struct rtable *)cork->dst;
+ struct rtable *rt = dst_rtable(cork->dst);
bool paged, hold_tskey, extra_uref = false;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
@@ -1390,7 +1390,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
struct ip_options *opt = NULL;
- struct rtable *rt = (struct rtable *)cork->dst;
+ struct rtable *rt = dst_rtable(cork->dst);
struct iphdr *iph;
u8 pmtudisc, ttl;
__be16 df = 0;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 914bc9c35cc7..6c4664c681ca 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -33,6 +33,7 @@
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/proto_memory.h>
#include <net/udp.h>
#include <net/udplite.h>
#include <linux/bottom_half.h>
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f89ff2e5a05b..0fd9a3d7ac4a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -819,7 +819,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
u32 mark = skb->mark;
__u8 tos = iph->tos;
- rt = (struct rtable *) dst;
+ rt = dst_rtable(dst);
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
__ip_do_redirect(rt, skb, &fl4, true);
@@ -827,7 +827,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
- struct rtable *rt = (struct rtable *)dst;
+ struct rtable *rt = dst_rtable(dst);
struct dst_entry *ret = dst;
if (rt) {
@@ -1044,7 +1044,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu,
bool confirm_neigh)
{
- struct rtable *rt = (struct rtable *) dst;
+ struct rtable *rt = dst_rtable(dst);
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
@@ -1115,7 +1115,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
- rt = (struct rtable *)odst;
+ rt = dst_rtable(odst);
if (odst->obsolete && !odst->ops->check(odst, 0)) {
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
if (IS_ERR(rt))
@@ -1124,7 +1124,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
new = true;
}
- __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
+ __ip_rt_update_pmtu(dst_rtable(xfrm_dst_path(&rt->dst)), &fl4, mtu);
if (!dst_check(&rt->dst, 0)) {
if (new)
@@ -1181,7 +1181,7 @@ EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst,
u32 cookie)
{
- struct rtable *rt = (struct rtable *) dst;
+ struct rtable *rt = dst_rtable(dst);
/* All IPV4 dsts are created with ->obsolete set to the value
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
@@ -1516,10 +1516,8 @@ void rt_del_uncached_list(struct rtable *rt)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
- struct rtable *rt = (struct rtable *)dst;
-
ip_dst_metrics_put(dst);
- rt_del_uncached_list(rt);
+ rt_del_uncached_list(dst_rtable(dst));
}
void rt_flush_dev(struct net_device *dev)
@@ -2820,7 +2818,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
- struct rtable *ort = (struct rtable *) dst_orig;
+ struct rtable *ort = dst_rtable(dst_orig);
struct rtable *rt;
rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0);
@@ -2865,9 +2863,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
if (flp4->flowi4_proto) {
flp4->flowi4_oif = rt->dst.dev->ifindex;
- rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
- flowi4_to_flowi(flp4),
- sk, 0);
+ rt = dst_rtable(xfrm_lookup_route(net, &rt->dst,
+ flowi4_to_flowi(flp4),
+ sk, 0));
}
return rt;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4ec0f4feee00..e1f0efbb29d6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -272,6 +272,7 @@
#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/proto_memory.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/sock.h>
@@ -280,6 +281,7 @@
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <net/busy_poll.h>
+#include <net/hotdata.h>
#include <net/rps.h>
/* Track pending CMSGs. */
@@ -1188,7 +1190,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= READ_ONCE(sysctl_max_skb_frags)) {
+ if (i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -2751,7 +2753,15 @@ static bool tcp_too_many_orphans(int shift)
READ_ONCE(sysctl_tcp_max_orphans);
}
-bool tcp_check_oom(struct sock *sk, int shift)
+static bool tcp_out_of_memory(const struct sock *sk)
+{
+ if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
+ sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
+ return true;
+ return false;
+}
+
+bool tcp_check_oom(const struct sock *sk, int shift)
{
bool too_many_orphans, out_of_socket_memory;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 53e1150f706f..ad8fa129fcfe 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -72,6 +72,7 @@
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
+#include <net/proto_memory.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ea7ad7d99245..57edf66ff91b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -39,6 +39,7 @@
#include <net/tcp.h>
#include <net/mptcp.h>
+#include <net/proto_memory.h>
#include <linux/compiler.h>
#include <linux/gfp.h>
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6e2446295089..fe55ff5d379b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1217,7 +1217,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
if (connected)
- rt = (struct rtable *)sk_dst_check(sk, 0);
+ rt = dst_rtable(sk_dst_check(sk, 0));
if (!rt) {
struct net *net = sock_net(sk);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1dda59e0aeab..fccbbd3e1a4b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -69,7 +69,7 @@ static int xfrm4_get_saddr(struct net *net, int oif,
static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
- struct rtable *rt = (struct rtable *)xdst->route;
+ struct rtable *rt = dst_rtable(xdst->route);
const struct flowi4 *fl4 = &fl->u.ip4;
xdst->u.rt.rt_iif = fl4->flowi4_iif;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0f2506e35359..0627c4c18d1a 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -252,9 +252,8 @@ static void aca_free_rcu(struct rcu_head *h)
static void aca_put(struct ifacaddr6 *ac)
{
- if (refcount_dec_and_test(&ac->aca_refcnt)) {
- call_rcu(&ac->rcu, aca_free_rcu);
- }
+ if (refcount_dec_and_test(&ac->aca_refcnt))
+ call_rcu_hurry(&ac->rcu, aca_free_rcu);
}
static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 970af3983d11..19c8cc5289d5 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -459,7 +459,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl4 = &inet->cork.fl.u.ip4;
if (connected)
- rt = (struct rtable *)__sk_dst_check(sk, 0);
+ rt = dst_rtable(__sk_dst_check(sk, 0));
rcu_read_lock();
if (!rt) {
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 606349c8df0e..4385fd3b13be 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -81,7 +81,7 @@ static int mpls_xmit(struct sk_buff *skb)
ttl = net->mpls.default_ttl;
else
ttl = ip_hdr(skb)->ttl;
- rt = (struct rtable *)dst;
+ rt = dst_rtable(dst);
} else if (dst->ops->family == AF_INET6) {
if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
ttl = tun_encap_info->default_ttl;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4b13ca362efa..aff17597e6a7 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -20,6 +20,7 @@
#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
+#include <net/hotdata.h>
#include <net/xfrm.h>
#include <asm/ioctls.h>
#include "protocol.h"
@@ -1272,7 +1273,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
- if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
+ if (!can_coalesce && i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) {
tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 6e8b9d100ad2..3313bceb6cc9 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -318,7 +318,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
if (dest) {
dest_dst = __ip_vs_dst_check(dest);
if (likely(dest_dst))
- rt = (struct rtable *) dest_dst->dst_cache;
+ rt = dst_rtable(dest_dst->dst_cache);
else {
dest_dst = ip_vs_dest_dst_alloc();
spin_lock_bh(&dest->dst_lock);
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 100887beed31..c2c005234dcd 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -434,7 +434,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
return NF_ACCEPT;
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ rt = dst_rtable(tuplehash->tuple.dst_cache);
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
IPCB(skb)->iif = skb->dev->ifindex;
IPCB(skb)->flags = IPSKB_FORWARDED;
@@ -446,7 +446,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
- rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ rt = dst_rtable(tuplehash->tuple.dst_cache);
outdev = rt->dst.dev;
skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 2434c624aafd..14d88394bcb7 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -73,7 +73,7 @@ void nft_rt_get_eval(const struct nft_expr *expr,
if (nft_pf(pkt) != NFPROTO_IPV4)
goto err;
- *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+ *dest = (__force u32)rt_nexthop(dst_rtable(dst),
ip_hdr(skb)->daddr);
break;
case NFT_RT_NEXTHOP6:
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 60239378d43f..6292d6d73b72 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1389,6 +1389,7 @@ err_out4:
ops->destroy(sch);
qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
+ lockdep_unregister_key(&sch->root_lock_key);
netdev_put(dev, &sch->dev_tracker);
qdisc_free(sch);
err_out2:
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 31dfd6c7405b..d3f6006b563c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -982,6 +982,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
return sch;
errout1:
+ lockdep_unregister_key(&sch->root_lock_key);
kfree(sch);
errout:
return ERR_PTR(err);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e849f368ed91..5a7436a13b74 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -552,7 +552,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk,
struct flowi *fl)
{
union sctp_addr *saddr = &t->saddr;
- struct rtable *rt = (struct rtable *)t->dst;
+ struct rtable *rt = dst_rtable(t->dst);
if (rt) {
saddr->v4.sin_family = AF_INET;
@@ -1085,7 +1085,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t)
skb_reset_inner_mac_header(skb);
skb_reset_inner_transport_header(skb);
skb_set_inner_ipproto(skb, IPPROTO_SCTP);
- udp_tunnel_xmit_skb((struct rtable *)dst, sk, skb, fl4->saddr,
+ udp_tunnel_xmit_skb(dst_rtable(dst), sk, skb, fl4->saddr,
fl4->daddr, dscp, ip4_dst_hoplimit(dst), df,
sctp_sk(sk)->udp_port, t->encap_port, false, false);
return 0;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 08fdf1251f46..5adf0c0a6c1a 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -38,6 +38,7 @@
#include <linux/inet.h>
#include <linux/slab.h>
#include <net/sock.h>
+#include <net/proto_memory.h>
#include <net/inet_ecn.h>
#include <linux/skbuff.h>
#include <net/sctp/sctp.h>
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index f892b0903dba..b849a3d133a0 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -174,7 +174,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
local_bh_disable();
ndst = dst_cache_get(cache);
if (dst->proto == htons(ETH_P_IP)) {
- struct rtable *rt = (struct rtable *)ndst;
+ struct rtable *rt = dst_rtable(ndst);
if (!rt) {
struct flowi4 fl = {
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index 72c6001964a6..e9a6c702b8c9 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -28,6 +28,8 @@ TEST_PROGS += nft_zones_many.sh
TEST_PROGS += rpath.sh
TEST_PROGS += xt_string.sh
+TEST_PROGS_EXTENDED = nft_concat_range_perf.sh
+
TEST_GEN_PROGS = conntrack_dump_flush
TEST_GEN_FILES = audit_logread
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 60b86c7f3ea1..5b5b764f6cd0 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -85,3 +85,4 @@ CONFIG_VETH=m
CONFIG_VLAN_8021Q=m
CONFIG_XFRM_USER=m
CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_PKTGEN=m
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 2b6661519055..6d66240e149c 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -19,7 +19,7 @@ source lib.sh
# - timeout: check that packets match entries until they expire
# - performance: estimate matching rate, compare with rbtree and hash baselines
TESTS="reported_issues correctness concurrency timeout"
-[ "${quicktest}" != "1" ] && TESTS="${TESTS} performance"
+[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
# Set types, defined by TYPE_ variables below
TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
@@ -31,7 +31,7 @@ BUGS="flush_remove_add reload"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
- ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+ ../../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
# Definition of set types:
@@ -951,6 +951,10 @@ cleanup() {
killall iperf 2>/dev/null
killall netperf 2>/dev/null
killall netserver 2>/dev/null
+}
+
+cleanup_exit() {
+ cleanup
rm -f "$tmp"
}
@@ -1371,6 +1375,9 @@ test_timeout() {
setup veth send_"${proto}" set || return ${ksft_skip}
timeout=3
+
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && timeout=8
+
range_size=1
for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
@@ -1386,7 +1393,7 @@ test_timeout() {
range_size=$((range_size + 1))
start=$((end + range_size))
done
- sleep 3
+ sleep $timeout
for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
@@ -1480,10 +1487,13 @@ test_performance() {
}
test_bug_flush_remove_add() {
+ rounds=100
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && rounds=10
+
set_cmd='{ set s { type ipv4_addr . inet_service; flags interval; }; }'
elem1='{ 10.0.0.1 . 22-25, 10.0.0.1 . 10-20 }'
elem2='{ 10.0.0.1 . 10-20, 10.0.0.1 . 22-25 }'
- for i in $(seq 1 100); do
+ for i in $(seq 1 $rounds); do
nft add table t "$set_cmd" || return ${ksft_skip}
nft add element t s "$elem1" 2>/dev/null || return 1
nft flush set t s 2>/dev/null || return 1
@@ -1552,7 +1562,7 @@ test_reported_issues() {
# Run everything in a separate network namespace
[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
tmp="$(mktemp)"
-trap cleanup EXIT
+trap cleanup_exit EXIT
# Entry point for test runs
passed=0
@@ -1584,10 +1594,16 @@ for name in ${TESTS}; do
continue
fi
- printf " %-60s " "${display}"
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && count=1
+
+ printf " %-32s " "${display}"
+ tthen=$(date +%s)
eval test_"${name}"
ret=$?
+ tnow=$(date +%s)
+ printf "%5ds%-30s" $((tnow-tthen))
+
if [ $ret -eq 0 ]; then
printf "[ OK ]\n"
info_flush
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh
new file mode 100755
index 000000000000..5d276995a5c5
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+source lib.sh
+
+[ "$KSFT_MACHINE_SLOW" = yes ] && exit ${ksft_skip}
+
+NFT_CONCAT_RANGE_TESTS="performance" exec ./nft_concat_range.sh