summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2022-03-03 13:37:23 +0300
committerDavid S. Miller <davem@davemloft.net>2022-03-03 13:37:23 +0300
commitca0a53dcec9495d1dc5bbc369c810c520d728373 (patch)
treecd59200569e2e41e1aefd31a5ebf4182cb2ac901
parentcbcc44db2cf7b836896733acc0e5ea966136ed22 (diff)
parentba95e7930957e853ffae2d4528e057abe486a005 (diff)
downloadlinux-ca0a53dcec9495d1dc5bbc369c810c520d728373.tar.xz
Merge branch 'net-hw-counters-for-soft-devices'
Ido Schimmel says: ==================== HW counters for soft devices Petr says: Offloading switch device drivers may be able to collect statistics of the traffic taking place in the HW datapath that pertains to a certain soft netdevice, such as a VLAN. In this patch set, add the necessary infrastructure to allow exposing these statistics to the offloaded netdevice in question, and add mlxsw offload. Across HW platforms, the counter itself very likely constitutes a limited resource, and the act of counting may have a performance impact. Therefore this patch set makes the HW statistics collection opt-in and togglable from userspace on a per-netdevice basis. Additionally, HW devices may have various limiting conditions under which they can realize the counter. Therefore it is also possible to query whether the requested counter is realized by any driver. In TC parlance, which is to a degree reused in this patch set, two values are recognized: "request" tracks whether the user enabled collecting HW statistics, and "used" tracks whether any HW statistics are actually collected. In the past, this author has expressed the opinion that `a typical user doing "ip -s l sh", including various scripts, wants to see the full picture and not worry what's going on where'. While that would be nice, unfortunately it cannot work: - Packets that trap from the HW datapath to the SW datapath would be double counted. For a given netdevice, some traffic can be purely a SW artifact, and some may flow through the HW object corresponding to the netdevice. But some traffic can also get trapped to the SW datapath after bumping the HW counter. It is not clear how to make sure double-counting does not occur in the SW datapath in that case, while still making sure that possibly divergent SW forwarding path gets bumped as appropriate. So simply adding HW and SW stats may work roughly, most of the time, but there are scenarios where the result is nonsensical. - HW devices will have limitations as to what type of traffic they can count. In case of mlxsw, which is part of this patch set, there is no reasonable way to count all traffic going through a certain netdevice, such as a VLAN netdevice enslaved to a bridge. It is however very simple to count traffic flowing through an L3 object, such as a VLAN netdevice with an IP address. Similarly for physical netdevices, the L3 object at which the counter is installed is the subport carrying untagged traffic. These are not "just counters". It is important that the user understands what is being counted. It would be incorrect to conflate these statistics with another existing statistics suite. To that end, this patch set introduces a statistics suite called "L3 stats". This label should make it easy to understand what is being counted, and to decide whether a given device can or cannot implement this suite for some type of netdevice. At the same time, the code is written to make future extensions easy, should a device pop up that can implement a different flavor of statistics suite (say L2, or an address-family-specific suite). For example, using a work-in-progress iproute2[1], to turn on and then list the counters on a VLAN netdevice: # ip stats set dev swp1.200 l3_stats on # ip stats show dev swp1.200 group offload subgroup l3_stats 56: swp1.200: group offload subgroup l3_stats on used on RX: bytes packets errors dropped missed mcast 0 0 0 0 0 0 TX: bytes packets errors dropped carrier collsns 0 0 0 0 0 0 The patchset progresses as follows: - Patch #1 is a cleanup. - In patch #2, remove the assumption that all LINK_OFFLOAD_XSTATS are dev-backed. The only attribute defined under the nest is currently IFLA_OFFLOAD_XSTATS_CPU_HIT. L3_STATS differs from CPU_HIT in that the driver that supplies the statistics is not the same as the driver that implements the netdevice. Make the code compatible with this in patch #2. - In patch #3, add the possibility to filter inside nests. The filter_mask field of RTM_GETSTATS header determines which top-level attributes should be included in the netlink response. This saves processing time by only including the bits that the user cares about instead of always dumping everything. This is doubly important for HW-backed statistics that would typically require a trip to the device to fetch the stats. In this patch, the UAPI is extended to allow filtering inside IFLA_STATS_LINK_OFFLOAD_XSTATS in particular, but the scheme is easily extensible to other nests as well. - In patch #4, propagate extack where we need it. In patch #5, make it possible to propagate errors from drivers to the user. - In patch #6, add the in-kernel APIs for keeping track of the new stats suite, and the notifiers that the core uses to communicate with the drivers. - In patch #7, add UAPI for obtaining the new stats suite. - In patch #8, add a new UAPI message, RTM_SETSTATS, which will carry the message to toggle the newly-added stats suite. In patch #9, add the toggle itself. At this point the core is ready for drivers to add support for the new stats suite. - In patches #10, #11 and #12, apply small tweaks to mlxsw code. - In patch #13, add support for L3 stats, which are realized as RIF counters. - Finally in patch #14, a selftest is added to the net/forwarding directory. Technically this is a HW-specific test, in that without a HW implementing the counters, it just will not pass. But devices that support L3 statistics at all are likely to be able to reuse this selftest, so it seems appropriate to put it in the general forwarding directory. We also have a netdevsim implementation, and a corresponding selftest that verifies specifically some of the core code. We intend to contribute these later. Interested parties can take a look at the raw code at [2]. [1] https://github.com/pmachata/iproute2/commits/soft_counters [2] https://github.com/pmachata/linux_mlxsw/commits/petrm_soft_counters_2 v2: - Patch #3: - Do not declare strict_start_type at the new policies, since they are used with nla_parse_nested() (sans _deprecated). - Use NLA_POLICY_NESTED to declare what the nest contents should be - Use NLA_POLICY_MASK instead of BITFIELD32 for the filtering attribute. - Patch #6: - s/monotonous/monotonic/ in commit message - Use a newly-added struct rtnl_hw_stats64 for stats transfer - Patch #7: - Use a newly-added struct rtnl_hw_stats64 for stats transfer - Patch #8: - Do not declare strict_start_type at the new policies, since they are used with nla_parse_nested() (sans _deprecated). - Patch #13: - Use a newly-added struct rtnl_hw_stats64 for stats transfer ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c305
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h6
-rw-r--r--include/linux/netdevice.h42
-rw-r--r--include/linux/rtnetlink.h3
-rw-r--r--include/uapi/linux/if_link.h37
-rw-r--r--include/uapi/linux/rtnetlink.h4
-rw-r--r--net/core/dev.c267
-rw-r--r--net/core/rtnetlink.c522
-rw-r--r--security/selinux/nlmsgtab.c1
-rwxr-xr-xtools/testing/selftests/net/forwarding/hw_stats_l3.sh332
13 files changed, 1458 insertions, 93 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index dce21daaf330..67b1a2f8397f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -6784,12 +6784,14 @@ static inline void mlxsw_reg_ritr_counter_pack(char *payload, u32 index,
set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC;
else
set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT;
- mlxsw_reg_ritr_egress_counter_set_type_set(payload, set_type);
- if (egress)
+ if (egress) {
+ mlxsw_reg_ritr_egress_counter_set_type_set(payload, set_type);
mlxsw_reg_ritr_egress_counter_index_set(payload, index);
- else
+ } else {
+ mlxsw_reg_ritr_ingress_counter_set_type_set(payload, set_type);
mlxsw_reg_ritr_ingress_counter_index_set(payload, index);
+ }
}
static inline void mlxsw_reg_ritr_rif_pack(char *payload, u16 rif)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 4880521b11a7..7b7b17183d10 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4823,6 +4823,22 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+static bool mlxsw_sp_netdevice_event_is_router(unsigned long event)
+{
+ switch (event) {
+ case NETDEV_PRE_CHANGEADDR:
+ case NETDEV_CHANGEADDR:
+ case NETDEV_CHANGEMTU:
+ case NETDEV_OFFLOAD_XSTATS_ENABLE:
+ case NETDEV_OFFLOAD_XSTATS_DISABLE:
+ case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
+ case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
+ return true;
+ default:
+ return false;
+ }
+}
+
static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -4847,9 +4863,7 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev,
event, ptr);
- else if (event == NETDEV_PRE_CHANGEADDR ||
- event == NETDEV_CHANGEADDR ||
- event == NETDEV_CHANGEMTU)
+ else if (mlxsw_sp_netdevice_event_is_router(event))
err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr);
else if (mlxsw_sp_is_vrf_event(event, ptr))
err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index 1a2fef2a5379..5d494fabf93d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -266,10 +266,10 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable)
if (!rif)
continue;
if (enable)
- mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif,
+ mlxsw_sp_rif_counter_alloc(rif,
MLXSW_SP_RIF_COUNTER_EGRESS);
else
- mlxsw_sp_rif_counter_free(mlxsw_sp, rif,
+ mlxsw_sp_rif_counter_free(rif,
MLXSW_SP_RIF_COUNTER_EGRESS);
}
mutex_unlock(&mlxsw_sp->router->lock);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index d40762cfc453..79deb19e3a19 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -225,6 +225,64 @@ int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+struct mlxsw_sp_rif_counter_set_basic {
+ u64 good_unicast_packets;
+ u64 good_multicast_packets;
+ u64 good_broadcast_packets;
+ u64 good_unicast_bytes;
+ u64 good_multicast_bytes;
+ u64 good_broadcast_bytes;
+ u64 error_packets;
+ u64 discard_packets;
+ u64 error_bytes;
+ u64 discard_bytes;
+};
+
+static int
+mlxsw_sp_rif_counter_fetch_clear(struct mlxsw_sp_rif *rif,
+ enum mlxsw_sp_rif_counter_dir dir,
+ struct mlxsw_sp_rif_counter_set_basic *set)
+{
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ char ricnt_pl[MLXSW_REG_RICNT_LEN];
+ unsigned int *p_counter_index;
+ int err;
+
+ if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
+ return -EINVAL;
+
+ p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
+ if (!p_counter_index)
+ return -EINVAL;
+
+ mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
+ MLXSW_REG_RICNT_OPCODE_CLEAR);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
+ if (err)
+ return err;
+
+ if (!set)
+ return 0;
+
+#define MLXSW_SP_RIF_COUNTER_EXTRACT(NAME) \
+ (set->NAME = mlxsw_reg_ricnt_ ## NAME ## _get(ricnt_pl))
+
+ MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets);
+ MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_packets);
+ MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_packets);
+ MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_bytes);
+ MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_bytes);
+ MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_bytes);
+ MLXSW_SP_RIF_COUNTER_EXTRACT(error_packets);
+ MLXSW_SP_RIF_COUNTER_EXTRACT(discard_packets);
+ MLXSW_SP_RIF_COUNTER_EXTRACT(error_bytes);
+ MLXSW_SP_RIF_COUNTER_EXTRACT(discard_bytes);
+
+#undef MLXSW_SP_RIF_COUNTER_EXTRACT
+
+ return 0;
+}
+
static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
unsigned int counter_index)
{
@@ -235,16 +293,20 @@ static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}
-int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_rif *rif,
+int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif,
enum mlxsw_sp_rif_counter_dir dir)
{
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
unsigned int *p_counter_index;
int err;
+ if (mlxsw_sp_rif_counter_valid_get(rif, dir))
+ return 0;
+
p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
if (!p_counter_index)
return -EINVAL;
+
err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
p_counter_index);
if (err)
@@ -268,10 +330,10 @@ err_counter_clear:
return err;
}
-void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_rif *rif,
+void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif,
enum mlxsw_sp_rif_counter_dir dir)
{
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
unsigned int *p_counter_index;
if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
@@ -296,14 +358,12 @@ static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
if (!devlink_dpipe_table_counter_enabled(devlink,
MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
return;
- mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
+ mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}
static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
{
- struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
-
- mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
+ mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
@@ -8148,6 +8208,166 @@ u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
return lb_rif->ul_rif_id;
}
+static bool
+mlxsw_sp_router_port_l3_stats_enabled(struct mlxsw_sp_rif *rif)
+{
+ return mlxsw_sp_rif_counter_valid_get(rif,
+ MLXSW_SP_RIF_COUNTER_EGRESS) &&
+ mlxsw_sp_rif_counter_valid_get(rif,
+ MLXSW_SP_RIF_COUNTER_INGRESS);
+}
+
+static int
+mlxsw_sp_router_port_l3_stats_enable(struct mlxsw_sp_rif *rif)
+{
+ int err;
+
+ err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
+ if (err)
+ return err;
+
+ /* Clear stale data. */
+ err = mlxsw_sp_rif_counter_fetch_clear(rif,
+ MLXSW_SP_RIF_COUNTER_INGRESS,
+ NULL);
+ if (err)
+ goto err_clear_ingress;
+
+ err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
+ if (err)
+ goto err_alloc_egress;
+
+ /* Clear stale data. */
+ err = mlxsw_sp_rif_counter_fetch_clear(rif,
+ MLXSW_SP_RIF_COUNTER_EGRESS,
+ NULL);
+ if (err)
+ goto err_clear_egress;
+
+ return 0;
+
+err_clear_egress:
+ mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
+err_alloc_egress:
+err_clear_ingress:
+ mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
+ return err;
+}
+
+static void
+mlxsw_sp_router_port_l3_stats_disable(struct mlxsw_sp_rif *rif)
+{
+ mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
+ mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
+}
+
+static void
+mlxsw_sp_router_port_l3_stats_report_used(struct mlxsw_sp_rif *rif,
+ struct netdev_notifier_offload_xstats_info *info)
+{
+ if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
+ return;
+ netdev_offload_xstats_report_used(info->report_used);
+}
+
+static int
+mlxsw_sp_router_port_l3_stats_fetch(struct mlxsw_sp_rif *rif,
+ struct rtnl_hw_stats64 *p_stats)
+{
+ struct mlxsw_sp_rif_counter_set_basic ingress;
+ struct mlxsw_sp_rif_counter_set_basic egress;
+ int err;
+
+ err = mlxsw_sp_rif_counter_fetch_clear(rif,
+ MLXSW_SP_RIF_COUNTER_INGRESS,
+ &ingress);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_rif_counter_fetch_clear(rif,
+ MLXSW_SP_RIF_COUNTER_EGRESS,
+ &egress);
+ if (err)
+ return err;
+
+#define MLXSW_SP_ROUTER_ALL_GOOD(SET, SFX) \
+ ((SET.good_unicast_ ## SFX) + \
+ (SET.good_multicast_ ## SFX) + \
+ (SET.good_broadcast_ ## SFX))
+
+ p_stats->rx_packets = MLXSW_SP_ROUTER_ALL_GOOD(ingress, packets);
+ p_stats->tx_packets = MLXSW_SP_ROUTER_ALL_GOOD(egress, packets);
+ p_stats->rx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(ingress, bytes);
+ p_stats->tx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(egress, bytes);
+ p_stats->rx_errors = ingress.error_packets;
+ p_stats->tx_errors = egress.error_packets;
+ p_stats->rx_dropped = ingress.discard_packets;
+ p_stats->tx_dropped = egress.discard_packets;
+ p_stats->multicast = ingress.good_multicast_packets +
+ ingress.good_broadcast_packets;
+
+#undef MLXSW_SP_ROUTER_ALL_GOOD
+
+ return 0;
+}
+
+static int
+mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif,
+ struct netdev_notifier_offload_xstats_info *info)
+{
+ struct rtnl_hw_stats64 stats = {};
+ int err;
+
+ if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
+ return 0;
+
+ err = mlxsw_sp_router_port_l3_stats_fetch(rif, &stats);
+ if (err)
+ return err;
+
+ netdev_offload_xstats_report_delta(info->report_delta, &stats);
+ return 0;
+}
+
+struct mlxsw_sp_router_hwstats_notify_work {
+ struct work_struct work;
+ struct net_device *dev;
+};
+
+static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work)
+{
+ struct mlxsw_sp_router_hwstats_notify_work *hws_work =
+ container_of(work, struct mlxsw_sp_router_hwstats_notify_work,
+ work);
+
+ rtnl_lock();
+ rtnl_offload_xstats_notify(hws_work->dev);
+ rtnl_unlock();
+ dev_put(hws_work->dev);
+ kfree(hws_work);
+}
+
+static void
+mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev)
+{
+ struct mlxsw_sp_router_hwstats_notify_work *hws_work;
+
+ /* To collect notification payload, the core ends up sending another
+ * notifier block message, which would deadlock on the attempt to
+ * acquire the router lock again. Just postpone the notification until
+ * later.
+ */
+
+ hws_work = kzalloc(sizeof(*hws_work), GFP_KERNEL);
+ if (!hws_work)
+ return;
+
+ INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work);
+ dev_hold(dev);
+ hws_work->dev = dev;
+ mlxsw_core_schedule_work(&hws_work->work);
+}
+
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
{
return rif->dev->ifindex;
@@ -8158,6 +8378,16 @@ const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
return rif->dev;
}
+static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif)
+{
+ struct rtnl_hw_stats64 stats = {};
+
+ if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats))
+ netdev_offload_xstats_push_delta(rif->dev,
+ NETDEV_OFFLOAD_XSTATS_TYPE_L3,
+ &stats);
+}
+
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_rif_params *params,
@@ -8218,10 +8448,19 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
goto err_mr_rif_add;
}
- mlxsw_sp_rif_counters_alloc(rif);
+ if (netdev_offload_xstats_enabled(rif->dev,
+ NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
+ err = mlxsw_sp_router_port_l3_stats_enable(rif);
+ if (err)
+ goto err_stats_enable;
+ mlxsw_sp_router_hwstats_notify_schedule(rif->dev);
+ } else {
+ mlxsw_sp_rif_counters_alloc(rif);
+ }
return rif;
+err_stats_enable:
err_mr_rif_add:
for (i--; i >= 0; i--)
mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
@@ -8251,7 +8490,15 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
vr = &mlxsw_sp->router->vrs[rif->vr_id];
- mlxsw_sp_rif_counters_free(rif);
+ if (netdev_offload_xstats_enabled(rif->dev,
+ NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
+ mlxsw_sp_rif_push_l3_stats(rif);
+ mlxsw_sp_router_port_l3_stats_disable(rif);
+ mlxsw_sp_router_hwstats_notify_schedule(rif->dev);
+ } else {
+ mlxsw_sp_rif_counters_free(rif);
+ }
+
for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
ops->deconfigure(rif);
@@ -9128,6 +9375,35 @@ static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
return -ENOBUFS;
}
+static int
+mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif,
+ unsigned long event,
+ struct netdev_notifier_offload_xstats_info *info)
+{
+ switch (info->type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ break;
+ default:
+ return 0;
+ }
+
+ switch (event) {
+ case NETDEV_OFFLOAD_XSTATS_ENABLE:
+ return mlxsw_sp_router_port_l3_stats_enable(rif);
+ case NETDEV_OFFLOAD_XSTATS_DISABLE:
+ mlxsw_sp_router_port_l3_stats_disable(rif);
+ return 0;
+ case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
+ mlxsw_sp_router_port_l3_stats_report_used(rif, info);
+ return 0;
+ case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
+ return mlxsw_sp_router_port_l3_stats_report_delta(rif, info);
+ }
+
+ WARN_ON_ONCE(1);
+ return 0;
+}
+
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
unsigned long event, void *ptr)
{
@@ -9153,6 +9429,15 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
case NETDEV_PRE_CHANGEADDR:
err = mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
break;
+ case NETDEV_OFFLOAD_XSTATS_ENABLE:
+ case NETDEV_OFFLOAD_XSTATS_DISABLE:
+ case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
+ case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
+ err = mlxsw_sp_router_port_offload_xstats_cmd(rif, event, ptr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
}
out:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 99e8371a82a5..fa829658a11b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -159,11 +159,9 @@ int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif,
enum mlxsw_sp_rif_counter_dir dir,
u64 *cnt);
-void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_rif *rif,
+void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif,
enum mlxsw_sp_rif_counter_dir dir);
-int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_rif *rif,
+int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif,
enum mlxsw_sp_rif_counter_dir dir);
struct mlxsw_sp_neigh_entry *
mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c79ee2296296..19a27ac361ef 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1950,6 +1950,7 @@ enum netdev_ml_priv_type {
* @watchdog_dev_tracker: refcount tracker used by watchdog.
* @dev_registered_tracker: tracker for reference held while
* registered
+ * @offload_xstats_l3: L3 HW stats for this netdevice.
*
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
@@ -2287,6 +2288,7 @@ struct net_device {
netdevice_tracker linkwatch_dev_tracker;
netdevice_tracker watchdog_dev_tracker;
netdevice_tracker dev_registered_tracker;
+ struct rtnl_hw_stats64 *offload_xstats_l3;
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
@@ -2727,6 +2729,10 @@ enum netdev_cmd {
NETDEV_CVLAN_FILTER_DROP_INFO,
NETDEV_SVLAN_FILTER_PUSH_INFO,
NETDEV_SVLAN_FILTER_DROP_INFO,
+ NETDEV_OFFLOAD_XSTATS_ENABLE,
+ NETDEV_OFFLOAD_XSTATS_DISABLE,
+ NETDEV_OFFLOAD_XSTATS_REPORT_USED,
+ NETDEV_OFFLOAD_XSTATS_REPORT_DELTA,
};
const char *netdev_cmd_to_name(enum netdev_cmd cmd);
@@ -2777,6 +2783,42 @@ struct netdev_notifier_pre_changeaddr_info {
const unsigned char *dev_addr;
};
+enum netdev_offload_xstats_type {
+ NETDEV_OFFLOAD_XSTATS_TYPE_L3 = 1,
+};
+
+struct netdev_notifier_offload_xstats_info {
+ struct netdev_notifier_info info; /* must be first */
+ enum netdev_offload_xstats_type type;
+
+ union {
+ /* NETDEV_OFFLOAD_XSTATS_REPORT_DELTA */
+ struct netdev_notifier_offload_xstats_rd *report_delta;
+ /* NETDEV_OFFLOAD_XSTATS_REPORT_USED */
+ struct netdev_notifier_offload_xstats_ru *report_used;
+ };
+};
+
+int netdev_offload_xstats_enable(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct netlink_ext_ack *extack);
+int netdev_offload_xstats_disable(struct net_device *dev,
+ enum netdev_offload_xstats_type type);
+bool netdev_offload_xstats_enabled(const struct net_device *dev,
+ enum netdev_offload_xstats_type type);
+int netdev_offload_xstats_get(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct rtnl_hw_stats64 *stats, bool *used,
+ struct netlink_ext_ack *extack);
+void
+netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *rd,
+ const struct rtnl_hw_stats64 *stats);
+void
+netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *ru);
+void netdev_offload_xstats_push_delta(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ const struct rtnl_hw_stats64 *stats);
+
static inline void netdev_notifier_info_init(struct netdev_notifier_info *info,
struct net_device *dev)
{
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index bb9cb84114c1..7f970b16da3a 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -134,4 +134,7 @@ extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
int (*vlan_fill)(struct sk_buff *skb,
struct net_device *dev,
u32 filter_mask));
+
+extern void rtnl_offload_xstats_notify(struct net_device *dev);
+
#endif /* __LINUX_RTNETLINK_H */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index e315e53125f4..ddca20357e7e 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -245,6 +245,21 @@ struct rtnl_link_stats64 {
__u64 rx_nohandler;
};
+/* Subset of link stats useful for in-HW collection. Meaning of the fields is as
+ * for struct rtnl_link_stats64.
+ */
+struct rtnl_hw_stats64 {
+ __u64 rx_packets;
+ __u64 tx_packets;
+ __u64 rx_bytes;
+ __u64 tx_bytes;
+ __u64 rx_errors;
+ __u64 tx_errors;
+ __u64 rx_dropped;
+ __u64 tx_dropped;
+ __u64 multicast;
+};
+
/* The struct should be in sync with struct ifmap */
struct rtnl_link_ifmap {
__u64 mem_start;
@@ -1207,6 +1222,17 @@ enum {
#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1))
+enum {
+ IFLA_STATS_GETSET_UNSPEC,
+ IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with
+ * a filter mask for the corresponding group.
+ */
+ IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, /* 0 or 1 as u8 */
+ __IFLA_STATS_GETSET_MAX,
+};
+
+#define IFLA_STATS_GETSET_MAX (__IFLA_STATS_GETSET_MAX - 1)
+
/* These are embedded into IFLA_STATS_LINK_XSTATS:
* [IFLA_STATS_LINK_XSTATS]
* -> [LINK_XSTATS_TYPE_xxx]
@@ -1224,10 +1250,21 @@ enum {
enum {
IFLA_OFFLOAD_XSTATS_UNSPEC,
IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */
+ IFLA_OFFLOAD_XSTATS_HW_S_INFO, /* HW stats info. A nest */
+ IFLA_OFFLOAD_XSTATS_L3_STATS, /* struct rtnl_hw_stats64 */
__IFLA_OFFLOAD_XSTATS_MAX
};
#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
+enum {
+ IFLA_OFFLOAD_XSTATS_HW_S_INFO_UNSPEC,
+ IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, /* u8 */
+ IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, /* u8 */
+ __IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX,
+};
+#define IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX \
+ (__IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX - 1)
+
/* XDP section */
#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0)
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 0970cb4b1b88..51530aade46e 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -146,6 +146,8 @@ enum {
#define RTM_NEWSTATS RTM_NEWSTATS
RTM_GETSTATS = 94,
#define RTM_GETSTATS RTM_GETSTATS
+ RTM_SETSTATS,
+#define RTM_SETSTATS RTM_SETSTATS
RTM_NEWCACHEREPORT = 96,
#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
@@ -765,6 +767,8 @@ enum rtnetlink_groups {
#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR
RTNLGRP_TUNNEL,
#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL
+ RTNLGRP_STATS,
+#define RTNLGRP_STATS RTNLGRP_STATS
__RTNLGRP_MAX
};
#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2d6771075720..c9e54e5ad48d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1622,7 +1622,8 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
- N(PRE_CHANGEADDR)
+ N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
+ N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
}
#undef N
return "UNKNOWN_NETDEV_EVENT";
@@ -1939,6 +1940,32 @@ static int call_netdevice_notifiers_info(unsigned long val,
return raw_notifier_call_chain(&netdev_chain, val, info);
}
+/**
+ * call_netdevice_notifiers_info_robust - call per-netns notifier blocks
+ * for and rollback on error
+ * @val_up: value passed unmodified to notifier function
+ * @val_down: value passed unmodified to the notifier function when
+ * recovering from an error on @val_up
+ * @info: notifier information data
+ *
+ * Call all per-netns network notifier blocks, but not notifier blocks on
+ * the global notifier chain. Parameters and return value are as for
+ * raw_notifier_call_chain_robust().
+ */
+
+static int
+call_netdevice_notifiers_info_robust(unsigned long val_up,
+ unsigned long val_down,
+ struct netdev_notifier_info *info)
+{
+ struct net *net = dev_net(info->dev);
+
+ ASSERT_RTNL();
+
+ return raw_notifier_call_chain_robust(&net->netdev_chain,
+ val_up, val_down, info);
+}
+
static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack)
@@ -7728,6 +7755,242 @@ void netdev_bonding_info_change(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_bonding_info_change);
+static int netdev_offload_xstats_enable_l3(struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3,
+ };
+ int err;
+ int rc;
+
+ dev->offload_xstats_l3 = kzalloc(sizeof(*dev->offload_xstats_l3),
+ GFP_KERNEL);
+ if (!dev->offload_xstats_l3)
+ return -ENOMEM;
+
+ rc = call_netdevice_notifiers_info_robust(NETDEV_OFFLOAD_XSTATS_ENABLE,
+ NETDEV_OFFLOAD_XSTATS_DISABLE,
+ &info.info);
+ err = notifier_to_errno(rc);
+ if (err)
+ goto free_stats;
+
+ return 0;
+
+free_stats:
+ kfree(dev->offload_xstats_l3);
+ dev->offload_xstats_l3 = NULL;
+ return err;
+}
+
+int netdev_offload_xstats_enable(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct netlink_ext_ack *extack)
+{
+ ASSERT_RTNL();
+
+ if (netdev_offload_xstats_enabled(dev, type))
+ return -EALREADY;
+
+ switch (type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ return netdev_offload_xstats_enable_l3(dev, extack);
+ }
+
+ WARN_ON(1);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(netdev_offload_xstats_enable);
+
+static void netdev_offload_xstats_disable_l3(struct net_device *dev)
+{
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .type = NETDEV_OFFLOAD_XSTATS_TYPE_L3,
+ };
+
+ call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_DISABLE,
+ &info.info);
+ kfree(dev->offload_xstats_l3);
+ dev->offload_xstats_l3 = NULL;
+}
+
+int netdev_offload_xstats_disable(struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ ASSERT_RTNL();
+
+ if (!netdev_offload_xstats_enabled(dev, type))
+ return -EALREADY;
+
+ switch (type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ netdev_offload_xstats_disable_l3(dev);
+ return 0;
+ }
+
+ WARN_ON(1);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(netdev_offload_xstats_disable);
+
+static void netdev_offload_xstats_disable_all(struct net_device *dev)
+{
+ netdev_offload_xstats_disable(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3);
+}
+
+static struct rtnl_hw_stats64 *
+netdev_offload_xstats_get_ptr(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ switch (type) {
+ case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
+ return dev->offload_xstats_l3;
+ }
+
+ WARN_ON(1);
+ return NULL;
+}
+
+bool netdev_offload_xstats_enabled(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ ASSERT_RTNL();
+
+ return netdev_offload_xstats_get_ptr(dev, type);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_enabled);
+
+struct netdev_notifier_offload_xstats_ru {
+ bool used;
+};
+
+struct netdev_notifier_offload_xstats_rd {
+ struct rtnl_hw_stats64 stats;
+ bool used;
+};
+
+static void netdev_hw_stats64_add(struct rtnl_hw_stats64 *dest,
+ const struct rtnl_hw_stats64 *src)
+{
+ dest->rx_packets += src->rx_packets;
+ dest->tx_packets += src->tx_packets;
+ dest->rx_bytes += src->rx_bytes;
+ dest->tx_bytes += src->tx_bytes;
+ dest->rx_errors += src->rx_errors;
+ dest->tx_errors += src->tx_errors;
+ dest->rx_dropped += src->rx_dropped;
+ dest->tx_dropped += src->tx_dropped;
+ dest->multicast += src->multicast;
+}
+
+static int netdev_offload_xstats_get_used(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ bool *p_used,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_offload_xstats_ru report_used = {};
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .type = type,
+ .report_used = &report_used,
+ };
+ int rc;
+
+ WARN_ON(!netdev_offload_xstats_enabled(dev, type));
+ rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_USED,
+ &info.info);
+ *p_used = report_used.used;
+ return notifier_to_errno(rc);
+}
+
+static int netdev_offload_xstats_get_stats(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct rtnl_hw_stats64 *p_stats,
+ bool *p_used,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_offload_xstats_rd report_delta = {};
+ struct netdev_notifier_offload_xstats_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .type = type,
+ .report_delta = &report_delta,
+ };
+ struct rtnl_hw_stats64 *stats;
+ int rc;
+
+ stats = netdev_offload_xstats_get_ptr(dev, type);
+ if (WARN_ON(!stats))
+ return -EINVAL;
+
+ rc = call_netdevice_notifiers_info(NETDEV_OFFLOAD_XSTATS_REPORT_DELTA,
+ &info.info);
+
+ /* Cache whatever we got, even if there was an error, otherwise the
+ * successful stats retrievals would get lost.
+ */
+ netdev_hw_stats64_add(stats, &report_delta.stats);
+
+ if (p_stats)
+ *p_stats = *stats;
+ *p_used = report_delta.used;
+
+ return notifier_to_errno(rc);
+}
+
+int netdev_offload_xstats_get(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct rtnl_hw_stats64 *p_stats, bool *p_used,
+ struct netlink_ext_ack *extack)
+{
+ ASSERT_RTNL();
+
+ if (p_stats)
+ return netdev_offload_xstats_get_stats(dev, type, p_stats,
+ p_used, extack);
+ else
+ return netdev_offload_xstats_get_used(dev, type, p_used,
+ extack);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_get);
+
+void
+netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *report_delta,
+ const struct rtnl_hw_stats64 *stats)
+{
+ report_delta->used = true;
+ netdev_hw_stats64_add(&report_delta->stats, stats);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_report_delta);
+
+void
+netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *report_used)
+{
+ report_used->used = true;
+}
+EXPORT_SYMBOL(netdev_offload_xstats_report_used);
+
+void netdev_offload_xstats_push_delta(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ const struct rtnl_hw_stats64 *p_stats)
+{
+ struct rtnl_hw_stats64 *stats;
+
+ ASSERT_RTNL();
+
+ stats = netdev_offload_xstats_get_ptr(dev, type);
+ if (WARN_ON(!stats))
+ return;
+
+ netdev_hw_stats64_add(stats, p_stats);
+}
+EXPORT_SYMBOL(netdev_offload_xstats_push_delta);
+
/**
* netdev_get_xmit_slave - Get the xmit slave of master device
* @dev: device
@@ -10417,6 +10680,8 @@ void unregister_netdevice_many(struct list_head *head)
dev_xdp_uninstall(dev);
+ netdev_offload_xstats_disable_all(dev);
+
/* Notify protocols, that we are about to destroy
* this device. They should clean all the things.
*/
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 20a9e1686453..a66b6761b88b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -5048,82 +5048,256 @@ static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr)
(!idxattr || idxattr == attrid);
}
-#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1)
-static int rtnl_get_offload_stats_attr_size(int attr_id)
+static bool
+rtnl_offload_xstats_have_ndo(const struct net_device *dev, int attr_id)
{
- switch (attr_id) {
- case IFLA_OFFLOAD_XSTATS_CPU_HIT:
- return sizeof(struct rtnl_link_stats64);
- }
+ return dev->netdev_ops &&
+ dev->netdev_ops->ndo_has_offload_stats &&
+ dev->netdev_ops->ndo_get_offload_stats &&
+ dev->netdev_ops->ndo_has_offload_stats(dev, attr_id);
+}
- return 0;
+static unsigned int
+rtnl_offload_xstats_get_size_ndo(const struct net_device *dev, int attr_id)
+{
+ return rtnl_offload_xstats_have_ndo(dev, attr_id) ?
+ sizeof(struct rtnl_link_stats64) : 0;
}
-static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev,
- int *prividx)
+static int
+rtnl_offload_xstats_fill_ndo(struct net_device *dev, int attr_id,
+ struct sk_buff *skb)
{
+ unsigned int size = rtnl_offload_xstats_get_size_ndo(dev, attr_id);
struct nlattr *attr = NULL;
- int attr_id, size;
void *attr_data;
int err;
- if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
- dev->netdev_ops->ndo_get_offload_stats))
+ if (!size)
return -ENODATA;
- for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
- attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
- if (attr_id < *prividx)
- continue;
+ attr = nla_reserve_64bit(skb, attr_id, size,
+ IFLA_OFFLOAD_XSTATS_UNSPEC);
+ if (!attr)
+ return -EMSGSIZE;
- size = rtnl_get_offload_stats_attr_size(attr_id);
- if (!size)
- continue;
+ attr_data = nla_data(attr);
+ memset(attr_data, 0, size);
- if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
- continue;
+ err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, attr_data);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static unsigned int
+rtnl_offload_xstats_get_size_stats(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ bool enabled = netdev_offload_xstats_enabled(dev, type);
+
+ return enabled ? sizeof(struct rtnl_hw_stats64) : 0;
+}
+
+struct rtnl_offload_xstats_request_used {
+ bool request;
+ bool used;
+};
+
+static int
+rtnl_offload_xstats_get_stats(struct net_device *dev,
+ enum netdev_offload_xstats_type type,
+ struct rtnl_offload_xstats_request_used *ru,
+ struct rtnl_hw_stats64 *stats,
+ struct netlink_ext_ack *extack)
+{
+ bool request;
+ bool used;
+ int err;
+
+ request = netdev_offload_xstats_enabled(dev, type);
+ if (!request) {
+ used = false;
+ goto out;
+ }
+
+ err = netdev_offload_xstats_get(dev, type, stats, &used, extack);
+ if (err)
+ return err;
- attr = nla_reserve_64bit(skb, attr_id, size,
+out:
+ if (ru) {
+ ru->request = request;
+ ru->used = used;
+ }
+ return 0;
+}
+
+static int
+rtnl_offload_xstats_fill_hw_s_info_one(struct sk_buff *skb, int attr_id,
+ struct rtnl_offload_xstats_request_used *ru)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, attr_id);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, ru->request))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, ru->used))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int
+rtnl_offload_xstats_fill_hw_s_info(struct sk_buff *skb, struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+ struct rtnl_offload_xstats_request_used ru_l3;
+ struct nlattr *nest;
+ int err;
+
+ err = rtnl_offload_xstats_get_stats(dev, t_l3, &ru_l3, NULL, extack);
+ if (err)
+ return err;
+
+ nest = nla_nest_start(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (rtnl_offload_xstats_fill_hw_s_info_one(skb,
+ IFLA_OFFLOAD_XSTATS_L3_STATS,
+ &ru_l3))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev,
+ int *prividx, u32 off_filter_mask,
+ struct netlink_ext_ack *extack)
+{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+ int attr_id_hw_s_info = IFLA_OFFLOAD_XSTATS_HW_S_INFO;
+ int attr_id_l3_stats = IFLA_OFFLOAD_XSTATS_L3_STATS;
+ int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT;
+ bool have_data = false;
+ int err;
+
+ if (*prividx <= attr_id_cpu_hit &&
+ (off_filter_mask &
+ IFLA_STATS_FILTER_BIT(attr_id_cpu_hit))) {
+ err = rtnl_offload_xstats_fill_ndo(dev, attr_id_cpu_hit, skb);
+ if (!err) {
+ have_data = true;
+ } else if (err != -ENODATA) {
+ *prividx = attr_id_cpu_hit;
+ return err;
+ }
+ }
+
+ if (*prividx <= attr_id_hw_s_info &&
+ (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_hw_s_info))) {
+ *prividx = attr_id_hw_s_info;
+
+ err = rtnl_offload_xstats_fill_hw_s_info(skb, dev, extack);
+ if (err)
+ return err;
+
+ have_data = true;
+ *prividx = 0;
+ }
+
+ if (*prividx <= attr_id_l3_stats &&
+ (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_l3_stats))) {
+ unsigned int size_l3;
+ struct nlattr *attr;
+
+ *prividx = attr_id_l3_stats;
+
+ size_l3 = rtnl_offload_xstats_get_size_stats(dev, t_l3);
+ attr = nla_reserve_64bit(skb, attr_id_l3_stats, size_l3,
IFLA_OFFLOAD_XSTATS_UNSPEC);
if (!attr)
- goto nla_put_failure;
+ return -EMSGSIZE;
- attr_data = nla_data(attr);
- memset(attr_data, 0, size);
- err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev,
- attr_data);
+ err = rtnl_offload_xstats_get_stats(dev, t_l3, NULL,
+ nla_data(attr), extack);
if (err)
- goto get_offload_stats_failure;
+ return err;
+
+ have_data = true;
+ *prividx = 0;
}
- if (!attr)
+ if (!have_data)
return -ENODATA;
*prividx = 0;
return 0;
+}
-nla_put_failure:
- err = -EMSGSIZE;
-get_offload_stats_failure:
- *prividx = attr_id;
- return err;
+static unsigned int
+rtnl_offload_xstats_get_size_hw_s_info_one(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+{
+ bool enabled = netdev_offload_xstats_enabled(dev, type);
+
+ return nla_total_size(0) +
+ /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST */
+ nla_total_size(sizeof(u8)) +
+ /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED */
+ (enabled ? nla_total_size(sizeof(u8)) : 0) +
+ 0;
}
-static int rtnl_get_offload_stats_size(const struct net_device *dev)
+static unsigned int
+rtnl_offload_xstats_get_size_hw_s_info(const struct net_device *dev)
{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+
+ return nla_total_size(0) +
+ /* IFLA_OFFLOAD_XSTATS_L3_STATS */
+ rtnl_offload_xstats_get_size_hw_s_info_one(dev, t_l3) +
+ 0;
+}
+
+static int rtnl_offload_xstats_get_size(const struct net_device *dev,
+ u32 off_filter_mask)
+{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+ int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT;
int nla_size = 0;
- int attr_id;
int size;
- if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
- dev->netdev_ops->ndo_get_offload_stats))
- return 0;
+ if (off_filter_mask &
+ IFLA_STATS_FILTER_BIT(attr_id_cpu_hit)) {
+ size = rtnl_offload_xstats_get_size_ndo(dev, attr_id_cpu_hit);
+ nla_size += nla_total_size_64bit(size);
+ }
- for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
- attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
- if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
- continue;
- size = rtnl_get_offload_stats_attr_size(attr_id);
+ if (off_filter_mask &
+ IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO))
+ nla_size += rtnl_offload_xstats_get_size_hw_s_info(dev);
+
+ if (off_filter_mask &
+ IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_L3_STATS)) {
+ size = rtnl_offload_xstats_get_size_stats(dev, t_l3);
nla_size += nla_total_size_64bit(size);
}
@@ -5133,11 +5307,21 @@ static int rtnl_get_offload_stats_size(const struct net_device *dev)
return nla_size;
}
+struct rtnl_stats_dump_filters {
+ /* mask[0] filters outer attributes. Then individual nests have their
+ * filtering mask at the index of the nested attribute.
+ */
+ u32 mask[IFLA_STATS_MAX + 1];
+};
+
static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
- unsigned int flags, unsigned int filter_mask,
- int *idxattr, int *prividx)
+ unsigned int flags,
+ const struct rtnl_stats_dump_filters *filters,
+ int *idxattr, int *prividx,
+ struct netlink_ext_ack *extack)
{
+ unsigned int filter_mask = filters->mask[0];
struct if_stats_msg *ifsm;
struct nlmsghdr *nlh;
struct nlattr *attr;
@@ -5163,8 +5347,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64,
sizeof(struct rtnl_link_stats64),
IFLA_STATS_UNSPEC);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
sp = nla_data(attr);
dev_get_stats(dev, sp);
@@ -5177,8 +5363,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
*idxattr = IFLA_STATS_LINK_XSTATS;
attr = nla_nest_start_noflag(skb,
IFLA_STATS_LINK_XSTATS);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
nla_nest_end(skb, attr);
@@ -5200,8 +5388,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
*idxattr = IFLA_STATS_LINK_XSTATS_SLAVE;
attr = nla_nest_start_noflag(skb,
IFLA_STATS_LINK_XSTATS_SLAVE);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
nla_nest_end(skb, attr);
@@ -5213,13 +5403,19 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS,
*idxattr)) {
+ u32 off_filter_mask;
+
+ off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS];
*idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS;
attr = nla_nest_start_noflag(skb,
IFLA_STATS_LINK_OFFLOAD_XSTATS);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
- err = rtnl_get_offload_stats(skb, dev, prividx);
+ err = rtnl_offload_xstats_fill(skb, dev, prividx,
+ off_filter_mask, extack);
if (err == -ENODATA)
nla_nest_cancel(skb, attr);
else
@@ -5235,8 +5431,10 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
*idxattr = IFLA_STATS_AF_SPEC;
attr = nla_nest_start_noflag(skb, IFLA_STATS_AF_SPEC);
- if (!attr)
+ if (!attr) {
+ err = -EMSGSIZE;
goto nla_put_failure;
+ }
rcu_read_lock();
list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
@@ -5280,13 +5478,14 @@ nla_put_failure:
else
nlmsg_end(skb, nlh);
- return -EMSGSIZE;
+ return err;
}
static size_t if_nlmsg_stats_size(const struct net_device *dev,
- u32 filter_mask)
+ const struct rtnl_stats_dump_filters *filters)
{
size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg));
+ unsigned int filter_mask = filters->mask[0];
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
@@ -5322,8 +5521,12 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
}
}
- if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0))
- size += rtnl_get_offload_stats_size(dev);
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) {
+ u32 off_filter_mask;
+
+ off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS];
+ size += rtnl_offload_xstats_get_size(dev, off_filter_mask);
+ }
if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, 0)) {
struct rtnl_af_ops *af_ops;
@@ -5347,6 +5550,79 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
return size;
}
+#define RTNL_STATS_OFFLOAD_XSTATS_VALID ((1 << __IFLA_OFFLOAD_XSTATS_MAX) - 1)
+
+static const struct nla_policy
+rtnl_stats_get_policy_filters[IFLA_STATS_MAX + 1] = {
+ [IFLA_STATS_LINK_OFFLOAD_XSTATS] =
+ NLA_POLICY_MASK(NLA_U32, RTNL_STATS_OFFLOAD_XSTATS_VALID),
+};
+
+static const struct nla_policy
+rtnl_stats_get_policy[IFLA_STATS_GETSET_MAX + 1] = {
+ [IFLA_STATS_GET_FILTERS] =
+ NLA_POLICY_NESTED(rtnl_stats_get_policy_filters),
+};
+
+static const struct nla_policy
+ifla_stats_set_policy[IFLA_STATS_GETSET_MAX + 1] = {
+ [IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS] = NLA_POLICY_MAX(NLA_U8, 1),
+};
+
+static int rtnl_stats_get_parse_filters(struct nlattr *ifla_filters,
+ struct rtnl_stats_dump_filters *filters,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_STATS_MAX + 1];
+ int err;
+ int at;
+
+ err = nla_parse_nested(tb, IFLA_STATS_MAX, ifla_filters,
+ rtnl_stats_get_policy_filters, extack);
+ if (err < 0)
+ return err;
+
+ for (at = 1; at <= IFLA_STATS_MAX; at++) {
+ if (tb[at]) {
+ if (!(filters->mask[0] & IFLA_STATS_FILTER_BIT(at))) {
+ NL_SET_ERR_MSG(extack, "Filtered attribute not enabled in filter_mask");
+ return -EINVAL;
+ }
+ filters->mask[at] = nla_get_u32(tb[at]);
+ }
+ }
+
+ return 0;
+}
+
+static int rtnl_stats_get_parse(const struct nlmsghdr *nlh,
+ u32 filter_mask,
+ struct rtnl_stats_dump_filters *filters,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1];
+ int err;
+ int i;
+
+ filters->mask[0] = filter_mask;
+ for (i = 1; i < ARRAY_SIZE(filters->mask); i++)
+ filters->mask[i] = -1U;
+
+ err = nlmsg_parse(nlh, sizeof(struct if_stats_msg), tb,
+ IFLA_STATS_GETSET_MAX, rtnl_stats_get_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (tb[IFLA_STATS_GET_FILTERS]) {
+ err = rtnl_stats_get_parse_filters(tb[IFLA_STATS_GET_FILTERS],
+ filters, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
bool is_dump, struct netlink_ext_ack *extack)
{
@@ -5369,10 +5645,6 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request");
return -EINVAL;
}
- if (nlmsg_attrlen(nlh, sizeof(*ifsm))) {
- NL_SET_ERR_MSG(extack, "Invalid attributes after stats header");
- return -EINVAL;
- }
if (ifsm->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) {
NL_SET_ERR_MSG(extack, "Invalid stats requested through filter mask");
return -EINVAL;
@@ -5384,12 +5656,12 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct rtnl_stats_dump_filters filters;
struct net *net = sock_net(skb->sk);
struct net_device *dev = NULL;
int idxattr = 0, prividx = 0;
struct if_stats_msg *ifsm;
struct sk_buff *nskb;
- u32 filter_mask;
int err;
err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb),
@@ -5406,19 +5678,22 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!dev)
return -ENODEV;
- filter_mask = ifsm->filter_mask;
- if (!filter_mask) {
+ if (!ifsm->filter_mask) {
NL_SET_ERR_MSG(extack, "Filter mask must be set for stats get");
return -EINVAL;
}
- nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL);
+ err = rtnl_stats_get_parse(nlh, ifsm->filter_mask, &filters, extack);
+ if (err)
+ return err;
+
+ nskb = nlmsg_new(if_nlmsg_stats_size(dev, &filters), GFP_KERNEL);
if (!nskb)
return -ENOBUFS;
err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
- 0, filter_mask, &idxattr, &prividx);
+ 0, &filters, &idxattr, &prividx, extack);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
WARN_ON(err == -EMSGSIZE);
@@ -5434,12 +5709,12 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct netlink_ext_ack *extack = cb->extack;
int h, s_h, err, s_idx, s_idxattr, s_prividx;
+ struct rtnl_stats_dump_filters filters;
struct net *net = sock_net(skb->sk);
unsigned int flags = NLM_F_MULTI;
struct if_stats_msg *ifsm;
struct hlist_head *head;
struct net_device *dev;
- u32 filter_mask = 0;
int idx = 0;
s_h = cb->args[0];
@@ -5454,12 +5729,16 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
return err;
ifsm = nlmsg_data(cb->nlh);
- filter_mask = ifsm->filter_mask;
- if (!filter_mask) {
+ if (!ifsm->filter_mask) {
NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump");
return -EINVAL;
}
+ err = rtnl_stats_get_parse(cb->nlh, ifsm->filter_mask, &filters,
+ extack);
+ if (err)
+ return err;
+
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
@@ -5469,8 +5748,9 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
- flags, filter_mask,
- &s_idxattr, &s_prividx);
+ flags, &filters,
+ &s_idxattr, &s_prividx,
+ extack);
/* If we ran out of room on the first message,
* we're in trouble
*/
@@ -5494,6 +5774,107 @@ out:
return skb->len;
}
+void rtnl_offload_xstats_notify(struct net_device *dev)
+{
+ struct rtnl_stats_dump_filters response_filters = {};
+ struct net *net = dev_net(dev);
+ int idxattr = 0, prividx = 0;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ ASSERT_RTNL();
+
+ response_filters.mask[0] |=
+ IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS);
+ response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |=
+ IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO);
+
+ skb = nlmsg_new(if_nlmsg_stats_size(dev, &response_filters),
+ GFP_KERNEL);
+ if (!skb)
+ goto errout;
+
+ err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, 0, 0, 0, 0,
+ &response_filters, &idxattr, &prividx, NULL);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_STATS, NULL, GFP_KERNEL);
+ return;
+
+errout:
+ rtnl_set_sk_err(net, RTNLGRP_STATS, err);
+}
+EXPORT_SYMBOL(rtnl_offload_xstats_notify);
+
+static int rtnl_stats_set(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
+ struct rtnl_stats_dump_filters response_filters = {};
+ struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev = NULL;
+ struct if_stats_msg *ifsm;
+ bool notify = false;
+ int err;
+
+ err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb),
+ false, extack);
+ if (err)
+ return err;
+
+ ifsm = nlmsg_data(nlh);
+ if (ifsm->family != AF_UNSPEC) {
+ NL_SET_ERR_MSG(extack, "Address family should be AF_UNSPEC");
+ return -EINVAL;
+ }
+
+ if (ifsm->ifindex > 0)
+ dev = __dev_get_by_index(net, ifsm->ifindex);
+ else
+ return -EINVAL;
+
+ if (!dev)
+ return -ENODEV;
+
+ if (ifsm->filter_mask) {
+ NL_SET_ERR_MSG(extack, "Filter mask must be 0 for stats set");
+ return -EINVAL;
+ }
+
+ err = nlmsg_parse(nlh, sizeof(*ifsm), tb, IFLA_STATS_GETSET_MAX,
+ ifla_stats_set_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]) {
+ u8 req = nla_get_u8(tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]);
+
+ if (req)
+ err = netdev_offload_xstats_enable(dev, t_l3, extack);
+ else
+ err = netdev_offload_xstats_disable(dev, t_l3);
+
+ if (!err)
+ notify = true;
+ else if (err != -EALREADY)
+ return err;
+
+ response_filters.mask[0] |=
+ IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS);
+ response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |=
+ IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO);
+ }
+
+ if (notify)
+ rtnl_offload_xstats_notify(dev);
+
+ return 0;
+}
+
/* Process one rtnetlink message. */
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -5719,4 +6100,5 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
0);
+ rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0);
}
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 6ad3ee02e023..d8ceee9e0d6f 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -76,6 +76,7 @@ static const struct nlmsg_perm nlmsg_route_perms[] =
{ RTM_GETNSID, NETLINK_ROUTE_SOCKET__NLMSG_READ },
{ RTM_NEWSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ },
{ RTM_GETSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ },
+ { RTM_SETSTATS, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
{ RTM_NEWCACHEREPORT, NETLINK_ROUTE_SOCKET__NLMSG_READ },
{ RTM_NEWCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
{ RTM_DELCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
new file mode 100755
index 000000000000..1c11c4256d06
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+ +----------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.200 + | | + $h2.200 |
+# | 192.0.2.1/28 | | | | 192.0.2.18/28 |
+# | 2001:db8:1::1/64 | | | | 2001:db8:2::1/64 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# | | | | | |
+# +------------------|-+ +-|--------------------+
+# | |
+# +------------------|-------------------------|--------------------+
+# | SW | | |
+# | | | |
+# | $rp1 + + $rp2 |
+# | | | |
+# | $rp1.200 + + $rp2.200 |
+# | 192.0.2.2/28 192.0.2.17/28 |
+# | 2001:db8:1::2/64 2001:db8:2::2/64 |
+# | |
+# +-----------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ test_stats_rx_ipv4
+ test_stats_tx_ipv4
+ test_stats_rx_ipv6
+ test_stats_tx_ipv6
+ respin_enablement
+ test_stats_rx_ipv4
+ test_stats_tx_ipv4
+ test_stats_rx_ipv6
+ test_stats_tx_ipv6
+ reapply_config
+ ping_ipv4
+ ping_ipv6
+ test_stats_rx_ipv4
+ test_stats_tx_ipv4
+ test_stats_rx_ipv6
+ test_stats_tx_ipv6
+ test_stats_report_rx
+ test_stats_report_tx
+ test_destroy_enabled
+ test_double_enable
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+ vlan_destroy $h1 200
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64
+ ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+ ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+ vlan_destroy $h2 200
+ simple_if_fini $h2
+}
+
+router_rp1_200_create()
+{
+ ip link add name $rp1.200 up \
+ link $rp1 addrgenmode eui64 type vlan id 200
+ ip address add dev $rp1.200 192.0.2.2/28
+ ip address add dev $rp1.200 2001:db8:1::2/64
+ ip stats set dev $rp1.200 l3_stats on
+}
+
+router_rp1_200_destroy()
+{
+ ip stats set dev $rp1.200 l3_stats off
+ ip address del dev $rp1.200 2001:db8:1::2/64
+ ip address del dev $rp1.200 192.0.2.2/28
+ ip link del dev $rp1.200
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ router_rp1_200_create
+
+ ip link set dev $rp2 up
+ vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+ vlan_destroy $rp2 200
+ ip link set dev $rp2 down
+
+ router_rp1_200_destroy
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1mac=$(mac_get $rp1)
+ rp2mac=$(mac_get $rp2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1.200 192.0.2.18 " IPv4"
+}
+
+ping_ipv6()
+{
+ ping_test $h1.200 2001:db8:2::1 " IPv6"
+}
+
+get_l3_stat()
+{
+ local selector=$1; shift
+
+ ip -j stats show dev $rp1.200 group offload subgroup l3_stats |
+ jq '.[0].stats64.'$selector
+}
+
+send_packets_rx_ipv4()
+{
+ # Send 21 packets instead of 20, because the first one might trap and go
+ # through the SW datapath, which might not bump the HW counter.
+ $MZ $h1.200 -c 21 -d 20msec -p 100 \
+ -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \
+ -q -t udp sp=54321,dp=12345
+}
+
+send_packets_rx_ipv6()
+{
+ $MZ $h1.200 -6 -c 21 -d 20msec -p 100 \
+ -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \
+ -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv4()
+{
+ $MZ $h2.200 -c 21 -d 20msec -p 100 \
+ -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \
+ -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv6()
+{
+ $MZ $h2.200 -6 -c 21 -d 20msec -p 100 \
+ -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \
+ -q -t udp sp=54321,dp=12345
+}
+
+___test_stats()
+{
+ local dir=$1; shift
+ local prot=$1; shift
+
+ local a
+ local b
+
+ a=$(get_l3_stat ${dir}.packets)
+ send_packets_${dir}_${prot}
+ "$@"
+ b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+ get_l3_stat ${dir}.packets)
+ check_err $? "Traffic not reflected in the counter: $a -> $b"
+}
+
+__test_stats()
+{
+ local dir=$1; shift
+ local prot=$1; shift
+
+ RET=0
+ ___test_stats "$dir" "$prot"
+ log_test "Test $dir packets: $prot"
+}
+
+test_stats_rx_ipv4()
+{
+ __test_stats rx ipv4
+}
+
+test_stats_tx_ipv4()
+{
+ __test_stats tx ipv4
+}
+
+test_stats_rx_ipv6()
+{
+ __test_stats rx ipv6
+}
+
+test_stats_tx_ipv6()
+{
+ __test_stats tx ipv6
+}
+
+# Make sure everything works well even after stats have been disabled and
+# reenabled on the same device without touching the L3 configuration.
+respin_enablement()
+{
+ log_info "Turning stats off and on again"
+ ip stats set dev $rp1.200 l3_stats off
+ ip stats set dev $rp1.200 l3_stats on
+}
+
+# For the initial run, l3_stats is enabled on a completely set up netdevice. Now
+# do it the other way around: enabling the L3 stats on an L2 netdevice, and only
+# then apply the L3 configuration.
+reapply_config()
+{
+ log_info "Reapplying configuration"
+
+ router_rp1_200_destroy
+
+ ip link add name $rp1.200 link $rp1 addrgenmode none type vlan id 200
+ ip stats set dev $rp1.200 l3_stats on
+ ip link set dev $rp1.200 up addrgenmode eui64
+ ip address add dev $rp1.200 192.0.2.2/28
+ ip address add dev $rp1.200 2001:db8:1::2/64
+}
+
+__test_stats_report()
+{
+ local dir=$1; shift
+ local prot=$1; shift
+
+ local a
+ local b
+
+ RET=0
+
+ a=$(get_l3_stat ${dir}.packets)
+ send_packets_${dir}_${prot}
+ ip address flush dev $rp1.200
+ b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+ get_l3_stat ${dir}.packets)
+ check_err $? "Traffic not reflected in the counter: $a -> $b"
+ log_test "Test ${dir} packets: stats pushed on loss of L3"
+
+ ip stats set dev $rp1.200 l3_stats off
+ ip link del dev $rp1.200
+ router_rp1_200_create
+}
+
+test_stats_report_rx()
+{
+ __test_stats_report rx ipv4
+}
+
+test_stats_report_tx()
+{
+ __test_stats_report tx ipv4
+}
+
+test_destroy_enabled()
+{
+ RET=0
+
+ ip link del dev $rp1.200
+ router_rp1_200_create
+
+ log_test "Destroy l3_stats-enabled netdev"
+}
+
+test_double_enable()
+{
+ RET=0
+ ___test_stats rx ipv4 \
+ ip stats set dev $rp1.200 l3_stats on
+ log_test "Test stat retention across a spurious enablement"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS