summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-11-18 08:41:41 +0300
committerJakub Kicinski <kuba@kernel.org>2022-11-18 08:43:34 +0300
commit24f627a3a85f2b3c71e0e975a641e4f6df5df4a2 (patch)
treeb54b999134fa309a2d518e1fcb1df3d21119b131 /include
parent4ab45e973c3fd9e460b1e23cd018ea7f3d8a8b67 (diff)
parent242dd64375b80aecd7eacb1e7c4b052c481c15e7 (diff)
downloadlinux-24f627a3a85f2b3c71e0e975a641e4f6df5df4a2.tar.xz
Merge branch 'implement-devlink-rate-api-and-extend-it'
Michal Wilczynski says: ==================== Implement devlink-rate API and extend it This patch series implements devlink-rate for the ice driver. Unfortunately, the current API isn't flexible enough for our use case, so there is a need to extend it. Some functions have been introduced to enable the driver to export the current Tx scheduling configuration. Pasting the justification for this series from the commit implementing devlink-rate in the ice driver (that is a part of this series): There is a need to support modification of the Tx scheduler tree in the ice driver. This will allow the user to control the Tx settings of each node in the internal hierarchy of nodes. As a result, the user will be able to use Hierarchy QoS implemented entirely in the hardware. This patch implements the devlink-rate API. It also exports the initial default hierarchy. It's mostly dictated by the fact that the tree can't be removed entirely; all we can do is enable the user to modify it. For example, the root node shouldn't ever be removed, and nodes that have children are also off-limits. 
Example initial tree with 2 VFs: [root@fedora ~]# devlink port function rate show pci/0000:4b:00.0/node_27: type node parent node_26 pci/0000:4b:00.0/node_26: type node parent node_0 pci/0000:4b:00.0/node_34: type node parent node_33 pci/0000:4b:00.0/node_33: type node parent node_32 pci/0000:4b:00.0/node_32: type node parent node_16 pci/0000:4b:00.0/node_19: type node parent node_18 pci/0000:4b:00.0/node_18: type node parent node_17 pci/0000:4b:00.0/node_17: type node parent node_16 pci/0000:4b:00.0/node_21: type node parent node_20 pci/0000:4b:00.0/node_20: type node parent node_3 pci/0000:4b:00.0/node_14: type node parent node_5 pci/0000:4b:00.0/node_5: type node parent node_3 pci/0000:4b:00.0/node_13: type node parent node_4 pci/0000:4b:00.0/node_12: type node parent node_4 pci/0000:4b:00.0/node_11: type node parent node_4 pci/0000:4b:00.0/node_10: type node parent node_4 pci/0000:4b:00.0/node_9: type node parent node_4 pci/0000:4b:00.0/node_8: type node parent node_4 pci/0000:4b:00.0/node_7: type node parent node_4 pci/0000:4b:00.0/node_6: type node parent node_4 pci/0000:4b:00.0/node_4: type node parent node_3 pci/0000:4b:00.0/node_3: type node parent node_16 pci/0000:4b:00.0/node_16: type node parent node_15 pci/0000:4b:00.0/node_15: type node parent node_0 pci/0000:4b:00.0/node_2: type node parent node_1 pci/0000:4b:00.0/node_1: type node parent node_0 pci/0000:4b:00.0/node_0: type node pci/0000:4b:00.0/1: type leaf parent node_27 pci/0000:4b:00.0/2: type leaf parent node_27 Let me visualize part of the tree: +---------+ | node_0 | +---------+ | +----v----+ | node_26 | +----+----+ | +----v----+ | node_27 | +----+----+ | |-----------------| +----v----+ +----v----+ | VF 1 | | VF 2 | +----+----+ +----+----+ So at this point there are a couple of things that can be done. For example, we could only assign parameters to VFs. [root@fedora ~]# devlink port function rate set pci/0000:4b:00.0/1 \ tx_max 5Gbps This would cap the VF 1 BW to 5Gbps. 
But let's say you would like to create a completely new branch. This can be done like this: [root@fedora ~]# devlink port function rate add \ pci/0000:4b:00.0/node_custom parent node_0 [root@fedora ~]# devlink port function rate add \ pci/0000:4b:00.0/node_custom_1 parent node_custom [root@fedora ~]# devlink port function rate set \ pci/0000:4b:00.0/1 parent node_custom_1 This creates a completely new branch and reassigns VF 1 to it. A number of parameters are supported for each node: tx_max, tx_share, tx_priority and tx_weight. ==================== Link: https://lore.kernel.org/r/20221115104825.172668-1-michal.wilczynski@intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/net/devlink.h18
-rw-r--r--include/uapi/linux/devlink.h3
2 files changed, 20 insertions, 1 deletions
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 611a23a3deb2..074a79b8933f 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -114,6 +114,9 @@ struct devlink_rate {
refcount_t refcnt;
};
};
+
+ u32 tx_priority;
+ u32 tx_weight;
};
struct devlink_port {
@@ -1511,10 +1514,18 @@ struct devlink_ops {
u64 tx_share, struct netlink_ext_ack *extack);
int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
u64 tx_max, struct netlink_ext_ack *extack);
+ int (*rate_leaf_tx_priority_set)(struct devlink_rate *devlink_rate, void *priv,
+ u32 tx_priority, struct netlink_ext_ack *extack);
+ int (*rate_leaf_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv,
+ u32 tx_weight, struct netlink_ext_ack *extack);
int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv,
u64 tx_share, struct netlink_ext_ack *extack);
int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
u64 tx_max, struct netlink_ext_ack *extack);
+ int (*rate_node_tx_priority_set)(struct devlink_rate *devlink_rate, void *priv,
+ u32 tx_priority, struct netlink_ext_ack *extack);
+ int (*rate_node_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv,
+ u32 tx_weight, struct netlink_ext_ack *extack);
int (*rate_node_new)(struct devlink_rate *rate_node, void **priv,
struct netlink_ext_ack *extack);
int (*rate_node_del)(struct devlink_rate *rate_node, void *priv,
@@ -1606,7 +1617,12 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
u32 controller, u16 pf, u32 sf,
bool external);
-int devl_rate_leaf_create(struct devlink_port *port, void *priv);
+struct devlink_rate *
+devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
+ struct devlink_rate *parent);
+int
+devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
+ struct devlink_rate *parent);
void devl_rate_leaf_destroy(struct devlink_port *devlink_port);
void devl_rate_nodes_destroy(struct devlink *devlink);
void devlink_port_linecard_set(struct devlink_port *devlink_port,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 2f24b53a87a5..498d0d5d0957 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -607,6 +607,9 @@ enum devlink_attr {
DEVLINK_ATTR_SELFTESTS, /* nested */
+ DEVLINK_ATTR_RATE_TX_PRIORITY, /* u32 */
+ DEVLINK_ATTR_RATE_TX_WEIGHT, /* u32 */
+
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,