From 29cc6679076a00a6ce193004dcf2d14ae7c428a5 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 14 Jul 2016 10:32:37 +0300 Subject: net/mlx5: Store counters in rbtree instead of list In order to use bulk counters, we need to have counters sorted by id. Signed-off-by: Amir Vadai Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 1 + .../net/ethernet/mellanox/mlx5/core/fs_counters.c | 64 ++++++++++++++++++---- include/linux/mlx5/driver.h | 2 +- 3 files changed, 56 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index d7ba91a1eea2..9cffb6aeb4e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -111,6 +111,7 @@ struct mlx5_fc_cache { }; struct mlx5_fc { + struct rb_node node; struct list_head list; /* last{packets,bytes} members are used when calculating the delta since diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 164dc37fda72..aaf8fd149cc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -32,6 +32,7 @@ #include #include +#include #include "mlx5_core.h" #include "fs_core.h" #include "fs_cmd.h" @@ -68,6 +69,27 @@ * elapsed, the thread will actually query the hardware. */ +static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter) +{ + struct rb_node **new = &root->rb_node; + struct rb_node *parent = NULL; + + while (*new) { + struct mlx5_fc *this = container_of(*new, struct mlx5_fc, node); + int result = counter->id - this->id; + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&counter->node, parent, new); + rb_insert_color(&counter->node, root); +} + static void mlx5_fc_stats_work(struct work_struct *work) { struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, @@ -75,25 +97,35 @@ static void mlx5_fc_stats_work(struct work_struct *work) struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; unsigned long now = jiffies; struct mlx5_fc *counter; - struct mlx5_fc *tmp; + struct rb_node *node; + LIST_HEAD(tmplist); int err = 0; spin_lock(&fc_stats->addlist_lock); - list_splice_tail_init(&fc_stats->addlist, &fc_stats->list); + list_splice_tail_init(&fc_stats->addlist, &tmplist); - if (!list_empty(&fc_stats->list)) + if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters)) queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD); spin_unlock(&fc_stats->addlist_lock); - list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) { - struct mlx5_fc_cache *c = &counter->cache; + list_for_each_entry(counter, &tmplist, list) + mlx5_fc_stats_insert(&fc_stats->counters, counter); + + node = rb_first(&fc_stats->counters); + while (node) { + struct mlx5_fc_cache *c; u64 packets; u64 bytes; + counter = rb_entry(node, struct mlx5_fc, node); + c = &counter->cache; + + node = rb_next(node); + if (counter->deleted) { - list_del(&counter->list); + rb_erase(&counter->node, &fc_stats->counters); mlx5_cmd_fc_free(dev, counter->id); @@ -176,7 +208,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; - INIT_LIST_HEAD(&fc_stats->list); + fc_stats->counters = RB_ROOT; INIT_LIST_HEAD(&fc_stats->addlist); spin_lock_init(&fc_stats->addlist_lock); @@ -194,20 +226,32 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; struct mlx5_fc *counter; struct mlx5_fc *tmp; + struct rb_node *node; cancel_delayed_work_sync(&dev->priv.fc_stats.work); destroy_workqueue(dev->priv.fc_stats.wq); dev->priv.fc_stats.wq = NULL; - list_splice_tail_init(&fc_stats->addlist, &fc_stats->list); - - list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) { + list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) { list_del(&counter->list); mlx5_cmd_fc_free(dev, counter->id); kfree(counter); } + + node = rb_first(&fc_stats->counters); + while (node) { + counter = rb_entry(node, struct mlx5_fc, node); + + node = rb_next(node); + + rb_erase(&counter->node, &fc_stats->counters); + + mlx5_cmd_fc_free(dev, counter->id); + + kfree(counter); + } } void mlx5_fc_query_cached(struct mlx5_fc *counter, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 81e8396574f4..a041b99fceac 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -469,7 +469,7 @@ struct mlx5_irq_info { }; struct mlx5_fc_stats { - struct list_head list; + struct rb_root counters; struct list_head addlist; /* protect addlist add/splice operations */ spinlock_t addlist_lock; -- cgit v1.2.3 From a351a1b03bf169f77891060be30036ef71cbe618 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 14 Jul 2016 10:32:38 +0300 Subject: net/mlx5: Introduce bulk reading of flow counters This commit utilize the ability of ConnectX-4 to bulk read flow counters. Few bulk counter queries could be done instead of issuing thousands of firmware commands per second to get statistics of all flows set to HW, such as those programmed when we offload tc filters. Counters are stored sorted by hardware id, and queried in blocks (id + number of counters). Due to hardware requirement, start of block and number of counters in a block must be four aligned. Reviewed-by: Or Gerlitz Signed-off-by: Amir Vadai Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 67 +++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 12 ++++ .../net/ethernet/mellanox/mlx5/core/fs_counters.c | 83 ++++++++++++++++------ include/linux/mlx5/mlx5_ifc.h | 8 ++- 4 files changed, 146 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index a5bb6b695242..9134010e2921 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -413,3 +413,70 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, return 0; } + +struct mlx5_cmd_fc_bulk { + u16 id; + int num; + int outlen; + u32 out[0]; +}; + +struct mlx5_cmd_fc_bulk * +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) +{ + struct mlx5_cmd_fc_bulk *b; + int outlen = sizeof(*b) + + MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter) * num; + + b = kzalloc(outlen, GFP_KERNEL); + if (!b) + return NULL; + + b->id = id; + b->num = num; + b->outlen = outlen; + + return b; +} + +void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b) +{ + kfree(b); +} + +int +mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) +{ + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_flow_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_COUNTER); + MLX5_SET(query_flow_counter_in, in, op_mod, 0); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id); + MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + b->out, b->outlen); +} + +void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, + struct mlx5_cmd_fc_bulk *b, u16 id, + u64 *packets, u64 *bytes) +{ + int index = id - b->id; + void *stats; + + if (index < 0 || index >= b->num) { + mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n", + id, b->id, b->id + b->num - 1); + return; + } + + stats = MLX5_ADDR_OF(query_flow_counter_out, b->out, + flow_statistics[index]); + *packets = MLX5_GET64(traffic_counter, stats, packets); + *bytes = MLX5_GET64(traffic_counter, stats, octets); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index fc4f7b83fe0a..158844cef82b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -76,4 +76,16 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id); int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id); int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, u64 *packets, u64 *bytes); + +struct mlx5_cmd_fc_bulk; + +struct mlx5_cmd_fc_bulk * +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num); +void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b); +int +mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b); +void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, + struct mlx5_cmd_fc_bulk *b, u16 id, + u64 *packets, u64 *bytes); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index aaf8fd149cc6..c2877e9de8a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -90,16 +90,66 @@ static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter) rb_insert_color(&counter->node, root); } +static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, + struct mlx5_fc *first, + u16 last_id) +{ + struct mlx5_cmd_fc_bulk *b; + struct rb_node *node = NULL; + u16 afirst_id; + int num; + int err; + int max_bulk = 1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk); + + /* first id must be aligned to 4 when using bulk query */ + afirst_id = first->id & ~0x3; + + /* number of counters to query inc. the last counter */ + num = ALIGN(last_id - afirst_id + 1, 4); + if (num > max_bulk) { + num = max_bulk; + last_id = afirst_id + num - 1; + } + + b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num); + if (!b) { + mlx5_core_err(dev, "Error allocating resources for bulk query\n"); + return NULL; + } + + err = mlx5_cmd_fc_bulk_query(dev, b); + if (err) { + mlx5_core_err(dev, "Error doing bulk query: %d\n", err); + goto out; + } + + for (node = &first->node; node; node = rb_next(node)) { + struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node); + struct mlx5_fc_cache *c = &counter->cache; + + if (counter->id > last_id) + break; + + mlx5_cmd_fc_bulk_get(dev, b, + counter->id, &c->packets, &c->bytes); + } + +out: + mlx5_cmd_fc_bulk_free(b); + + return node; +} + static void mlx5_fc_stats_work(struct work_struct *work) { struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, priv.fc_stats.work.work); struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; unsigned long now = jiffies; - struct mlx5_fc *counter; + struct mlx5_fc *counter = NULL; + struct mlx5_fc *last = NULL; struct rb_node *node; LIST_HEAD(tmplist); - int err = 0; spin_lock(&fc_stats->addlist_lock); @@ -115,12 +165,7 @@ static void mlx5_fc_stats_work(struct work_struct *work) node = rb_first(&fc_stats->counters); while (node) { - struct mlx5_fc_cache *c; - u64 packets; - u64 bytes; - counter = rb_entry(node, struct mlx5_fc, node); - c = &counter->cache; node = rb_next(node); @@ -133,26 +178,20 @@ static void mlx5_fc_stats_work(struct work_struct *work) continue; } - if (time_before(now, fc_stats->next_query)) - continue; + last = counter; + } - err = mlx5_cmd_fc_query(dev, counter->id, &packets, &bytes); - if (err) { - pr_err("Error querying stats for counter id %d\n", - counter->id); - continue; - } + if (time_before(now, fc_stats->next_query) || !last) + return; - if (packets == c->packets) - continue; + node = rb_first(&fc_stats->counters); + while (node) { + counter = rb_entry(node, struct mlx5_fc, node); - c->lastuse = jiffies; - c->packets = packets; - c->bytes = bytes; + node = mlx5_fc_stats_query(dev, counter, last->id); } - if (time_after_eq(now, fc_stats->next_query)) - fc_stats->next_query = now + MLX5_FC_STATS_PERIOD; + fc_stats->next_query = now + MLX5_FC_STATS_PERIOD; } struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 152421cc6f44..d671e4e8e7db 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -893,7 +893,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_330[0xb]; u8 log_max_xrcd[0x5]; - u8 reserved_at_340[0x20]; + u8 reserved_at_340[0x8]; + u8 log_max_flow_counter_bulk[0x8]; + u8 max_flow_counter[0x10]; + u8 reserved_at_360[0x3]; u8 log_max_rq[0x5]; @@ -980,7 +983,8 @@ struct mlx5_ifc_dest_format_struct_bits { }; struct mlx5_ifc_flow_counter_list_bits { - u8 reserved_at_0[0x10]; + u8 clear[0x1]; + u8 num_of_counters[0xf]; u8 flow_counter_id[0x10]; u8 reserved_at_20[0x20]; -- cgit v1.2.3 From 55130287875c96b1e4669ee9713621e7d7f055b2 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 14 Jul 2016 10:32:39 +0300 Subject: net/mlx5e: Offload TC flow counters only when supported Currenly, the code that programs the flow actions into the firmware doesn't check if was actually asked to offload the statistics, fix that. Fixes: aad7e08d39bd ('net/mlx5e: Hardware offloaded flower filter statistics support') Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3261e8b1286e..cd58fc8f9174 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -62,7 +62,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = priv->fs.vlan.ft.t; - } else { + } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) return ERR_CAST(counter); -- cgit v1.2.3 From 1033665e63b6d98e91c1b938bad2dc624a72c137 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 14 Jul 2016 10:32:40 +0300 Subject: net/mlx5: E-Switch, Use two priorities for SRIOV offloads mode In the offloads mode, some slow path rules are added by the driver (e.g send-to-vport), while offloaded rules are to be added from upper layers. The slow path rules have lower priority and we don't want matching on offloaded rules to suffer from extra steering hops related to the slow path rules. We use two priorities, one for offloaded rules (fast path), and one for the control rules (slow path). To allow for that, we enable two priorities for the FDB namespace in the FS core code. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 34 +++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 22 +++++++++----- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 7b45e6a6efb8..035e536a9911 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -145,6 +145,7 @@ struct mlx5_eswitch_fdb { } legacy; struct offloads_fdb { + struct mlx5_flow_table *fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_rule *miss_rule; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1842dfb4636b..27122c043aea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -38,6 +38,11 @@ #include "mlx5_core.h" #include "eswitch.h" +enum { + FDB_FAST_PATH = 0, + FDB_SLOW_PATH +}; + static struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { @@ -149,7 +154,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = 0; - flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec, + flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest); if (IS_ERR(flow_rule)) { @@ -165,6 +170,8 @@ out: } #define MAX_PF_SQ 256 +#define ESW_OFFLOADS_NUM_ENTRIES (1 << 13) /* 8K */ +#define ESW_OFFLOADS_NUM_GROUPS 4 static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) { @@ -190,15 +197,25 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - table_size = nvports + MAX_PF_SQ + 1; - fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); + fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, + ESW_OFFLOADS_NUM_ENTRIES, + ESW_OFFLOADS_NUM_GROUPS, 0); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create FDB Table err %d\n", err); - goto fdb_err; + esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); + goto fast_fdb_err; } esw->fdb_table.fdb = fdb; + table_size = nvports + MAX_PF_SQ + 1; + fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); + goto slow_fdb_err; + } + esw->fdb_table.offloads.fdb = fdb; + /* create send-to-vport group */ memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -247,8 +264,10 @@ miss_rule_err: miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: - mlx5_destroy_flow_table(fdb); -fdb_err: + mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb); +slow_fdb_err: + mlx5_destroy_flow_table(esw->fdb_table.fdb); +fast_fdb_err: ns_err: kvfree(flow_group_in); return err; @@ -264,6 +283,7 @@ static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); + mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb); mlx5_destroy_flow_table(esw->fdb_table.fdb); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index b0a130479085..1a377b403321 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1712,15 +1712,21 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) if (!steering->fdb_root_ns) return -ENOMEM; - /* Create single prio */ prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 1); - if (IS_ERR(prio)) { - cleanup_root_ns(steering->fdb_root_ns); - steering->fdb_root_ns = NULL; - return PTR_ERR(prio); - } else { - return 0; - } + if (IS_ERR(prio)) + goto out_err; + + prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); + if (IS_ERR(prio)) + goto out_err; + + set_prio_attrs(steering->fdb_root_ns); + return 0; + +out_err: + cleanup_root_ns(steering->fdb_root_ns); + steering->fdb_root_ns = NULL; + return PTR_ERR(prio); } static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering) -- cgit v1.2.3 From 3d80d1a2f59ab350f452e1dbc085ba68e5fd8169 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 14 Jul 2016 10:32:41 +0300 Subject: net/mlx5: E-Switch, Add API to configure rules for the offloaded mode This allows for upper levels in the driver, e.g the TC offload code to add e-switch offloaded steering rules. The caller provides the rule spec for matching, action, source and destination vports. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 6 +++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 43 ++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 035e536a9911..c0b05603fc31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -222,6 +222,12 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats); +struct mlx5_flow_spec; + +struct mlx5_flow_rule * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + u32 action, u32 src_vport, u32 dst_vport); struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 27122c043aea..a357e8eeeed8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -43,6 +43,49 @@ enum { FDB_SLOW_PATH }; +struct mlx5_flow_rule * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + u32 action, u32 src_vport, u32 dst_vport) +{ + struct mlx5_flow_destination dest = { 0 }; + struct mlx5_fc *counter = NULL; + struct mlx5_flow_rule *rule; + void *misc; + + if (esw->mode != SRIOV_OFFLOADS) + return ERR_PTR(-EOPNOTSUPP); + + if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = dst_vport; + action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(counter)) + return ERR_CAST(counter); + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter = counter; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, src_vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS; + + rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb, + spec, action, 0, &dest); + + if (IS_ERR(rule)) + mlx5_fc_destroy(esw->dev, counter); + + return rule; +} + static struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { -- cgit v1.2.3 From 5c40348c69f33c4c14c051181088e8c71e38be7d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 14 Jul 2016 10:32:42 +0300 Subject: net/mlx5e: Adjustments in the TC offload code towards reuse for SRIOV Towards reusing the TC offloads code for an SRIOV use-case, change some of the helper functions to have _nic in their names so it's clear what's NIC unique and what's general. Also group together the NIC related helpers so we can easily branch per the use-case in downstream patch. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 32 ++++++++++++------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cd58fc8f9174..57b76f759ee8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -49,9 +49,9 @@ struct mlx5e_tc_flow { #define MLX5E_TC_TABLE_NUM_ENTRIES 1024 #define MLX5E_TC_TABLE_NUM_GROUPS 4 -static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - u32 action, u32 flow_tag) +static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + u32 action, u32 flow_tag) { struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest = { 0 }; @@ -120,7 +120,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, mlx5_fc_destroy(priv->mdev, counter); - if (!mlx5e_tc_num_filters(priv)) { + if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } @@ -295,8 +295,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec return 0; } -static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - u32 *action, u32 *flow_tag) +static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + u32 *action, u32 *flow_tag) { const struct tc_action *a; @@ -369,28 +369,28 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (err < 0) goto err_free; - err = parse_tc_actions(priv, f->exts, &action, &flow_tag); + err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); if (err < 0) goto err_free; - err = rhashtable_insert_fast(&tc->ht, &flow->node, - tc->ht_params); - if (err) - goto err_free; - - flow->rule = mlx5e_tc_add_flow(priv, spec, action, flow_tag); + flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag); if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); - goto err_hash_del; + goto err_free; } + err = rhashtable_insert_fast(&tc->ht, &flow->node, + tc->ht_params); + if (err) + goto err_del_rule; + if (old) mlx5e_tc_del_flow(priv, old); goto out; -err_hash_del: - rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params); +err_del_rule: + mlx5_del_flow_rule(flow->rule); err_free: if (!old) -- cgit v1.2.3 From 8438884d4ab423161b974854ebb90c08219dd678 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 14 Jul 2016 10:32:43 +0300 Subject: net/switchdev: Export the same parent ID service function This helper serves to know if two switchdev port netdevices belong to the same HW ASIC, e.g to figure out if forwarding offload is possible between them. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/net/switchdev.h | 8 ++++++++ net/switchdev/switchdev.c | 5 +++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 985619a59323..9023e3e3be0b 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -227,6 +227,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); +bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b); #else static inline void switchdev_deferred_process(void) @@ -351,6 +353,12 @@ static inline void switchdev_port_fwd_mark_set(struct net_device *dev, { } +static inline bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b) +{ + return false; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 59658b2e9cdf..a5fc9dd24aa9 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1286,8 +1286,8 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi) } EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); -static bool switchdev_port_same_parent_id(struct net_device *a, - struct net_device *b) +bool switchdev_port_same_parent_id(struct net_device *a, + struct net_device *b) { struct switchdev_attr a_attr = { .orig_dev = a, @@ -1323,6 +1323,7 @@ static u32 switchdev_port_fwd_mark_get(struct net_device *dev, return dev->ifindex; } +EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, u32 old_mark, u32 *reset_mark) -- cgit v1.2.3 From 03a9d11e6eeb30363971a9e372b4d61b6e817882 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 14 Jul 2016 10:32:44 +0300 Subject: net/mlx5e: Add TC drop and mirred/redirect action parsing for SRIOV offloads Add the setup code that parses the TC actions needed to support offloading drop and mirred/redirect for SRIOV e-switch. We can redirect between two devices if they belong to the same HW switch, compare the switchdev HW ID attribute to enforce that. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 53 +++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 57b76f759ee8..9a66441262d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -37,8 +37,11 @@ #include #include #include +#include +#include #include "en.h" #include "en_tc.h" +#include "eswitch.h" struct mlx5e_tc_flow { struct rhash_head node; @@ -339,6 +342,56 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } +static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + u32 *action, u32 *dest_vport) +{ + const struct tc_action *a; + + if (tc_no_actions(exts)) + return -EINVAL; + + *action = 0; + + tc_for_each_action(a, exts) { + /* Only support a single action per rule */ + if (*action) + return -EINVAL; + + if (is_tcf_gact_shot(a)) { + *action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + continue; + } + + if (is_tcf_mirred_redirect(a)) { + int ifindex = tcf_mirred_ifindex(a); + struct net_device *out_dev; + struct mlx5e_priv *out_priv; + struct mlx5_eswitch_rep *out_rep; + + out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); + + if (!switchdev_port_same_parent_id(priv->netdev, out_dev)) { + pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EINVAL; + } + + out_priv = netdev_priv(out_dev); + out_rep = out_priv->ppriv; + if (out_rep->vport == 0) + *dest_vport = FDB_UPLINK_VPORT; + else + *dest_vport = out_rep->vport; + *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + continue; + } + + return -EINVAL; + } + return 0; +} + int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct tc_cls_flower_offload *f) { -- cgit v1.2.3 From adb4c123f88dfa7a9c3a320731c765f07a125503 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 14 Jul 2016 10:32:45 +0300 Subject: net/mlx5e: Add TC HW support for FDB (SRIOV e-switch) offloads Enhance the TC offload code such that when the eswitch exists and it's mode being SRIOV offloads, we do TC actions parsing and setup targeted for eswitch. Next, we add the offloaded flow to the HW e-switch (fdb). Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 5 ++++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 35 ++++++++++++++++++++---- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 5ef02f02a1d5..fdaf2fa811a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -36,6 +36,7 @@ #include "eswitch.h" #include "en.h" +#include "en_tc.h" static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -201,6 +202,10 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_remove_sqs_fwd_rules(priv); + + /* clean (and re-init) existing uplink offloaded TC rules */ + mlx5e_tc_cleanup(priv); + mlx5e_tc_init(priv); } static int mlx5e_rep_get_phys_port_name(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9a66441262d2..0f19b01e3fff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -112,6 +112,22 @@ err_create_ft: return rule; } +static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + u32 action, u32 dst_vport) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + u32 src_vport; + + if (rep->vport) /* set source vport for the flow */ + src_vport = rep->vport; + else + src_vport = FDB_UPLINK_VPORT; + + return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport); +} + static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5_flow_rule *rule) { @@ -397,11 +413,11 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, { struct mlx5e_tc_table *tc = &priv->fs.tc; int err = 0; - u32 flow_tag; - u32 action; + u32 flow_tag, action, dest_vport = 0; struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, tc->ht_params); @@ -422,11 +438,18 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (err < 0) goto err_free; - err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); - if (err < 0) - goto err_free; + if (esw && esw->mode == SRIOV_OFFLOADS) { + err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport); + if (err < 0) + goto err_free; + flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport); + } else { + err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); + if (err < 0) + goto err_free; + flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag); + } - flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag); if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); goto err_free; -- cgit v1.2.3 From d957b4e38388a7ff6911db70930436ce8a2e0b2c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 14 Jul 2016 10:32:46 +0300 Subject: net/mlx5e: Add TC offload support for the VF representors netdevice The VF representors support only TC filter/action offloads (not mqprio) and this is enabled for them by default. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 35 +++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index fdaf2fa811a4..1c7d8b8314bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "eswitch.h" #include "en.h" @@ -222,6 +223,29 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, return 0; } +static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle, + __be16 proto, struct tc_to_netdev *tc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + return -EOPNOTSUPP; + + switch (tc->type) { + case TC_SETUP_CLSFLOWER: + switch (tc->cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, proto, tc->cls_flower); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, tc->cls_flower); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, tc->cls_flower); + } + default: + return -EOPNOTSUPP; + } +} + static const struct switchdev_ops mlx5e_rep_switchdev_ops = { .switchdev_port_attr_get = mlx5e_attr_get, }; @@ -231,6 +255,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, + .ndo_setup_tc = mlx5e_rep_ndo_setup_tc, .ndo_get_stats64 = mlx5e_get_stats, }; @@ -284,7 +309,8 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; #endif - netdev->features |= NETIF_F_VLAN_CHALLENGED; + netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC; + netdev->hw_features |= NETIF_F_HW_TC; eth_hw_addr_random(netdev); } @@ -328,8 +354,14 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) } rep->vport_rx_rule = flow_rule; + err = mlx5e_tc_init(priv); + if (err) + goto err_del_flow_rule; + return 0; +err_del_flow_rule: + mlx5_del_flow_rule(rep->vport_rx_rule); err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_direct_rqts: @@ -343,6 +375,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) struct mlx5_eswitch_rep *rep = priv->ppriv; int i; + mlx5e_tc_cleanup(priv); mlx5_del_flow_rule(rep->vport_rx_rule); mlx5e_destroy_direct_tirs(priv); for (i = 0; i < priv->params.num_channels; i++) -- cgit v1.2.3