diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/Kconfig | 4 | ||||
-rw-r--r-- | net/sched/act_ct.c | 3 | ||||
-rw-r--r-- | net/sched/cls_flower.c | 40 | ||||
-rw-r--r-- | net/sched/cls_fw.c | 1 | ||||
-rw-r--r-- | net/sched/cls_route.c | 1 | ||||
-rw-r--r-- | net/sched/cls_u32.c | 57 | ||||
-rw-r--r-- | net/sched/em_meta.c | 6 | ||||
-rw-r--r-- | net/sched/sch_api.c | 53 | ||||
-rw-r--r-- | net/sched/sch_drr.c | 11 | ||||
-rw-r--r-- | net/sched/sch_hfsc.c | 14 | ||||
-rw-r--r-- | net/sched/sch_htb.c | 17 | ||||
-rw-r--r-- | net/sched/sch_ingress.c | 61 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 49 | ||||
-rw-r--r-- | net/sched/sch_qfq.c | 12 | ||||
-rw-r--r-- | net/sched/sch_taprio.c | 83 |
15 files changed, 310 insertions, 102 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 4b95cb1ac435..470c70deffe2 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -347,8 +347,7 @@ config NET_SCH_FQ_PIE config NET_SCH_INGRESS tristate "Ingress/classifier-action Qdisc" depends on NET_CLS_ACT - select NET_INGRESS - select NET_EGRESS + select NET_XGRESS help Say Y here if you want to use classifiers for incoming and/or outgoing packets. This qdisc doesn't do anything else besides running classifiers, @@ -679,6 +678,7 @@ config NET_EMATCH_IPT config NET_CLS_ACT bool "Actions" select NET_CLS + select NET_XGRESS help Say Y here if you want to use traffic control actions. Actions get attached to classifiers and are invoked after a successful diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index abc71a06d634..7c652d14528b 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1238,7 +1238,8 @@ static int tcf_ct_fill_params(struct net *net, } } - __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); + if (p->ct_action & TCA_CT_ACT_COMMIT) + __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); return 0; err: nf_ct_put(p->tmpl); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 8da9d039d964..e5314a31f75a 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -72,6 +72,7 @@ struct fl_flow_key { struct flow_dissector_key_num_of_vlans num_of_vlans; struct flow_dissector_key_pppoe pppoe; struct flow_dissector_key_l2tpv3 l2tpv3; + struct flow_dissector_key_ipsec ipsec; struct flow_dissector_key_cfm cfm; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ @@ -726,6 +727,8 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_PPPOE_SID] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_PPP_PROTO] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_L2TPV3_SID] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_SPI] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_SPI_MASK] = { .type = NLA_U32 }, [TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1), [TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED }, }; @@ -776,7 +779,8 @@ mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = { [TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL] = { .type = NLA_U32 }, }; -static const struct nla_policy cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX] = { +static const struct nla_policy +cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX + 1] = { [TCA_FLOWER_KEY_CFM_MD_LEVEL] = NLA_POLICY_MAX(NLA_U8, FLOW_DIS_CFM_MDL_MAX), [TCA_FLOWER_KEY_CFM_OPCODE] = { .type = NLA_U8 }, @@ -795,6 +799,24 @@ static void fl_set_key_val(struct nlattr **tb, nla_memcpy(mask, tb[mask_type], len); } +static int fl_set_key_spi(struct nlattr **tb, struct fl_flow_key *key, + struct fl_flow_key *mask, + struct netlink_ext_ack *extack) +{ + if (key->basic.ip_proto != IPPROTO_ESP && + key->basic.ip_proto != IPPROTO_AH) { + NL_SET_ERR_MSG(extack, + "Protocol must be either ESP or AH"); + return -EINVAL; + } + + fl_set_key_val(tb, &key->ipsec.spi, + TCA_FLOWER_KEY_SPI, + &mask->ipsec.spi, TCA_FLOWER_KEY_SPI_MASK, + sizeof(key->ipsec.spi)); + return 0; +} + static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -1709,7 +1731,7 @@ static int fl_set_key_cfm(struct nlattr **tb, struct fl_flow_key *mask, struct netlink_ext_ack *extack) { - struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX]; + struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX + 1]; int err; if (!tb[TCA_FLOWER_KEY_CFM]) @@ -1894,6 +1916,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb, return ret; } + if (tb[TCA_FLOWER_KEY_SPI]) { + ret = fl_set_key_spi(tb, key, mask, extack); + if (ret) + return ret; + } + if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) { key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; @@ -2067,6 +2095,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_L2TPV3, l2tpv3); FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_IPSEC, ipsec); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_CFM, cfm); skb_flow_dissector_init(dissector, keys, cnt); @@ -3364,6 +3394,12 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, sizeof(key->l2tpv3.session_id))) goto nla_put_failure; + if (key->ipsec.spi && + fl_dump_key_val(skb, &key->ipsec.spi, TCA_FLOWER_KEY_SPI, + &mask->ipsec.spi, TCA_FLOWER_KEY_SPI_MASK, + sizeof(key->ipsec.spi))) + goto nla_put_failure; + if ((key->basic.ip_proto == IPPROTO_TCP || key->basic.ip_proto == IPPROTO_UDP || key->basic.ip_proto == IPPROTO_SCTP) && diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 8641f8059317..c49d6af0e048 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -267,7 +267,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return -ENOBUFS; fnew->id = f->id; - fnew->res = f->res; fnew->ifindex = f->ifindex; fnew->tp = f->tp; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index d0c53724d3e8..1e20bbd687f1 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -513,7 +513,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (fold) { f->id = fold->id; f->iif = fold->iif; - f->res = fold->res; f->handle = fold->handle; f->tp = fold->tp; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 5abf31e432ca..da4c179a4d41 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -826,7 +826,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, new->ifindex = n->ifindex; new->fshift = n->fshift; - new->res = n->res; new->flags = n->flags; RCU_INIT_POINTER(new->ht_down, ht); @@ -1024,18 +1023,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } + /* At this point, we need to derive the new handle that will be used to + * uniquely map the identity of this table match entry. The + * identity of the entry that we need to construct is 32 bits made of: + * htid(12b):bucketid(8b):node/entryid(12b) + * + * At this point _we have the table(ht)_ in which we will insert this + * entry. We carry the table's id in variable "htid". + * Note that earlier code picked the ht selection either by a) the user + * providing the htid specified via TCA_U32_HASH attribute or b) when + * no such attribute is passed then the root ht, is default to at ID + * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0. + * If OTOH the user passed us the htid, they may also pass a bucketid of + * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is + * indicating hash bucketid of 1. Rule: the entry/node ID _cannot_ be + * passed via the htid, so even if it was non-zero it will be ignored. + * + * We may also have a handle, if the user passed one. The handle also + * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b). + * Rule: the bucketid on the handle is ignored even if one was passed; + * rather the value on "htid" is always assumed to be the bucketid. + */ if (handle) { + /* Rule: The htid from handle and tableid from htid must match */ if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) { NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch"); return -EINVAL; } - handle = htid | TC_U32_NODE(handle); - err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle, - GFP_KERNEL); - if (err) - return err; - } else + /* Ok, so far we have a valid htid(12b):bucketid(8b) but we + * need to finalize the table entry identification with the last + * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for + * entries. Rule: nodeid of 0 is reserved only for tables(see + * earlier code which processes TC_U32_DIVISOR attribute). + * Rule: The nodeid can only be derived from the handle (and not + * htid). + * Rule: if the handle specified zero for the node id example + * 0x60000000, then pick a new nodeid from the pool of IDs + * this hash table has been allocating from. + * If OTOH it is specified (i.e for example the user passed a + * handle such as 0x60000123), then we use it generate our final + * handle which is used to uniquely identify the match entry. + */ + if (!TC_U32_NODE(handle)) { + handle = gen_new_kid(ht, htid); + } else { + handle = htid | TC_U32_NODE(handle); + err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, + handle, GFP_KERNEL); + if (err) + return err; + } + } else { + /* The user did not give us a handle; lets just generate one + * from the table's pool of nodeids. + */ handle = gen_new_kid(ht, htid); + } if (tb[TCA_U32_SEL] == NULL) { NL_SET_ERR_MSG_MOD(extack, "Selector not specified"); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index af85a73c4c54..da34fd4c9269 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -502,7 +502,7 @@ META_COLLECTOR(int_sk_lingertime) *err = -1; return; } - dst->value = sk->sk_lingertime / HZ; + dst->value = READ_ONCE(sk->sk_lingertime) / HZ; } META_COLLECTOR(int_sk_err_qlen) @@ -568,7 +568,7 @@ META_COLLECTOR(int_sk_rcvtimeo) *err = -1; return; } - dst->value = sk->sk_rcvtimeo / HZ; + dst->value = READ_ONCE(sk->sk_rcvtimeo) / HZ; } META_COLLECTOR(int_sk_sndtimeo) @@ -579,7 +579,7 @@ META_COLLECTOR(int_sk_sndtimeo) *err = -1; return; } - dst->value = sk->sk_sndtimeo / HZ; + dst->value = READ_ONCE(sk->sk_sndtimeo) / HZ; } META_COLLECTOR(int_sk_sendmsg_off) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index aa6b1fe65151..e9eaf637220e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1547,10 +1547,28 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, return 0; } +static bool req_create_or_replace(struct nlmsghdr *n) +{ + return (n->nlmsg_flags & NLM_F_CREATE && + n->nlmsg_flags & NLM_F_REPLACE); +} + +static bool req_create_exclusive(struct nlmsghdr *n) +{ + return (n->nlmsg_flags & NLM_F_CREATE && + n->nlmsg_flags & NLM_F_EXCL); +} + +static bool req_change(struct nlmsghdr *n) +{ + return (!(n->nlmsg_flags & NLM_F_CREATE) && + !(n->nlmsg_flags & NLM_F_REPLACE) && + !(n->nlmsg_flags & NLM_F_EXCL)); +} + /* * Create/change qdisc. */ - static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -1644,27 +1662,35 @@ replay: * * We know, that some child q is already * attached to this parent and have choice: - * either to change it or to create/graft new one. + * 1) change it or 2) create/graft new one. + * If the requested qdisc kind is different + * than the existing one, then we choose graft. + * If they are the same then this is "change" + * operation - just let it fallthrough.. * * 1. We are allowed to create/graft only - * if CREATE and REPLACE flags are set. + * if the request is explicitly stating + * "please create if it doesn't exist". * - * 2. If EXCL is set, requestor wanted to say, - * that qdisc tcm_handle is not expected + * 2. If the request is to exclusive create + * then the qdisc tcm_handle is not expected * to exist, so that we choose create/graft too. * * 3. The last case is when no flags are set. + * This will happen when for example tc + * utility issues a "change" command. * Alas, it is sort of hole in API, we * cannot decide what to do unambiguously. - * For now we select create/graft, if - * user gave KIND, which does not match existing. + * For now we select create/graft. */ - if ((n->nlmsg_flags & NLM_F_CREATE) && - (n->nlmsg_flags & NLM_F_REPLACE) && - ((n->nlmsg_flags & NLM_F_EXCL) || - (tca[TCA_KIND] && - nla_strcmp(tca[TCA_KIND], q->ops->id)))) - goto create_n_graft; + if (tca[TCA_KIND] && + nla_strcmp(tca[TCA_KIND], q->ops->id)) { + if (req_create_or_replace(n) || + req_create_exclusive(n)) + goto create_n_graft; + else if (req_change(n)) + goto create_n_graft2; + } } } } else { @@ -1698,6 +1724,7 @@ create_n_graft: NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag"); return -ENOENT; } +create_n_graft2: if (clid == TC_H_INGRESS) { if (dev_ingress_queue(dev)) { q = qdisc_create(dev, dev_ingress_queue(dev), diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index e35a4e90f4e6..19901e77cd3b 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -17,7 +17,6 @@ struct drr_class { struct Qdisc_class_common common; - unsigned int filter_cnt; struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; @@ -150,8 +149,10 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg, struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl = (struct drr_class *)arg; - if (cl->filter_cnt > 0) + if (qdisc_class_in_use(&cl->common)) { + NL_SET_ERR_MSG(extack, "DRR class is in use"); return -EBUSY; + } sch_tree_lock(sch); @@ -187,8 +188,8 @@ static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent, { struct drr_class *cl = drr_find_class(sch, classid); - if (cl != NULL) - cl->filter_cnt++; + if (cl) + qdisc_class_get(&cl->common); return (unsigned long)cl; } @@ -197,7 +198,7 @@ static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg) { struct drr_class *cl = (struct drr_class *)arg; - cl->filter_cnt--; + qdisc_class_put(&cl->common); } static int drr_graft_class(struct Qdisc *sch, unsigned long arg, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 70b0c5873d32..3554085bc2be 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -116,7 +116,6 @@ struct hfsc_class { struct net_rate_estimator __rcu *rate_est; struct tcf_proto __rcu *filter_list; /* filter list */ struct tcf_block *block; - unsigned int filter_cnt; /* filter count */ unsigned int level; /* class level in hierarchy */ struct hfsc_sched *sched; /* scheduler data */ @@ -1012,6 +1011,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (parent == NULL) return -ENOENT; } + if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) { + NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC"); + return -EINVAL; + } if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0) return -EINVAL; @@ -1094,8 +1097,11 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg, struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl = (struct hfsc_class *)arg; - if (cl->level > 0 || cl->filter_cnt > 0 || cl == &q->root) + if (cl->level > 0 || qdisc_class_in_use(&cl->cl_common) || + cl == &q->root) { + NL_SET_ERR_MSG(extack, "HFSC class in use"); return -EBUSY; + } sch_tree_lock(sch); @@ -1223,7 +1229,7 @@ hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid) if (cl != NULL) { if (p != NULL && p->level <= cl->level) return 0; - cl->filter_cnt++; + qdisc_class_get(&cl->cl_common); } return (unsigned long)cl; @@ -1234,7 +1240,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg) { struct hfsc_class *cl = (struct hfsc_class *)arg; - cl->filter_cnt--; + qdisc_class_put(&cl->cl_common); } static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 325c29041c7d..0d947414e616 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -102,7 +102,6 @@ struct htb_class { struct tcf_proto __rcu *filter_list; /* class attached filters */ struct tcf_block *block; - int filter_cnt; int level; /* our level (see above) */ unsigned int children; @@ -1710,8 +1709,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, * tc subsys guarantee us that in htb_destroy it holds no class * refs so that we can remove children safely there ? */ - if (cl->children || cl->filter_cnt) + if (cl->children || qdisc_class_in_use(&cl->common)) { + NL_SET_ERR_MSG(extack, "HTB class in use"); return -EBUSY; + } if (!cl->level && htb_parent_last_child(cl)) last_child = 1; @@ -1810,10 +1811,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter"); goto failure; } - if (hopt->quantum) { - NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter"); - goto failure; - } } /* Keeping backward compatible with rate_table based iproute2 tc */ @@ -1910,6 +1907,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, .rate = max_t(u64, hopt->rate.rate, rate64), .ceil = max_t(u64, hopt->ceil.rate, ceil64), .prio = hopt->prio, + .quantum = hopt->quantum, .extack = extack, }; err = htb_offload(dev, &offload_opt); @@ -1931,6 +1929,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, .rate = max_t(u64, hopt->rate.rate, rate64), .ceil = max_t(u64, hopt->ceil.rate, ceil64), .prio = hopt->prio, + .quantum = hopt->quantum, .extack = extack, }; err = htb_offload(dev, &offload_opt); @@ -2017,6 +2016,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, .rate = max_t(u64, hopt->rate.rate, rate64), .ceil = max_t(u64, hopt->ceil.rate, ceil64), .prio = hopt->prio, + .quantum = hopt->quantum, .extack = extack, }; err = htb_offload(dev, &offload_opt); @@ -2108,7 +2108,7 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, * be broken by class during destroy IIUC. */ if (cl) - cl->filter_cnt++; + qdisc_class_get(&cl->common); return (unsigned long)cl; } @@ -2116,8 +2116,7 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - if (cl) - cl->filter_cnt--; + qdisc_class_put(&cl->common); } static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index e43a45499372..a463a63192c3 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -13,6 +13,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> +#include <net/tcx.h> struct ingress_sched_data { struct tcf_block *block; @@ -78,6 +79,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + struct bpf_mprog_entry *entry; + bool created; int err; if (sch->parent != TC_H_INGRESS) @@ -85,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, net_inc_ingress_queue(); - mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); + entry = tcx_entry_fetch_or_create(dev, true, &created); + if (!entry) + return -ENOMEM; + tcx_miniq_set_active(entry, true); + mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq); + if (created) + tcx_entry_update(dev, entry, true); q->block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; q->block_info.chain_head_change = clsact_chain_head_change; @@ -103,11 +112,22 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, static void ingress_destroy(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct bpf_mprog_entry *entry = rtnl_dereference(dev->tcx_ingress); if (sch->parent != TC_H_INGRESS) return; tcf_block_put_ext(q->block, sch, &q->block_info); + + if (entry) { + tcx_miniq_set_active(entry, false); + if (!tcx_entry_is_active(entry)) { + tcx_entry_update(dev, NULL, true); + tcx_entry_free(entry); + } + } + net_dec_ingress_queue(); } @@ -223,6 +243,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, { struct clsact_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + struct bpf_mprog_entry *entry; + bool created; int err; if (sch->parent != TC_H_CLSACT) @@ -231,7 +253,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, net_inc_ingress_queue(); net_inc_egress_queue(); - mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress); + entry = tcx_entry_fetch_or_create(dev, true, &created); + if (!entry) + return -ENOMEM; + tcx_miniq_set_active(entry, true); + mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq); + if (created) + tcx_entry_update(dev, entry, true); q->ingress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; q->ingress_block_info.chain_head_change = clsact_chain_head_change; @@ -244,7 +272,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block); - mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress); + entry = tcx_entry_fetch_or_create(dev, false, &created); + if (!entry) + return -ENOMEM; + tcx_miniq_set_active(entry, true); + mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq); + if (created) + tcx_entry_update(dev, entry, false); q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS; q->egress_block_info.chain_head_change = clsact_chain_head_change; @@ -256,12 +290,31 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, static void clsact_destroy(struct Qdisc *sch) { struct clsact_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct bpf_mprog_entry *ingress_entry = rtnl_dereference(dev->tcx_ingress); + struct bpf_mprog_entry *egress_entry = rtnl_dereference(dev->tcx_egress); if (sch->parent != TC_H_CLSACT) return; - tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); + tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); + + if (ingress_entry) { + tcx_miniq_set_active(ingress_entry, false); + if (!tcx_entry_is_active(ingress_entry)) { + tcx_entry_update(dev, NULL, true); + tcx_entry_free(ingress_entry); + } + } + + if (egress_entry) { + tcx_miniq_set_active(egress_entry, false); + if (!tcx_entry_is_active(egress_entry)) { + tcx_entry_update(dev, NULL, false); + tcx_entry_free(egress_entry); + } + } net_dec_ingress_queue(); net_dec_egress_queue(); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 38d9aa0cd30e..4ad39a4a3cf5 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -105,6 +105,11 @@ struct netem_sched_data { u32 rho; } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; + struct prng { + u64 seed; + struct rnd_state prng_state; + } prng; + struct disttable *delay_dist; enum { @@ -179,15 +184,16 @@ static void init_crandom(struct crndstate *state, unsigned long rho) * Next number depends on last value. * rho is scaled to avoid floating point. */ -static u32 get_crandom(struct crndstate *state) +static u32 get_crandom(struct crndstate *state, struct prng *p) { u64 value, rho; unsigned long answer; + struct rnd_state *s = &p->prng_state; if (!state || state->rho == 0) /* no correlation */ - return get_random_u32(); + return prandom_u32_state(s); - value = get_random_u32(); + value = prandom_u32_state(s); rho = (u64)state->rho + 1; answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; state->last = answer; @@ -201,7 +207,7 @@ static u32 get_crandom(struct crndstate *state) static bool loss_4state(struct netem_sched_data *q) { struct clgstate *clg = &q->clg; - u32 rnd = get_random_u32(); + u32 rnd = prandom_u32_state(&q->prng.prng_state); /* * Makes a comparison between rnd and the transition @@ -266,18 +272,19 @@ static bool loss_4state(struct netem_sched_data *q) static bool loss_gilb_ell(struct netem_sched_data *q) { struct clgstate *clg = &q->clg; + struct rnd_state *s = &q->prng.prng_state; switch (clg->state) { case GOOD_STATE: - if (get_random_u32() < clg->a1) + if (prandom_u32_state(s) < clg->a1) clg->state = BAD_STATE; - if (get_random_u32() < clg->a4) + if (prandom_u32_state(s) < clg->a4) return true; break; case BAD_STATE: - if (get_random_u32() < clg->a2) + if (prandom_u32_state(s) < clg->a2) clg->state = GOOD_STATE; - if (get_random_u32() > clg->a3) + if (prandom_u32_state(s) > clg->a3) return true; } @@ -289,7 +296,7 @@ static bool loss_event(struct netem_sched_data *q) switch (q->loss_model) { case CLG_RANDOM: /* Random packet drop 0 => none, ~0 => all */ - return q->loss && q->loss >= get_crandom(&q->loss_cor); + return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng); case CLG_4_STATES: /* 4state loss model algorithm (used also for GI model) @@ -318,6 +325,7 @@ static bool loss_event(struct netem_sched_data *q) */ static s64 tabledist(s64 mu, s32 sigma, struct crndstate *state, + struct prng *prng, const struct disttable *dist) { s64 x; @@ -327,7 +335,7 @@ static s64 tabledist(s64 mu, s32 sigma, if (sigma == 0) return mu; - rnd = get_crandom(state); + rnd = get_crandom(state, prng); /* default uniform distribution */ if (dist == NULL) @@ -449,7 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, skb->prev = NULL; /* Random duplication */ - if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) + if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng)) ++count; /* Drop packet? */ @@ -492,7 +500,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, * If packet is going to be hardware checksummed, then * do it now in software before we mangle it. */ - if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { + if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) { if (skb_is_gso(skb)) { skb = netem_segment(skb, sch, to_free); if (!skb) @@ -530,12 +538,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, cb = netem_skb_cb(skb); if (q->gap == 0 || /* not doing reordering */ q->counter < q->gap - 1 || /* inside last reordering gap */ - q->reorder < get_crandom(&q->reorder_cor)) { + q->reorder < get_crandom(&q->reorder_cor, &q->prng)) { u64 now; s64 delay; delay = tabledist(q->latency, q->jitter, - &q->delay_cor, q->delay_dist); + &q->delay_cor, &q->prng, q->delay_dist); now = ktime_get_ns(); @@ -639,7 +647,7 @@ static void get_slot_next(struct netem_sched_data *q, u64 now) else next_delay = tabledist(q->slot_config.dist_delay, (s32)(q->slot_config.dist_jitter), - NULL, q->slot_dist); + NULL, &q->prng, q->slot_dist); q->slot.slot_next = now + next_delay; q->slot.packets_left = q->slot_config.max_packets; @@ -922,6 +930,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_LATENCY64] = { .type = NLA_S64 }, [TCA_NETEM_JITTER64] = { .type = NLA_S64 }, [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) }, + [TCA_NETEM_PRNG_SEED] = { .type = NLA_U64 }, }; static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, @@ -1040,6 +1049,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, /* capping jitter to the range acceptable by tabledist() */ q->jitter = min_t(s64, abs(q->jitter), INT_MAX); + if (tb[TCA_NETEM_PRNG_SEED]) + q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]); + else + q->prng.seed = get_random_u64(); + prandom_seed_state(&q->prng.prng_state, q->prng.seed); + unlock: sch_tree_unlock(sch); @@ -1203,6 +1218,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; } + if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed, + TCA_NETEM_PAD)) + goto nla_put_failure; + return nla_nest_end(skb, nla); nla_put_failure: diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index befaf74b33ca..1a25752f1a9a 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -130,8 +130,6 @@ struct qfq_aggregate; struct qfq_class { struct Qdisc_class_common common; - unsigned int filter_cnt; - struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; @@ -545,8 +543,10 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg, struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; - if (cl->filter_cnt > 0) + if (qdisc_class_in_use(&cl->common)) { + NL_SET_ERR_MSG_MOD(extack, "QFQ class in use"); return -EBUSY; + } sch_tree_lock(sch); @@ -580,8 +580,8 @@ static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent, { struct qfq_class *cl = qfq_find_class(sch, classid); - if (cl != NULL) - cl->filter_cnt++; + if (cl) + qdisc_class_get(&cl->common); return (unsigned long)cl; } @@ -590,7 +590,7 @@ static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg) { struct qfq_class *cl = (struct qfq_class *)arg; - cl->filter_cnt--; + qdisc_class_put(&cl->common); } static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 717ae51d94a0..1cb5e41c0ec7 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1015,6 +1015,11 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { TC_FP_PREEMPTIBLE), }; +static struct netlink_range_validation_signed taprio_cycle_time_range = { + .min = 0, + .max = INT_MAX, +}; + static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@ -1023,7 +1028,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, - [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = + NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range), [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 }, [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, @@ -1159,6 +1165,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, return -EINVAL; } + if (cycle < 0 || cycle > INT_MAX) { + NL_SET_ERR_MSG(extack, "'cycle_time' is too big"); + return -EINVAL; + } + new->cycle_time = cycle; } @@ -1347,7 +1358,7 @@ static void setup_txtime(struct taprio_sched *q, struct sched_gate_list *sched, ktime_t base) { struct sched_entry *entry; - u32 interval = 0; + u64 interval = 0; list_for_each_entry(entry, &sched->entries, list) { entry->next_txtime = ktime_add_ns(base, interval); @@ -2088,11 +2099,8 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, return -EOPNOTSUPP; } - /* pre-allocate qdisc, attachment can't fail */ - q->qdiscs = kcalloc(dev->num_tx_queues, - sizeof(q->qdiscs[0]), + q->qdiscs = kcalloc(dev->num_tx_queues, sizeof(q->qdiscs[0]), GFP_KERNEL); - if (!q->qdiscs) return -ENOMEM; @@ -2134,25 +2142,32 @@ static void taprio_attach(struct Qdisc *sch) /* Attach underlying qdisc */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { - struct Qdisc *qdisc = q->qdiscs[ntx]; - struct Qdisc *old; + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *old, *dev_queue_qdisc; if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { + struct Qdisc *qdisc = q->qdiscs[ntx]; + + /* In offload mode, the root taprio qdisc is bypassed + * and the netdev TX queues see the children directly + */ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + dev_queue_qdisc = qdisc; } else { - old = dev_graft_qdisc(qdisc->dev_queue, sch); - qdisc_refcount_inc(sch); + /* In software mode, attach the root taprio qdisc + * to all netdev TX queues, so that dev_qdisc_enqueue() + * goes through taprio_enqueue(). + */ + dev_queue_qdisc = sch; } + old = dev_graft_qdisc(dev_queue, dev_queue_qdisc); + /* The qdisc's refcount requires to be elevated once + * for each netdev TX queue it is grafted onto + */ + qdisc_refcount_inc(dev_queue_qdisc); if (old) qdisc_put(old); } - - /* access to the child qdiscs is not needed in offload mode */ - if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { - kfree(q->qdiscs); - q->qdiscs = NULL; - } } static struct netdev_queue *taprio_queue_get(struct Qdisc *sch, @@ -2181,13 +2196,23 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl, if (dev->flags & IFF_UP) dev_deactivate(dev); + /* In offload mode, the child Qdisc is directly attached to the netdev + * TX queue, and thus, we need to keep its refcount elevated in order + * to counteract qdisc_graft()'s call to qdisc_put() once per TX queue. + * However, save the reference to the new qdisc in the private array in + * both software and offload cases, to have an up-to-date reference to + * our children. + */ + *old = q->qdiscs[cl - 1]; if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { - *old = dev_graft_qdisc(dev_queue, new); - } else { - *old = q->qdiscs[cl - 1]; - q->qdiscs[cl - 1] = new; + WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old); + if (new) + qdisc_refcount_inc(new); + if (*old) + qdisc_put(*old); } + q->qdiscs[cl - 1] = new; if (new) new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; @@ -2425,12 +2450,14 @@ start_error: static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) { - struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + unsigned int ntx = cl - 1; - if (!dev_queue) + if (ntx >= dev->num_tx_queues) return NULL; - return rtnl_dereference(dev_queue->qdisc_sleeping); + return q->qdiscs[ntx]; } static unsigned long taprio_find(struct Qdisc *sch, u32 classid) @@ -2445,11 +2472,11 @@ static unsigned long taprio_find(struct Qdisc *sch, u32 classid) static int taprio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { - struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + struct Qdisc *child = taprio_leaf(sch, cl); tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle |= TC_H_MIN(cl); - tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; + tcm->tcm_info = child->handle; return 0; } @@ -2459,16 +2486,14 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, __releases(d->lock) __acquires(d->lock) { - struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + struct Qdisc *child = taprio_leaf(sch, cl); struct tc_taprio_qopt_offload offload = { .cmd = TAPRIO_CMD_QUEUE_STATS, .queue_stats = { .queue = cl - 1, }, }; - struct Qdisc *child; - child = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 || qdisc_qstats_copy(d, child) < 0) return -1; |