summaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig14
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c84
-rw-r--r--net/sched/act_bpf.c19
-rw-r--r--net/sched/act_connmark.c5
-rw-r--r--net/sched/act_csum.c32
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_ife.c119
-rw-r--r--net/sched/act_ipt.c16
-rw-r--r--net/sched/act_mirred.c94
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c222
-rw-r--r--net/sched/act_police.c23
-rw-r--r--net/sched/act_sample.c276
-rw-r--r--net/sched/act_simple.c2
-rw-r--r--net/sched/act_skbedit.c23
-rw-r--r--net/sched/act_skbmod.c3
-rw-r--r--net/sched/act_tunnel_key.c19
-rw-r--r--net/sched/act_vlan.c2
-rw-r--r--net/sched/cls_api.c218
-rw-r--r--net/sched/cls_bpf.c62
-rw-r--r--net/sched/cls_flow.c2
-rw-r--r--net/sched/cls_flower.c409
-rw-r--r--net/sched/cls_matchall.c160
-rw-r--r--net/sched/cls_u32.c11
-rw-r--r--net/sched/em_ipset.c17
-rw-r--r--net/sched/em_meta.c10
-rw-r--r--net/sched/sch_api.c52
-rw-r--r--net/sched/sch_atm.c1
-rw-r--r--net/sched/sch_cbq.c9
-rw-r--r--net/sched/sch_choke.c1
-rw-r--r--net/sched/sch_drr.c6
-rw-r--r--net/sched/sch_dsmark.c11
-rw-r--r--net/sched/sch_fq.c18
-rw-r--r--net/sched/sch_fq_codel.c7
-rw-r--r--net/sched/sch_generic.c6
-rw-r--r--net/sched/sch_hfsc.c6
-rw-r--r--net/sched/sch_hhf.c8
-rw-r--r--net/sched/sch_htb.c7
-rw-r--r--net/sched/sch_ingress.c1
-rw-r--r--net/sched/sch_mq.c10
-rw-r--r--net/sched/sch_mqprio.c19
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_netem.c6
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_qfq.c8
-rw-r--r--net/sched/sch_sfb.c1
-rw-r--r--net/sched/sch_sfq.c4
-rw-r--r--net/sched/sch_teql.c10
49 files changed, 1474 insertions, 568 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 87956a768d1b..403790cce7d2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -650,6 +650,18 @@ config NET_ACT_MIRRED
To compile this code as a module, choose M here: the
module will be called act_mirred.
+config NET_ACT_SAMPLE
+ tristate "Traffic Sampling"
+ depends on NET_CLS_ACT
+ select PSAMPLE
+ ---help---
+ Say Y here to allow packet sampling tc action. The packet sample
+ action consists of statistically choosing packets and sampling
+ them using the psample module.
+
+ To compile this code as a module, choose M here: the
+ module will be called act_sample.
+
config NET_ACT_IPT
tristate "IPtables targets"
depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
@@ -707,6 +719,7 @@ config NET_ACT_SKBEDIT
config NET_ACT_CSUM
tristate "Checksum Updating"
depends on NET_CLS_ACT && INET
+ select LIBCRC32C
---help---
Say Y here to update some common checksum after some direct
packet alterations.
@@ -763,6 +776,7 @@ config NET_ACT_SKBMOD
config NET_ACT_IFE
tristate "Inter-FE action based on IETF ForCES InterFE LFB"
depends on NET_CLS_ACT
+ select NET_IFE
---help---
Say Y here to allow for sourcing and terminating metadata
For details refer to netdev01 paper:
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 4bdda3634e0b..7b915d226de7 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_NET_CLS_ACT) += act_api.o
obj-$(CONFIG_NET_ACT_POLICE) += act_police.o
obj-$(CONFIG_NET_ACT_GACT) += act_gact.o
obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o
+obj-$(CONFIG_NET_ACT_SAMPLE) += act_sample.o
obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f893d180da1c..e05b924618a0 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -24,6 +24,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
#include <net/act_api.h>
#include <net/netlink.h>
@@ -33,6 +34,12 @@ static void free_tcf(struct rcu_head *head)
free_percpu(p->cpu_bstats);
free_percpu(p->cpu_qstats);
+
+ if (p->act_cookie) {
+ kfree(p->act_cookie->data);
+ kfree(p->act_cookie);
+ }
+
kfree(p);
}
@@ -41,8 +48,7 @@ static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p)
spin_lock_bh(&hinfo->lock);
hlist_del(&p->tcfa_head);
spin_unlock_bh(&hinfo->lock);
- gen_kill_estimator(&p->tcfa_bstats,
- &p->tcfa_rate_est);
+ gen_kill_estimator(&p->tcfa_rate_est);
/*
* gen_estimator est_timer() might access p->tcfa_lock
* or bstats, wait a RCU grace period before freeing p
@@ -237,8 +243,7 @@ EXPORT_SYMBOL(tcf_hash_check);
void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
{
if (est)
- gen_kill_estimator(&a->tcfa_bstats,
- &a->tcfa_rate_est);
+ gen_kill_estimator(&a->tcfa_rate_est);
call_rcu(&a->tcfa_rcu, free_tcf);
}
EXPORT_SYMBOL(tcf_hash_cleanup);
@@ -428,11 +433,9 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
{
int ret = -1, i;
- if (skb->tc_verd & TC_NCLS) {
- skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
- ret = TC_ACT_OK;
- goto exec_done;
- }
+ if (skb_skip_tc_classify(skb))
+ return TC_ACT_OK;
+
for (i = 0; i < nr_actions; i++) {
const struct tc_action *a = actions[i];
@@ -441,9 +444,8 @@ repeat:
if (ret == TC_ACT_REPEAT)
goto repeat; /* we need a ttl - JHS */
if (ret != TC_ACT_PIPE)
- goto exec_done;
+ break;
}
-exec_done:
return ret;
}
EXPORT_SYMBOL(tcf_action_exec);
@@ -480,6 +482,12 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
goto nla_put_failure;
if (tcf_action_copy_stats(skb, a, 0))
goto nla_put_failure;
+ if (a->act_cookie) {
+ if (nla_put(skb, TCA_ACT_COOKIE, a->act_cookie->len,
+ a->act_cookie->data))
+ goto nla_put_failure;
+ }
+
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
@@ -521,12 +529,29 @@ errout:
return err;
}
+/* Copy the TCA_ACT_COOKIE attribute payload into a newly allocated
+ * tc_cookie. Returns NULL when either allocation fails; on success the
+ * returned cookie and its data buffer must both be kfree()d by the caller.
+ */
+static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
+{
+	struct nlattr *attr = tb[TCA_ACT_COOKIE];
+	struct tc_cookie *cookie;
+
+	cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
+	if (!cookie)
+		return NULL;
+
+	cookie->data = nla_memdup(attr, GFP_KERNEL);
+	if (cookie->data) {
+		cookie->len = nla_len(attr);
+		return cookie;
+	}
+
+	/* payload copy failed: do not leak the container */
+	kfree(cookie);
+	return NULL;
+}
+
struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
struct nlattr *est, char *name, int ovr,
int bind)
{
struct tc_action *a;
struct tc_action_ops *a_o;
+ struct tc_cookie *cookie = NULL;
char act_name[IFNAMSIZ];
struct nlattr *tb[TCA_ACT_MAX + 1];
struct nlattr *kind;
@@ -542,6 +567,18 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
goto err_out;
if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
goto err_out;
+ if (tb[TCA_ACT_COOKIE]) {
+ int cklen = nla_len(tb[TCA_ACT_COOKIE]);
+
+ if (cklen > TC_COOKIE_MAX_SIZE)
+ goto err_out;
+
+ cookie = nla_memdup_cookie(tb);
+ if (!cookie) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ }
} else {
err = -EINVAL;
if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
@@ -580,6 +617,14 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
if (err < 0)
goto err_mod;
+ if (name == NULL && tb[TCA_ACT_COOKIE]) {
+ if (a->act_cookie) {
+ kfree(a->act_cookie->data);
+ kfree(a->act_cookie);
+ }
+ a->act_cookie = cookie;
+ }
+
/* module count goes up only when brand new policy is created
* if it exists and is only bound to in a_o->init() then
* ACT_P_CREATED is not returned (a zero is).
@@ -592,6 +637,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
err_mod:
module_put(a_o->owner);
err_out:
+ if (cookie) {
+ kfree(cookie->data);
+ kfree(cookie);
+ }
return ERR_PTR(err);
}
@@ -670,8 +719,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
goto errout;
if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
- gnet_stats_copy_rate_est(&d, &p->tcfa_bstats,
- &p->tcfa_rate_est) < 0 ||
+ gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
gnet_stats_copy_queue(&d, p->cpu_qstats,
&p->tcfa_qstats,
p->tcfa_qstats.qlen) < 0)
@@ -820,10 +868,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
goto out_module_put;
err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
- if (err < 0)
+ if (err <= 0)
goto out_module_put;
- if (err == 0)
- goto noflush_out;
nla_nest_end(skb, nest);
@@ -840,7 +886,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
out_module_put:
module_put(ops->owner);
err_out:
-noflush_out:
kfree_skb(skb);
return err;
}
@@ -903,8 +948,6 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
goto err;
}
act->order = i;
- if (event == RTM_GETACTION)
- act->tcfa_refcnt++;
list_add_tail(&act->list, &actions);
}
@@ -917,7 +960,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
return ret;
}
err:
- tcf_action_destroy(&actions, 0);
+ if (event != RTM_GETACTION)
+ tcf_action_destroy(&actions, 0);
return ret;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 1d3960033f61..520baa41cba3 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -28,12 +28,11 @@ struct tcf_bpf_cfg {
struct bpf_prog *filter;
struct sock_filter *bpf_ops;
const char *bpf_name;
- u32 bpf_fd;
u16 bpf_num_ops;
bool is_ebpf;
};
-static int bpf_net_id;
+static unsigned int bpf_net_id;
static struct tc_action_ops act_bpf_ops;
static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
@@ -118,13 +117,18 @@ static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
struct sk_buff *skb)
{
- if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd))
- return -EMSGSIZE;
+ struct nlattr *nla;
if (prog->bpf_name &&
nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
return -EMSGSIZE;
+ nla = nla_reserve(skb, TCA_ACT_BPF_TAG, sizeof(prog->filter->tag));
+ if (nla == NULL)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), prog->filter->tag, nla_len(nla));
+
return 0;
}
@@ -226,16 +230,13 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
return PTR_ERR(fp);
if (tb[TCA_ACT_BPF_NAME]) {
- name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
- nla_len(tb[TCA_ACT_BPF_NAME]),
- GFP_KERNEL);
+ name = nla_memdup(tb[TCA_ACT_BPF_NAME], GFP_KERNEL);
if (!name) {
bpf_prog_put(fp);
return -ENOMEM;
}
}
- cfg->bpf_fd = bpf_fd;
cfg->bpf_name = name;
cfg->filter = fp;
cfg->is_ebpf = true;
@@ -334,8 +335,6 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (cfg.bpf_num_ops)
prog->bpf_num_ops = cfg.bpf_num_ops;
- if (cfg.bpf_fd)
- prog->bpf_fd = cfg.bpf_fd;
prog->tcf_action = parm->action;
rcu_assign_pointer(prog->filter, cfg.filter);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index eae07a2e774d..f9bb43c25697 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -30,7 +30,7 @@
#define CONNMARK_TAB_MASK 3
-static int connmark_net_id;
+static unsigned int connmark_net_id;
static struct tc_action_ops act_connmark_ops;
static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
@@ -113,6 +113,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
if (ret < 0)
return ret;
+ if (!tb[TCA_CONNMARK_PARMS])
+ return -EINVAL;
+
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
if (!tcf_hash_check(tn, parm->index, a, bind)) {
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index e0defcef376d..e978ccd4402c 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -30,6 +30,7 @@
#include <net/tcp.h>
#include <net/udp.h>
#include <net/ip6_checksum.h>
+#include <net/sctp/checksum.h>
#include <net/act_api.h>
@@ -42,7 +43,7 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
};
-static int csum_net_id;
+static unsigned int csum_net_id;
static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
@@ -322,6 +323,25 @@ ignore_obscure_skb:
return 1;
}
+/* Rewrite the SCTP checksum of @skb.
+ *
+ * @ihl is the offset of the SCTP header from the network header; @ihl and
+ * @ipl are handed unchanged to tcf_csum_skb_nextlayer() to obtain the header.
+ *
+ * Returns 1 when the checksum was recomputed, or when the skb is SCTP GSO
+ * and is left untouched; returns 0 when the SCTP header cannot be obtained.
+ */
+static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
+			 unsigned int ipl)
+{
+	struct sctphdr *hdr;
+
+	if (skb_is_gso(skb)) {
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)
+			return 1;
+	}
+
+	hdr = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*hdr));
+	if (hdr) {
+		hdr->checksum = sctp_compute_cksum(skb,
+						   skb_network_offset(skb) + ihl);
+		skb->ip_summed = CHECKSUM_NONE;
+		return 1;
+	}
+
+	return 0;
+}
+
static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
{
const struct iphdr *iph;
@@ -365,6 +385,11 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
ntohs(iph->tot_len), 1))
goto fail;
break;
+ case IPPROTO_SCTP:
+ if ((update_flags & TCA_CSUM_UPDATE_FLAG_SCTP) &&
+ !tcf_csum_sctp(skb, iph->ihl * 4, ntohs(iph->tot_len)))
+ goto fail;
+ break;
}
if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
@@ -481,6 +506,11 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
pl + sizeof(*ip6h), 1))
goto fail;
goto done;
+ case IPPROTO_SCTP:
+ if ((update_flags & TCA_CSUM_UPDATE_FLAG_SCTP) &&
+ !tcf_csum_sctp(skb, hl, pl + sizeof(*ip6h)))
+ goto fail;
+ goto done;
default:
goto ignore_skb;
}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e0aa30f83c6c..e6c874a2b283 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -25,7 +25,7 @@
#define GACT_TAB_MASK 15
-static int gact_net_id;
+static unsigned int gact_net_id;
static struct tc_action_ops act_gact_ops;
#ifdef CONFIG_GACT_PROB
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 95c463cbb9a6..71e7ff22f7c9 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -32,10 +32,11 @@
#include <uapi/linux/tc_act/tc_ife.h>
#include <net/tc_act/tc_ife.h>
#include <linux/etherdevice.h>
+#include <net/ife.h>
#define IFE_TAB_MASK 15
-static int ife_net_id;
+static unsigned int ife_net_id;
static int max_metacnt = IFE_META_MAX + 1;
static struct tc_action_ops act_ife_ops;
@@ -46,23 +47,6 @@ static const struct nla_policy ife_policy[TCA_IFE_MAX + 1] = {
[TCA_IFE_TYPE] = { .type = NLA_U16},
};
-/* Caller takes care of presenting data in network order
-*/
-int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
-{
- u32 *tlv = (u32 *)(skbdata);
- u16 totlen = nla_total_size(dlen); /*alignment + hdr */
- char *dptr = (char *)tlv + NLA_HDRLEN;
- u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN);
-
- *tlv = htonl(htlv);
- memset(dptr, 0, totlen - NLA_HDRLEN);
- memcpy(dptr, dval, dlen);
-
- return totlen;
-}
-EXPORT_SYMBOL_GPL(ife_tlv_meta_encode);
-
int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi)
{
u16 edata = 0;
@@ -637,69 +621,59 @@ int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
return 0;
}
-struct ifeheadr {
- __be16 metalen;
- u8 tlv_data[];
-};
-
-struct meta_tlvhdr {
- __be16 type;
- __be16 len;
-};
-
static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
- struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data;
- int ifehdrln = (int)ifehdr->metalen;
- struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data);
+ u8 *ifehdr_end;
+ u8 *tlv_data;
+ u16 metalen;
spin_lock(&ife->tcf_lock);
bstats_update(&ife->tcf_bstats, skb);
tcf_lastuse_update(&ife->tcf_tm);
spin_unlock(&ife->tcf_lock);
- ifehdrln = ntohs(ifehdrln);
- if (unlikely(!pskb_may_pull(skb, ifehdrln))) {
+ if (skb_at_tc_ingress(skb))
+ skb_push(skb, skb->dev->hard_header_len);
+
+ tlv_data = ife_decode(skb, &metalen);
+ if (unlikely(!tlv_data)) {
spin_lock(&ife->tcf_lock);
ife->tcf_qstats.drops++;
spin_unlock(&ife->tcf_lock);
return TC_ACT_SHOT;
}
- skb_set_mac_header(skb, ifehdrln);
- __skb_pull(skb, ifehdrln);
- skb->protocol = eth_type_trans(skb, skb->dev);
- ifehdrln -= IFE_METAHDRLEN;
-
- while (ifehdrln > 0) {
- u8 *tlvdata = (u8 *)tlv;
- u16 mtype = tlv->type;
- u16 mlen = tlv->len;
- u16 alen;
+ ifehdr_end = tlv_data + metalen;
+ for (; tlv_data < ifehdr_end; tlv_data = ife_tlv_meta_next(tlv_data)) {
+ u8 *curr_data;
+ u16 mtype;
+ u16 dlen;
- mtype = ntohs(mtype);
- mlen = ntohs(mlen);
- alen = NLA_ALIGN(mlen);
+ curr_data = ife_tlv_meta_decode(tlv_data, &mtype, &dlen, NULL);
- if (find_decode_metaid(skb, ife, mtype, (mlen - NLA_HDRLEN),
- (void *)(tlvdata + NLA_HDRLEN))) {
+ if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) {
/* abuse overlimits to count when we receive metadata
* but dont have an ops for it
*/
- pr_info_ratelimited("Unknown metaid %d alnlen %d\n",
- mtype, mlen);
+ pr_info_ratelimited("Unknown metaid %d dlen %d\n",
+ mtype, dlen);
ife->tcf_qstats.overlimits++;
}
+ }
- tlvdata += alen;
- ifehdrln -= alen;
- tlv = (struct meta_tlvhdr *)tlvdata;
+ if (WARN_ON(tlv_data != ifehdr_end)) {
+ spin_lock(&ife->tcf_lock);
+ ife->tcf_qstats.drops++;
+ spin_unlock(&ife->tcf_lock);
+ return TC_ACT_SHOT;
}
+ skb->protocol = eth_type_trans(skb, skb->dev);
skb_reset_network_header(skb);
+
return action;
}
@@ -727,7 +701,6 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
struct ethhdr *oethh; /* outer ether header */
- struct ethhdr *iethh; /* inner eth header */
struct tcf_meta_info *e;
/*
OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA
@@ -735,13 +708,13 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
*/
u16 metalen = ife_get_sz(skb, ife);
int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN;
- unsigned int skboff = skb->dev->hard_header_len;
- u32 at = G_TC_AT(skb->tc_verd);
+ unsigned int skboff = 0;
int new_len = skb->len + hdrm;
bool exceed_mtu = false;
- int err;
+ void *ife_meta;
+ int err = 0;
- if (at & AT_EGRESS) {
+ if (!skb_at_tc_ingress(skb)) {
if (new_len > skb->dev->mtu)
exceed_mtu = true;
}
@@ -766,27 +739,10 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
return TC_ACT_SHOT;
}
- err = skb_cow_head(skb, hdrm);
- if (unlikely(err)) {
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
- return TC_ACT_SHOT;
- }
-
- if (!(at & AT_EGRESS))
+ if (skb_at_tc_ingress(skb))
skb_push(skb, skb->dev->hard_header_len);
- iethh = (struct ethhdr *)skb->data;
- __skb_push(skb, hdrm);
- memcpy(skb->data, iethh, skb->mac_len);
- skb_reset_mac_header(skb);
- oethh = eth_hdr(skb);
-
- /*total metadata length */
- metalen += IFE_METAHDRLEN;
- metalen = htons(metalen);
- memcpy((skb->data + skboff), &metalen, IFE_METAHDRLEN);
- skboff += IFE_METAHDRLEN;
+ ife_meta = ife_encode(skb, metalen);
/* XXX: we dont have a clever way of telling encode to
* not repeat some of the computations that are done by
@@ -794,7 +750,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
*/
list_for_each_entry(e, &ife->metalist, metalist) {
if (e->ops->encode) {
- err = e->ops->encode(skb, (void *)(skb->data + skboff),
+ err = e->ops->encode(skb, (void *)(ife_meta + skboff),
e);
}
if (err < 0) {
@@ -805,18 +761,15 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
}
skboff += err;
}
+ oethh = (struct ethhdr *)skb->data;
if (!is_zero_ether_addr(ife->eth_src))
ether_addr_copy(oethh->h_source, ife->eth_src);
- else
- ether_addr_copy(oethh->h_source, iethh->h_source);
if (!is_zero_ether_addr(ife->eth_dst))
ether_addr_copy(oethh->h_dest, ife->eth_dst);
- else
- ether_addr_copy(oethh->h_dest, iethh->h_dest);
oethh->h_proto = htons(ife->eth_type);
- if (!(at & AT_EGRESS))
+ if (skb_at_tc_ingress(skb))
skb_pull(skb, skb->dev->hard_header_len);
spin_unlock(&ife->tcf_lock);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 378c1c976058..992ef8d624f1 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -30,10 +30,10 @@
#define IPT_TAB_MASK 15
-static int ipt_net_id;
+static unsigned int ipt_net_id;
static struct tc_action_ops act_ipt_ops;
-static int xt_net_id;
+static unsigned int xt_net_id;
static struct tc_action_ops act_xt_ops;
static int ipt_init_target(struct xt_entry_target *t, char *table,
@@ -213,6 +213,12 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
int ret = 0, result = 0;
struct tcf_ipt *ipt = to_ipt(a);
struct xt_action_param par;
+ struct nf_hook_state state = {
+ .net = dev_net(skb->dev),
+ .in = skb->dev,
+ .hook = ipt->tcfi_hook,
+ .pf = NFPROTO_IPV4,
+ };
if (skb_unclone(skb, GFP_ATOMIC))
return TC_ACT_UNSPEC;
@@ -226,13 +232,9 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
* worry later - danger - this API seems to have changed
* from earlier kernels
*/
- par.net = dev_net(skb->dev);
- par.in = skb->dev;
- par.out = NULL;
- par.hooknum = ipt->tcfi_hook;
+ par.state = &state;
par.target = ipt->tcfi_t->u.kernel.target;
par.targinfo = ipt->tcfi_t->data;
- par.family = NFPROTO_IPV4;
ret = par.target->target(skb, &par);
switch (ret) {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6b07fba5770b..af49c7dca860 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -21,18 +21,36 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/gfp.h>
+#include <linux/if_arp.h>
#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_mirred.h>
#include <net/tc_act/tc_mirred.h>
-#include <linux/if_arp.h>
-
#define MIRRED_TAB_MASK 7
static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
+/* True for the two redirect actions, false for every other value
+ * (including the mirror actions).
+ */
+static bool tcf_mirred_is_act_redirect(int action)
+{
+	switch (action) {
+	case TCA_EGRESS_REDIR:
+	case TCA_INGRESS_REDIR:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Report whether @action targets the ingress side (true) or the egress
+ * side (false). Any value other than the four mirred actions hits BUG().
+ */
+static bool tcf_mirred_act_wants_ingress(int action)
+{
+	if (action == TCA_INGRESS_REDIR || action == TCA_INGRESS_MIRROR)
+		return true;
+
+	if (action == TCA_EGRESS_REDIR || action == TCA_EGRESS_MIRROR)
+		return false;
+
+	BUG();
+}
+
static void tcf_mirred_release(struct tc_action *a, int bind)
{
struct tcf_mirred *m = to_mirred(a);
@@ -51,7 +69,7 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
[TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
};
-static int mirred_net_id;
+static unsigned int mirred_net_id;
static struct tc_action_ops act_mirred_ops;
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
@@ -60,11 +78,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
+ bool mac_header_xmit = false;
struct tc_mirred *parm;
struct tcf_mirred *m;
struct net_device *dev;
- int ret, ok_push = 0;
bool exists = false;
+ int ret;
if (nla == NULL)
return -EINVAL;
@@ -82,6 +101,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
switch (parm->eaction) {
case TCA_EGRESS_MIRROR:
case TCA_EGRESS_REDIR:
+ case TCA_INGRESS_REDIR:
+ case TCA_INGRESS_MIRROR:
break;
default:
if (exists)
@@ -95,19 +116,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
tcf_hash_release(*a, bind);
return -ENODEV;
}
- switch (dev->type) {
- case ARPHRD_TUNNEL:
- case ARPHRD_TUNNEL6:
- case ARPHRD_SIT:
- case ARPHRD_IPGRE:
- case ARPHRD_VOID:
- case ARPHRD_NONE:
- ok_push = 0;
- break;
- default:
- ok_push = 1;
- break;
- }
+ mac_header_xmit = dev_is_mac_header_xmit(dev);
} else {
dev = NULL;
}
@@ -136,7 +145,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
dev_hold(dev);
rcu_assign_pointer(m->tcfm_dev, dev);
- m->tcfm_ok_push = ok_push;
+ m->tcfm_mac_header_xmit = mac_header_xmit;
}
if (ret == ACT_P_CREATED) {
@@ -153,15 +162,19 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_mirred *m = to_mirred(a);
+ bool m_mac_header_xmit;
struct net_device *dev;
struct sk_buff *skb2;
- int retval, err;
- u32 at;
+ int retval, err = 0;
+ int m_eaction;
+ int mac_len;
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
rcu_read_lock();
+ m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
+ m_eaction = READ_ONCE(m->tcfm_eaction);
retval = READ_ONCE(m->tcf_action);
dev = rcu_dereference(m->tcfm_dev);
if (unlikely(!dev)) {
@@ -175,28 +188,43 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
- at = G_TC_AT(skb->tc_verd);
skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2)
goto out;
- if (!(at & AT_EGRESS)) {
- if (m->tcfm_ok_push)
+ /* If action's target direction differs from filter's direction,
+ * and devices expect a mac header on xmit, then mac push/pull is
+ * needed.
+ */
+ if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) &&
+ m_mac_header_xmit) {
+ if (!skb_at_tc_ingress(skb)) {
+ /* caught at egress, act ingress: pull mac */
+ mac_len = skb_network_header(skb) - skb_mac_header(skb);
+ skb_pull_rcsum(skb2, mac_len);
+ } else {
+ /* caught at ingress, act egress: push mac */
skb_push_rcsum(skb2, skb->mac_len);
+ }
}
/* mirror is always swallowed */
- if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
- skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
+ if (tcf_mirred_is_act_redirect(m_eaction)) {
+ skb2->tc_redirected = 1;
+ skb2->tc_from_ingress = skb2->tc_at_ingress;
+ }
skb2->skb_iif = skb->dev->ifindex;
skb2->dev = dev;
- err = dev_queue_xmit(skb2);
+ if (!tcf_mirred_act_wants_ingress(m_eaction))
+ err = dev_queue_xmit(skb2);
+ else
+ err = netif_receive_skb(skb2);
if (err) {
out:
qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
- if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
+ if (tcf_mirred_is_act_redirect(m_eaction))
retval = TC_ACT_SHOT;
}
rcu_read_unlock();
@@ -286,6 +314,17 @@ static struct notifier_block mirred_device_notifier = {
.notifier_call = mirred_device_event,
};
+/* Look up the net_device that mirred action @a points at, by ifindex,
+ * inside @net. The result (possibly NULL) is always stored in *@mirred_dev.
+ * Returns 0 when a device was found and -EINVAL otherwise.
+ */
+static int tcf_mirred_device(const struct tc_action *a, struct net *net,
+			     struct net_device **mirred_dev)
+{
+	struct net_device *dev;
+
+	dev = __dev_get_by_index(net, tcf_mirred_ifindex(a));
+	*mirred_dev = dev;
+
+	return dev ? 0 : -EINVAL;
+}
+
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
.type = TCA_ACT_MIRRED,
@@ -298,6 +337,7 @@ static struct tc_action_ops act_mirred_ops = {
.walk = tcf_mirred_walker,
.lookup = tcf_mirred_search,
.size = sizeof(struct tcf_mirred),
+ .get_dev = tcf_mirred_device,
};
static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 8e8b0cc30704..9b6aec665495 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -31,7 +31,7 @@
#define NAT_TAB_MASK 15
-static int nat_net_id;
+static unsigned int nat_net_id;
static struct tc_action_ops act_nat_ops;
static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index cf9b2fe8eac6..c1310472f620 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -22,26 +22,126 @@
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_pedit.h>
+#include <uapi/linux/tc_act/tc_pedit.h>
#define PEDIT_TAB_MASK 15
-static int pedit_net_id;
+static unsigned int pedit_net_id;
static struct tc_action_ops act_pedit_ops;
static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
[TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) },
+ [TCA_PEDIT_KEYS_EX] = { .type = NLA_NESTED },
};
+/* Netlink policy for one nested TCA_PEDIT_KEY_EX entry: both the header
+ * type and the command are carried as u16 attributes.
+ */
+static const struct nla_policy pedit_key_ex_policy[TCA_PEDIT_KEY_EX_MAX + 1] = {
+ [TCA_PEDIT_KEY_EX_HTYPE] = { .type = NLA_U16 },
+ [TCA_PEDIT_KEY_EX_CMD] = { .type = NLA_U16 },
+};
+
+/* Parse the nested TCA_PEDIT_KEYS_EX attribute into an array of @n
+ * extended keys.
+ *
+ * Returns NULL when there is nothing to parse (@nla is NULL or @n is 0),
+ * a kcalloc()ed array of exactly @n entries on success (caller frees it
+ * with kfree()), or an ERR_PTR() on failure:
+ *   -ENOMEM  allocation failed
+ *   -EINVAL  wrong nested attribute type, missing HTYPE/CMD, out-of-range
+ *            values, or the number of nested keys differs from @n
+ *   other    whatever nla_parse_nested() reported
+ */
+static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla,
+							u8 n)
+{
+	struct tcf_pedit_key_ex *keys_ex;
+	struct tcf_pedit_key_ex *k;
+	const struct nlattr *ka;
+	int err = -EINVAL;
+	int rem;
+
+	if (!nla || !n)
+		return NULL;
+
+	keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL);
+	if (!keys_ex)
+		return ERR_PTR(-ENOMEM);
+
+	k = keys_ex;
+
+	nla_for_each_nested(ka, nla, rem) {
+		struct nlattr *tb[TCA_PEDIT_KEY_EX_MAX + 1];
+
+		/* more nested keys than announced: would overrun keys_ex */
+		if (!n) {
+			err = -EINVAL;
+			goto err_out;
+		}
+		n--;
+
+		if (nla_type(ka) != TCA_PEDIT_KEY_EX) {
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka,
+				       pedit_key_ex_policy);
+		if (err)
+			goto err_out;
+
+		if (!tb[TCA_PEDIT_KEY_EX_HTYPE] ||
+		    !tb[TCA_PEDIT_KEY_EX_CMD]) {
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		k->htype = nla_get_u16(tb[TCA_PEDIT_KEY_EX_HTYPE]);
+		k->cmd = nla_get_u16(tb[TCA_PEDIT_KEY_EX_CMD]);
+
+		if (k->htype > TCA_PEDIT_HDR_TYPE_MAX ||
+		    k->cmd > TCA_PEDIT_CMD_MAX) {
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		k++;
+	}
+
+	/* Fewer nested keys than announced. err still holds 0 from the last
+	 * successful nla_parse_nested(), so it must be set here; otherwise
+	 * ERR_PTR(0) == NULL is returned and the caller mistakes the
+	 * malformed request for "no extended keys".
+	 */
+	if (n) {
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	return keys_ex;
+
+err_out:
+	kfree(keys_ex);
+	return ERR_PTR(err);
+}
+
+/* Emit @n extended keys from @keys_ex into @skb as a TCA_PEDIT_KEYS_EX
+ * nest holding one TCA_PEDIT_KEY_EX nest (HTYPE + CMD) per key.
+ *
+ * Returns 0 on success. Returns -EINVAL when the skb runs out of room;
+ * in that case everything written since the outer nest was opened is
+ * trimmed off again.
+ */
+static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
+				 struct tcf_pedit_key_ex *keys_ex, int n)
+{
+	struct nlattr *keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX);
+
+	/* nla_nest_start() returns NULL when the skb has no tailroom; nothing
+	 * has been written yet, so there is nothing to trim.
+	 */
+	if (!keys_start)
+		return -EINVAL;
+
+	for (; n > 0; n--) {
+		struct nlattr *key_start;
+
+		key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX);
+		/* must not reach nla_nest_end() with a NULL start pointer */
+		if (!key_start)
+			goto nla_failure;
+
+		if (nla_put_u16(skb, TCA_PEDIT_KEY_EX_HTYPE, keys_ex->htype) ||
+		    nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd))
+			goto nla_failure;
+
+		nla_nest_end(skb, key_start);
+
+		keys_ex++;
+	}
+
+	nla_nest_end(skb, keys_start);
+
+	return 0;
+
+nla_failure:
+	nlmsg_trim(skb, keys_start);
+	return -EINVAL;
+}
+
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
+ struct nlattr *pattr;
struct tc_pedit *parm;
int ret = 0, err;
struct tcf_pedit *p;
struct tc_pedit_key *keys = NULL;
+ struct tcf_pedit_key_ex *keys_ex;
int ksize;
if (nla == NULL)
@@ -51,13 +151,21 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (err < 0)
return err;
- if (tb[TCA_PEDIT_PARMS] == NULL)
+ pattr = tb[TCA_PEDIT_PARMS];
+ if (!pattr)
+ pattr = tb[TCA_PEDIT_PARMS_EX];
+ if (!pattr)
return -EINVAL;
- parm = nla_data(tb[TCA_PEDIT_PARMS]);
+
+ parm = nla_data(pattr);
ksize = parm->nkeys * sizeof(struct tc_pedit_key);
- if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize)
+ if (nla_len(pattr) < sizeof(*parm) + ksize)
return -EINVAL;
+ keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
+ if (IS_ERR(keys_ex))
+ return PTR_ERR(keys_ex);
+
if (!tcf_hash_check(tn, parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
@@ -69,6 +177,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
tcf_hash_cleanup(*a, est);
+ kfree(keys_ex);
return -ENOMEM;
}
ret = ACT_P_CREATED;
@@ -81,8 +190,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
p = to_pedit(*a);
if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
keys = kmalloc(ksize, GFP_KERNEL);
- if (keys == NULL)
+ if (!keys) {
+ kfree(keys_ex);
return -ENOMEM;
+ }
}
}
@@ -95,6 +206,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
p->tcfp_nkeys = parm->nkeys;
}
memcpy(p->tcfp_keys, parm->keys, ksize);
+
+ kfree(p->tcfp_keys_ex);
+ p->tcfp_keys_ex = keys_ex;
+
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
tcf_hash_insert(tn, *a);
@@ -106,6 +221,7 @@ static void tcf_pedit_cleanup(struct tc_action *a, int bind)
struct tcf_pedit *p = to_pedit(a);
struct tc_pedit_key *keys = p->tcfp_keys;
kfree(keys);
+ kfree(p->tcfp_keys_ex);
}
static bool offset_valid(struct sk_buff *skb, int offset)
@@ -119,38 +235,88 @@ static bool offset_valid(struct sk_buff *skb, int offset)
return true;
}
+/* Translate a pedit header type into the offset of that header in @skb.
+ *
+ * On success stores the offset in *@hoffset and returns 0. Returns -EINVAL,
+ * leaving *@hoffset untouched, for an unknown @htype or when the requested
+ * MAC/transport header has not been set on the skb.
+ */
+static int pedit_skb_hdr_offset(struct sk_buff *skb,
+				enum pedit_header_type htype, int *hoffset)
+{
+	switch (htype) {
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
+		if (!skb_mac_header_was_set(skb))
+			return -EINVAL;
+		*hoffset = skb_mac_offset(skb);
+		return 0;
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK:
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
+		*hoffset = skb_network_offset(skb);
+		return 0;
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
+		if (!skb_transport_header_was_set(skb))
+			return -EINVAL;
+		*hoffset = skb_transport_offset(skb);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_pedit *p = to_pedit(a);
int i;
- unsigned int off;
if (skb_unclone(skb, GFP_ATOMIC))
return p->tcf_action;
- off = skb_network_offset(skb);
-
spin_lock(&p->tcf_lock);
tcf_lastuse_update(&p->tcf_tm);
if (p->tcfp_nkeys > 0) {
struct tc_pedit_key *tkey = p->tcfp_keys;
+ struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex;
+ enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+ enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
u32 *ptr, _data;
int offset = tkey->off;
+ int hoffset;
+ u32 val;
+ int rc;
+
+ if (tkey_ex) {
+ htype = tkey_ex->htype;
+ cmd = tkey_ex->cmd;
+
+ tkey_ex++;
+ }
+
+ rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
+ if (rc) {
+ pr_info("tc filter pedit bad header type specified (0x%x)\n",
+ htype);
+ goto bad;
+ }
if (tkey->offmask) {
char *d, _d;
- if (!offset_valid(skb, off + tkey->at)) {
+ if (!offset_valid(skb, hoffset + tkey->at)) {
pr_info("tc filter pedit 'at' offset %d out of bounds\n",
- off + tkey->at);
+ hoffset + tkey->at);
goto bad;
}
- d = skb_header_pointer(skb, off + tkey->at, 1,
+ d = skb_header_pointer(skb, hoffset + tkey->at, 1,
&_d);
if (!d)
goto bad;
@@ -163,19 +329,32 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
goto bad;
}
- if (!offset_valid(skb, off + offset)) {
+ if (!offset_valid(skb, hoffset + offset)) {
pr_info("tc filter pedit offset %d out of bounds\n",
- offset);
+ hoffset + offset);
goto bad;
}
- ptr = skb_header_pointer(skb, off + offset, 4, &_data);
+ ptr = skb_header_pointer(skb, hoffset + offset, 4, &_data);
if (!ptr)
goto bad;
/* just do it, baby */
- *ptr = ((*ptr & tkey->mask) ^ tkey->val);
+ switch (cmd) {
+ case TCA_PEDIT_KEY_EX_CMD_SET:
+ val = tkey->val;
+ break;
+ case TCA_PEDIT_KEY_EX_CMD_ADD:
+ val = (*ptr + tkey->val) & ~tkey->mask;
+ break;
+ default:
+ pr_info("tc filter pedit bad command (%d)\n",
+ cmd);
+ goto bad;
+ }
+
+ *ptr = ((*ptr & tkey->mask) ^ val);
if (ptr == &_data)
- skb_store_bits(skb, off + offset, ptr, 4);
+ skb_store_bits(skb, hoffset + offset, ptr, 4);
}
goto done;
@@ -215,8 +394,15 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
opt->refcnt = p->tcf_refcnt - ref;
opt->bindcnt = p->tcf_bindcnt - bind;
- if (nla_put(skb, TCA_PEDIT_PARMS, s, opt))
- goto nla_put_failure;
+ if (p->tcfp_keys_ex) {
+ tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys);
+
+ if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt))
+ goto nla_put_failure;
+ } else {
+ if (nla_put(skb, TCA_PEDIT_PARMS, s, opt))
+ goto nla_put_failure;
+ }
tcf_tm_dump(&t, &p->tcf_tm);
if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD))
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index d1bd248fe146..0ba91d1ce994 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -55,7 +55,7 @@ struct tc_police_compat {
/* Each policer is serialized by its individual spinlock */
-static int police_net_id;
+static unsigned int police_net_id;
static struct tc_action_ops act_police_ops;
static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
@@ -142,8 +142,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
goto failure_unlock;
} else if (tb[TCA_POLICE_AVRATE] &&
(ret == ACT_P_CREATED ||
- !gen_estimator_active(&police->tcf_bstats,
- &police->tcf_rate_est))) {
+ !gen_estimator_active(&police->tcf_rate_est))) {
err = -EINVAL;
goto failure_unlock;
}
@@ -216,13 +215,17 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a,
bstats_update(&police->tcf_bstats, skb);
tcf_lastuse_update(&police->tcf_tm);
- if (police->tcfp_ewma_rate &&
- police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
- police->tcf_qstats.overlimits++;
- if (police->tcf_action == TC_ACT_SHOT)
- police->tcf_qstats.drops++;
- spin_unlock(&police->tcf_lock);
- return police->tcf_action;
+ if (police->tcfp_ewma_rate) {
+ struct gnet_stats_rate_est64 sample;
+
+ if (!gen_estimator_read(&police->tcf_rate_est, &sample) ||
+ sample.bps >= police->tcfp_ewma_rate) {
+ police->tcf_qstats.overlimits++;
+ if (police->tcf_action == TC_ACT_SHOT)
+ police->tcf_qstats.drops++;
+ spin_unlock(&police->tcf_lock);
+ return police->tcf_action;
+ }
}
if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
new file mode 100644
index 000000000000..0b8217b4763f
--- /dev/null
+++ b/net/sched/act_sample.c
@@ -0,0 +1,276 @@
+/*
+ * net/sched/act_sample.c - Packet sampling tc action
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+#include <net/net_namespace.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_sample.h>
+#include <net/tc_act/tc_sample.h>
+#include <net/psample.h>
+
+#include <linux/if_arp.h>
+
+#define SAMPLE_TAB_MASK 7
+static unsigned int sample_net_id;
+static struct tc_action_ops act_sample_ops;
+
+static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
+ [TCA_SAMPLE_PARMS] = { .len = sizeof(struct tc_sample) },
+ [TCA_SAMPLE_RATE] = { .type = NLA_U32 },
+ [TCA_SAMPLE_TRUNC_SIZE] = { .type = NLA_U32 },
+ [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NLA_U32 },
+};
+
+static int tcf_sample_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action **a, int ovr,
+ int bind)
+{
+ struct tc_action_net *tn = net_generic(net, sample_net_id);
+ struct nlattr *tb[TCA_SAMPLE_MAX + 1];
+ struct psample_group *psample_group;
+ struct tc_sample *parm;
+ struct tcf_sample *s;
+ bool exists = false;
+ int ret;
+
+ if (!nla)
+ return -EINVAL;
+ ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy);
+ if (ret < 0)
+ return ret;
+ if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] ||
+ !tb[TCA_SAMPLE_PSAMPLE_GROUP])
+ return -EINVAL;
+
+ parm = nla_data(tb[TCA_SAMPLE_PARMS]);
+
+ exists = tcf_hash_check(tn, parm->index, a, bind);
+ if (exists && bind)
+ return 0;
+
+ if (!exists) {
+ ret = tcf_hash_create(tn, parm->index, est, a,
+ &act_sample_ops, bind, false);
+ if (ret)
+ return ret;
+ ret = ACT_P_CREATED;
+ } else {
+ tcf_hash_release(*a, bind);
+ if (!ovr)
+ return -EEXIST;
+ }
+ s = to_sample(*a);
+
+ s->tcf_action = parm->action;
+ s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]);
+ s->psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
+ psample_group = psample_group_get(net, s->psample_group_num);
+ if (!psample_group) {
+ if (ret == ACT_P_CREATED)
+ tcf_hash_release(*a, bind);
+ return -ENOMEM;
+ }
+ RCU_INIT_POINTER(s->psample_group, psample_group);
+
+ if (tb[TCA_SAMPLE_TRUNC_SIZE]) {
+ s->truncate = true;
+ s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]);
+ }
+
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(tn, *a);
+ return ret;
+}
+
+static void tcf_sample_cleanup_rcu(struct rcu_head *rcu)
+{
+ struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu);
+ struct psample_group *psample_group;
+
+ psample_group = rcu_dereference_protected(s->psample_group, 1);
+ RCU_INIT_POINTER(s->psample_group, NULL);
+ psample_group_put(psample_group);
+}
+
+static void tcf_sample_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_sample *s = to_sample(a);
+
+ call_rcu(&s->rcu, tcf_sample_cleanup_rcu);
+}
+
+static bool tcf_sample_dev_ok_push(struct net_device *dev)
+{
+ switch (dev->type) {
+ case ARPHRD_TUNNEL:
+ case ARPHRD_TUNNEL6:
+ case ARPHRD_SIT:
+ case ARPHRD_IPGRE:
+ case ARPHRD_VOID:
+ case ARPHRD_NONE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_sample *s = to_sample(a);
+ struct psample_group *psample_group;
+ int retval;
+ int size;
+ int iif;
+ int oif;
+
+ tcf_lastuse_update(&s->tcf_tm);
+ bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
+ retval = READ_ONCE(s->tcf_action);
+
+ rcu_read_lock();
+ psample_group = rcu_dereference(s->psample_group);
+
+ /* randomly sample packets according to rate */
+ if (psample_group && (prandom_u32() % s->rate == 0)) {
+ if (!skb_at_tc_ingress(skb)) {
+ iif = skb->skb_iif;
+ oif = skb->dev->ifindex;
+ } else {
+ iif = skb->dev->ifindex;
+ oif = 0;
+ }
+
+ /* on ingress, the mac header gets popped, so push it back */
+ if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
+ skb_push(skb, skb->mac_len);
+
+ size = s->truncate ? s->trunc_size : skb->len;
+ psample_sample_packet(psample_group, skb, size, iif, oif,
+ s->rate);
+
+ if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
+ skb_pull(skb, skb->mac_len);
+ }
+
+ rcu_read_unlock();
+ return retval;
+}
+
+static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_sample *s = to_sample(a);
+ struct tc_sample opt = {
+ .index = s->tcf_index,
+ .action = s->tcf_action,
+ .refcnt = s->tcf_refcnt - ref,
+ .bindcnt = s->tcf_bindcnt - bind,
+ };
+ struct tcf_t t;
+
+ if (nla_put(skb, TCA_SAMPLE_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
+
+ tcf_tm_dump(&t, &s->tcf_tm);
+ if (nla_put_64bit(skb, TCA_SAMPLE_TM, sizeof(t), &t, TCA_SAMPLE_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_SAMPLE_RATE, s->rate))
+ goto nla_put_failure;
+
+ if (s->truncate)
+ if (nla_put_u32(skb, TCA_SAMPLE_TRUNC_SIZE, s->trunc_size))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_SAMPLE_PSAMPLE_GROUP, s->psample_group_num))
+ goto nla_put_failure;
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ const struct tc_action_ops *ops)
+{
+ struct tc_action_net *tn = net_generic(net, sample_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, ops);
+}
+
+static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, sample_net_id);
+
+ return tcf_hash_search(tn, a, index);
+}
+
+static struct tc_action_ops act_sample_ops = {
+ .kind = "sample",
+ .type = TCA_ACT_SAMPLE,
+ .owner = THIS_MODULE,
+ .act = tcf_sample_act,
+ .dump = tcf_sample_dump,
+ .init = tcf_sample_init,
+ .cleanup = tcf_sample_cleanup,
+ .walk = tcf_sample_walker,
+ .lookup = tcf_sample_search,
+ .size = sizeof(struct tcf_sample),
+};
+
+static __net_init int sample_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, sample_net_id);
+
+ return tc_action_net_init(tn, &act_sample_ops, SAMPLE_TAB_MASK);
+}
+
+static void __net_exit sample_exit_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, sample_net_id);
+
+ tc_action_net_exit(tn);
+}
+
+static struct pernet_operations sample_net_ops = {
+ .init = sample_init_net,
+ .exit = sample_exit_net,
+ .id = &sample_net_id,
+ .size = sizeof(struct tc_action_net),
+};
+
+static int __init sample_init_module(void)
+{
+ return tcf_register_action(&act_sample_ops, &sample_net_ops);
+}
+
+static void __exit sample_cleanup_module(void)
+{
+ tcf_unregister_action(&act_sample_ops, &sample_net_ops);
+}
+
+module_init(sample_init_module);
+module_exit(sample_cleanup_module);
+
+MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
+MODULE_DESCRIPTION("Packet sampling action");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 289af6f9bb3b..823a73ad0c60 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -26,7 +26,7 @@
#define SIMP_TAB_MASK 7
-static int simp_net_id;
+static unsigned int simp_net_id;
static struct tc_action_ops act_simp_ops;
#define SIMP_MAX_DATA 32
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index a133dcb82132..06ccae3c12ee 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -29,7 +29,7 @@
#define SKBEDIT_TAB_MASK 15
-static int skbedit_net_id;
+static unsigned int skbedit_net_id;
static struct tc_action_ops act_skbedit_ops;
static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
@@ -46,8 +46,10 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
skb->dev->real_num_tx_queues > d->queue_mapping)
skb_set_queue_mapping(skb, d->queue_mapping);
- if (d->flags & SKBEDIT_F_MARK)
- skb->mark = d->mark;
+ if (d->flags & SKBEDIT_F_MARK) {
+ skb->mark &= ~d->mask;
+ skb->mark |= d->mark & d->mask;
+ }
if (d->flags & SKBEDIT_F_PTYPE)
skb->pkt_type = d->ptype;
@@ -61,6 +63,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
[TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) },
[TCA_SKBEDIT_MARK] = { .len = sizeof(u32) },
[TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) },
+ [TCA_SKBEDIT_MASK] = { .len = sizeof(u32) },
};
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -71,7 +74,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
struct tc_skbedit *parm;
struct tcf_skbedit *d;
- u32 flags = 0, *priority = NULL, *mark = NULL;
+ u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
u16 *queue_mapping = NULL, *ptype = NULL;
bool exists = false;
int ret = 0, err;
@@ -108,6 +111,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
mark = nla_data(tb[TCA_SKBEDIT_MARK]);
}
+ if (tb[TCA_SKBEDIT_MASK] != NULL) {
+ flags |= SKBEDIT_F_MASK;
+ mask = nla_data(tb[TCA_SKBEDIT_MASK]);
+ }
+
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
exists = tcf_hash_check(tn, parm->index, a, bind);
@@ -145,6 +153,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
d->mark = *mark;
if (flags & SKBEDIT_F_PTYPE)
d->ptype = *ptype;
+ /* default behaviour is to use all the bits */
+ d->mask = 0xffffffff;
+ if (flags & SKBEDIT_F_MASK)
+ d->mask = *mask;
d->tcf_action = parm->action;
@@ -182,6 +194,9 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
if ((d->flags & SKBEDIT_F_PTYPE) &&
nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
goto nla_put_failure;
+ if ((d->flags & SKBEDIT_F_MASK) &&
+ nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask))
+ goto nla_put_failure;
tcf_tm_dump(&t, &d->tcf_tm);
if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index e7d96381c908..c736627f8f4a 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -22,7 +22,7 @@
#define SKBMOD_TAB_MASK 15
-static int skbmod_net_id;
+static unsigned int skbmod_net_id;
static struct tc_action_ops act_skbmod_ops;
#define MAX_EDIT_LEN ETH_HLEN
@@ -228,7 +228,6 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
return skb->len;
nla_put_failure:
- rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index af47bdf2f483..e3a58e021198 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -16,14 +16,13 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
-#include <net/dst_metadata.h>
#include <linux/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_tunnel_key.h>
#define TUNNEL_KEY_TAB_MASK 15
-static int tunnel_key_net_id;
+static unsigned int tunnel_key_net_id;
static struct tc_action_ops act_tunnel_key_ops;
static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
@@ -67,6 +66,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
[TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) },
[TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 },
+ [TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16},
};
static int tunnel_key_init(struct net *net, struct nlattr *nla,
@@ -81,6 +81,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct tc_tunnel_key *parm;
struct tcf_tunnel_key *t;
bool exists = false;
+ __be16 dst_port = 0;
__be64 key_id;
int ret = 0;
int err;
@@ -111,6 +112,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]));
+ if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
+ dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);
+
if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
__be32 saddr;
@@ -120,7 +124,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
- TUNNEL_KEY, key_id, 0);
+ dst_port, TUNNEL_KEY,
+ key_id, 0);
} else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
struct in6_addr saddr;
@@ -129,8 +134,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
- metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0,
- TUNNEL_KEY, key_id, 0);
+ metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, dst_port,
+ 0, TUNNEL_KEY,
+ key_id, 0);
}
if (!metadata) {
@@ -258,7 +264,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
tunnel_key_dump_addresses(skb,
- &params->tcft_enc_metadata->u.tun_info))
+ &params->tcft_enc_metadata->u.tun_info) ||
+ nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst))
goto nla_put_failure;
}
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index b57fcbcefea1..19e0dba305ce 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -21,7 +21,7 @@
#define VLAN_TAB_MASK 15
-static int vlan_net_id;
+static unsigned int vlan_net_id;
static struct tc_action_ops act_vlan_ops;
static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b05d4a2155b0..732f7cae459d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
@@ -38,14 +39,14 @@ static DEFINE_RWLOCK(cls_mod_lock);
/* Find classifier type by string name */
-static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind)
+static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
{
const struct tcf_proto_ops *t, *res = NULL;
if (kind) {
read_lock(&cls_mod_lock);
list_for_each_entry(t, &tcf_proto_base, head) {
- if (nla_strcmp(kind, t->kind) == 0) {
+ if (strcmp(kind, t->kind) == 0) {
if (try_module_get(t->owner))
res = t;
break;
@@ -127,6 +128,77 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
return first;
}
+static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
+ u32 prio, u32 parent, struct Qdisc *q)
+{
+ struct tcf_proto *tp;
+ int err;
+
+ tp = kzalloc(sizeof(*tp), GFP_KERNEL);
+ if (!tp)
+ return ERR_PTR(-ENOBUFS);
+
+ err = -ENOENT;
+ tp->ops = tcf_proto_lookup_ops(kind);
+ if (!tp->ops) {
+#ifdef CONFIG_MODULES
+ rtnl_unlock();
+ request_module("cls_%s", kind);
+ rtnl_lock();
+ tp->ops = tcf_proto_lookup_ops(kind);
+ /* We dropped the RTNL semaphore in order to perform
+ * the module load. So, even if we succeeded in loading
+ * the module we have to replay the request. We indicate
+ * this using -EAGAIN.
+ */
+ if (tp->ops) {
+ module_put(tp->ops->owner);
+ err = -EAGAIN;
+ } else {
+ err = -ENOENT;
+ }
+ goto errout;
+#endif
+ }
+ tp->classify = tp->ops->classify;
+ tp->protocol = protocol;
+ tp->prio = prio;
+ tp->classid = parent;
+ tp->q = q;
+
+ err = tp->ops->init(tp);
+ if (err) {
+ module_put(tp->ops->owner);
+ goto errout;
+ }
+ return tp;
+
+errout:
+ kfree(tp);
+ return ERR_PTR(err);
+}
+
+static bool tcf_proto_destroy(struct tcf_proto *tp, bool force)
+{
+ if (tp->ops->destroy(tp, force)) {
+ module_put(tp->ops->owner);
+ kfree_rcu(tp, rcu);
+ return true;
+ }
+ return false;
+}
+
+void tcf_destroy_chain(struct tcf_proto __rcu **fl)
+{
+ struct tcf_proto *tp;
+
+ while ((tp = rtnl_dereference(*fl)) != NULL) {
+ RCU_INIT_POINTER(*fl, tp->next);
+ tcf_proto_destroy(tp, true);
+ }
+}
+EXPORT_SYMBOL(tcf_destroy_chain);
+
/* Add/change/delete/get a filter node */
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
@@ -142,19 +214,21 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
struct Qdisc *q;
struct tcf_proto __rcu **back;
struct tcf_proto __rcu **chain;
+ struct tcf_proto *next;
struct tcf_proto *tp;
- const struct tcf_proto_ops *tp_ops;
const struct Qdisc_class_ops *cops;
unsigned long cl;
unsigned long fh;
int err;
- int tp_created = 0;
+ int tp_created;
if ((n->nlmsg_type != RTM_GETTFILTER) &&
!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
replay:
+ tp_created = 0;
+
err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
if (err < 0)
return err;
@@ -220,9 +294,10 @@ replay:
/* And the last stroke */
chain = cops->tcf_chain(q, cl);
- err = -EINVAL;
- if (chain == NULL)
+ if (chain == NULL) {
+ err = -EINVAL;
goto errout;
+ }
if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
tcf_destroy_chain(chain);
@@ -237,10 +312,13 @@ replay:
if (tp->prio >= prio) {
if (tp->prio == prio) {
if (!nprio ||
- (tp->protocol != protocol && protocol))
+ (tp->protocol != protocol && protocol)) {
+ err = -EINVAL;
goto errout;
- } else
+ }
+ } else {
tp = NULL;
+ }
break;
}
}
@@ -248,109 +326,69 @@ replay:
if (tp == NULL) {
/* Proto-tcf does not exist, create new one */
- if (tca[TCA_KIND] == NULL || !protocol)
+ if (tca[TCA_KIND] == NULL || !protocol) {
+ err = -EINVAL;
goto errout;
+ }
- err = -ENOENT;
if (n->nlmsg_type != RTM_NEWTFILTER ||
- !(n->nlmsg_flags & NLM_F_CREATE))
+ !(n->nlmsg_flags & NLM_F_CREATE)) {
+ err = -ENOENT;
goto errout;
+ }
+ if (!nprio)
+ nprio = TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
- /* Create new proto tcf */
-
- err = -ENOBUFS;
- tp = kzalloc(sizeof(*tp), GFP_KERNEL);
- if (tp == NULL)
- goto errout;
- err = -ENOENT;
- tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
- if (tp_ops == NULL) {
-#ifdef CONFIG_MODULES
- struct nlattr *kind = tca[TCA_KIND];
- char name[IFNAMSIZ];
-
- if (kind != NULL &&
- nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
- rtnl_unlock();
- request_module("cls_%s", name);
- rtnl_lock();
- tp_ops = tcf_proto_lookup_ops(kind);
- /* We dropped the RTNL semaphore in order to
- * perform the module load. So, even if we
- * succeeded in loading the module we have to
- * replay the request. We indicate this using
- * -EAGAIN.
- */
- if (tp_ops != NULL) {
- module_put(tp_ops->owner);
- err = -EAGAIN;
- }
- }
-#endif
- kfree(tp);
+ tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
+ protocol, nprio, parent, q);
+ if (IS_ERR(tp)) {
+ err = PTR_ERR(tp);
goto errout;
}
- tp->ops = tp_ops;
- tp->protocol = protocol;
- tp->prio = nprio ? :
- TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
- tp->q = q;
- tp->classify = tp_ops->classify;
- tp->classid = parent;
-
- err = tp_ops->init(tp);
- if (err != 0) {
- module_put(tp_ops->owner);
- kfree(tp);
- goto errout;
- }
-
tp_created = 1;
-
- } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
+ } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+ err = -EINVAL;
goto errout;
+ }
fh = tp->ops->get(tp, t->tcm_handle);
if (fh == 0) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
- struct tcf_proto *next = rtnl_dereference(tp->next);
-
+ next = rtnl_dereference(tp->next);
RCU_INIT_POINTER(*back, next);
-
tfilter_notify(net, skb, n, tp, fh,
RTM_DELTFILTER, false);
- tcf_destroy(tp, true);
+ tcf_proto_destroy(tp, true);
err = 0;
goto errout;
}
- err = -ENOENT;
if (n->nlmsg_type != RTM_NEWTFILTER ||
- !(n->nlmsg_flags & NLM_F_CREATE))
+ !(n->nlmsg_flags & NLM_F_CREATE)) {
+ err = -ENOENT;
goto errout;
+ }
} else {
switch (n->nlmsg_type) {
case RTM_NEWTFILTER:
- err = -EEXIST;
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
- tcf_destroy(tp, true);
+ tcf_proto_destroy(tp, true);
+ err = -EEXIST;
goto errout;
}
break;
case RTM_DELTFILTER:
err = tp->ops->delete(tp, fh);
- if (err == 0) {
- struct tcf_proto *next = rtnl_dereference(tp->next);
-
- tfilter_notify(net, skb, n, tp,
- t->tcm_handle,
- RTM_DELTFILTER, false);
- if (tcf_destroy(tp, false))
- RCU_INIT_POINTER(*back, next);
- }
+ if (err)
+ goto errout;
+ next = rtnl_dereference(tp->next);
+ tfilter_notify(net, skb, n, tp, t->tcm_handle,
+ RTM_DELTFILTER, false);
+ if (tcf_proto_destroy(tp, false))
+ RCU_INIT_POINTER(*back, next);
goto errout;
case RTM_GETTFILTER:
err = tfilter_notify(net, skb, n, tp, fh,
@@ -372,7 +410,7 @@ replay:
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
} else {
if (tp_created)
- tcf_destroy(tp, true);
+ tcf_proto_destroy(tp, true);
}
errout:
@@ -682,6 +720,30 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
+int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
+ struct net_device **hw_dev)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ const struct tc_action *a;
+ LIST_HEAD(actions);
+
+ if (tc_no_actions(exts))
+ return -EINVAL;
+
+ tcf_exts_to_list(exts, &actions);
+ list_for_each_entry(a, &actions, list) {
+ if (a->ops->get_dev) {
+ a->ops->get_dev(a, dev_net(dev), hw_dev);
+ break;
+ }
+ }
+ if (*hw_dev)
+ return 0;
+#endif
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(tcf_exts_get_dev);
+
static int __init tc_filter_init(void)
{
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 0a47ba5e6109..80f688436dd7 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -45,10 +45,7 @@ struct cls_bpf_prog {
u32 gen_flags;
struct tcf_exts exts;
u32 handle;
- union {
- u32 bpf_fd;
- u16 bpf_num_ops;
- };
+ u16 bpf_num_ops;
struct sock_filter *bpf_ops;
const char *bpf_name;
struct tcf_proto *tp;
@@ -151,6 +148,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_bpf_offload bpf_offload = {};
struct tc_to_netdev offload;
+ int err;
offload.type = TC_SETUP_CLSBPF;
offload.cls_bpf = &bpf_offload;
@@ -162,8 +160,13 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
bpf_offload.exts_integrated = prog->exts_integrated;
bpf_offload.gen_flags = prog->gen_flags;
- return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->protocol, &offload);
+ err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+ tp->protocol, &offload);
+
+ if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE))
+ prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
+
+ return err;
}
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
@@ -244,7 +247,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
return 0;
}
-static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
+static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
{
tcf_exts_destroy(&prog->exts);
@@ -258,22 +261,22 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
kfree(prog);
}
-static void __cls_bpf_delete_prog(struct rcu_head *rcu)
+static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
{
- struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
-
- cls_bpf_delete_prog(prog->tp, prog);
+ __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu));
}
-static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
+static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
- struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;
-
cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
- call_rcu(&prog->rcu, __cls_bpf_delete_prog);
+ call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+}
+static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ __cls_bpf_delete(tp, (struct cls_bpf_prog *) arg);
return 0;
}
@@ -285,12 +288,8 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
if (!force && !list_empty(&head->plist))
return false;
- list_for_each_entry_safe(prog, tmp, &head->plist, link) {
- cls_bpf_stop_offload(tp, prog);
- list_del_rcu(&prog->link);
- tcf_unbind_filter(tp, &prog->res);
- call_rcu(&prog->rcu, __cls_bpf_delete_prog);
- }
+ list_for_each_entry_safe(prog, tmp, &head->plist, link)
+ __cls_bpf_delete(tp, prog);
kfree_rcu(head, rcu);
return true;
@@ -365,9 +364,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
return PTR_ERR(fp);
if (tb[TCA_BPF_NAME]) {
- name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
- nla_len(tb[TCA_BPF_NAME]),
- GFP_KERNEL);
+ name = nla_memdup(tb[TCA_BPF_NAME], GFP_KERNEL);
if (!name) {
bpf_prog_put(fp);
return -ENOMEM;
@@ -375,7 +372,6 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
}
prog->bpf_ops = NULL;
- prog->bpf_fd = bpf_fd;
prog->bpf_name = name;
prog->filter = fp;
@@ -517,14 +513,17 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
ret = cls_bpf_offload(tp, prog, oldprog);
if (ret) {
- cls_bpf_delete_prog(tp, prog);
+ __cls_bpf_delete_prog(prog);
return ret;
}
+ if (!tc_in_hw(prog->gen_flags))
+ prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+
if (oldprog) {
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
- call_rcu(&oldprog->rcu, __cls_bpf_delete_prog);
+ call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
} else {
list_add_rcu(&prog->link, &head->plist);
}
@@ -559,13 +558,18 @@ static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
struct sk_buff *skb)
{
- if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd))
- return -EMSGSIZE;
+ struct nlattr *nla;
if (prog->bpf_name &&
nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
return -EMSGSIZE;
+ nla = nla_reserve(skb, TCA_BPF_TAG, sizeof(prog->filter->tag));
+ if (nla == NULL)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), prog->filter->tag, nla_len(nla));
+
return 0;
}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6575aba87630..3d6b9286c203 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -129,7 +129,7 @@ static u32 flow_get_mark(const struct sk_buff *skb)
static u32 flow_get_nfct(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- return addr_fold(skb->nfct);
+ return addr_fold(skb_nfct(skb));
#else
return 0;
#endif
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 904442421db3..9d0c99d2e9fb 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -39,11 +39,14 @@ struct fl_flow_key {
struct flow_dissector_key_ipv6_addrs ipv6;
};
struct flow_dissector_key_ports tp;
+ struct flow_dissector_key_icmp icmp;
+ struct flow_dissector_key_arp arp;
struct flow_dissector_key_keyid enc_key_id;
union {
struct flow_dissector_key_ipv4_addrs enc_ipv4;
struct flow_dissector_key_ipv6_addrs enc_ipv6;
};
+ struct flow_dissector_key_ports enc_tp;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -81,6 +84,8 @@ struct cls_fl_filter {
u32 handle;
u32 flags;
struct rcu_head rcu;
+ struct tc_to_netdev tc;
+ struct net_device *hw_dev;
};
static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
@@ -129,6 +134,14 @@ static void fl_clear_masked_range(struct fl_flow_key *key,
memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
}
+static struct cls_fl_filter *fl_lookup(struct cls_fl_head *head,
+ struct fl_flow_key *mkey)
+{
+ return rhashtable_lookup_fast(&head->ht,
+ fl_key_get_start(mkey, &head->mask),
+ head->ht_params);
+}
+
static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
@@ -149,16 +162,22 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
switch (ip_tunnel_info_af(info)) {
case AF_INET:
+ skb_key.enc_control.addr_type =
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS;
skb_key.enc_ipv4.src = key->u.ipv4.src;
skb_key.enc_ipv4.dst = key->u.ipv4.dst;
break;
case AF_INET6:
+ skb_key.enc_control.addr_type =
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS;
skb_key.enc_ipv6.src = key->u.ipv6.src;
skb_key.enc_ipv6.dst = key->u.ipv6.dst;
break;
}
skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
+ skb_key.enc_tp.src = key->tp_src;
+ skb_key.enc_tp.dst = key->tp_dst;
}
skb_key.indev_ifindex = skb->skb_iif;
@@ -170,9 +189,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
- f = rhashtable_lookup_fast(&head->ht,
- fl_key_get_start(&skb_mkey, &head->mask),
- head->ht_params);
+ f = fl_lookup(head, &skb_mkey);
if (f && !tc_skip_sw(f->flags)) {
*res = f->res;
return tcf_exts_exec(skb, &f->exts, res);
@@ -202,75 +219,95 @@ static void fl_destroy_filter(struct rcu_head *head)
kfree(f);
}
-static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie)
+static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
{
- struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_flower_offload offload = {0};
- struct tc_to_netdev tc;
+ struct net_device *dev = f->hw_dev;
+ struct tc_to_netdev *tc = &f->tc;
- if (!tc_should_offload(dev, tp, 0))
+ if (!tc_can_offload(dev, tp))
return;
offload.command = TC_CLSFLOWER_DESTROY;
- offload.cookie = cookie;
+ offload.prio = tp->prio;
+ offload.cookie = (unsigned long)f;
- tc.type = TC_SETUP_CLSFLOWER;
- tc.cls_flower = &offload;
+ tc->type = TC_SETUP_CLSFLOWER;
+ tc->cls_flower = &offload;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
struct flow_dissector *dissector,
struct fl_flow_key *mask,
- struct fl_flow_key *key,
- struct tcf_exts *actions,
- unsigned long cookie, u32 flags)
+ struct cls_fl_filter *f)
{
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_flower_offload offload = {0};
- struct tc_to_netdev tc;
+ struct tc_to_netdev *tc = &f->tc;
int err;
- if (!tc_should_offload(dev, tp, flags))
- return tc_skip_sw(flags) ? -EINVAL : 0;
+ if (!tc_can_offload(dev, tp)) {
+ if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
+ (f->hw_dev && !tc_can_offload(f->hw_dev, tp))) {
+ f->hw_dev = dev;
+ return tc_skip_sw(f->flags) ? -EINVAL : 0;
+ }
+ dev = f->hw_dev;
+ tc->egress_dev = true;
+ } else {
+ f->hw_dev = dev;
+ }
offload.command = TC_CLSFLOWER_REPLACE;
- offload.cookie = cookie;
+ offload.prio = tp->prio;
+ offload.cookie = (unsigned long)f;
offload.dissector = dissector;
offload.mask = mask;
- offload.key = key;
- offload.exts = actions;
+ offload.key = &f->mkey;
+ offload.exts = &f->exts;
- tc.type = TC_SETUP_CLSFLOWER;
- tc.cls_flower = &offload;
+ tc->type = TC_SETUP_CLSFLOWER;
+ tc->cls_flower = &offload;
err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
- &tc);
+ tc);
+ if (!err)
+ f->flags |= TCA_CLS_FLAGS_IN_HW;
- if (tc_skip_sw(flags))
+ if (tc_skip_sw(f->flags))
return err;
-
return 0;
}
static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
{
- struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_flower_offload offload = {0};
- struct tc_to_netdev tc;
+ struct net_device *dev = f->hw_dev;
+ struct tc_to_netdev *tc = &f->tc;
- if (!tc_should_offload(dev, tp, 0))
+ if (!tc_can_offload(dev, tp))
return;
offload.command = TC_CLSFLOWER_STATS;
+ offload.prio = tp->prio;
offload.cookie = (unsigned long)f;
offload.exts = &f->exts;
- tc.type = TC_SETUP_CLSFLOWER;
- tc.cls_flower = &offload;
+ tc->type = TC_SETUP_CLSFLOWER;
+ tc->cls_flower = &offload;
+
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
+}
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
+{
+ list_del_rcu(&f->list);
+ if (!tc_skip_hw(f->flags))
+ fl_hw_destroy_filter(tp, f);
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, fl_destroy_filter);
}
static void fl_destroy_sleepable(struct work_struct *work)
@@ -299,14 +336,12 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
if (!force && !list_empty(&head->filters))
return false;
- list_for_each_entry_safe(f, next, &head->filters, list) {
- fl_hw_destroy_filter(tp, (unsigned long)f);
- list_del_rcu(&f->list);
- call_rcu(&f->rcu, fl_destroy_filter);
- }
+ list_for_each_entry_safe(f, next, &head->filters, list)
+ __fl_delete(tp, f);
__module_get(THIS_MODULE);
call_rcu(&head->rcu, fl_destroy_rcu);
+
return true;
}
@@ -360,6 +395,34 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_SCTP_SRC_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_SCTP_DST_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_SCTP_SRC] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_SCTP_DST] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_FLAGS] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_FLAGS_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ICMPV4_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV4_CODE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_CODE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ARP_SIP] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ARP_SIP_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ARP_TIP] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ARP_TIP_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ARP_OP] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ARP_OP_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ARP_SHA] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ARP_SHA_MASK] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ARP_THA] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ARP_THA_MASK] = { .len = ETH_ALEN },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -394,10 +457,43 @@ static void fl_set_key_vlan(struct nlattr **tb,
}
}
+static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
+ u32 *dissector_key, u32 *dissector_mask,
+ u32 flower_flag_bit, u32 dissector_flag_bit)
+{
+ if (flower_mask & flower_flag_bit) {
+ *dissector_mask |= dissector_flag_bit;
+ if (flower_key & flower_flag_bit)
+ *dissector_key |= dissector_flag_bit;
+ }
+}
+
+static int fl_set_key_flags(struct nlattr **tb,
+ u32 *flags_key, u32 *flags_mask)
+{
+ u32 key, mask;
+
+ /* mask is mandatory for flags */
+ if (!tb[TCA_FLOWER_KEY_FLAGS_MASK])
+ return -EINVAL;
+
+ key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
+ mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
+
+ *flags_key = 0;
+ *flags_mask = 0;
+
+ fl_set_key_flag(key, mask, flags_key, flags_mask,
+ TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+
+ return 0;
+}
+
static int fl_set_key(struct net *net, struct nlattr **tb,
struct fl_flow_key *key, struct fl_flow_key *mask)
{
__be16 ethertype;
+ int ret = 0;
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_FLOWER_INDEV]) {
int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
@@ -439,6 +535,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ mask->control.addr_type = ~0;
fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
&mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
sizeof(key->ipv4.src));
@@ -447,6 +544,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
sizeof(key->ipv4.dst));
} else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ mask->control.addr_type = ~0;
fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
&mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
sizeof(key->ipv6.src));
@@ -469,11 +567,56 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
&mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
sizeof(key->tp.dst));
+ } else if (key->basic.ip_proto == IPPROTO_SCTP) {
+ fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
+ &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
+ sizeof(key->tp.src));
+ fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
+ &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
+ sizeof(key->tp.dst));
+ } else if (key->basic.n_proto == htons(ETH_P_IP) &&
+ key->basic.ip_proto == IPPROTO_ICMP) {
+ fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV4_TYPE,
+ &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
+ sizeof(key->icmp.type));
+ fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
+ &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+ sizeof(key->icmp.code));
+ } else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+ key->basic.ip_proto == IPPROTO_ICMPV6) {
+ fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV6_TYPE,
+ &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
+ sizeof(key->icmp.type));
+ fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV6_CODE,
+ &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
+ sizeof(key->icmp.code));
+ } else if (key->basic.n_proto == htons(ETH_P_ARP) ||
+ key->basic.n_proto == htons(ETH_P_RARP)) {
+ fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP,
+ &mask->arp.sip, TCA_FLOWER_KEY_ARP_SIP_MASK,
+ sizeof(key->arp.sip));
+ fl_set_key_val(tb, &key->arp.tip, TCA_FLOWER_KEY_ARP_TIP,
+ &mask->arp.tip, TCA_FLOWER_KEY_ARP_TIP_MASK,
+ sizeof(key->arp.tip));
+ fl_set_key_val(tb, &key->arp.op, TCA_FLOWER_KEY_ARP_OP,
+ &mask->arp.op, TCA_FLOWER_KEY_ARP_OP_MASK,
+ sizeof(key->arp.op));
+ fl_set_key_val(tb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA,
+ mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK,
+ sizeof(key->arp.sha));
+ fl_set_key_val(tb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA,
+ mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
+ sizeof(key->arp.tha));
}
if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ mask->enc_control.addr_type = ~0;
fl_set_key_val(tb, &key->enc_ipv4.src,
TCA_FLOWER_KEY_ENC_IPV4_SRC,
&mask->enc_ipv4.src,
@@ -489,6 +632,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ mask->enc_control.addr_type = ~0;
fl_set_key_val(tb, &key->enc_ipv6.src,
TCA_FLOWER_KEY_ENC_IPV6_SRC,
&mask->enc_ipv6.src,
@@ -505,7 +649,18 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
&mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC,
sizeof(key->enc_key_id.keyid));
- return 0;
+ fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
+ &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
+ sizeof(key->enc_tp.src));
+
+ fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
+ &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
+ sizeof(key->enc_tp.dst));
+
+ if (tb[TCA_FLOWER_KEY_FLAGS])
+ ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
+
+ return ret;
}
static bool fl_mask_eq(struct fl_flow_mask *mask1,
@@ -571,7 +726,23 @@ static void fl_init_dissector(struct cls_fl_head *head,
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_PORTS, tp);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ICMP, icmp);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ARP, arp);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_VLAN, vlan);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6);
+ if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) ||
+ FL_KEY_IS_MASKED(&mask->key, enc_ipv6))
+ FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ enc_control);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
skb_flow_dissector_init(&head->dissector, keys, cnt);
}
@@ -666,23 +837,31 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
struct cls_fl_filter *fnew;
- struct nlattr *tb[TCA_FLOWER_MAX + 1];
+ struct nlattr **tb;
struct fl_flow_mask mask = {};
int err;
if (!tca[TCA_OPTIONS])
return -EINVAL;
+ tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
+ if (!tb)
+ return -ENOBUFS;
+
err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
if (err < 0)
- return err;
+ goto errout_tb;
- if (fold && handle && fold->handle != handle)
- return -EINVAL;
+ if (fold && handle && fold->handle != handle) {
+ err = -EINVAL;
+ goto errout_tb;
+ }
fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
- if (!fnew)
- return -ENOBUFS;
+ if (!fnew) {
+ err = -ENOBUFS;
+ goto errout_tb;
+ }
err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
if (err < 0)
@@ -715,27 +894,35 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
goto errout;
if (!tc_skip_sw(fnew->flags)) {
+ if (!fold && fl_lookup(head, &fnew->mkey)) {
+ err = -EEXIST;
+ goto errout;
+ }
+
err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
head->ht_params);
if (err)
goto errout;
}
- err = fl_hw_replace_filter(tp,
- &head->dissector,
- &mask.key,
- &fnew->key,
- &fnew->exts,
- (unsigned long)fnew,
- fnew->flags);
- if (err)
- goto errout;
+ if (!tc_skip_hw(fnew->flags)) {
+ err = fl_hw_replace_filter(tp,
+ &head->dissector,
+ &mask.key,
+ fnew);
+ if (err)
+ goto errout;
+ }
+
+ if (!tc_in_hw(fnew->flags))
+ fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
if (fold) {
if (!tc_skip_sw(fold->flags))
rhashtable_remove_fast(&head->ht, &fold->ht_node,
head->ht_params);
- fl_hw_destroy_filter(tp, (unsigned long)fold);
+ if (!tc_skip_hw(fold->flags))
+ fl_hw_destroy_filter(tp, fold);
}
*arg = (unsigned long) fnew;
@@ -748,11 +935,14 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
list_add_tail_rcu(&fnew->list, &head->filters);
}
+ kfree(tb);
return 0;
errout:
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
+errout_tb:
+ kfree(tb);
return err;
}
@@ -764,10 +954,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg)
if (!tc_skip_sw(f->flags))
rhashtable_remove_fast(&head->ht, &f->ht_node,
head->ht_params);
- list_del_rcu(&f->list);
- fl_hw_destroy_filter(tp, (unsigned long)f);
- tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, fl_destroy_filter);
+ __fl_delete(tp, f);
return 0;
}
@@ -830,6 +1017,42 @@ static int fl_dump_key_vlan(struct sk_buff *skb,
return 0;
}
+static void fl_get_key_flag(u32 dissector_key, u32 dissector_mask,
+ u32 *flower_key, u32 *flower_mask,
+ u32 flower_flag_bit, u32 dissector_flag_bit)
+{
+ if (dissector_mask & dissector_flag_bit) {
+ *flower_mask |= flower_flag_bit;
+ if (dissector_key & dissector_flag_bit)
+ *flower_key |= flower_flag_bit;
+ }
+}
+
+static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
+{
+ u32 key, mask;
+ __be32 _key, _mask;
+ int err;
+
+ if (!memchr_inv(&flags_mask, 0, sizeof(flags_mask)))
+ return 0;
+
+ key = 0;
+ mask = 0;
+
+ fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+ TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+
+ _key = cpu_to_be32(key);
+ _mask = cpu_to_be32(mask);
+
+ err = nla_put(skb, TCA_FLOWER_KEY_FLAGS, 4, &_key);
+ if (err)
+ return err;
+
+ return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
+}
+
static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
@@ -862,7 +1085,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
goto nla_put_failure;
}
- fl_hw_update_stats(tp, f);
+ if (!tc_skip_hw(f->flags))
+ fl_hw_update_stats(tp, f);
if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
@@ -918,6 +1142,57 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
&mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
sizeof(key->tp.dst))))
goto nla_put_failure;
+ else if (key->basic.ip_proto == IPPROTO_SCTP &&
+ (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
+ &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
+ sizeof(key->tp.src)) ||
+ fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
+ &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
+ sizeof(key->tp.dst))))
+ goto nla_put_failure;
+ else if (key->basic.n_proto == htons(ETH_P_IP) &&
+ key->basic.ip_proto == IPPROTO_ICMP &&
+ (fl_dump_key_val(skb, &key->icmp.type,
+ TCA_FLOWER_KEY_ICMPV4_TYPE, &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
+ sizeof(key->icmp.type)) ||
+ fl_dump_key_val(skb, &key->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE, &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+ sizeof(key->icmp.code))))
+ goto nla_put_failure;
+ else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+ key->basic.ip_proto == IPPROTO_ICMPV6 &&
+ (fl_dump_key_val(skb, &key->icmp.type,
+ TCA_FLOWER_KEY_ICMPV6_TYPE, &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
+ sizeof(key->icmp.type)) ||
+ fl_dump_key_val(skb, &key->icmp.code,
+ TCA_FLOWER_KEY_ICMPV6_CODE, &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
+ sizeof(key->icmp.code))))
+ goto nla_put_failure;
+ else if ((key->basic.n_proto == htons(ETH_P_ARP) ||
+ key->basic.n_proto == htons(ETH_P_RARP)) &&
+ (fl_dump_key_val(skb, &key->arp.sip,
+ TCA_FLOWER_KEY_ARP_SIP, &mask->arp.sip,
+ TCA_FLOWER_KEY_ARP_SIP_MASK,
+ sizeof(key->arp.sip)) ||
+ fl_dump_key_val(skb, &key->arp.tip,
+ TCA_FLOWER_KEY_ARP_TIP, &mask->arp.tip,
+ TCA_FLOWER_KEY_ARP_TIP_MASK,
+ sizeof(key->arp.tip)) ||
+ fl_dump_key_val(skb, &key->arp.op,
+ TCA_FLOWER_KEY_ARP_OP, &mask->arp.op,
+ TCA_FLOWER_KEY_ARP_OP_MASK,
+ sizeof(key->arp.op)) ||
+ fl_dump_key_val(skb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA,
+ mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK,
+ sizeof(key->arp.sha)) ||
+ fl_dump_key_val(skb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA,
+ mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
+ sizeof(key->arp.tha))))
+ goto nla_put_failure;
if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
(fl_dump_key_val(skb, &key->enc_ipv4.src,
@@ -943,10 +1218,24 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
&mask->enc_key_id, TCA_FLOWER_UNSPEC,
- sizeof(key->enc_key_id)))
+ sizeof(key->enc_key_id)) ||
+ fl_dump_key_val(skb, &key->enc_tp.src,
+ TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
+ &mask->enc_tp.src,
+ TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
+ sizeof(key->enc_tp.src)) ||
+ fl_dump_key_val(skb, &key->enc_tp.dst,
+ TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
+ &mask->enc_tp.dst,
+ TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
+ sizeof(key->enc_tp.dst)))
goto nla_put_failure;
- nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
+ if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
+ goto nla_put_failure;
+
+ if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
+ goto nla_put_failure;
if (tcf_exts_dump(skb, &f->exts))
goto nla_put_failure;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index f935429bd5ef..224eb2c14346 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -16,16 +16,11 @@
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
-struct cls_mall_filter {
+struct cls_mall_head {
struct tcf_exts exts;
struct tcf_result res;
u32 handle;
- struct rcu_head rcu;
u32 flags;
-};
-
-struct cls_mall_head {
- struct cls_mall_filter *filter;
struct rcu_head rcu;
};
@@ -33,56 +28,52 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_mall_head *head = rcu_dereference_bh(tp->root);
- struct cls_mall_filter *f = head->filter;
- if (tc_skip_sw(f->flags))
+ if (tc_skip_sw(head->flags))
return -1;
- return tcf_exts_exec(skb, &f->exts, res);
+ return tcf_exts_exec(skb, &head->exts, res);
}
static int mall_init(struct tcf_proto *tp)
{
- struct cls_mall_head *head;
-
- head = kzalloc(sizeof(*head), GFP_KERNEL);
- if (!head)
- return -ENOBUFS;
-
- rcu_assign_pointer(tp->root, head);
-
return 0;
}
-static void mall_destroy_filter(struct rcu_head *head)
+static void mall_destroy_rcu(struct rcu_head *rcu)
{
- struct cls_mall_filter *f = container_of(head, struct cls_mall_filter, rcu);
+ struct cls_mall_head *head = container_of(rcu, struct cls_mall_head,
+ rcu);
- tcf_exts_destroy(&f->exts);
-
- kfree(f);
+ tcf_exts_destroy(&head->exts);
+ kfree(head);
}
static int mall_replace_hw_filter(struct tcf_proto *tp,
- struct cls_mall_filter *f,
+ struct cls_mall_head *head,
unsigned long cookie)
{
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_to_netdev offload;
struct tc_cls_matchall_offload mall_offload = {0};
+ int err;
offload.type = TC_SETUP_MATCHALL;
offload.cls_mall = &mall_offload;
offload.cls_mall->command = TC_CLSMATCHALL_REPLACE;
- offload.cls_mall->exts = &f->exts;
+ offload.cls_mall->exts = &head->exts;
offload.cls_mall->cookie = cookie;
- return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
- &offload);
+ err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
+ &offload);
+ if (!err)
+ head->flags |= TCA_CLS_FLAGS_IN_HW;
+
+ return err;
}
static void mall_destroy_hw_filter(struct tcf_proto *tp,
- struct cls_mall_filter *f,
+ struct cls_mall_head *head,
unsigned long cookie)
{
struct net_device *dev = tp->q->dev_queue->dev;
@@ -103,29 +94,20 @@ static bool mall_destroy(struct tcf_proto *tp, bool force)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct net_device *dev = tp->q->dev_queue->dev;
- struct cls_mall_filter *f = head->filter;
- if (!force && f)
- return false;
+ if (!head)
+ return true;
- if (f) {
- if (tc_should_offload(dev, tp, f->flags))
- mall_destroy_hw_filter(tp, f, (unsigned long) f);
+ if (tc_should_offload(dev, tp, head->flags))
+ mall_destroy_hw_filter(tp, head, (unsigned long) head);
- call_rcu(&f->rcu, mall_destroy_filter);
- }
- kfree_rcu(head, rcu);
+ call_rcu(&head->rcu, mall_destroy_rcu);
return true;
}
static unsigned long mall_get(struct tcf_proto *tp, u32 handle)
{
- struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct cls_mall_filter *f = head->filter;
-
- if (f && f->handle == handle)
- return (unsigned long) f;
- return 0;
+ return 0UL;
}
static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
@@ -134,26 +116,31 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
};
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
- struct cls_mall_filter *f,
+ struct cls_mall_head *head,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
struct tcf_exts e;
int err;
- tcf_exts_init(&e, TCA_MATCHALL_ACT, 0);
+ err = tcf_exts_init(&e, TCA_MATCHALL_ACT, 0);
+ if (err)
+ return err;
err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
if (err < 0)
- return err;
+ goto errout;
if (tb[TCA_MATCHALL_CLASSID]) {
- f->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
- tcf_bind_filter(tp, &f->res, base);
+ head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
+ tcf_bind_filter(tp, &head->res, base);
}
- tcf_exts_change(tp, &f->exts, &e);
+ tcf_exts_change(tp, &head->exts, &e);
return 0;
+errout:
+ tcf_exts_destroy(&e);
+ return err;
}
static int mall_change(struct net *net, struct sk_buff *in_skb,
@@ -162,21 +149,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
unsigned long *arg, bool ovr)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct cls_mall_filter *fold = (struct cls_mall_filter *) *arg;
struct net_device *dev = tp->q->dev_queue->dev;
- struct cls_mall_filter *f;
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
+ struct cls_mall_head *new;
u32 flags = 0;
int err;
if (!tca[TCA_OPTIONS])
return -EINVAL;
- if (head->filter)
- return -EBUSY;
-
- if (fold)
- return -EINVAL;
+ if (head)
+ return -EEXIST;
err = nla_parse_nested(tb, TCA_MATCHALL_MAX,
tca[TCA_OPTIONS], mall_policy);
@@ -189,64 +172,62 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
- f = kzalloc(sizeof(*f), GFP_KERNEL);
- if (!f)
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
return -ENOBUFS;
- tcf_exts_init(&f->exts, TCA_MATCHALL_ACT, 0);
+ err = tcf_exts_init(&new->exts, TCA_MATCHALL_ACT, 0);
+ if (err)
+ goto err_exts_init;
if (!handle)
handle = 1;
- f->handle = handle;
- f->flags = flags;
+ new->handle = handle;
+ new->flags = flags;
- err = mall_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
+ err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr);
if (err)
- goto errout;
+ goto err_set_parms;
if (tc_should_offload(dev, tp, flags)) {
- err = mall_replace_hw_filter(tp, f, (unsigned long) f);
+ err = mall_replace_hw_filter(tp, new, (unsigned long) new);
if (err) {
if (tc_skip_sw(flags))
- goto errout;
+ goto err_replace_hw_filter;
else
err = 0;
}
}
- *arg = (unsigned long) f;
- rcu_assign_pointer(head->filter, f);
+ if (!tc_in_hw(new->flags))
+ new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+ *arg = (unsigned long) head;
+ rcu_assign_pointer(tp->root, new);
+ if (head)
+ call_rcu(&head->rcu, mall_destroy_rcu);
return 0;
-errout:
- kfree(f);
+err_replace_hw_filter:
+err_set_parms:
+ tcf_exts_destroy(&new->exts);
+err_exts_init:
+ kfree(new);
return err;
}
static int mall_delete(struct tcf_proto *tp, unsigned long arg)
{
- struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct cls_mall_filter *f = (struct cls_mall_filter *) arg;
- struct net_device *dev = tp->q->dev_queue->dev;
-
- if (tc_should_offload(dev, tp, f->flags))
- mall_destroy_hw_filter(tp, f, (unsigned long) f);
-
- RCU_INIT_POINTER(head->filter, NULL);
- tcf_unbind_filter(tp, &f->res);
- call_rcu(&f->rcu, mall_destroy_filter);
- return 0;
+ return -EOPNOTSUPP;
}
static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct cls_mall_filter *f = head->filter;
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long) f, arg) < 0)
+ if (arg->fn(tp, (unsigned long) head, arg) < 0)
arg->stop = 1;
skip:
arg->count++;
@@ -255,28 +236,31 @@ skip:
static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct cls_mall_filter *f = (struct cls_mall_filter *) fh;
+ struct cls_mall_head *head = (struct cls_mall_head *) fh;
struct nlattr *nest;
- if (!f)
+ if (!head)
return skb->len;
- t->tcm_handle = f->handle;
+ t->tcm_handle = head->handle;
nest = nla_nest_start(skb, TCA_OPTIONS);
if (!nest)
goto nla_put_failure;
- if (f->res.classid &&
- nla_put_u32(skb, TCA_MATCHALL_CLASSID, f->res.classid))
+ if (head->res.classid &&
+ nla_put_u32(skb, TCA_MATCHALL_CLASSID, head->res.classid))
+ goto nla_put_failure;
+
+ if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags))
goto nla_put_failure;
- if (tcf_exts_dump(skb, &f->exts))
+ if (tcf_exts_dump(skb, &head->exts))
goto nla_put_failure;
nla_nest_end(skb, nest);
- if (tcf_exts_dump_stats(skb, &f->exts) < 0)
+ if (tcf_exts_dump_stats(skb, &head->exts) < 0)
goto nla_put_failure;
return skb->len;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ae83c3aec308..4dbe0c680fe6 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -334,7 +334,6 @@ static int u32_init(struct tcf_proto *tp)
if (root_ht == NULL)
return -ENOBUFS;
- root_ht->divisor = 0;
root_ht->refcnt++;
root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
root_ht->prio = tp->prio;
@@ -524,6 +523,10 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
tp->protocol, &offload);
+
+ if (!err)
+ n->flags |= TCA_CLS_FLAGS_IN_HW;
+
if (tc_skip_sw(flags))
return err;
@@ -896,6 +899,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return err;
}
+ if (!tc_in_hw(new->flags))
+ new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+
u32_replace_knode(tp, tp_c, new);
tcf_unbind_filter(tp, &n->res);
call_rcu(&n->rcu, u32_delete_key_rcu);
@@ -1015,6 +1021,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto errhw;
+ if (!tc_in_hw(n->flags))
+ n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+
ins = &ht->ht[TC_U32_HASH(handle)];
for (pins = rtnl_dereference(*ins); pins;
ins = &pins->next, pins = rtnl_dereference(*ins))
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index c66ca9400ab4..c1b23e3060b8 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -57,17 +57,20 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
struct xt_action_param acpar;
const struct xt_set_info *set = (const void *) em->data;
struct net_device *dev, *indev = NULL;
+ struct nf_hook_state state = {
+ .net = em->net,
+ };
int ret, network_offset;
switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
- acpar.family = NFPROTO_IPV4;
+ state.pf = NFPROTO_IPV4;
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
return 0;
acpar.thoff = ip_hdrlen(skb);
break;
case htons(ETH_P_IPV6):
- acpar.family = NFPROTO_IPV6;
+ state.pf = NFPROTO_IPV6;
if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
return 0;
/* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */
@@ -77,9 +80,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
return 0;
}
- acpar.hooknum = 0;
-
- opt.family = acpar.family;
+ opt.family = state.pf;
opt.dim = set->dim;
opt.flags = set->flags;
opt.cmdflags = 0;
@@ -95,9 +96,9 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
if (skb->skb_iif)
indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
- acpar.net = em->net;
- acpar.in = indev ? indev : dev;
- acpar.out = dev;
+ state.in = indev ? indev : dev;
+ state.out = dev;
+ acpar.state = &state;
ret = ip_set_test(set->index, skb, &acpar, &opt);
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index a309a07ccb35..ae7e4f5b348b 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -63,6 +63,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/loadavg.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/random.h>
@@ -176,11 +177,12 @@ META_COLLECTOR(int_vlan_tag)
{
unsigned short tag;
- tag = skb_vlan_tag_get(skb);
- if (!tag && __vlan_get_tag(skb, &tag))
- *err = -1;
- else
+ if (skb_vlan_tag_present(skb))
+ dst->value = skb_vlan_tag_get(skb);
+ else if (!__vlan_get_tag(skb, &tag))
dst->value = tag;
+ else
+ *err = -1;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 206dc24add3a..bcf49cd22786 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -440,7 +440,6 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
EXPORT_SYMBOL(qdisc_put_rtab);
static LIST_HEAD(qdisc_stab_list);
-static DEFINE_SPINLOCK(qdisc_stab_lock);
static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
[TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
@@ -474,20 +473,15 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
if (tsize != s->tsize || (!tab && tsize > 0))
return ERR_PTR(-EINVAL);
- spin_lock(&qdisc_stab_lock);
-
list_for_each_entry(stab, &qdisc_stab_list, list) {
if (memcmp(&stab->szopts, s, sizeof(*s)))
continue;
if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
continue;
stab->refcnt++;
- spin_unlock(&qdisc_stab_lock);
return stab;
}
- spin_unlock(&qdisc_stab_lock);
-
stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
if (!stab)
return ERR_PTR(-ENOMEM);
@@ -497,9 +491,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
if (tsize > 0)
memcpy(stab->data, tab, tsize * sizeof(u16));
- spin_lock(&qdisc_stab_lock);
list_add_tail(&stab->list, &qdisc_stab_list);
- spin_unlock(&qdisc_stab_lock);
return stab;
}
@@ -514,14 +506,10 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
if (!tab)
return;
- spin_lock(&qdisc_stab_lock);
-
if (--tab->refcnt == 0) {
list_del(&tab->list);
call_rcu_bh(&tab->rcu, stab_kfree_rcu);
}
-
- spin_unlock(&qdisc_stab_lock);
}
EXPORT_SYMBOL(qdisc_put_stab);
@@ -960,6 +948,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
sch->handle = handle;
+ /* This exists to keep backward compatibility with a userspace
+ * loophole: on older kernels userspace could get the IFF_NO_QUEUE
+ * facility by setting tx_queue_len=0 (prior to qdisc init), and
+ * could then forget to reinit tx_queue_len before attaching a
+ * qdisc again.
+ */
+ if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
+ dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
+ netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
+ }
+
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
if (qdisc_is_percpu_stats(sch)) {
sch->cpu_bstats =
@@ -1008,6 +1007,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
return sch;
}
+ /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
+ ops->destroy(sch);
err_out3:
dev_put(dev);
kfree((char *) sch - sch->padded);
@@ -1384,7 +1385,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
&d, cpu_bstats, &q->bstats) < 0 ||
- gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
goto nla_put_failure;
@@ -1850,6 +1851,7 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
{
__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
+ const int max_reclassify_loop = 4;
const struct tcf_proto *old_tp = tp;
int limit = 0;
@@ -1874,7 +1876,7 @@ reclassify:
return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
- if (unlikely(limit++ >= MAX_REC_LOOP)) {
+ if (unlikely(limit++ >= max_reclassify_loop)) {
net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
tp->q->ops->id, tp->prio & 0xffff,
ntohs(tp->protocol));
@@ -1888,28 +1890,6 @@ reset:
}
EXPORT_SYMBOL(tc_classify);
-bool tcf_destroy(struct tcf_proto *tp, bool force)
-{
- if (tp->ops->destroy(tp, force)) {
- module_put(tp->ops->owner);
- kfree_rcu(tp, rcu);
- return true;
- }
-
- return false;
-}
-
-void tcf_destroy_chain(struct tcf_proto __rcu **fl)
-{
- struct tcf_proto *tp;
-
- while ((tp = rtnl_dereference(*fl)) != NULL) {
- RCU_INIT_POINTER(*fl, tp->next);
- tcf_destroy(tp, true);
- }
-}
-EXPORT_SYMBOL(tcf_destroy_chain);
-
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 481e4f12aeb4..2209c2ddacbf 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -15,6 +15,7 @@
#include <linux/file.h> /* for fput */
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
/*
* The ATM queuing discipline provides a framework for invoking classifiers
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index beb554aa8cfb..d6ca18dc04c3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -19,6 +19,7 @@
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
/* Class-Based Queueing (CBQ) algorithm.
@@ -122,7 +123,7 @@ struct cbq_class {
psched_time_t penalized;
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est64 rate_est;
+ struct net_rate_estimator __rcu *rate_est;
struct tc_cbq_xstats xstats;
struct tcf_proto __rcu *filter_list;
@@ -509,7 +510,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
if (delay) {
ktime_t time;
- time = ktime_set(0, 0);
+ time = 0;
time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
}
@@ -1346,7 +1347,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
return -1;
@@ -1405,7 +1406,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
tcf_destroy_chain(&cl->filter_list);
qdisc_destroy(cl->q);
qdisc_put_rtab(cl->R_tab);
- gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ gen_kill_estimator(&cl->rate_est);
if (cl != &q->link)
kfree(cl);
}
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 3b6d5bd69101..3b86a97bc67c 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -16,6 +16,7 @@
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 8af5c59eef84..bb4cbdf75004 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -25,7 +25,7 @@ struct drr_class {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est64 rate_est;
+ struct net_rate_estimator __rcu *rate_est;
struct list_head alist;
struct Qdisc *qdisc;
@@ -142,7 +142,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
{
- gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ gen_kill_estimator(&cl->rate_est);
qdisc_destroy(cl->qdisc);
kfree(cl);
}
@@ -283,7 +283,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0)
return -1;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 1308bbf460f7..5334e309f17f 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -13,6 +13,7 @@
#include <linux/rtnetlink.h>
#include <linux/bitops.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <asm/byteorder.h>
@@ -200,9 +201,13 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
if (p->set_tc_index) {
+ int wlen = skb_network_offset(skb);
+
switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
- if (skb_cow_head(skb, sizeof(struct iphdr)))
+ wlen += sizeof(struct iphdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
goto drop;
skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
@@ -210,7 +215,9 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
break;
case htons(ETH_P_IPV6):
- if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
+ wlen += sizeof(struct ipv6hdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
goto drop;
skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 18e752439f6f..a4f738ac7728 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -136,7 +136,7 @@ static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
struct fq_flow *aux;
parent = *p;
- aux = container_of(parent, struct fq_flow, rate_node);
+ aux = rb_entry(parent, struct fq_flow, rate_node);
if (f->time_next_packet >= aux->time_next_packet)
p = &parent->rb_right;
else
@@ -188,7 +188,7 @@ static void fq_gc(struct fq_sched_data *q,
while (*p) {
parent = *p;
- f = container_of(parent, struct fq_flow, fq_node);
+ f = rb_entry(parent, struct fq_flow, fq_node);
if (f->sk == sk)
break;
@@ -245,7 +245,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
skb_orphan(skb);
}
- root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
+ root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)];
if (q->flows >= (2U << q->fq_trees_log) &&
q->inactive_flows > q->flows/2)
@@ -256,7 +256,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
while (*p) {
parent = *p;
- f = container_of(parent, struct fq_flow, fq_node);
+ f = rb_entry(parent, struct fq_flow, fq_node);
if (f->sk == sk) {
/* socket might have been reallocated, so check
* if its sk_hash is the same.
@@ -424,7 +424,7 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now)
q->time_next_delayed_flow = ~0ULL;
while ((p = rb_first(&q->delayed)) != NULL) {
- struct fq_flow *f = container_of(p, struct fq_flow, rate_node);
+ struct fq_flow *f = rb_entry(p, struct fq_flow, rate_node);
if (f->time_next_packet > now) {
q->time_next_delayed_flow = f->time_next_packet;
@@ -563,7 +563,7 @@ static void fq_reset(struct Qdisc *sch)
for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
root = &q->fq_root[idx];
while ((p = rb_first(root)) != NULL) {
- f = container_of(p, struct fq_flow, fq_node);
+ f = rb_entry(p, struct fq_flow, fq_node);
rb_erase(p, root);
fq_flow_purge(f);
@@ -593,20 +593,20 @@ static void fq_rehash(struct fq_sched_data *q,
oroot = &old_array[idx];
while ((op = rb_first(oroot)) != NULL) {
rb_erase(op, oroot);
- of = container_of(op, struct fq_flow, fq_node);
+ of = rb_entry(op, struct fq_flow, fq_node);
if (fq_gc_candidate(of)) {
fcnt++;
kmem_cache_free(fq_flow_cachep, of);
continue;
}
- nroot = &new_array[hash_32((u32)(long)of->sk, new_log)];
+ nroot = &new_array[hash_ptr(of->sk, new_log)];
np = &nroot->rb_node;
parent = NULL;
while (*np) {
parent = *np;
- nf = container_of(parent, struct fq_flow, fq_node);
+ nf = rb_entry(parent, struct fq_flow, fq_node);
BUG_ON(nf->sk == of->sk);
if (nf->sk > of->sk)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index a5ea0e9b6be4..9f3a884d1590 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -23,6 +23,7 @@
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/codel.h>
#include <net/codel_impl.h>
#include <net/codel_qdisc.h>
@@ -57,7 +58,6 @@ struct fq_codel_sched_data {
struct fq_codel_flow *flows; /* Flows table [flows_cnt] */
u32 *backlogs; /* backlog table [flows_cnt] */
u32 flows_cnt; /* number of flows */
- u32 perturbation; /* hash perturbation */
u32 quantum; /* psched_mtu(qdisc_dev(sch)); */
u32 drop_batch_size;
u32 memory_limit;
@@ -75,9 +75,7 @@ struct fq_codel_sched_data {
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
struct sk_buff *skb)
{
- u32 hash = skb_get_hash_perturb(skb, q->perturbation);
-
- return reciprocal_scale(hash, q->flows_cnt);
+ return reciprocal_scale(skb_get_hash(skb), q->flows_cnt);
}
static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -482,7 +480,6 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
q->memory_limit = 32 << 20; /* 32 MBytes */
q->drop_batch_size = 64;
q->quantum = psched_mtu(qdisc_dev(sch));
- q->perturbation = prandom_u32();
INIT_LIST_HEAD(&q->new_flows);
INIT_LIST_HEAD(&q->old_flows);
codel_params_init(&q->cparams);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6cfb6e9038c2..1a2f9e964330 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -247,7 +247,7 @@ static inline int qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q)
{
- int quota = weight_p;
+ int quota = dev_tx_weight;
int packets;
while (qdisc_restart(q, &packets)) {
@@ -709,7 +709,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
- gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
+ gen_kill_estimator(&qdisc->rate_est);
if (ops->reset)
ops->reset(qdisc);
if (ops->destroy)
@@ -794,7 +794,7 @@ static void attach_default_qdiscs(struct net_device *dev)
}
}
#ifdef CONFIG_NET_SCHED
- if (dev->qdisc)
+ if (dev->qdisc != &noop_qdisc)
qdisc_hash_add(dev->qdisc);
#endif
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 000f1d36128e..3ffaa6fb0990 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -114,7 +114,7 @@ struct hfsc_class {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est64 rate_est;
+ struct net_rate_estimator __rcu *rate_est;
struct tcf_proto __rcu *filter_list; /* filter list */
unsigned int filter_cnt; /* filter count */
unsigned int level; /* class level in hierarchy */
@@ -1091,7 +1091,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
tcf_destroy_chain(&cl->filter_list);
qdisc_destroy(cl->qdisc);
- gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ gen_kill_estimator(&cl->rate_est);
if (cl != &q->root)
kfree(cl);
}
@@ -1348,7 +1348,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
xstats.rtwork = cl->cl_cumul;
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index e3d0458af17b..2fae8b5f1b80 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -627,7 +627,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN *
sizeof(u32));
if (!q->hhf_arrays[i]) {
- hhf_destroy(sch);
+ /* Note: hhf_destroy() will be called
+ * by our caller.
+ */
return -ENOMEM;
}
}
@@ -638,7 +640,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN /
BITS_PER_BYTE);
if (!q->hhf_valid_bits[i]) {
- hhf_destroy(sch);
+ /* Note: hhf_destroy() will be called
+ * by our caller.
+ */
return -ENOMEM;
}
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c798d0de8a9d..4cd5fb134bc9 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -40,6 +40,7 @@
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
/* HTB algorithm.
Author: devik@cdi.cz
@@ -111,7 +112,7 @@ struct htb_class {
unsigned int children;
struct htb_class *parent; /* parent class */
- struct gnet_stats_rate_est64 rate_est;
+ struct net_rate_estimator __rcu *rate_est;
/*
* Written often fields
@@ -1145,7 +1146,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
return -1;
@@ -1228,7 +1229,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
WARN_ON(!cl->un.leaf.q);
qdisc_destroy(cl->un.leaf.q);
}
- gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ gen_kill_estimator(&cl->rate_est);
tcf_destroy_chain(&cl->filter_list);
kfree(cl);
}
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 8fe6999b642a..3bab5f66c392 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -16,6 +16,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
{
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 2bc8d7f8df16..20b7f1646f69 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -52,7 +52,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
/* pre-allocate qdiscs, attachment can't fail */
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
GFP_KERNEL);
- if (priv->qdiscs == NULL)
+ if (!priv->qdiscs)
return -ENOMEM;
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
@@ -60,18 +60,14 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx),
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(ntx + 1)));
- if (qdisc == NULL)
- goto err;
+ if (!qdisc)
+ return -ENOMEM;
priv->qdiscs[ntx] = qdisc;
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
sch->flags |= TCQ_F_MQROOT;
return 0;
-
-err:
- mq_destroy(sch);
- return -ENOMEM;
}
static void mq_attach(struct Qdisc *sch)
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index b5c502c78143..922683418e53 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -118,10 +118,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
/* pre-allocate qdisc, attachment can't fail */
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
GFP_KERNEL);
- if (priv->qdiscs == NULL) {
- err = -ENOMEM;
- goto err;
- }
+ if (!priv->qdiscs)
+ return -ENOMEM;
for (i = 0; i < dev->num_tx_queues; i++) {
dev_queue = netdev_get_tx_queue(dev, i);
@@ -129,10 +127,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
get_default_qdisc_ops(dev, i),
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(i + 1)));
- if (qdisc == NULL) {
- err = -ENOMEM;
- goto err;
- }
+ if (!qdisc)
+ return -ENOMEM;
+
priv->qdiscs[i] = qdisc;
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
@@ -148,7 +145,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
priv->hw_owned = 1;
err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
if (err)
- goto err;
+ return err;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -162,10 +159,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
sch->flags |= TCQ_F_MQROOT;
return 0;
-
-err:
- mqprio_destroy(sch);
- return err;
}
static void mqprio_attach(struct Qdisc *sch)
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 9ffbb025b37e..e7839a0d0eaa 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -25,7 +25,7 @@
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-
+#include <net/pkt_cls.h>
struct multiq_sched_data {
u16 bands;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 9f7b380cf0a3..c8bb62a1e744 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -152,7 +152,7 @@ struct netem_skb_cb {
static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
{
- return container_of(rb, struct sk_buff, rbnode);
+ return rb_entry(rb, struct sk_buff, rbnode);
}
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
@@ -626,8 +626,8 @@ deliver:
* If it's at ingress let's pretend the delay is
* from the network (tstamp will be updated).
*/
- if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
- skb->tstamp.tv64 = 0;
+ if (skb->tc_redirected && skb->tc_from_ingress)
+ skb->tstamp = 0;
#endif
if (q->qdisc) {
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 8f575899adfa..d4d7db267b6e 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -20,7 +20,7 @@
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-
+#include <net/pkt_cls.h>
struct prio_sched_data {
int bands;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index ca0516e6f743..f9e712ce2d15 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -137,7 +137,7 @@ struct qfq_class {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
- struct gnet_stats_rate_est64 rate_est;
+ struct net_rate_estimator __rcu *rate_est;
struct Qdisc *qdisc;
struct list_head alist; /* Link for active-classes list. */
struct qfq_aggregate *agg; /* Parent aggregate. */
@@ -508,7 +508,7 @@ set_change_agg:
new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
if (new_agg == NULL) {
err = -ENOBUFS;
- gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ gen_kill_estimator(&cl->rate_est);
goto destroy_class;
}
sch_tree_lock(sch);
@@ -533,7 +533,7 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
struct qfq_sched *q = qdisc_priv(sch);
qfq_rm_from_agg(q, cl);
- gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ gen_kill_estimator(&cl->rate_est);
qdisc_destroy(cl->qdisc);
kfree(cl);
}
@@ -667,7 +667,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL,
&cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
return -1;
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 20a350bd1b1d..fe6963d21519 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -25,6 +25,7 @@
#include <linux/jhash.h>
#include <net/ip.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
/*
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 7f195ed4d568..42e8c8615e65 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -23,6 +23,7 @@
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/red.h>
@@ -742,9 +743,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
if (!q->ht || !q->slots) {
- sfq_destroy(sch);
+ /* Note: sfq_destroy() will be called by our caller */
return -ENOMEM;
}
+
for (i = 0; i < q->divisor; i++)
q->ht[i] = SFQ_EMPTY_SLOT;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 2cd9b4478b92..9fe6b427afed 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -401,8 +401,8 @@ static int teql_master_close(struct net_device *dev)
return 0;
}
-static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *stats)
+static void teql_master_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
{
struct teql_master *m = netdev_priv(dev);
@@ -410,7 +410,6 @@ static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
stats->tx_bytes = m->tx_bytes;
stats->tx_errors = m->tx_errors;
stats->tx_dropped = m->tx_dropped;
- return stats;
}
static int teql_master_mtu(struct net_device *dev, int new_mtu)
@@ -418,9 +417,6 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
struct teql_master *m = netdev_priv(dev);
struct Qdisc *q;
- if (new_mtu < 68)
- return -EINVAL;
-
q = m->slaves;
if (q) {
do {
@@ -460,6 +456,8 @@ static __init void teql_master_setup(struct net_device *dev)
dev->netdev_ops = &teql_netdev_ops;
dev->type = ARPHRD_VOID;
dev->mtu = 1500;
+ dev->min_mtu = 68;
+ dev->max_mtu = 65535;
dev->tx_queue_len = 100;
dev->flags = IFF_NOARP;
dev->hard_header_len = LL_MAX_HEADER;