From 56af5e749f20c3a540310c207dcc373f4f09156e Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 27 Jul 2021 18:33:15 -0700 Subject: net/sched: act_skbmod: Add SKBMOD_F_ECN option support Currently, when doing rate limiting using the tc-police(8) action, the easiest way is to simply drop the packets which exceed or conform the configured bandwidth limit. Add a new option to tc-skbmod(8), so that users may use the ECN [1] extension to explicitly inform the receiver about the congestion instead of dropping packets "on the floor". The 2 least significant bits of the Traffic Class field in IPv4 and IPv6 headers are used to represent different ECN states [2]: 0b00: "Non ECN-Capable Transport", Non-ECT 0b10: "ECN Capable Transport", ECT(0) 0b01: "ECN Capable Transport", ECT(1) 0b11: "Congestion Encountered", CE As an example: $ tc filter add dev eth0 parent 1: protocol ip prio 10 \ matchall action skbmod ecn Doing the above marks all ECT(0) and ECT(1) packets as CE. It does NOT affect Non-ECT or non-IP packets. In the tc-police scenario mentioned above, users may pipe a tc-police action and a tc-skbmod "ecn" action together to achieve ECN-based rate limiting. For TCP connections, upon receiving a CE packet, the receiver will respond with an ECE packet, asking the sender to reduce their congestion window. However ECN also works with other L4 protocols e.g. DCCP and SCTP [2], and our implementation does not touch or care about L4 headers. The updated tc-skbmod SYNOPSIS looks like the following: tc ... action skbmod { set SETTABLE | swap SWAPPABLE | ecn } ... Only one of "set", "swap" or "ecn" shall be used in a single tc-skbmod command. Trying to use more than one of them at a time is considered undefined behavior; pipe multiple tc-skbmod commands together instead. "set" and "swap" only affect Ethernet packets, while "ecn" only affects IPv{4,6} packets. It is also worth mentioning that, in theory, the same effect could be achieved by piping a "police" action and a "bpf" action using the bpf_skb_ecn_set_ce() helper, but this requires eBPF programming from the user, thus impractical. Depends on patch "net/sched: act_skbmod: Skip non-Ethernet packets". [1] https://datatracker.ietf.org/doc/html/rfc3168 [2] https://en.wikipedia.org/wiki/Explicit_Congestion_Notification Reviewed-by: Cong Wang Signed-off-by: Peilin Ye Signed-off-by: David S. Miller --- net/sched/act_skbmod.c | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 8d17a543cc9f..762ceec3e6f6 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -21,15 +22,13 @@ static unsigned int skbmod_net_id; static struct tc_action_ops act_skbmod_ops; -#define MAX_EDIT_LEN ETH_HLEN static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbmod *d = to_skbmod(a); - int action; + int action, max_edit_len, err; struct tcf_skbmod_params *p; u64 flags; - int err; tcf_lastuse_update(&d->tcf_tm); bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); @@ -38,19 +37,34 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, if (unlikely(action == TC_ACT_SHOT)) goto drop; - if (!skb->dev || skb->dev->type != ARPHRD_ETHER) - return action; + max_edit_len = skb_mac_header_len(skb); + p = rcu_dereference_bh(d->skbmod_p); + flags = p->flags; + + /* tcf_skbmod_init() guarantees "flags" to be one of the following: + * 1. a combination of SKBMOD_F_{DMAC,SMAC,ETYPE} + * 2. SKBMOD_F_SWAPMAC + * 3. SKBMOD_F_ECN + * SKBMOD_F_ECN only works with IP packets; all other flags only work with Ethernet + * packets. + */ + if (flags == SKBMOD_F_ECN) { + switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): + case cpu_to_be16(ETH_P_IPV6): + max_edit_len += skb_network_header_len(skb); + break; + default: + goto out; + } + } else if (!skb->dev || skb->dev->type != ARPHRD_ETHER) { + goto out; + } - /* XXX: if you are going to edit more fields beyond ethernet header - * (example when you add IP header replacement or vlan swap) - * then MAX_EDIT_LEN needs to change appropriately - */ - err = skb_ensure_writable(skb, MAX_EDIT_LEN); + err = skb_ensure_writable(skb, max_edit_len); if (unlikely(err)) /* best policy is to drop on the floor */ goto drop; - p = rcu_dereference_bh(d->skbmod_p); - flags = p->flags; if (flags & SKBMOD_F_DMAC) ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); if (flags & SKBMOD_F_SMAC) @@ -66,6 +80,10 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr); } + if (flags & SKBMOD_F_ECN) + INET_ECN_set_ce(skb); + +out: return action; drop: @@ -129,6 +147,8 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, index = parm->index; if (parm->flags & SKBMOD_F_SWAPMAC) lflags = SKBMOD_F_SWAPMAC; + if (parm->flags & SKBMOD_F_ECN) + lflags = SKBMOD_F_ECN; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) -- cgit v1.2.3