From 0b67c43ce36a9964f1d5e3f973ee19eefd3f9f8f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Apr 2015 22:36:27 +0200 Subject: netfilter: bridge: really save frag_max_size between PRE and POST_ROUTING We also need to save/store in forward, else br_parse_ip_options call will zero frag_max_size as well. Fixes: 93fdd47e5 ('bridge: Save frag_max_size between PRE_ROUTING and POST_ROUTING') Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'net/bridge') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index f3884a1b942f..282ed76c49e0 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -686,6 +686,13 @@ static int br_nf_forward_finish(struct sk_buff *skb) struct net_device *in; if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { + int frag_max_size; + + if (skb->protocol == htons(ETH_P_IP)) { + frag_max_size = IPCB(skb)->frag_max_size; + BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; + } + in = nf_bridge->physindev; if (nf_bridge->mask & BRNF_PKT_TYPE) { skb->pkt_type = PACKET_OTHERHOST; @@ -745,8 +752,14 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, nf_bridge->mask |= BRNF_PKT_TYPE; } - if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb)) - return NF_DROP; + if (pf == NFPROTO_IPV4) { + int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size; + + if (br_parse_ip_options(skb)) + return NF_DROP; + + IPCB(skb)->frag_max_size = frag_max; + } nf_bridge->physoutdev = skb->dev; if (pf == NFPROTO_IPV4) -- cgit v1.2.3 From e70deecbf8e1562cac0b19f23848919e2f5d65aa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Apr 2015 14:31:40 +0200 Subject: netfilter: bridge: don't use nf_bridge_info data to store mac header br_netfilter maintains an extra state, nf_bridge_info, which is attached to skb via skb->nf_bridge pointer. 
Amongst other things we use skb->nf_bridge->data to store the original mac header for every processed skb. This is required for ip refragmentation when using conntrack on top of bridge, because ip_fragment doesn't copy it from original skb. However there is no need anymore to do this unconditionally. Move this to the one place where it's needed -- when br_netfilter calls ip_fragment(). Also switch to percpu storage for this so we can handle fragmenting without accessing nf_bridge meta data. Only user left is neigh resolution when DNAT is detected, to hold the original source mac address (neigh resolution builds new mac header using bridge mac), so rename ->data and reduce its size to what's needed. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 2 +- net/bridge/br_netfilter.c | 70 +++++++++++++++++++++++++++-------------------- 2 files changed, 42 insertions(+), 30 deletions(-) (limited to 'net/bridge') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 36f3f43c0117..f66a089afc41 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -169,7 +169,7 @@ struct nf_bridge_info { unsigned int mask; struct net_device *physindev; struct net_device *physoutdev; - unsigned long data[32 / sizeof(unsigned long)]; + char neigh_header[8]; }; #endif diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 282ed76c49e0..ca1cb6704a78 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -111,6 +111,19 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) +/* largest possible L2 header, see br_nf_dev_queue_xmit() */ +#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +struct brnf_frag_data { + char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; + u8 encap_size; + u8 size; +}; + +static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); 
+#endif + static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { struct net_bridge_port *port; @@ -189,14 +202,6 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) skb->network_header += len; } -static inline void nf_bridge_save_header(struct sk_buff *skb) -{ - int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - - skb_copy_from_linear_data_offset(skb, -header_size, - skb->nf_bridge->data, header_size); -} - /* When handing a packet over to the IP layer * check whether we have a skb that is in the * expected format @@ -318,7 +323,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) */ skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), - skb->nf_bridge->data, + nf_bridge->neigh_header, ETH_HLEN-ETH_ALEN); /* tell br_dev_xmit to continue with forwarding */ nf_bridge->mask |= BRNF_BRIDGED_DNAT; @@ -810,30 +815,22 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, } #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) -static bool nf_bridge_copy_header(struct sk_buff *skb) +static int br_nf_push_frag_xmit(struct sk_buff *skb) { + struct brnf_frag_data *data; int err; - unsigned int header_size; - nf_bridge_update_protocol(skb); - header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - err = skb_cow_head(skb, header_size); - if (err) - return false; + data = this_cpu_ptr(&brnf_frag_data_storage); + err = skb_cow_head(skb, data->size); - skb_copy_to_linear_data_offset(skb, -header_size, - skb->nf_bridge->data, header_size); - __skb_push(skb, nf_bridge_encap_header_len(skb)); - return true; -} - -static int br_nf_push_frag_xmit(struct sk_buff *skb) -{ - if (!nf_bridge_copy_header(skb)) { + if (err) { kfree_skb(skb); return 0; } + skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); + __skb_push(skb, data->encap_size); + return br_dev_queue_push_xmit(skb); } @@ -851,14 +848,27 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) * boundaries by 
preserving frag_list rather than refragmenting. */ if (skb->len + mtu_reserved > skb->dev->mtu) { + struct brnf_frag_data *data; + frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; if (br_parse_ip_options(skb)) /* Drop invalid packet */ return NF_DROP; IPCB(skb)->frag_max_size = frag_max_size; + + nf_bridge_update_protocol(skb); + + data = this_cpu_ptr(&brnf_frag_data_storage); + data->encap_size = nf_bridge_encap_header_len(skb); + data->size = ETH_HLEN + data->encap_size; + + skb_copy_from_linear_data_offset(skb, -data->size, data->mac, + data->size); + ret = ip_fragment(skb, br_nf_push_frag_xmit); - } else + } else { ret = br_dev_queue_push_xmit(skb); + } return ret; } @@ -906,7 +916,6 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, } nf_bridge_pull_encap_header(skb); - nf_bridge_save_header(skb); if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); else @@ -951,8 +960,11 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) skb_pull(skb, ETH_HLEN); nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; - skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), - skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); + BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); + + skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), + nf_bridge->neigh_header, + ETH_HLEN - ETH_ALEN); skb->dev = nf_bridge->physindev; br_handle_frame_finish(skb); } -- cgit v1.2.3 From 383307838d41935841ba6b2e939b968326e2dea1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Apr 2015 14:31:43 +0200 Subject: netfilter: bridge: add and use nf_bridge_info_get helper Don't access skb->nf_bridge directly, this pointer will be removed soon. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'net/bridge') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ca1cb6704a78..301f12b0a7cd 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -124,6 +124,11 @@ struct brnf_frag_data { static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); #endif +static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb) +{ + return skb->nf_bridge; +} + static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { struct net_bridge_port *port; @@ -268,7 +273,7 @@ static void nf_bridge_update_protocol(struct sk_buff *skb) * bridge PRE_ROUTING hook. */ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; if (nf_bridge->mask & BRNF_PKT_TYPE) { @@ -300,7 +305,6 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) */ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct neighbour *neigh; struct dst_entry *dst; @@ -310,6 +314,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) dst = skb_dst(skb); neigh = dst_neigh_lookup_skb(dst, skb); if (neigh) { + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; if (neigh->hh.hh_len) { @@ -396,7 +401,7 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) { struct net_device *dev = skb->dev; struct iphdr *iph = ip_hdr(skb); - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; int err; int frag_max_size; @@ -488,7 +493,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct /* Some common code for 
IPv4/IPv6 */ static struct net_device *setup_pre_routing(struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; @@ -687,7 +692,7 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *in; if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { @@ -738,6 +743,10 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, if (!nf_bridge_unshare(skb)) return NF_DROP; + nf_bridge = nf_bridge_info_get(skb); + if (!nf_bridge) + return NF_DROP; + parent = bridge_parent(out); if (!parent) return NF_DROP; @@ -751,7 +760,6 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, nf_bridge_pull_encap_header(skb); - nf_bridge = skb->nf_bridge; if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; nf_bridge->mask |= BRNF_PKT_TYPE; @@ -886,7 +894,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *realoutdev = bridge_parent(skb->dev); u_int8_t pf; @@ -955,7 +963,7 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, */ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); skb_pull(skb, ETH_HLEN); nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; -- cgit v1.2.3 From 3eaf402502e49ad9c58c73e8599c7c4f345d62da Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Apr 2015 14:31:44 
+0200 Subject: netfilter: bridge: start splitting mask into public/private chunks ->mask is a bit info field that mixes various use cases. In particular, we have flags that are mutually exclusive, and flags that are only used within br_netfilter while others need to be exposed to other parts of the kernel. Remove BRNF_8021Q/PPPoE flags. They're mutually exclusive and only needed within br_netfilter context. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 4 +--- include/linux/skbuff.h | 5 +++++ net/bridge/br_netfilter.c | 15 +++++++++++---- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'net/bridge') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index e1d96bc2767c..d47a32dffa15 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -20,12 +20,10 @@ enum nf_br_hook_priorities { #define BRNF_PKT_TYPE 0x01 #define BRNF_BRIDGED_DNAT 0x02 #define BRNF_NF_BRIDGE_PREROUTING 0x08 -#define BRNF_8021Q 0x10 -#define BRNF_PPPoE 0x20 static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { - if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) + if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) return PPPOE_SES_HLEN; return 0; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f66a089afc41..6f75fb5c6ed7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -166,6 +166,11 @@ struct nf_conntrack { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { atomic_t use; + enum { + BRNF_PROTO_UNCHANGED, + BRNF_PROTO_8021Q, + BRNF_PROTO_PPPOE + } orig_proto; unsigned int mask; struct net_device *physindev; struct net_device *physoutdev; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 301f12b0a7cd..ab1e988ca4b8 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -262,10 +262,16 @@ drop: static void nf_bridge_update_protocol(struct sk_buff *skb) { 
- if (skb->nf_bridge->mask & BRNF_8021Q) + switch (skb->nf_bridge->orig_proto) { + case BRNF_PROTO_8021Q: skb->protocol = htons(ETH_P_8021Q); - else if (skb->nf_bridge->mask & BRNF_PPPoE) + break; + case BRNF_PROTO_PPPOE: skb->protocol = htons(ETH_P_PPP_SES); + break; + case BRNF_PROTO_UNCHANGED: + break; + } } /* PF_BRIDGE/PRE_ROUTING *********************************************/ @@ -503,10 +509,11 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; nf_bridge->physindev = skb->dev; skb->dev = brnf_get_logical_dev(skb, skb->dev); + if (skb->protocol == htons(ETH_P_8021Q)) - nf_bridge->mask |= BRNF_8021Q; + nf_bridge->orig_proto = BRNF_PROTO_8021Q; else if (skb->protocol == htons(ETH_P_PPP_SES)) - nf_bridge->mask |= BRNF_PPPoE; + nf_bridge->orig_proto = BRNF_PROTO_PPPOE; /* Must drop socket now because of tproxy. */ skb_orphan(skb); -- cgit v1.2.3 From a1e67951e6c0b11bb11c256f8e1c45ed51fcd760 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Apr 2015 14:31:45 +0200 Subject: netfilter: bridge: make BRNF_PKT_TYPE flag a bool nf_bridge_info->mask is used for several things, for example to remember if skb->pkt_type was set to OTHER_HOST. For a bridge, OTHER_HOST is the expected case. For ip forward it's a non-starter though -- routing expects PACKET_HOST. Bridge netfilter thus changes OTHER_HOST to PACKET_HOST before hook invocation and then un-does it after hook traversal. This information is irrelevant outside of br_netfilter. After this change, ->mask now only contains flags that need to be known outside of br_netfilter in fast-path. Future patch changes mask into a 2bit state field in sk_buff, so that we can remove skb->nf_bridge pointer for good and consider all remaining places that access nf_bridge info content a not-so fastpath. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 1 - include/linux/skbuff.h | 1 + net/bridge/br_netfilter.c | 18 +++++++++--------- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net/bridge') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index d47a32dffa15..8912e8c355fd 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -17,7 +17,6 @@ enum nf_br_hook_priorities { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) -#define BRNF_PKT_TYPE 0x01 #define BRNF_BRIDGED_DNAT 0x02 #define BRNF_NF_BRIDGE_PREROUTING 0x08 diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f75fb5c6ed7..0991259643d6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -171,6 +171,7 @@ struct nf_bridge_info { BRNF_PROTO_8021Q, BRNF_PROTO_PPPOE } orig_proto; + bool pkt_otherhost; unsigned int mask; struct net_device *physindev; struct net_device *physoutdev; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ab1e988ca4b8..e8ac7432acb6 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -282,9 +282,9 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; - if (nf_bridge->mask & BRNF_PKT_TYPE) { + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = false; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; @@ -415,9 +415,9 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) frag_max_size = IPCB(skb)->frag_max_size; BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; - if (nf_bridge->mask & BRNF_PKT_TYPE) { + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = false; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; if (dnat_took_place(skb)) { @@ 
-503,7 +503,7 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; - nf_bridge->mask |= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = true; } nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; @@ -711,9 +711,9 @@ static int br_nf_forward_finish(struct sk_buff *skb) } in = nf_bridge->physindev; - if (nf_bridge->mask & BRNF_PKT_TYPE) { + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = false; } nf_bridge_update_protocol(skb); } else { @@ -769,7 +769,7 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; - nf_bridge->mask |= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = true; } if (pf == NFPROTO_IPV4) { @@ -927,7 +927,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, * about the value of skb->pkt_type. */ if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; - nf_bridge->mask |= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = true; } nf_bridge_pull_encap_header(skb); -- cgit v1.2.3