diff options
Diffstat (limited to 'net/bridge')
-rw-r--r-- | net/bridge/br_device.c | 1 | ||||
-rw-r--r-- | net/bridge/br_netfilter_hooks.c | 247 | ||||
-rw-r--r-- | net/bridge/br_netfilter_ipv6.c | 2 | ||||
-rw-r--r-- | net/bridge/br_private.h | 1 | ||||
-rw-r--r-- | net/bridge/netfilter/Kconfig | 14 | ||||
-rw-r--r-- | net/bridge/netfilter/Makefile | 3 | ||||
-rw-r--r-- | net/bridge/netfilter/ebt_dnat.c | 2 | ||||
-rw-r--r-- | net/bridge/netfilter/ebt_redirect.c | 2 | ||||
-rw-r--r-- | net/bridge/netfilter/ebt_snat.c | 2 | ||||
-rw-r--r-- | net/bridge/netfilter/nf_conntrack_bridge.c | 435 |
10 files changed, 619 insertions, 90 deletions
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index c05def8fd9cd..681b72862c16 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -52,6 +52,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_switchdev_frame_unmark(skb); BR_INPUT_SKB_CB(skb)->brdev = dev; + BR_INPUT_SKB_CB(skb)->frag_max_size = 0; skb_reset_mac_header(skb); eth = eth_hdr(skb); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 34fa72c72ad8..d3f9592f4ff8 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -47,25 +47,22 @@ static unsigned int brnf_net_id __read_mostly; struct brnf_net { bool enabled; -}; #ifdef CONFIG_SYSCTL -static struct ctl_table_header *brnf_sysctl_header; -static int brnf_call_iptables __read_mostly = 1; -static int brnf_call_ip6tables __read_mostly = 1; -static int brnf_call_arptables __read_mostly = 1; -static int brnf_filter_vlan_tagged __read_mostly; -static int brnf_filter_pppoe_tagged __read_mostly; -static int brnf_pass_vlan_indev __read_mostly; -#else -#define brnf_call_iptables 1 -#define brnf_call_ip6tables 1 -#define brnf_call_arptables 1 -#define brnf_filter_vlan_tagged 0 -#define brnf_filter_pppoe_tagged 0 -#define brnf_pass_vlan_indev 0 + struct ctl_table_header *ctl_hdr; #endif + /* default value is 1 */ + int call_iptables; + int call_ip6tables; + int call_arptables; + + /* default value is 0 */ + int filter_vlan_tagged; + int filter_pppoe_tagged; + int pass_vlan_indev; +}; + #define IS_IP(skb) \ (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) @@ -85,17 +82,28 @@ static inline __be16 vlan_proto(const struct sk_buff *skb) return 0; } -#define IS_VLAN_IP(skb) \ - (vlan_proto(skb) == htons(ETH_P_IP) && \ - brnf_filter_vlan_tagged) +static inline bool is_vlan_ip(const struct sk_buff *skb, const struct net *net) +{ + struct brnf_net *brnet = net_generic(net, brnf_net_id); + + return vlan_proto(skb) == htons(ETH_P_IP) && brnet->filter_vlan_tagged; +} -#define IS_VLAN_IPV6(skb) \ - (vlan_proto(skb) == htons(ETH_P_IPV6) && \ - brnf_filter_vlan_tagged) +static inline bool is_vlan_ipv6(const struct sk_buff *skb, + const struct net *net) +{ + struct brnf_net *brnet = net_generic(net, brnf_net_id); -#define IS_VLAN_ARP(skb) \ - (vlan_proto(skb) == htons(ETH_P_ARP) && \ - brnf_filter_vlan_tagged) + return vlan_proto(skb) == htons(ETH_P_IPV6) && + brnet->filter_vlan_tagged; +} + +static inline bool is_vlan_arp(const struct sk_buff *skb, const struct net *net) +{ + struct brnf_net *brnet = net_generic(net, brnf_net_id); + + return vlan_proto(skb) == htons(ETH_P_ARP) && brnet->filter_vlan_tagged; +} static inline __be16 pppoe_proto(const struct sk_buff *skb) { @@ -103,15 +111,23 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) sizeof(struct pppoe_hdr))); } -#define IS_PPPOE_IP(skb) \ - (skb->protocol == htons(ETH_P_PPP_SES) && \ - pppoe_proto(skb) == htons(PPP_IP) && \ - brnf_filter_pppoe_tagged) +static inline bool is_pppoe_ip(const struct sk_buff *skb, const struct net *net) +{ + struct brnf_net *brnet = net_generic(net, brnf_net_id); + + return skb->protocol == htons(ETH_P_PPP_SES) && + pppoe_proto(skb) == htons(PPP_IP) && brnet->filter_pppoe_tagged; +} + +static inline bool is_pppoe_ipv6(const struct sk_buff *skb, + const struct net *net) +{ + struct brnf_net *brnet = net_generic(net, brnf_net_id); -#define IS_PPPOE_IPV6(skb) \ - (skb->protocol == htons(ETH_P_PPP_SES) && \ - pppoe_proto(skb) == htons(PPP_IPV6) && \ - brnf_filter_pppoe_tagged) + return skb->protocol == htons(ETH_P_PPP_SES) && + pppoe_proto(skb) == htons(PPP_IPV6) && + brnet->filter_pppoe_tagged; +} /* largest possible L2 header, see br_nf_dev_queue_xmit() */ #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) @@ -408,12 +424,16 @@ bridged_dnat: return 0; } -static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev) +static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, + const struct net_device *dev, + const struct net *net) { struct net_device *vlan, *br; + struct brnf_net *brnet = net_generic(net, brnf_net_id); br = bridge_parent(dev); - if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) + + if (brnet->pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) return br; vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, @@ -423,7 +443,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct } /* Some common code for IPv4/IPv6 */ -struct net_device *setup_pre_routing(struct sk_buff *skb) +struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); @@ -434,7 +454,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb) nf_bridge->in_prerouting = 1; nf_bridge->physindev = skb->dev; - skb->dev = brnf_get_logical_dev(skb, skb->dev); + skb->dev = brnf_get_logical_dev(skb, skb->dev, net); if (skb->protocol == htons(ETH_P_8021Q)) nf_bridge->orig_proto = BRNF_PROTO_8021Q; @@ -460,6 +480,7 @@ static unsigned int br_nf_pre_routing(void *priv, struct net_bridge_port *p; struct net_bridge *br; __u32 len = nf_bridge_encap_header_len(skb); + struct brnf_net *brnet; if (unlikely(!pskb_may_pull(skb, len))) return NF_DROP; @@ -469,8 +490,10 @@ static unsigned int br_nf_pre_routing(void *priv, return NF_DROP; br = p->br; - if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) { - if (!brnf_call_ip6tables && + brnet = net_generic(state->net, brnf_net_id); + if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || + is_pppoe_ipv6(skb, state->net)) { + if (!brnet->call_ip6tables && !br_opt_get(br, BROPT_NF_CALL_IP6TABLES)) return NF_ACCEPT; @@ -478,10 +501,11 @@ static unsigned int br_nf_pre_routing(void *priv, return br_nf_pre_routing_ipv6(priv, skb, state); } - if (!brnf_call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES)) + if (!brnet->call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES)) return NF_ACCEPT; - if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb)) + if (!IS_IP(skb) && !is_vlan_ip(skb, state->net) && + !is_pppoe_ip(skb, state->net)) return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); @@ -491,7 +515,7 @@ static unsigned int br_nf_pre_routing(void *priv, if (!nf_bridge_alloc(skb)) return NF_DROP; - if (!setup_pre_routing(skb)) + if (!setup_pre_routing(skb, state->net)) return NF_DROP; nf_bridge = nf_bridge_info_get(skb); @@ -514,7 +538,7 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *in; - if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { + if (!IS_ARP(skb) && !is_vlan_arp(skb, net)) { if (skb->protocol == htons(ETH_P_IP)) nf_bridge->frag_max_size = IPCB(skb)->frag_max_size; @@ -569,9 +593,11 @@ static unsigned int br_nf_forward_ip(void *priv, if (!parent) return NF_DROP; - if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) + if (IS_IP(skb) || is_vlan_ip(skb, state->net) || + is_pppoe_ip(skb, state->net)) pf = NFPROTO_IPV4; - else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) + else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || + is_pppoe_ipv6(skb, state->net)) pf = NFPROTO_IPV6; else return NF_ACCEPT; @@ -602,7 +628,7 @@ static unsigned int br_nf_forward_ip(void *priv, skb->protocol = htons(ETH_P_IPV6); NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, - brnf_get_logical_dev(skb, state->in), + brnf_get_logical_dev(skb, state->in, state->net), parent, br_nf_forward_finish); return NF_STOLEN; @@ -615,23 +641,25 @@ static unsigned int br_nf_forward_arp(void *priv, struct net_bridge_port *p; struct net_bridge *br; struct net_device **d = (struct net_device **)(skb->cb); + struct brnf_net *brnet; p = br_port_get_rcu(state->out); if (p == NULL) return NF_ACCEPT; br = p->br; - if (!brnf_call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES)) + brnet = net_generic(state->net, brnf_net_id); + if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES)) return NF_ACCEPT; if (!IS_ARP(skb)) { - if (!IS_VLAN_ARP(skb)) + if (!is_vlan_arp(skb, state->net)) return NF_ACCEPT; nf_bridge_pull_encap_header(skb); } if (arp_hdr(skb)->ar_pln != 4) { - if (IS_VLAN_ARP(skb)) + if (is_vlan_arp(skb, state->net)) nf_bridge_push_encap_header(skb); return NF_ACCEPT; } @@ -791,9 +819,11 @@ static unsigned int br_nf_post_routing(void *priv, if (!realoutdev) return NF_DROP; - if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) + if (IS_IP(skb) || is_vlan_ip(skb, state->net) || + is_pppoe_ip(skb, state->net)) pf = NFPROTO_IPV4; - else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) + else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) || + is_pppoe_ipv6(skb, state->net)) pf = NFPROTO_IPV6; else return NF_ACCEPT; @@ -946,23 +976,6 @@ static int brnf_device_event(struct notifier_block *unused, unsigned long event, return NOTIFY_OK; } -static void __net_exit brnf_exit_net(struct net *net) -{ - struct brnf_net *brnet = net_generic(net, brnf_net_id); - - if (!brnet->enabled) - return; - - nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); - brnet->enabled = false; -} - -static struct pernet_operations brnf_net_ops __read_mostly = { - .exit = brnf_exit_net, - .id = &brnf_net_id, - .size = sizeof(struct brnf_net), -}; - static struct notifier_block brnf_notifier __read_mostly = { .notifier_call = brnf_device_event, }; @@ -1021,49 +1034,124 @@ int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, static struct ctl_table brnf_table[] = { { .procname = "bridge-nf-call-arptables", - .data = &brnf_call_arptables, .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-call-iptables", - .data = &brnf_call_iptables, .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-call-ip6tables", - .data = &brnf_call_ip6tables, .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-filter-vlan-tagged", - .data = &brnf_filter_vlan_tagged, .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-filter-pppoe-tagged", - .data = &brnf_filter_pppoe_tagged, .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { .procname = "bridge-nf-pass-vlan-input-dev", - .data = &brnf_pass_vlan_indev, .maxlen = sizeof(int), .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, { } }; + +static inline void br_netfilter_sysctl_default(struct brnf_net *brnf) +{ + brnf->call_iptables = 1; + brnf->call_ip6tables = 1; + brnf->call_arptables = 1; + brnf->filter_vlan_tagged = 0; + brnf->filter_pppoe_tagged = 0; + brnf->pass_vlan_indev = 0; +} + +static int br_netfilter_sysctl_init_net(struct net *net) +{ + struct ctl_table *table = brnf_table; + struct brnf_net *brnet; + + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(brnf_table), GFP_KERNEL); + if (!table) + return -ENOMEM; + } + + brnet = net_generic(net, brnf_net_id); + table[0].data = &brnet->call_arptables; + table[1].data = &brnet->call_iptables; + table[2].data = &brnet->call_ip6tables; + table[3].data = &brnet->filter_vlan_tagged; + table[4].data = &brnet->filter_pppoe_tagged; + table[5].data = &brnet->pass_vlan_indev; + + br_netfilter_sysctl_default(brnet); + + brnet->ctl_hdr = register_net_sysctl(net, "net/bridge", table); + if (!brnet->ctl_hdr) { + if (!net_eq(net, &init_net)) + kfree(table); + + return -ENOMEM; + } + + return 0; +} + +static void br_netfilter_sysctl_exit_net(struct net *net, + struct brnf_net *brnet) +{ + struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg; + + unregister_net_sysctl_table(brnet->ctl_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static int __net_init brnf_init_net(struct net *net) +{ + return br_netfilter_sysctl_init_net(net); +} +#endif + +static void __net_exit brnf_exit_net(struct net *net) +{ + struct brnf_net *brnet; + + brnet = net_generic(net, brnf_net_id); + if (brnet->enabled) { + nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops)); + brnet->enabled = false; + } + +#ifdef CONFIG_SYSCTL + br_netfilter_sysctl_exit_net(net, brnet); #endif +} + +static struct pernet_operations brnf_net_ops __read_mostly = { +#ifdef CONFIG_SYSCTL + .init = brnf_init_net, +#endif + .exit = brnf_exit_net, + .id = &brnf_net_id, + .size = sizeof(struct brnf_net), +}; static int __init br_netfilter_init(void) { @@ -1079,16 +1167,6 @@ static int __init br_netfilter_init(void) return ret; } -#ifdef CONFIG_SYSCTL - brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table); - if (brnf_sysctl_header == NULL) { - printk(KERN_WARNING - "br_netfilter: can't register to sysctl.\n"); - unregister_netdevice_notifier(&brnf_notifier); - unregister_pernet_subsys(&brnf_net_ops); - return -ENOMEM; - } -#endif RCU_INIT_POINTER(nf_br_ops, &br_ops); printk(KERN_NOTICE "Bridge firewalling registered\n"); return 0; @@ -1099,9 +1177,6 @@ static void __exit br_netfilter_fini(void) RCU_INIT_POINTER(nf_br_ops, NULL); unregister_netdevice_notifier(&brnf_notifier); unregister_pernet_subsys(&brnf_net_ops); -#ifdef CONFIG_SYSCTL - unregister_net_sysctl_table(brnf_sysctl_header); -#endif } module_init(br_netfilter_init); diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 0e63e5dc5ac4..e4e0c836c3f5 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -224,7 +224,7 @@ unsigned int br_nf_pre_routing_ipv6(void *priv, nf_bridge = nf_bridge_alloc(skb); if (!nf_bridge) return NF_DROP; - if (!setup_pre_routing(skb)) + if (!setup_pre_routing(skb, state->net)) return NF_DROP; nf_bridge = nf_bridge_info_get(skb); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 159a0e2cb0f6..e8cf03b43b7d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -421,6 +421,7 @@ struct net_bridge { struct br_input_skb_cb { struct net_device *brdev; + u16 frag_max_size; #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u8 igmp; u8 mrouters_only:1; diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index c3ad90c43801..f4fb0b9b927d 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -19,6 +19,20 @@ config NF_LOG_BRIDGE tristate "Bridge packet logging" select NF_LOG_COMMON +config NF_CONNTRACK_BRIDGE + tristate "IPv4/IPV6 bridge connection tracking support" + depends on NF_CONNTRACK + default n + help + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. This is used to enhance packet filtering via + stateful policies. Enable this if you want native tracking from + the bridge. This provides a replacement for the `br_netfilter' + infrastructure. + + To compile it as a module, choose M here. If unsure, say N. + endif # NF_TABLES_BRIDGE menuconfig BRIDGE_NF_EBTABLES diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 9b868861f21a..9d7767322a64 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -5,6 +5,9 @@ obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o +# connection tracking +obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o + # packet logging obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index eeae23a73c6a..ed91ea31978a 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -22,7 +22,7 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) const struct ebt_nat_info *info = par->targinfo; struct net_device *dev; - if (!skb_make_writable(skb, 0)) + if (skb_ensure_writable(skb, ETH_ALEN)) return EBT_DROP; ether_addr_copy(eth_hdr(skb)->h_dest, info->mac); diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 53ef08e6765f..0cad62a4052b 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -21,7 +21,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_redirect_info *info = par->targinfo; - if (!skb_make_writable(skb, 0)) + if (skb_ensure_writable(skb, ETH_ALEN)) return EBT_DROP; if (xt_hooknum(par) != NF_BR_BROUTING) diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 700d338d5ddb..27443bf229a3 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -22,7 +22,7 @@ ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; - if (!skb_make_writable(skb, 0)) + if (skb_ensure_writable(skb, ETH_ALEN * 2)) return EBT_DROP; ether_addr_copy(eth_hdr(skb)->h_source, info->mac); diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c new file mode 100644 index 000000000000..4f5444d2a526 --- /dev/null +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -0,0 +1,435 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/types.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_bridge.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/icmp.h> +#include <linux/sysctl.h> +#include <net/route.h> +#include <net/ip.h> + +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_bridge.h> + +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/ipv6/nf_defrag_ipv6.h> +#include <net/netfilter/nf_tables.h> + +#include "../br_private.h" + +/* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff + * has been linearized or cloned. + */ +static int nf_br_ip_fragment(struct net *net, struct sock *sk, + struct sk_buff *skb, + struct nf_ct_bridge_frag_data *data, + int (*output)(struct net *, struct sock *sk, + const struct nf_ct_bridge_frag_data *data, + struct sk_buff *)) +{ + int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; + unsigned int hlen, ll_rs, mtu; + struct ip_frag_state state; + struct iphdr *iph; + int err; + + /* for offloaded checksums cleanup checksum before fragmentation */ + if (skb->ip_summed == CHECKSUM_PARTIAL && + (err = skb_checksum_help(skb))) + goto blackhole; + + iph = ip_hdr(skb); + + /* + * Setup starting values + */ + + hlen = iph->ihl * 4; + frag_max_size -= hlen; + ll_rs = LL_RESERVED_SPACE(skb->dev); + mtu = skb->dev->mtu; + + if (skb_has_frag_list(skb)) { + unsigned int first_len = skb_pagelen(skb); + struct ip_fraglist_iter iter; + struct sk_buff *frag; + + if (first_len - hlen > mtu || + skb_headroom(skb) < ll_rs) + goto blackhole; + + if (skb_cloned(skb)) + goto slow_path; + + skb_walk_frags(skb, frag) { + if (frag->len > mtu || + skb_headroom(frag) < hlen + ll_rs) + goto blackhole; + + if (skb_shared(frag)) + goto slow_path; + } + + ip_fraglist_init(skb, iph, hlen, &iter); + + for (;;) { + if (iter.frag) + ip_fraglist_prepare(skb, &iter); + + err = output(net, sk, data, skb); + if (err || !iter.frag) + break; + + skb = ip_fraglist_next(&iter); + } + return err; + } +slow_path: + /* This is a linearized skbuff, the original geometry is lost for us. + * This may also be a clone skbuff, we could preserve the geometry for + * the copies but probably not worth the effort. + */ + ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state); + + while (state.left > 0) { + struct sk_buff *skb2; + + skb2 = ip_frag_next(skb, &state); + if (IS_ERR(skb2)) { + err = PTR_ERR(skb2); + goto blackhole; + } + + err = output(net, sk, data, skb2); + if (err) + goto blackhole; + } + consume_skb(skb); + return err; + +blackhole: + kfree_skb(skb); + return 0; +} + +/* ip_defrag() expects IPCB() in place. */ +static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb, + size_t inet_skb_parm_size) +{ + memcpy(cb, skb->cb, sizeof(*cb)); + memset(skb->cb, 0, inet_skb_parm_size); +} + +static void br_skb_cb_restore(struct sk_buff *skb, + const struct br_input_skb_cb *cb, + u16 fragsz) +{ + memcpy(skb->cb, cb, sizeof(*cb)); + BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz; +} + +static unsigned int nf_ct_br_defrag4(struct sk_buff *skb, + const struct nf_hook_state *state) +{ + u16 zone_id = NF_CT_DEFAULT_ZONE_ID; + enum ip_conntrack_info ctinfo; + struct br_input_skb_cb cb; + const struct nf_conn *ct; + int err; + + if (!ip_is_fragment(ip_hdr(skb))) + return NF_ACCEPT; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) + zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); + + br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm)); + local_bh_disable(); + err = ip_defrag(state->net, skb, + IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); + local_bh_enable(); + if (!err) { + br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size); + skb->ignore_df = 1; + return NF_ACCEPT; + } + + return NF_STOLEN; +} + +static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, + const struct nf_hook_state *state) +{ + u16 zone_id = NF_CT_DEFAULT_ZONE_ID; + enum ip_conntrack_info ctinfo; + struct br_input_skb_cb cb; + const struct nf_conn *ct; + int err; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) + zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); + + br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm)); + + err = nf_ipv6_br_defrag(state->net, skb, + IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); + /* queued */ + if (err == -EINPROGRESS) + return NF_STOLEN; + + br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size); + return err == 0 ? NF_ACCEPT : NF_DROP; +} + +static int nf_ct_br_ip_check(const struct sk_buff *skb) +{ + const struct iphdr *iph; + int nhoff, len; + + nhoff = skb_network_offset(skb); + iph = ip_hdr(skb); + if (iph->ihl < 5 || + iph->version != 4) + return -1; + + len = ntohs(iph->tot_len); + if (skb->len < nhoff + len || + len < (iph->ihl * 4)) + return -1; + + return 0; +} + +static int nf_ct_br_ipv6_check(const struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + int nhoff, len; + + nhoff = skb_network_offset(skb); + hdr = ipv6_hdr(skb); + if (hdr->version != 6) + return -1; + + len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff; + if (skb->len < len) + return -1; + + return 0; +} + +static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nf_hook_state bridge_state = *state; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + u32 len; + int ret; + + ct = nf_ct_get(skb, &ctinfo); + if ((ct && !nf_ct_is_template(ct)) || + ctinfo == IP_CT_UNTRACKED) + return NF_ACCEPT; + + switch (skb->protocol) { + case htons(ETH_P_IP): + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return NF_ACCEPT; + + len = ntohs(ip_hdr(skb)->tot_len); + if (pskb_trim_rcsum(skb, len)) + return NF_ACCEPT; + + if (nf_ct_br_ip_check(skb)) + return NF_ACCEPT; + + bridge_state.pf = NFPROTO_IPV4; + ret = nf_ct_br_defrag4(skb, &bridge_state); + break; + case htons(ETH_P_IPV6): + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + return NF_ACCEPT; + + len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); + if (pskb_trim_rcsum(skb, len)) + return NF_ACCEPT; + + if (nf_ct_br_ipv6_check(skb)) + return NF_ACCEPT; + + bridge_state.pf = NFPROTO_IPV6; + ret = nf_ct_br_defrag6(skb, &bridge_state); + break; + default: + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); + return NF_ACCEPT; + } + + if (ret != NF_ACCEPT) + return ret; + + return nf_conntrack_in(skb, &bridge_state); +} + +static void nf_ct_bridge_frag_save(struct sk_buff *skb, + struct nf_ct_bridge_frag_data *data) +{ + if (skb_vlan_tag_present(skb)) { + data->vlan_present = true; + data->vlan_tci = skb->vlan_tci; + data->vlan_proto = skb->vlan_proto; + } else { + data->vlan_present = false; + } + skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); +} + +static unsigned int +nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, + int (*output)(struct net *, struct sock *sk, + const struct nf_ct_bridge_frag_data *data, + struct sk_buff *)) +{ + struct nf_ct_bridge_frag_data data; + + if (!BR_INPUT_SKB_CB(skb)->frag_max_size) + return NF_ACCEPT; + + nf_ct_bridge_frag_save(skb, &data); + switch (skb->protocol) { + case htons(ETH_P_IP): + nf_br_ip_fragment(state->net, state->sk, skb, &data, output); + break; + case htons(ETH_P_IPV6): + nf_br_ip6_fragment(state->net, state->sk, skb, &data, output); + break; + default: + WARN_ON_ONCE(1); + return NF_DROP; + } + + return NF_STOLEN; +} + +/* Actually only slow path refragmentation needs this. */ +static int nf_ct_bridge_frag_restore(struct sk_buff *skb, + const struct nf_ct_bridge_frag_data *data) +{ + int err; + + err = skb_cow_head(skb, ETH_HLEN); + if (err) { + kfree_skb(skb); + return -ENOMEM; + } + if (data->vlan_present) + __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); + else if (skb_vlan_tag_present(skb)) + __vlan_hwaccel_clear_tag(skb); + + skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); + skb_reset_mac_header(skb); + + return 0; +} + +static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk, + const struct nf_ct_bridge_frag_data *data, + struct sk_buff *skb) +{ + int err; + + err = nf_ct_bridge_frag_restore(skb, data); + if (err < 0) + return err; + + return br_dev_queue_push_xmit(net, sk, skb); +} + +static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + int protoff; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + return nf_conntrack_confirm(skb); + + switch (skb->protocol) { + case htons(ETH_P_IP): + protoff = skb_network_offset(skb) + ip_hdrlen(skb); + break; + case htons(ETH_P_IPV6): { + unsigned char pnum = ipv6_hdr(skb)->nexthdr; + __be16 frag_off; + + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, + &frag_off); + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) + return nf_conntrack_confirm(skb); + } + break; + default: + return NF_ACCEPT; + } + return nf_confirm(skb, protoff, ct, ctinfo); +} + +static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + int ret; + + ret = nf_ct_bridge_confirm(skb); + if (ret != NF_ACCEPT) + return ret; + + return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post); +} + +static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { + { + .hook = nf_ct_bridge_pre, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK, + }, + { + .hook = nf_ct_bridge_post, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, + }, +}; + +static struct nf_ct_bridge_info bridge_info = { + .ops = nf_ct_bridge_hook_ops, + .ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops), + .me = THIS_MODULE, +}; + +static int __init nf_conntrack_l3proto_bridge_init(void) +{ + nf_ct_bridge_register(&bridge_info); + + return 0; +} + +static void __exit nf_conntrack_l3proto_bridge_fini(void) +{ + nf_ct_bridge_unregister(&bridge_info); +} + +module_init(nf_conntrack_l3proto_bridge_init); +module_exit(nf_conntrack_l3proto_bridge_fini); + +MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); +MODULE_LICENSE("GPL"); |