diff options
Diffstat (limited to 'net/ipv6')
34 files changed, 2187 insertions, 400 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 3f8411328de5..2343e4f2e0bf 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -232,6 +232,15 @@ config IPV6_GRE Saying M here will produce a module called ip6_gre. If unsure, say N. +config IPV6_FOU + tristate + default NET_FOU && IPV6 + +config IPV6_FOU_TUNNEL + tristate + default NET_FOU_IP_TUNNELS && IPV6_FOU + select IPV6_TUNNEL + config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" select FIB_RULES diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 7ec3129c9ace..c174ccb340a1 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -22,6 +22,7 @@ ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o +ipv6-$(CONFIG_NETLABEL) += calipso.o ipv6-objs += $(ipv6-y) @@ -42,7 +43,7 @@ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-$(CONFIG_NET_FOU) += fou6.o +obj-$(CONFIG_IPV6_FOU) += fou6.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 47f837a58e0a..6287a8b9f428 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -547,7 +547,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC); + skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_KERNEL); if (!skb) goto errout; @@ -559,7 +559,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_KERNEL); return; errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err); @@ -1524,6 +1524,28 @@ out: return hiscore_idx; } +static int ipv6_get_saddr_master(struct net *net, + const struct net_device *dst_dev, + const struct net_device *master, + struct ipv6_saddr_dst *dst, + struct ipv6_saddr_score *scores, + int hiscore_idx) +{ + struct inet6_dev *idev; + + idev = __in6_dev_get(dst_dev); + if (idev) + hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev, + scores, hiscore_idx); + + idev = __in6_dev_get(master); + if (idev) + hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev, + scores, hiscore_idx); + + return hiscore_idx; +} + int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) @@ -1577,13 +1599,39 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, if (idev) hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx); } else { + const struct net_device *master; + int master_idx = 0; + + /* if dst_dev exists and is enslaved to an L3 device, then + * prefer addresses from dst_dev and then the master over + * any other enslaved devices in the L3 domain. + */ + master = l3mdev_master_dev_rcu(dst_dev); + if (master) { + master_idx = master->ifindex; + + hiscore_idx = ipv6_get_saddr_master(net, dst_dev, + master, &dst, + scores, hiscore_idx); + + if (scores[hiscore_idx].ifa) + goto out; + } + for_each_netdev_rcu(net, dev) { + /* only consider addresses on devices in the + * same L3 domain + */ + if (l3mdev_master_ifindex_rcu(dev) != master_idx) + continue; idev = __in6_dev_get(dev); if (!idev) continue; hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx); } } + +out: rcu_read_unlock(); hiscore = &scores[hiscore_idx]; @@ -2254,7 +2302,7 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) return ERR_PTR(-EACCES); /* Add default multicast route */ - if (!(dev->flags & IFF_LOOPBACK)) + if (!(dev->flags & IFF_LOOPBACK) && !netif_is_l3_master(dev)) addrconf_add_mroute(dev); return idev; @@ -2333,12 +2381,109 @@ static bool is_addr_mode_generate_stable(struct inet6_dev *idev) idev->addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM; } +int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, + const struct prefix_info *pinfo, + struct inet6_dev *in6_dev, + const struct in6_addr *addr, int addr_type, + u32 addr_flags, bool sllao, bool tokenized, + __u32 valid_lft, u32 prefered_lft) +{ + struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1); + int create = 0, update_lft = 0; + + if (!ifp && valid_lft) { + int max_addresses = in6_dev->cnf.max_addresses; + +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + if (in6_dev->cnf.optimistic_dad && + !net->ipv6.devconf_all->forwarding && sllao) + addr_flags |= IFA_F_OPTIMISTIC; +#endif + + /* Do not allow to create too much of autoconfigured + * addresses; this would be too easy way to crash kernel. + */ + if (!max_addresses || + ipv6_count_addresses(in6_dev) < max_addresses) + ifp = ipv6_add_addr(in6_dev, addr, NULL, + pinfo->prefix_len, + addr_type&IPV6_ADDR_SCOPE_MASK, + addr_flags, valid_lft, + prefered_lft); + + if (IS_ERR_OR_NULL(ifp)) + return -1; + + update_lft = 0; + create = 1; + spin_lock_bh(&ifp->lock); + ifp->flags |= IFA_F_MANAGETEMPADDR; + ifp->cstamp = jiffies; + ifp->tokenized = tokenized; + spin_unlock_bh(&ifp->lock); + addrconf_dad_start(ifp); + } + + if (ifp) { + u32 flags; + unsigned long now; + u32 stored_lft; + + /* update lifetime (RFC2462 5.5.3 e) */ + spin_lock_bh(&ifp->lock); + now = jiffies; + if (ifp->valid_lft > (now - ifp->tstamp) / HZ) + stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; + else + stored_lft = 0; + if (!update_lft && !create && stored_lft) { + const u32 minimum_lft = min_t(u32, + stored_lft, MIN_VALID_LIFETIME); + valid_lft = max(valid_lft, minimum_lft); + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. + */ + update_lft = 1; + } + + if (update_lft) { + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + ifp->tstamp = now; + flags = ifp->flags; + ifp->flags &= ~IFA_F_DEPRECATED; + spin_unlock_bh(&ifp->lock); + + if (!(flags&IFA_F_TENTATIVE)) + ipv6_ifa_notify(0, ifp); + } else + spin_unlock_bh(&ifp->lock); + + manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft, + create, now); + + in6_ifa_put(ifp); + addrconf_verify(); + } + + return 0; +} +EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr); + void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) { struct prefix_info *pinfo; __u32 valid_lft; __u32 prefered_lft; - int addr_type; + int addr_type, err; u32 addr_flags = 0; struct inet6_dev *in6_dev; struct net *net = dev_net(dev); @@ -2432,10 +2577,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) /* Try to figure out our local address for this prefix */ if (pinfo->autoconf && in6_dev->cnf.autoconf) { - struct inet6_ifaddr *ifp; struct in6_addr addr; - int create = 0, update_lft = 0; - bool tokenized = false; + bool tokenized = false, dev_addr_generated = false; if (pinfo->prefix_len == 64) { memcpy(&addr, &pinfo->prefix, 8); @@ -2453,106 +2596,36 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) goto ok; } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { - in6_dev_put(in6_dev); - return; + goto put; + } else { + dev_addr_generated = true; } goto ok; } net_dbg_ratelimited("IPv6 addrconf: prefix with wrong length %d\n", pinfo->prefix_len); - in6_dev_put(in6_dev); - return; + goto put; ok: + err = addrconf_prefix_rcv_add_addr(net, dev, pinfo, in6_dev, + &addr, addr_type, + addr_flags, sllao, + tokenized, valid_lft, + prefered_lft); + if (err) + goto put; - ifp = ipv6_get_ifaddr(net, &addr, dev, 1); - - if (!ifp && valid_lft) { - int max_addresses = in6_dev->cnf.max_addresses; - -#ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if (in6_dev->cnf.optimistic_dad && - !net->ipv6.devconf_all->forwarding && sllao) - addr_flags |= IFA_F_OPTIMISTIC; -#endif - - /* Do not allow to create too much of autoconfigured - * addresses; this would be too easy way to crash kernel. - */ - if (!max_addresses || - ipv6_count_addresses(in6_dev) < max_addresses) - ifp = ipv6_add_addr(in6_dev, &addr, NULL, - pinfo->prefix_len, - addr_type&IPV6_ADDR_SCOPE_MASK, - addr_flags, valid_lft, - prefered_lft); - - if (IS_ERR_OR_NULL(ifp)) { - in6_dev_put(in6_dev); - return; - } - - update_lft = 0; - create = 1; - spin_lock_bh(&ifp->lock); - ifp->flags |= IFA_F_MANAGETEMPADDR; - ifp->cstamp = jiffies; - ifp->tokenized = tokenized; - spin_unlock_bh(&ifp->lock); - addrconf_dad_start(ifp); - } - - if (ifp) { - u32 flags; - unsigned long now; - u32 stored_lft; - - /* update lifetime (RFC2462 5.5.3 e) */ - spin_lock_bh(&ifp->lock); - now = jiffies; - if (ifp->valid_lft > (now - ifp->tstamp) / HZ) - stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; - else - stored_lft = 0; - if (!update_lft && !create && stored_lft) { - const u32 minimum_lft = min_t(u32, - stored_lft, MIN_VALID_LIFETIME); - valid_lft = max(valid_lft, minimum_lft); - - /* RFC4862 Section 5.5.3e: - * "Note that the preferred lifetime of the - * corresponding address is always reset to - * the Preferred Lifetime in the received - * Prefix Information option, regardless of - * whether the valid lifetime is also reset or - * ignored." - * - * So we should always update prefered_lft here. - */ - update_lft = 1; - } - - if (update_lft) { - ifp->valid_lft = valid_lft; - ifp->prefered_lft = prefered_lft; - ifp->tstamp = now; - flags = ifp->flags; - ifp->flags &= ~IFA_F_DEPRECATED; - spin_unlock_bh(&ifp->lock); - - if (!(flags&IFA_F_TENTATIVE)) - ipv6_ifa_notify(0, ifp); - } else - spin_unlock_bh(&ifp->lock); - - manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft, - create, now); - - in6_ifa_put(ifp); - addrconf_verify(); - } + /* Ignore error case here because previous prefix add addr was + * successful which will be notified. + */ + ndisc_ops_prefix_rcv_add_addr(net, dev, pinfo, in6_dev, &addr, + addr_type, addr_flags, sllao, + tokenized, valid_lft, + prefered_lft, + dev_addr_generated); } inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); +put: in6_dev_put(in6_dev); } @@ -2947,8 +3020,8 @@ static void init_loopback(struct net_device *dev) } } -static void addrconf_add_linklocal(struct inet6_dev *idev, - const struct in6_addr *addr, u32 flags) +void addrconf_add_linklocal(struct inet6_dev *idev, + const struct in6_addr *addr, u32 flags) { struct inet6_ifaddr *ifp; u32 addr_flags = flags | IFA_F_PERMANENT; @@ -2967,6 +3040,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, in6_ifa_put(ifp); } } +EXPORT_SYMBOL_GPL(addrconf_add_linklocal); static bool ipv6_reserved_interfaceid(struct in6_addr address) { @@ -3562,6 +3636,10 @@ restart: if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); + } else { + if (idev->cnf.forwarding) + addrconf_leave_anycast(ifa); + addrconf_leave_solict(ifa->idev, &ifa->addr); } write_lock_bh(&idev->lock); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bfa86f040c16..b454055ba625 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -60,6 +60,7 @@ #ifdef CONFIG_IPV6_TUNNEL #include <net/ip6_tunnel.h> #endif +#include <net/calipso.h> #include <asm/uaccess.h> #include <linux/mroute6.h> @@ -92,6 +93,12 @@ MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces"); module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444); MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces"); +bool ipv6_mod_enabled(void) +{ + return disable_ipv6_mod == 0; +} +EXPORT_SYMBOL_GPL(ipv6_mod_enabled); + static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); @@ -977,6 +984,10 @@ static int __init inet6_init(void) if (err) goto pingv6_fail; + err = calipso_init(); + if (err) + goto calipso_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -987,8 +998,10 @@ out: #ifdef CONFIG_SYSCTL sysctl_fail: - pingv6_exit(); + calipso_exit(); #endif +calipso_fail: + pingv6_exit(); pingv6_fail: ipv6_packet_cleanup(); ipv6_packet_fail: diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c new file mode 100644 index 000000000000..c53b92c617c5 --- /dev/null +++ b/net/ipv6/calipso.c @@ -0,0 +1,1473 @@ +/* + * CALIPSO - Common Architecture Label IPv6 Security Option + * + * This is an implementation of the CALIPSO protocol as specified in + * RFC 5570. + * + * Authors: Paul Moore <paul.moore@hp.com> + * Huw Davies <huw@codeweavers.com> + * + */ + +/* (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 + * (c) Copyright Huw Davies <huw@codeweavers.com>, 2015 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/jhash.h> +#include <linux/audit.h> +#include <linux/slab.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/tcp.h> +#include <net/netlabel.h> +#include <net/calipso.h> +#include <linux/atomic.h> +#include <linux/bug.h> +#include <asm/unaligned.h> +#include <linux/crc-ccitt.h> + +/* Maximium size of the calipso option including + * the two-byte TLV header. + */ +#define CALIPSO_OPT_LEN_MAX (2 + 252) + +/* Size of the minimum calipso option including + * the two-byte TLV header. + */ +#define CALIPSO_HDR_LEN (2 + 8) + +/* Maximium size of the calipso option including + * the two-byte TLV header and upto 3 bytes of + * leading pad and 7 bytes of trailing pad. + */ +#define CALIPSO_OPT_LEN_MAX_WITH_PAD (3 + CALIPSO_OPT_LEN_MAX + 7) + + /* Maximium size of u32 aligned buffer required to hold calipso + * option. Max of 3 initial pad bytes starting from buffer + 3. + * i.e. the worst case is when the previous tlv finishes on 4n + 3. + */ +#define CALIPSO_MAX_BUFFER (6 + CALIPSO_OPT_LEN_MAX) + +/* List of available DOI definitions */ +static DEFINE_SPINLOCK(calipso_doi_list_lock); +static LIST_HEAD(calipso_doi_list); + +/* Label mapping cache */ +int calipso_cache_enabled = 1; +int calipso_cache_bucketsize = 10; +#define CALIPSO_CACHE_BUCKETBITS 7 +#define CALIPSO_CACHE_BUCKETS BIT(CALIPSO_CACHE_BUCKETBITS) +#define CALIPSO_CACHE_REORDERLIMIT 10 +struct calipso_map_cache_bkt { + spinlock_t lock; + u32 size; + struct list_head list; +}; + +struct calipso_map_cache_entry { + u32 hash; + unsigned char *key; + size_t key_len; + + struct netlbl_lsm_cache *lsm_data; + + u32 activity; + struct list_head list; +}; + +static struct calipso_map_cache_bkt *calipso_cache; + +/* Label Mapping Cache Functions + */ + +/** + * calipso_cache_entry_free - Frees a cache entry + * @entry: the entry to free + * + * Description: + * This function frees the memory associated with a cache entry including the + * LSM cache data if there are no longer any users, i.e. reference count == 0. + * + */ +static void calipso_cache_entry_free(struct calipso_map_cache_entry *entry) +{ + if (entry->lsm_data) + netlbl_secattr_cache_free(entry->lsm_data); + kfree(entry->key); + kfree(entry); +} + +/** + * calipso_map_cache_hash - Hashing function for the CALIPSO cache + * @key: the hash key + * @key_len: the length of the key in bytes + * + * Description: + * The CALIPSO tag hashing function. Returns a 32-bit hash value. + * + */ +static u32 calipso_map_cache_hash(const unsigned char *key, u32 key_len) +{ + return jhash(key, key_len, 0); +} + +/** + * calipso_cache_init - Initialize the CALIPSO cache + * + * Description: + * Initializes the CALIPSO label mapping cache, this function should be called + * before any of the other functions defined in this file. Returns zero on + * success, negative values on error. + * + */ +static int __init calipso_cache_init(void) +{ + u32 iter; + + calipso_cache = kcalloc(CALIPSO_CACHE_BUCKETS, + sizeof(struct calipso_map_cache_bkt), + GFP_KERNEL); + if (!calipso_cache) + return -ENOMEM; + + for (iter = 0; iter < CALIPSO_CACHE_BUCKETS; iter++) { + spin_lock_init(&calipso_cache[iter].lock); + calipso_cache[iter].size = 0; + INIT_LIST_HEAD(&calipso_cache[iter].list); + } + + return 0; +} + +/** + * calipso_cache_invalidate - Invalidates the current CALIPSO cache + * + * Description: + * Invalidates and frees any entries in the CALIPSO cache. Returns zero on + * success and negative values on failure. + * + */ +static void calipso_cache_invalidate(void) +{ + struct calipso_map_cache_entry *entry, *tmp_entry; + u32 iter; + + for (iter = 0; iter < CALIPSO_CACHE_BUCKETS; iter++) { + spin_lock_bh(&calipso_cache[iter].lock); + list_for_each_entry_safe(entry, + tmp_entry, + &calipso_cache[iter].list, list) { + list_del(&entry->list); + calipso_cache_entry_free(entry); + } + calipso_cache[iter].size = 0; + spin_unlock_bh(&calipso_cache[iter].lock); + } +} + +/** + * calipso_cache_check - Check the CALIPSO cache for a label mapping + * @key: the buffer to check + * @key_len: buffer length in bytes + * @secattr: the security attribute struct to use + * + * Description: + * This function checks the cache to see if a label mapping already exists for + * the given key. If there is a match then the cache is adjusted and the + * @secattr struct is populated with the correct LSM security attributes. The + * cache is adjusted in the following manner if the entry is not already the + * first in the cache bucket: + * + * 1. The cache entry's activity counter is incremented + * 2. The previous (higher ranking) entry's activity counter is decremented + * 3. If the difference between the two activity counters is geater than + * CALIPSO_CACHE_REORDERLIMIT the two entries are swapped + * + * Returns zero on success, -ENOENT for a cache miss, and other negative values + * on error. + * + */ +static int calipso_cache_check(const unsigned char *key, + u32 key_len, + struct netlbl_lsm_secattr *secattr) +{ + u32 bkt; + struct calipso_map_cache_entry *entry; + struct calipso_map_cache_entry *prev_entry = NULL; + u32 hash; + + if (!calipso_cache_enabled) + return -ENOENT; + + hash = calipso_map_cache_hash(key, key_len); + bkt = hash & (CALIPSO_CACHE_BUCKETS - 1); + spin_lock_bh(&calipso_cache[bkt].lock); + list_for_each_entry(entry, &calipso_cache[bkt].list, list) { + if (entry->hash == hash && + entry->key_len == key_len && + memcmp(entry->key, key, key_len) == 0) { + entry->activity += 1; + atomic_inc(&entry->lsm_data->refcount); + secattr->cache = entry->lsm_data; + secattr->flags |= NETLBL_SECATTR_CACHE; + secattr->type = NETLBL_NLTYPE_CALIPSO; + if (!prev_entry) { + spin_unlock_bh(&calipso_cache[bkt].lock); + return 0; + } + + if (prev_entry->activity > 0) + prev_entry->activity -= 1; + if (entry->activity > prev_entry->activity && + entry->activity - prev_entry->activity > + CALIPSO_CACHE_REORDERLIMIT) { + __list_del(entry->list.prev, entry->list.next); + __list_add(&entry->list, + prev_entry->list.prev, + &prev_entry->list); + } + + spin_unlock_bh(&calipso_cache[bkt].lock); + return 0; + } + prev_entry = entry; + } + spin_unlock_bh(&calipso_cache[bkt].lock); + + return -ENOENT; +} + +/** + * calipso_cache_add - Add an entry to the CALIPSO cache + * @calipso_ptr: the CALIPSO option + * @secattr: the packet's security attributes + * + * Description: + * Add a new entry into the CALIPSO label mapping cache. Add the new entry to + * head of the cache bucket's list, if the cache bucket is out of room remove + * the last entry in the list first. It is important to note that there is + * currently no checking for duplicate keys. Returns zero on success, + * negative values on failure. The key stored starts at calipso_ptr + 2, + * i.e. the type and length bytes are not stored, this corresponds to + * calipso_ptr[1] bytes of data. + * + */ +static int calipso_cache_add(const unsigned char *calipso_ptr, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + u32 bkt; + struct calipso_map_cache_entry *entry = NULL; + struct calipso_map_cache_entry *old_entry = NULL; + u32 calipso_ptr_len; + + if (!calipso_cache_enabled || calipso_cache_bucketsize <= 0) + return 0; + + calipso_ptr_len = calipso_ptr[1]; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + return -ENOMEM; + entry->key = kmemdup(calipso_ptr + 2, calipso_ptr_len, GFP_ATOMIC); + if (!entry->key) { + ret_val = -ENOMEM; + goto cache_add_failure; + } + entry->key_len = calipso_ptr_len; + entry->hash = calipso_map_cache_hash(calipso_ptr, calipso_ptr_len); + atomic_inc(&secattr->cache->refcount); + entry->lsm_data = secattr->cache; + + bkt = entry->hash & (CALIPSO_CACHE_BUCKETS - 1); + spin_lock_bh(&calipso_cache[bkt].lock); + if (calipso_cache[bkt].size < calipso_cache_bucketsize) { + list_add(&entry->list, &calipso_cache[bkt].list); + calipso_cache[bkt].size += 1; + } else { + old_entry = list_entry(calipso_cache[bkt].list.prev, + struct calipso_map_cache_entry, list); + list_del(&old_entry->list); + list_add(&entry->list, &calipso_cache[bkt].list); + calipso_cache_entry_free(old_entry); + } + spin_unlock_bh(&calipso_cache[bkt].lock); + + return 0; + +cache_add_failure: + if (entry) + calipso_cache_entry_free(entry); + return ret_val; +} + +/* DOI List Functions + */ + +/** + * calipso_doi_search - Searches for a DOI definition + * @doi: the DOI to search for + * + * Description: + * Search the DOI definition list for a DOI definition with a DOI value that + * matches @doi. The caller is responsible for calling rcu_read_[un]lock(). + * Returns a pointer to the DOI definition on success and NULL on failure. + */ +static struct calipso_doi *calipso_doi_search(u32 doi) +{ + struct calipso_doi *iter; + + list_for_each_entry_rcu(iter, &calipso_doi_list, list) + if (iter->doi == doi && atomic_read(&iter->refcount)) + return iter; + return NULL; +} + +/** + * calipso_doi_add - Add a new DOI to the CALIPSO protocol engine + * @doi_def: the DOI structure + * @audit_info: NetLabel audit information + * + * Description: + * The caller defines a new DOI for use by the CALIPSO engine and calls this + * function to add it to the list of acceptable domains. The caller must + * ensure that the mapping table specified in @doi_def->map meets all of the + * requirements of the mapping type (see calipso.h for details). Returns + * zero on success and non-zero on failure. + * + */ +static int calipso_doi_add(struct calipso_doi *doi_def, + struct netlbl_audit *audit_info) +{ + int ret_val = -EINVAL; + u32 doi; + u32 doi_type; + struct audit_buffer *audit_buf; + + doi = doi_def->doi; + doi_type = doi_def->type; + + if (doi_def->doi == CALIPSO_DOI_UNKNOWN) + goto doi_add_return; + + atomic_set(&doi_def->refcount, 1); + + spin_lock(&calipso_doi_list_lock); + if (calipso_doi_search(doi_def->doi)) { + spin_unlock(&calipso_doi_list_lock); + ret_val = -EEXIST; + goto doi_add_return; + } + list_add_tail_rcu(&doi_def->list, &calipso_doi_list); + spin_unlock(&calipso_doi_list_lock); + ret_val = 0; + +doi_add_return: + audit_buf = netlbl_audit_start(AUDIT_MAC_CALIPSO_ADD, audit_info); + if (audit_buf) { + const char *type_str; + + switch (doi_type) { + case CALIPSO_MAP_PASS: + type_str = "pass"; + break; + default: + type_str = "(unknown)"; + } + audit_log_format(audit_buf, + " calipso_doi=%u calipso_type=%s res=%u", + doi, type_str, ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + } + + return ret_val; +} + +/** + * calipso_doi_free - Frees a DOI definition + * @doi_def: the DOI definition + * + * Description: + * This function frees all of the memory associated with a DOI definition. + * + */ +static void calipso_doi_free(struct calipso_doi *doi_def) +{ + kfree(doi_def); +} + +/** + * calipso_doi_free_rcu - Frees a DOI definition via the RCU pointer + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to the DOI definition can be released + * safely. + * + */ +static void calipso_doi_free_rcu(struct rcu_head *entry) +{ + struct calipso_doi *doi_def; + + doi_def = container_of(entry, struct calipso_doi, rcu); + calipso_doi_free(doi_def); +} + +/** + * calipso_doi_remove - Remove an existing DOI from the CALIPSO protocol engine + * @doi: the DOI value + * @audit_secid: the LSM secid to use in the audit message + * + * Description: + * Removes a DOI definition from the CALIPSO engine. The NetLabel routines will + * be called to release their own LSM domain mappings as well as our own + * domain list. Returns zero on success and negative values on failure. + * + */ +static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info) +{ + int ret_val; + struct calipso_doi *doi_def; + struct audit_buffer *audit_buf; + + spin_lock(&calipso_doi_list_lock); + doi_def = calipso_doi_search(doi); + if (!doi_def) { + spin_unlock(&calipso_doi_list_lock); + ret_val = -ENOENT; + goto doi_remove_return; + } + if (!atomic_dec_and_test(&doi_def->refcount)) { + spin_unlock(&calipso_doi_list_lock); + ret_val = -EBUSY; + goto doi_remove_return; + } + list_del_rcu(&doi_def->list); + spin_unlock(&calipso_doi_list_lock); + + call_rcu(&doi_def->rcu, calipso_doi_free_rcu); + ret_val = 0; + +doi_remove_return: + audit_buf = netlbl_audit_start(AUDIT_MAC_CALIPSO_DEL, audit_info); + if (audit_buf) { + audit_log_format(audit_buf, + " calipso_doi=%u res=%u", + doi, ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + } + + return ret_val; +} + +/** + * calipso_doi_getdef - Returns a reference to a valid DOI definition + * @doi: the DOI value + * + * Description: + * Searches for a valid DOI definition and if one is found it is returned to + * the caller. Otherwise NULL is returned. The caller must ensure that + * calipso_doi_putdef() is called when the caller is done. + * + */ +static struct calipso_doi *calipso_doi_getdef(u32 doi) +{ + struct calipso_doi *doi_def; + + rcu_read_lock(); + doi_def = calipso_doi_search(doi); + if (!doi_def) + goto doi_getdef_return; + if (!atomic_inc_not_zero(&doi_def->refcount)) + doi_def = NULL; + +doi_getdef_return: + rcu_read_unlock(); + return doi_def; +} + +/** + * calipso_doi_putdef - Releases a reference for the given DOI definition + * @doi_def: the DOI definition + * + * Description: + * Releases a DOI definition reference obtained from calipso_doi_getdef(). + * + */ +static void calipso_doi_putdef(struct calipso_doi *doi_def) +{ + if (!doi_def) + return; + + if (!atomic_dec_and_test(&doi_def->refcount)) + return; + spin_lock(&calipso_doi_list_lock); + list_del_rcu(&doi_def->list); + spin_unlock(&calipso_doi_list_lock); + + call_rcu(&doi_def->rcu, calipso_doi_free_rcu); +} + +/** + * calipso_doi_walk - Iterate through the DOI definitions + * @skip_cnt: skip past this number of DOI definitions, updated + * @callback: callback for each DOI definition + * @cb_arg: argument for the callback function + * + * Description: + * Iterate over the DOI definition list, skipping the first @skip_cnt entries. + * For each entry call @callback, if @callback returns a negative value stop + * 'walking' through the list and return. Updates the value in @skip_cnt upon + * return. Returns zero on success, negative values on failure. + * + */ +static int calipso_doi_walk(u32 *skip_cnt, + int (*callback)(struct calipso_doi *doi_def, + void *arg), + void *cb_arg) +{ + int ret_val = -ENOENT; + u32 doi_cnt = 0; + struct calipso_doi *iter_doi; + + rcu_read_lock(); + list_for_each_entry_rcu(iter_doi, &calipso_doi_list, list) + if (atomic_read(&iter_doi->refcount) > 0) { + if (doi_cnt++ < *skip_cnt) + continue; + ret_val = callback(iter_doi, cb_arg); + if (ret_val < 0) { + doi_cnt--; + goto doi_walk_return; + } + } + +doi_walk_return: + rcu_read_unlock(); + *skip_cnt = doi_cnt; + return ret_val; +} + +/** + * calipso_validate - Validate a CALIPSO option + * @skb: the packet + * @option: the start of the option + * + * Description: + * This routine is called to validate a CALIPSO option. + * If the option is valid then %true is returned, otherwise + * %false is returned. + * + * The caller should have already checked that the length of the + * option (including the TLV header) is >= 10 and that the catmap + * length is consistent with the option length. + * + * We leave checks on the level and categories to the socket layer. + */ +bool calipso_validate(const struct sk_buff *skb, const unsigned char *option) +{ + struct calipso_doi *doi_def; + bool ret_val; + u16 crc, len = option[1] + 2; + static const u8 zero[2]; + + /* The original CRC runs over the option including the TLV header + * with the CRC-16 field (at offset 8) zeroed out. */ + crc = crc_ccitt(0xffff, option, 8); + crc = crc_ccitt(crc, zero, sizeof(zero)); + if (len > 10) + crc = crc_ccitt(crc, option + 10, len - 10); + crc = ~crc; + if (option[8] != (crc & 0xff) || option[9] != ((crc >> 8) & 0xff)) + return false; + + rcu_read_lock(); + doi_def = calipso_doi_search(get_unaligned_be32(option + 2)); + ret_val = !!doi_def; + rcu_read_unlock(); + + return ret_val; +} + +/** + * calipso_map_cat_hton - Perform a category mapping from host to network + * @doi_def: the DOI definition + * @secattr: the security attributes + * @net_cat: the zero'd out category bitmap in network/CALIPSO format + * @net_cat_len: the length of the CALIPSO bitmap in bytes + * + * Description: + * Perform a label mapping to translate a local MLS category bitmap to the + * correct CALIPSO bitmap using the given DOI definition. Returns the minimum + * size in bytes of the network bitmap on success, negative values otherwise. + * + */ +static int calipso_map_cat_hton(const struct calipso_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char *net_cat, + u32 net_cat_len) +{ + int spot = -1; + u32 net_spot_max = 0; + u32 net_clen_bits = net_cat_len * 8; + + for (;;) { + spot = netlbl_catmap_walk(secattr->attr.mls.cat, + spot + 1); + if (spot < 0) + break; + if (spot >= net_clen_bits) + return -ENOSPC; + netlbl_bitmap_setbit(net_cat, spot, 1); + + if (spot > net_spot_max) + net_spot_max = spot; + } + + return (net_spot_max / 32 + 1) * 4; +} + +/** + * calipso_map_cat_ntoh - Perform a category mapping from network to host + * @doi_def: the DOI definition + * @net_cat: the category bitmap in network/CALIPSO format + * @net_cat_len: the length of the CALIPSO bitmap in bytes + * @secattr: the security attributes + * + * Description: + * Perform a label mapping to translate a CALIPSO bitmap to the correct local + * MLS category bitmap using the given DOI definition. Returns zero on + * success, negative values on failure. + * + */ +static int calipso_map_cat_ntoh(const struct calipso_doi *doi_def, + const unsigned char *net_cat, + u32 net_cat_len, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + int spot = -1; + u32 net_clen_bits = net_cat_len * 8; + + for (;;) { + spot = netlbl_bitmap_walk(net_cat, + net_clen_bits, + spot + 1, + 1); + if (spot < 0) { + if (spot == -2) + return -EFAULT; + return 0; + } + + ret_val = netlbl_catmap_setbit(&secattr->attr.mls.cat, + spot, + GFP_ATOMIC); + if (ret_val != 0) + return ret_val; + } + + return -EINVAL; +} + +/** + * calipso_pad_write - Writes pad bytes in TLV format + * @buf: the buffer + * @offset: offset from start of buffer to write padding + * @count: number of pad bytes to write + * + * Description: + * Write @count bytes of TLV padding into @buffer starting at offset @offset. + * @count should be less than 8 - see RFC 4942. + * + */ +static int calipso_pad_write(unsigned char *buf, unsigned int offset, + unsigned int count) +{ + if (WARN_ON_ONCE(count >= 8)) + return -EINVAL; + + switch (count) { + case 0: + break; + case 1: + buf[offset] = IPV6_TLV_PAD1; + break; + default: + buf[offset] = IPV6_TLV_PADN; + buf[offset + 1] = count - 2; + if (count > 2) + memset(buf + offset + 2, 0, count - 2); + break; + } + return 0; +} + +/** + * calipso_genopt - Generate a CALIPSO option + * @buf: the option buffer + * @start: offset from which to write + * @buf_len: the size of opt_buf + * @doi_def: the CALIPSO DOI to use + * @secattr: the security attributes + * + * Description: + * Generate a CALIPSO option using the DOI definition and security attributes + * passed to the function. This also generates upto three bytes of leading + * padding that ensures that the option is 4n + 2 aligned. It returns the + * number of bytes written (including any initial padding). + */ +static int calipso_genopt(unsigned char *buf, u32 start, u32 buf_len, + const struct calipso_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + u32 len, pad; + u16 crc; + static const unsigned char padding[4] = {2, 1, 0, 3}; + unsigned char *calipso; + + /* CALIPSO has 4n + 2 alignment */ + pad = padding[start & 3]; + if (buf_len <= start + pad + CALIPSO_HDR_LEN) + return -ENOSPC; + + if ((secattr->flags & NETLBL_SECATTR_MLS_LVL) == 0) + return -EPERM; + + len = CALIPSO_HDR_LEN; + + if (secattr->flags & NETLBL_SECATTR_MLS_CAT) { + ret_val = calipso_map_cat_hton(doi_def, + secattr, + buf + start + pad + len, + buf_len - start - pad - len); + if (ret_val < 0) + return ret_val; + len += ret_val; + } + + calipso_pad_write(buf, start, pad); + calipso = buf + start + pad; + + calipso[0] = IPV6_TLV_CALIPSO; + calipso[1] = len - 2; + *(__be32 *)(calipso + 2) = htonl(doi_def->doi); + calipso[6] = (len - CALIPSO_HDR_LEN) / 4; + calipso[7] = secattr->attr.mls.lvl, + crc = ~crc_ccitt(0xffff, calipso, len); + calipso[8] = crc & 0xff; + calipso[9] = (crc >> 8) & 0xff; + return pad + len; +} + +/* Hop-by-hop hdr helper functions + */ + +/** + * calipso_opt_update - Replaces socket's hop options with a new set + * @sk: the socket + * @hop: new hop options + * + * Description: + * Replaces @sk's hop options with @hop. @hop may be NULL to leave + * the socket with no hop options. + * + */ +static int calipso_opt_update(struct sock *sk, struct ipv6_opt_hdr *hop) +{ + struct ipv6_txoptions *old = txopt_get(inet6_sk(sk)), *txopts; + + txopts = ipv6_renew_options_kern(sk, old, IPV6_HOPOPTS, + hop, hop ? ipv6_optlen(hop) : 0); + txopt_put(old); + if (IS_ERR(txopts)) + return PTR_ERR(txopts); + + txopts = ipv6_update_options(sk, txopts); + if (txopts) { + atomic_sub(txopts->tot_len, &sk->sk_omem_alloc); + txopt_put(txopts); + } + + return 0; +} + +/** + * calipso_tlv_len - Returns the length of the TLV + * @opt: the option header + * @offset: offset of the TLV within the header + * + * Description: + * Returns the length of the TLV option at offset @offset within + * the option header @opt. Checks that the entire TLV fits inside + * the option header, returns a negative value if this is not the case. + */ +static int calipso_tlv_len(struct ipv6_opt_hdr *opt, unsigned int offset) +{ + unsigned char *tlv = (unsigned char *)opt; + unsigned int opt_len = ipv6_optlen(opt), tlv_len; + + if (offset < sizeof(*opt) || offset >= opt_len) + return -EINVAL; + if (tlv[offset] == IPV6_TLV_PAD1) + return 1; + if (offset + 1 >= opt_len) + return -EINVAL; + tlv_len = tlv[offset + 1] + 2; + if (offset + tlv_len > opt_len) + return -EINVAL; + return tlv_len; +} + +/** + * calipso_opt_find - Finds the CALIPSO option in an IPv6 hop options header + * @hop: the hop options header + * @start: on return holds the offset of any leading padding + * @end: on return holds the offset of the first non-pad TLV after CALIPSO + * + * Description: + * Finds the space occupied by a CALIPSO option (including any leading and + * trailing padding). + * + * If a CALIPSO option exists set @start and @end to the + * offsets within @hop of the start of padding before the first + * CALIPSO option and the end of padding after the first CALIPSO + * option. In this case the function returns 0. + * + * In the absence of a CALIPSO option, @start and @end will be + * set to the start and end of any trailing padding in the header. + * This is useful when appending a new option, as the caller may want + * to overwrite some of this padding. In this case the function will + * return -ENOENT. + */ +static int calipso_opt_find(struct ipv6_opt_hdr *hop, unsigned int *start, + unsigned int *end) +{ + int ret_val = -ENOENT, tlv_len; + unsigned int opt_len, offset, offset_s = 0, offset_e = 0; + unsigned char *opt = (unsigned char *)hop; + + opt_len = ipv6_optlen(hop); + offset = sizeof(*hop); + + while (offset < opt_len) { + tlv_len = calipso_tlv_len(hop, offset); + if (tlv_len < 0) + return tlv_len; + + switch (opt[offset]) { + case IPV6_TLV_PAD1: + case IPV6_TLV_PADN: + if (offset_e) + offset_e = offset; + break; + case IPV6_TLV_CALIPSO: + ret_val = 0; + offset_e = offset; + break; + default: + if (offset_e == 0) + offset_s = offset; + else + goto out; + } + offset += tlv_len; + } + +out: + if (offset_s) + *start = offset_s + calipso_tlv_len(hop, offset_s); + else + *start = sizeof(*hop); + if (offset_e) + *end = offset_e + calipso_tlv_len(hop, offset_e); + else + *end = opt_len; + + return ret_val; +} + +/** + * calipso_opt_insert - Inserts a CALIPSO option into an IPv6 hop opt hdr + * @hop: the original hop options header + * @doi_def: the CALIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Creates a new hop options header based on @hop with a + * CALIPSO option added to it. If @hop already contains a CALIPSO + * option this is overwritten, otherwise the new option is appended + * after any existing options. If @hop is NULL then the new header + * will contain just the CALIPSO option and any needed padding. + * + */ +static struct ipv6_opt_hdr * +calipso_opt_insert(struct ipv6_opt_hdr *hop, + const struct calipso_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + unsigned int start, end, buf_len, pad, hop_len; + struct ipv6_opt_hdr *new; + int ret_val; + + if (hop) { + hop_len = ipv6_optlen(hop); + ret_val = calipso_opt_find(hop, &start, &end); + if (ret_val && ret_val != -ENOENT) + return ERR_PTR(ret_val); + } else { + hop_len = 0; + start = sizeof(*hop); + end = 0; + } + + buf_len = hop_len + start - end + CALIPSO_OPT_LEN_MAX_WITH_PAD; + new = kzalloc(buf_len, GFP_ATOMIC); + if (!new) + return ERR_PTR(-ENOMEM); + + if (start > sizeof(*hop)) + memcpy(new, hop, start); + ret_val = calipso_genopt((unsigned char *)new, start, buf_len, doi_def, + secattr); + if (ret_val < 0) + return ERR_PTR(ret_val); + + buf_len = start + ret_val; + /* At this point buf_len aligns to 4n, so (buf_len & 4) pads to 8n */ + pad = ((buf_len & 4) + (end & 7)) & 7; + calipso_pad_write((unsigned char *)new, buf_len, pad); + buf_len += pad; + + if (end != hop_len) { + memcpy((char *)new + buf_len, (char *)hop + end, hop_len - end); + buf_len += hop_len - end; + } + new->nexthdr = 0; + new->hdrlen = buf_len / 8 - 1; + + return new; +} + +/** + * calipso_opt_del - Removes the CALIPSO option from an option header + * @hop: the original header + * @new: the new header + * + * Description: + * Creates a new header based on @hop without any CALIPSO option. If @hop + * doesn't contain a CALIPSO option it returns -ENOENT. If @hop contains + * no other non-padding options, it returns zero with @new set to NULL. + * Otherwise it returns zero, creates a new header without the CALIPSO + * option (and removing as much padding as possible) and returns with + * @new set to that header. + * + */ +static int calipso_opt_del(struct ipv6_opt_hdr *hop, + struct ipv6_opt_hdr **new) +{ + int ret_val; + unsigned int start, end, delta, pad, hop_len; + + ret_val = calipso_opt_find(hop, &start, &end); + if (ret_val) + return ret_val; + + hop_len = ipv6_optlen(hop); + if (start == sizeof(*hop) && end == hop_len) { + /* There's no other option in the header so return NULL */ + *new = NULL; + return 0; + } + + delta = (end - start) & ~7; + *new = kzalloc(hop_len - delta, GFP_ATOMIC); + if (!*new) + return -ENOMEM; + + memcpy(*new, hop, start); + (*new)->hdrlen -= delta / 8; + pad = (end - start) & 7; + calipso_pad_write((unsigned char *)*new, start, pad); + if (end != hop_len) + memcpy((char *)*new + start + pad, (char *)hop + end, + hop_len - end); + + return 0; +} + +/** + * calipso_opt_getattr - Get the security attributes from a memory block + * @calipso: the CALIPSO option + * @secattr: the security attributes + * + * Description: + * Inspect @calipso and return the security attributes in @secattr. + * Returns zero on success and negative values on failure. + * + */ +static int calipso_opt_getattr(const unsigned char *calipso, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -ENOMSG; + u32 doi, len = calipso[1], cat_len = calipso[6] * 4; + struct calipso_doi *doi_def; + + if (cat_len + 8 > len) + return -EINVAL; + + if (calipso_cache_check(calipso + 2, calipso[1], secattr) == 0) + return 0; + + doi = get_unaligned_be32(calipso + 2); + rcu_read_lock(); + doi_def = calipso_doi_search(doi); + if (!doi_def) + goto getattr_return; + + secattr->attr.mls.lvl = calipso[7]; + secattr->flags |= NETLBL_SECATTR_MLS_LVL; + + if (cat_len) { + ret_val = calipso_map_cat_ntoh(doi_def, + calipso + 10, + cat_len, + secattr); + if (ret_val != 0) { + netlbl_catmap_free(secattr->attr.mls.cat); + goto getattr_return; + } + + secattr->flags |= NETLBL_SECATTR_MLS_CAT; + } + + secattr->type = NETLBL_NLTYPE_CALIPSO; + +getattr_return: + rcu_read_unlock(); + return ret_val; +} + +/* sock functions. + */ + +/** + * calipso_sock_getattr - Get the security attributes from a sock + * @sk: the sock + * @secattr: the security attributes + * + * Description: + * Query @sk to see if there is a CALIPSO option attached to the sock and if + * there is return the CALIPSO security attributes in @secattr. This function + * requires that @sk be locked, or privately held, but it does not do any + * locking itself. Returns zero on success and negative values on failure. + * + */ +static int calipso_sock_getattr(struct sock *sk, + struct netlbl_lsm_secattr *secattr) +{ + struct ipv6_opt_hdr *hop; + int opt_len, len, ret_val = -ENOMSG, offset; + unsigned char *opt; + struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); + + if (!txopts || !txopts->hopopt) + goto done; + + hop = txopts->hopopt; + opt = (unsigned char *)hop; + opt_len = ipv6_optlen(hop); + offset = sizeof(*hop); + while (offset < opt_len) { + len = calipso_tlv_len(hop, offset); + if (len < 0) { + ret_val = len; + goto done; + } + switch (opt[offset]) { + case IPV6_TLV_CALIPSO: + if (len < CALIPSO_HDR_LEN) + ret_val = -EINVAL; + else + ret_val = calipso_opt_getattr(&opt[offset], + secattr); + goto done; + default: + offset += len; + break; + } + } +done: + txopt_put(txopts); + return ret_val; +} + +/** + * calipso_sock_setattr - Add a CALIPSO option to a socket + * @sk: the socket + * @doi_def: the CALIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Set the CALIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. This function requires + * exclusive access to @sk, which means it either needs to be in the + * process of being created or locked. Returns zero on success and negative + * values on failure. + * + */ +static int calipso_sock_setattr(struct sock *sk, + const struct calipso_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct ipv6_opt_hdr *old, *new; + struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); + + old = NULL; + if (txopts) + old = txopts->hopopt; + + new = calipso_opt_insert(old, doi_def, secattr); + txopt_put(txopts); + if (IS_ERR(new)) + return PTR_ERR(new); + + ret_val = calipso_opt_update(sk, new); + + kfree(new); + return ret_val; +} + +/** + * calipso_sock_delattr - Delete the CALIPSO option from a socket + * @sk: the socket + * + * Description: + * Removes the CALIPSO option from a socket, if present. + * + */ +static void calipso_sock_delattr(struct sock *sk) +{ + struct ipv6_opt_hdr *new_hop; + struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); + + if (!txopts || !txopts->hopopt) + goto done; + + if (calipso_opt_del(txopts->hopopt, &new_hop)) + goto done; + + calipso_opt_update(sk, new_hop); + kfree(new_hop); + +done: + txopt_put(txopts); +} + +/* request sock functions. + */ + +/** + * calipso_req_setattr - Add a CALIPSO option to a connection request socket + * @req: the connection request socket + * @doi_def: the CALIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Set the CALIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. Returns zero on success and + * negative values on failure. + * + */ +static int calipso_req_setattr(struct request_sock *req, + const struct calipso_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + struct ipv6_txoptions *txopts; + struct inet_request_sock *req_inet = inet_rsk(req); + struct ipv6_opt_hdr *old, *new; + struct sock *sk = sk_to_full_sk(req_to_sk(req)); + + if (req_inet->ipv6_opt && req_inet->ipv6_opt->hopopt) + old = req_inet->ipv6_opt->hopopt; + else + old = NULL; + + new = calipso_opt_insert(old, doi_def, secattr); + if (IS_ERR(new)) + return PTR_ERR(new); + + txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, + new, new ? ipv6_optlen(new) : 0); + + kfree(new); + + if (IS_ERR(txopts)) + return PTR_ERR(txopts); + + txopts = xchg(&req_inet->ipv6_opt, txopts); + if (txopts) { + atomic_sub(txopts->tot_len, &sk->sk_omem_alloc); + txopt_put(txopts); + } + + return 0; +} + +/** + * calipso_req_delattr - Delete the CALIPSO option from a request socket + * @reg: the request socket + * + * Description: + * Removes the CALIPSO option from a request socket, if present. + * + */ +static void calipso_req_delattr(struct request_sock *req) +{ + struct inet_request_sock *req_inet = inet_rsk(req); + struct ipv6_opt_hdr *new; + struct ipv6_txoptions *txopts; + struct sock *sk = sk_to_full_sk(req_to_sk(req)); + + if (!req_inet->ipv6_opt || !req_inet->ipv6_opt->hopopt) + return; + + if (calipso_opt_del(req_inet->ipv6_opt->hopopt, &new)) + return; /* Nothing to do */ + + txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, + new, new ? ipv6_optlen(new) : 0); + + if (!IS_ERR(txopts)) { + txopts = xchg(&req_inet->ipv6_opt, txopts); + if (txopts) { + atomic_sub(txopts->tot_len, &sk->sk_omem_alloc); + txopt_put(txopts); + } + } + kfree(new); +} + +/* skbuff functions. + */ + +/** + * calipso_skbuff_optptr - Find the CALIPSO option in the packet + * @skb: the packet + * + * Description: + * Parse the packet's IP header looking for a CALIPSO option. Returns a pointer + * to the start of the CALIPSO option on success, NULL if one if not found. + * + */ +static unsigned char *calipso_skbuff_optptr(const struct sk_buff *skb) +{ + const struct ipv6hdr *ip6_hdr = ipv6_hdr(skb); + int offset; + + if (ip6_hdr->nexthdr != NEXTHDR_HOP) + return NULL; + + offset = ipv6_find_tlv(skb, sizeof(*ip6_hdr), IPV6_TLV_CALIPSO); + if (offset >= 0) + return (unsigned char *)ip6_hdr + offset; + + return NULL; +} + +/** + * calipso_skbuff_setattr - Set the CALIPSO option on a packet + * @skb: the packet + * @doi_def: the CALIPSO DOI to use + * @secattr: the security attributes + * + * Description: + * Set the CALIPSO option on the given packet based on the security attributes. + * Returns a pointer to the IP header on success and NULL on failure. + * + */ +static int calipso_skbuff_setattr(struct sk_buff *skb, + const struct calipso_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct ipv6hdr *ip6_hdr; + struct ipv6_opt_hdr *hop; + unsigned char buf[CALIPSO_MAX_BUFFER]; + int len_delta, new_end, pad; + unsigned int start, end; + + ip6_hdr = ipv6_hdr(skb); + if (ip6_hdr->nexthdr == NEXTHDR_HOP) { + hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1); + ret_val = calipso_opt_find(hop, &start, &end); + if (ret_val && ret_val != -ENOENT) + return ret_val; + } else { + start = 0; + end = 0; + } + + memset(buf, 0, sizeof(buf)); + ret_val = calipso_genopt(buf, start & 3, sizeof(buf), doi_def, secattr); + if (ret_val < 0) + return ret_val; + + new_end = start + ret_val; + /* At this point new_end aligns to 4n, so (new_end & 4) pads to 8n */ + pad = ((new_end & 4) + (end & 7)) & 7; + len_delta = new_end - (int)end + pad; + ret_val = skb_cow(skb, skb_headroom(skb) + len_delta); + if (ret_val < 0) + return ret_val; + + if (len_delta) { + if (len_delta > 0) + skb_push(skb, len_delta); + else + skb_pull(skb, -len_delta); + memmove((char *)ip6_hdr - len_delta, ip6_hdr, + sizeof(*ip6_hdr) + start); + skb_reset_network_header(skb); + ip6_hdr = ipv6_hdr(skb); + } + + hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1); + if (start == 0) { + struct ipv6_opt_hdr *new_hop = (struct ipv6_opt_hdr *)buf; + + new_hop->nexthdr = ip6_hdr->nexthdr; + new_hop->hdrlen = len_delta / 8 - 1; + ip6_hdr->nexthdr = NEXTHDR_HOP; + } else { + hop->hdrlen += len_delta / 8; + } + memcpy((char *)hop + start, buf + (start & 3), new_end - start); + calipso_pad_write((unsigned char *)hop, new_end, pad); + + return 0; +} + +/** + * calipso_skbuff_delattr - Delete any CALIPSO options from a packet + * @skb: the packet + * + * Description: + * Removes any and all CALIPSO options from the given packet. Returns zero on + * success, negative values on failure. + * + */ +static int calipso_skbuff_delattr(struct sk_buff *skb) +{ + int ret_val; + struct ipv6hdr *ip6_hdr; + struct ipv6_opt_hdr *old_hop; + u32 old_hop_len, start = 0, end = 0, delta, size, pad; + + if (!calipso_skbuff_optptr(skb)) + return 0; + + /* since we are changing the packet we should make a copy */ + ret_val = skb_cow(skb, skb_headroom(skb)); + if (ret_val < 0) + return ret_val; + + ip6_hdr = ipv6_hdr(skb); + old_hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1); + old_hop_len = ipv6_optlen(old_hop); + + ret_val = calipso_opt_find(old_hop, &start, &end); + if (ret_val) + return ret_val; + + if (start == sizeof(*old_hop) && end == old_hop_len) { + /* There's no other option in the header so we delete + * the whole thing. */ + delta = old_hop_len; + size = sizeof(*ip6_hdr); + ip6_hdr->nexthdr = old_hop->nexthdr; + } else { + delta = (end - start) & ~7; + if (delta) + old_hop->hdrlen -= delta / 8; + pad = (end - start) & 7; + size = sizeof(*ip6_hdr) + start + pad; + calipso_pad_write((unsigned char *)old_hop, start, pad); + } + + if (delta) { + skb_pull(skb, delta); + memmove((char *)ip6_hdr + delta, ip6_hdr, size); + skb_reset_network_header(skb); + } + + return 0; +} + +static const struct netlbl_calipso_ops ops = { + .doi_add = calipso_doi_add, + .doi_free = calipso_doi_free, + .doi_remove = calipso_doi_remove, + .doi_getdef = calipso_doi_getdef, + .doi_putdef = calipso_doi_putdef, + .doi_walk = calipso_doi_walk, + .sock_getattr = calipso_sock_getattr, + .sock_setattr = calipso_sock_setattr, + .sock_delattr = calipso_sock_delattr, + .req_setattr = calipso_req_setattr, + .req_delattr = calipso_req_delattr, + .opt_getattr = calipso_opt_getattr, + .skbuff_optptr = calipso_skbuff_optptr, + .skbuff_setattr = calipso_skbuff_setattr, + .skbuff_delattr = calipso_skbuff_delattr, + .cache_invalidate = calipso_cache_invalidate, + .cache_add = calipso_cache_add +}; + +/** + * calipso_init - Initialize the CALIPSO module + * + * Description: + * Initialize the CALIPSO module and prepare it for use. Returns zero on + * success and negative values on failure. + * + */ +int __init calipso_init(void) +{ + int ret_val; + + ret_val = calipso_cache_init(); + if (!ret_val) + netlbl_calipso_ops_register(&ops); + return ret_val; +} + +void calipso_exit(void) +{ + netlbl_calipso_ops_register(NULL); + calipso_cache_invalidate(); + kfree(calipso_cache); +} diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 8de5dd7aaa05..139ceb68bd37 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -43,6 +43,7 @@ #include <net/ndisc.h> #include <net/ip6_route.h> #include <net/addrconf.h> +#include <net/calipso.h> #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/xfrm.h> #endif @@ -603,6 +604,28 @@ drop: return false; } +/* CALIPSO RFC 5570 */ + +static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff) +{ + const unsigned char *nh = skb_network_header(skb); + + if (nh[optoff + 1] < 8) + goto drop; + + if (nh[optoff + 6] * 4 + 8 > nh[optoff + 1]) + goto drop; + + if (!calipso_validate(skb, nh + optoff)) + goto drop; + + return true; + +drop: + kfree_skb(skb); + return false; +} + static const struct tlvtype_proc tlvprochopopt_lst[] = { { .type = IPV6_TLV_ROUTERALERT, @@ -612,6 +635,10 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = { .type = IPV6_TLV_JUMBO, .func = ipv6_hop_jumbo, }, + { + .type = IPV6_TLV_CALIPSO, + .func = ipv6_hop_calipso, + }, { -1, } }; @@ -758,6 +785,27 @@ static int ipv6_renew_option(void *ohdr, return 0; } +/** + * ipv6_renew_options - replace a specific ext hdr with a new one. + * + * @sk: sock from which to allocate memory + * @opt: original options + * @newtype: option type to replace in @opt + * @newopt: new option of type @newtype to replace (user-mem) + * @newoptlen: length of @newopt + * + * Returns a new set of options which is a copy of @opt with the + * option type @newtype replaced with @newopt. + * + * @opt may be NULL, in which case a new set of options is returned + * containing just @newopt. + * + * @newopt may be NULL, in which case the specified option type is + * not copied into the new set of options. + * + * The new set of options is allocated from the socket option memory + * buffer of @sk. + */ struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, int newtype, @@ -830,6 +878,34 @@ out: return ERR_PTR(err); } +/** + * ipv6_renew_options_kern - replace a specific ext hdr with a new one. + * + * @sk: sock from which to allocate memory + * @opt: original options + * @newtype: option type to replace in @opt + * @newopt: new option of type @newtype to replace (kernel-mem) + * @newoptlen: length of @newopt + * + * See ipv6_renew_options(). The difference is that @newopt is + * kernel memory, rather than user memory. + */ +struct ipv6_txoptions * +ipv6_renew_options_kern(struct sock *sk, struct ipv6_txoptions *opt, + int newtype, struct ipv6_opt_hdr *newopt, + int newoptlen) +{ + struct ipv6_txoptions *ret_val; + const mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + ret_val = ipv6_renew_options(sk, opt, newtype, + (struct ipv6_opt_hdr __user *)newopt, + newoptlen); + set_fs(old_fs); + return ret_val; +} + struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt) { diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 9508a20fbf61..305e2ed730bf 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -112,7 +112,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, } EXPORT_SYMBOL(ipv6_skip_exthdr); -int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) +int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type) { const unsigned char *nh = skb_network_header(skb); int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index ed33abf57abd..5857c1fc8b67 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -67,6 +67,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, struct net *net = rule->fr_net; pol_lookup_t lookup = arg->lookup_ptr; int err = 0; + u32 tb_id; switch (rule->action) { case FR_ACT_TO_TBL: @@ -86,7 +87,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto discard_pkt; } - table = fib6_get_table(net, rule->table); + tb_id = fib_rule_get_table(rule, arg); + table = fib6_get_table(net, tb_id); if (!table) { err = -EAGAIN; goto out; @@ -199,7 +201,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct net *net = sock_net(skb->sk); struct fib6_rule *rule6 = (struct fib6_rule *) rule; - if (rule->action == FR_ACT_TO_TBL) { + if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) { if (rule->table == RT6_TABLE_UNSPEC) goto errout; diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index c972d0b52579..9ea249b9451e 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -69,7 +69,7 @@ int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, } EXPORT_SYMBOL(gue6_build_header); -#ifdef CONFIG_NET_FOU_IP_TUNNELS +#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL) static const struct ip6_tnl_encap_ops fou_ip6tun_ops = { .encap_hlen = fou_encap_hlen, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4527285fcaa2..bd59c343d35f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -98,7 +98,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) - ping_err(skb, offset, info); + ping_err(skb, offset, ntohl(info)); } static int icmpv6_rcv(struct sk_buff *skb); @@ -388,7 +388,8 @@ relookup_failed: /* * Send an ICMP message in response to a packet in error */ -static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct in6_addr *force_saddr) { struct net *net = dev_net(skb->dev); struct inet6_dev *idev = NULL; @@ -475,6 +476,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.daddr = hdr->saddr; + if (force_saddr) + saddr = force_saddr; if (saddr) fl6.saddr = *saddr; fl6.flowi6_mark = mark; @@ -502,12 +505,14 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; + ipc6.tclass = np->tclass; + fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); + dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - ipc6.tclass = np->tclass; ipc6.dontfrag = np->dontfrag; ipc6.opt = NULL; @@ -549,10 +554,75 @@ out: */ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { - icmp6_send(skb, ICMPV6_PARAMPROB, code, pos); + icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL); kfree_skb(skb); } +/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH + * if sufficient data bytes are available + * @nhs is the size of the tunnel header(s) : + * Either an IPv4 header for SIT encap + * an IPv4 header + GRE header for GRE encap + */ +int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, + unsigned int data_len) +{ + struct in6_addr temp_saddr; + struct rt6_info *rt; + struct sk_buff *skb2; + u32 info = 0; + + if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8)) + return 1; + + /* RFC 4884 (partial) support for ICMP extensions */ + if (data_len < 128 || (data_len & 7) || skb->len < data_len) + data_len = 0; + + skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC); + + if (!skb2) + return 1; + + skb_dst_drop(skb2); + skb_pull(skb2, nhs); + skb_reset_network_header(skb2); + + rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); + + if (rt && rt->dst.dev) + skb2->dev = rt->dst.dev; + + ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr); + + if (data_len) { + /* RFC 4884 (partial) support : + * insert 0 padding at the end, before the extensions + */ + __skb_push(skb2, nhs); + skb_reset_network_header(skb2); + memmove(skb2->data, skb2->data + nhs, data_len - nhs); + memset(skb2->data + data_len - nhs, 0, nhs); + /* RFC 4884 4.5 : Length is measured in 64-bit words, + * and stored in reserved[0] + */ + info = (data_len/8) << 24; + } + if (type == ICMP_TIME_EXCEEDED) + icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, + info, &temp_saddr); + else + icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, + info, &temp_saddr); + if (rt) + ip6_rt_put(rt); + + kfree_skb(skb2); + + return 0; +} +EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach); + static void icmpv6_echo_reply(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); @@ -585,7 +655,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) fl6.saddr = *saddr; - fl6.flowi6_oif = l3mdev_fib_oif(skb->dev); + fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index d08fd2d48a78..e0170f62bc39 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -109,7 +109,8 @@ static inline bool ila_csum_neutral_set(struct ila_identifier ident) return !!(ident.csum_neutral); } -void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); +void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p, + bool set_csum_neutral); void ila_init_saved_csum(struct ila_params *p); diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 0e94042d1289..ec9efbcdad35 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -34,12 +34,12 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr, if (p->locator_match.v64) { diff = p->csum_diff; } else { - diff = compute_csum_diff8((__be32 *)iaddr, - (__be32 *)&p->locator); + diff = compute_csum_diff8((__be32 *)&p->locator, + (__be32 *)iaddr); } fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ? - ~CSUM_NEUTRAL_FLAG : CSUM_NEUTRAL_FLAG); + CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG); diff = csum_add(diff, fval); @@ -103,7 +103,8 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, iaddr->loc = p->locator; } -void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) +void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p, + bool set_csum_neutral) { struct ipv6hdr *ip6h = ipv6_hdr(skb); struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); @@ -114,7 +115,8 @@ void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) * is a locator being translated to a SIR address. * Perform (receiver) checksum-neutral translation. */ - ila_csum_do_neutral(iaddr, p); + if (!set_csum_neutral) + ila_csum_do_neutral(iaddr, p); } else { switch (p->csum_mode) { case ILA_CSUM_ADJUST_TRANSPORT: @@ -138,8 +140,8 @@ void ila_init_saved_csum(struct ila_params *p) return; p->csum_diff = compute_csum_diff8( - (__be32 *)&p->locator_match, - (__be32 *)&p->locator); + (__be32 *)&p->locator, + (__be32 *)&p->locator_match); } static int __init ila_init(void) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 1dfb64166d7d..c8314c6b6154 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -26,7 +26,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); + ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), true); return dst->lwtstate->orig_output(net, sk, skb); @@ -42,7 +42,7 @@ static int ila_input(struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); + ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false); return dst->lwtstate->orig_input(skb); diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index a90e57229c6c..e6eca5fdf4c9 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -210,14 +210,14 @@ static void ila_free_cb(void *ptr, void *arg) } } -static int ila_xlat_addr(struct sk_buff *skb); +static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral); static unsigned int ila_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - ila_xlat_addr(skb); + ila_xlat_addr(skb, false); return NF_ACCEPT; } @@ -597,7 +597,7 @@ static struct pernet_operations ila_net_ops = { .size = sizeof(struct ila_net), }; -static int ila_xlat_addr(struct sk_buff *skb) +static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) { struct ila_map *ila; struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -616,7 +616,7 @@ static int ila_xlat_addr(struct sk_buff *skb) ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan); if (ila) - ila_update_ipv6_locator(skb, &ila->xp.ip); + ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral); rcu_read_unlock(); diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index b2025bf3da4a..c0cbcb259f5a 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -78,9 +78,12 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) * we accept a checksum of zero here. When we find the socket * for the UDP packet we'll check if that socket allows zero checksum * for IPv6 (set by socket option). + * + * Note, we are only interested in != 0 or == 0, thus the + * force to int. */ - return skb_checksum_init_zero_check(skb, proto, uh->check, - ip6_compute_pseudo); + return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); } EXPORT_SYMBOL(udp6_csum_init); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1bcef2369d64..771be1fa4176 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -177,6 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) } } + free_percpu(non_pcpu_rt->rt6i_pcpu); non_pcpu_rt->rt6i_pcpu = NULL; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index af503f518278..776d145113e1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -468,7 +468,7 @@ static int gre_rcv(struct sk_buff *skb) bool csum_err = false; int hdr_len; - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0); if (hdr_len < 0) goto drop; @@ -712,6 +712,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) fl6->daddr = p->raddr; fl6->flowi6_oif = p->link; fl6->flowlabel = 0; + fl6->flowi6_proto = IPPROTO_GRE; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; @@ -1027,6 +1028,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; @@ -1253,6 +1256,8 @@ static int ip6gre_tap_init(struct net_device *dev) if (ret) return ret; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + tunnel = netdev_priv(dev); ip6gre_tnl_link_config(tunnel, 1); @@ -1286,6 +1291,7 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; } static bool ip6gre_netlink_encap_parms(struct nlattr *data[], diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 14dacc544c3e..713676f14a0e 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -39,7 +39,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (!send) goto out; - send(skb, type, code, info); + send(skb, type, code, info, NULL); out: rcu_read_unlock(); } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 94611e450ec9..aacfb4bce153 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -323,6 +323,7 @@ int ip6_input(struct sk_buff *skb) dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip6_input_finish); } +EXPORT_SYMBOL_GPL(ip6_input); int ip6_mc_input(struct sk_buff *skb) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cbf127ae7c67..1dfc402d9ad1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -368,7 +368,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) if (skb->ignore_df) return false; - if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) + if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) return false; return true; @@ -910,6 +910,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, int err; int flags = 0; + if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif && + (!*dst || !(*dst)->error)) { + err = l3mdev_get_saddr6(net, sk, fl6); + if (err) + goto out_err; + } + /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, * the route-specific preferred source forces the @@ -999,10 +1006,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, return 0; out_err_release: - if (err == -ENETUNREACH) - IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); dst_release(*dst); *dst = NULL; +out_err: + if (err == -ENETUNREACH) + IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); return err; } @@ -1071,17 +1079,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); - int err; dst = ip6_sk_dst_check(sk, dst, fl6); + if (!dst) + dst = ip6_dst_lookup_flow(sk, fl6, final_dst); - err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6); - if (err) - return ERR_PTR(err); - if (final_dst) - fl6->daddr = *final_dst; - - return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return dst; } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f2e2013f8346..6122f9c5cc49 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -921,6 +921,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca cache->mfc_un.res.maxvif = vifi + 1; } } + cache->mfc_un.res.lastuse = jiffies; } static int mif6_add(struct net *net, struct mr6_table *mrt, @@ -1074,6 +1075,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void) struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (!c) return NULL; + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXMIFS; return c; } @@ -1591,14 +1593,15 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) if (likely(mrt->mroute6_sk == NULL)) { mrt->mroute6_sk = sk; net->ipv6.devconf_all->mc_forwarding++; - inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, - NETCONFA_IFINDEX_ALL, - net->ipv6.devconf_all); - } - else + } else { err = -EADDRINUSE; + } write_unlock_bh(&mrt_lock); + if (!err) + inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); rtnl_unlock(); return err; @@ -1616,11 +1619,11 @@ int ip6mr_sk_done(struct sock *sk) write_lock_bh(&mrt_lock); mrt->mroute6_sk = NULL; net->ipv6.devconf_all->mc_forwarding--; + write_unlock_bh(&mrt_lock); inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); - write_unlock_bh(&mrt_lock); mroute_clean_tables(mrt, false); err = 0; @@ -2090,6 +2093,7 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt, vif = cache->mf6c_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + cache->mfc_un.res.lastuse = jiffies; if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { struct mfc6_cache *cache_proxy; @@ -2232,10 +2236,10 @@ int ip6_mr_input(struct sk_buff *skb) static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) { - int ct; - struct rtnexthop *nhp; - struct nlattr *mp_attr; struct rta_mfc_stats mfcs; + struct nlattr *mp_attr; + struct rtnexthop *nhp; + int ct; /* If cache is unresolved, don't try to parse IIF and OIF */ if (c->mf6c_parent >= MAXMIFS) @@ -2268,7 +2272,10 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; - if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) < 0) + if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || + nla_put_u64_64bit(skb, RTA_EXPIRES, + jiffies_to_clock_t(c->mfc_un.res.lastuse), + RTA_PAD)) return -EMSGSIZE; rtm->rtm_type = RTN_MULTICAST; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a9895e15ee9c..5330262ab673 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -98,7 +98,6 @@ int ip6_ra_control(struct sock *sk, int sel) return 0; } -static struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index c245895a3d41..fe65cdc28a45 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -73,15 +73,6 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> -/* Set to 3 to get tracing... */ -#define ND_DEBUG 1 - -#define ND_PRINTK(val, level, fmt, ...) \ -do { \ - if (val <= ND_DEBUG) \ - net_##level##_ratelimited(fmt, ##__VA_ARGS__); \ -} while (0) - static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); @@ -150,11 +141,10 @@ struct neigh_table nd_tbl = { }; EXPORT_SYMBOL_GPL(nd_tbl); -static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) +void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data, + int data_len, int pad) { - int pad = ndisc_addr_option_pad(skb->dev->type); - int data_len = skb->dev->addr_len; - int space = ndisc_opt_addr_space(skb->dev); + int space = __ndisc_opt_addr_space(data_len, pad); u8 *opt = skb_put(skb, space); opt[0] = type; @@ -171,6 +161,23 @@ static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) if (space > 0) memset(opt, 0, space); } +EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option); + +static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type, + void *data, u8 icmp6_type) +{ + __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len, + ndisc_addr_option_pad(skb->dev->type)); + ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type); +} + +static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb, + void *ha, + const u8 *ops_data) +{ + ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT); + ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data); +} static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, struct nd_opt_hdr *end) @@ -185,24 +192,28 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, return cur <= end && cur->nd_opt_type == type ? cur : NULL; } -static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) +static inline int ndisc_is_useropt(const struct net_device *dev, + struct nd_opt_hdr *opt) { return opt->nd_opt_type == ND_OPT_RDNSS || - opt->nd_opt_type == ND_OPT_DNSSL; + opt->nd_opt_type == ND_OPT_DNSSL || + ndisc_ops_is_useropt(dev, opt->nd_opt_type); } -static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, +static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev, + struct nd_opt_hdr *cur, struct nd_opt_hdr *end) { if (!cur || !end || cur >= end) return NULL; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); - } while (cur < end && !ndisc_is_useropt(cur)); - return cur <= end && ndisc_is_useropt(cur) ? cur : NULL; + } while (cur < end && !ndisc_is_useropt(dev, cur)); + return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL; } -struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, +struct ndisc_options *ndisc_parse_options(const struct net_device *dev, + u8 *opt, int opt_len, struct ndisc_options *ndopts) { struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt; @@ -217,6 +228,8 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, l = nd_opt->nd_opt_len << 3; if (opt_len < l || l == 0) return NULL; + if (ndisc_ops_parse_options(dev, nd_opt, ndopts)) + goto next_opt; switch (nd_opt->nd_opt_type) { case ND_OPT_SOURCE_LL_ADDR: case ND_OPT_TARGET_LL_ADDR: @@ -243,7 +256,7 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, break; #endif default: - if (ndisc_is_useropt(nd_opt)) { + if (ndisc_is_useropt(dev, nd_opt)) { ndopts->nd_useropts_end = nd_opt; if (!ndopts->nd_useropts) ndopts->nd_useropts = nd_opt; @@ -260,6 +273,7 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, nd_opt->nd_opt_len); } } +next_opt: opt_len -= l; nd_opt = ((void *)nd_opt) + l; } @@ -509,7 +523,8 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, if (!dev->addr_len) inc_opt = 0; if (inc_opt) - optlen += ndisc_opt_addr_space(dev); + optlen += ndisc_opt_addr_space(dev, + NDISC_NEIGHBOUR_ADVERTISEMENT); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) @@ -528,8 +543,8 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, if (inc_opt) ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, - dev->dev_addr); - + dev->dev_addr, + NDISC_NEIGHBOUR_ADVERTISEMENT); ndisc_send_skb(skb, daddr, src_addr); } @@ -574,7 +589,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, if (ipv6_addr_any(saddr)) inc_opt = false; if (inc_opt) - optlen += ndisc_opt_addr_space(dev); + optlen += ndisc_opt_addr_space(dev, + NDISC_NEIGHBOUR_SOLICITATION); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) @@ -590,7 +606,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, if (inc_opt) ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, - dev->dev_addr); + dev->dev_addr, + NDISC_NEIGHBOUR_SOLICITATION); ndisc_send_skb(skb, daddr, saddr); } @@ -626,7 +643,7 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, } #endif if (send_sllao) - optlen += ndisc_opt_addr_space(dev); + optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) @@ -641,7 +658,8 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, if (send_sllao) ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, - dev->dev_addr); + dev->dev_addr, + NDISC_ROUTER_SOLICITATION); ndisc_send_skb(skb, daddr, saddr); } @@ -702,6 +720,15 @@ static int pndisc_is_router(const void *pkey, return ret; } +void ndisc_update(const struct net_device *dev, struct neighbour *neigh, + const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, + struct ndisc_options *ndopts) +{ + neigh_update(neigh, lladdr, new, flags); + /* report ndisc ops about neighbour update */ + ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts); +} + static void ndisc_recv_ns(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); @@ -738,7 +765,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) return; } - if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { + if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) { ND_PRINTK(2, warn, "NS: invalid ND options\n"); return; } @@ -856,9 +883,10 @@ have_ifp: neigh = __neigh_lookup(&nd_tbl, saddr, dev, !inc || lladdr || !dev->addr_len); if (neigh) - neigh_update(neigh, lladdr, NUD_STALE, + ndisc_update(dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| - NEIGH_UPDATE_F_OVERRIDE); + NEIGH_UPDATE_F_OVERRIDE, + NDISC_NEIGHBOUR_SOLICITATION, &ndopts); if (neigh || !dev->header_ops) { ndisc_send_na(dev, saddr, &msg->target, !!is_router, true, (ifp != NULL && inc), inc); @@ -911,7 +939,7 @@ static void ndisc_recv_na(struct sk_buff *skb) idev->cnf.drop_unsolicited_na) return; - if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { + if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) { ND_PRINTK(2, warn, "NS: invalid ND option\n"); return; } @@ -967,12 +995,13 @@ static void ndisc_recv_na(struct sk_buff *skb) goto out; } - neigh_update(neigh, lladdr, + ndisc_update(dev, neigh, lladdr, msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| - (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0)); + (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0), + NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts); if ((old_flags & ~neigh->flags) & NTF_ROUTER) { /* @@ -1017,7 +1046,7 @@ static void ndisc_recv_rs(struct sk_buff *skb) goto out; /* Parse ND options */ - if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) { + if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) { ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n"); goto out; } @@ -1031,10 +1060,11 @@ static void ndisc_recv_rs(struct sk_buff *skb) neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1); if (neigh) { - neigh_update(neigh, lladdr, NUD_STALE, + ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| - NEIGH_UPDATE_F_OVERRIDE_ISROUTER); + NEIGH_UPDATE_F_OVERRIDE_ISROUTER, + NDISC_ROUTER_SOLICITATION, &ndopts); neigh_release(neigh); } out: @@ -1135,7 +1165,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - if (!ndisc_parse_options(opt, optlen, &ndopts)) { + if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) { ND_PRINTK(2, warn, "RA: invalid ND options\n"); return; } @@ -1329,11 +1359,12 @@ skip_linkparms: goto out; } } - neigh_update(neigh, lladdr, NUD_STALE, + ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| - NEIGH_UPDATE_F_ISROUTER); + NEIGH_UPDATE_F_ISROUTER, + NDISC_ROUTER_ADVERTISEMENT, &ndopts); } if (!ipv6_accept_ra(in6_dev)) { @@ -1421,7 +1452,8 @@ skip_routeinfo: struct nd_opt_hdr *p; for (p = ndopts.nd_useropts; p; - p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) { + p = ndisc_next_useropt(skb->dev, p, + ndopts.nd_useropts_end)) { ndisc_ra_useropt(skb, p); } } @@ -1459,7 +1491,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } - if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) + if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts)) return; if (!ndopts.nd_opts_rh) { @@ -1504,7 +1536,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) struct dst_entry *dst; struct flowi6 fl6; int rd_len; - u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; + u8 ha_buf[MAX_ADDR_LEN], *ha = NULL, + ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; int oif = l3mdev_fib_oif(dev); bool ret; @@ -1563,7 +1596,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) memcpy(ha_buf, neigh->ha, dev->addr_len); read_unlock_bh(&neigh->lock); ha = ha_buf; - optlen += ndisc_opt_addr_space(dev); + optlen += ndisc_redirect_opt_addr_space(dev, neigh, + ops_data_buf, + &ops_data); } else read_unlock_bh(&neigh->lock); @@ -1594,7 +1629,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) */ if (ha) - ndisc_fill_addr_option(buff, ND_OPT_TARGET_LL_ADDR, ha); + ndisc_fill_redirect_addr_option(buff, ha, ops_data); /* * build redirect option and copy skb over to the new packet. diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 63e06c3dd319..552fac2f390a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -73,22 +73,22 @@ ip6_packet_match(const struct sk_buff *skb, unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); -#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg))) - - if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, - &ip6info->src), IP6T_INV_SRCIP) || - FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, - &ip6info->dst), IP6T_INV_DSTIP)) + if (NF_INVF(ip6info, IP6T_INV_SRCIP, + ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, + &ip6info->src)) || + NF_INVF(ip6info, IP6T_INV_DSTIP, + ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, + &ip6info->dst))) return false; ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask); - if (FWINV(ret != 0, IP6T_INV_VIA_IN)) + if (NF_INVF(ip6info, IP6T_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask); - if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) + if (NF_INVF(ip6info, IP6T_INV_VIA_OUT, ret != 0)) return false; /* ... might want to do something with class and flowlabel here ... */ @@ -402,23 +402,12 @@ ip6t_do_table(struct sk_buff *skb, else return verdict; } -static bool find_jump_target(const struct xt_table_info *t, - const struct ip6t_entry *target) -{ - struct ip6t_entry *iter; - - xt_entry_foreach(iter, t->entries, t->size) { - if (iter == target) - return true; - } - return false; -} - /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, - unsigned int valid_hooks, void *entry0) + unsigned int valid_hooks, void *entry0, + unsigned int *offsets) { unsigned int hook; @@ -487,10 +476,11 @@ mark_source_chains(const struct xt_table_info *newinfo, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ + if (!xt_find_jump_offset(offsets, newpos, + newinfo->number)) + return 0; e = (struct ip6t_entry *) (entry0 + newpos); - if (!find_jump_target(newinfo, e)) - return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; @@ -724,6 +714,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ip6t_replace *repl) { struct ip6t_entry *iter; + unsigned int *offsets; unsigned int i; int ret = 0; @@ -736,6 +727,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } + offsets = xt_alloc_entry_offsets(newinfo->number); + if (!offsets) + return -ENOMEM; i = 0; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { @@ -745,15 +739,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, repl->underflow, repl->valid_hooks); if (ret != 0) - return ret; + goto out_free; + if (i < repl->num_entries) + offsets[i] = (void *)iter - entry0; ++i; if (strcmp(ip6t_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } + ret = -EINVAL; if (i != repl->num_entries) - return -EINVAL; + goto out_free; /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -761,13 +758,16 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; if (newinfo->underflow[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; } - if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) - return -ELOOP; + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { + ret = -ELOOP; + goto out_free; + } + kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; @@ -788,6 +788,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, } return ret; + out_free: + kvfree(offsets); + return ret; } static void diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index cb2b28883252..2b1a9dcdbcb3 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -83,10 +83,6 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb, { if (state->hook == NF_INET_LOCAL_OUT) return ip6t_mangle_out(skb, state); - if (state->hook == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, state, - state->net->ipv6.ip6table_mangle); - /* INPUT/FORWARD */ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); } diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 6989c70ae29f..4a84b5ad9ecb 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -33,6 +33,7 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb, fl6.daddr = *gw; fl6.flowlabel = (__force __be32)(((iph->flow_lbl[0] & 0xF) << 16) | (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]); + fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 3ee3e444a66b..fed40d1ec29b 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -116,6 +116,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; + ipc6.tclass = np->tclass; + fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -140,7 +143,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) pfh.family = AF_INET6; ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - ipc6.tclass = np->tclass; ipc6.dontfrag = np->dontfrag; ipc6.opt = NULL; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 896350df6423..590dd1f7746f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -878,6 +878,11 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (inet->hdrincl) fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; + if (ipc6.tclass < 0) + ipc6.tclass = np->tclass; + + fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); @@ -886,9 +891,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (ipc6.hlimit < 0) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - if (ipc6.tclass < 0) - ipc6.tclass = np->tclass; - if (ipc6.dontfrag < 0) ipc6.dontfrag = np->dontfrag; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 969913da494f..49817555449e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1042,8 +1042,8 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) return pcpu_rt; } -static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, - struct flowi6 *fl6, int flags) +struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, + int oif, struct flowi6 *fl6, int flags) { struct fib6_node *fn, *saved_fn; struct rt6_info *rt; @@ -1139,6 +1139,7 @@ redo_rt6_select: } } +EXPORT_SYMBOL_GPL(ip6_pol_route); static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi6 *fl6, int flags) @@ -1782,7 +1783,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, }; struct fib6_table *table; struct rt6_info *rt; - int flags = 0; + int flags = RT6_LOOKUP_F_IFACE; table = fib6_get_table(net, cfg->fc_table); if (!table) @@ -2200,7 +2201,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * first-hop router for the specified ICMP Destination Address. */ - if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) { + if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) { net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); return; } @@ -2235,12 +2236,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * We have finally decided to accept it. */ - neigh_update(neigh, lladdr, NUD_STALE, + ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| - NEIGH_UPDATE_F_ISROUTER)) - ); + NEIGH_UPDATE_F_ISROUTER)), + NDISC_REDIRECT, &ndopts); nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); if (!nrt) @@ -2585,23 +2586,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, return rt; } -int ip6_route_get_saddr(struct net *net, - struct rt6_info *rt, - const struct in6_addr *daddr, - unsigned int prefs, - struct in6_addr *saddr) -{ - struct inet6_dev *idev = - rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; - int err = 0; - if (rt && rt->rt6i_prefsrc.plen) - *saddr = rt->rt6i_prefsrc.addr; - else - err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, - daddr, prefs, saddr); - return err; -} - /* remove deleted ip from prefsrc entries */ struct arg_dev_net_ip { struct net_device *dev; @@ -3306,6 +3290,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) err = -EINVAL; memset(&fl6, 0, sizeof(fl6)); + rtm = nlmsg_data(nlh); + fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0); if (tb[RTA_SRC]) { if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 0a5a255277e5..182b6a9be29d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -479,47 +479,12 @@ static void ipip6_tunnel_uninit(struct net_device *dev) dev_put(dev); } -/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH - * if sufficient data bytes are available - */ -static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb) -{ - int ihl = ((const struct iphdr *)skb->data)->ihl*4; - struct rt6_info *rt; - struct sk_buff *skb2; - - if (!pskb_may_pull(skb, ihl + sizeof(struct ipv6hdr) + 8)) - return 1; - - skb2 = skb_clone(skb, GFP_ATOMIC); - - if (!skb2) - return 1; - - skb_dst_drop(skb2); - skb_pull(skb2, ihl); - skb_reset_network_header(skb2); - - rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); - - if (rt && rt->dst.dev) - skb2->dev = rt->dst.dev; - - icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); - - if (rt) - ip6_rt_put(rt); - - kfree_skb(skb2); - - return 0; -} - static int ipip6_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; + unsigned int data_len = 0; struct ip_tunnel *t; int err; @@ -544,6 +509,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) return 0; + data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ break; case ICMP_REDIRECT: break; @@ -560,22 +526,22 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, iph->protocol, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - IPPROTO_IPV6, 0); + iph->protocol, 0); err = 0; goto out; } - if (t->parms.iph.daddr == 0) + err = 0; + if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) goto out; - err = 0; - if (!ipip6_err_gen_icmpv6_unreach(skb)) + if (t->parms.iph.daddr == 0) goto out; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) @@ -722,12 +688,19 @@ out: return 0; } -static const struct tnl_ptk_info tpi = { +static const struct tnl_ptk_info ipip_tpi = { /* no tunnel info required for ipip. */ .proto = htons(ETH_P_IP), }; -static int ipip_rcv(struct sk_buff *skb) +#if IS_ENABLED(CONFIG_MPLS) +static const struct tnl_ptk_info mplsip_tpi = { + /* no tunnel info required for mplsip. */ + .proto = htons(ETH_P_MPLS_UC), +}; +#endif + +static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto) { const struct iphdr *iph; struct ip_tunnel *tunnel; @@ -736,15 +709,23 @@ static int ipip_rcv(struct sk_buff *skb) tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel) { - if (tunnel->parms.iph.protocol != IPPROTO_IPIP && + const struct tnl_ptk_info *tpi; + + if (tunnel->parms.iph.protocol != ipproto && tunnel->parms.iph.protocol != 0) goto drop; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; - if (iptunnel_pull_header(skb, 0, tpi.proto, false)) +#if IS_ENABLED(CONFIG_MPLS) + if (ipproto == IPPROTO_MPLS) + tpi = &mplsip_tpi; + else +#endif + tpi = &ipip_tpi; + if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; - return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error); + return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error); } return 1; @@ -754,6 +735,18 @@ drop: return 0; } +static int ipip_rcv(struct sk_buff *skb) +{ + return sit_tunnel_rcv(skb, IPPROTO_IPIP); +} + +#if IS_ENABLED(CONFIG_MPLS) +static int mplsip_rcv(struct sk_buff *skb) +{ + return sit_tunnel_rcv(skb, IPPROTO_MPLS); +} +#endif + /* * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function * stores the embedded IPv4 address in v4dst and returns true. @@ -825,9 +818,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, u8 protocol = IPPROTO_IPV6; int t_hlen = tunnel->hlen + sizeof(struct iphdr); - if (skb->protocol != htons(ETH_P_IPV6)) - goto tx_error; - if (tos == 1) tos = ipv6_get_dsfield(iph6); @@ -995,7 +985,8 @@ tx_error: return NETDEV_TX_OK; } -static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb, + struct net_device *dev, u8 ipproto) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; @@ -1003,9 +994,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) goto tx_error; - skb_set_inner_ipproto(skb, IPPROTO_IPIP); + skb_set_inner_ipproto(skb, ipproto); - ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); + ip_tunnel_xmit(skb, dev, tiph, ipproto); return NETDEV_TX_OK; tx_error: kfree_skb(skb); @@ -1018,11 +1009,16 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, { switch (skb->protocol) { case htons(ETH_P_IP): - ipip_tunnel_xmit(skb, dev); + sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP); break; case htons(ETH_P_IPV6): ipip6_tunnel_xmit(skb, dev); break; +#if IS_ENABLED(CONFIG_MPLS) + case htons(ETH_P_MPLS_UC): + sit_tunnel_xmit__(skb, dev, IPPROTO_MPLS); + break; +#endif default: goto tx_err; } @@ -1130,6 +1126,16 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, } #endif +bool ipip6_valid_ip_proto(u8 ipproto) +{ + return ipproto == IPPROTO_IPV6 || + ipproto == IPPROTO_IPIP || +#if IS_ENABLED(CONFIG_MPLS) + ipproto == IPPROTO_MPLS || +#endif + ipproto == 0; +} + static int ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -1189,9 +1195,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) goto done; err = -EINVAL; - if (p.iph.protocol != IPPROTO_IPV6 && - p.iph.protocol != IPPROTO_IPIP && - p.iph.protocol != 0) + if (!ipip6_valid_ip_proto(p.iph.protocol)) goto done; if (p.iph.version != 4 || p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) @@ -1416,9 +1420,7 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); - if (proto != IPPROTO_IPV6 && - proto != IPPROTO_IPIP && - proto != 0) + if (!ipip6_valid_ip_proto(proto)) return -EINVAL; return 0; @@ -1760,6 +1762,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = { .priority = 2, }; +#if IS_ENABLED(CONFIG_MPLS) +static struct xfrm_tunnel mplsip_handler __read_mostly = { + .handler = mplsip_rcv, + .err_handler = ipip6_err, + .priority = 2, +}; +#endif + static void __net_exit sit_destroy_tunnels(struct net *net, struct list_head *head) { @@ -1855,6 +1865,9 @@ static void __exit sit_cleanup(void) rtnl_link_unregister(&sit_link_ops); xfrm4_tunnel_deregister(&sit_handler, AF_INET6); xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +#if IS_ENABLED(CONFIG_MPLS) + xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS); +#endif unregister_pernet_device(&sit_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ @@ -1864,7 +1877,7 @@ static int __init sit_init(void) { int err; - pr_info("IPv6 over IPv4 tunneling driver\n"); + pr_info("IPv6, IPv4 and MPLS over IPv4 tunneling driver\n"); err = register_pernet_device(&sit_net_ops); if (err < 0) @@ -1879,6 +1892,13 @@ static int __init sit_init(void) pr_info("%s: can't register ip4ip4\n", __func__); goto xfrm_tunnel4_failed; } +#if IS_ENABLED(CONFIG_MPLS) + err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS); + if (err < 0) { + pr_info("%s: can't register mplsip\n", __func__); + goto xfrm_tunnel_mpls_failed; + } +#endif err = rtnl_link_register(&sit_link_ops); if (err < 0) goto rtnl_link_failed; @@ -1887,6 +1907,10 @@ out: return err; rtnl_link_failed: +#if IS_ENABLED(CONFIG_MPLS) + xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS); +xfrm_tunnel_mpls_failed: +#endif xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm_tunnel4_failed: xfrm4_tunnel_deregister(&sit_handler, AF_INET6); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 45243bbe5253..69c50e737c54 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -15,6 +15,9 @@ #include <net/ipv6.h> #include <net/addrconf.h> #include <net/inet_frag.h> +#ifdef CONFIG_NETLABEL +#include <net/calipso.h> +#endif static int one = 1; static int auto_flowlabels_min; @@ -106,6 +109,22 @@ static struct ctl_table ipv6_rotable[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &one }, +#ifdef CONFIG_NETLABEL + { + .procname = "calipso_cache_enable", + .data = &calipso_cache_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "calipso_cache_bucket_size", + .data = &calipso_cache_bucketsize, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif /* CONFIG_NETLABEL */ { } }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 79e33e02f11a..33df8b8575cc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -443,6 +443,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt; struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; int err = -ENOMEM; @@ -463,8 +464,10 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); rcu_read_lock(); - err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), - np->tclass); + opt = ireq->ipv6_opt; + if (!opt) + opt = rcu_dereference(np->opt); + err = ip6_xmit(sk, skb, fl6, opt, np->tclass); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -476,6 +479,7 @@ done: static void tcp_v6_reqsk_destructor(struct request_sock *req) { + kfree(inet_rsk(req)->ipv6_opt); kfree_skb(inet_rsk(req)->pktopts); } @@ -526,26 +530,33 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); } -static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, - const struct in6_addr *daddr, - const struct in6_addr *saddr, int nbytes) +static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + const struct tcphdr *th, int nbytes) { struct tcp6_pseudohdr *bp; struct scatterlist sg; + struct tcphdr *_th; - bp = &hp->md5_blk.ip6; + bp = hp->scratch; /* 1. TCP pseudo-header (RFC2460) */ bp->saddr = *saddr; bp->daddr = *daddr; bp->protocol = cpu_to_be32(IPPROTO_TCP); bp->len = cpu_to_be32(nbytes); - sg_init_one(&sg, bp, sizeof(*bp)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp)); + _th = (struct tcphdr *)(bp + 1); + memcpy(_th, th, sizeof(*th)); + _th->check = 0; + + sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, + sizeof(*bp) + sizeof(*th)); return crypto_ahash_update(hp->md5_req); } -static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, const struct tcphdr *th) { @@ -559,9 +570,7 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; @@ -606,9 +615,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) goto clear_hash; if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) goto clear_hash; @@ -738,7 +745,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int rst, - u8 tclass, u32 label) + u8 tclass, __be32 label) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -911,7 +918,7 @@ out: static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, - u32 label) + __be32 label) { tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, label); @@ -1109,7 +1116,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * but we make one more one thing there: reattach optmem to newsk. */ - opt = rcu_dereference(np->opt); + opt = ireq->ipv6_opt; + if (!opt) + opt = rcu_dereference(np->opt); if (opt) { opt = ipv6_dup_options(newsk, opt); RCU_INIT_POINTER(newnp->opt, opt); @@ -1721,7 +1730,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2da1896af934..81e2f98b958d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash); } -static inline int compute_score(struct sock *sk, struct net *net, - unsigned short hnum, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif) +static int compute_score(struct sock *sk, struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, unsigned short hnum, + int dif) { int score; struct inet_sock *inet; @@ -162,54 +161,11 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -static inline int compute_score2(struct sock *sk, struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, - unsigned short hnum, int dif) -{ - int score; - struct inet_sock *inet; - - if (!net_eq(sock_net(sk), net) || - udp_sk(sk)->udp_port_hash != hnum || - sk->sk_family != PF_INET6) - return -1; - - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - - score = 0; - inet = inet_sk(sk); - - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } - - if (sk->sk_incoming_cpu == raw_smp_processor_id()) - score++; - - return score; -} - -/* called with read_rcu_lock() */ +/* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -219,7 +175,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, result = NULL; badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score2(sk, net, saddr, sport, + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; @@ -268,17 +224,22 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2, skb); + hslot2, skb); if (!result) { + unsigned int old_slot2 = slot2; hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; + /* avoid searching the same slot again. */ + if (unlikely(slot2 == old_slot2)) + return result; + hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp6_lib_lookup2(net, saddr, sport, - &in6addr_any, hnum, dif, - hslot2, slot2, skb); + daddr, hnum, dif, + hslot2, skb); } return result; } @@ -286,7 +247,7 @@ begin: result = NULL; badness = -1; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -653,12 +614,12 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_access_pointer(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) - goto csum_error; - if (sk_filter(sk, skb)) - goto drop; - } + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) + goto csum_error; + + if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) + goto drop; udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { @@ -1246,6 +1207,11 @@ do_udp_sendmsg: security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + if (ipc6.tclass < 0) + ipc6.tclass = np->tclass; + + fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); @@ -1256,9 +1222,6 @@ do_udp_sendmsg: if (ipc6.hlimit < 0) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - if (ipc6.tclass < 0) - ipc6.tclass = np->tclass; - if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index c074771a10f7..6cc97003e4a9 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -366,12 +366,12 @@ static void __net_exit xfrm6_net_sysctl_exit(struct net *net) kfree(table); } #else /* CONFIG_SYSCTL */ -static int inline xfrm6_net_sysctl_init(struct net *net) +static inline int xfrm6_net_sysctl_init(struct net *net) { return 0; } -static void inline xfrm6_net_sysctl_exit(struct net *net) +static inline void xfrm6_net_sysctl_exit(struct net *net) { } #endif |