summaryrefslogtreecommitdiff
path: root/net/xfrm/xfrm_policy.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-07-27 19:33:37 +0300
committerDavid S. Miller <davem@davemloft.net>2018-07-27 19:33:37 +0300
commit7a49d3d4ea42fe15db0d36e042df14a645d1fdce (patch)
treec3a66431d28945993858715f69a15635bf4d0b59 /net/xfrm/xfrm_policy.c
parentecbcd689d74a394b711d2360aef7e5d007ec9d98 (diff)
parentc6f5e017df9dfa9f6cbe70da008e7d716d726f1b (diff)
downloadlinux-7a49d3d4ea42fe15db0d36e042df14a645d1fdce.tar.xz
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next
Steffen Klassert says: ==================== pull request (net-next): ipsec-next 2018-07-27 1) Extend the output_mark to also support the input direction and masking the mark values before applying to the skb. 2) Add a new lookup key for the upcoming xfrm interfaces. 3) Extend the xfrm lookups to match xfrm interface IDs. 4) Add virtual xfrm interfaces. The purpose of these interfaces is to overcome the design limitations that the existing VTI devices have. The main limitations that we see with the current VTI are the following: VTI interfaces are L3 tunnels with configurable endpoints. For xfrm, the tunnel endpoints are already determined by the SA. So the VTI tunnel endpoints must be either the same as on the SA or wildcards. In case VTI tunnel endpoints are the same as on the SA, we get a one to one correlation between the SA and the tunnel. So each SA needs its own tunnel interface. On the other hand, we can have only one VTI tunnel with wildcard src/dst tunnel endpoints in the system because the lookup is based on the tunnel endpoints. The existing tunnel lookup won't work with multiple tunnels with wildcard tunnel endpoints. Some usecases require more than one VTI tunnel of this type, for example if somebody has multiple namespaces and every namespace requires such a VTI. VTI needs separate interfaces for IPv4 and IPv6 tunnels. So when routing to a VTI, we have to know to which address family this traffic class is going to be encapsulated. This is a limitation because it makes routing more complex and it is not always possible to know what happens behind the VTI, e.g. when the VTI is moved to some namespace. VTI works just with tunnel mode SAs. We need generic interfaces that ensure transformation, regardless of the xfrm mode and the encapsulated address family. VTI is configured with a combination of GRE keys and xfrm marks. 
With this we have to deal with some extra cases in the generic tunnel lookup because the GRE keys on the VTI are actually not GRE keys, the GRE keys were just reused for something else. All extensions to the VTI interfaces would require adding even more complexity to the generic tunnel lookup. So to overcome this, we developed xfrm interfaces with the following design goals: It should be possible to tunnel IPv4 and IPv6 through the same interface. No limitation on xfrm mode (tunnel, transport and beet). Should be a generic virtual interface that ensures IPsec transformation, no need to know what happens behind the interface. Interfaces should be configured with a new key that must match a new policy/SA lookup key. The lookup logic should stay in the xfrm codebase, no need to change or extend generic routing and tunnel lookups. Should be possible to use IPsec hardware offloads of the underlying interface. 5) Remove xfrm pcpu policy cache. This was added after the flowcache removal, but it turned out to make things even worse. From Florian Westphal. 6) Allow to update the set mark on SA updates. From Nathan Harold. 7) Convert some timestamps to time64_t. From Arnd Bergmann. 8) Don't check the offload_handle in xfrm code, it is an opaque data cookie for the driver. From Shannon Nelson. 9) Remove xfrmi interface ID from flowi. After this patch no generic code is touched anymore to do xfrm interface lookups. From Benedict Wong. 10) Allow to update the xfrm interface ID on SA updates. From Nathan Harold. 11) Don't pass zero to ERR_PTR() in xfrm_resolve_and_create_bundle. From YueHaibing. 12) Return more detailed errors on xfrm interface creation. From Benedict Wong. 13) Use PTR_ERR_OR_ZERO instead of IS_ERR + PTR_ERR. From the kbuild test robot. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/xfrm/xfrm_policy.c')
-rw-r--r--net/xfrm/xfrm_policy.c314
1 files changed, 133 insertions, 181 deletions
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5f48251c1319..69f06f879091 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -45,8 +45,9 @@ struct xfrm_flo {
u8 flags;
};
-static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
-static struct work_struct *xfrm_pcpu_work __read_mostly;
+static DEFINE_SPINLOCK(xfrm_if_cb_lock);
+static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
+
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
__read_mostly;
@@ -119,6 +120,12 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa
return afinfo;
}
+/* Called with rcu_read_lock(). */
+static const struct xfrm_if_cb *xfrm_if_get_cb(void)
+{
+ return rcu_dereference(xfrm_if_cb);
+}
+
struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr,
@@ -182,8 +189,8 @@ static inline unsigned long make_jiffies(long secs)
static void xfrm_policy_timer(struct timer_list *t)
{
struct xfrm_policy *xp = from_timer(xp, t, timer);
- unsigned long now = get_seconds();
- long next = LONG_MAX;
+ time64_t now = ktime_get_real_seconds();
+ time64_t next = TIME64_MAX;
int warn = 0;
int dir;
@@ -195,7 +202,7 @@ static void xfrm_policy_timer(struct timer_list *t)
dir = xfrm_policy_id2dir(xp->index);
if (xp->lft.hard_add_expires_seconds) {
- long tmo = xp->lft.hard_add_expires_seconds +
+ time64_t tmo = xp->lft.hard_add_expires_seconds +
xp->curlft.add_time - now;
if (tmo <= 0)
goto expired;
@@ -203,7 +210,7 @@ static void xfrm_policy_timer(struct timer_list *t)
next = tmo;
}
if (xp->lft.hard_use_expires_seconds) {
- long tmo = xp->lft.hard_use_expires_seconds +
+ time64_t tmo = xp->lft.hard_use_expires_seconds +
(xp->curlft.use_time ? : xp->curlft.add_time) - now;
if (tmo <= 0)
goto expired;
@@ -211,7 +218,7 @@ static void xfrm_policy_timer(struct timer_list *t)
next = tmo;
}
if (xp->lft.soft_add_expires_seconds) {
- long tmo = xp->lft.soft_add_expires_seconds +
+ time64_t tmo = xp->lft.soft_add_expires_seconds +
xp->curlft.add_time - now;
if (tmo <= 0) {
warn = 1;
@@ -221,7 +228,7 @@ static void xfrm_policy_timer(struct timer_list *t)
next = tmo;
}
if (xp->lft.soft_use_expires_seconds) {
- long tmo = xp->lft.soft_use_expires_seconds +
+ time64_t tmo = xp->lft.soft_use_expires_seconds +
(xp->curlft.use_time ? : xp->curlft.add_time) - now;
if (tmo <= 0) {
warn = 1;
@@ -233,7 +240,7 @@ static void xfrm_policy_timer(struct timer_list *t)
if (warn)
km_policy_expired(xp, dir, 0, 0);
- if (next != LONG_MAX &&
+ if (next != TIME64_MAX &&
!mod_timer(&xp->timer, jiffies + make_jiffies(next)))
xfrm_pol_hold(xp);
@@ -747,6 +754,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
newpos = NULL;
hlist_for_each_entry(pol, chain, bydst) {
if (pol->type == policy->type &&
+ pol->if_id == policy->if_id &&
!selector_cmp(&pol->selector, &policy->selector) &&
xfrm_policy_mark_match(policy, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
@@ -783,7 +791,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
}
policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
- policy->curlft.add_time = get_seconds();
+ policy->curlft.add_time = ktime_get_real_seconds();
policy->curlft.use_time = 0;
if (!mod_timer(&policy->timer, jiffies + HZ))
xfrm_pol_hold(policy);
@@ -798,8 +806,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
}
EXPORT_SYMBOL(xfrm_policy_insert);
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
- int dir, struct xfrm_selector *sel,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
+ u8 type, int dir,
+ struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx, int delete,
int *err)
{
@@ -812,6 +821,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
ret = NULL;
hlist_for_each_entry(pol, chain, bydst) {
if (pol->type == type &&
+ pol->if_id == if_id &&
(mark & pol->mark.m) == pol->mark.v &&
!selector_cmp(sel, &pol->selector) &&
xfrm_sec_ctx_match(ctx, pol->security)) {
@@ -837,8 +847,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
- int dir, u32 id, int delete, int *err)
+struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
+ u8 type, int dir, u32 id, int delete,
+ int *err)
{
struct xfrm_policy *pol, *ret;
struct hlist_head *chain;
@@ -853,6 +864,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
ret = NULL;
hlist_for_each_entry(pol, chain, byidx) {
if (pol->type == type && pol->index == id &&
+ pol->if_id == if_id &&
(mark & pol->mark.m) == pol->mark.v) {
xfrm_pol_hold(pol);
if (delete) {
@@ -1056,13 +1068,14 @@ EXPORT_SYMBOL(xfrm_policy_walk_done);
*/
static int xfrm_policy_match(const struct xfrm_policy *pol,
const struct flowi *fl,
- u8 type, u16 family, int dir)
+ u8 type, u16 family, int dir, u32 if_id)
{
const struct xfrm_selector *sel = &pol->selector;
int ret = -ESRCH;
bool match;
if (pol->family != family ||
+ pol->if_id != if_id ||
(fl->flowi_mark & pol->mark.m) != pol->mark.v ||
pol->type != type)
return ret;
@@ -1077,7 +1090,8 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
const struct flowi *fl,
- u16 family, u8 dir)
+ u16 family, u8 dir,
+ u32 if_id)
{
int err;
struct xfrm_policy *pol, *ret;
@@ -1101,7 +1115,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
priority = ~0U;
ret = NULL;
hlist_for_each_entry_rcu(pol, chain, bydst) {
- err = xfrm_policy_match(pol, fl, type, family, dir);
+ err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
if (err) {
if (err == -ESRCH)
continue;
@@ -1120,7 +1134,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
if ((pol->priority >= priority) && ret)
break;
- err = xfrm_policy_match(pol, fl, type, family, dir);
+ err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
if (err) {
if (err == -ESRCH)
continue;
@@ -1145,21 +1159,25 @@ fail:
return ret;
}
-static struct xfrm_policy *
-xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
+static struct xfrm_policy *xfrm_policy_lookup(struct net *net,
+ const struct flowi *fl,
+ u16 family, u8 dir, u32 if_id)
{
#ifdef CONFIG_XFRM_SUB_POLICY
struct xfrm_policy *pol;
- pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
+ pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family,
+ dir, if_id);
if (pol != NULL)
return pol;
#endif
- return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+ return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family,
+ dir, if_id);
}
static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
- const struct flowi *fl, u16 family)
+ const struct flowi *fl,
+ u16 family, u32 if_id)
{
struct xfrm_policy *pol;
@@ -1177,7 +1195,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
match = xfrm_selector_match(&pol->selector, fl, family);
if (match) {
- if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
+ if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
+ pol->if_id != if_id) {
pol = NULL;
goto out;
}
@@ -1268,7 +1287,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
old_pol = rcu_dereference_protected(sk->sk_policy[dir],
lockdep_is_held(&net->xfrm.xfrm_policy_lock));
if (pol) {
- pol->curlft.add_time = get_seconds();
+ pol->curlft.add_time = ktime_get_real_seconds();
pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
xfrm_sk_policy_link(pol, dir);
}
@@ -1305,6 +1324,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
newp->lft = old->lft;
newp->curlft = old->curlft;
newp->mark = old->mark;
+ newp->if_id = old->if_id;
newp->action = old->action;
newp->flags = old->flags;
newp->xfrm_nr = old->xfrm_nr;
@@ -1390,7 +1410,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
}
}
- x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
+ x = xfrm_state_find(remote, local, fl, tmpl, policy, &error,
+ family, policy->if_id);
if (x && x->km.state == XFRM_STATE_VALID) {
xfrm[nx++] = x;
@@ -1607,10 +1628,11 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
dst_copy_metrics(dst1, dst);
if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
+ __u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
+
family = xfrm[i]->props.family;
dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
- &saddr, &daddr, family,
- xfrm[i]->props.output_mark);
+ &saddr, &daddr, family, mark);
err = PTR_ERR(dst);
if (IS_ERR(dst))
goto put_states;
@@ -1692,7 +1714,8 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
XFRM_POLICY_TYPE_MAIN,
fl, family,
- XFRM_POLICY_OUT);
+ XFRM_POLICY_OUT,
+ pols[0]->if_id);
if (pols[1]) {
if (IS_ERR(pols[1])) {
xfrm_pols_put(pols, *num_pols);
@@ -1714,108 +1737,6 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
}
-static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
-{
- this_cpu_write(xfrm_last_dst, xdst);
- if (old)
- dst_release(&old->u.dst);
-}
-
-static void __xfrm_pcpu_work_fn(void)
-{
- struct xfrm_dst *old;
-
- old = this_cpu_read(xfrm_last_dst);
- if (old && !xfrm_bundle_ok(old))
- xfrm_last_dst_update(NULL, old);
-}
-
-static void xfrm_pcpu_work_fn(struct work_struct *work)
-{
- local_bh_disable();
- rcu_read_lock();
- __xfrm_pcpu_work_fn();
- rcu_read_unlock();
- local_bh_enable();
-}
-
-void xfrm_policy_cache_flush(void)
-{
- struct xfrm_dst *old;
- bool found = false;
- int cpu;
-
- might_sleep();
-
- local_bh_disable();
- rcu_read_lock();
- for_each_possible_cpu(cpu) {
- old = per_cpu(xfrm_last_dst, cpu);
- if (old && !xfrm_bundle_ok(old)) {
- if (smp_processor_id() == cpu) {
- __xfrm_pcpu_work_fn();
- continue;
- }
- found = true;
- break;
- }
- }
-
- rcu_read_unlock();
- local_bh_enable();
-
- if (!found)
- return;
-
- get_online_cpus();
-
- for_each_possible_cpu(cpu) {
- bool bundle_release;
-
- rcu_read_lock();
- old = per_cpu(xfrm_last_dst, cpu);
- bundle_release = old && !xfrm_bundle_ok(old);
- rcu_read_unlock();
-
- if (!bundle_release)
- continue;
-
- if (cpu_online(cpu)) {
- schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
- continue;
- }
-
- rcu_read_lock();
- old = per_cpu(xfrm_last_dst, cpu);
- if (old && !xfrm_bundle_ok(old)) {
- per_cpu(xfrm_last_dst, cpu) = NULL;
- dst_release(&old->u.dst);
- }
- rcu_read_unlock();
- }
-
- put_online_cpus();
-}
-
-static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
- struct xfrm_state * const xfrm[],
- int num)
-{
- const struct dst_entry *dst = &xdst->u.dst;
- int i;
-
- if (xdst->num_xfrms != num)
- return false;
-
- for (i = 0; i < num; i++) {
- if (!dst || dst->xfrm != xfrm[i])
- return false;
- dst = xfrm_dst_child(dst);
- }
-
- return xfrm_bundle_ok(xdst);
-}
-
static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
const struct flowi *fl, u16 family,
@@ -1824,34 +1745,21 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
struct net *net = xp_net(pols[0]);
struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
- struct xfrm_dst *xdst, *old;
+ struct xfrm_dst *xdst;
struct dst_entry *dst;
int err;
/* Try to instantiate a bundle */
err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
if (err <= 0) {
- if (err != 0 && err != -EAGAIN)
+ if (err == 0)
+ return NULL;
+
+ if (err != -EAGAIN)
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
return ERR_PTR(err);
}
- xdst = this_cpu_read(xfrm_last_dst);
- if (xdst &&
- xdst->u.dst.dev == dst_orig->dev &&
- xdst->num_pols == num_pols &&
- memcmp(xdst->pols, pols,
- sizeof(struct xfrm_policy *) * num_pols) == 0 &&
- xfrm_xdst_can_reuse(xdst, xfrm, err)) {
- dst_hold(&xdst->u.dst);
- xfrm_pols_put(pols, num_pols);
- while (err > 0)
- xfrm_state_put(xfrm[--err]);
- return xdst;
- }
-
- old = xdst;
-
dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
if (IS_ERR(dst)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
@@ -1864,9 +1772,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
xdst->policy_genid = atomic_read(&pols[0]->genid);
- atomic_set(&xdst->u.dst.__refcnt, 2);
- xfrm_last_dst_update(xdst, old);
-
return xdst;
}
@@ -2047,8 +1952,10 @@ free_dst:
goto out;
}
-static struct xfrm_dst *
-xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
+static struct xfrm_dst *xfrm_bundle_lookup(struct net *net,
+ const struct flowi *fl,
+ u16 family, u8 dir,
+ struct xfrm_flo *xflo, u32 if_id)
{
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
int num_pols = 0, num_xfrms = 0, err;
@@ -2057,7 +1964,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
/* Resolve policies to use if we couldn't get them from
* previous cache entry */
num_pols = 1;
- pols[0] = xfrm_policy_lookup(net, fl, family, dir);
+ pols[0] = xfrm_policy_lookup(net, fl, family, dir, if_id);
err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
if (err < 0)
@@ -2067,13 +1974,15 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
if (num_xfrms <= 0)
goto make_dummy_bundle;
- local_bh_disable();
xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
xflo->dst_orig);
- local_bh_enable();
-
if (IS_ERR(xdst)) {
err = PTR_ERR(xdst);
+ if (err == -EREMOTE) {
+ xfrm_pols_put(pols, num_pols);
+ return NULL;
+ }
+
if (err != -EAGAIN)
goto error;
goto make_dummy_bundle;
@@ -2123,14 +2032,19 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family,
return ret;
}
-/* Main function: finds/creates a bundle for given flow.
+/* Finds/creates a bundle for given flow and if_id
*
* At the moment we eat a raw IP route. Mostly to speed up lookups
* on interfaces with disabled IPsec.
+ *
+ * xfrm_lookup uses an if_id of 0 by default, and is provided for
+ * compatibility
*/
-struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
- const struct flowi *fl,
- const struct sock *sk, int flags)
+struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
+ struct dst_entry *dst_orig,
+ const struct flowi *fl,
+ const struct sock *sk,
+ int flags, u32 if_id)
{
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
struct xfrm_dst *xdst;
@@ -2146,7 +2060,8 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
sk = sk_const_to_full_sk(sk);
if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
num_pols = 1;
- pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family);
+ pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family,
+ if_id);
err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
if (err < 0)
@@ -2158,15 +2073,16 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
goto no_transform;
}
- local_bh_disable();
xdst = xfrm_resolve_and_create_bundle(
pols, num_pols, fl,
family, dst_orig);
- local_bh_enable();
if (IS_ERR(xdst)) {
xfrm_pols_put(pols, num_pols);
err = PTR_ERR(xdst);
+ if (err == -EREMOTE)
+ goto nopol;
+
goto dropdst;
} else if (xdst == NULL) {
num_xfrms = 0;
@@ -2189,7 +2105,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
!net->xfrm.policy_count[XFRM_POLICY_OUT])
goto nopol;
- xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
+ xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id);
if (xdst == NULL)
goto nopol;
if (IS_ERR(xdst)) {
@@ -2234,7 +2150,7 @@ no_transform:
}
for (i = 0; i < num_pols; i++)
- pols[i]->curlft.use_time = get_seconds();
+ pols[i]->curlft.use_time = ktime_get_real_seconds();
if (num_xfrms < 0) {
/* Prohibit the flow */
@@ -2270,6 +2186,19 @@ dropdst:
xfrm_pols_put(pols, drop_pols);
return ERR_PTR(err);
}
+EXPORT_SYMBOL(xfrm_lookup_with_ifid);
+
+/* Main function: finds/creates a bundle for given flow.
+ *
+ * At the moment we eat a raw IP route. Mostly to speed up lookups
+ * on interfaces with disabled IPsec.
+ */
+struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
+ const struct flowi *fl, const struct sock *sk,
+ int flags)
+{
+ return xfrm_lookup_with_ifid(net, dst_orig, fl, sk, flags, 0);
+}
EXPORT_SYMBOL(xfrm_lookup);
/* Callers of xfrm_lookup_route() must ensure a call to dst_output().
@@ -2365,6 +2294,7 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
return -EAFNOSUPPORT;
afinfo->decode_session(skb, fl, reverse);
+
err = security_xfrm_decode_session(skb, &fl->flowi_secid);
rcu_read_unlock();
return err;
@@ -2395,6 +2325,19 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
int reverse;
struct flowi fl;
int xerr_idx = -1;
+ const struct xfrm_if_cb *ifcb;
+ struct xfrm_if *xi;
+ u32 if_id = 0;
+
+ rcu_read_lock();
+ ifcb = xfrm_if_get_cb();
+
+ if (ifcb) {
+ xi = ifcb->decode_session(skb);
+ if (xi)
+ if_id = xi->p.if_id;
+ }
+ rcu_read_unlock();
reverse = dir & ~XFRM_POLICY_MASK;
dir &= XFRM_POLICY_MASK;
@@ -2422,7 +2365,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
pol = NULL;
sk = sk_to_full_sk(sk);
if (sk && sk->sk_policy[dir]) {
- pol = xfrm_sk_policy_lookup(sk, dir, &fl, family);
+ pol = xfrm_sk_policy_lookup(sk, dir, &fl, family, if_id);
if (IS_ERR(pol)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
return 0;
@@ -2430,7 +2373,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
if (!pol)
- pol = xfrm_policy_lookup(net, &fl, family, dir);
+ pol = xfrm_policy_lookup(net, &fl, family, dir, if_id);
if (IS_ERR(pol)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
@@ -2446,7 +2389,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 1;
}
- pol->curlft.use_time = get_seconds();
+ pol->curlft.use_time = ktime_get_real_seconds();
pols[0] = pol;
npols++;
@@ -2454,13 +2397,13 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
&fl, family,
- XFRM_POLICY_IN);
+ XFRM_POLICY_IN, if_id);
if (pols[1]) {
if (IS_ERR(pols[1])) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
return 0;
}
- pols[1]->curlft.use_time = get_seconds();
+ pols[1]->curlft.use_time = ktime_get_real_seconds();
npols++;
}
}
@@ -2819,6 +2762,21 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo)
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
+void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb)
+{
+ spin_lock(&xfrm_if_cb_lock);
+ rcu_assign_pointer(xfrm_if_cb, ifcb);
+ spin_unlock(&xfrm_if_cb_lock);
+}
+EXPORT_SYMBOL(xfrm_if_register_cb);
+
+void xfrm_if_unregister_cb(void)
+{
+ RCU_INIT_POINTER(xfrm_if_cb, NULL);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL(xfrm_if_unregister_cb);
+
#ifdef CONFIG_XFRM_STATISTICS
static int __net_init xfrm_statistics_init(struct net *net)
{
@@ -2986,19 +2944,13 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
void __init xfrm_init(void)
{
- int i;
-
- xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
- GFP_KERNEL);
- BUG_ON(!xfrm_pcpu_work);
-
- for (i = 0; i < NR_CPUS; i++)
- INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
-
register_pernet_subsys(&xfrm_net_ops);
xfrm_dev_init();
seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init();
+
+ RCU_INIT_POINTER(xfrm_if_cb, NULL);
+ synchronize_rcu();
}
#ifdef CONFIG_AUDITSYSCALL