diff options
Diffstat (limited to 'net/netfilter/nf_conntrack_core.c')
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 127 |
1 files changed, 89 insertions, 38 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7f7997460764..31399c53dfb1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -66,6 +66,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash); struct conntrack_gc_work { struct delayed_work dwork; u32 next_bucket; + u32 avg_timeout; + u32 start_time; bool exiting; bool early_drop; }; @@ -77,8 +79,19 @@ static __read_mostly bool nf_conntrack_locks_all; /* serialize hash resizes and nf_ct_iterate_cleanup */ static DEFINE_MUTEX(nf_conntrack_mutex); -#define GC_SCAN_INTERVAL (120u * HZ) +#define GC_SCAN_INTERVAL_MAX (60ul * HZ) +#define GC_SCAN_INTERVAL_MIN (1ul * HZ) + +/* clamp timeouts to this value (TCP unacked) */ +#define GC_SCAN_INTERVAL_CLAMP (300ul * HZ) + +/* large initial bias so that we don't scan often just because we have + * three entries with a 1s timeout. + */ +#define GC_SCAN_INTERVAL_INIT INT_MAX + #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) +#define GC_SCAN_EXPIRED_MAX (64000u / HZ) #define MIN_CHAINLEN 8u #define MAX_CHAINLEN (32u - MIN_CHAINLEN) @@ -558,7 +571,7 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) #define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK) -/* Released via destroy_conntrack() */ +/* Released via nf_ct_destroy() */ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags) @@ -585,7 +598,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, tmpl->status = IPS_TEMPLATE; write_pnet(&tmpl->ct_net, net); nf_ct_zone_add(tmpl, zone); - atomic_set(&tmpl->ct_general.use, 0); + refcount_set(&tmpl->ct_general.use, 1); return tmpl; } @@ -612,13 +625,12 @@ static void destroy_gre_conntrack(struct nf_conn *ct) #endif } -static void -destroy_conntrack(struct nf_conntrack *nfct) +void nf_ct_destroy(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - pr_debug("destroy_conntrack(%p)\n", ct); - WARN_ON(atomic_read(&nfct->use) != 0); + pr_debug("%s(%p)\n", __func__, ct); + WARN_ON(refcount_read(&nfct->use) != 0); if (unlikely(nf_ct_is_template(ct))) { nf_ct_tmpl_free(ct); @@ -643,9 +655,10 @@ destroy_conntrack(struct nf_conntrack *nfct) if (ct->master) nf_ct_put(ct->master); - pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct); + pr_debug("%s: returning ct=%p to slab\n", __func__, ct); nf_conntrack_free(ct); } +EXPORT_SYMBOL(nf_ct_destroy); static void nf_ct_delete_from_lists(struct nf_conn *ct) { @@ -742,7 +755,7 @@ nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2) /* caller must hold rcu readlock and none of the nf_conntrack_locks */ static void nf_ct_gc_expired(struct nf_conn *ct) { - if (!atomic_inc_not_zero(&ct->ct_general.use)) + if (!refcount_inc_not_zero(&ct->ct_general.use)) return; if (nf_ct_should_gc(ct)) @@ -810,7 +823,7 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, * in, try to obtain a reference and re-check tuple */ ct = nf_ct_tuplehash_to_ctrack(h); - if (likely(atomic_inc_not_zero(&ct->ct_general.use))) { + if (likely(refcount_inc_not_zero(&ct->ct_general.use))) { if (likely(nf_ct_key_equal(h, tuple, zone, net))) goto found; @@ -907,7 +920,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) smp_wmb(); /* The caller holds a reference to this object */ - atomic_set(&ct->ct_general.use, 2); + refcount_set(&ct->ct_general.use, 2); __nf_conntrack_hash_insert(ct, hash, reply_hash); nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert); @@ -958,7 +971,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct) { struct nf_conn_tstamp *tstamp; - atomic_inc(&ct->ct_general.use); + refcount_inc(&ct->ct_general.use); ct->status |= IPS_CONFIRMED; /* set conntrack timestamp, if enabled. */ @@ -989,7 +1002,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb, nf_ct_acct_merge(ct, ctinfo, loser_ct); nf_ct_add_to_dying_list(loser_ct); - nf_conntrack_put(&loser_ct->ct_general); + nf_ct_put(loser_ct); nf_ct_set(skb, ct, ctinfo); NF_CT_STAT_INC(net, clash_resolve); @@ -1351,7 +1364,7 @@ static unsigned int early_drop_list(struct net *net, nf_ct_is_dying(tmp)) continue; - if (!atomic_inc_not_zero(&tmp->ct_general.use)) + if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; /* kill only if still in same netns -- might have moved due to @@ -1420,16 +1433,28 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) static void gc_worker(struct work_struct *work) { - unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION; unsigned int i, hashsz, nf_conntrack_max95 = 0; - unsigned long next_run = GC_SCAN_INTERVAL; + u32 end_time, start_time = nfct_time_stamp; struct conntrack_gc_work *gc_work; + unsigned int expired_count = 0; + unsigned long next_run; + s32 delta_time; + gc_work = container_of(work, struct conntrack_gc_work, dwork.work); i = gc_work->next_bucket; if (gc_work->early_drop) nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; + if (i == 0) { + gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT; + gc_work->start_time = start_time; + } + + next_run = gc_work->avg_timeout; + + end_time = start_time + GC_SCAN_MAX_DURATION; + do { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; @@ -1446,6 +1471,7 @@ static void gc_worker(struct work_struct *work) hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { struct nf_conntrack_net *cnet; + unsigned long expires; struct net *net; tmp = nf_ct_tuplehash_to_ctrack(h); @@ -1455,11 +1481,29 @@ static void gc_worker(struct work_struct *work) continue; } + if (expired_count > GC_SCAN_EXPIRED_MAX) { + rcu_read_unlock(); + + gc_work->next_bucket = i; + gc_work->avg_timeout = next_run; + + delta_time = nfct_time_stamp - gc_work->start_time; + + /* re-sched immediately if total cycle time is exceeded */ + next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX; + goto early_exit; + } + if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); + expired_count++; continue; } + expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP); + next_run += expires; + next_run /= 2u; + if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp)) continue; @@ -1469,7 +1513,7 @@ static void gc_worker(struct work_struct *work) continue; /* need to take reference to avoid possible races */ - if (!atomic_inc_not_zero(&tmp->ct_general.use)) + if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; if (gc_worker_skip_ct(tmp)) { @@ -1477,8 +1521,10 @@ static void gc_worker(struct work_struct *work) continue; } - if (gc_worker_can_early_drop(tmp)) + if (gc_worker_can_early_drop(tmp)) { nf_ct_kill(tmp); + expired_count++; + } nf_ct_put(tmp); } @@ -1491,33 +1537,38 @@ static void gc_worker(struct work_struct *work) cond_resched(); i++; - if (time_after(jiffies, end_time) && i < hashsz) { + delta_time = nfct_time_stamp - end_time; + if (delta_time > 0 && i < hashsz) { + gc_work->avg_timeout = next_run; gc_work->next_bucket = i; next_run = 0; - break; + goto early_exit; } } while (i < hashsz); + gc_work->next_bucket = 0; + + next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX); + + delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1); + if (next_run > (unsigned long)delta_time) + next_run -= delta_time; + else + next_run = 1; + +early_exit: if (gc_work->exiting) return; - /* - * Eviction will normally happen from the packet path, and not - * from this gc worker. - * - * This worker is only here to reap expired entries when system went - * idle after a busy period. - */ - if (next_run) { + if (next_run) gc_work->early_drop = false; - gc_work->next_bucket = 0; - } + queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { - INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); + INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); gc_work->exiting = false; } @@ -1571,7 +1622,7 @@ __nf_conntrack_alloc(struct net *net, /* Because we use RCU lookups, we set ct_general.use to zero before * this is inserted in any list. */ - atomic_set(&ct->ct_general.use, 0); + refcount_set(&ct->ct_general.use, 0); return ct; out: atomic_dec(&cnet->count); @@ -1596,7 +1647,7 @@ void nf_conntrack_free(struct nf_conn *ct) /* A freed object has refcnt == 0, that's * the golden rule for SLAB_TYPESAFE_BY_RCU */ - WARN_ON(atomic_read(&ct->ct_general.use) != 0); + WARN_ON(refcount_read(&ct->ct_general.use) != 0); nf_ct_ext_destroy(ct); kmem_cache_free(nf_conntrack_cachep, ct); @@ -1688,8 +1739,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); - /* Now it is inserted into the unconfirmed list, bump refcount */ - nf_conntrack_get(&ct->ct_general); + /* Now it is inserted into the unconfirmed list, set refcount to 1. */ + refcount_set(&ct->ct_general.use, 1); nf_ct_add_to_unconfirmed_list(ct); local_bh_enable(); @@ -1920,7 +1971,7 @@ repeat: /* Invalid: inverse of the return code tells * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); - nf_conntrack_put(&ct->ct_general); + nf_ct_put(ct); skb->_nfct = 0; /* Special case: TCP tracker reports an attempt to reopen a * closed/aborted connection. We have to go back and create a @@ -2301,7 +2352,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), return NULL; found: - atomic_inc(&ct->ct_general.use); + refcount_inc(&ct->ct_general.use); spin_unlock(lockp); local_bh_enable(); return ct; @@ -2774,7 +2825,7 @@ err_cachep: static struct nf_ct_hook nf_conntrack_hook = { .update = nf_conntrack_update, - .destroy = destroy_conntrack, + .destroy = nf_ct_destroy, .get_tuple_skb = nf_conntrack_get_tuple_skb, }; |