summary | refs | log | tree | commit | diff
path: root/net/netfilter/nf_conntrack_core.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter/nf_conntrack_core.c')
-rw-r--r-- net/netfilter/nf_conntrack_core.c 127
1 files changed, 89 insertions, 38 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 7f7997460764..31399c53dfb1 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,6 +66,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
struct conntrack_gc_work {
struct delayed_work dwork;
u32 next_bucket;
+ u32 avg_timeout;
+ u32 start_time;
bool exiting;
bool early_drop;
};
@@ -77,8 +79,19 @@ static __read_mostly bool nf_conntrack_locks_all;
/* serialize hash resizes and nf_ct_iterate_cleanup */
static DEFINE_MUTEX(nf_conntrack_mutex);
-#define GC_SCAN_INTERVAL (120u * HZ)
+#define GC_SCAN_INTERVAL_MAX (60ul * HZ)
+#define GC_SCAN_INTERVAL_MIN (1ul * HZ)
+
+/* clamp timeouts to this value (TCP unacked) */
+#define GC_SCAN_INTERVAL_CLAMP (300ul * HZ)
+
+/* large initial bias so that we don't scan often just because we have
+ * three entries with a 1s timeout.
+ */
+#define GC_SCAN_INTERVAL_INIT INT_MAX
+
#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
+#define GC_SCAN_EXPIRED_MAX (64000u / HZ)
#define MIN_CHAINLEN 8u
#define MAX_CHAINLEN (32u - MIN_CHAINLEN)
@@ -558,7 +571,7 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
#define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK)
-/* Released via destroy_conntrack() */
+/* Released via nf_ct_destroy() */
struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
gfp_t flags)
@@ -585,7 +598,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
tmpl->status = IPS_TEMPLATE;
write_pnet(&tmpl->ct_net, net);
nf_ct_zone_add(tmpl, zone);
- atomic_set(&tmpl->ct_general.use, 0);
+ refcount_set(&tmpl->ct_general.use, 1);
return tmpl;
}
@@ -612,13 +625,12 @@ static void destroy_gre_conntrack(struct nf_conn *ct)
#endif
}
-static void
-destroy_conntrack(struct nf_conntrack *nfct)
+void nf_ct_destroy(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
- pr_debug("destroy_conntrack(%p)\n", ct);
- WARN_ON(atomic_read(&nfct->use) != 0);
+ pr_debug("%s(%p)\n", __func__, ct);
+ WARN_ON(refcount_read(&nfct->use) != 0);
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
@@ -643,9 +655,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
if (ct->master)
nf_ct_put(ct->master);
- pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
+ pr_debug("%s: returning ct=%p to slab\n", __func__, ct);
nf_conntrack_free(ct);
}
+EXPORT_SYMBOL(nf_ct_destroy);
static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
@@ -742,7 +755,7 @@ nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
/* caller must hold rcu readlock and none of the nf_conntrack_locks */
static void nf_ct_gc_expired(struct nf_conn *ct)
{
- if (!atomic_inc_not_zero(&ct->ct_general.use))
+ if (!refcount_inc_not_zero(&ct->ct_general.use))
return;
if (nf_ct_should_gc(ct))
@@ -810,7 +823,7 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
* in, try to obtain a reference and re-check tuple
*/
ct = nf_ct_tuplehash_to_ctrack(h);
- if (likely(atomic_inc_not_zero(&ct->ct_general.use))) {
+ if (likely(refcount_inc_not_zero(&ct->ct_general.use))) {
if (likely(nf_ct_key_equal(h, tuple, zone, net)))
goto found;
@@ -907,7 +920,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
smp_wmb();
/* The caller holds a reference to this object */
- atomic_set(&ct->ct_general.use, 2);
+ refcount_set(&ct->ct_general.use, 2);
__nf_conntrack_hash_insert(ct, hash, reply_hash);
nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert);
@@ -958,7 +971,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
{
struct nf_conn_tstamp *tstamp;
- atomic_inc(&ct->ct_general.use);
+ refcount_inc(&ct->ct_general.use);
ct->status |= IPS_CONFIRMED;
/* set conntrack timestamp, if enabled. */
@@ -989,7 +1002,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
nf_ct_acct_merge(ct, ctinfo, loser_ct);
nf_ct_add_to_dying_list(loser_ct);
- nf_conntrack_put(&loser_ct->ct_general);
+ nf_ct_put(loser_ct);
nf_ct_set(skb, ct, ctinfo);
NF_CT_STAT_INC(net, clash_resolve);
@@ -1351,7 +1364,7 @@ static unsigned int early_drop_list(struct net *net,
nf_ct_is_dying(tmp))
continue;
- if (!atomic_inc_not_zero(&tmp->ct_general.use))
+ if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
/* kill only if still in same netns -- might have moved due to
@@ -1420,16 +1433,28 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
static void gc_worker(struct work_struct *work)
{
- unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
unsigned int i, hashsz, nf_conntrack_max95 = 0;
- unsigned long next_run = GC_SCAN_INTERVAL;
+ u32 end_time, start_time = nfct_time_stamp;
struct conntrack_gc_work *gc_work;
+ unsigned int expired_count = 0;
+ unsigned long next_run;
+ s32 delta_time;
+
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
i = gc_work->next_bucket;
if (gc_work->early_drop)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
+ if (i == 0) {
+ gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+ gc_work->start_time = start_time;
+ }
+
+ next_run = gc_work->avg_timeout;
+
+ end_time = start_time + GC_SCAN_MAX_DURATION;
+
do {
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
@@ -1446,6 +1471,7 @@ static void gc_worker(struct work_struct *work)
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
struct nf_conntrack_net *cnet;
+ unsigned long expires;
struct net *net;
tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -1455,11 +1481,29 @@ static void gc_worker(struct work_struct *work)
continue;
}
+ if (expired_count > GC_SCAN_EXPIRED_MAX) {
+ rcu_read_unlock();
+
+ gc_work->next_bucket = i;
+ gc_work->avg_timeout = next_run;
+
+ delta_time = nfct_time_stamp - gc_work->start_time;
+
+ /* re-sched immediately if total cycle time is exceeded */
+ next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX;
+ goto early_exit;
+ }
+
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
+ expired_count++;
continue;
}
+ expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
+ next_run += expires;
+ next_run /= 2u;
+
if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
continue;
@@ -1469,7 +1513,7 @@ static void gc_worker(struct work_struct *work)
continue;
/* need to take reference to avoid possible races */
- if (!atomic_inc_not_zero(&tmp->ct_general.use))
+ if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
if (gc_worker_skip_ct(tmp)) {
@@ -1477,8 +1521,10 @@ static void gc_worker(struct work_struct *work)
continue;
}
- if (gc_worker_can_early_drop(tmp))
+ if (gc_worker_can_early_drop(tmp)) {
nf_ct_kill(tmp);
+ expired_count++;
+ }
nf_ct_put(tmp);
}
@@ -1491,33 +1537,38 @@ static void gc_worker(struct work_struct *work)
cond_resched();
i++;
- if (time_after(jiffies, end_time) && i < hashsz) {
+ delta_time = nfct_time_stamp - end_time;
+ if (delta_time > 0 && i < hashsz) {
+ gc_work->avg_timeout = next_run;
gc_work->next_bucket = i;
next_run = 0;
- break;
+ goto early_exit;
}
} while (i < hashsz);
+ gc_work->next_bucket = 0;
+
+ next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX);
+
+ delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1);
+ if (next_run > (unsigned long)delta_time)
+ next_run -= delta_time;
+ else
+ next_run = 1;
+
+early_exit:
if (gc_work->exiting)
return;
- /*
- * Eviction will normally happen from the packet path, and not
- * from this gc worker.
- *
- * This worker is only here to reap expired entries when system went
- * idle after a busy period.
- */
- if (next_run) {
+ if (next_run)
gc_work->early_drop = false;
- gc_work->next_bucket = 0;
- }
+
queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
- INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
+ INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
gc_work->exiting = false;
}
@@ -1571,7 +1622,7 @@ __nf_conntrack_alloc(struct net *net,
/* Because we use RCU lookups, we set ct_general.use to zero before
* this is inserted in any list.
*/
- atomic_set(&ct->ct_general.use, 0);
+ refcount_set(&ct->ct_general.use, 0);
return ct;
out:
atomic_dec(&cnet->count);
@@ -1596,7 +1647,7 @@ void nf_conntrack_free(struct nf_conn *ct)
/* A freed object has refcnt == 0, that's
* the golden rule for SLAB_TYPESAFE_BY_RCU
*/
- WARN_ON(atomic_read(&ct->ct_general.use) != 0);
+ WARN_ON(refcount_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
kmem_cache_free(nf_conntrack_cachep, ct);
@@ -1688,8 +1739,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
if (!exp)
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
- /* Now it is inserted into the unconfirmed list, bump refcount */
- nf_conntrack_get(&ct->ct_general);
+ /* Now it is inserted into the unconfirmed list, set refcount to 1. */
+ refcount_set(&ct->ct_general.use, 1);
nf_ct_add_to_unconfirmed_list(ct);
local_bh_enable();
@@ -1920,7 +1971,7 @@ repeat:
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
pr_debug("nf_conntrack_in: Can't track with proto module\n");
- nf_conntrack_put(&ct->ct_general);
+ nf_ct_put(ct);
skb->_nfct = 0;
/* Special case: TCP tracker reports an attempt to reopen a
* closed/aborted connection. We have to go back and create a
@@ -2301,7 +2352,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
return NULL;
found:
- atomic_inc(&ct->ct_general.use);
+ refcount_inc(&ct->ct_general.use);
spin_unlock(lockp);
local_bh_enable();
return ct;
@@ -2774,7 +2825,7 @@ err_cachep:
static struct nf_ct_hook nf_conntrack_hook = {
.update = nf_conntrack_update,
- .destroy = destroy_conntrack,
+ .destroy = nf_ct_destroy,
.get_tuple_skb = nf_conntrack_get_tuple_skb,
};