diff options
Diffstat (limited to 'net/netfilter/ipvs/ip_vs_ctl.c')
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 452 |
1 files changed, 364 insertions, 88 deletions
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 988222fff9f0..c9f598505642 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -49,8 +49,7 @@ MODULE_ALIAS_GENL_FAMILY(IPVS_GENL_NAME); -/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ -static DEFINE_MUTEX(__ip_vs_mutex); +DEFINE_MUTEX(__ip_vs_mutex); /* Serialize configuration with sockopt/netlink */ /* sysctl variables */ @@ -241,6 +240,47 @@ static void defense_work_handler(struct work_struct *work) } #endif +static void est_reload_work_handler(struct work_struct *work) +{ + struct netns_ipvs *ipvs = + container_of(work, struct netns_ipvs, est_reload_work.work); + int genid_done = atomic_read(&ipvs->est_genid_done); + unsigned long delay = HZ / 10; /* repeat startups after failure */ + bool repeat = false; + int genid; + int id; + + mutex_lock(&ipvs->est_mutex); + genid = atomic_read(&ipvs->est_genid); + for (id = 0; id < ipvs->est_kt_count; id++) { + struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id]; + + /* netns clean up started, abort delayed work */ + if (!ipvs->enable) + goto unlock; + if (!kd) + continue; + /* New config ? 
Stop kthread tasks */ + if (genid != genid_done) + ip_vs_est_kthread_stop(kd); + if (!kd->task && !ip_vs_est_stopped(ipvs)) { + /* Do not start kthreads above 0 in calc phase */ + if ((!id || !ipvs->est_calc_phase) && + ip_vs_est_kthread_start(ipvs, kd) < 0) + repeat = true; + } + } + + atomic_set(&ipvs->est_genid_done, genid); + + if (repeat) + queue_delayed_work(system_long_wq, &ipvs->est_reload_work, + delay); + +unlock: + mutex_unlock(&ipvs->est_mutex); +} + int ip_vs_use_count_inc(void) { @@ -471,7 +511,7 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) static void ip_vs_service_free(struct ip_vs_service *svc) { - free_percpu(svc->stats.cpustats); + ip_vs_stats_release(&svc->stats); kfree(svc); } @@ -483,17 +523,14 @@ static void ip_vs_service_rcu_free(struct rcu_head *head) ip_vs_service_free(svc); } -static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) +static void __ip_vs_svc_put(struct ip_vs_service *svc) { if (atomic_dec_and_test(&svc->refcnt)) { IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), ntohs(svc->port)); - if (do_delay) - call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); - else - ip_vs_service_free(svc); + call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); } } @@ -780,14 +817,22 @@ out: return dest; } +static void ip_vs_dest_rcu_free(struct rcu_head *head) +{ + struct ip_vs_dest *dest; + + dest = container_of(head, struct ip_vs_dest, rcu_head); + ip_vs_stats_release(&dest->stats); + ip_vs_dest_put_and_free(dest); +} + static void ip_vs_dest_free(struct ip_vs_dest *dest) { struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); __ip_vs_dst_cache_reset(dest); - __ip_vs_svc_put(svc, false); - free_percpu(dest->stats.cpustats); - ip_vs_dest_put_and_free(dest); + __ip_vs_svc_put(svc); + call_rcu(&dest->rcu_head, ip_vs_dest_rcu_free); } /* @@ -811,12 +856,22 @@ static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) } } +static void 
ip_vs_stats_rcu_free(struct rcu_head *head) +{ + struct ip_vs_stats_rcu *rs = container_of(head, + struct ip_vs_stats_rcu, + rcu_head); + + ip_vs_stats_release(&rs->s); + kfree(rs); +} + static void ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src) { #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c - spin_lock_bh(&src->lock); + spin_lock(&src->lock); IP_VS_SHOW_STATS_COUNTER(conns); IP_VS_SHOW_STATS_COUNTER(inpkts); @@ -826,7 +881,7 @@ ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src) ip_vs_read_estimator(dst, src); - spin_unlock_bh(&src->lock); + spin_unlock(&src->lock); } static void @@ -847,7 +902,7 @@ ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src) static void ip_vs_zero_stats(struct ip_vs_stats *stats) { - spin_lock_bh(&stats->lock); + spin_lock(&stats->lock); /* get current counters as zero point, rates are zeroed */ @@ -861,7 +916,48 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) ip_vs_zero_estimator(stats); - spin_unlock_bh(&stats->lock); + spin_unlock(&stats->lock); +} + +/* Allocate fields after kzalloc */ +int ip_vs_stats_init_alloc(struct ip_vs_stats *s) +{ + int i; + + spin_lock_init(&s->lock); + s->cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (!s->cpustats) + return -ENOMEM; + + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *cs = per_cpu_ptr(s->cpustats, i); + + u64_stats_init(&cs->syncp); + } + return 0; +} + +struct ip_vs_stats *ip_vs_stats_alloc(void) +{ + struct ip_vs_stats *s = kzalloc(sizeof(*s), GFP_KERNEL); + + if (s && ip_vs_stats_init_alloc(s) >= 0) + return s; + kfree(s); + return NULL; +} + +void ip_vs_stats_release(struct ip_vs_stats *stats) +{ + free_percpu(stats->cpustats); +} + +void ip_vs_stats_free(struct ip_vs_stats *stats) +{ + if (stats) { + ip_vs_stats_release(stats); + kfree(stats); + } } /* @@ -923,7 +1019,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, if (old_svc != svc) { 
ip_vs_zero_stats(&dest->stats); __ip_vs_bind_svc(dest, svc); - __ip_vs_svc_put(old_svc, true); + __ip_vs_svc_put(old_svc); } } @@ -942,7 +1038,6 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, spin_unlock_bh(&dest->dst_lock); if (add) { - ip_vs_start_estimator(svc->ipvs, &dest->stats); list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; sched = rcu_dereference_protected(svc->scheduler, 1); @@ -963,14 +1058,13 @@ static int ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - unsigned int atype, i; + unsigned int atype; + int ret; EnterFunction(2); #ifdef CONFIG_IP_VS_IPV6 if (udest->af == AF_INET6) { - int ret; - atype = ipv6_addr_type(&udest->addr.in6); if ((!(atype & IPV6_ADDR_UNICAST) || atype & IPV6_ADDR_LINKLOCAL) && @@ -992,15 +1086,13 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) if (dest == NULL) return -ENOMEM; - dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!dest->stats.cpustats) + ret = ip_vs_stats_init_alloc(&dest->stats); + if (ret < 0) goto err_alloc; - for_each_possible_cpu(i) { - struct ip_vs_cpu_stats *ip_vs_dest_stats; - ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i); - u64_stats_init(&ip_vs_dest_stats->syncp); - } + ret = ip_vs_start_estimator(svc->ipvs, &dest->stats); + if (ret < 0) + goto err_stats; dest->af = udest->af; dest->protocol = svc->protocol; @@ -1017,15 +1109,17 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) INIT_HLIST_NODE(&dest->d_list); spin_lock_init(&dest->dst_lock); - spin_lock_init(&dest->stats.lock); __ip_vs_update_dest(svc, dest, udest, 1); LeaveFunction(2); return 0; +err_stats: + ip_vs_stats_release(&dest->stats); + err_alloc: kfree(dest); - return -ENOMEM; + return ret; } @@ -1087,14 +1181,18 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) IP_VS_DBG_ADDR(svc->af, &dest->vaddr), ntohs(dest->vport)); 
+ ret = ip_vs_start_estimator(svc->ipvs, &dest->stats); + if (ret < 0) + goto err; __ip_vs_update_dest(svc, dest, udest, 1); - ret = 0; } else { /* * Allocate and initialize the dest structure */ ret = ip_vs_new_dest(svc, udest); } + +err: LeaveFunction(2); return ret; @@ -1284,7 +1382,7 @@ static int ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { - int ret = 0, i; + int ret = 0; struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; @@ -1344,18 +1442,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ret = -ENOMEM; goto out_err; } - svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!svc->stats.cpustats) { - ret = -ENOMEM; + ret = ip_vs_stats_init_alloc(&svc->stats); + if (ret < 0) goto out_err; - } - - for_each_possible_cpu(i) { - struct ip_vs_cpu_stats *ip_vs_stats; - ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i); - u64_stats_init(&ip_vs_stats->syncp); - } - /* I'm the first user of the service */ atomic_set(&svc->refcnt, 0); @@ -1372,7 +1461,6 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, INIT_LIST_HEAD(&svc->destinations); spin_lock_init(&svc->sched_lock); - spin_lock_init(&svc->stats.lock); /* Bind the scheduler */ if (sched) { @@ -1382,6 +1470,10 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, sched = NULL; } + ret = ip_vs_start_estimator(ipvs, &svc->stats); + if (ret < 0) + goto out_err; + /* Bind the ct retriever */ RCU_INIT_POINTER(svc->pe, pe); pe = NULL; @@ -1394,8 +1486,6 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, if (svc->pe && svc->pe->conn_out) atomic_inc(&ipvs->conn_out_counter); - ip_vs_start_estimator(ipvs, &svc->stats); - /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) ipvs->num_services++; @@ -1406,8 +1496,15 @@ ip_vs_add_service(struct 
netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ip_vs_svc_hash(svc); *svc_p = svc; - /* Now there is a service - full throttle */ - ipvs->enable = 1; + + if (!ipvs->enable) { + /* Now there is a service - full throttle */ + ipvs->enable = 1; + + /* Start estimation for first time */ + ip_vs_est_reload_start(ipvs); + } + return 0; @@ -1571,7 +1668,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* * Free the service if nobody refers to it */ - __ip_vs_svc_put(svc, true); + __ip_vs_svc_put(svc); /* decrease the module use count */ ip_vs_use_count_dec(); @@ -1761,7 +1858,7 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs) } } - ip_vs_zero_stats(&ipvs->tot_stats); + ip_vs_zero_stats(&ipvs->tot_stats->s); return 0; } @@ -1843,6 +1940,148 @@ proc_do_sync_ports(struct ctl_table *table, int write, return rc; } +static int ipvs_proc_est_cpumask_set(struct ctl_table *table, void *buffer) +{ + struct netns_ipvs *ipvs = table->extra2; + cpumask_var_t *valp = table->data; + cpumask_var_t newmask; + int ret; + + if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) + return -ENOMEM; + + ret = cpulist_parse(buffer, newmask); + if (ret) + goto out; + + mutex_lock(&ipvs->est_mutex); + + if (!ipvs->est_cpulist_valid) { + if (!zalloc_cpumask_var(valp, GFP_KERNEL)) { + ret = -ENOMEM; + goto unlock; + } + ipvs->est_cpulist_valid = 1; + } + cpumask_and(newmask, newmask, &current->cpus_mask); + cpumask_copy(*valp, newmask); + /* est_max_threads may depend on cpulist size */ + ipvs->est_max_threads = ip_vs_est_max_threads(ipvs); + ipvs->est_calc_phase = 1; + ip_vs_est_reload_start(ipvs); + +unlock: + mutex_unlock(&ipvs->est_mutex); + +out: + free_cpumask_var(newmask); + return ret; +} + +static int ipvs_proc_est_cpumask_get(struct ctl_table *table, void *buffer, + size_t size) +{ + struct netns_ipvs *ipvs = table->extra2; + cpumask_var_t *valp = table->data; + struct cpumask *mask; + int ret; + + mutex_lock(&ipvs->est_mutex); + + if (ipvs->est_cpulist_valid) + 
mask = *valp; + else + mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD); + ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask)); + + mutex_unlock(&ipvs->est_mutex); + + return ret; +} + +static int ipvs_proc_est_cpulist(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + /* Ignore both read and write(append) if *ppos not 0 */ + if (*ppos || !*lenp) { + *lenp = 0; + return 0; + } + if (write) { + /* proc_sys_call_handler() appends terminator */ + ret = ipvs_proc_est_cpumask_set(table, buffer); + if (ret >= 0) + *ppos += *lenp; + } else { + /* proc_sys_call_handler() allocates 1 byte for terminator */ + ret = ipvs_proc_est_cpumask_get(table, buffer, *lenp + 1); + if (ret >= 0) { + *lenp = ret; + *ppos += *lenp; + ret = 0; + } + } + return ret; +} + +static int ipvs_proc_est_nice(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct netns_ipvs *ipvs = table->extra2; + int *valp = table->data; + int val = *valp; + int ret; + + struct ctl_table tmp_table = { + .data = &val, + .maxlen = sizeof(int), + .mode = table->mode, + }; + + ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos); + if (write && ret >= 0) { + if (val < MIN_NICE || val > MAX_NICE) { + ret = -EINVAL; + } else { + mutex_lock(&ipvs->est_mutex); + if (*valp != val) { + *valp = val; + ip_vs_est_reload_start(ipvs); + } + mutex_unlock(&ipvs->est_mutex); + } + } + return ret; +} + +static int ipvs_proc_run_estimation(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct netns_ipvs *ipvs = table->extra2; + int *valp = table->data; + int val = *valp; + int ret; + + struct ctl_table tmp_table = { + .data = &val, + .maxlen = sizeof(int), + .mode = table->mode, + }; + + ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos); + if (write && ret >= 0) { + mutex_lock(&ipvs->est_mutex); + if (*valp != val) { + *valp = val; + ip_vs_est_reload_start(ipvs); + } + 
mutex_unlock(&ipvs->est_mutex); + } + return ret; +} + /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) * Do not change order or insert new entries without @@ -2017,7 +2256,19 @@ static struct ctl_table vs_vars[] = { .procname = "run_estimation", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = ipvs_proc_run_estimation, + }, + { + .procname = "est_cpulist", + .maxlen = NR_CPUS, /* unused */ + .mode = 0644, + .proc_handler = ipvs_proc_est_cpulist, + }, + { + .procname = "est_nice", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = ipvs_proc_est_nice, }, #ifdef CONFIG_IP_VS_DEBUG { @@ -2255,7 +2506,7 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) seq_puts(seq, " Conns Packets Packets Bytes Bytes\n"); - ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); + ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats->s); seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n", (unsigned long long)show.conns, (unsigned long long)show.inpkts, @@ -2279,7 +2530,7 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); - struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; + struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats->s; struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats; struct ip_vs_kstats kstats; int i; @@ -2296,13 +2547,13 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) u64 conns, inpkts, outpkts, inbytes, outbytes; do { - start = u64_stats_fetch_begin_irq(&u->syncp); - conns = u->cnt.conns; - inpkts = u->cnt.inpkts; - outpkts = u->cnt.outpkts; - inbytes = u->cnt.inbytes; - outbytes = u->cnt.outbytes; - } while (u64_stats_fetch_retry_irq(&u->syncp, start)); + start = u64_stats_fetch_begin(&u->syncp); + conns = u64_stats_read(&u->cnt.conns); + inpkts = u64_stats_read(&u->cnt.inpkts); + outpkts = u64_stats_read(&u->cnt.outpkts); + inbytes = 
u64_stats_read(&u->cnt.inbytes); + outbytes = u64_stats_read(&u->cnt.outbytes); + } while (u64_stats_fetch_retry(&u->syncp, start)); seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", i, (u64)conns, (u64)inpkts, @@ -4027,13 +4278,17 @@ static void ip_vs_genl_unregister(void) static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { struct net *net = ipvs->net; - int idx; struct ctl_table *tbl; + int idx, ret; atomic_set(&ipvs->dropentry, 0); spin_lock_init(&ipvs->dropentry_lock); spin_lock_init(&ipvs->droppacket_lock); spin_lock_init(&ipvs->securetcp_lock); + INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); + INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work, + expire_nodest_conn_handler); + ipvs->est_stopped = 0; if (!net_eq(net, &init_net)) { tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); @@ -4094,31 +4349,44 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_schedule_icmp; tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; ipvs->sysctl_run_estimation = 1; + tbl[idx].extra2 = ipvs; tbl[idx++].data = &ipvs->sysctl_run_estimation; + + ipvs->est_cpulist_valid = 0; + tbl[idx].extra2 = ipvs; + tbl[idx++].data = &ipvs->sysctl_est_cpulist; + + ipvs->sysctl_est_nice = IPVS_EST_NICE; + tbl[idx].extra2 = ipvs; + tbl[idx++].data = &ipvs->sysctl_est_nice; + #ifdef CONFIG_IP_VS_DEBUG /* Global sysctls must be ro in non-init netns */ if (!net_eq(net, &init_net)) tbl[idx++].mode = 0444; #endif + ret = -ENOMEM; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); - if (ipvs->sysctl_hdr == NULL) { - if (!net_eq(net, &init_net)) - kfree(tbl); - return -ENOMEM; - } - ip_vs_start_estimator(ipvs, &ipvs->tot_stats); + if (!ipvs->sysctl_hdr) + goto err; ipvs->sysctl_tbl = tbl; + + ret = ip_vs_start_estimator(ipvs, &ipvs->tot_stats->s); + if (ret < 0) + goto err; + /* Schedule defense work */ - INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); 
queue_delayed_work(system_long_wq, &ipvs->defense_work, DEFENSE_TIMER_PERIOD); - /* Init delayed work for expiring no dest conn */ - INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work, - expire_nodest_conn_handler); - return 0; + +err: + unregister_net_sysctl_table(ipvs->sysctl_hdr); + if (!net_eq(net, &init_net)) + kfree(tbl); + return ret; } static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) @@ -4129,7 +4397,10 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); - ip_vs_stop_estimator(ipvs, &ipvs->tot_stats); + ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s); + + if (ipvs->est_cpulist_valid) + free_cpumask_var(ipvs->sysctl_est_cpulist); if (!net_eq(net, &init_net)) kfree(ipvs->sysctl_tbl); @@ -4151,7 +4422,8 @@ static struct notifier_block ip_vs_dst_notifier = { int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) { - int i, idx; + int ret = -ENOMEM; + int idx; /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) @@ -4164,18 +4436,14 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) atomic_set(&ipvs->nullsvc_counter, 0); atomic_set(&ipvs->conn_out_counter, 0); - /* procfs stats */ - ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!ipvs->tot_stats.cpustats) - return -ENOMEM; - - for_each_possible_cpu(i) { - struct ip_vs_cpu_stats *ipvs_tot_stats; - ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i); - u64_stats_init(&ipvs_tot_stats->syncp); - } + INIT_DELAYED_WORK(&ipvs->est_reload_work, est_reload_work_handler); - spin_lock_init(&ipvs->tot_stats.lock); + /* procfs stats */ + ipvs->tot_stats = kzalloc(sizeof(*ipvs->tot_stats), GFP_KERNEL); + if (!ipvs->tot_stats) + goto out; + if (ip_vs_stats_init_alloc(&ipvs->tot_stats->s) < 0) + goto err_tot_stats; #ifdef CONFIG_PROC_FS if 
(!proc_create_net("ip_vs", 0, ipvs->net->proc_net, @@ -4190,7 +4458,8 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) goto err_percpu; #endif - if (ip_vs_control_net_init_sysctl(ipvs)) + ret = ip_vs_control_net_init_sysctl(ipvs); + if (ret < 0) goto err; return 0; @@ -4207,20 +4476,26 @@ err_stats: err_vs: #endif - free_percpu(ipvs->tot_stats.cpustats); - return -ENOMEM; + ip_vs_stats_release(&ipvs->tot_stats->s); + +err_tot_stats: + kfree(ipvs->tot_stats); + +out: + return ret; } void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) { ip_vs_trash_cleanup(ipvs); ip_vs_control_net_cleanup_sysctl(ipvs); + cancel_delayed_work_sync(&ipvs->est_reload_work); #ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); remove_proc_entry("ip_vs", ipvs->net->proc_net); #endif - free_percpu(ipvs->tot_stats.cpustats); + call_rcu(&ipvs->tot_stats->rcu_head, ip_vs_stats_rcu_free); } int __init ip_vs_register_nl_ioctl(void) @@ -4280,5 +4555,6 @@ void ip_vs_control_cleanup(void) { EnterFunction(2); unregister_netdevice_notifier(&ip_vs_dst_notifier); + /* relying on common rcu_barrier() in ip_vs_cleanup() */ LeaveFunction(2); } |