Diffstat (limited to 'mm/vmstat.c')
 mm/vmstat.c | 246 ++++++++++++++++++++++----------------------
 1 file changed, 102 insertions(+), 144 deletions(-)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index cccee36b289c..b0534e068166 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -31,8 +31,6 @@
#include "internal.h"
-#define NUMA_STATS_THRESHOLD (U16_MAX - 2)
-
#ifdef CONFIG_NUMA
int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
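Note: NUMA_STATS_THRESHOLD existed because the old per-CPU NUMA counters were u16 diffs that could overflow and had to be flushed into the zone counter once they grew too large. The new scheme counts events in a plain per-CPU unsigned long that is only drained when a reader asks for it, so no threshold is needed. The hot-path increment moves to a header helper in the companion vmstat.h change; a sketch of its expected shape (assumed, not part of this file):

	static inline void __count_numa_event(struct zone *zone,
						enum numa_stat_item item)
	{
		struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;

		/* No threshold check: the per-CPU event count cannot meaningfully overflow. */
		__this_cpu_inc(pzstats->vm_numa_event[item]);
	}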
@@ -41,11 +39,12 @@ static void zero_zone_numa_counters(struct zone *zone)
{
int item, cpu;
- for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
- atomic_long_set(&zone->vm_numa_stat[item], 0);
- for_each_online_cpu(cpu)
- per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
+ for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
+ atomic_long_set(&zone->vm_numa_event[item], 0);
+ for_each_online_cpu(cpu) {
+ per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item]
= 0;
+ }
}
}
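Note: zone->pageset (struct per_cpu_pageset) is split by a companion mmzone.h change in this series into zone->per_cpu_pageset (the page lists) and zone->per_cpu_zonestats (the statistics), which is why every access in this file is renamed. A sketch of the assumed layout after the split, for orientation only (types and exact fields are not shown in this diff):

	struct per_cpu_pages {
		int count;		/* number of pages in the lists */
		int high;		/* high watermark, emptying needed */
		int batch;		/* chunk size for buddy add/remove */
	#ifdef CONFIG_NUMA
		short expire;		/* when 0, remote pagesets are drained */
	#endif
		struct list_head lists[MIGRATE_PCPTYPES];	/* per-migratetype free lists */
	};

	struct per_cpu_zonestat {
	#ifdef CONFIG_SMP
		s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
		s8 stat_threshold;
	#endif
	#ifdef CONFIG_NUMA
		unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
	#endif
	};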
@@ -63,8 +62,8 @@ static void zero_global_numa_counters(void)
{
int item;
- for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
- atomic_long_set(&vm_numa_stat[item], 0);
+ for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
+ atomic_long_set(&vm_numa_event[item], 0);
}
static void invalid_numa_statistics(void)
@@ -161,10 +160,9 @@ void vm_events_fold_cpu(int cpu)
* vm_stat contains the global counters
*/
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
-atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
+atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
-EXPORT_SYMBOL(vm_numa_stat);
EXPORT_SYMBOL(vm_node_stat);
#ifdef CONFIG_SMP
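Note: vm_numa_event[] replaces vm_numa_stat[] as the global array and is deliberately not exported; readers go through helpers added to vmstat.h by the same series. A sketch of what the accumulation helper used below is expected to do (assumed, defined in the header, not in this file):

	static inline void zone_numa_event_add(long x, struct zone *zone,
						enum numa_stat_item item)
	{
		atomic_long_add(x, &zone->vm_numa_event[item]);
		atomic_long_add(x, &vm_numa_event[item]);
	}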
@@ -266,7 +264,7 @@ void refresh_zone_stat_thresholds(void)
for_each_online_cpu(cpu) {
int pgdat_threshold;
- per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+ per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
= threshold;
/* Base nodestat threshold on the largest populated zone. */
@@ -303,7 +301,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
threshold = (*calculate_pressure)(zone);
for_each_online_cpu(cpu)
- per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+ per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
= threshold;
}
}
@@ -316,7 +314,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
long delta)
{
- struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
s8 __percpu *p = pcp->vm_stat_diff + item;
long x;
long t;
@@ -389,7 +387,7 @@ EXPORT_SYMBOL(__mod_node_page_state);
*/
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
- struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t;
@@ -435,7 +433,7 @@ EXPORT_SYMBOL(__inc_node_page_state);
void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
- struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t;
@@ -495,7 +493,7 @@ EXPORT_SYMBOL(__dec_node_page_state);
static inline void mod_zone_state(struct zone *zone,
enum zone_stat_item item, long delta, int overstep_mode)
{
- struct per_cpu_pageset __percpu *pcp = zone->pageset;
+ struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
s8 __percpu *p = pcp->vm_stat_diff + item;
long o, n, t, z;
@@ -706,8 +704,7 @@ EXPORT_SYMBOL(dec_node_page_state);
* Fold a differential into the global counters.
* Returns the number of counters updated.
*/
-#ifdef CONFIG_NUMA
-static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
+static int fold_diff(int *zone_diff, int *node_diff)
{
int i;
int changes = 0;
@@ -718,12 +715,6 @@ static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
changes++;
}
- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
- if (numa_diff[i]) {
- atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
- changes++;
- }
-
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
if (node_diff[i]) {
atomic_long_add(node_diff[i], &vm_node_stat[i]);
@@ -731,26 +722,34 @@ static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
}
return changes;
}
-#else
-static int fold_diff(int *zone_diff, int *node_diff)
+
+#ifdef CONFIG_NUMA
+static void fold_vm_zone_numa_events(struct zone *zone)
{
- int i;
- int changes = 0;
+ unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, };
+ int cpu;
+ enum numa_stat_item item;
- for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- if (zone_diff[i]) {
- atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
- changes++;
- }
+ for_each_online_cpu(cpu) {
+ struct per_cpu_zonestat *pzstats;
- for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
- if (node_diff[i]) {
- atomic_long_add(node_diff[i], &vm_node_stat[i]);
- changes++;
+ pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
+ for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
+ zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0);
}
- return changes;
+
+ for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
+ zone_numa_event_add(zone_numa_events[item], zone, item);
+}
+
+void fold_vm_numa_events(void)
+{
+ struct zone *zone;
+
+ for_each_populated_zone(zone)
+ fold_vm_zone_numa_events(zone);
}
-#endif /* CONFIG_NUMA */
+#endif
/*
* Update the zone counters for the current cpu.
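Note: fold_vm_zone_numa_events() drains each per-CPU counter with xchg(), so an increment racing with the fold is either picked up now or left for the next fold, never lost; the drained total is then pushed into the zone and global atomics via zone_numa_event_add(). The fold is lazy: nothing flushes these counters periodically, readers trigger it (for example vmstat_start() for /proc/vmstat, later in this patch). A minimal usage sketch, illustrative only:

	/* Before sampling global NUMA counters, pull in pending per-CPU events. */
	fold_vm_numa_events();
	hits = global_numa_event_state(NUMA_HIT);	/* reader helper from vmstat.h */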
@@ -774,41 +773,30 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
struct zone *zone;
int i;
int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
-#ifdef CONFIG_NUMA
- int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
-#endif
int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
int changes = 0;
for_each_populated_zone(zone) {
- struct per_cpu_pageset __percpu *p = zone->pageset;
+ struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
+#ifdef CONFIG_NUMA
+ struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset;
+#endif
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
int v;
- v = this_cpu_xchg(p->vm_stat_diff[i], 0);
+ v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0);
if (v) {
atomic_long_add(v, &zone->vm_stat[i]);
global_zone_diff[i] += v;
#ifdef CONFIG_NUMA
/* 3 seconds idle till flush */
- __this_cpu_write(p->expire, 3);
+ __this_cpu_write(pcp->expire, 3);
#endif
}
}
#ifdef CONFIG_NUMA
- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
- int v;
-
- v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
- if (v) {
-
- atomic_long_add(v, &zone->vm_numa_stat[i]);
- global_numa_diff[i] += v;
- __this_cpu_write(p->expire, 3);
- }
- }
if (do_pagesets) {
cond_resched();
@@ -819,23 +807,23 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
* Check if there are pages remaining in this pageset
* if not then there is nothing to expire.
*/
- if (!__this_cpu_read(p->expire) ||
- !__this_cpu_read(p->pcp.count))
+ if (!__this_cpu_read(pcp->expire) ||
+ !__this_cpu_read(pcp->count))
continue;
/*
* We never drain zones local to this processor.
*/
if (zone_to_nid(zone) == numa_node_id()) {
- __this_cpu_write(p->expire, 0);
+ __this_cpu_write(pcp->expire, 0);
continue;
}
- if (__this_cpu_dec_return(p->expire))
+ if (__this_cpu_dec_return(pcp->expire))
continue;
- if (__this_cpu_read(p->pcp.count)) {
- drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
+ if (__this_cpu_read(pcp->count)) {
+ drain_zone_pages(zone, this_cpu_ptr(pcp));
changes++;
}
}
@@ -856,12 +844,7 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
}
}
-#ifdef CONFIG_NUMA
- changes += fold_diff(global_zone_diff, global_numa_diff,
- global_node_diff);
-#else
changes += fold_diff(global_zone_diff, global_node_diff);
-#endif
return changes;
}
@@ -876,36 +859,33 @@ void cpu_vm_stats_fold(int cpu)
struct zone *zone;
int i;
int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
-#ifdef CONFIG_NUMA
- int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
-#endif
int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
for_each_populated_zone(zone) {
- struct per_cpu_pageset *p;
+ struct per_cpu_zonestat *pzstats;
- p = per_cpu_ptr(zone->pageset, cpu);
+ pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
- for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- if (p->vm_stat_diff[i]) {
+ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
+ if (pzstats->vm_stat_diff[i]) {
int v;
- v = p->vm_stat_diff[i];
- p->vm_stat_diff[i] = 0;
+ v = pzstats->vm_stat_diff[i];
+ pzstats->vm_stat_diff[i] = 0;
atomic_long_add(v, &zone->vm_stat[i]);
global_zone_diff[i] += v;
}
-
+ }
#ifdef CONFIG_NUMA
- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
- if (p->vm_numa_stat_diff[i]) {
- int v;
+ for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
+ if (pzstats->vm_numa_event[i]) {
+ unsigned long v;
- v = p->vm_numa_stat_diff[i];
- p->vm_numa_stat_diff[i] = 0;
- atomic_long_add(v, &zone->vm_numa_stat[i]);
- global_numa_diff[i] += v;
+ v = pzstats->vm_numa_event[i];
+ pzstats->vm_numa_event[i] = 0;
+ zone_numa_event_add(v, zone, i);
}
+ }
#endif
}
@@ -925,58 +905,39 @@ void cpu_vm_stats_fold(int cpu)
}
}
-#ifdef CONFIG_NUMA
- fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
-#else
fold_diff(global_zone_diff, global_node_diff);
-#endif
}
/*
* this is only called if !populated_zone(zone), which implies no other users of
* pset->vm_stat_diff[] exist.
*/
-void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
+void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
{
+ unsigned long v;
int i;
- for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- if (pset->vm_stat_diff[i]) {
- int v = pset->vm_stat_diff[i];
- pset->vm_stat_diff[i] = 0;
- atomic_long_add(v, &zone->vm_stat[i]);
- atomic_long_add(v, &vm_zone_stat[i]);
+ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
+ if (pzstats->vm_stat_diff[i]) {
+ v = pzstats->vm_stat_diff[i];
+ pzstats->vm_stat_diff[i] = 0;
+ zone_page_state_add(v, zone, i);
}
+ }
#ifdef CONFIG_NUMA
- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
- if (pset->vm_numa_stat_diff[i]) {
- int v = pset->vm_numa_stat_diff[i];
-
- pset->vm_numa_stat_diff[i] = 0;
- atomic_long_add(v, &zone->vm_numa_stat[i]);
- atomic_long_add(v, &vm_numa_stat[i]);
+ for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
+ if (pzstats->vm_numa_event[i]) {
+ v = pzstats->vm_numa_event[i];
+ pzstats->vm_numa_event[i] = 0;
+ zone_numa_event_add(v, zone, i);
}
+ }
#endif
}
#endif
#ifdef CONFIG_NUMA
-void __inc_numa_state(struct zone *zone,
- enum numa_stat_item item)
-{
- struct per_cpu_pageset __percpu *pcp = zone->pageset;
- u16 __percpu *p = pcp->vm_numa_stat_diff + item;
- u16 v;
-
- v = __this_cpu_inc_return(*p);
-
- if (unlikely(v > NUMA_STATS_THRESHOLD)) {
- zone_numa_state_add(v, zone, item);
- __this_cpu_write(*p, 0);
- }
-}
-
/*
* Determine the per node value of a stat item. This function
* is called frequently in a NUMA machine, so try to be as
@@ -995,19 +956,16 @@ unsigned long sum_zone_node_page_state(int node,
return count;
}
-/*
- * Determine the per node value of a numa stat item. To avoid deviation,
- * the per cpu stat number in vm_numa_stat_diff[] is also included.
- */
-unsigned long sum_zone_numa_state(int node,
+/* Determine the per node value of a numa stat item. */
+unsigned long sum_zone_numa_event_state(int node,
enum numa_stat_item item)
{
struct zone *zones = NODE_DATA(node)->node_zones;
- int i;
unsigned long count = 0;
+ int i;
for (i = 0; i < MAX_NR_ZONES; i++)
- count += zone_numa_state_snapshot(zones + i, item);
+ count += zone_numa_event_state(zones + i, item);
return count;
}
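Note: the old sum_zone_numa_state() also added the pending per-CPU vm_numa_stat_diff[] values to avoid visible deviation; with events that are folded on demand, reading the zone atomic is enough. The per-zone reader used above is expected to be roughly the following (a sketch, defined in vmstat.h, not in this file):

	static inline unsigned long zone_numa_event_state(struct zone *zone,
						enum numa_stat_item item)
	{
		return atomic_long_read(&zone->vm_numa_event[item]);
	}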
@@ -1686,28 +1644,30 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
zone_page_state(zone, i));
#ifdef CONFIG_NUMA
- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+ for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
seq_printf(m, "\n %-12s %lu", numa_stat_name(i),
- zone_numa_state_snapshot(zone, i));
+ zone_numa_event_state(zone, i));
#endif
seq_printf(m, "\n pagesets");
for_each_online_cpu(i) {
- struct per_cpu_pageset *pageset;
+ struct per_cpu_pages *pcp;
+ struct per_cpu_zonestat __maybe_unused *pzstats;
- pageset = per_cpu_ptr(zone->pageset, i);
+ pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
seq_printf(m,
"\n cpu: %i"
"\n count: %i"
"\n high: %i"
"\n batch: %i",
i,
- pageset->pcp.count,
- pageset->pcp.high,
- pageset->pcp.batch);
+ pcp->count,
+ pcp->high,
+ pcp->batch);
#ifdef CONFIG_SMP
+ pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
seq_printf(m, "\n vm stats threshold: %d",
- pageset->stat_threshold);
+ pzstats->stat_threshold);
#endif
}
seq_printf(m,
@@ -1740,7 +1700,7 @@ static const struct seq_operations zoneinfo_op = {
};
#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
- NR_VM_NUMA_STAT_ITEMS + \
+ NR_VM_NUMA_EVENT_ITEMS + \
NR_VM_NODE_STAT_ITEMS + \
NR_VM_WRITEBACK_STAT_ITEMS + \
(IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
@@ -1755,6 +1715,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
return NULL;
BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
+ fold_vm_numa_events();
v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
m->private = v;
if (!v)
@@ -1764,9 +1725,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
v += NR_VM_ZONE_STAT_ITEMS;
#ifdef CONFIG_NUMA
- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
- v[i] = global_numa_state(i);
- v += NR_VM_NUMA_STAT_ITEMS;
+ for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
+ v[i] = global_numa_event_state(i);
+ v += NR_VM_NUMA_EVENT_ITEMS;
#endif
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
@@ -1927,19 +1888,16 @@ static bool need_update(int cpu)
struct zone *zone;
for_each_populated_zone(zone) {
- struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
+ struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
struct per_cpu_nodestat *n;
+
/*
* The fast way of checking if there are any vmstat diffs.
*/
- if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
- sizeof(p->vm_stat_diff[0])))
- return true;
-#ifdef CONFIG_NUMA
- if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
- sizeof(p->vm_numa_stat_diff[0])))
+ if (memchr_inv(pzstats->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
+ sizeof(pzstats->vm_stat_diff[0])))
return true;
-#endif
+
if (last_pgdat == zone->zone_pgdat)
continue;
last_pgdat = zone->zone_pgdat;