summaryrefslogtreecommitdiff
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c118
1 files changed, 94 insertions, 24 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e6de0d6a3a8d..e3d56927a724 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4275,6 +4275,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
}
spin_unlock(&memcg->event_list_lock);
+ page_counter_set_min(&memcg->memory, 0);
page_counter_set_low(&memcg->memory, 0);
memcg_offline_kmem(memcg);
@@ -4329,6 +4330,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
page_counter_set_max(&memcg->memsw, PAGE_COUNTER_MAX);
page_counter_set_max(&memcg->kmem, PAGE_COUNTER_MAX);
page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
+ page_counter_set_min(&memcg->memory, 0);
page_counter_set_low(&memcg->memory, 0);
memcg->high = PAGE_COUNTER_MAX;
memcg->soft_limit = PAGE_COUNTER_MAX;
@@ -5066,6 +5068,36 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE;
}
+static int memory_min_show(struct seq_file *m, void *v) /* .seq_show handler registered for the "min" cftype */
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+ unsigned long min = READ_ONCE(memcg->memory.min); /* single read of the configured value; scaled by PAGE_SIZE below */
+
+ if (min == PAGE_COUNTER_MAX) /* the sentinel is printed symbolically instead of as a number */
+ seq_puts(m, "max\n");
+ else
+ seq_printf(m, "%llu\n", (u64)min * PAGE_SIZE); /* widen to u64 before multiplying by PAGE_SIZE */
+
+ return 0;
+}
+
+static ssize_t memory_min_write(struct kernfs_open_file *of, /* .write handler registered for the "min" cftype */
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ unsigned long min;
+ int err;
+
+ buf = strstrip(buf); /* strip whitespace around the user input before parsing */
+ err = page_counter_memparse(buf, "max", &min); /* "max" is passed as the symbolic keyword; parsed value lands in min */
+ if (err)
+ return err; /* parse failure is handed back to the writer unchanged */
+
+ page_counter_set_min(&memcg->memory, min);
+
+ return nbytes; /* report the whole buffer as consumed */
+}
+
static int memory_low_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
@@ -5301,6 +5333,12 @@ static struct cftype memory_files[] = {
.read_u64 = memory_current_read,
},
{
+ .name = "min",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = memory_min_show,
+ .write = memory_min_write,
+ },
+ {
.name = "low",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_low_show,
@@ -5349,19 +5387,24 @@ struct cgroup_subsys memory_cgrp_subsys = {
};
/**
- * mem_cgroup_low - check if memory consumption is in the normal range
+ * mem_cgroup_protected - check if memory consumption is in the normal range
* @root: the top ancestor of the sub-tree being checked
* @memcg: the memory cgroup to check
*
* WARNING: This function is not stateless! It can only be used as part
* of a top-down tree iteration, not for isolated queries.
*
- * Returns %true if memory consumption of @memcg is in the normal range.
+ * Returns one of the following:
+ * MEMCG_PROT_NONE: cgroup memory is not protected
+ * MEMCG_PROT_LOW: cgroup memory is protected as long as there is
+ * an unprotected supply of reclaimable memory from other cgroups.
+ * MEMCG_PROT_MIN: cgroup memory is protected
*
- * @root is exclusive; it is never low when looked at directly
+ * @root is exclusive; it is never protected when looked at directly
*
- * To provide a proper hierarchical behavior, effective memory.low value
- * is used.
+ * To provide a proper hierarchical behavior, effective memory.min/low values
+ * are used. Below is the description of how effective memory.low is calculated.
+ * Effective memory.min value is calculated in the same way.
*
* Effective memory.low is always equal or less than the original memory.low.
* If there is no memory.low overcommittment (which is always true for
@@ -5406,51 +5449,78 @@ struct cgroup_subsys memory_cgrp_subsys = {
* E/memory.current = 0
*
* These calculations require constant tracking of the actual low usages
- * (see propagate_low_usage()), as well as recursive calculation of
- * effective memory.low values. But as we do call mem_cgroup_low()
+ * (see propagate_protected_usage()), as well as recursive calculation of
+ * effective memory.low values. But as we do call mem_cgroup_protected()
* path for each memory cgroup top-down from the reclaim,
* it's possible to optimize this part, and save calculated elow
* for next usage. This part is intentionally racy, but it's ok,
* as memory.low is a best-effort mechanism.
*/
-bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
+enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
+ struct mem_cgroup *memcg)
{
- unsigned long usage, low_usage, siblings_low_usage;
- unsigned long elow, parent_elow;
struct mem_cgroup *parent;
+ unsigned long emin, parent_emin;
+ unsigned long elow, parent_elow;
+ unsigned long usage;
if (mem_cgroup_disabled())
- return false;
+ return MEMCG_PROT_NONE;
if (!root)
root = root_mem_cgroup;
if (memcg == root)
- return false;
+ return MEMCG_PROT_NONE; /* @root itself is never reported as protected */
- elow = memcg->memory.low;
usage = page_counter_read(&memcg->memory);
- parent = parent_mem_cgroup(memcg);
+ if (!usage) /* nothing charged: returns before the emin/elow stores at exit: */
+ return MEMCG_PROT_NONE;
+
+ emin = memcg->memory.min; /* start from the configured values ... */
+ elow = memcg->memory.low;
+ parent = parent_mem_cgroup(memcg);
if (parent == root)
goto exit;
+ parent_emin = READ_ONCE(parent->memory.emin); /* value stored for the parent at the exit: label of an earlier call */
+ emin = min(emin, parent_emin); /* ... and cap by the parent's effective min */
+ if (emin && parent_emin) { /* proportional scaling only when both are non-zero */
+ unsigned long min_usage, siblings_min_usage;
+
+ min_usage = min(usage, memcg->memory.min); /* part of usage covered by this cgroup's own memory.min */
+ siblings_min_usage = atomic_long_read(
+ &parent->memory.children_min_usage);
+
+ if (min_usage && siblings_min_usage) /* non-zero check also guards the division below */
+ emin = min(emin, parent_emin * min_usage /
+ siblings_min_usage);
+ }
+
parent_elow = READ_ONCE(parent->memory.elow);
elow = min(elow, parent_elow);
+ if (elow && parent_elow) { /* same scheme as the emin block above, applied to low */
+ unsigned long low_usage, siblings_low_usage;
- if (!elow || !parent_elow)
- goto exit;
+ low_usage = min(usage, memcg->memory.low);
+ siblings_low_usage = atomic_long_read(
+ &parent->memory.children_low_usage);
- low_usage = min(usage, memcg->memory.low);
- siblings_low_usage = atomic_long_read(
- &parent->memory.children_low_usage);
-
- if (!low_usage || !siblings_low_usage)
- goto exit;
+ if (low_usage && siblings_low_usage) /* non-zero check also guards the division below */
+ elow = min(elow, parent_elow * low_usage /
+ siblings_low_usage);
+ }
- elow = min(elow, parent_elow * low_usage / siblings_low_usage);
exit:
+ memcg->memory.emin = emin; /* stored so calls on child cgroups can read it as parent->memory.emin */
memcg->memory.elow = elow;
- return usage && usage <= elow;
+
+ if (usage <= emin) /* min is tested first, so it takes precedence over low */
+ return MEMCG_PROT_MIN;
+ else if (usage <= elow)
+ return MEMCG_PROT_LOW;
+ else
+ return MEMCG_PROT_NONE;
}
/**