mm: vmstat: use a single setter function and callback for adjusting percpu thresholds
reduce_pgdat_percpu_threshold() and restore_pgdat_percpu_threshold() exist
to adjust the per-cpu vmstat thresholds while kswapd is awake to avoid
errors due to counter drift. The functions duplicate some code so this
patch replaces them with a single set_pgdat_percpu_threshold() that takes
a callback function to calculate the desired threshold as a parameter.
[akpm@linux-foundation.org: readability tweak]
[kosaki.motohiro@jp.fujitsu.com: set_pgdat_percpu_threshold(): don't use for_each_online_cpu]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Christoph Lameter <cl@linux.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index e4cc21c..833e676 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -254,8 +254,11 @@
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
void refresh_cpu_vm_stats(int);
-void reduce_pgdat_percpu_threshold(pg_data_t *pgdat);
-void restore_pgdat_percpu_threshold(pg_data_t *pgdat);
+
+int calculate_pressure_threshold(struct zone *zone);
+int calculate_normal_threshold(struct zone *zone);
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
+ int (*calculate_pressure)(struct zone *));
#else /* CONFIG_SMP */
/*
@@ -300,8 +303,7 @@
#define dec_zone_page_state __dec_zone_page_state
#define mod_zone_page_state __mod_zone_page_state
-static inline void reduce_pgdat_percpu_threshold(pg_data_t *pgdat) { }
-static inline void restore_pgdat_percpu_threshold(pg_data_t *pgdat) { }
+#define set_pgdat_percpu_threshold(pgdat, callback) { }
static inline void refresh_cpu_vm_stats(int cpu) { }
#endif
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5da4295..86f8c34 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2448,9 +2448,24 @@
*/
if (!sleeping_prematurely(pgdat, order, remaining)) {
trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
- restore_pgdat_percpu_threshold(pgdat);
+
+ /*
+ * vmstat counters are not perfectly
+ * accurate and the estimated value
+ * for counters such as NR_FREE_PAGES
+ * can deviate from the true value by
+ * nr_online_cpus * threshold. To
+ * avoid the zone watermarks being
+ * breached while under pressure, we
+ * reduce the per-cpu vmstat threshold
+ * while kswapd is awake and restore
+ * them before going back to sleep.
+ */
+ set_pgdat_percpu_threshold(pgdat,
+ calculate_normal_threshold);
schedule();
- reduce_pgdat_percpu_threshold(pgdat);
+ set_pgdat_percpu_threshold(pgdat,
+ calculate_pressure_threshold);
} else {
if (remaining)
count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index bc0f095..751a65e 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -83,7 +83,7 @@
#ifdef CONFIG_SMP
-static int calculate_pressure_threshold(struct zone *zone)
+int calculate_pressure_threshold(struct zone *zone)
{
int threshold;
int watermark_distance;
@@ -107,7 +107,7 @@
return threshold;
}
-static int calculate_threshold(struct zone *zone)
+int calculate_normal_threshold(struct zone *zone)
{
int threshold;
int mem; /* memory in 128 MB units */
@@ -166,7 +166,7 @@
for_each_populated_zone(zone) {
unsigned long max_drift, tolerate_drift;
- threshold = calculate_threshold(zone);
+ threshold = calculate_normal_threshold(zone);
for_each_online_cpu(cpu)
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
@@ -185,46 +185,24 @@
}
}
-void reduce_pgdat_percpu_threshold(pg_data_t *pgdat)
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
+ int (*calculate_pressure)(struct zone *))
{
struct zone *zone;
int cpu;
int threshold;
int i;
- get_online_cpus();
for (i = 0; i < pgdat->nr_zones; i++) {
zone = &pgdat->node_zones[i];
if (!zone->percpu_drift_mark)
continue;
- threshold = calculate_pressure_threshold(zone);
- for_each_online_cpu(cpu)
+ threshold = (*calculate_pressure)(zone);
+ for_each_possible_cpu(cpu)
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
= threshold;
}
- put_online_cpus();
-}
-
-void restore_pgdat_percpu_threshold(pg_data_t *pgdat)
-{
- struct zone *zone;
- int cpu;
- int threshold;
- int i;
-
- get_online_cpus();
- for (i = 0; i < pgdat->nr_zones; i++) {
- zone = &pgdat->node_zones[i];
- if (!zone->percpu_drift_mark)
- continue;
-
- threshold = calculate_threshold(zone);
- for_each_online_cpu(cpu)
- per_cpu_ptr(zone->pageset, cpu)->stat_threshold
- = threshold;
- }
- put_online_cpus();
}
/*