[PATCH] memory hotplug locking: zone span seqlock
See the "fixup bad_range()" patch for more information, but this actually
creates the lock to protect things making assumptions about a zone's size
staying constant at runtime.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index e8103be..4b08bc9 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -16,13 +16,36 @@
static inline
void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
{
- spin_lock_irqrestore(&pgdat->node_size_lock, *flags);
+ spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
}
static inline
void pgdat_resize_init(struct pglist_data *pgdat)
{
spin_lock_init(&pgdat->node_size_lock);
}
+/*
+ * Zone resizing functions
+ */
+static inline unsigned zone_span_seqbegin(struct zone *zone)
+{
+ return read_seqbegin(&zone->span_seqlock);
+}
+static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
+{
+ return read_seqretry(&zone->span_seqlock, iv);
+}
+static inline void zone_span_writelock(struct zone *zone)
+{
+ write_seqlock(&zone->span_seqlock);
+}
+static inline void zone_span_writeunlock(struct zone *zone)
+{
+ write_sequnlock(&zone->span_seqlock);
+}
+static inline void zone_seqlock_init(struct zone *zone)
+{
+ seqlock_init(&zone->span_seqlock);
+}
#else /* ! CONFIG_MEMORY_HOTPLUG */
/*
* Stub functions for when hotplug is off
@@ -30,5 +53,17 @@
static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
-#endif
+
+static inline unsigned zone_span_seqbegin(struct zone *zone)
+{
+ return 0;
+}
+static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
+{
+ return 0;
+}
+static inline void zone_span_writelock(struct zone *zone) {}
+static inline void zone_span_writeunlock(struct zone *zone) {}
+static inline void zone_seqlock_init(struct zone *zone) {}
+#endif /* ! CONFIG_MEMORY_HOTPLUG */
#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e050d68..f5fa308 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -12,6 +12,7 @@
#include <linux/threads.h>
#include <linux/numa.h>
#include <linux/init.h>
+#include <linux/seqlock.h>
#include <asm/atomic.h>
/* Free memory management - zoned buddy allocator. */
@@ -137,6 +138,10 @@
* free areas of different sizes
*/
spinlock_t lock;
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /* see spanned/present_pages for more description */
+ seqlock_t span_seqlock;
+#endif
struct free_area free_area[MAX_ORDER];
@@ -220,6 +225,16 @@
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;
+ /*
+ * zone_start_pfn, spanned_pages and present_pages are all
+ * protected by span_seqlock. It is a seqlock because it has
+ * to be read outside of zone->lock, and it is done in the main
+ * allocator path. But, it is written quite infrequently.
+ *
+ * The lock is declared along with zone->lock because it is
+ * frequently read in proximity to zone->lock. It's good to
+ * give them a chance of being in the same cacheline.
+ */
unsigned long spanned_pages; /* total size, including holes */
unsigned long present_pages; /* amount of memory (excluding holes) */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 32fad6d..817635f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -33,6 +33,7 @@
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
+#include <linux/memory_hotplug.h>
#include <linux/nodemask.h>
#include <linux/vmalloc.h>
@@ -80,12 +81,19 @@
static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
{
- if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages)
- return 1;
- if (page_to_pfn(page) < zone->zone_start_pfn)
- return 1;
+ int ret = 0;
+ unsigned seq;
+ unsigned long pfn = page_to_pfn(page);
- return 0;
+ do {
+ seq = zone_span_seqbegin(zone);
+ /* Assign, never accumulate: a verdict computed from a torn
+  * read of the span fields must not survive the retry. */
+ ret = pfn >= zone->zone_start_pfn + zone->spanned_pages ||
+ pfn < zone->zone_start_pfn;
+ } while (zone_span_seqretry(zone, seq));
+
+ return ret;
}
static int page_is_consistent(struct zone *zone, struct page *page)
@@ -1980,6 +1988,7 @@
zone->name = zone_names[j];
spin_lock_init(&zone->lock);
spin_lock_init(&zone->lru_lock);
+ zone_seqlock_init(zone);
zone->zone_pgdat = pgdat;
zone->free_pages = 0;