thp: introduce deferred_split_huge_page()

Currently we don't split a huge page on partial unmap.  It's not an ideal
situation: it can lead to memory overhead.

Fortunately, we can detect partial unmap in page_remove_rmap().  But we
cannot call split_huge_page() from there due to the locking context.

It's also counterproductive to do it directly from the munmap() codepath:
in many cases we will hit this from exit(2), and splitting the huge page
just to free it up in small pages is not what we really want.

The patch introduces deferred_split_huge_page(), which puts the huge page
into a queue for splitting.  The splitting itself will happen when we get
memory pressure via the shrinker interface.  The page will be dropped from
the list on freeing through the compound page destructor.
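
As a rough sketch of the intended flow (illustrative only; the queue and
lock names below are assumptions, not necessarily the identifiers used in
mm/huge_memory.c):

	static LIST_HEAD(split_queue);
	static DEFINE_SPINLOCK(split_queue_lock);

	/* Called from page_remove_rmap() when a THP is partially unmapped. */
	void deferred_split_huge_page(struct page *page)
	{
		unsigned long flags;

		VM_BUG_ON_PAGE(!PageTransHuge(page), page);

		spin_lock_irqsave(&split_queue_lock, flags);
		/* Queue the page only once; freeing unlinks it again. */
		if (list_empty(page_deferred_list(page)))
			list_add_tail(page_deferred_list(page), &split_queue);
		spin_unlock_irqrestore(&split_queue_lock, flags);
	}

The shrinker's scan callback then walks the queue under memory pressure,
pins each page with get_page_unless_zero() and calls split_huge_page() on
it, so the actual split happens in a context where taking the page lock
is safe.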

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 90e11e6..7aec5ee 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -90,11 +90,15 @@
 
 extern unsigned long transparent_hugepage_flags;
 
+extern void prep_transhuge_page(struct page *page);
+extern void free_transhuge_page(struct page *page);
+
 int split_huge_page_to_list(struct page *page, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {
 	return split_huge_page_to_list(page, NULL);
 }
+void deferred_split_huge_page(struct page *page);
 
 void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long address);
@@ -170,6 +174,7 @@
 {
 	return 0;
 }
+static inline void deferred_split_huge_page(struct page *page) {}
 #define split_huge_pmd(__vma, __pmd, __address)	\
 	do { } while (0)
 static inline int hugepage_madvise(struct vm_area_struct *vma,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e4397f6..aa8ae83 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -508,6 +508,9 @@
 #ifdef CONFIG_HUGETLB_PAGE
 	HUGETLB_PAGE_DTOR,
 #endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	TRANSHUGE_PAGE_DTOR,
+#endif
 	NR_COMPOUND_DTORS,
 };
 extern compound_page_dtor * const compound_page_dtors[];
@@ -537,6 +540,8 @@
 	page[1].compound_order = order;
 }
 
+void free_compound_page(struct page *page);
+
 #ifdef CONFIG_MMU
 /*
  * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 809defe..2dd9c31 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -55,6 +55,7 @@
 						 */
 		void *s_mem;			/* slab first object */
 		atomic_t compound_mapcount;	/* first tail page */
+		/* page_deferred_list().next	 -- second tail page */
 	};
 
 	/* Second double word */
@@ -62,6 +63,7 @@
 		union {
 			pgoff_t index;		/* Our offset within mapping. */
 			void *freelist;		/* sl[aou]b first free object */
+			/* page_deferred_list().prev	-- second tail page */
 		};
 
 		union {