summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugh Dickins <hugh.dickins@tiscali.co.uk>2009-09-21 17:03:26 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-22 07:17:40 -0700
commit8e4b9a60718970bbc02dfd3abd0b956ab65af231 (patch)
tree4c19152cea19882071a74f92c0cf6a16d5711f41
parentf3e8fccd06d27773186a0094371daf2d84c79469 (diff)
downloadkernel-crypto-8e4b9a60718970bbc02dfd3abd0b956ab65af231.tar.gz
kernel-crypto-8e4b9a60718970bbc02dfd3abd0b956ab65af231.tar.xz
kernel-crypto-8e4b9a60718970bbc02dfd3abd0b956ab65af231.zip
mm: FOLL_DUMP replace FOLL_ANON
The "FOLL_ANON optimization" and its use_zero_page() test have caused confusion and bugs: why does it test VM_SHARED? for the very good but unsatisfying reason that VMware crashed without. As we look to maybe reinstating anonymous use of the ZERO_PAGE, we need to sort this out. Easily done: it's silly for __get_user_pages() and follow_page() to be guessing whether it's safe to assume that they're being used for a coredump (which can take a shortcut snapshot where other uses must handle a fault) - just tell them with GUP_FLAGS_DUMP and FOLL_DUMP. get_dump_page() doesn't even want a ZERO_PAGE: an error suits fine. Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk> Acked-by: Rik van Riel <riel@redhat.com> Acked-by: Mel Gorman <mel@csn.ul.ie> Reviewed-by: Minchan Kim <minchan.kim@gmail.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/mm.h2
-rw-r--r--mm/internal.h1
-rw-r--r--mm/memory.c43
3 files changed, 14 insertions, 32 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e41795bba95..45ee5b5a343 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1231,7 +1231,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
#define FOLL_WRITE 0x01 /* check pte is writable */
#define FOLL_TOUCH 0x02 /* mark page accessed */
#define FOLL_GET 0x04 /* do get_page on page */
-#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */
+#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
diff --git a/mm/internal.h b/mm/internal.h
index 166765cd58d..d41475078b2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -252,6 +252,7 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
#define GUP_FLAGS_WRITE 0x01
#define GUP_FLAGS_FORCE 0x02
+#define GUP_FLAGS_DUMP 0x04
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int len, int flags,
diff --git a/mm/memory.c b/mm/memory.c
index a8430ff1383..532a55bce6a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1174,41 +1174,22 @@ no_page:
pte_unmap_unlock(ptep, ptl);
if (!pte_none(pte))
return page;
- /* Fall through to ZERO_PAGE handling */
+
no_page_table:
/*
* When core dumping an enormous anonymous area that nobody
- * has touched so far, we don't want to allocate page tables.
+ * has touched so far, we don't want to allocate unnecessary pages or
+ * page tables. Return error instead of NULL to skip handle_mm_fault,
+ * then get_dump_page() will return NULL to leave a hole in the dump.
+ * But we can only make this optimization where a hole would surely
+ * be zero-filled if handle_mm_fault() actually did handle it.
*/
- if (flags & FOLL_ANON) {
- page = ZERO_PAGE(0);
- if (flags & FOLL_GET)
- get_page(page);
- BUG_ON(flags & FOLL_WRITE);
- }
+ if ((flags & FOLL_DUMP) &&
+ (!vma->vm_ops || !vma->vm_ops->fault))
+ return ERR_PTR(-EFAULT);
return page;
}
-/* Can we do the FOLL_ANON optimization? */
-static inline int use_zero_page(struct vm_area_struct *vma)
-{
- /*
- * We don't want to optimize FOLL_ANON for make_pages_present()
- * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
- * we want to get the page from the page tables to make sure
- * that we serialize and update with any other user of that
- * mapping.
- */
- if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
- return 0;
- /*
- * And if we have a fault routine, it's not an anonymous region.
- */
- return !vma->vm_ops || !vma->vm_ops->fault;
-}
-
-
-
int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int nr_pages, int flags,
struct page **pages, struct vm_area_struct **vmas)
@@ -1288,8 +1269,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
foll_flags = FOLL_TOUCH;
if (pages)
foll_flags |= FOLL_GET;
- if (!write && use_zero_page(vma))
- foll_flags |= FOLL_ANON;
+ if (flags & GUP_FLAGS_DUMP)
+ foll_flags |= FOLL_DUMP;
do {
struct page *page;
@@ -1446,7 +1427,7 @@ struct page *get_dump_page(unsigned long addr)
struct page *page;
if (__get_user_pages(current, current->mm, addr, 1,
- GUP_FLAGS_FORCE, &page, &vma) < 1)
+ GUP_FLAGS_FORCE | GUP_FLAGS_DUMP, &page, &vma) < 1)
return NULL;
if (page == ZERO_PAGE(0)) {
page_cache_release(page);