From 01edcd891c3e9f4bb992ff2ceb69836bf76f8ddf Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 23 Nov 2005 13:37:39 -0800 Subject: [PATCH] mm: powerpc ptlock comments Update comments (only) on page_table_lock and mmap_sem in arch/powerpc. Removed the comment on page_table_lock from hash_huge_page: since it's no longer taking page_table_lock itself, it's irrelevant whether others are; but how it is safe (even against huge file truncation?) I can't say. Signed-off-by: Hugh Dickins Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/hugetlbpage.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/mm/hugetlbpage.c') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 426c269e552..9250f14be8e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -754,9 +754,7 @@ repeat: } /* - * No need to use ldarx/stdcx here because all who - * might be updating the pte will hold the - * page_table_lock + * No need to use ldarx/stdcx here */ *ptep = __pte(new_pte & ~_PAGE_BUSY); -- cgit From 5e391dc9e3fec68922137ae317bf680a74656c1b Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 23 Nov 2005 13:37:45 -0800 Subject: [PATCH] powerpc: fix for hugepage areas straddling 4GB boundary Commit 7d24f0b8a53261709938ffabe3e00f88f6498df9 fixed bugs in the ppc64 SLB miss handler with respect to hugepage handling, and in the process tweaked the semantics of the hugepage address masks in mm_context_t. Unfortunately, it left out a couple of necessary changes to go with that change. First, the in_hugepage_area() macro was not updated to match, second prepare_hugepage_range() was not updated to correctly handle hugepages regions which straddled the 4GB point. The latter appears only to cause process-hangs when attempting to map such a region, but the former can cause oopses if a get_user_pages() is triggered at the wrong point. This patch addresses both bugs. Signed-off-by: David Gibson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/hugetlbpage.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm/hugetlbpage.c') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 9250f14be8e..f867bba893c 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -287,15 +287,15 @@ static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) int prepare_hugepage_range(unsigned long addr, unsigned long len) { - int err; + int err = 0; if ( (addr+len) < addr ) return -EINVAL; - if ((addr + len) < 0x100000000UL) + if (addr < 0x100000000UL) err = open_low_hpage_areas(current->mm, LOW_ESID_MASK(addr, len)); - else + if ((addr + len) >= 0x100000000UL) err = open_high_hpage_areas(current->mm, HTLB_AREA_MASK(addr, len)); if (err) { -- cgit From 9a94c5793a7b44720f19ebb71b636bc9c31b44d8 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 24 Nov 2005 13:34:56 +1100 Subject: [PATCH] powerpc: More hugepage boundary case fixes Blah. The patch [0] I recently sent fixing errors with in_hugepage_area() and prepare_hugepage_range() for powerpc itself has an off-by-one bug. Furthermore, the related functions touches_hugepage_*_range() and within_hugepage_*_range() are also buggy. Some of the bugs, like those addressed in [0] originated with commit 7d24f0b8a53261709938ffabe3e00f88f6498df9 where we tweaked the semantics of where hugepages are allowed. Other bugs have been there essentially forever, and are due to the undefined behaviour of '<<' with shift counts greater than the type width (LOW_ESID_MASK could return non-zero for high ranges with the right congruences). The good news is that I now have a testsuite which should pick up things like this if they creep in again. [0] "powerpc-fix-for-hugepage-areas-straddling-4gb-boundary" Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm/hugetlbpage.c') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index f867bba893c..6bc9dbad7de 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -295,7 +295,7 @@ int prepare_hugepage_range(unsigned long addr, unsigned long len) if (addr < 0x100000000UL) err = open_low_hpage_areas(current->mm, LOW_ESID_MASK(addr, len)); - if ((addr + len) >= 0x100000000UL) + if ((addr + len) > 0x100000000UL) err = open_high_hpage_areas(current->mm, HTLB_AREA_MASK(addr, len)); if (err) { -- cgit From 325c82a029ca7ea80f8cb24815d6c9288d177190 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 8 Dec 2005 16:51:44 +1100 Subject: [PATCH] powerpc: Fix a huge page bug The 64k pages patch changed the meaning of one argument passed to the low level hash functions (from "large" it became "psize" or page size index), but one of the call sites wasn't properly updated, causing potential random weird problems with huge pages. This fixes it. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hugetlbpage.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm/hugetlbpage.c') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 6bc9dbad7de..8bce515dc32 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -703,7 +703,8 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & _PAGE_F_GIX) >> 12; - if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) + if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, + local) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } -- cgit From cbf52afdc0eb88492cf7808cc4b4f58a46f1b1ad Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 9 Dec 2005 14:20:52 +1100 Subject: [PATCH] powerpc: Add missing icache flushes for hugepages On most powerpc CPUs, the dcache and icache are not coherent so between writing and executing a page, the caches must be flushed. Userspace programs assume pages given to them by the kernel are icache clean, so we must do this flush between the kernel clearing a page and it being mapped into userspace for execute. We were not doing this for hugepages, this patch corrects the situation. We use the same lazy mechanism as we use for normal pages, delaying the flush until userspace actually attempts to execute from the page in question. Tested on G5. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hugetlbpage.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm/hugetlbpage.c') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8bce515dc32..97512b89e7b 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -639,8 +639,36 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -ENOMEM; } +/* + * Called by asm hashtable.S for doing lazy icache flush + */ +static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, + pte_t pte, int trap) +{ + struct page *page; + int i; + + if (!pfn_valid(pte_pfn(pte))) + return rflags; + + page = pte_page(pte); + + /* page is dirty */ + if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { + if (trap == 0x400) { + for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) + __flush_dcache_icache(page_address(page+i)); + set_bit(PG_arch_1, &page->flags); + } else { + rflags |= HPTE_R_N; + } + } + return rflags; +} + int hash_huge_page(struct mm_struct *mm, unsigned long access, - unsigned long ea, unsigned long vsid, int local) + unsigned long ea, unsigned long vsid, int local, + unsigned long trap) { pte_t *ptep; unsigned long old_pte, new_pte; @@ -691,6 +719,11 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, rflags = 0x2 | (!(new_pte & _PAGE_RW)); /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); + if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + /* No CPU has hugepages but lacks no execute, so we + * don't need to worry about that case */ + rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), + trap); /* Check if pte already has an hpte (case 2) */ if (unlikely(old_pte & _PAGE_HASHPTE)) { -- cgit From 23ed6cb9a237902cce6018a24d1993c346abddb4 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 9 Dec 2005 16:45:17 +1100 Subject: [PATCH] powerpc: Fix SLB flushing path in hugepage On ppc64, when opening a new hugepage region, we need to make sure any old normal-page SLBs for the area are flushed on all CPUs. There was a bug in this logic - after putting the new hugepage area masks into the thread structure, we copied it into the paca (read by the SLB miss handler) only on one CPU, not on all. This could cause incorrect SLB entries to be loaded when a multithreaded program was running simultaneously on several CPUs. This patch corrects the error, copying the context information into the PACA on all CPUs using the mm in question before flushing any existing SLB entries. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hugetlbpage.c | 57 +++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/mm/hugetlbpage.c') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 97512b89e7b..54131b877da 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -148,43 +148,63 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) return 0; } +struct slb_flush_info { + struct mm_struct *mm; + u16 newareas; +}; + static void flush_low_segments(void *parm) { - u16 areas = (unsigned long) parm; + struct slb_flush_info *fi = parm; unsigned long i; - asm volatile("isync" : : : "memory"); + BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS); + + if (current->active_mm != fi->mm) + return; - BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); + /* Only need to do anything if this CPU is working in the same + * mm as the one which has changed */ + + /* update the paca copy of the context struct */ + get_paca()->context = current->active_mm->context; + asm volatile("isync" : : : "memory"); for (i = 0; i < NUM_LOW_AREAS; i++) { - if (! (areas & (1U << i))) + if (! (fi->newareas & (1U << i))) continue; asm volatile("slbie %0" : : "r" ((i << SID_SHIFT) | SLBIE_C)); } - asm volatile("isync" : : : "memory"); } static void flush_high_segments(void *parm) { - u16 areas = (unsigned long) parm; + struct slb_flush_info *fi = parm; unsigned long i, j; - asm volatile("isync" : : : "memory"); - BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); + BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS); + + if (current->active_mm != fi->mm) + return; + + /* Only need to do anything if this CPU is working in the same + * mm as the one which has changed */ + /* update the paca copy of the context struct */ + get_paca()->context = current->active_mm->context; + + asm volatile("isync" : : : "memory"); for (i = 0; i < NUM_HIGH_AREAS; i++) { - if (! (areas & (1U << i))) + if (! (fi->newareas & (1U << i))) continue; for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) asm volatile("slbie %0" :: "r" (((i << HTLB_AREA_SHIFT) - + (j << SID_SHIFT)) | SLBIE_C)); + + (j << SID_SHIFT)) | SLBIE_C)); } - asm volatile("isync" : : : "memory"); } @@ -229,6 +249,7 @@ static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) { unsigned long i; + struct slb_flush_info fi; BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); @@ -244,19 +265,20 @@ static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) mm->context.low_htlb_areas |= newareas; - /* update the paca copy of the context struct */ - get_paca()->context = mm->context; - /* the context change must make it to memory before the flush, * so that further SLB misses do the right thing. */ mb(); - on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); + + fi.mm = mm; + fi.newareas = newareas; + on_each_cpu(flush_low_segments, &fi, 0, 1); return 0; } static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) { + struct slb_flush_info fi; unsigned long i; BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); @@ -280,7 +302,10 @@ static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) /* the context change must make it to memory before the flush, * so that further SLB misses do the right thing. */ mb(); - on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); + + fi.mm = mm; + fi.newareas = newareas; + on_each_cpu(flush_high_segments, &fi, 0, 1); return 0; } -- cgit