Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/hash_low_32.S    |  22
-rw-r--r--  arch/powerpc/mm/hash_low_64.S    |   5
-rw-r--r--  arch/powerpc/mm/hash_native_64.c |   2
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c  | 127
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c    |  16
-rw-r--r--  arch/powerpc/mm/init_32.c        |   4
-rw-r--r--  arch/powerpc/mm/lmb.c            |   4
-rw-r--r--  arch/powerpc/mm/mem.c            |   3
-rw-r--r--  arch/powerpc/mm/mmu_decl.h       |   5
-rw-r--r--  arch/powerpc/mm/numa.c           |  24
-rw-r--r--  arch/powerpc/mm/pgtable_32.c     |  76
-rw-r--r--  arch/powerpc/mm/ppc_mmu_32.c     |   4
-rw-r--r--  arch/powerpc/mm/tlb_64.c         |  68
13 files changed, 279 insertions(+), 81 deletions(-)
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index bd68df5fa78..ddceefc06ec 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -283,6 +283,7 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64)
 #define PTEG_SIZE       64
 #define LG_PTEG_SIZE    6
 #define LDPTEu          lwzu
+#define LDPTE           lwz
 #define STPTE           stw
 #define CMPPTE          cmpw
 #define PTE_H           0x40
@@ -389,13 +390,30 @@ _GLOBAL(hash_page_patch_C)
         * and we know there is a definite (although small) speed
         * advantage to putting the PTE in the primary PTEG, we always
         * put the PTE in the primary PTEG.
+        *
+        * In addition, we skip any slot that is mapping kernel text in
+        * order to avoid a deadlock when not using BAT mappings if
+        * trying to hash in the kernel hash code itself after it has
+        * already taken the hash table lock. This works in conjunction
+        * with pre-faulting of the kernel text.
+        *
+        * If the hash table bucket is full of kernel text entries, we'll
+        * lock up here, but that shouldn't happen.
         */
-       addis   r4,r7,next_slot@ha
+1:     addis   r4,r7,next_slot@ha      /* get next evict slot */
        lwz     r6,next_slot@l(r4)
-       addi    r6,r6,PTE_SIZE
+       addi    r6,r6,PTE_SIZE          /* search for candidate */
        andi.   r6,r6,7*PTE_SIZE
        stw     r6,next_slot@l(r4)
        add     r4,r3,r6
+       LDPTE   r0,PTE_SIZE/2(r4)       /* get PTE second word */
+       clrrwi  r0,r0,12
+       lis     r6,etext@h
+       ori     r6,r6,etext@l           /* get etext */
+       tophys(r6,r6)
+       cmpl    cr0,r0,r6               /* compare and try again */
+       blt     1b
 
 #ifndef CONFIG_SMP
        /* Store PTE in PTEG */
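For readers following the assembly: when all eight slots of a primary PTEG are occupied, the miss handler evicts one round-robin, and the new code rejects any victim whose physical page number lies below the physical address of etext, i.e. any entry mapping kernel text. A rough C rendering of that eviction loop follows, as a readability aid only; the function and variable names are invented here, and __pa() stands in for the tophys() macro.

    /*
     * Illustrative C model of the eviction loop in hash_page_patch_C
     * above. Assumes 8 slots of two 32-bit words each per PTEG, as on
     * classic 32-bit hash MMUs (PTE_SIZE == 8).
     */
    extern char etext[];            /* end of kernel text */
    static unsigned int next_slot;  /* round-robin cursor, as in the asm */

    static unsigned long *pick_victim_slot(unsigned long *pteg)
    {
            unsigned long *slot, rpn;

            do {
                    /* advance the cursor over the 8 slots */
                    next_slot = (next_slot + 8) & (7 * 8);
                    slot = (unsigned long *)((char *)pteg + next_slot);
                    /* physical page address from the PTE's second word */
                    rpn = slot[1] & ~0xfffUL;
                    /* retry while the victim maps kernel text */
            } while (rpn < __pa(etext));
            return slot;
    }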
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 9bc0a9c2b9b..e64ce3eec36 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -445,9 +445,12 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
 
 htab_insert_pte:
        /* real page number in r5, PTE RPN value + index */
-       rldicl  r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+       andis.  r0,r31,_PAGE_4K_PFN@h
+       srdi    r5,r31,PTE_RPN_SHIFT
+       bne-    htab_special_pfn
        sldi    r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
        add     r5,r5,r25
+htab_special_pfn:
        sldi    r5,r5,HW_PAGE_SHIFT
 
        /* Calculate primary group hash */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 6f1016acdbf..79aedaf36f2 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -505,7 +505,7 @@ static inline int tlb_batching_enabled(void)
        int enabled = 1;
 
        if (root) {
-               const char *model = get_property(root, "model", NULL);
+               const char *model = of_get_property(root, "model", NULL);
                if (model && !strcmp(model, "IBM,9076-N81"))
                        enabled = 0;
                of_node_put(root);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 3c7fe2c65b5..49618461def 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -100,6 +100,11 @@ unsigned int HPAGE_SHIFT;
 #ifdef CONFIG_PPC_64K_PAGES
 int mmu_ci_restrictions;
 #endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static u8 *linear_map_hash_slots;
+static unsigned long linear_map_hash_count;
+static spinlock_t linear_map_hash_lock;
+#endif /* CONFIG_DEBUG_PAGEALLOC */
 
 /* There are definitions of page sizes arrays to be used when none
  * is provided by the firmware.
@@ -152,11 +157,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 
        for (vaddr = vstart, paddr = pstart; vaddr < vend;
             vaddr += step, paddr += step) {
-               unsigned long vpn, hash, hpteg;
+               unsigned long hash, hpteg;
                unsigned long vsid = get_kernel_vsid(vaddr);
                unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
 
-               vpn = va >> shift;
                tmp_mode = mode;
 
                /* Make non-kernel text non-executable */
@@ -174,6 +178,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 
                if (ret < 0)
                        break;
+#ifdef CONFIG_DEBUG_PAGEALLOC
+               if ((paddr >> PAGE_SHIFT) < linear_map_hash_count)
+                       linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
+#endif /* CONFIG_DEBUG_PAGEALLOC */
        }
        return ret < 0 ? ret : 0;
 }
@@ -281,6 +289,7 @@ static void __init htab_init_page_sizes(void)
                memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
                       sizeof(mmu_psize_defaults_gp));
  found:
+#ifndef CONFIG_DEBUG_PAGEALLOC
        /*
         * Pick a size for the linear mapping. Currently, we only support
         * 16M, 1M and 4K which is the default
@@ -289,6 +298,7 @@ static void __init htab_init_page_sizes(void)
                mmu_linear_psize = MMU_PAGE_16M;
        else if (mmu_psize_defs[MMU_PAGE_1M].shift)
                mmu_linear_psize = MMU_PAGE_1M;
+#endif /* CONFIG_DEBUG_PAGEALLOC */
 
 #ifdef CONFIG_PPC_64K_PAGES
        /*
@@ -303,12 +313,14 @@ static void __init htab_init_page_sizes(void)
        if (mmu_psize_defs[MMU_PAGE_64K].shift) {
                mmu_virtual_psize = MMU_PAGE_64K;
                mmu_vmalloc_psize = MMU_PAGE_64K;
+               if (mmu_linear_psize == MMU_PAGE_4K)
+                       mmu_linear_psize = MMU_PAGE_64K;
                if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
                        mmu_io_psize = MMU_PAGE_64K;
                else
                        mmu_ci_restrictions = 1;
        }
-#endif
+#endif /* CONFIG_PPC_64K_PAGES */
 
        printk(KERN_DEBUG "Page orders: linear mapping = %d, "
               "virtual = %d, io = %d\n",
@@ -476,6 +488,13 @@ void __init htab_initialize(void)
 
        mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       linear_map_hash_count = lmb_end_of_DRAM() >> PAGE_SHIFT;
+       linear_map_hash_slots = __va(lmb_alloc_base(linear_map_hash_count,
+                                                   1, lmb.rmo_size));
+       memset(linear_map_hash_slots, 0, linear_map_hash_count);
+#endif /* CONFIG_DEBUG_PAGEALLOC */
+
        /* On U3 based machines, we need to reserve the DART area and
         * _NOT_ map it to avoid cache paradoxes as it's remapped non
         * cacheable later on
@@ -573,6 +592,27 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
        return pp;
 }
 
+/*
+ * Demote a segment to using 4k pages.
+ * For now this makes the whole process use 4k pages.
+ */
+void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+       if (mm->context.user_psize == MMU_PAGE_4K)
+               return;
+       mm->context.user_psize = MMU_PAGE_4K;
+       mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp;
+       get_paca()->context = mm->context;
+       slb_flush_and_rebolt();
+#ifdef CONFIG_SPE_BASE
+       spu_flush_all_slbs(mm);
+#endif
+#endif
+}
+
+EXPORT_SYMBOL_GPL(demote_segment_4k);
+
 /* Result code is:
  *     0 - handled
  *     1 - normal page fault
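demote_segment_4k() centralizes logic that was previously open-coded in hash_page() and hash_preload() (see the hunks below): once any PTE in the address space turns out to be incompatible with 64K segments, the whole context is switched to 4K pages, the PACA copy is refreshed, and the bolted SLB entries are rebuilt so the new segment page size takes effect. A condensed caller sketch follows; the helper name is invented, and the real checks below distinguish user, vmalloc and IO regions rather than testing both flags at once.

    /* Hypothetical condensed caller of demote_segment_4k(), for
     * illustration only. */
    static void demote_if_incompatible(struct mm_struct *mm,
                                       unsigned long ea, pte_t *ptep,
                                       unsigned int *psize)
    {
            /* _PAGE_4K_PFN and non-cacheable PTEs cannot live in a
             * 64K segment, so drop the whole context to 4K first */
            if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) {
                    demote_segment_4k(mm, ea);
                    *psize = MMU_PAGE_4K;
            }
    }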
@@ -665,15 +705,19 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 #ifndef CONFIG_PPC_64K_PAGES
        rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
 #else
+       /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
+       if (pte_val(*ptep) & _PAGE_4K_PFN) {
+               demote_segment_4k(mm, ea);
+               psize = MMU_PAGE_4K;
+       }
+
        if (mmu_ci_restrictions) {
                /* If this PTE is non-cacheable, switch to 4k */
                if (psize == MMU_PAGE_64K &&
                    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
                        if (user_region) {
+                               demote_segment_4k(mm, ea);
                                psize = MMU_PAGE_4K;
-                               mm->context.user_psize = MMU_PAGE_4K;
-                               mm->context.sllp = SLB_VSID_USER |
-                                       mmu_psize_defs[MMU_PAGE_4K].sllp;
                        } else if (ea < VMALLOC_END) {
                                /*
                                 * some driver did a non-cacheable mapping
@@ -756,16 +800,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
        if (mmu_ci_restrictions) {
                /* If this PTE is non-cacheable, switch to 4k */
                if (mm->context.user_psize == MMU_PAGE_64K &&
-                   (pte_val(*ptep) & _PAGE_NO_CACHE)) {
-                       mm->context.user_psize = MMU_PAGE_4K;
-                       mm->context.sllp = SLB_VSID_USER |
-                               mmu_psize_defs[MMU_PAGE_4K].sllp;
-                       get_paca()->context = mm->context;
-                       slb_flush_and_rebolt();
-#ifdef CONFIG_SPE_BASE
-                       spu_flush_all_slbs(mm);
-#endif
-               }
+                   (pte_val(*ptep) & _PAGE_NO_CACHE))
+                       demote_segment_4k(mm, ea);
        }
        if (mm->context.user_psize == MMU_PAGE_64K)
                __hash_page_64K(ea, access, vsid, ptep, trap, local);
@@ -825,3 +861,62 @@ void low_hash_fault(struct pt_regs *regs, unsigned long address)
        }
        bad_page_fault(regs, address, SIGBUS);
 }
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
+{
+       unsigned long hash, hpteg, vsid = get_kernel_vsid(vaddr);
+       unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
+       unsigned long mode = _PAGE_ACCESSED | _PAGE_DIRTY |
+               _PAGE_COHERENT | PP_RWXX | HPTE_R_N;
+       int ret;
+
+       hash = hpt_hash(va, PAGE_SHIFT);
+       hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+
+       ret = ppc_md.hpte_insert(hpteg, va, __pa(vaddr),
+                                mode, HPTE_V_BOLTED, mmu_linear_psize);
+       BUG_ON(ret < 0);
+       spin_lock(&linear_map_hash_lock);
+       BUG_ON(linear_map_hash_slots[lmi] & 0x80);
+       linear_map_hash_slots[lmi] = ret | 0x80;
+       spin_unlock(&linear_map_hash_lock);
+}
+
+static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
+{
+       unsigned long hash, hidx, slot, vsid = get_kernel_vsid(vaddr);
+       unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
+
+       hash = hpt_hash(va, PAGE_SHIFT);
+       spin_lock(&linear_map_hash_lock);
+       BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
+       hidx = linear_map_hash_slots[lmi] & 0x7f;
+       linear_map_hash_slots[lmi] = 0;
+       spin_unlock(&linear_map_hash_lock);
+       if (hidx & _PTEIDX_SECONDARY)
+               hash = ~hash;
+       slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+       slot += hidx & _PTEIDX_GROUP_IX;
+       ppc_md.hpte_invalidate(slot, va, mmu_linear_psize, 0);
+}
+
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+       unsigned long flags, vaddr, lmi;
+       int i;
+
+       local_irq_save(flags);
+       for (i = 0; i < numpages; i++, page++) {
+               vaddr = (unsigned long)page_address(page);
+               lmi = __pa(vaddr) >> PAGE_SHIFT;
+               if (lmi >= linear_map_hash_count)
+                       continue;
+               if (enable)
+                       kernel_map_linear_page(vaddr, lmi);
+               else
+                       kernel_unmap_linear_page(vaddr, lmi);
+       }
+       local_irq_restore(flags);
+}
+#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index f6ffaaa7a5b..8508f973d9c 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -316,12 +316,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 {
        if (pte_present(*ptep)) {
                /* We open-code pte_clear because we need to pass the right
-                * argument to hpte_update (huge / !huge)
+                * argument to hpte_need_flush (huge / !huge). Might not be
+                * necessary anymore if we make hpte_need_flush() get the
+                * page size from the slices
                 */
-               unsigned long old = pte_update(ptep, ~0UL);
-               if (old & _PAGE_HASHPTE)
-                       hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
-               flush_tlb_pending();
+               pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1);
        }
        *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 }
@@ -329,12 +328,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
 {
-       unsigned long old = pte_update(ptep, ~0UL);
-
-       if (old & _PAGE_HASHPTE)
-               hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
-       *ptep = __pte(0);
-
+       unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1);
        return __pte(old);
 }
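The two hugetlb hunks above adapt to a reworked pte_update()/hpte_need_flush() API: callers no longer pair pte_update() with an explicit hpte_update() and flush_tlb_pending(); instead pte_update() takes the mm and address and triggers the hash flush itself. The new definition lives in the ppc64 page-table headers, outside this diff; the following is a simplified sketch of what it is assumed to look like (the real version clears the bits atomically with an ldarx/stdcx. loop):

    static inline unsigned long pte_update(struct mm_struct *mm,
                                           unsigned long addr, pte_t *ptep,
                                           unsigned long clr, int huge)
    {
            unsigned long old = pte_val(*ptep);

            /* non-atomic stand-in for the ldarx/stdcx. update */
            *ptep = __pte(old & ~clr);

            /* flushing is now driven from here, immediately or batched,
             * via hpte_need_flush() (see tlb_64.c below) */
            if (old & _PAGE_HASHPTE)
                    hpte_need_flush(mm, addr, ptep, old, huge);
            return old;
    }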
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 0e53ca8f02f..5fce6ccecb8 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -115,6 +115,10 @@ void MMU_setup(void)
        if (strstr(cmd_line, "noltlbs")) {
                __map_without_ltlbs = 1;
        }
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       __map_without_bats = 1;
+       __map_without_ltlbs = 1;
+#endif
 }
 
 /*
diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c
index 716a2906a24..e3a1e8dc536 100644
--- a/arch/powerpc/mm/lmb.c
+++ b/arch/powerpc/mm/lmb.c
@@ -146,6 +146,10 @@ static long __init lmb_add_region(struct lmb_region *rgn, unsigned long base,
                unsigned long rgnbase = rgn->region[i].base;
                unsigned long rgnsize = rgn->region[i].size;
 
+               if ((rgnbase == base) && (rgnsize == size))
+                       /* Already have this region, so we're done */
+                       return 0;
+
                adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize);
                if ( adjacent > 0 ) {
                        rgn->region[i].base -= size;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 52f397c108a..c4bcd754642 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -58,9 +58,6 @@ int init_bootmem_done;
 int mem_init_done;
 unsigned long memory_limit;
 
-extern void hash_preload(struct mm_struct *mm, unsigned long ea,
-                        unsigned long access, unsigned long trap);
-
 int page_is_ram(unsigned long pfn)
 {
        unsigned long paddr = (pfn << PAGE_SHIFT);
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index bea2d21ac6f..9c4538bb04b 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -19,9 +19,14 @@
  * 2 of the License, or (at your option) any later version.
  *
  */
+#include <linux/mm.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu.h>
 
+extern void hash_preload(struct mm_struct *mm, unsigned long ea,
+                        unsigned long access, unsigned long trap);
+
+
 #ifdef CONFIG_PPC32
 extern void mapin_ram(void);
 extern int map_page(unsigned long va, phys_addr_t pa, int flags);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index e86c37c82cf..b3a592b25ab 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -74,7 +74,7 @@ static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
 
        while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
                /* Try interrupt server first */
-               interrupt_server = get_property(cpu_node,
+               interrupt_server = of_get_property(cpu_node,
                                        "ibm,ppc-interrupt-server#s", &len);
 
                len = len / sizeof(u32);
@@ -85,7 +85,7 @@ static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
                                return cpu_node;
                }
        } else {
-               reg = get_property(cpu_node, "reg", &len);
+               reg = of_get_property(cpu_node, "reg", &len);
                if (reg && (len > 0) && (reg[0] == hw_cpuid))
                        return cpu_node;
        }
@@ -97,7 +97,7 @@ static struct device_node * __cpuinit find_cpu_node(unsigned int cpu)
 /* must hold reference to node during call */
 static const int *of_get_associativity(struct device_node *dev)
 {
-       return get_property(dev, "ibm,associativity", NULL);
+       return of_get_property(dev, "ibm,associativity", NULL);
 }
 
 /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
@@ -179,7 +179,7 @@ static int __init find_min_common_depth(void)
         * configuration (should be all 0's) and the second is for a normal
         * NUMA configuration.
         */
-       ref_points = get_property(rtas_root,
+       ref_points = of_get_property(rtas_root,
                        "ibm,associativity-reference-points", &len);
 
        if ((len >= 1) && ref_points) {
@@ -201,8 +201,8 @@ static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
        if (!memory)
                panic("numa.c: No memory nodes found!");
 
-       *n_addr_cells = prom_n_addr_cells(memory);
-       *n_size_cells = prom_n_size_cells(memory);
+       *n_addr_cells = of_n_addr_cells(memory);
+       *n_size_cells = of_n_size_cells(memory);
        of_node_put(memory);
 }
 
@@ -308,9 +308,9 @@ static void __init parse_drconf_memory(struct device_node *memory)
        int nid, default_nid = 0;
        unsigned int start, ai, flags;
 
-       lm = get_property(memory, "ibm,lmb-size", &ls);
-       dm = get_property(memory, "ibm,dynamic-memory", &ld);
-       aa = get_property(memory, "ibm,associativity-lookup-arrays", &la);
+       lm = of_get_property(memory, "ibm,lmb-size", &ls);
+       dm = of_get_property(memory, "ibm,dynamic-memory", &ld);
+       aa = of_get_property(memory, "ibm,associativity-lookup-arrays", &la);
 
        if (!lm || !dm || !aa ||
            ls < sizeof(unsigned int) || ld < sizeof(unsigned int) ||
            la < 2 * sizeof(unsigned int))
@@ -404,10 +404,10 @@ static int __init parse_numa_properties(void)
                const unsigned int *memcell_buf;
                unsigned int len;
 
-               memcell_buf = get_property(memory,
+               memcell_buf = of_get_property(memory,
                                "linux,usable-memory", &len);
                if (!memcell_buf || len <= 0)
-                       memcell_buf = get_property(memory, "reg", &len);
+                       memcell_buf = of_get_property(memory, "reg", &len);
                if (!memcell_buf || len <= 0)
                        continue;
 
@@ -725,7 +725,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
                const unsigned int *memcell_buf;
                unsigned int len;
 
-               memcell_buf = get_property(memory, "reg", &len);
+               memcell_buf = of_get_property(memory, "reg", &len);
                if (!memcell_buf || len <= 0)
                        continue;
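All numa.c changes above are a mechanical rename from get_property() to of_get_property() (and from prom_n_addr_cells()/prom_n_size_cells() to the of_-prefixed equivalents); the arguments and return values are unchanged. A minimal usage sketch, with an invented helper name:

    /* Hypothetical helper showing the renamed accessor in use */
    static int first_reg_cell(struct device_node *np)
    {
            int len;
            const u32 *reg = of_get_property(np, "reg", &len);

            if (!reg || len < sizeof(u32))
                    return -1;
            return reg[0];
    }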
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index c284bdac994..bca56037492 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -183,8 +183,8 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
         * mem_init() sets high_memory so only do the check after that.
         */
        if (mem_init_done && (p < virt_to_phys(high_memory))) {
-               printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p,
-                      __builtin_return_address(0));
+               printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n",
+                      (unsigned long long)p, __builtin_return_address(0));
                return NULL;
        }
 
@@ -266,9 +266,12 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
        pg = pte_alloc_kernel(pd, va);
        if (pg != 0) {
                err = 0;
-               set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
-               if (mem_init_done)
-                       flush_HPTE(0, va, pmd_val(*pd));
+               /* The PTE should never be already set nor present in the
+                * hash table
+                */
+               BUG_ON(pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE));
+               set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
+                                                    __pgprot(flags)));
        }
        return err;
 }
@@ -279,16 +282,19 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
 void __init mapin_ram(void)
 {
        unsigned long v, p, s, f;
+       int ktext;
 
        s = mmu_mapin_ram();
        v = KERNELBASE + s;
        p = PPC_MEMSTART + s;
        for (; s < total_lowmem; s += PAGE_SIZE) {
-               if ((char *) v >= _stext && (char *) v < etext)
-                       f = _PAGE_RAM_TEXT;
-               else
-                       f = _PAGE_RAM;
+               ktext = ((char *) v >= _stext && (char *) v < etext);
+               f = ktext ? _PAGE_RAM_TEXT : _PAGE_RAM;
                map_page(v, p, f);
+#ifdef CONFIG_PPC_STD_MMU_32
+               if (ktext)
+                       hash_preload(&init_mm, v, 0, 0x300);
+#endif
                v += PAGE_SIZE;
                p += PAGE_SIZE;
        }
@@ -445,3 +451,55 @@ exit:
        return ret;
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+
+static int __change_page_attr(struct page *page, pgprot_t prot)
+{
+       pte_t *kpte;
+       pmd_t *kpmd;
+       unsigned long address;
+
+       BUG_ON(PageHighMem(page));
+       address = (unsigned long)page_address(page);
+
+       if (v_mapped_by_bats(address) || v_mapped_by_tlbcam(address))
+               return 0;
+       if (!get_pteptr(&init_mm, address, &kpte, &kpmd))
+               return -EINVAL;
+       set_pte_at(&init_mm, address, kpte, mk_pte(page, prot));
+       wmb();
+       flush_HPTE(0, address, pmd_val(*kpmd));
+       pte_unmap(kpte);
+
+       return 0;
+}
+
+/*
+ * Change the page attributes of a page in the linear mapping.
+ *
+ * THIS CONFLICTS WITH BAT MAPPINGS, DEBUG USE ONLY
+ */
+static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
+{
+       int i, err = 0;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       for (i = 0; i < numpages; i++, page++) {
+               err = __change_page_attr(page, prot);
+               if (err)
+                       break;
+       }
+       local_irq_restore(flags);
+       return err;
+}
+
+void kernel_map_pages(struct page *page, int numpages, int enable)
+{
+       if (PageHighMem(page))
+               return;
+
+       change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+}
+#endif /* CONFIG_DEBUG_PAGEALLOC */
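On 32-bit, kernel_map_pages() works by rewriting the linear-mapping PTEs through change_page_attr() rather than inserting and removing hash entries directly, which is why DEBUG_PAGEALLOC forces BAT and large-TLB mappings off in the init_32.c hunk earlier. The hook itself is driven by the generic page allocator; roughly, and with invented wrapper names, the calling pattern is:

    /* Sketch of how the generic allocator exercises this hook: pages
     * are unmapped from the linear mapping on free and mapped back on
     * allocation, so stray accesses through stale pointers fault. */
    static void debug_free_pages(struct page *page, int order)
    {
            kernel_map_pages(page, 1 << order, 0);  /* unmap on free */
    }

    static void debug_alloc_pages(struct page *page, int order)
    {
            kernel_map_pages(page, 1 << order, 1);  /* remap on alloc */
    }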
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 7cceb2c44cb..05066674a7a 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -85,8 +85,10 @@ unsigned long __init mmu_mapin_ram(void)
        unsigned long max_size = (256<<20);
        unsigned long align;
 
-       if (__map_without_bats)
+       if (__map_without_bats) {
+               printk(KERN_DEBUG "RAM mapped without BATs\n");
                return 0;
+       }
 
        /* Set up BAT2 and if necessary BAT3 to cover RAM.
         */
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index b58baa65c4a..fd8d08c325e 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -120,17 +120,20 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
 }
 
 /*
- * Update the MMU hash table to correspond with a change to
- * a Linux PTE.  If wrprot is true, it is permissible to
- * change the existing HPTE to read-only rather than removing it
- * (if we remove it we should clear the _PTE_HPTEFLAGS bits).
+ * A linux PTE was changed and the corresponding hash table entry
+ * needs to be flushed. This function will either perform the flush
+ * immediately or will batch it up if the current CPU has an active
+ * batch on it.
+ *
+ * Must be called from within some kind of spinlock/non-preempt region...
  */
-void hpte_update(struct mm_struct *mm, unsigned long addr,
-                pte_t *ptep, unsigned long pte, int huge)
+void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
+                    pte_t *ptep, unsigned long pte, int huge)
 {
        struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
-       unsigned long vsid;
+       unsigned long vsid, vaddr;
        unsigned int psize;
+       real_pte_t rpte;
        int i;
 
        i = batch->index;
@@ -151,6 +154,26 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
        } else
                psize = pte_pagesize_index(pte);
 
+       /* Build full vaddr */
+       if (!is_kernel_addr(addr)) {
+               vsid = get_vsid(mm->context.id, addr);
+               WARN_ON(vsid == 0);
+       } else
+               vsid = get_kernel_vsid(addr);
+       vaddr = (vsid << 28) | (addr & 0x0fffffff);
+       rpte = __real_pte(__pte(pte), ptep);
+
+       /*
+        * Check if we have an active batch on this CPU. If not, just
+        * flush now and return. For now, we do global invalidates
+        * in that case; it might be worth testing the mm cpu mask
+        * and deciding to use local invalidates instead...
+        */
+       if (!batch->active) {
+               flush_hash_page(vaddr, rpte, psize, 0);
+               return;
+       }
+
        /*
         * This can happen when we are in the middle of a TLB batch and
         * we encounter memory pressure (eg copy_page_range when it tries
@@ -162,47 +185,42 @@ void hpte_update(struct mm_struct *mm, unsigned long addr,
         * batch
         */
        if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
-               flush_tlb_pending();
+               __flush_tlb_pending(batch);
                i = 0;
        }
        if (i == 0) {
                batch->mm = mm;
                batch->psize = psize;
        }
-       if (!is_kernel_addr(addr)) {
-               vsid = get_vsid(mm->context.id, addr);
-               WARN_ON(vsid == 0);
-       } else
-               vsid = get_kernel_vsid(addr);
-       batch->vaddr[i] = (vsid << 28) | (addr & 0x0fffffff);
-       batch->pte[i] = __real_pte(__pte(pte), ptep);
+       batch->pte[i] = rpte;
+       batch->vaddr[i] = vaddr;
        batch->index = ++i;
        if (i >= PPC64_TLB_BATCH_NR)
-               flush_tlb_pending();
+               __flush_tlb_pending(batch);
 }
 
+/*
+ * This function is called when terminating an mmu batch or when a batch
+ * is full. It will perform the flush of all the entries currently stored
+ * in a batch.
+ *
+ * Must be called from within some kind of spinlock/non-preempt region...
+ */
 void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
 {
-       int i;
-       int cpu;
        cpumask_t tmp;
-       int local = 0;
+       int i, local = 0;
 
-       BUG_ON(in_interrupt());
-
-       cpu = get_cpu();
        i = batch->index;
-       tmp = cpumask_of_cpu(cpu);
+       tmp = cpumask_of_cpu(smp_processor_id());
        if (cpus_equal(batch->mm->cpu_vm_mask, tmp))
                local = 1;
-
        if (i == 1)
                flush_hash_page(batch->vaddr[0], batch->pte[0],
                                batch->psize, local);
        else
                flush_hash_range(i, local);
        batch->index = 0;
-       put_cpu();
 }
 
 void pte_free_finish(void)
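The tlb_64.c rework gives hpte_need_flush() three behaviours: flush immediately when no batch is active on this CPU, drain the batch when the mm or page size changes, and drain it when it reaches PPC64_TLB_BATCH_NR entries. A standalone model of that policy, with illustrative types and a stub flush routine:

    enum { BATCH_NR = 192 };        /* stands in for PPC64_TLB_BATCH_NR */

    struct flush_batch {
            int active, index, psize;
            void *mm;
            unsigned long vaddr[BATCH_NR];
    };

    static void do_flush(unsigned long *vaddrs, int n) { /* stub */ }

    static void need_flush(struct flush_batch *b, void *mm, int psize,
                           unsigned long vaddr)
    {
            if (!b->active) {               /* no batch: flush right away */
                    do_flush(&vaddr, 1);
                    return;
            }
            if (b->index && (b->mm != mm || b->psize != psize)) {
                    do_flush(b->vaddr, b->index);   /* incompatible: drain */
                    b->index = 0;
            }
            if (b->index == 0) {            /* (re)start the batch */
                    b->mm = mm;
                    b->psize = psize;
            }
            b->vaddr[b->index++] = vaddr;
            if (b->index >= BATCH_NR) {     /* full: drain now */
                    do_flush(b->vaddr, b->index);
                    b->index = 0;
            }
    }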