From 5f25f06529ecb4b20efc7ba00de599f5b9f4b63c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 May 2008 14:27:07 +1000 Subject: [POWERPC] Move declaration of init_bootmem_done into system.h ... instead of having an extern declaration in a .c file. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/init_32.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 1952b4d3fa7..45418590b6a 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "mmu_decl.h" @@ -76,8 +77,6 @@ void MMU_init(void); /* XXX should be in current.h -- paulus */ extern struct task_struct *current_set[NR_CPUS]; -extern int init_bootmem_done; - /* * this tells the system to map all of ram with the segregs * (i.e. page tables) instead of the bats. -- cgit From 09de9ff872bca25ba717a7075c9ee49b1bdb56d2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 May 2008 14:27:07 +1000 Subject: [POWERPC] Fix sparse warnings in arch/powerpc/mm Make two vmemmap helpers static in init_64.c Make stab variables static in stab.c Make psize defs static in hash_utils_64.c Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_utils_64.c | 4 ++-- arch/powerpc/mm/init_64.c | 4 ++-- arch/powerpc/mm/stab.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 2b5a399f6fa..1a4b4b36b0c 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -117,7 +117,7 @@ static DEFINE_SPINLOCK(linear_map_hash_lock); /* Pre-POWER4 CPUs (4k pages only) */ -struct mmu_psize_def mmu_psize_defaults_old[] = { +static struct mmu_psize_def mmu_psize_defaults_old[] = { [MMU_PAGE_4K] = { .shift = 12, .sllp = 0, @@ -131,7 +131,7 @@ struct mmu_psize_def mmu_psize_defaults_old[] = { * * Support for 16Mb large pages */ -struct mmu_psize_def mmu_psize_defaults_gp[] = { +static struct mmu_psize_def mmu_psize_defaults_gp[] = { [MMU_PAGE_4K] = { .shift = 12, .sllp = 0, diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index c5ac532a016..1e52e97d740 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -183,7 +183,7 @@ void pgtable_cache_init(void) * do this by hand as the proffered address may not be correctly aligned. * Subtraction of non-aligned pointers produces undefined results. */ -unsigned long __meminit vmemmap_section_start(unsigned long page) +static unsigned long __meminit vmemmap_section_start(unsigned long page) { unsigned long offset = page - ((unsigned long)(vmemmap)); @@ -196,7 +196,7 @@ unsigned long __meminit vmemmap_section_start(unsigned long page) * which overlaps this vmemmap page is initialised then this page is * initialised already. */ -int __meminit vmemmap_populated(unsigned long start, int page_size) +static int __meminit vmemmap_populated(unsigned long start, int page_size) { unsigned long end = start + page_size; diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index efbbd13d93e..60e6032a808 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -30,8 +30,8 @@ struct stab_entry { }; #define NR_STAB_CACHE_ENTRIES 8 -DEFINE_PER_CPU(long, stab_cache_ptr); -DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); +static DEFINE_PER_CPU(long, stab_cache_ptr); +static DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); /* * Create a segment table entry for the given esid/vsid pair. -- cgit From 572fb578de59efaaa8d197b0183db43b1128a06e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 May 2008 14:27:08 +1000 Subject: [POWERPC] Move declaration of tce variables into mmu-hash64.h ... instead of having extern declarations in a .c file. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_utils_64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 1a4b4b36b0c..05f955f8df3 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -509,8 +509,6 @@ void __init htab_initialize(void) unsigned long base = 0, size = 0, limit; int i; - extern unsigned long tce_alloc_start, tce_alloc_end; - DBG(" -> htab_initialize()\n"); /* Initialize segment sizes */ -- cgit From c884116ac375c5dd137109b58acaa9cf50164796 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 May 2008 14:27:09 +1000 Subject: [POWERPC] Remove duplicate variable definitions in mm/tlb_64.c Somewhere along the way (e28f7faf05159f1cfd564596f5e6178edba6bd49, "Four level pagetables for ppc64") we ended up with duplicate definitions for pte_freelist_cur and pte_freelist_force_free. Somehow this compiles, but it would be better to just have one definition for each. The two definitions we end up with can be static too! Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/mm/tlb_64.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index e2d867ce1c7..509bc560159 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -37,8 +37,8 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); * include/asm-powerpc/tlb.h file -- tgall */ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -unsigned long pte_freelist_forced_free; +static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); +static unsigned long pte_freelist_forced_free; struct pte_freelist_batch { @@ -47,9 +47,6 @@ struct pte_freelist_batch pgtable_free_t tables[0]; }; -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -unsigned long pte_freelist_forced_free; - #define PTE_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ / sizeof(pgtable_free_t)) -- cgit From 15c8b6c1aaaf1c4edd67e2f02e4d8e1bd1a51c0d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 May 2008 09:39:44 +0200 Subject: on_each_cpu(): kill unused 'retry' parameter It's not even passed on to smp_call_function() anymore, since that was removed. So kill it. Acked-by: Jeremy Fitzhardinge Reviewed-by: Paul E. McKenney Signed-off-by: Jens Axboe --- arch/powerpc/mm/slice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index ad928edafb0..2bd12d965db 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -218,7 +218,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz mb(); /* XXX this is sub-optimal but will do for now */ - on_each_cpu(slice_flush_segments, mm, 0, 1); + on_each_cpu(slice_flush_segments, mm, 1); #ifdef CONFIG_SPU_BASE spu_flush_all_slbs(mm); #endif -- cgit From 41743a4e34f0777f51c1cf0675b91508ba143050 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 11 Jun 2008 15:37:10 +1000 Subject: powerpc: Free a PTE bit on ppc64 with 64K pages This frees a PTE bit when using 64K pages on ppc64. This is done by getting rid of the separate _PAGE_HASHPTE bit. Instead, we just test if any of the 16 sub-page bits is set. For non-combo pages (ie. real 64K pages), we set SUB0 and the location encoding in that field. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hash_low_64.S | 17 ++++++++++------- arch/powerpc/mm/hugetlbpage.c | 8 +++++--- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 70f4c833fa3..a719f53921a 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -388,7 +388,7 @@ _GLOBAL(__hash_page_4K) */ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ or r30,r30,r31 - ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE + ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED oris r30,r30,_PAGE_COMBO@h /* Write the linux PTE atomically (setting busy) */ stdcx. r30,0,r6 @@ -468,7 +468,7 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) * go to out-of-line code to try to modify the HPTE. We look for * the bit at (1 >> (index + 32)) */ - andi. r0,r31,_PAGE_HASHPTE + rldicl. r0,r31,64-12,48 li r26,0 /* Default hidx */ beq htab_insert_pte @@ -726,11 +726,11 @@ BEGIN_FTR_SECTION bne- ht64_bail_ok END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) /* Prepare new PTE value (turn access RW into DIRTY, then - * add BUSY,HASHPTE and ACCESSED) + * add BUSY and ACCESSED) */ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ or r30,r30,r31 - ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE + ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED /* Write the linux PTE atomically (setting busy) */ stdcx. r30,0,r6 bne- 1b @@ -798,18 +798,21 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) /* Check if we may already be in the hashtable, in this case, we * go to out-of-line code to try to modify the HPTE */ - andi. r0,r31,_PAGE_HASHPTE + rldicl. r0,r31,64-12,48 bne ht64_modify_pte ht64_insert_pte: /* Clear hpte bits in new pte (we also clear BUSY btw) and - * add _PAGE_HASHPTE + * add _PAGE_HPTE_SUB0 */ lis r0,_PAGE_HPTEFLAGS@h ori r0,r0,_PAGE_HPTEFLAGS@l andc r30,r30,r0 +#ifdef CONFIG_PPC_64K_PAGES + oris r30,r30,_PAGE_HPTE_SUB0@h +#else ori r30,r30,_PAGE_HASHPTE - +#endif /* Phyical address in r5 */ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT sldi r5,r5,PAGE_SHIFT diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a02266dad21..8fa07f3f6c2 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -458,8 +458,7 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, old_pte = pte_val(*ptep); if (old_pte & _PAGE_BUSY) goto out; - new_pte = old_pte | _PAGE_BUSY | - _PAGE_ACCESSED | _PAGE_HASHPTE; + new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, old_pte, new_pte)); @@ -499,8 +498,11 @@ repeat: HPTES_PER_GROUP) & ~0x7UL; /* clear HPTE slot informations in new PTE */ +#ifdef CONFIG_PPC_64K_PAGES + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0; +#else new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; - +#endif /* Add in WIMG bits */ /* XXX We should store these in the pte */ /* --BenH: I think they are ... */ -- cgit From 7c5c4325d2d911fe54db3bc14149bfa558ae0acb Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Sat, 14 Jun 2008 09:41:42 +1000 Subject: powerpc: Change BAT code to use phys_addr_t Currently, the physical address is an unsigned long, but it should be phys_addr_t in set_bat, [v/p]_mapped_by_bat. Also, create a macro that can convert a large physical address into the correct format for programming the BAT registers. Signed-off-by: Becky Bruce Signed-off-by: Paul Mackerras --- arch/powerpc/mm/mmu_decl.h | 2 +- arch/powerpc/mm/pgtable_32.c | 6 +++--- arch/powerpc/mm/ppc_mmu_32.c | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 04802252a64..46585b7bb19 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -29,7 +29,7 @@ extern void hash_preload(struct mm_struct *mm, unsigned long ea, #ifdef CONFIG_PPC32 extern void mapin_ram(void); extern int map_page(unsigned long va, phys_addr_t pa, int flags); -extern void setbat(int index, unsigned long virt, unsigned long phys, +extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, int flags); extern void settlbcam(int index, unsigned long virt, phys_addr_t phys, unsigned int size, int flags, unsigned int pid); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index e0ff59f2113..c7584072dfc 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -53,9 +53,9 @@ extern void hash_page_sync(void); #endif #ifdef HAVE_BATS -extern unsigned long v_mapped_by_bats(unsigned long va); -extern unsigned long p_mapped_by_bats(unsigned long pa); -void setbat(int index, unsigned long virt, unsigned long phys, +extern phys_addr_t v_mapped_by_bats(unsigned long va); +extern unsigned long p_mapped_by_bats(phys_addr_t pa); +void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, int flags); #else /* !HAVE_BATS */ diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index cef9f156874..55ec17ed8d7 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -46,13 +46,13 @@ union ubat { /* BAT register values to be loaded */ struct batrange { /* stores address ranges mapped by BATs */ unsigned long start; unsigned long limit; - unsigned long phys; + phys_addr_t phys; } bat_addrs[8]; /* * Return PA for this VA if it is mapped by a BAT, or 0 */ -unsigned long v_mapped_by_bats(unsigned long va) +phys_addr_t v_mapped_by_bats(unsigned long va) { int b; for (b = 0; b < 4; ++b) @@ -64,7 +64,7 @@ unsigned long v_mapped_by_bats(unsigned long va) /* * Return VA for a given PA or 0 if not mapped */ -unsigned long p_mapped_by_bats(unsigned long pa) +unsigned long p_mapped_by_bats(phys_addr_t pa) { int b; for (b = 0; b < 4; ++b) @@ -119,7 +119,7 @@ unsigned long __init mmu_mapin_ram(void) * The parameters are not checked; in particular size must be a power * of 2 between 128k and 256M. */ -void __init setbat(int index, unsigned long virt, unsigned long phys, +void __init setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, int flags) { unsigned int bl; @@ -138,7 +138,7 @@ void __init setbat(int index, unsigned long virt, unsigned long phys, | _PAGE_COHERENT | _PAGE_GUARDED); wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ - bat[1].word[1] = phys | wimgxpp; + bat[1].word[1] = BAT_PHYS_ADDR(phys) | wimgxpp; #ifndef CONFIG_KGDB /* want user access for breakpoints */ if (flags & _PAGE_USER) #endif -- cgit From 316a405841cc03723d401b098d188aa4e3daa69b Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Sat, 14 Jun 2008 09:41:43 +1000 Subject: powerpc: Get rid of bitfields in ppc_bat struct While working on the 36-bit physical support, I noticed that there was exactly one line of code that actually referenced the bitfields. So I got rid of them and redefined ppc_bat as a struct of 2 u32's: batu and batl. I also got rid of the previous union that held the bitfield structs and a word representation of the batu/l values. This seems like a nicer solution than adding in a bunch of new bitfields to support extended bat addressing that would never get used, and just leaving the struct as-is would have been incomplete in the face of large physical addressing. Signed-off-by: Becky Bruce Signed-off-by: Paul Mackerras --- arch/powerpc/mm/ppc_mmu_32.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 55ec17ed8d7..c53145f6194 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -38,10 +38,7 @@ struct hash_pte *Hash, *Hash_end; unsigned long Hash_size, Hash_mask; unsigned long _SDR1; -union ubat { /* BAT register values to be loaded */ - struct ppc_bat bat; - u32 word[2]; -} BATS[8][2]; /* 8 pairs of IBAT, DBAT */ +struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */ struct batrange { /* stores address ranges mapped by BATs */ unsigned long start; @@ -124,7 +121,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, { unsigned int bl; int wimgxpp; - union ubat *bat = BATS[index]; + struct ppc_bat *bat = BATS[index]; if (((flags & _PAGE_NO_CACHE) == 0) && cpu_has_feature(CPU_FTR_NEED_COHERENT)) @@ -137,15 +134,15 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT | _PAGE_GUARDED); wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; - bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ - bat[1].word[1] = BAT_PHYS_ADDR(phys) | wimgxpp; + bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ + bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp; #ifndef CONFIG_KGDB /* want user access for breakpoints */ if (flags & _PAGE_USER) #endif - bat[1].bat.batu.vp = 1; + bat[1].batu |= 1; /* Vp = 1 */ if (flags & _PAGE_GUARDED) { /* G bit must be zero in IBATs */ - bat[0].word[0] = bat[0].word[1] = 0; + bat[0].batu = bat[0].batl = 0; } else { /* make IBAT same as DBAT */ bat[0] = bat[1]; @@ -158,8 +155,8 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, | _PAGE_COHERENT); wimgxpp |= (flags & _PAGE_RW)? ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; - bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ - bat->word[1] = phys | bl | 0x40; /* V=1 */ + bat->batu = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ + bat->batl = phys | bl | 0x40; /* V=1 */ } bat_addrs[index].start = virt; -- cgit From 3a8247cc2c856930f34eafce33f6a039227ee175 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 18 Jun 2008 15:29:12 +1000 Subject: powerpc: Only demote individual slices rather than whole process At present, if we have a kernel with a 64kB page size, and some process maps something that has to be mapped with 4kB pages (such as a cache-inhibited mapping on POWER5+, or the eHCA infiniband queue-pair pages), we change the process to use 4kB pages everywhere. This hurts the performance of HPC programs that access eHCA from userspace. With this patch, the kernel will only demote the slice(s) containing the eHCA or cache-inhibited mappings, leaving the remaining slices able to use 64kB hardware pages. This also changes the slice_get_unmapped_area code so that it is willing to place a 64k-page mapping into (or across) a 4k-page slice if there is no better alternative, i.e. if the program specified MAP_FIXED or if there is not sufficient space available in slices that are either empty or already have 64k-page mappings in them. Signed-off-by: Paul Mackerras Acked-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_utils_64.c | 36 ++++++-- arch/powerpc/mm/slice.c | 177 ++++++++++++++++++++++++++++++---------- 2 files changed, 160 insertions(+), 53 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index bf5b6d7ed30..8d3b58ebd38 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -695,6 +695,28 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) return pp; } +#ifdef CONFIG_PPC_MM_SLICES +unsigned int get_paca_psize(unsigned long addr) +{ + unsigned long index, slices; + + if (addr < SLICE_LOW_TOP) { + slices = get_paca()->context.low_slices_psize; + index = GET_LOW_SLICE_INDEX(addr); + } else { + slices = get_paca()->context.high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr); + } + return (slices >> (index * 4)) & 0xF; +} + +#else +unsigned int get_paca_psize(unsigned long addr) +{ + return get_paca()->context.user_psize; +} +#endif + /* * Demote a segment to using 4k pages. * For now this makes the whole process use 4k pages. @@ -702,13 +724,13 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) #ifdef CONFIG_PPC_64K_PAGES void demote_segment_4k(struct mm_struct *mm, unsigned long addr) { - if (mm->context.user_psize == MMU_PAGE_4K) + if (get_slice_psize(mm, addr) == MMU_PAGE_4K) return; - slice_set_user_psize(mm, MMU_PAGE_4K); + slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); #ifdef CONFIG_SPU_BASE spu_flush_all_slbs(mm); #endif - if (get_paca()->context.user_psize != MMU_PAGE_4K) { + if (get_paca_psize(addr) != MMU_PAGE_4K) { get_paca()->context = mm->context; slb_flush_and_rebolt(); } @@ -792,11 +814,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) DBG_LOW(" user region with no mm !\n"); return 1; } -#ifdef CONFIG_PPC_MM_SLICES psize = get_slice_psize(mm, ea); -#else - psize = mm->context.user_psize; -#endif ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize); break; @@ -868,7 +886,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) /* Do actual hashing */ #ifdef CONFIG_PPC_64K_PAGES /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ - if (pte_val(*ptep) & _PAGE_4K_PFN) { + if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) { demote_segment_4k(mm, ea); psize = MMU_PAGE_4K; } @@ -897,7 +915,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) } } if (user_region) { - if (psize != get_paca()->context.user_psize) { + if (psize != get_paca_psize(ea)) { get_paca()->context = mm->context; slb_flush_and_rebolt(); } diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index ad928edafb0..583be67ad93 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -215,10 +215,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz mm->context.high_slices_psize); spin_unlock_irqrestore(&slice_convert_lock, flags); - mb(); - /* XXX this is sub-optimal but will do for now */ - on_each_cpu(slice_flush_segments, mm, 0, 1); #ifdef CONFIG_SPU_BASE spu_flush_all_slbs(mm); #endif @@ -384,17 +381,34 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, return slice_find_area_bottomup(mm, len, mask, psize, use_cache); } +#define or_mask(dst, src) do { \ + (dst).low_slices |= (src).low_slices; \ + (dst).high_slices |= (src).high_slices; \ +} while (0) + +#define andnot_mask(dst, src) do { \ + (dst).low_slices &= ~(src).low_slices; \ + (dst).high_slices &= ~(src).high_slices; \ +} while (0) + +#ifdef CONFIG_PPC_64K_PAGES +#define MMU_PAGE_BASE MMU_PAGE_64K +#else +#define MMU_PAGE_BASE MMU_PAGE_4K +#endif + unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, unsigned long flags, unsigned int psize, int topdown, int use_cache) { - struct slice_mask mask; + struct slice_mask mask = {0, 0}; struct slice_mask good_mask; struct slice_mask potential_mask = {0,0} /* silence stupid warning */; - int pmask_set = 0; + struct slice_mask compat_mask = {0, 0}; int fixed = (flags & MAP_FIXED); int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); struct mm_struct *mm = current->mm; + unsigned long newaddr; /* Sanity checks */ BUG_ON(mm->task_size == 0); @@ -416,21 +430,48 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, if (!fixed && addr) { addr = _ALIGN_UP(addr, 1ul << pshift); slice_dbg(" aligned addr=%lx\n", addr); + /* Ignore hint if it's too large or overlaps a VMA */ + if (addr > mm->task_size - len || + !slice_area_is_free(mm, addr, len)) + addr = 0; } - /* First makeup a "good" mask of slices that have the right size + /* First make up a "good" mask of slices that have the right size * already */ good_mask = slice_mask_for_size(mm, psize); slice_print_mask(" good_mask", good_mask); - /* First check hint if it's valid or if we have MAP_FIXED */ - if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) { + /* + * Here "good" means slices that are already the right page size, + * "compat" means slices that have a compatible page size (i.e. + * 4k in a 64k pagesize kernel), and "free" means slices without + * any VMAs. + * + * If MAP_FIXED: + * check if fits in good | compat => OK + * check if fits in good | compat | free => convert free + * else bad + * If have hint: + * check if hint fits in good => OK + * check if hint fits in good | free => convert free + * Otherwise: + * search in good, found => OK + * search in good | free, found => convert free + * search in good | compat | free, found => convert free. + */ - /* Don't bother with hint if it overlaps a VMA */ - if (!fixed && !slice_area_is_free(mm, addr, len)) - goto search; +#ifdef CONFIG_PPC_64K_PAGES + /* If we support combo pages, we can allow 64k pages in 4k slices */ + if (psize == MMU_PAGE_64K) { + compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K); + if (fixed) + or_mask(good_mask, compat_mask); + } +#endif + /* First check hint if it's valid or if we have MAP_FIXED */ + if (addr != 0 || fixed) { /* Build a mask for the requested range */ mask = slice_range_to_mask(addr, len); slice_print_mask(" mask", mask); @@ -442,54 +483,66 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, slice_dbg(" fits good !\n"); return addr; } - - /* We don't fit in the good mask, check what other slices are - * empty and thus can be converted + } else { + /* Now let's see if we can find something in the existing + * slices for that size */ - potential_mask = slice_mask_for_free(mm); - potential_mask.low_slices |= good_mask.low_slices; - potential_mask.high_slices |= good_mask.high_slices; - pmask_set = 1; - slice_print_mask(" potential", potential_mask); - if (slice_check_fit(mask, potential_mask)) { - slice_dbg(" fits potential !\n"); - goto convert; + newaddr = slice_find_area(mm, len, good_mask, psize, topdown, + use_cache); + if (newaddr != -ENOMEM) { + /* Found within the good mask, we don't have to setup, + * we thus return directly + */ + slice_dbg(" found area at 0x%lx\n", newaddr); + return newaddr; } } - /* If we have MAP_FIXED and failed the above step, then error out */ + /* We don't fit in the good mask, check what other slices are + * empty and thus can be converted + */ + potential_mask = slice_mask_for_free(mm); + or_mask(potential_mask, good_mask); + slice_print_mask(" potential", potential_mask); + + if ((addr != 0 || fixed) && slice_check_fit(mask, potential_mask)) { + slice_dbg(" fits potential !\n"); + goto convert; + } + + /* If we have MAP_FIXED and failed the above steps, then error out */ if (fixed) return -EBUSY; - search: slice_dbg(" search...\n"); - /* Now let's see if we can find something in the existing slices - * for that size + /* If we had a hint that didn't work out, see if we can fit + * anywhere in the good area. */ - addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache); - if (addr != -ENOMEM) { - /* Found within the good mask, we don't have to setup, - * we thus return directly - */ - slice_dbg(" found area at 0x%lx\n", addr); - return addr; - } - - /* Won't fit, check what can be converted */ - if (!pmask_set) { - potential_mask = slice_mask_for_free(mm); - potential_mask.low_slices |= good_mask.low_slices; - potential_mask.high_slices |= good_mask.high_slices; - pmask_set = 1; - slice_print_mask(" potential", potential_mask); + if (addr) { + addr = slice_find_area(mm, len, good_mask, psize, topdown, + use_cache); + if (addr != -ENOMEM) { + slice_dbg(" found area at 0x%lx\n", addr); + return addr; + } } /* Now let's see if we can find something in the existing slices - * for that size + * for that size plus free slices */ addr = slice_find_area(mm, len, potential_mask, psize, topdown, use_cache); + +#ifdef CONFIG_PPC_64K_PAGES + if (addr == -ENOMEM && psize == MMU_PAGE_64K) { + /* retry the search with 4k-page slices included */ + or_mask(potential_mask, compat_mask); + addr = slice_find_area(mm, len, potential_mask, psize, + topdown, use_cache); + } +#endif + if (addr == -ENOMEM) return -ENOMEM; @@ -498,7 +551,13 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, slice_print_mask(" mask", mask); convert: - slice_convert(mm, mask, psize); + andnot_mask(mask, good_mask); + andnot_mask(mask, compat_mask); + if (mask.low_slices || mask.high_slices) { + slice_convert(mm, mask, psize); + if (psize > MMU_PAGE_BASE) + on_each_cpu(slice_flush_segments, mm, 0, 1); + } return addr; } @@ -598,6 +657,36 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) spin_unlock_irqrestore(&slice_convert_lock, flags); } +void slice_set_psize(struct mm_struct *mm, unsigned long address, + unsigned int psize) +{ + unsigned long i, flags; + u64 *p; + + spin_lock_irqsave(&slice_convert_lock, flags); + if (address < SLICE_LOW_TOP) { + i = GET_LOW_SLICE_INDEX(address); + p = &mm->context.low_slices_psize; + } else { + i = GET_HIGH_SLICE_INDEX(address); + p = &mm->context.high_slices_psize; + } + *p = (*p & ~(0xful << (i * 4))) | ((unsigned long) psize << (i * 4)); + spin_unlock_irqrestore(&slice_convert_lock, flags); + +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif +} + +void slice_set_range_psize(struct mm_struct *mm, unsigned long start, + unsigned long len, unsigned int psize) +{ + struct slice_mask mask = slice_range_to_mask(start, len); + + slice_convert(mm, mask, psize); +} + /* * is_hugepage_only_range() is used by generic code to verify wether * a normal mmap mapping (non hugetlbfs) is valid on a given area. -- cgit From 87e9ab13c36b838f7d557a6111dfdd29fcde85ad Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Thu, 19 Jun 2008 08:32:56 +1000 Subject: powerpc: hash_huge_page() should get the WIMG bits from the lpte Signed-off-by: Dave Kleikamp Cc: Jon Tollefson Cc: Adam Litke Cc: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/mm/hugetlbpage.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8fa07f3f6c2..0d12fba31bc 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -504,9 +504,8 @@ repeat: new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; #endif /* Add in WIMG bits */ - /* XXX We should store these in the pte */ - /* --BenH: I think they are ... */ - rflags |= _PAGE_COHERENT; + rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | + _PAGE_COHERENT | _PAGE_GUARDED)); /* Insert into the hash table, primary slot */ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, -- cgit From db7f37de2c8346c33cf9279fa9d8e8316e8d821c Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Tue, 1 Jul 2008 11:30:06 +1000 Subject: powerpc: Fix building of arch/powerpc/mm/mem.o when MEMORY_HOTPLUG=y and SPARSEMEM=n Currently the kernel fails to build with the above config options with: CC arch/powerpc/mm/mem.o arch/powerpc/mm/mem.c: In function 'arch_add_memory': arch/powerpc/mm/mem.c:130: error: implicit declaration of function 'create_section_mapping' This explicitly includes asm/sparsemem.h in arch/powerpc/mm/mem.c and moves the guards in include/asm-powerpc/sparsemem.h to protect the SPARSEMEM specific portions only. Signed-off-by: Tony Breeds Signed-off-by: Paul Mackerras --- arch/powerpc/mm/mem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 51f82d83bf1..776ba6ad5e1 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include -- cgit From 8342681d3e1f5fbd404c21b5e10f87277411f3eb Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Thu, 3 Jul 2008 13:35:54 +1000 Subject: powerpc/pseries: Split code into helper routines for drconf memory This splits off several pieces of code that parse the ibm,dynamic-reconfiguration-memory node of the device tree into separate helper routines. This is in preparation for the next commit that will use these helper routines. There are no functional changes in this patch. Signed-off-by: Nathan Fontenot Signed-off-by: Paul Mackerras --- arch/powerpc/mm/numa.c | 209 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 170 insertions(+), 39 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index dc704da363e..39328da32ed 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -268,6 +268,144 @@ static unsigned long __devinit read_n_cells(int n, const unsigned int **buf) return result; } +struct of_drconf_cell { + u64 base_addr; + u32 drc_index; + u32 reserved; + u32 aa_index; + u32 flags; +}; + +#define DRCONF_MEM_ASSIGNED 0x00000008 +#define DRCONF_MEM_AI_INVALID 0x00000040 +#define DRCONF_MEM_RESERVED 0x00000080 + +/* + * Read the next lmb list entry from the ibm,dynamic-memory property + * and return the information in the provided of_drconf_cell structure. + */ +static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp) +{ + const u32 *cp; + + drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp); + + cp = *cellp; + drmem->drc_index = cp[0]; + drmem->reserved = cp[1]; + drmem->aa_index = cp[2]; + drmem->flags = cp[3]; + + *cellp = cp + 4; +} + +/* + * Retreive and validate the ibm,dynamic-memory property of the device tree. + * + * The layout of the ibm,dynamic-memory property is a number N of lmb + * list entries followed by N lmb list entries. Each lmb list entry + * contains information as layed out in the of_drconf_cell struct above. + */ +static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) +{ + const u32 *prop; + u32 len, entries; + + prop = of_get_property(memory, "ibm,dynamic-memory", &len); + if (!prop || len < sizeof(unsigned int)) + return 0; + + entries = *prop++; + + /* Now that we know the number of entries, revalidate the size + * of the property read in to ensure we have everything + */ + if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int)) + return 0; + + *dm = prop; + return entries; +} + +/* + * Retreive and validate the ibm,lmb-size property for drconf memory + * from the device tree. + */ +static u64 of_get_lmb_size(struct device_node *memory) +{ + const u32 *prop; + u32 len; + + prop = of_get_property(memory, "ibm,lmb-size", &len); + if (!prop || len < sizeof(unsigned int)) + return 0; + + return read_n_cells(n_mem_size_cells, &prop); +} + +struct assoc_arrays { + u32 n_arrays; + u32 array_sz; + const u32 *arrays; +}; + +/* + * Retreive and validate the list of associativity arrays for drconf + * memory from the ibm,associativity-lookup-arrays property of the + * device tree.. + * + * The layout of the ibm,associativity-lookup-arrays property is a number N + * indicating the number of associativity arrays, followed by a number M + * indicating the size of each associativity array, followed by a list + * of N associativity arrays. + */ +static int of_get_assoc_arrays(struct device_node *memory, + struct assoc_arrays *aa) +{ + const u32 *prop; + u32 len; + + prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len); + if (!prop || len < 2 * sizeof(unsigned int)) + return -1; + + aa->n_arrays = *prop++; + aa->array_sz = *prop++; + + /* Now that we know the number of arrrays and size of each array, + * revalidate the size of the property read in. + */ + if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int)) + return -1; + + aa->arrays = prop; + return 0; +} + +/* + * This is like of_node_to_nid_single() for memory represented in the + * ibm,dynamic-reconfiguration-memory node. + */ +static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, + struct assoc_arrays *aa) +{ + int default_nid = 0; + int nid = default_nid; + int index; + + if (min_common_depth > 0 && min_common_depth <= aa->array_sz && + !(drmem->flags & DRCONF_MEM_AI_INVALID) && + drmem->aa_index < aa->n_arrays) { + index = drmem->aa_index * aa->array_sz + min_common_depth - 1; + nid = aa->arrays[index]; + + if (nid == 0xffff || nid >= MAX_NUMNODES) + nid = default_nid; + } + + return nid; +} + /* * Figure out to which domain a cpu belongs and stick it there. * Return the id of the domain used. @@ -355,57 +493,50 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start, */ static void __init parse_drconf_memory(struct device_node *memory) { - const unsigned int *lm, *dm, *aa; - unsigned int ls, ld, la; - unsigned int n, aam, aalen; - unsigned long lmb_size, size, start; - int nid, default_nid = 0; - unsigned int ai, flags; - - lm = of_get_property(memory, "ibm,lmb-size", &ls); - dm = of_get_property(memory, "ibm,dynamic-memory", &ld); - aa = of_get_property(memory, "ibm,associativity-lookup-arrays", &la); - if (!lm || !dm || !aa || - ls < sizeof(unsigned int) || ld < sizeof(unsigned int) || - la < 2 * sizeof(unsigned int)) + const u32 *dm; + unsigned int n, rc; + unsigned long lmb_size, size; + int nid; + struct assoc_arrays aa; + + n = of_get_drconf_memory(memory, &dm); + if (!n) return; - lmb_size = read_n_cells(n_mem_size_cells, &lm); - n = *dm++; /* number of LMBs */ - aam = *aa++; /* number of associativity lists */ - aalen = *aa++; /* length of each associativity list */ - if (ld < (n * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int) || - la < (aam * aalen + 2) * sizeof(unsigned int)) + lmb_size = of_get_lmb_size(memory); + if (!lmb_size) + return; + + rc = of_get_assoc_arrays(memory, &aa); + if (rc) return; for (; n != 0; --n) { - start = read_n_cells(n_mem_addr_cells, &dm); - ai = dm[2]; - flags = dm[3]; - dm += 4; - /* 0x80 == reserved, 0x8 = assigned to us */ - if ((flags & 0x80) || !(flags & 0x8)) + struct of_drconf_cell drmem; + + read_drconf_cell(&drmem, &dm); + + /* skip this block if the reserved bit is set in flags (0x80) + or if the block is not assigned to this partition (0x8) */ + if ((drmem.flags & DRCONF_MEM_RESERVED) + || !(drmem.flags & DRCONF_MEM_ASSIGNED)) continue; - nid = default_nid; - /* flags & 0x40 means associativity index is invalid */ - if (min_common_depth > 0 && min_common_depth <= aalen && - (flags & 0x40) == 0 && ai < aam) { - /* this is like of_node_to_nid_single */ - nid = aa[ai * aalen + min_common_depth - 1]; - if (nid == 0xffff || nid >= MAX_NUMNODES) - nid = default_nid; - } - fake_numa_create_new_node(((start + lmb_size) >> PAGE_SHIFT), - &nid); + nid = of_drconf_to_nid_single(&drmem, &aa); + + fake_numa_create_new_node( + ((drmem.base_addr + lmb_size) >> PAGE_SHIFT), + &nid); + node_set_online(nid); - size = numa_enforce_memory_limit(start, lmb_size); + size = numa_enforce_memory_limit(drmem.base_addr, lmb_size); if (!size) continue; - add_active_range(nid, start >> PAGE_SHIFT, - (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT)); + add_active_range(nid, drmem.base_addr >> PAGE_SHIFT, + (drmem.base_addr >> PAGE_SHIFT) + + (size >> PAGE_SHIFT)); } } -- cgit From 0db9360aaa9b95b0cf67f82874809f16e68068eb Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Thu, 3 Jul 2008 13:25:08 +1000 Subject: powerpc/pseries: Update numa association of hotplug memory add for drconf memory Update the association of a memory section with a numa node that occurs during hotplug add of a memory section. This adds a check in the hot_add_scn_to_nid() routine for the ibm,dynamic-reconfiguration-memory node in the device tree. If present the new hot_add_drconf_scn_to_nid() routine is invoked, which can properly parse the ibm,dynamic-reconfiguration-memory node of the device tree and make the proper numa node associations. This also introduces the valid_hot_add_scn() routine as a helper function for code that is common to the hot_add_scn_to_nid() and hot_add_drconf_scn_to_nid() routines. Signed-off-by: Nathan Fontenot Signed-off-by: Paul Mackerras --- arch/powerpc/mm/numa.c | 101 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 83 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 39328da32ed..cf4bffba6f7 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -900,6 +900,79 @@ static int __init early_numa(char *p) early_param("numa", early_numa); #ifdef CONFIG_MEMORY_HOTPLUG +/* + * Validate the node associated with the memory section we are + * trying to add. + */ +int valid_hot_add_scn(int *nid, unsigned long start, u32 lmb_size, + unsigned long scn_addr) +{ + nodemask_t nodes; + + if (*nid < 0 || !node_online(*nid)) + *nid = any_online_node(NODE_MASK_ALL); + + if ((scn_addr >= start) && (scn_addr < (start + lmb_size))) { + nodes_setall(nodes); + while (NODE_DATA(*nid)->node_spanned_pages == 0) { + node_clear(*nid, nodes); + *nid = any_online_node(nodes); + } + + return 1; + } + + return 0; +} + +/* + * Find the node associated with a hot added memory section represented + * by the ibm,dynamic-reconfiguration-memory node. + */ +static int hot_add_drconf_scn_to_nid(struct device_node *memory, + unsigned long scn_addr) +{ + const u32 *dm; + unsigned int n, rc; + unsigned long lmb_size; + int default_nid = any_online_node(NODE_MASK_ALL); + int nid; + struct assoc_arrays aa; + + n = of_get_drconf_memory(memory, &dm); + if (!n) + return default_nid;; + + lmb_size = of_get_lmb_size(memory); + if (!lmb_size) + return default_nid; + + rc = of_get_assoc_arrays(memory, &aa); + if (rc) + return default_nid; + + for (; n != 0; --n) { + struct of_drconf_cell drmem; + + read_drconf_cell(&drmem, &dm); + + /* skip this block if it is reserved or not assigned to + * this partition */ + if ((drmem.flags & DRCONF_MEM_RESERVED) + || !(drmem.flags & DRCONF_MEM_ASSIGNED)) + continue; + + nid = of_drconf_to_nid_single(&drmem, &aa); + + if (valid_hot_add_scn(&nid, drmem.base_addr, lmb_size, + scn_addr)) + return nid; + } + + BUG(); /* section address should be found above */ + return 0; +} + /* * Find the node associated with a hot added memory section. Section * corresponds to a SPARSEMEM section, not an LMB. It is assumed that @@ -908,12 +981,17 @@ early_param("numa", early_numa); int hot_add_scn_to_nid(unsigned long scn_addr) { struct device_node *memory = NULL; - nodemask_t nodes; - int default_nid = any_online_node(NODE_MASK_ALL); int nid; if (!numa_enabled || (min_common_depth < 0)) - return default_nid; + return any_online_node(NODE_MASK_ALL); + + memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (memory) { + nid = hot_add_drconf_scn_to_nid(memory, scn_addr); + of_node_put(memory); + return nid; + } while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { unsigned long start, size; @@ -932,13 +1010,9 @@ ha_new_range: size = read_n_cells(n_mem_size_cells, &memcell_buf); nid = of_node_to_nid_single(memory); - /* Domains not present at boot default to 0 */ - if (nid < 0 || !node_online(nid)) - nid = default_nid; - - if ((scn_addr >= start) && (scn_addr < (start + size))) { + if (valid_hot_add_scn(&nid, start, size, scn_addr)) { of_node_put(memory); - goto got_nid; + return nid; } if (--ranges) /* process all ranges in cell */ @@ -946,14 +1020,5 @@ ha_new_range: } BUG(); /* section address should be found above */ return 0; - - /* Temporary code to ensure that returned node is not empty */ -got_nid: - nodes_setall(nodes); - while (NODE_DATA(nid)->node_spanned_pages == 0) { - node_clear(nid, nodes); - nid = any_online_node(nodes); - } - return nid; } #endif /* CONFIG_MEMORY_HOTPLUG */ -- cgit From 392096e98fd55e54035978fe03796fca8d26a574 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 3 Jul 2008 17:10:07 +1000 Subject: generic-ipi: fix linux-next tree build failure Today's linux-next build (powerpc ppc64_defconfig) failed like this: arch/powerpc/mm/tlb_64.c: In function 'pgtable_free_now': arch/powerpc/mm/tlb_64.c:66: error: too many arguments to function 'smp_call_function' arch/powerpc/kernel/machine_kexec_64.c: In function 'kexec_prepare_cpus': arch/powerpc/kernel/machine_kexec_64.c:175: error: too many arguments to function 'smp_call_function' Signed-off-by: Stephen Rothwell Acked-by: Jens Axboe Cc: Paul Mackerras Cc: Signed-off-by: Ingo Molnar --- arch/powerpc/mm/tlb_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index e2d867ce1c7..69ad829a7fa 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -66,7 +66,7 @@ static void pgtable_free_now(pgtable_free_t pgf) { pte_freelist_forced_free++; - smp_call_function(pte_free_smp_sync, NULL, 0, 1); + smp_call_function(pte_free_smp_sync, NULL, 1); pgtable_free(pgf); } -- cgit From 1bc54c03117b90716e0dedd7abb2a20405de65df Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 8 Jul 2008 15:54:40 +1000 Subject: powerpc: rework 4xx PTE access and TLB miss This is some preliminary work to improve TLB management on SW loaded TLB powerpc platforms. This introduce support for non-atomic PTE operations in pgtable-ppc32.h and removes write back to the PTE from the TLB miss handlers. In addition, the DSI interrupt code no longer tries to fixup write permission, this is left to generic code, and _PAGE_HWWRITE is gone. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Josh Boyer --- arch/powerpc/mm/44x_mmu.c | 29 ++++++++++++++++++++++++++++- arch/powerpc/mm/fault.c | 3 ++- 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 953fb919eb0..98052ac9658 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "mmu_decl.h" @@ -37,11 +38,35 @@ unsigned int tlb_44x_index; /* = 0 */ unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS; int icache_44x_need_flush; +static void __init ppc44x_update_tlb_hwater(void) +{ + extern unsigned int tlb_44x_patch_hwater_D[]; + extern unsigned int tlb_44x_patch_hwater_I[]; + + /* The TLB miss handlers hard codes the watermark in a cmpli + * instruction to improve performances rather than loading it + * from the global variable. Thus, we patch the instructions + * in the 2 TLB miss handlers when updating the value + */ + tlb_44x_patch_hwater_D[0] = (tlb_44x_patch_hwater_D[0] & 0xffff0000) | + tlb_44x_hwater; + flush_icache_range((unsigned long)&tlb_44x_patch_hwater_D[0], + (unsigned long)&tlb_44x_patch_hwater_D[1]); + tlb_44x_patch_hwater_I[0] = (tlb_44x_patch_hwater_I[0] & 0xffff0000) | + tlb_44x_hwater; + flush_icache_range((unsigned long)&tlb_44x_patch_hwater_I[0], + (unsigned long)&tlb_44x_patch_hwater_I[1]); +} + /* * "Pins" a 256MB TLB entry in AS0 for kernel lowmem */ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) { + unsigned int entry = tlb_44x_hwater--; + + ppc44x_update_tlb_hwater(); + __asm__ __volatile__( "tlbwe %2,%3,%4\n" "tlbwe %1,%3,%5\n" @@ -50,7 +75,7 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), "r" (phys), "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M), - "r" (tlb_44x_hwater--), /* slot for this TLB entry */ + "r" (entry), "i" (PPC44x_TLB_PAGEID), "i" (PPC44x_TLB_XLAT), "i" (PPC44x_TLB_ATTRIB)); @@ -58,6 +83,8 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) void __init MMU_init_hw(void) { + ppc44x_update_tlb_hwater(); + flush_instruction_cache(); } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 7b251079926..1707d00331f 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -306,7 +306,8 @@ good_area: flush_dcache_icache_page(page); set_bit(PG_arch_1, &page->flags); } - pte_update(ptep, 0, _PAGE_HWEXEC); + pte_update(ptep, 0, _PAGE_HWEXEC | + _PAGE_ACCESSED); _tlbie(address, mm->context.id); pte_unmap_unlock(ptep, ptl); up_read(&mm->mmap_sem); -- cgit From 2bf3016f89344d4cd8b2c96bbec2b642a2bde413 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Thu, 10 Jul 2008 01:09:23 +1000 Subject: powerpc: Fix problems with 32bit PPC's running with >= 4GB of RAM This patch enables 32bit PPC's (with 36bit physical address space, e.g. IBM/AMCC PPC44x) to run with >= 4GB of RAM. Mostly its just replacing types (unsigned long -> phys_addr_t). Tested on an AMCC Katmai with 4GB of DDR2. Signed-off-by: Stefan Roese Signed-off-by: Josh Boyer --- arch/powerpc/mm/init_32.c | 4 ++-- arch/powerpc/mm/mem.c | 8 ++++---- arch/powerpc/mm/mmu_decl.h | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 45418590b6a..388ceda632f 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -57,8 +57,8 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -unsigned long total_memory; -unsigned long total_lowmem; +phys_addr_t total_memory; +phys_addr_t total_lowmem; phys_addr_t memstart_addr = (phys_addr_t)~0ull; EXPORT_SYMBOL(memstart_addr); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 776ba6ad5e1..1ca2235f096 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -330,7 +330,7 @@ static int __init mark_nonram_nosave(void) void __init paging_init(void) { unsigned long total_ram = lmb_phys_mem_size(); - unsigned long top_of_ram = lmb_end_of_DRAM(); + phys_addr_t top_of_ram = lmb_end_of_DRAM(); unsigned long max_zone_pfns[MAX_NR_ZONES]; #ifdef CONFIG_PPC32 @@ -349,10 +349,10 @@ void __init paging_init(void) kmap_prot = PAGE_KERNEL; #endif /* CONFIG_HIGHMEM */ - printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", - top_of_ram, total_ram); + printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%lx\n", + (u64)top_of_ram, total_ram); printk(KERN_DEBUG "Memory hole size: %ldMB\n", - (top_of_ram - total_ram) >> 20); + (long int)((top_of_ram - total_ram) >> 20)); memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); #ifdef CONFIG_HIGHMEM max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 46585b7bb19..fab3cfad409 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -49,8 +49,8 @@ extern unsigned int num_tlbcam_entries; extern unsigned long ioremap_bot; extern unsigned long __max_low_memory; extern phys_addr_t __initial_memory_limit_addr; -extern unsigned long total_memory; -extern unsigned long total_lowmem; +extern phys_addr_t total_memory; +extern phys_addr_t total_lowmem; extern phys_addr_t memstart_addr; extern phys_addr_t lowmem_end_addr; -- cgit