diff options
Diffstat (limited to 'arch/powerpc')
30 files changed, 1961 insertions, 1140 deletions
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h new file mode 100644 index 00000000000..9b198d1b3b2 --- /dev/null +++ b/arch/powerpc/include/asm/disassemble.h @@ -0,0 +1,80 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __ASM_PPC_DISASSEMBLE_H__ +#define __ASM_PPC_DISASSEMBLE_H__ + +#include <linux/types.h> + +static inline unsigned int get_op(u32 inst) +{ + return inst >> 26; +} + +static inline unsigned int get_xop(u32 inst) +{ + return (inst >> 1) & 0x3ff; +} + +static inline unsigned int get_sprn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_dcrn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_rt(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_rs(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_ra(u32 inst) +{ + return (inst >> 16) & 0x1f; +} + +static inline unsigned int get_rb(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_rc(u32 inst) +{ + return inst & 0x1; +} + +static inline unsigned int get_ws(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_d(u32 inst) +{ + return inst & 0xffff; +} + +#endif /* __ASM_PPC_DISASSEMBLE_H__ */ diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h new file mode 100644 index 00000000000..f49031b632c --- /dev/null +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -0,0 +1,61 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __ASM_44X_H__ +#define __ASM_44X_H__ + +#include <linux/kvm_host.h> + +#define PPC44x_TLB_SIZE 64 + +/* If the guest is expecting it, this can be as large as we like; we'd just + * need to find some way of advertising it. */ +#define KVM44x_GUEST_TLB_SIZE 64 + +struct kvmppc_44x_shadow_ref { + struct page *page; + u16 gtlb_index; + u8 writeable; + u8 tid; +}; + +struct kvmppc_vcpu_44x { + /* Unmodified copy of the guest's TLB. */ + struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE]; + + /* References to guest pages in the hardware TLB. */ + struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; + + /* State of the shadow TLB at guest context switch time. */ + struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; + u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; + + struct kvm_vcpu vcpu; +}; + +static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) +{ + return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); +} + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); +void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); +void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); + +#endif /* __ASM_44X_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 34b52b7180c..c1e436fe773 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -64,27 +64,58 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct tlbe { +struct kvmppc_44x_tlbe { u32 tid; /* Only the low 8 bits are used. */ u32 word0; u32 word1; u32 word2; }; -struct kvm_arch { +enum kvm_exit_types { + MMIO_EXITS, + DCR_EXITS, + SIGNAL_EXITS, + ITLB_REAL_MISS_EXITS, + ITLB_VIRT_MISS_EXITS, + DTLB_REAL_MISS_EXITS, + DTLB_VIRT_MISS_EXITS, + SYSCALL_EXITS, + ISI_EXITS, + DSI_EXITS, + EMULATED_INST_EXITS, + EMULATED_MTMSRWE_EXITS, + EMULATED_WRTEE_EXITS, + EMULATED_MTSPR_EXITS, + EMULATED_MFSPR_EXITS, + EMULATED_MTMSR_EXITS, + EMULATED_MFMSR_EXITS, + EMULATED_TLBSX_EXITS, + EMULATED_TLBWE_EXITS, + EMULATED_RFI_EXITS, + DEC_EXITS, + EXT_INTR_EXITS, + HALT_WAKEUP, + USR_PR_INST, + FP_UNAVAIL, + DEBUG_EXITS, + TIMEINGUEST, + __NUMBER_OF_KVM_EXIT_TYPES }; -struct kvm_vcpu_arch { - /* Unmodified copy of the guest's TLB. */ - struct tlbe guest_tlb[PPC44x_TLB_SIZE]; - /* TLB that's actually used when the guest is running. */ - struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; - /* Pages which are referenced in the shadow TLB. */ - struct page *shadow_pages[PPC44x_TLB_SIZE]; +/* allow access to big endian 32bit upper/lower parts and 64bit var */ +struct kvmppc_exit_timing { + union { + u64 tv64; + struct { + u32 tbu, tbl; + } tv32; + }; +}; - /* Track which TLB entries we've modified in the current exit. */ - u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; +struct kvm_arch { +}; +struct kvm_vcpu_arch { u32 host_stack; u32 host_pid; u32 host_dbcr0; @@ -94,32 +125,32 @@ struct kvm_vcpu_arch { u32 host_msr; u64 fpr[32]; - u32 gpr[32]; + ulong gpr[32]; - u32 pc; + ulong pc; u32 cr; - u32 ctr; - u32 lr; - u32 xer; + ulong ctr; + ulong lr; + ulong xer; - u32 msr; + ulong msr; u32 mmucr; - u32 sprg0; - u32 sprg1; - u32 sprg2; - u32 sprg3; - u32 sprg4; - u32 sprg5; - u32 sprg6; - u32 sprg7; - u32 srr0; - u32 srr1; - u32 csrr0; - u32 csrr1; - u32 dsrr0; - u32 dsrr1; - u32 dear; - u32 esr; + ulong sprg0; + ulong sprg1; + ulong sprg2; + ulong sprg3; + ulong sprg4; + ulong sprg5; + ulong sprg6; + ulong sprg7; + ulong srr0; + ulong srr1; + ulong csrr0; + ulong csrr1; + ulong dsrr0; + ulong dsrr1; + ulong dear; + ulong esr; u32 dec; u32 decar; u32 tbl; @@ -127,7 +158,7 @@ struct kvm_vcpu_arch { u32 tcr; u32 tsr; u32 ivor[16]; - u32 ivpr; + ulong ivpr; u32 pir; u32 shadow_pid; @@ -140,9 +171,22 @@ struct kvm_vcpu_arch { u32 dbcr0; u32 dbcr1; +#ifdef CONFIG_KVM_EXIT_TIMING + struct kvmppc_exit_timing timing_exit; + struct kvmppc_exit_timing timing_last_enter; + u32 last_exit_type; + u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_last_exit; + struct dentry *debugfs_exit_timing; +#endif + u32 last_inst; - u32 fault_dear; - u32 fault_esr; + ulong fault_dear; + ulong fault_esr; gpa_t paddr_accessed; u8 io_gpr; /* GPR used as IO source/target */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index bb62ad876de..36d2a50a848 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -29,11 +29,6 @@ #include <linux/kvm_types.h> #include <linux/kvm_host.h> -struct kvm_tlb { - struct tlbe guest_tlb[PPC44x_TLB_SIZE]; - struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; -}; - enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ @@ -41,9 +36,6 @@ enum emulation_result { EMULATE_FAIL, /* can't emulate this instruction */ }; -extern const unsigned char exception_priority[]; -extern const unsigned char priority_exception[]; - extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern char kvmppc_handlers_start[]; extern unsigned long kvmppc_handler_len; @@ -58,51 +50,44 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); -extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, - u64 asid, u32 flags); -extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, - gva_t eend, u32 asid); +extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, + u64 asid, u32 flags, u32 max_bytes, + unsigned int gtlb_idx); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); -/* XXX Book E specific */ -extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i); - -extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); - -static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception) -{ - unsigned int priority = exception_priority[exception]; - set_bit(priority, &vcpu->arch.pending_exceptions); -} - -static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) -{ - unsigned int priority = exception_priority[exception]; - clear_bit(priority, &vcpu->arch.pending_exceptions); -} - -/* Helper function for "full" MSR writes. No need to call this if only EE is - * changing. */ -static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) -{ - if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) - kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); - - vcpu->arch.msr = new_msr; - - if (vcpu->arch.msr & MSR_WE) - kvm_vcpu_block(vcpu); -} - -static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) -{ - if (vcpu->arch.pid != new_pid) { - vcpu->arch.pid = new_pid; - vcpu->arch.swap_pid = 1; - } -} +/* Core-specific hooks */ + +extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, + unsigned int id); +extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu); +extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu); +extern int kvmppc_core_check_processor_compat(void); +extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr); + +extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); + +extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu); +extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu); + +extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); +extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq); + +extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int op, int *advance); +extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); +extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); + +extern int kvmppc_booke_init(void); +extern void kvmppc_booke_exit(void); extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h index 8a97cfb08b7..27cc6fdcd3b 100644 --- a/arch/powerpc/include/asm/mmu-44x.h +++ b/arch/powerpc/include/asm/mmu-44x.h @@ -56,6 +56,7 @@ #ifndef __ASSEMBLY__ extern unsigned int tlb_44x_hwater; +extern unsigned int tlb_44x_index; typedef struct { unsigned int id; diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index c32da6f9799..375258559ae 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -22,11 +22,11 @@ static inline cpumask_t node_to_cpumask(int node) return numa_cpumask_lookup_table[node]; } +#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node]) + static inline int node_to_first_cpu(int node) { - cpumask_t tmp; - tmp = node_to_cpumask(node); - return first_cpu(tmp); + return cpumask_first(cpumask_of_node(node)); } int of_node_to_nid(struct device_node *device); @@ -46,9 +46,12 @@ static inline int pcibus_to_node(struct pci_bus *bus) node_to_cpumask(pcibus_to_node(bus)) \ ) +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_of_node(pcibus_to_node(bus))) + /* sched_domains SD_NODE_INIT for PPC64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ @@ -109,6 +112,8 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev, #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) +#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) +#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) #endif #endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 661d07d2146..9937fe44555 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -23,9 +23,6 @@ #include <linux/mm.h> #include <linux/suspend.h> #include <linux/hrtimer.h> -#ifdef CONFIG_KVM -#include <linux/kvm_host.h> -#endif #ifdef CONFIG_PPC64 #include <linux/time.h> #include <linux/hardirq.h> @@ -51,6 +48,9 @@ #ifdef CONFIG_PPC_ISERIES #include <asm/iseries/alpaca.h> #endif +#ifdef CONFIG_KVM +#include <asm/kvm_44x.h> +#endif #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) #include "head_booke.h" @@ -357,12 +357,10 @@ int main(void) DEFINE(PTE_SIZE, sizeof(pte_t)); #ifdef CONFIG_KVM - DEFINE(TLBE_BYTES, sizeof(struct tlbe)); + DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe)); DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); - DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); - DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); @@ -385,5 +383,16 @@ int main(void) DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif +#ifdef CONFIG_KVM_EXIT_TIMING + DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, + arch.timing_exit.tv32.tbu)); + DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu, + arch.timing_exit.tv32.tbl)); + DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu, + arch.timing_last_enter.tv32.tbu)); + DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu, + arch.timing_last_enter.tv32.tbl)); +#endif + return 0; } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ac222d0ab12..23b8b5e36f9 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -237,7 +237,7 @@ void fixup_irqs(cpumask_t map) mask = map; } if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, mask); + irq_desc[irq].chip->set_affinity(irq, &mask); else if (irq_desc[irq].action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 51b201ddf9a..fb7049c054c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -33,6 +33,7 @@ #include <linux/mqueue.h> #include <linux/hardirq.h> #include <linux/utsname.h> +#include <linux/kernel_stat.h> #include <asm/pgtable.h> #include <asm/uaccess.h> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8ac3f721d23..65484b2200b 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -59,13 +59,9 @@ struct thread_info *secondary_ti; -cpumask_t cpu_possible_map = CPU_MASK_NONE; -cpumask_t cpu_online_map = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_possible_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e1f3a514042..c9564031a2a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -256,8 +256,10 @@ void account_system_vtime(struct task_struct *tsk) delta += sys_time; get_paca()->system_time = 0; } - account_system_time(tsk, 0, delta); - account_system_time_scaled(tsk, deltascaled); + if (in_irq() || idle_task(smp_processor_id()) != tsk) + account_system_time(tsk, 0, delta, deltascaled); + else + account_idle_time(delta); per_cpu(cputime_last_delta, smp_processor_id()) = delta; per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; local_irq_restore(flags); @@ -275,10 +277,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick) utime = get_paca()->user_time; get_paca()->user_time = 0; - account_user_time(tsk, utime); - utimescaled = cputime_to_scaled(utime); - account_user_time_scaled(tsk, utimescaled); + account_user_time(tsk, utime, utimescaled); } /* @@ -338,8 +338,12 @@ void calculate_steal_time(void) tb = mftb(); purr = mfspr(SPRN_PURR); stolen = (tb - pme->tb) - (purr - pme->purr); - if (stolen > 0) - account_steal_time(current, stolen); + if (stolen > 0) { + if (idle_task(smp_processor_id()) != current) + account_steal_time(stolen); + else + account_idle_time(stolen); + } pme->tb = tb; pme->purr = purr; } @@ -844,7 +848,7 @@ static void register_decrementer_clockevent(int cpu) struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; *dec = decrementer_clockevent; - dec->cpumask = cpumask_of_cpu(cpu); + dec->cpumask = cpumask_of(cpu); printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n", dec->name, dec->mult, dec->shift, cpu); diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c new file mode 100644 index 00000000000..a66bec57265 --- /dev/null +++ b/arch/powerpc/kvm/44x.c @@ -0,0 +1,228 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <linux/err.h> + +#include <asm/reg.h> +#include <asm/cputable.h> +#include <asm/tlbflush.h> +#include <asm/kvm_44x.h> +#include <asm/kvm_ppc.h> + +#include "44x_tlb.h" + +/* Note: clearing MSR[DE] just means that the debug interrupt will not be + * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. + * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt + * will be delivered as an "imprecise debug event" (which is indicated by + * DBSR[IDE]. + */ +static void kvm44x_disable_debug_interrupts(void) +{ + mtmsr(mfmsr() & ~MSR_DE); +} + +void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) +{ + kvm44x_disable_debug_interrupts(); + + mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); + mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); + mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); + mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); + mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); + mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); + mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); + mtmsr(vcpu->arch.host_msr); +} + +void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) +{ + struct kvm_guest_debug *dbg = &vcpu->guest_debug; + u32 dbcr0 = 0; + + vcpu->arch.host_msr = mfmsr(); + kvm44x_disable_debug_interrupts(); + + /* Save host debug register state. */ + vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); + vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); + vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); + vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); + vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); + vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); + vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); + + /* set registers up for guest */ + + if (dbg->bp[0]) { + mtspr(SPRN_IAC1, dbg->bp[0]); + dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; + } + if (dbg->bp[1]) { + mtspr(SPRN_IAC2, dbg->bp[1]); + dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; + } + if (dbg->bp[2]) { + mtspr(SPRN_IAC3, dbg->bp[2]); + dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; + } + if (dbg->bp[3]) { + mtspr(SPRN_IAC4, dbg->bp[3]); + dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; + } + + mtspr(SPRN_DBCR0, dbcr0); + mtspr(SPRN_DBCR1, 0); + mtspr(SPRN_DBCR2, 0); +} + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + kvmppc_44x_tlb_load(vcpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ + kvmppc_44x_tlb_put(vcpu); +} + +int kvmppc_core_check_processor_compat(void) +{ + int r; + + if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) + r = 0; + else + r = -ENOTSUPP; + + return r; +} + +int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0]; + int i; + + tlbe->tid = 0; + tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; + tlbe->word1 = 0; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; + + tlbe++; + tlbe->tid = 0; + tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; + tlbe->word1 = 0xef600000; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR + | PPC44x_TLB_I | PPC44x_TLB_G; + + /* Since the guest can directly access the timebase, it must know the + * real timebase frequency. Accordingly, it must see the state of + * CCR1[TCS]. */ + vcpu->arch.ccr1 = mfspr(SPRN_CCR1); + + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) + vcpu_44x->shadow_refs[i].gtlb_index = -1; + + return 0; +} + +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ +int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe; + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); + if (index == -1) { + tr->valid = 0; + return 0; + } + + gtlbe = &vcpu_44x->guest_tlb[index]; + + tr->physical_address = tlb_xlate(gtlbe, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; + + return 0; +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvmppc_vcpu_44x *vcpu_44x; + struct kvm_vcpu *vcpu; + int err; + + vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu_44x) { + err = -ENOMEM; + goto out; + } + + vcpu = &vcpu_44x->vcpu; + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + return vcpu; + +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu_44x); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu_44x); +} + +static int kvmppc_44x_init(void) +{ + int r; + + r = kvmppc_booke_init(); + if (r) + return r; + + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE); +} + +static void kvmppc_44x_exit(void) +{ + kvmppc_booke_exit(); +} + +module_init(kvmppc_44x_init); +module_exit(kvmppc_44x_exit); diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c new file mode 100644 index 00000000000..82489a743a6 --- /dev/null +++ b/arch/powerpc/kvm/44x_emulate.c @@ -0,0 +1,371 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <asm/kvm_ppc.h> +#include <asm/dcr.h> +#include <asm/dcr-regs.h> +#include <asm/disassemble.h> +#include <asm/kvm_44x.h> +#include "timing.h" + +#include "booke.h" +#include "44x_tlb.h" + +#define OP_RFI 19 + +#define XOP_RFI 50 +#define XOP_MFMSR 83 +#define XOP_WRTEE 131 +#define XOP_MTMSR 146 +#define XOP_WRTEEI 163 +#define XOP_MFDCR 323 +#define XOP_MTDCR 451 +#define XOP_TLBSX 914 +#define XOP_ICCCI 966 +#define XOP_TLBWE 978 + +static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.srr0; + kvmppc_set_msr(vcpu, vcpu->arch.srr1); +} + +int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + int dcrn; + int ra; + int rb; + int rc; + int rs; + int rt; + int ws; + + switch (get_op(inst)) { + case OP_RFI: + switch (get_xop(inst)) { + case XOP_RFI: + kvmppc_emul_rfi(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS); + *advance = 0; + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case 31: + switch (get_xop(inst)) { + + case XOP_MFMSR: + rt = get_rt(inst); + vcpu->arch.gpr[rt] = vcpu->arch.msr; + kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); + break; + + case XOP_MTMSR: + rs = get_rs(inst); + kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); + kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); + break; + + case XOP_WRTEE: + rs = get_rs(inst); + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (vcpu->arch.gpr[rs] & MSR_EE); + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); + break; + + case XOP_WRTEEI: + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (inst & MSR_EE); + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); + break; + + case XOP_MFDCR: + dcrn = get_dcrn(inst); + rt = get_rt(inst); + + /* The guest may access CPR0 registers to determine the timebase + * frequency, and it must know the real host frequency because it + * can directly access the timebase registers. + * + * It would be possible to emulate those accesses in userspace, + * but userspace can really only figure out the end frequency. + * We could decompose that into the factors that compute it, but + * that's tricky math, and it's easier to just report the real + * CPR0 values. + */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; + break; + case DCRN_CPR0_CONFIG_DATA: + local_irq_disable(); + mtdcr(DCRN_CPR0_CONFIG_ADDR, + vcpu->arch.cpr0_cfgaddr); + vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); + local_irq_enable(); + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = 0; + run->dcr.is_write = 0; + vcpu->arch.io_gpr = rt; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + emulated = EMULATE_DO_DCR; + } + + break; + + case XOP_MTDCR: + dcrn = get_dcrn(inst); + rs = get_rs(inst); + + /* emulate some access in kernel */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = vcpu->arch.gpr[rs]; + run->dcr.is_write = 1; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + emulated = EMULATE_DO_DCR; + } + + break; + + case XOP_TLBWE: + ra = get_ra(inst); + rs = get_rs(inst); + ws = get_ws(inst); + emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); + break; + + case XOP_TLBSX: + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + rc = get_rc(inst); + emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); + break; + + case XOP_ICCCI: + break; + + default: + emulated = EMULATE_FAIL; + } + + break; + + default: + emulated = EMULATE_FAIL; + } + + return emulated; +} + +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + switch (sprn) { + case SPRN_MMUCR: + vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; + case SPRN_PID: + kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; + case SPRN_CCR0: + vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; + case SPRN_CCR1: + vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; + case SPRN_DEAR: + vcpu->arch.dear = vcpu->arch.gpr[rs]; break; + case SPRN_ESR: + vcpu->arch.esr = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR0: + vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR1: + vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; + case SPRN_TSR: + vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; + case SPRN_TCR: + vcpu->arch.tcr = vcpu->arch.gpr[rs]; + kvmppc_emulate_dec(vcpu); + break; + + /* Note: SPRG4-7 are user-readable. These values are + * loaded into the real SPRGs when resuming the + * guest. */ + case SPRN_SPRG4: + vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG5: + vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG6: + vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG7: + vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; + + case SPRN_IVPR: + vcpu->arch.ivpr = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR0: + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR1: + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR2: + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR3: + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR4: + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR5: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR6: + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR7: + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR8: + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR9: + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR10: + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR11: + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR12: + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR13: + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR14: + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR15: + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; + break; + + default: + return EMULATE_FAIL; + } + + kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); + return EMULATE_DONE; +} + +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + switch (sprn) { + /* 440 */ + case SPRN_MMUCR: + vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; + case SPRN_CCR0: + vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; + case SPRN_CCR1: + vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; + + /* Book E */ + case SPRN_PID: + vcpu->arch.gpr[rt] = vcpu->arch.pid; break; + case SPRN_IVPR: + vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; + case SPRN_DEAR: + vcpu->arch.gpr[rt] = vcpu->arch.dear; break; + case SPRN_ESR: + vcpu->arch.gpr[rt] = vcpu->arch.esr; break; + case SPRN_DBCR0: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; + case SPRN_DBCR1: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; + + case SPRN_IVOR0: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + break; + case SPRN_IVOR1: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + break; + case SPRN_IVOR2: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + break; + case SPRN_IVOR3: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + break; + case SPRN_IVOR4: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + break; + case SPRN_IVOR5: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + break; + case SPRN_IVOR6: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + break; + case SPRN_IVOR7: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + break; + case SPRN_IVOR8: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + break; + case SPRN_IVOR9: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + break; + case SPRN_IVOR10: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + break; + case SPRN_IVOR11: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + break; + case SPRN_IVOR12: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + break; + case SPRN_IVOR13: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + break; + case SPRN_IVOR14: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + break; + case SPRN_IVOR15: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; + break; + + default: + return EMULATE_FAIL; + } + + kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); + return EMULATE_DONE; +} + diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index ad72c6f9811..9a34b8edb9e 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -22,20 +22,103 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <linux/highmem.h> + +#include <asm/tlbflush.h> #include <asm/mmu-44x.h> #include <asm/kvm_ppc.h> +#include <asm/kvm_44x.h> +#include "timing.h" #include "44x_tlb.h" +#ifndef PPC44x_TLBE_SIZE +#define PPC44x_TLBE_SIZE PPC44x_TLB_4K +#endif + +#define PAGE_SIZE_4K (1<<12) +#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) + +#define PPC44x_TLB_UATTR_MASK \ + (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) #define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) -static unsigned int kvmppc_tlb_44x_pos; +#ifdef DEBUG +void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) +{ + struct kvmppc_44x_tlbe *tlbe; + int i; + + printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); + printk("| %2s | %3s | %8s | %8s | %8s |\n", + "nr", "tid", "word0", "word1", "word2"); + + for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { + tlbe = &vcpu_44x->guest_tlb[i]; + if (tlbe->word0 & PPC44x_TLB_VALID) + printk(" G%2d | %02X | %08X | %08X | %08X |\n", + i, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + } +} +#endif + +static inline void kvmppc_44x_tlbie(unsigned int index) +{ + /* 0 <= index < 64, so the V bit is clear and we can use the index as + * word0. */ + asm volatile( + "tlbwe %[index], %[index], 0\n" + : + : [index] "r"(index) + ); +} + +static inline void kvmppc_44x_tlbre(unsigned int index, + struct kvmppc_44x_tlbe *tlbe) +{ + asm volatile( + "tlbre %[word0], %[index], 0\n" + "mfspr %[tid], %[sprn_mmucr]\n" + "andi. %[tid], %[tid], 0xff\n" + "tlbre %[word1], %[index], 1\n" + "tlbre %[word2], %[index], 2\n" + : [word0] "=r"(tlbe->word0), + [word1] "=r"(tlbe->word1), + [word2] "=r"(tlbe->word2), + [tid] "=r"(tlbe->tid) + : [index] "r"(index), + [sprn_mmucr] "i"(SPRN_MMUCR) + : "cc" + ); +} + +static inline void kvmppc_44x_tlbwe(unsigned int index, + struct kvmppc_44x_tlbe *stlbe) +{ + unsigned long tmp; + + asm volatile( + "mfspr %[tmp], %[sprn_mmucr]\n" + "rlwimi %[tmp], %[tid], 0, 0xff\n" + "mtspr %[sprn_mmucr], %[tmp]\n" + "tlbwe %[word0], %[index], 0\n" + "tlbwe %[word1], %[index], 1\n" + "tlbwe %[word2], %[index], 2\n" + : [tmp] "=&r"(tmp) + : [word0] "r"(stlbe->word0), + [word1] "r"(stlbe->word1), + [word2] "r"(stlbe->word2), + [tid] "r"(stlbe->tid), + [index] "r"(index), + [sprn_mmucr] "i"(SPRN_MMUCR) + ); +} static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) { - /* Mask off reserved bits. */ - attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK; + /* We only care about the guest's permission and user bits. */ + attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK; if (!usermode) { /* Guest is in supervisor mode, so we need to translate guest @@ -47,18 +130,60 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) /* Make sure host can always access this memory. */ attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; + /* WIMGE = 0b00100 */ + attrib |= PPC44x_TLB_M; + return attrib; } +/* Load shadow TLB back into hardware. */ +void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + for (i = 0; i <= tlb_44x_hwater; i++) { + struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; + + if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) + kvmppc_44x_tlbwe(i, stlbe); + } +} + +static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, + unsigned int i) +{ + vcpu_44x->shadow_tlb_mod[i] = 1; +} + +/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ +void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + for (i = 0; i <= tlb_44x_hwater; i++) { + struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; + + if (vcpu_44x->shadow_tlb_mod[i]) + kvmppc_44x_tlbre(i, stlbe); + + if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) + kvmppc_44x_tlbie(i); + } +} + + /* Search the guest TLB for a matching entry. */ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, unsigned int as) { + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; /* XXX Replace loop with fancy data structures. */ - for (i = 0; i < PPC44x_TLB_SIZE; i++) { - struct tlbe *tlbe = &vcpu->arch.guest_tlb[i]; + for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { + struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i]; unsigned int tid; if (eaddr < get_tlb_eaddr(tlbe)) @@ -83,78 +208,89 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, return -1; } -struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) +int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_IS); - unsigned int index; - index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); - if (index == -1) - return NULL; - return &vcpu->arch.guest_tlb[index]; + return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } -struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) +int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_DS); - unsigned int index; - index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); - if (index == -1) - return NULL; - return &vcpu->arch.guest_tlb[index]; + return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } -static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) +static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, + unsigned int stlb_index) { - return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); -} + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index]; -static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, - unsigned int index) -{ - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index]; - struct page *page = vcpu->arch.shadow_pages[index]; + if (!ref->page) + return; - if (get_tlb_v(stlbe)) { - if (kvmppc_44x_tlbe_is_writable(stlbe)) - kvm_release_page_dirty(page); - else - kvm_release_page_clean(page); - } + /* Discard from the TLB. */ + /* Note: we could actually invalidate a host mapping, if the host overwrote + * this TLB entry since we inserted a guest mapping. */ + kvmppc_44x_tlbie(stlb_index); + + /* Now release the page. */ + if (ref->writeable) + kvm_release_page_dirty(ref->page); + else + kvm_release_page_clean(ref->page); + + ref->page = NULL; + + /* XXX set tlb_44x_index to stlb_index? */ + + KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler); } void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) { + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_44x_shadow_release(vcpu, i); -} - -void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i) -{ - vcpu->arch.shadow_tlb_mod[i] = 1; + kvmppc_44x_shadow_release(vcpu_44x, i); } -/* Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, - u32 flags) +/** + * kvmppc_mmu_map -- create a host mapping for guest memory + * + * If the guest wanted a larger page than the host supports, only the first + * host page is mapped here and the rest are demand faulted. + * + * If the guest wanted a smaller page than the host page size, we map only the + * guest-size page (i.e. not a full host page mapping). + * + * Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. + */ +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, + u32 flags, u32 max_bytes, unsigned int gtlb_index) { + struct kvmppc_44x_tlbe stlbe; + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_shadow_ref *ref; struct page *new_page; - struct tlbe *stlbe; hpa_t hpaddr; + gfn_t gfn; unsigned int victim; - /* Future optimization: don't overwrite the TLB entry containing the - * current PC (or stack?). */ - victim = kvmppc_tlb_44x_pos++; - if (kvmppc_tlb_44x_pos > tlb_44x_hwater) - kvmppc_tlb_44x_pos = 0; - stlbe = &vcpu->arch.shadow_tlb[victim]; + /* Select TLB entry to clobber. Indirectly guard against races with the TLB + * miss handler by disabling interrupts. */ + local_irq_disable(); + victim = ++tlb_44x_index; + if (victim > tlb_44x_hwater) + victim = 0; + tlb_44x_index = victim; + local_irq_enable(); /* Get reference to new page. */ + gfn = gpaddr >> PAGE_SHIFT; new_page = gfn_to_page(vcpu->kvm, gfn); if (is_error_page(new_page)) { printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); @@ -163,10 +299,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, } hpaddr = page_to_phys(new_page); - /* Drop reference to old page. */ - kvmppc_44x_shadow_release(vcpu, victim); - - vcpu->arch.shadow_pages[victim] = new_page; + /* Invalidate any previous shadow mappings. */ + kvmppc_44x_shadow_release(vcpu_44x, victim); /* XXX Make sure (va, size) doesn't overlap any other * entries. 440x6 user manual says the result would be @@ -174,78 +308,193 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, /* XXX what about AS? */ - stlbe->tid = !(asid & 0xff); - /* Force TS=1 for all guest mappings. */ - /* For now we hardcode 4KB mappings, but it will be important to - * use host large pages in the future. */ - stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS - | PPC44x_TLB_4K; - stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); - stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, - vcpu->arch.msr & MSR_PR); - kvmppc_tlbe_set_modified(vcpu, victim); + stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; + + if (max_bytes >= PAGE_SIZE) { + /* Guest mapping is larger than or equal to host page size. We can use + * a "native" host mapping. */ + stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE; + } else { + /* Guest mapping is smaller than host page size. We must restrict the + * size of the mapping to be at most the smaller of the two, but for + * simplicity we fall back to a 4K mapping (this is probably what the + * guest is using anyways). */ + stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K; + + /* 'hpaddr' is a host page, which is larger than the mapping we're + * inserting here. To compensate, we must add the in-page offset to the + * sub-page. */ + hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K); + } - KVMTRACE_5D(STLB_WRITE, vcpu, victim, - stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, - handler); + stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); + stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, + vcpu->arch.msr & MSR_PR); + stlbe.tid = !(asid & 0xff); + + /* Keep track of the reference so we can properly release it later. */ + ref = &vcpu_44x->shadow_refs[victim]; + ref->page = new_page; + ref->gtlb_index = gtlb_index; + ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW); + ref->tid = stlbe.tid; + + /* Insert shadow mapping into hardware TLB. */ + kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); + kvmppc_44x_tlbwe(victim, &stlbe); + KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1, + stlbe.word2, handler); } -void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, - gva_t eend, u32 asid) +/* For a particular guest TLB entry, invalidate the corresponding host TLB + * mappings and release the host pages. */ +static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, + unsigned int gtlb_index) { - unsigned int pid = !(asid & 0xff); + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; - /* XXX Replace loop with fancy data structures. */ - for (i = 0; i <= tlb_44x_hwater; i++) { - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; - unsigned int tid; + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; + if (ref->gtlb_index == gtlb_index) + kvmppc_44x_shadow_release(vcpu_44x, i); + } +} - if (!get_tlb_v(stlbe)) - continue; +void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +{ + vcpu->arch.shadow_pid = !usermode; +} - if (eend < get_tlb_eaddr(stlbe)) - continue; +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; - if (eaddr > get_tlb_end(stlbe)) - continue; + if (unlikely(vcpu->arch.pid == new_pid)) + return; - tid = get_tlb_tid(stlbe); - if (tid && (tid != pid)) - continue; + vcpu->arch.pid = new_pid; - kvmppc_44x_shadow_release(vcpu, i); - stlbe->word0 = 0; - kvmppc_tlbe_set_modified(vcpu, i); - KVMTRACE_5D(STLB_INVAL, vcpu, i, - stlbe->tid, stlbe->word0, stlbe->word1, - stlbe->word2, handler); + /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it + * can't access guest kernel mappings (TID=1). When we switch to a new + * guest PID, which will also use host PID=0, we must discard the old guest + * userspace mappings. */ + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; + + if (ref->tid == 0) + kvmppc_44x_shadow_release(vcpu_44x, i); } } -/* Invalidate all mappings on the privilege switch after PID has been changed. - * The guest always runs with PID=1, so we must clear the entire TLB when - * switching address spaces. */ -void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct kvmppc_44x_tlbe *tlbe) { - int i; + gpa_t gpa; - if (vcpu->arch.swap_pid) { - /* XXX Replace loop with fancy data structures. */ - for (i = 0; i <= tlb_44x_hwater; i++) { - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; - - /* Future optimization: clear only userspace mappings. */ - kvmppc_44x_shadow_release(vcpu, i); - stlbe->word0 = 0; - kvmppc_tlbe_set_modified(vcpu, i); - KVMTRACE_5D(STLB_INVAL, vcpu, i, - stlbe->tid, stlbe->word0, stlbe->word1, - stlbe->word2, handler); - } - vcpu->arch.swap_pid = 0; + if (!get_tlb_v(tlbe)) + return 0; + + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + return 0; + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *tlbe; + unsigned int gtlb_index; + + gtlb_index = vcpu->arch.gpr[ra]; + if (gtlb_index > KVM44x_GUEST_TLB_SIZE) { + printk("%s: index %d\n", __func__, gtlb_index); + kvmppc_dump_vcpu(vcpu); + return EMULATE_FAIL; } - vcpu->arch.shadow_pid = !usermode; + tlbe = &vcpu_44x->guest_tlb[gtlb_index]; + + /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */ + if (tlbe->word0 & PPC44x_TLB_VALID) + kvmppc_44x_invalidate(vcpu, gtlb_index); + + switch (ws) { + case PPC44x_TLB_PAGEID: + tlbe->tid = get_mmucr_stid(vcpu); + tlbe->word0 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_XLAT: + tlbe->word1 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_ATTRIB: + tlbe->word2 = vcpu->arch.gpr[rs]; + break; + + default: + return EMULATE_FAIL; + } + + if (tlbe_is_host_safe(vcpu, tlbe)) { + u64 asid; + gva_t eaddr; + gpa_t gpaddr; + u32 flags; + u32 bytes; + + eaddr = get_tlb_eaddr(tlbe); + gpaddr = get_tlb_raddr(tlbe); + + /* Use the advertised page size to mask effective and real addrs. */ + bytes = get_tlb_bytes(tlbe); + eaddr &= ~(bytes - 1); + gpaddr &= ~(bytes - 1); + + asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; + flags = tlbe->word2 & 0xffff; + + kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index); + } + + KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0, + tlbe->word1, tlbe->word2, handler); + + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); + return EMULATE_DONE; +} + +int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) +{ + u32 ea; + int gtlb_index; + unsigned int as = get_mmucr_sts(vcpu); + unsigned int pid = get_mmucr_stid(vcpu); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); + if (rc) { + if (gtlb_index < 0) + vcpu->arch.cr &= ~0x20000000; + else + vcpu->arch.cr |= 0x20000000; + } + vcpu->arch.gpr[rt] = gtlb_index; + + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); + return EMULATE_DONE; } diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h index 2ccd46b6f6b..772191f29e6 100644 --- a/arch/powerpc/kvm/44x_tlb.h +++ b/arch/powerpc/kvm/44x_tlb.h @@ -25,48 +25,52 @@ extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, unsigned int as); -extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); -extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); +extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); +extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); + +extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, + u8 rc); +extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws); /* TLB helper functions */ -static inline unsigned int get_tlb_size(const struct tlbe *tlbe) +static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe) { return (tlbe->word0 >> 4) & 0xf; } -static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) +static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe) { return tlbe->word0 & 0xfffffc00; } -static inline gva_t get_tlb_bytes(const struct tlbe *tlbe) +static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe) { unsigned int pgsize = get_tlb_size(tlbe); return 1 << 10 << (pgsize << 1); } -static inline gva_t get_tlb_end(const struct tlbe *tlbe) +static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe) { return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; } -static inline u64 get_tlb_raddr(const struct tlbe *tlbe) +static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe) { u64 word1 = tlbe->word1; return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); } -static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) +static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe) { return tlbe->tid & 0xff; } -static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) +static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe) { return (tlbe->word0 >> 8) & 0x1; } -static inline unsigned int get_tlb_v(const struct tlbe *tlbe) +static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe) { return (tlbe->word0 >> 9) & 0x1; } @@ -81,7 +85,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) return (vcpu->arch.mmucr >> 16) & 0x1; } -static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr) +static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr) { unsigned int pgmask = get_tlb_bytes(tlbe) - 1; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 53aaa66b25e..6dbdc4817d8 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -15,27 +15,33 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION config KVM - bool "Kernel-based Virtual Machine (KVM) support" - depends on 44x && EXPERIMENTAL + bool select PREEMPT_NOTIFIERS select ANON_INODES - # We can only run on Book E hosts so far - select KVM_BOOKE_HOST + +config KVM_440 + bool "KVM support for PowerPC 440 processors" + depends on EXPERIMENTAL && 44x + select KVM ---help--- - Support hosting virtualized guest machines. You will also - need to select one or more of the processor modules below. + Support running unmodified 440 guest kernels in virtual machines on + 440 host processors. This module provides access to the hardware capabilities through a character device node named /dev/kvm. If unsure, say N. -config KVM_BOOKE_HOST - bool "KVM host support for Book E PowerPC processors" - depends on KVM && 44x +config KVM_EXIT_TIMING + bool "Detailed exit timing" + depends on KVM ---help--- - Provides host support for KVM on Book E PowerPC processors. Currently - this works on 440 processors only. + Calculate elapsed time for every exit/enter cycle. A per-vcpu + report is available in debugfs kvm/vm#_vcpu#_timing. + The overhead is relatively small, however it is not recommended for + production environments. + + If unsure, say N. config KVM_TRACE bool "KVM trace support" diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 2a5d4397ac4..df7ba59e6d5 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -8,10 +8,16 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o) -kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o +kvm-objs := $(common-objs-y) powerpc.o emulate.o +obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o obj-$(CONFIG_KVM) += kvm.o AFLAGS_booke_interrupts.o := -I$(obj) -kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o -obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o +kvm-440-objs := \ + booke.o \ + booke_interrupts.o \ + 44x.o \ + 44x_tlb.o \ + 44x_emulate.o +obj-$(CONFIG_KVM_440) += kvm-440.o diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke.c index 7b2591e26ba..35485dd6927 100644 --- a/arch/powerpc/kvm/booke_guest.c +++ b/arch/powerpc/kvm/booke.c @@ -24,21 +24,26 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/fs.h> + #include <asm/cputable.h> #include <asm/uaccess.h> #include <asm/kvm_ppc.h> +#include "timing.h" +#include <asm/cacheflush.h> +#include <asm/kvm_44x.h> +#include "booke.h" #include "44x_tlb.h" +unsigned long kvmppc_booke_handlers; + #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU struct kvm_stats_debugfs_item debugfs_entries[] = { - { "exits", VCPU_STAT(sum_exits) }, { "mmio", VCPU_STAT(mmio_exits) }, { "dcr", VCPU_STAT(dcr_exits) }, { "sig", VCPU_STAT(signal_exits) }, - { "light", VCPU_STAT(light_exits) }, { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, @@ -53,103 +58,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -static const u32 interrupt_msr_mask[16] = { - [BOOKE_INTERRUPT_CRITICAL] = MSR_ME, - [BOOKE_INTERRUPT_MACHINE_CHECK] = 0, - [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME, - [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_DEBUG] = MSR_ME, -}; - -const unsigned char exception_priority[] = { - [BOOKE_INTERRUPT_DATA_STORAGE] = 0, - [BOOKE_INTERRUPT_INST_STORAGE] = 1, - [BOOKE_INTERRUPT_ALIGNMENT] = 2, - [BOOKE_INTERRUPT_PROGRAM] = 3, - [BOOKE_INTERRUPT_FP_UNAVAIL] = 4, - [BOOKE_INTERRUPT_SYSCALL] = 5, - [BOOKE_INTERRUPT_AP_UNAVAIL] = 6, - [BOOKE_INTERRUPT_DTLB_MISS] = 7, - [BOOKE_INTERRUPT_ITLB_MISS] = 8, - [BOOKE_INTERRUPT_MACHINE_CHECK] = 9, - [BOOKE_INTERRUPT_DEBUG] = 10, - [BOOKE_INTERRUPT_CRITICAL] = 11, - [BOOKE_INTERRUPT_WATCHDOG] = 12, - [BOOKE_INTERRUPT_EXTERNAL] = 13, - [BOOKE_INTERRUPT_FIT] = 14, - [BOOKE_INTERRUPT_DECREMENTER] = 15, -}; - -const unsigned char priority_exception[] = { - BOOKE_INTERRUPT_DATA_STORAGE, - BOOKE_INTERRUPT_INST_STORAGE, - BOOKE_INTERRUPT_ALIGNMENT, - BOOKE_INTERRUPT_PROGRAM, - BOOKE_INTERRUPT_FP_UNAVAIL, - BOOKE_INTERRUPT_SYSCALL, - BOOKE_INTERRUPT_AP_UNAVAIL, - BOOKE_INTERRUPT_DTLB_MISS, - BOOKE_INTERRUPT_ITLB_MISS, - BOOKE_INTERRUPT_MACHINE_CHECK, - BOOKE_INTERRUPT_DEBUG, - BOOKE_INTERRUPT_CRITICAL, - BOOKE_INTERRUPT_WATCHDOG, - BOOKE_INTERRUPT_EXTERNAL, - BOOKE_INTERRUPT_FIT, - BOOKE_INTERRUPT_DECREMENTER, -}; - - -void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) -{ - struct tlbe *tlbe; - int i; - - printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); - printk("| %2s | %3s | %8s | %8s | %8s |\n", - "nr", "tid", "word0", "word1", "word2"); - - for (i = 0; i < PPC44x_TLB_SIZE; i++) { - tlbe = &vcpu->arch.guest_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" G%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } - - for (i = 0; i < PPC44x_TLB_SIZE; i++) { - tlbe = &vcpu->arch.shadow_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" S%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } -} - /* TODO: use vcpu_printf() */ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) { int i; - printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr); - printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr); - printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1); + printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr); + printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); + printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1); printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); for (i = 0; i < 32; i += 4) { - printk("gpr%02d: %08x %08x %08x %08x\n", i, + printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, vcpu->arch.gpr[i], vcpu->arch.gpr[i+1], vcpu->arch.gpr[i+2], @@ -157,69 +78,96 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) } } -/* Check if we are ready to deliver the interrupt */ -static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) +static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, + unsigned int priority) { - int r; + set_bit(priority, &vcpu->arch.pending_exceptions); +} - switch (interrupt) { - case BOOKE_INTERRUPT_CRITICAL: - r = vcpu->arch.msr & MSR_CE; - break; - case BOOKE_INTERRUPT_MACHINE_CHECK: - r = vcpu->arch.msr & MSR_ME; - break; - case BOOKE_INTERRUPT_EXTERNAL: - r = vcpu->arch.msr & MSR_EE; +void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); +} + +void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); +} + +int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) +{ + return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); +} + +void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); +} + +/* Deliver the interrupt of the corresponding priority, if possible. */ +static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, + unsigned int priority) +{ + int allowed = 0; + ulong msr_mask; + + switch (priority) { + case BOOKE_IRQPRIO_PROGRAM: + case BOOKE_IRQPRIO_DTLB_MISS: + case BOOKE_IRQPRIO_ITLB_MISS: + case BOOKE_IRQPRIO_SYSCALL: + case BOOKE_IRQPRIO_DATA_STORAGE: + case BOOKE_IRQPRIO_INST_STORAGE: + case BOOKE_IRQPRIO_FP_UNAVAIL: + case BOOKE_IRQPRIO_AP_UNAVAIL: + case BOOKE_IRQPRIO_ALIGNMENT: + allowed = 1; + msr_mask = MSR_CE|MSR_ME|MSR_DE; break; - case BOOKE_INTERRUPT_DECREMENTER: - r = vcpu->arch.msr & MSR_EE; + case BOOKE_IRQPRIO_CRITICAL: + case BOOKE_IRQPRIO_WATCHDOG: + allowed = vcpu->arch.msr & MSR_CE; + msr_mask = MSR_ME; break; - case BOOKE_INTERRUPT_FIT: - r = vcpu->arch.msr & MSR_EE; + case BOOKE_IRQPRIO_MACHINE_CHECK: + allowed = vcpu->arch.msr & MSR_ME; + msr_mask = 0; break; - case BOOKE_INTERRUPT_WATCHDOG: - r = vcpu->arch.msr & MSR_CE; + case BOOKE_IRQPRIO_EXTERNAL: + case BOOKE_IRQPRIO_DECREMENTER: + case BOOKE_IRQPRIO_FIT: + allowed = vcpu->arch.msr & MSR_EE; + msr_mask = MSR_CE|MSR_ME|MSR_DE; break; - case BOOKE_INTERRUPT_DEBUG: - r = vcpu->arch.msr & MSR_DE; + case BOOKE_IRQPRIO_DEBUG: + allowed = vcpu->arch.msr & MSR_DE; + msr_mask = MSR_ME; break; - default: - r = 1; } - return r; -} + if (allowed) { + vcpu->arch.srr0 = vcpu->arch.pc; + vcpu->arch.srr1 = vcpu->arch.msr; + vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; + kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); -static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) -{ - switch (interrupt) { - case BOOKE_INTERRUPT_DECREMENTER: - vcpu->arch.tsr |= TSR_DIS; - break; + clear_bit(priority, &vcpu->arch.pending_exceptions); } - vcpu->arch.srr0 = vcpu->arch.pc; - vcpu->arch.srr1 = vcpu->arch.msr; - vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt]; - kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]); + return allowed; } /* Check pending exceptions and deliver one, if possible. */ -void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu) +void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; - unsigned int exception; unsigned int priority; - priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending)); + priority = __ffs(*pending); while (priority <= BOOKE_MAX_INTERRUPT) { - exception = priority_exception[priority]; - if (kvmppc_can_deliver_interrupt(vcpu, exception)) { - kvmppc_clear_exception(vcpu, exception); - kvmppc_deliver_interrupt(vcpu, exception); + if (kvmppc_booke_irqprio_deliver(vcpu, priority)) break; - } priority = find_next_bit(pending, BITS_PER_BYTE * sizeof(*pending), @@ -238,6 +186,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, enum emulation_result er; int r = RESUME_HOST; + /* update before a new last_exit_type is rewritten */ + kvmppc_update_timing_stats(vcpu); + local_irq_enable(); run->exit_reason = KVM_EXIT_UNKNOWN; @@ -251,21 +202,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_EXTERNAL: + kvmppc_account_exit(vcpu, EXT_INTR_EXITS); + if (need_resched()) + cond_resched(); + r = RESUME_GUEST; + break; + case BOOKE_INTERRUPT_DECREMENTER: /* Since we switched IVPR back to the host's value, the host * handled this interrupt the moment we enabled interrupts. * Now we just offer it a chance to reschedule the guest. */ - - /* XXX At this point the TLB still holds our shadow TLB, so if - * we do reschedule the host will fault over it. Perhaps we - * should politely restore the host's entries to minimize - * misses before ceding control. */ + kvmppc_account_exit(vcpu, DEC_EXITS); if (need_resched()) cond_resched(); - if (exit_nr == BOOKE_INTERRUPT_DECREMENTER) - vcpu->stat.dec_exits++; - else - vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; @@ -274,17 +223,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Program traps generated by user-level software must be handled * by the guest kernel. */ vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); r = RESUME_GUEST; + kvmppc_account_exit(vcpu, USR_PR_INST); break; } er = kvmppc_emulate_instruction(run, vcpu); switch (er) { case EMULATE_DONE: + /* don't overwrite subtypes, just account kvm_stats */ + kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); /* Future optimization: only reload non-volatiles if * they were actually modified by emulation. */ - vcpu->stat.emulated_inst_exits++; r = RESUME_GUEST_NV; break; case EMULATE_DO_DCR: @@ -293,7 +244,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case EMULATE_FAIL: /* XXX Deliver Program interrupt to guest. */ - printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n", + printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", __func__, vcpu->arch.pc, vcpu->arch.last_inst); /* For debugging, encode the failing instruction and * report it to userspace. */ @@ -307,48 +258,53 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_FP_UNAVAIL: - kvmppc_queue_exception(vcpu, exit_nr); + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); + kvmppc_account_exit(vcpu, FP_UNAVAIL); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_DATA_STORAGE: vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_queue_exception(vcpu, exit_nr); - vcpu->stat.dsi_exits++; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); + kvmppc_account_exit(vcpu, DSI_EXITS); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_INST_STORAGE: vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_queue_exception(vcpu, exit_nr); - vcpu->stat.isi_exits++; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); + kvmppc_account_exit(vcpu, ISI_EXITS); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_SYSCALL: - kvmppc_queue_exception(vcpu, exit_nr); - vcpu->stat.syscall_exits++; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); + kvmppc_account_exit(vcpu, SYSCALL_EXITS); r = RESUME_GUEST; break; + /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_DTLB_MISS: { - struct tlbe *gtlbe; + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe; unsigned long eaddr = vcpu->arch.fault_dear; + int gtlb_index; gfn_t gfn; /* Check the guest TLB. */ - gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr); - if (!gtlbe) { + gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr); + if (gtlb_index < 0) { /* The guest didn't have a mapping for it. */ - kvmppc_queue_exception(vcpu, exit_nr); + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; - vcpu->stat.dtlb_real_miss_exits++; + kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); r = RESUME_GUEST; break; } + gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; @@ -359,38 +315,45 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, - gtlbe->word2); - vcpu->stat.dtlb_virt_miss_exits++; + kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid, + gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); + kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); r = RESUME_GUEST; } else { /* Guest has mapped and accessed a page which is not * actually RAM. */ r = kvmppc_emulate_mmio(run, vcpu); + kvmppc_account_exit(vcpu, MMIO_EXITS); } break; } + /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_ITLB_MISS: { - struct tlbe *gtlbe; + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe; unsigned long eaddr = vcpu->arch.pc; + gpa_t gpaddr; gfn_t gfn; + int gtlb_index; r = RESUME_GUEST; /* Check the guest TLB. */ - gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr); - if (!gtlbe) { + gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr); + if (gtlb_index < 0) { /* The guest didn't have a mapping for it. */ - kvmppc_queue_exception(vcpu, exit_nr); - vcpu->stat.itlb_real_miss_exits++; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); + kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS); break; } - vcpu->stat.itlb_virt_miss_exits++; + kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS); - gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; + gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; + gpaddr = tlb_xlate(gtlbe, eaddr); + gfn = gpaddr >> PAGE_SHIFT; if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { /* The guest TLB had a mapping, but the shadow TLB @@ -399,12 +362,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, - gtlbe->word2); + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid, + gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); } else { /* Guest mapped and leaped at non-RAM! */ - kvmppc_queue_exception(vcpu, - BOOKE_INTERRUPT_MACHINE_CHECK); + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK); } break; @@ -421,6 +383,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, mtspr(SPRN_DBSR, dbsr); run->exit_reason = KVM_EXIT_DEBUG; + kvmppc_account_exit(vcpu, DEBUG_EXITS); r = RESUME_HOST; break; } @@ -432,10 +395,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_disable(); - kvmppc_check_and_deliver_interrupts(vcpu); + kvmppc_core_deliver_interrupts(vcpu); - /* Do some exit accounting. */ - vcpu->stat.sum_exits++; if (!(r & RESUME_HOST)) { /* To avoid clobbering exit_reason, only check for signals if * we aren't already exiting to userspace for some other @@ -443,22 +404,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if (signal_pending(current)) { run->exit_reason = KVM_EXIT_INTR; r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); - - vcpu->stat.signal_exits++; - } else { - vcpu->stat.light_exits++; - } - } else { - switch (run->exit_reason) { - case KVM_EXIT_MMIO: - vcpu->stat.mmio_exits++; - break; - case KVM_EXIT_DCR: - vcpu->stat.dcr_exits++; - break; - case KVM_EXIT_INTR: - vcpu->stat.signal_exits++; - break; + kvmppc_account_exit(vcpu, SIGNAL_EXITS); } } @@ -468,20 +414,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - struct tlbe *tlbe = &vcpu->arch.guest_tlb[0]; - - tlbe->tid = 0; - tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; - tlbe->word1 = 0; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; - - tlbe++; - tlbe->tid = 0; - tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; - tlbe->word1 = 0xef600000; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR - | PPC44x_TLB_I | PPC44x_TLB_G; - vcpu->arch.pc = 0; vcpu->arch.msr = 0; vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ @@ -492,12 +424,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) * before it's programmed its own IVPR. */ vcpu->arch.ivpr = 0x55550000; - /* Since the guest can directly access the timebase, it must know the - * real timebase frequency. Accordingly, it must see the state of - * CCR1[TCS]. */ - vcpu->arch.ccr1 = mfspr(SPRN_CCR1); + kvmppc_init_timing_stats(vcpu); - return 0; + return kvmppc_core_vcpu_setup(vcpu); } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) @@ -536,7 +465,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.ctr = regs->ctr; vcpu->arch.lr = regs->lr; vcpu->arch.xer = regs->xer; - vcpu->arch.msr = regs->msr; + kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.srr0 = regs->srr0; vcpu->arch.srr1 = regs->srr1; vcpu->arch.sprg0 = regs->sprg0; @@ -575,31 +504,62 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -ENOTSUPP; } -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { - struct tlbe *gtlbe; - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); - if (index == -1) { - tr->valid = 0; - return 0; - } + return kvmppc_core_vcpu_translate(vcpu, tr); +} - gtlbe = &vcpu->arch.guest_tlb[index]; +int kvmppc_booke_init(void) +{ + unsigned long ivor[16]; + unsigned long max_ivor = 0; + int i; - tr->physical_address = tlb_xlate(gtlbe, eaddr); - /* XXX what does "writeable" and "usermode" even mean? */ - tr->valid = 1; + /* We install our own exception handlers by hijacking IVPR. IVPR must + * be 16-bit aligned, so we need a 64KB allocation. */ + kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, + VCPU_SIZE_ORDER); + if (!kvmppc_booke_handlers) + return -ENOMEM; + + /* XXX make sure our handlers are smaller than Linux's */ + + /* Copy our interrupt handlers to match host IVORs. That way we don't + * have to swap the IVORs on every guest/host transition. */ + ivor[0] = mfspr(SPRN_IVOR0); + ivor[1] = mfspr(SPRN_IVOR1); + ivor[2] = mfspr(SPRN_IVOR2); + ivor[3] = mfspr(SPRN_IVOR3); + ivor[4] = mfspr(SPRN_IVOR4); + ivor[5] = mfspr(SPRN_IVOR5); + ivor[6] = mfspr(SPRN_IVOR6); + ivor[7] = mfspr(SPRN_IVOR7); + ivor[8] = mfspr(SPRN_IVOR8); + ivor[9] = mfspr(SPRN_IVOR9); + ivor[10] = mfspr(SPRN_IVOR10); + ivor[11] = mfspr(SPRN_IVOR11); + ivor[12] = mfspr(SPRN_IVOR12); + ivor[13] = mfspr(SPRN_IVOR13); + ivor[14] = mfspr(SPRN_IVOR14); + ivor[15] = mfspr(SPRN_IVOR15); + + for (i = 0; i < 16; i++) { + if (ivor[i] > max_ivor) + max_ivor = ivor[i]; + + memcpy((void *)kvmppc_booke_handlers + ivor[i], + kvmppc_handlers_start + i * kvmppc_handler_len, + kvmppc_handler_len); + } + flush_icache_range(kvmppc_booke_handlers, + kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); return 0; } + +void __exit kvmppc_booke_exit(void) +{ + free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); + kvm_exit(); +} diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h new file mode 100644 index 00000000000..cf7c94ca24b --- /dev/null +++ b/arch/powerpc/kvm/booke.h @@ -0,0 +1,60 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __KVM_BOOKE_H__ +#define __KVM_BOOKE_H__ + +#include <linux/types.h> +#include <linux/kvm_host.h> +#include "timing.h" + +/* interrupt priortity ordering */ +#define BOOKE_IRQPRIO_DATA_STORAGE 0 +#define BOOKE_IRQPRIO_INST_STORAGE 1 +#define BOOKE_IRQPRIO_ALIGNMENT 2 +#define BOOKE_IRQPRIO_PROGRAM 3 +#define BOOKE_IRQPRIO_FP_UNAVAIL 4 +#define BOOKE_IRQPRIO_SYSCALL 5 +#define BOOKE_IRQPRIO_AP_UNAVAIL 6 +#define BOOKE_IRQPRIO_DTLB_MISS 7 +#define BOOKE_IRQPRIO_ITLB_MISS 8 +#define BOOKE_IRQPRIO_MACHINE_CHECK 9 +#define BOOKE_IRQPRIO_DEBUG 10 +#define BOOKE_IRQPRIO_CRITICAL 11 +#define BOOKE_IRQPRIO_WATCHDOG 12 +#define BOOKE_IRQPRIO_EXTERNAL 13 +#define BOOKE_IRQPRIO_FIT 14 +#define BOOKE_IRQPRIO_DECREMENTER 15 + +/* Helper function for "full" MSR writes. No need to call this if only EE is + * changing. */ +static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) +{ + if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) + kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); + + vcpu->arch.msr = new_msr; + + if (vcpu->arch.msr & MSR_WE) { + kvm_vcpu_block(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); + }; +} + +#endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c deleted file mode 100644 index b480341bc31..00000000000 --- a/arch/powerpc/kvm/booke_host.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#include <linux/errno.h> -#include <linux/kvm_host.h> -#include <linux/module.h> -#include <asm/cacheflush.h> -#include <asm/kvm_ppc.h> - -unsigned long kvmppc_booke_handlers; - -static int kvmppc_booke_init(void) -{ - unsigned long ivor[16]; - unsigned long max_ivor = 0; - int i; - - /* We install our own exception handlers by hijacking IVPR. IVPR must - * be 16-bit aligned, so we need a 64KB allocation. */ - kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, - VCPU_SIZE_ORDER); - if (!kvmppc_booke_handlers) - return -ENOMEM; - - /* XXX make sure our handlers are smaller than Linux's */ - - /* Copy our interrupt handlers to match host IVORs. That way we don't - * have to swap the IVORs on every guest/host transition. */ - ivor[0] = mfspr(SPRN_IVOR0); - ivor[1] = mfspr(SPRN_IVOR1); - ivor[2] = mfspr(SPRN_IVOR2); - ivor[3] = mfspr(SPRN_IVOR3); - ivor[4] = mfspr(SPRN_IVOR4); - ivor[5] = mfspr(SPRN_IVOR5); - ivor[6] = mfspr(SPRN_IVOR6); - ivor[7] = mfspr(SPRN_IVOR7); - ivor[8] = mfspr(SPRN_IVOR8); - ivor[9] = mfspr(SPRN_IVOR9); - ivor[10] = mfspr(SPRN_IVOR10); - ivor[11] = mfspr(SPRN_IVOR11); - ivor[12] = mfspr(SPRN_IVOR12); - ivor[13] = mfspr(SPRN_IVOR13); - ivor[14] = mfspr(SPRN_IVOR14); - ivor[15] = mfspr(SPRN_IVOR15); - - for (i = 0; i < 16; i++) { - if (ivor[i] > max_ivor) - max_ivor = ivor[i]; - - memcpy((void *)kvmppc_booke_handlers + ivor[i], - kvmppc_handlers_start + i * kvmppc_handler_len, - kvmppc_handler_len); - } - flush_icache_range(kvmppc_booke_handlers, - kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); - - return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); -} - -static void __exit kvmppc_booke_exit(void) -{ - free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); - kvm_exit(); -} - -module_init(kvmppc_booke_init) -module_exit(kvmppc_booke_exit) diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 95e165baf85..084ebcd7dd8 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -107,6 +107,18 @@ _GLOBAL(kvmppc_resume_host) li r6, 1 slw r6, r6, r5 +#ifdef CONFIG_KVM_EXIT_TIMING + /* save exit time */ +1: + mfspr r7, SPRN_TBRU + mfspr r8, SPRN_TBRL + mfspr r9, SPRN_TBRU + cmpw r9, r7 + bne 1b + stw r8, VCPU_TIMING_EXIT_TBL(r4) + stw r9, VCPU_TIMING_EXIT_TBU(r4) +#endif + /* Save the faulting instruction and all GPRs for emulation. */ andi. r7, r6, NEED_INST_MASK beq ..skip_inst_copy @@ -335,54 +347,6 @@ lightweight_exit: lwz r3, VCPU_SHADOW_PID(r4) mtspr SPRN_PID, r3 - /* Prevent all asynchronous TLB updates. */ - mfmsr r5 - lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h - ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l - andc r6, r5, r6 - mtmsr r6 - - /* Load the guest mappings, leaving the host's "pinned" kernel mappings - * in place. */ - mfspr r10, SPRN_MMUCR /* Save host MMUCR. */ - li r5, PPC44x_TLB_SIZE - lis r5, tlb_44x_hwater@ha - lwz r5, tlb_44x_hwater@l(r5) - mtctr r5 - addi r9, r4, VCPU_SHADOW_TLB - addi r5, r4, VCPU_SHADOW_MOD - li r3, 0 -1: - lbzx r7, r3, r5 - cmpwi r7, 0 - beq 3f - - /* Load guest entry. */ - mulli r11, r3, TLBE_BYTES - add r11, r11, r9 - lwz r7, 0(r11) - mtspr SPRN_MMUCR, r7 - lwz r7, 4(r11) - tlbwe r7, r3, PPC44x_TLB_PAGEID - lwz r7, 8(r11) - tlbwe r7, r3, PPC44x_TLB_XLAT - lwz r7, 12(r11) - tlbwe r7, r3, PPC44x_TLB_ATTRIB -3: - addi r3, r3, 1 /* Increment index. */ - bdnz 1b - - mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */ - - /* Clear bitmap of modified TLB entries */ - li r5, PPC44x_TLB_SIZE>>2 - mtctr r5 - addi r5, r4, VCPU_SHADOW_MOD - 4 - li r6, 0 -1: - stwu r6, 4(r5) - bdnz 1b - iccci 0, 0 /* XXX hack */ /* Load some guest volatiles. */ @@ -423,6 +387,18 @@ lightweight_exit: lwz r3, VCPU_SPRG7(r4) mtspr SPRN_SPRG7, r3 +#ifdef CONFIG_KVM_EXIT_TIMING + /* save enter time */ +1: + mfspr r6, SPRN_TBRU + mfspr r7, SPRN_TBRL + mfspr r8, SPRN_TBRU + cmpw r8, r6 + bne 1b + stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4) + stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4) +#endif + /* Finish loading guest volatiles and jump to guest. */ lwz r3, VCPU_CTR(r4) mtctr r3 diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 0fce4fbdc20..d1d38daa93f 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -23,161 +23,14 @@ #include <linux/string.h> #include <linux/kvm_host.h> -#include <asm/dcr.h> -#include <asm/dcr-regs.h> +#include <asm/reg.h> #include <asm/time.h> #include <asm/byteorder.h> #include <asm/kvm_ppc.h> +#include <asm/disassemble.h> +#include "timing.h" -#include "44x_tlb.h" - -/* Instruction decoding */ -static inline unsigned int get_op(u32 inst) -{ - return inst >> 26; -} - -static inline unsigned int get_xop(u32 inst) -{ - return (inst >> 1) & 0x3ff; -} - -static inline unsigned int get_sprn(u32 inst) -{ - return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); -} - -static inline unsigned int get_dcrn(u32 inst) -{ - return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); -} - -static inline unsigned int get_rt(u32 inst) -{ - return (inst >> 21) & 0x1f; -} - -static inline unsigned int get_rs(u32 inst) -{ - return (inst >> 21) & 0x1f; -} - -static inline unsigned int get_ra(u32 inst) -{ - return (inst >> 16) & 0x1f; -} - -static inline unsigned int get_rb(u32 inst) -{ - return (inst >> 11) & 0x1f; -} - -static inline unsigned int get_rc(u32 inst) -{ - return inst & 0x1; -} - -static inline unsigned int get_ws(u32 inst) -{ - return (inst >> 11) & 0x1f; -} - -static inline unsigned int get_d(u32 inst) -{ - return inst & 0xffff; -} - -static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct tlbe *tlbe) -{ - gpa_t gpa; - - if (!get_tlb_v(tlbe)) - return 0; - - /* Does it match current guest AS? */ - /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) - return 0; - - gpa = get_tlb_raddr(tlbe); - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) - /* Mapping is not for RAM. */ - return 0; - - return 1; -} - -static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst) -{ - u64 eaddr; - u64 raddr; - u64 asid; - u32 flags; - struct tlbe *tlbe; - unsigned int ra; - unsigned int rs; - unsigned int ws; - unsigned int index; - - ra = get_ra(inst); - rs = get_rs(inst); - ws = get_ws(inst); - - index = vcpu->arch.gpr[ra]; - if (index > PPC44x_TLB_SIZE) { - printk("%s: index %d\n", __func__, index); - kvmppc_dump_vcpu(vcpu); - return EMULATE_FAIL; - } - - tlbe = &vcpu->arch.guest_tlb[index]; - - /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ - if (tlbe->word0 & PPC44x_TLB_VALID) { - eaddr = get_tlb_eaddr(tlbe); - asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; - kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid); - } - - switch (ws) { - case PPC44x_TLB_PAGEID: - tlbe->tid = vcpu->arch.mmucr & 0xff; - tlbe->word0 = vcpu->arch.gpr[rs]; - break; - - case PPC44x_TLB_XLAT: - tlbe->word1 = vcpu->arch.gpr[rs]; - break; - - case PPC44x_TLB_ATTRIB: - tlbe->word2 = vcpu->arch.gpr[rs]; - break; - - default: - return EMULATE_FAIL; - } - - if (tlbe_is_host_safe(vcpu, tlbe)) { - eaddr = get_tlb_eaddr(tlbe); - raddr = get_tlb_raddr(tlbe); - asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; - flags = tlbe->word2 & 0xffff; - - /* Create a 4KB mapping on the host. If the guest wanted a - * large page, only the first 4KB is mapped here and the rest - * are mapped on the fly. */ - kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags); - } - - KVMTRACE_5D(GTLB_WRITE, vcpu, index, - tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2, - handler); - - return EMULATE_DONE; -} - -static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) +void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) { if (vcpu->arch.tcr & TCR_DIE) { /* The decrementer ticks at the same rate as the timebase, so @@ -193,12 +46,6 @@ static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) } } -static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) -{ - vcpu->arch.pc = vcpu->arch.srr0; - kvmppc_set_msr(vcpu, vcpu->arch.srr1); -} - /* XXX to do: * lhax * lhaux @@ -213,40 +60,30 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) * * XXX is_bigendian should depend on MMU mapping or MSR[LE] */ +/* XXX Should probably auto-generate instruction decoding for a particular core + * from opcode tables in the future. */ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 inst = vcpu->arch.last_inst; u32 ea; int ra; int rb; - int rc; int rs; int rt; int sprn; - int dcrn; enum emulation_result emulated = EMULATE_DONE; int advance = 1; + /* this default type might be overwritten by subcategories */ + kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + switch (get_op(inst)) { - case 3: /* trap */ - printk("trap!\n"); - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); + case 3: /* trap */ + vcpu->arch.esr |= ESR_PTR; + kvmppc_core_queue_program(vcpu); advance = 0; break; - case 19: - switch (get_xop(inst)) { - case 50: /* rfi */ - kvmppc_emul_rfi(vcpu); - advance = 0; - break; - - default: - emulated = EMULATE_FAIL; - break; - } - break; - case 31: switch (get_xop(inst)) { @@ -255,27 +92,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; - case 83: /* mfmsr */ - rt = get_rt(inst); - vcpu->arch.gpr[rt] = vcpu->arch.msr; - break; - case 87: /* lbzx */ rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; - case 131: /* wrtee */ - rs = get_rs(inst); - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (vcpu->arch.gpr[rs] & MSR_EE); - break; - - case 146: /* mtmsr */ - rs = get_rs(inst); - kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); - break; - case 151: /* stwx */ rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -283,11 +104,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 1); break; - case 163: /* wrteei */ - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (inst & MSR_EE); - break; - case 215: /* stbx */ rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -328,42 +144,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = ea; break; - case 323: /* mfdcr */ - dcrn = get_dcrn(inst); - rt = get_rt(inst); - - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. - */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); - local_irq_enable(); - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = 0; - run->dcr.is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - emulated = EMULATE_DO_DCR; - } - - break; - case 339: /* mfspr */ sprn = get_sprn(inst); rt = get_rt(inst); @@ -373,26 +153,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; case SPRN_SRR1: vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; - case SPRN_MMUCR: - vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; - case SPRN_PID: - vcpu->arch.gpr[rt] = vcpu->arch.pid; break; - case SPRN_IVPR: - vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; - case SPRN_CCR0: - vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; - case SPRN_CCR1: - vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; case SPRN_PVR: vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; - case SPRN_DEAR: - vcpu->arch.gpr[rt] = vcpu->arch.dear; break; - case SPRN_ESR: - vcpu->arch.gpr[rt] = vcpu->arch.esr; break; - case SPRN_DBCR0: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; - case SPRN_DBCR1: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; /* Note: mftb and TBRL/TBWL are user-accessible, so * the guest can always access the real TB anyways. @@ -413,42 +175,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Note: SPRG4-7 are user-readable, so we don't get * a trap. */ - case SPRN_IVOR0: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break; - case SPRN_IVOR1: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break; - case SPRN_IVOR2: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break; - case SPRN_IVOR3: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break; - case SPRN_IVOR4: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break; - case SPRN_IVOR5: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break; - case SPRN_IVOR6: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break; - case SPRN_IVOR7: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break; - case SPRN_IVOR8: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break; - case SPRN_IVOR9: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break; - case SPRN_IVOR10: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break; - case SPRN_IVOR11: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break; - case SPRN_IVOR12: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break; - case SPRN_IVOR13: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break; - case SPRN_IVOR14: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break; - case SPRN_IVOR15: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break; - default: - printk("mfspr: unknown spr %x\n", sprn); - vcpu->arch.gpr[rt] = 0; + emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); + if (emulated == EMULATE_FAIL) { + printk("mfspr: unknown spr %x\n", sprn); + vcpu->arch.gpr[rt] = 0; + } break; } break; @@ -478,25 +210,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = ea; break; - case 451: /* mtdcr */ - dcrn = get_dcrn(inst); - rs = get_rs(inst); - - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = vcpu->arch.gpr[rs]; - run->dcr.is_write = 1; - vcpu->arch.dcr_needed = 1; - emulated = EMULATE_DO_DCR; - } - - break; - case 467: /* mtspr */ sprn = get_sprn(inst); rs = get_rs(inst); @@ -505,22 +218,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; case SPRN_SRR1: vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; - case SPRN_MMUCR: - vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; - case SPRN_PID: - kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; - case SPRN_CCR0: - vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; - case SPRN_CCR1: - vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; - case SPRN_DEAR: - vcpu->arch.dear = vcpu->arch.gpr[rs]; break; - case SPRN_ESR: - vcpu->arch.esr = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR0: - vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR1: - vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; /* XXX We need to context-switch the timebase for * watchdog and FIT. */ @@ -532,14 +229,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_emulate_dec(vcpu); break; - case SPRN_TSR: - vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; - - case SPRN_TCR: - vcpu->arch.tcr = vcpu->arch.gpr[rs]; - kvmppc_emulate_dec(vcpu); - break; - case SPRN_SPRG0: vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; case SPRN_SPRG1: @@ -549,56 +238,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_SPRG3: vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; - /* Note: SPRG4-7 are user-readable. These values are - * loaded into the real SPRGs when resuming the - * guest. */ - case SPRN_SPRG4: - vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG5: - vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG6: - vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG7: - vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; - - case SPRN_IVPR: - vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR0: - vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR1: - vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR2: - vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR3: - vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR4: - vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR5: - vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR6: - vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR7: - vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR8: - vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR9: - vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR10: - vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR11: - vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR12: - vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR13: - vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR14: - vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR15: - vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break; - default: - printk("mtspr: unknown spr %x\n", sprn); - emulated = EMULATE_FAIL; + emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); + if (emulated == EMULATE_FAIL) + printk("mtspr: unknown spr %x\n", sprn); break; } break; @@ -629,36 +272,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 0); break; - case 978: /* tlbwe */ - emulated = kvmppc_emul_tlbwe(vcpu, inst); - break; - - case 914: { /* tlbsx */ - int index; - unsigned int as = get_mmucr_sts(vcpu); - unsigned int pid = get_mmucr_stid(vcpu); - - rt = get_rt(inst); - ra = get_ra(inst); - rb = get_rb(inst); - rc = get_rc(inst); - - ea = vcpu->arch.gpr[rb]; - if (ra) - ea += vcpu->arch.gpr[ra]; - - index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); - if (rc) { - if (index < 0) - vcpu->arch.cr &= ~0x20000000; - else - vcpu->arch.cr |= 0x20000000; - } - vcpu->arch.gpr[rt] = index; - - } - break; - case 790: /* lhbrx */ rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); @@ -674,14 +287,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 2, 0); break; - case 966: /* iccci */ - break; - default: - printk("unknown: op %d xop %d\n", get_op(inst), - get_xop(inst)); + /* Attempt core-specific emulation below. */ emulated = EMULATE_FAIL; - break; } break; @@ -764,12 +372,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; default: - printk("unknown op %d\n", get_op(inst)); emulated = EMULATE_FAIL; - break; } - KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit); + if (emulated == EMULATE_FAIL) { + emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); + if (emulated == EMULATE_FAIL) { + advance = 0; + printk(KERN_ERR "Couldn't emulate instruction 0x%08x " + "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); + } + } + + KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit); if (advance) vcpu->arch.pc += 4; /* Advance past emulated instruction. */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8bef0efcdfe..2822c8ccfaa 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -28,9 +28,9 @@ #include <asm/uaccess.h> #include <asm/kvm_ppc.h> #include <asm/tlbflush.h> +#include "timing.h" #include "../mm/mmu_decl.h" - gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { return gfn; @@ -99,14 +99,7 @@ void kvm_arch_hardware_unsetup(void) void kvm_arch_check_processor_compat(void *rtn) { - int r; - - if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) - r = 0; - else - r = -ENOTSUPP; - - *(int *)rtn = r; + *(int *)rtn = kvmppc_core_check_processor_compat(); } struct kvm *kvm_arch_create_vm(void) @@ -144,9 +137,6 @@ int kvm_dev_ioctl_check_extension(long ext) int r; switch (ext) { - case KVM_CAP_USER_MEMORY: - r = 1; - break; case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; @@ -179,30 +169,15 @@ void kvm_arch_flush_shadow(struct kvm *kvm) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; - int err; - - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu) { - err = -ENOMEM; - goto out; - } - - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - + vcpu = kvmppc_core_vcpu_create(kvm, id); + kvmppc_create_vcpu_debugfs(vcpu, id); return vcpu; - -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu); -out: - return ERR_PTR(err); } void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu); + kvmppc_remove_vcpu_debugfs(vcpu); + kvmppc_core_vcpu_free(vcpu); } void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -212,16 +187,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER]; - - return test_bit(priority, &vcpu->arch.pending_exceptions); + return kvmppc_core_pending_dec(vcpu); } static void kvmppc_decrementer_func(unsigned long data) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); + kvmppc_core_queue_dec(vcpu); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); @@ -242,96 +215,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_core_destroy_mmu(vcpu); } -/* Note: clearing MSR[DE] just means that the debug interrupt will not be - * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. - * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt - * will be delivered as an "imprecise debug event" (which is indicated by - * DBSR[IDE]. - */ -static void kvmppc_disable_debug_interrupts(void) -{ - mtmsr(mfmsr() & ~MSR_DE); -} - -static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu) -{ - kvmppc_disable_debug_interrupts(); - - mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); - mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); - mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); - mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); - mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); - mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); - mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); - mtmsr(vcpu->arch.host_msr); -} - -static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu) -{ - struct kvm_guest_debug *dbg = &vcpu->guest_debug; - u32 dbcr0 = 0; - - vcpu->arch.host_msr = mfmsr(); - kvmppc_disable_debug_interrupts(); - - /* Save host debug register state. */ - vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); - vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); - vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); - vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); - vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); - vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); - vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); - - /* set registers up for guest */ - - if (dbg->bp[0]) { - mtspr(SPRN_IAC1, dbg->bp[0]); - dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; - } - if (dbg->bp[1]) { - mtspr(SPRN_IAC2, dbg->bp[1]); - dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; - } - if (dbg->bp[2]) { - mtspr(SPRN_IAC3, dbg->bp[2]); - dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; - } - if (dbg->bp[3]) { - mtspr(SPRN_IAC4, dbg->bp[3]); - dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; - } - - mtspr(SPRN_DBCR0, dbcr0); - mtspr(SPRN_DBCR1, 0); - mtspr(SPRN_DBCR2, 0); -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - int i; - if (vcpu->guest_debug.enabled) - kvmppc_load_guest_debug_registers(vcpu); + kvmppc_core_load_guest_debugstate(vcpu); - /* Mark every guest entry in the shadow TLB entry modified, so that they - * will all be reloaded on the next vcpu run (instead of being - * demand-faulted). */ - for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_tlbe_set_modified(vcpu, i); + kvmppc_core_vcpu_load(vcpu, cpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { if (vcpu->guest_debug.enabled) - kvmppc_restore_host_debug_state(vcpu); + kvmppc_core_load_host_debugstate(vcpu); /* Don't leave guest TLB entries resident when being de-scheduled. */ /* XXX It would be nice to differentiate between heavyweight exit and * sched_out here, since we could avoid the TLB flush for heavyweight * exits. */ _tlbil_all(); + kvmppc_core_vcpu_put(vcpu); } int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, @@ -355,14 +257,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; *gpr = run->dcr.data; } static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; if (run->mmio.len > sizeof(*gpr)) { printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); @@ -460,7 +362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->arch.dcr_needed = 0; } - kvmppc_check_and_deliver_interrupts(vcpu); + kvmppc_core_deliver_interrupts(vcpu); local_irq_disable(); kvm_guest_enter(); @@ -478,7 +380,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); + kvmppc_core_queue_external(vcpu, irq); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c new file mode 100644 index 00000000000..47ee603f558 --- /dev/null +++ b/arch/powerpc/kvm/timing.c @@ -0,0 +1,239 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> + +#include <asm/time.h> +#include <asm-generic/div64.h> + +#include "timing.h" + +void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) +{ + int i; + + /* pause guest execution to avoid concurrent updates */ + local_irq_disable(); + mutex_lock(&vcpu->mutex); + + vcpu->arch.last_exit_type = 0xDEAD; + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + vcpu->arch.timing_count_type[i] = 0; + vcpu->arch.timing_max_duration[i] = 0; + vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF; + vcpu->arch.timing_sum_duration[i] = 0; + vcpu->arch.timing_sum_quad_duration[i] = 0; + } + vcpu->arch.timing_last_exit = 0; + vcpu->arch.timing_exit.tv64 = 0; + vcpu->arch.timing_last_enter.tv64 = 0; + + mutex_unlock(&vcpu->mutex); + local_irq_enable(); +} + +static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) +{ + u64 old; + + do_div(duration, tb_ticks_per_usec); + if (unlikely(duration > 0xFFFFFFFF)) { + printk(KERN_ERR"%s - duration too big -> overflow" + " duration %lld type %d exit #%d\n", + __func__, duration, type, + vcpu->arch.timing_count_type[type]); + return; + } + + vcpu->arch.timing_count_type[type]++; + + /* sum */ + old = vcpu->arch.timing_sum_duration[type]; + vcpu->arch.timing_sum_duration[type] += duration; + if (unlikely(old > vcpu->arch.timing_sum_duration[type])) { + printk(KERN_ERR"%s - wrap adding sum of durations" + " old %lld new %lld type %d exit # of type %d\n", + __func__, old, vcpu->arch.timing_sum_duration[type], + type, vcpu->arch.timing_count_type[type]); + } + + /* square sum */ + old = vcpu->arch.timing_sum_quad_duration[type]; + vcpu->arch.timing_sum_quad_duration[type] += (duration*duration); + if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) { + printk(KERN_ERR"%s - wrap adding sum of squared durations" + " old %lld new %lld type %d exit # of type %d\n", + __func__, old, + vcpu->arch.timing_sum_quad_duration[type], + type, vcpu->arch.timing_count_type[type]); + } + + /* set min/max */ + if (unlikely(duration < vcpu->arch.timing_min_duration[type])) + vcpu->arch.timing_min_duration[type] = duration; + if (unlikely(duration > vcpu->arch.timing_max_duration[type])) + vcpu->arch.timing_max_duration[type] = duration; +} + +void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) +{ + u64 exit = vcpu->arch.timing_last_exit; + u64 enter = vcpu->arch.timing_last_enter.tv64; + + /* save exit time, used next exit when the reenter time is known */ + vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64; + + if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0)) + return; /* skip incomplete cycle (e.g. after reset) */ + + /* update statistics for average and standard deviation */ + add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type); + /* enter -> timing_last_exit is time spent in guest - log this too */ + add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter), + TIMEINGUEST); +} + +static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { + [MMIO_EXITS] = "MMIO", + [DCR_EXITS] = "DCR", + [SIGNAL_EXITS] = "SIGNAL", + [ITLB_REAL_MISS_EXITS] = "ITLBREAL", + [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", + [DTLB_REAL_MISS_EXITS] = "DTLBREAL", + [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT", + [SYSCALL_EXITS] = "SYSCALL", + [ISI_EXITS] = "ISI", + [DSI_EXITS] = "DSI", + [EMULATED_INST_EXITS] = "EMULINST", + [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT", + [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE", + [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR", + [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR", + [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR", + [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR", + [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX", + [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE", + [EMULATED_RFI_EXITS] = "EMUL_RFI", + [DEC_EXITS] = "DEC", + [EXT_INTR_EXITS] = "EXTINT", + [HALT_WAKEUP] = "HALT", + [USR_PR_INST] = "USR_PR_INST", + [FP_UNAVAIL] = "FP_UNAVAIL", + [DEBUG_EXITS] = "DEBUG", + [TIMEINGUEST] = "TIMEINGUEST" +}; + +static int kvmppc_exit_timing_show(struct seq_file *m, void *private) +{ + struct kvm_vcpu *vcpu = m->private; + int i; + + seq_printf(m, "%s", "type count min max sum sum_squared\n"); + + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", + kvm_exit_names[i], + vcpu->arch.timing_count_type[i], + vcpu->arch.timing_min_duration[i], + vcpu->arch.timing_max_duration[i], + vcpu->arch.timing_sum_duration[i], + vcpu->arch.timing_sum_quad_duration[i]); + } + return 0; +} + +/* Write 'c' to clear the timing statistics. */ +static ssize_t kvmppc_exit_timing_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int err = -EINVAL; + char c; + + if (count > 1) { + goto done; + } + + if (get_user(c, user_buf)) { + err = -EFAULT; + goto done; + } + + if (c == 'c') { + struct seq_file *seqf = (struct seq_file *)file->private_data; + struct kvm_vcpu *vcpu = seqf->private; + /* Write does not affect our buffers previously generated with + * show. seq_file is locked here to prevent races of init with + * a show call */ + mutex_lock(&seqf->lock); + kvmppc_init_timing_stats(vcpu); + mutex_unlock(&seqf->lock); + err = count; + } + +done: + return err; +} + +static int kvmppc_exit_timing_open(struct inode *inode, struct file *file) +{ + return single_open(file, kvmppc_exit_timing_show, inode->i_private); +} + +static struct file_operations kvmppc_exit_timing_fops = { + .owner = THIS_MODULE, + .open = kvmppc_exit_timing_open, + .read = seq_read, + .write = kvmppc_exit_timing_write, + .llseek = seq_lseek, + .release = single_release, +}; + +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) +{ + static char dbg_fname[50]; + struct dentry *debugfs_file; + + snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", + current->pid, id); + debugfs_file = debugfs_create_file(dbg_fname, 0666, + kvm_debugfs_dir, vcpu, + &kvmppc_exit_timing_fops); + + if (!debugfs_file) { + printk(KERN_ERR"%s: error creating debugfs file %s\n", + __func__, dbg_fname); + return; + } + + vcpu->arch.debugfs_exit_timing = debugfs_file; +} + +void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.debugfs_exit_timing) { + debugfs_remove(vcpu->arch.debugfs_exit_timing); + vcpu->arch.debugfs_exit_timing = NULL; + } +} diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h new file mode 100644 index 00000000000..bb13b1f3cd5 --- /dev/null +++ b/arch/powerpc/kvm/timing.h @@ -0,0 +1,102 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + */ + +#ifndef __POWERPC_KVM_EXITTIMING_H__ +#define __POWERPC_KVM_EXITTIMING_H__ + +#include <linux/kvm_host.h> +#include <asm/kvm_host.h> + +#ifdef CONFIG_KVM_EXIT_TIMING +void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu); +void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu); +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id); +void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu); + +static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) +{ + vcpu->arch.last_exit_type = type; +} + +#else +/* if exit timing is not configured there is no need to build the c file */ +static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, + unsigned int id) {} +static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} +#endif /* CONFIG_KVM_EXIT_TIMING */ + +/* account the exit in kvm_stats */ +static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) +{ + /* type has to be known at build time for optimization */ + BUILD_BUG_ON(__builtin_constant_p(type)); + switch (type) { + case EXT_INTR_EXITS: + vcpu->stat.ext_intr_exits++; + break; + case DEC_EXITS: + vcpu->stat.dec_exits++; + break; + case EMULATED_INST_EXITS: + vcpu->stat.emulated_inst_exits++; + break; + case DCR_EXITS: + vcpu->stat.dcr_exits++; + break; + case DSI_EXITS: + vcpu->stat.dsi_exits++; + break; + case ISI_EXITS: + vcpu->stat.isi_exits++; + break; + case SYSCALL_EXITS: + vcpu->stat.syscall_exits++; + break; + case DTLB_REAL_MISS_EXITS: + vcpu->stat.dtlb_real_miss_exits++; + break; + case DTLB_VIRT_MISS_EXITS: + vcpu->stat.dtlb_virt_miss_exits++; + break; + case MMIO_EXITS: + vcpu->stat.mmio_exits++; + break; + case ITLB_REAL_MISS_EXITS: + vcpu->stat.itlb_real_miss_exits++; + break; + case ITLB_VIRT_MISS_EXITS: + vcpu->stat.itlb_virt_miss_exits++; + break; + case SIGNAL_EXITS: + vcpu->stat.signal_exits++; + break; + } +} + +/* wrapper to set exit time and account for it in kvm_stats */ +static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type) +{ + kvmppc_set_exit_type(vcpu, type); + kvmppc_account_exit_stat(vcpu, type); +} + +#endif /* __POWERPC_KVM_EXITTIMING_H__ */ diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c index 906a0a2a9fe..1410443731e 100644 --- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c @@ -80,10 +80,10 @@ static void cpu_affinity_set(struct spu *spu, int cpu) u64 route; if (nr_cpus_node(spu->node)) { - cpumask_t spumask = node_to_cpumask(spu->node); - cpumask_t cpumask = node_to_cpumask(cpu_to_node(cpu)); + const struct cpumask *spumask = cpumask_of_node(spu->node), + *cpumask = cpumask_of_node(cpu_to_node(cpu)); - if (!cpus_intersects(spumask, cpumask)) + if (!cpumask_intersects(spumask, cpumask)) return; } diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 2ad914c4749..6a0ad196aeb 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -166,9 +166,9 @@ void spu_update_sched_info(struct spu_context *ctx) static int __node_allowed(struct spu_context *ctx, int node) { if (nr_cpus_node(node)) { - cpumask_t mask = node_to_cpumask(node); + const struct cpumask *mask = cpumask_of_node(node); - if (cpus_intersects(mask, ctx->cpus_allowed)) + if (cpumask_intersects(mask, &ctx->cpus_allowed)) return 1; } diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index f7a69021b7b..84e058f1e1c 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -332,7 +332,7 @@ static void xics_eoi_lpar(unsigned int virq) lpar_xirr_info_set((0xff << 24) | irq); } -static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) +static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) { unsigned int irq; int status; @@ -870,7 +870,7 @@ void xics_migrate_irqs_away(void) /* Reset affinity to all cpus */ irq_desc[virq].affinity = CPU_MASK_ALL; - desc->chip->set_affinity(virq, CPU_MASK_ALL); + desc->chip->set_affinity(virq, cpu_all_mask); unlock: spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index c82babb7007..3e0d89dcdba 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -806,7 +806,7 @@ static void mpic_end_ipi(unsigned int irq) #endif /* CONFIG_SMP */ -void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) +void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) { struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); @@ -818,7 +818,7 @@ void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) } else { cpumask_t tmp; - cpus_and(tmp, cpumask, cpu_online_map); + cpumask_and(&tmp, cpumask, cpu_online_mask); mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), mpic_physmask(cpus_addr(tmp)[0])); diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h index 6209c62a426..3cef2af10f4 100644 --- a/arch/powerpc/sysdev/mpic.h +++ b/arch/powerpc/sysdev/mpic.h @@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic) extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type); extern void mpic_set_vector(unsigned int virq, unsigned int vector); -extern void mpic_set_affinity(unsigned int irq, cpumask_t cpumask); +extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask); #endif /* _POWERPC_SYSDEV_MPIC_H */ |