diff options
Diffstat (limited to 'arch')
60 files changed, 1120 insertions, 395 deletions
diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c index fafcd32e690..489556eecbd 100644 --- a/arch/arm/mach-omap2/clock34xx.c +++ b/arch/arm/mach-omap2/clock34xx.c @@ -338,6 +338,13 @@ static struct omap_clk omap34xx_clks[] = { */ #define SDRC_MPURATE_LOOPS 96 +/* + * DPLL5_FREQ_FOR_USBHOST: USBHOST and USBTLL are the only clocks + * that are sourced by DPLL5, and both of these require this clock + * to be at 120 MHz for proper operation. + */ +#define DPLL5_FREQ_FOR_USBHOST 120000000 + /** * omap3430es2_clk_ssi_find_idlest - return CM_IDLEST info for SSI * @clk: struct clk * being enabled @@ -1056,6 +1063,28 @@ void omap2_clk_prepare_for_reboot(void) #endif } +static void omap3_clk_lock_dpll5(void) +{ + struct clk *dpll5_clk; + struct clk *dpll5_m2_clk; + + dpll5_clk = clk_get(NULL, "dpll5_ck"); + clk_set_rate(dpll5_clk, DPLL5_FREQ_FOR_USBHOST); + clk_enable(dpll5_clk); + + /* Enable autoidle to allow it to enter low power bypass */ + omap3_dpll_allow_idle(dpll5_clk); + + /* Program dpll5_m2_clk divider for no division */ + dpll5_m2_clk = clk_get(NULL, "dpll5_m2_ck"); + clk_enable(dpll5_m2_clk); + clk_set_rate(dpll5_m2_clk, DPLL5_FREQ_FOR_USBHOST); + + clk_disable(dpll5_m2_clk); + clk_disable(dpll5_clk); + return; +} + /* REVISIT: Move this init stuff out into clock.c */ /* @@ -1148,6 +1177,12 @@ int __init omap2_clk_init(void) */ clk_enable_init_clocks(); + /* + * Lock DPLL5 and put it in autoidle. + */ + if (omap_rev() >= OMAP3430_REV_ES2_0) + omap3_clk_lock_dpll5(); + /* Avoid sleeping during omap2_clk_prepare_for_reboot() */ /* REVISIT: not yet ready for 343x */ #if 0 diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c index 1b4c1600f8d..2fc4d6abbd0 100644 --- a/arch/arm/mach-omap2/pm-debug.c +++ b/arch/arm/mach-omap2/pm-debug.c @@ -541,7 +541,7 @@ static int __init pm_dbg_init(void) printk(KERN_ERR "%s: only OMAP3 supported\n", __func__); return -ENODEV; } - + d = debugfs_create_dir("pm_debug", NULL); if (IS_ERR(d)) return PTR_ERR(d); @@ -551,7 +551,7 @@ static int __init pm_dbg_init(void) (void) debugfs_create_file("time", S_IRUGO, d, (void *)DEBUG_FILE_TIMERS, &debug_fops); - pwrdm_for_each(pwrdms_setup, (void *)d); + pwrdm_for_each_nolock(pwrdms_setup, (void *)d); pm_dbg_dir = debugfs_create_dir("registers", d); if (IS_ERR(pm_dbg_dir)) diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 0ff5a6c53aa..378c2f61835 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -51,97 +51,112 @@ static void (*_omap_sram_idle)(u32 *addr, int save_state); static struct powerdomain *mpu_pwrdm; -/* PRCM Interrupt Handler for wakeups */ -static irqreturn_t prcm_interrupt_handler (int irq, void *dev_id) +/* + * PRCM Interrupt Handler Helper Function + * + * The purpose of this function is to clear any wake-up events latched + * in the PRCM PM_WKST_x registers. It is possible that a wake-up event + * may occur whilst attempting to clear a PM_WKST_x register and thus + * set another bit in this register. A while loop is used to ensure + * that any peripheral wake-up events occurring while attempting to + * clear the PM_WKST_x are detected and cleared. + */ +static int prcm_clear_mod_irqs(s16 module, u8 regs) { - u32 wkst, irqstatus_mpu; - u32 fclk, iclk; - - /* WKUP */ - wkst = prm_read_mod_reg(WKUP_MOD, PM_WKST); + u32 wkst, fclk, iclk, clken; + u16 wkst_off = (regs == 3) ? OMAP3430ES2_PM_WKST3 : PM_WKST1; + u16 fclk_off = (regs == 3) ? OMAP3430ES2_CM_FCLKEN3 : CM_FCLKEN1; + u16 iclk_off = (regs == 3) ? CM_ICLKEN3 : CM_ICLKEN1; + u16 grpsel_off = (regs == 3) ? + OMAP3430ES2_PM_MPUGRPSEL3 : OMAP3430_PM_MPUGRPSEL; + int c = 0; + + wkst = prm_read_mod_reg(module, wkst_off); + wkst &= prm_read_mod_reg(module, grpsel_off); if (wkst) { - iclk = cm_read_mod_reg(WKUP_MOD, CM_ICLKEN); - fclk = cm_read_mod_reg(WKUP_MOD, CM_FCLKEN); - cm_set_mod_reg_bits(wkst, WKUP_MOD, CM_ICLKEN); - cm_set_mod_reg_bits(wkst, WKUP_MOD, CM_FCLKEN); - prm_write_mod_reg(wkst, WKUP_MOD, PM_WKST); - while (prm_read_mod_reg(WKUP_MOD, PM_WKST)) - cpu_relax(); - cm_write_mod_reg(iclk, WKUP_MOD, CM_ICLKEN); - cm_write_mod_reg(fclk, WKUP_MOD, CM_FCLKEN); + iclk = cm_read_mod_reg(module, iclk_off); + fclk = cm_read_mod_reg(module, fclk_off); + while (wkst) { + clken = wkst; + cm_set_mod_reg_bits(clken, module, iclk_off); + /* + * For USBHOST, we don't know whether HOST1 or + * HOST2 woke us up, so enable both f-clocks + */ + if (module == OMAP3430ES2_USBHOST_MOD) + clken |= 1 << OMAP3430ES2_EN_USBHOST2_SHIFT; + cm_set_mod_reg_bits(clken, module, fclk_off); + prm_write_mod_reg(wkst, module, wkst_off); + wkst = prm_read_mod_reg(module, wkst_off); + c++; + } + cm_write_mod_reg(iclk, module, iclk_off); + cm_write_mod_reg(fclk, module, fclk_off); } - /* CORE */ - wkst = prm_read_mod_reg(CORE_MOD, PM_WKST1); - if (wkst) { - iclk = cm_read_mod_reg(CORE_MOD, CM_ICLKEN1); - fclk = cm_read_mod_reg(CORE_MOD, CM_FCLKEN1); - cm_set_mod_reg_bits(wkst, CORE_MOD, CM_ICLKEN1); - cm_set_mod_reg_bits(wkst, CORE_MOD, CM_FCLKEN1); - prm_write_mod_reg(wkst, CORE_MOD, PM_WKST1); - while (prm_read_mod_reg(CORE_MOD, PM_WKST1)) - cpu_relax(); - cm_write_mod_reg(iclk, CORE_MOD, CM_ICLKEN1); - cm_write_mod_reg(fclk, CORE_MOD, CM_FCLKEN1); - } - wkst = prm_read_mod_reg(CORE_MOD, OMAP3430ES2_PM_WKST3); - if (wkst) { - iclk = cm_read_mod_reg(CORE_MOD, CM_ICLKEN3); - fclk = cm_read_mod_reg(CORE_MOD, OMAP3430ES2_CM_FCLKEN3); - cm_set_mod_reg_bits(wkst, CORE_MOD, CM_ICLKEN3); - cm_set_mod_reg_bits(wkst, CORE_MOD, OMAP3430ES2_CM_FCLKEN3); - prm_write_mod_reg(wkst, CORE_MOD, OMAP3430ES2_PM_WKST3); - while (prm_read_mod_reg(CORE_MOD, OMAP3430ES2_PM_WKST3)) - cpu_relax(); - cm_write_mod_reg(iclk, CORE_MOD, CM_ICLKEN3); - cm_write_mod_reg(fclk, CORE_MOD, OMAP3430ES2_CM_FCLKEN3); - } + return c; +} - /* PER */ - wkst = prm_read_mod_reg(OMAP3430_PER_MOD, PM_WKST); - if (wkst) { - iclk = cm_read_mod_reg(OMAP3430_PER_MOD, CM_ICLKEN); - fclk = cm_read_mod_reg(OMAP3430_PER_MOD, CM_FCLKEN); - cm_set_mod_reg_bits(wkst, OMAP3430_PER_MOD, CM_ICLKEN); - cm_set_mod_reg_bits(wkst, OMAP3430_PER_MOD, CM_FCLKEN); - prm_write_mod_reg(wkst, OMAP3430_PER_MOD, PM_WKST); - while (prm_read_mod_reg(OMAP3430_PER_MOD, PM_WKST)) - cpu_relax(); - cm_write_mod_reg(iclk, OMAP3430_PER_MOD, CM_ICLKEN); - cm_write_mod_reg(fclk, OMAP3430_PER_MOD, CM_FCLKEN); - } +static int _prcm_int_handle_wakeup(void) +{ + int c; + c = prcm_clear_mod_irqs(WKUP_MOD, 1); + c += prcm_clear_mod_irqs(CORE_MOD, 1); + c += prcm_clear_mod_irqs(OMAP3430_PER_MOD, 1); if (omap_rev() > OMAP3430_REV_ES1_0) { - /* USBHOST */ - wkst = prm_read_mod_reg(OMAP3430ES2_USBHOST_MOD, PM_WKST); - if (wkst) { - iclk = cm_read_mod_reg(OMAP3430ES2_USBHOST_MOD, - CM_ICLKEN); - fclk = cm_read_mod_reg(OMAP3430ES2_USBHOST_MOD, - CM_FCLKEN); - cm_set_mod_reg_bits(wkst, OMAP3430ES2_USBHOST_MOD, - CM_ICLKEN); - cm_set_mod_reg_bits(wkst, OMAP3430ES2_USBHOST_MOD, - CM_FCLKEN); - prm_write_mod_reg(wkst, OMAP3430ES2_USBHOST_MOD, - PM_WKST); - while (prm_read_mod_reg(OMAP3430ES2_USBHOST_MOD, - PM_WKST)) - cpu_relax(); - cm_write_mod_reg(iclk, OMAP3430ES2_USBHOST_MOD, - CM_ICLKEN); - cm_write_mod_reg(fclk, OMAP3430ES2_USBHOST_MOD, - CM_FCLKEN); - } + c += prcm_clear_mod_irqs(CORE_MOD, 3); + c += prcm_clear_mod_irqs(OMAP3430ES2_USBHOST_MOD, 1); } - irqstatus_mpu = prm_read_mod_reg(OCP_MOD, - OMAP3_PRM_IRQSTATUS_MPU_OFFSET); - prm_write_mod_reg(irqstatus_mpu, OCP_MOD, - OMAP3_PRM_IRQSTATUS_MPU_OFFSET); + return c; +} + +/* + * PRCM Interrupt Handler + * + * The PRM_IRQSTATUS_MPU register indicates if there are any pending + * interrupts from the PRCM for the MPU. These bits must be cleared in + * order to clear the PRCM interrupt. The PRCM interrupt handler is + * implemented to simply clear the PRM_IRQSTATUS_MPU in order to clear + * the PRCM interrupt. Please note that bit 0 of the PRM_IRQSTATUS_MPU + * register indicates that a wake-up event is pending for the MPU and + * this bit can only be cleared if the all the wake-up events latched + * in the various PM_WKST_x registers have been cleared. The interrupt + * handler is implemented using a do-while loop so that if a wake-up + * event occurred during the processing of the prcm interrupt handler + * (setting a bit in the corresponding PM_WKST_x register and thus + * preventing us from clearing bit 0 of the PRM_IRQSTATUS_MPU register) + * this would be handled. + */ +static irqreturn_t prcm_interrupt_handler (int irq, void *dev_id) +{ + u32 irqstatus_mpu; + int c = 0; + + do { + irqstatus_mpu = prm_read_mod_reg(OCP_MOD, + OMAP3_PRM_IRQSTATUS_MPU_OFFSET); + + if (irqstatus_mpu & (OMAP3430_WKUP_ST | OMAP3430_IO_ST)) { + c = _prcm_int_handle_wakeup(); + + /* + * Is the MPU PRCM interrupt handler racing with the + * IVA2 PRCM interrupt handler ? + */ + WARN(c == 0, "prcm: WARNING: PRCM indicated MPU wakeup " + "but no wakeup sources are marked\n"); + } else { + /* XXX we need to expand our PRCM interrupt handler */ + WARN(1, "prcm: WARNING: PRCM interrupt received, but " + "no code to handle it (%08x)\n", irqstatus_mpu); + } + + prm_write_mod_reg(irqstatus_mpu, OCP_MOD, + OMAP3_PRM_IRQSTATUS_MPU_OFFSET); - while (prm_read_mod_reg(OCP_MOD, OMAP3_PRM_IRQSTATUS_MPU_OFFSET)) - cpu_relax(); + } while (prm_read_mod_reg(OCP_MOD, OMAP3_PRM_IRQSTATUS_MPU_OFFSET)); return IRQ_HANDLED; } @@ -624,6 +639,16 @@ static void __init prcm_setup_regs(void) prm_write_mod_reg(OMAP3430_IO_EN | OMAP3430_WKUP_EN, OCP_MOD, OMAP3_PRM_IRQENABLE_MPU_OFFSET); + /* Enable GPIO wakeups in PER */ + prm_write_mod_reg(OMAP3430_EN_GPIO2 | OMAP3430_EN_GPIO3 | + OMAP3430_EN_GPIO4 | OMAP3430_EN_GPIO5 | + OMAP3430_EN_GPIO6, OMAP3430_PER_MOD, PM_WKEN); + /* and allow them to wake up MPU */ + prm_write_mod_reg(OMAP3430_GRPSEL_GPIO2 | OMAP3430_EN_GPIO3 | + OMAP3430_GRPSEL_GPIO4 | OMAP3430_EN_GPIO5 | + OMAP3430_GRPSEL_GPIO6, + OMAP3430_PER_MOD, OMAP3430_PM_MPUGRPSEL); + /* Don't attach IVA interrupts */ prm_write_mod_reg(0, WKUP_MOD, OMAP3430_PM_IVAGRPSEL); prm_write_mod_reg(0, CORE_MOD, OMAP3430_PM_IVAGRPSEL1); diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index 2594cbff394..f00289abd30 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -273,35 +273,50 @@ struct powerdomain *pwrdm_lookup(const char *name) } /** - * pwrdm_for_each - call function on each registered clockdomain + * pwrdm_for_each_nolock - call function on each registered clockdomain * @fn: callback function * * * Call the supplied function for each registered powerdomain. The * callback function can return anything but 0 to bail out early from - * the iterator. The callback function is called with the pwrdm_rwlock - * held for reading, so no powerdomain structure manipulation - * functions should be called from the callback, although hardware - * powerdomain control functions are fine. Returns the last return - * value of the callback function, which should be 0 for success or - * anything else to indicate failure; or -EINVAL if the function - * pointer is null. + * the iterator. Returns the last return value of the callback function, which + * should be 0 for success or anything else to indicate failure; or -EINVAL if + * the function pointer is null. */ -int pwrdm_for_each(int (*fn)(struct powerdomain *pwrdm, void *user), - void *user) +int pwrdm_for_each_nolock(int (*fn)(struct powerdomain *pwrdm, void *user), + void *user) { struct powerdomain *temp_pwrdm; - unsigned long flags; int ret = 0; if (!fn) return -EINVAL; - read_lock_irqsave(&pwrdm_rwlock, flags); list_for_each_entry(temp_pwrdm, &pwrdm_list, node) { ret = (*fn)(temp_pwrdm, user); if (ret) break; } + + return ret; +} + +/** + * pwrdm_for_each - call function on each registered clockdomain + * @fn: callback function * + * + * This function is the same as 'pwrdm_for_each_nolock()', but keeps the + * &pwrdm_rwlock locked for reading, so no powerdomain structure manipulation + * functions should be called from the callback, although hardware powerdomain + * control functions are fine. + */ +int pwrdm_for_each(int (*fn)(struct powerdomain *pwrdm, void *user), + void *user) +{ + unsigned long flags; + int ret; + + read_lock_irqsave(&pwrdm_rwlock, flags); + ret = pwrdm_for_each_nolock(fn, user); read_unlock_irqrestore(&pwrdm_rwlock, flags); return ret; diff --git a/arch/arm/plat-omap/include/mach/cpu.h b/arch/arm/plat-omap/include/mach/cpu.h index 11e73d9e892..f129efb3075 100644 --- a/arch/arm/plat-omap/include/mach/cpu.h +++ b/arch/arm/plat-omap/include/mach/cpu.h @@ -303,32 +303,21 @@ IS_OMAP_TYPE(3430, 0x3430) #define cpu_is_omap2430() 0 #define cpu_is_omap3430() 0 -#if defined(MULTI_OMAP1) -# if defined(CONFIG_ARCH_OMAP730) -# undef cpu_is_omap730 -# define cpu_is_omap730() is_omap730() -# endif -# if defined(CONFIG_ARCH_OMAP850) -# undef cpu_is_omap850 -# define cpu_is_omap850() is_omap850() -# endif -#else -# if defined(CONFIG_ARCH_OMAP730) -# undef cpu_is_omap730 -# define cpu_is_omap730() 1 -# endif -#endif -#else -# if defined(CONFIG_ARCH_OMAP850) -# undef cpu_is_omap850 -# define cpu_is_omap850() 1 -# endif -#endif - /* * Whether we have MULTI_OMAP1 or not, we still need to distinguish - * between 330 vs. 1510 and 1611B/5912 vs. 1710. + * between 730 vs 850, 330 vs. 1510 and 1611B/5912 vs. 1710. */ + +#if defined(CONFIG_ARCH_OMAP730) +# undef cpu_is_omap730 +# define cpu_is_omap730() is_omap730() +#endif + +#if defined(CONFIG_ARCH_OMAP850) +# undef cpu_is_omap850 +# define cpu_is_omap850() is_omap850() +#endif + #if defined(CONFIG_ARCH_OMAP15XX) # undef cpu_is_omap310 # undef cpu_is_omap1510 @@ -433,3 +422,5 @@ IS_OMAP_TYPE(3430, 0x3430) int omap_chip_is(struct omap_chip_id oci); void omap2_check_revision(void); + +#endif diff --git a/arch/arm/plat-omap/include/mach/powerdomain.h b/arch/arm/plat-omap/include/mach/powerdomain.h index 6271d8556a4..fa6461423bd 100644 --- a/arch/arm/plat-omap/include/mach/powerdomain.h +++ b/arch/arm/plat-omap/include/mach/powerdomain.h @@ -135,6 +135,8 @@ struct powerdomain *pwrdm_lookup(const char *name); int pwrdm_for_each(int (*fn)(struct powerdomain *pwrdm, void *user), void *user); +int pwrdm_for_each_nolock(int (*fn)(struct powerdomain *pwrdm, void *user), + void *user); int pwrdm_add_clkdm(struct powerdomain *pwrdm, struct clockdomain *clkdm); int pwrdm_del_clkdm(struct powerdomain *pwrdm, struct clockdomain *clkdm); diff --git a/arch/arm/plat-omap/iovmm.c b/arch/arm/plat-omap/iovmm.c index 57f7122a091..dc3fac3dd0e 100644 --- a/arch/arm/plat-omap/iovmm.c +++ b/arch/arm/plat-omap/iovmm.c @@ -47,7 +47,7 @@ * 'va': mpu virtual address * * 'c': contiguous memory area - * 'd': dicontiguous memory area + * 'd': discontiguous memory area * 'a': anonymous memory allocation * '()': optional feature * @@ -363,8 +363,9 @@ void *da_to_va(struct iommu *obj, u32 da) goto out; } va = area->va; - mutex_unlock(&obj->mmap_lock); out: + mutex_unlock(&obj->mmap_lock); + return va; } EXPORT_SYMBOL_GPL(da_to_va); @@ -398,7 +399,7 @@ static inline void sgtable_drain_vmalloc(struct sg_table *sgt) { /* * Actually this is not necessary at all, just exists for - * consistency of the code readibility. + * consistency of the code readability. */ BUG_ON(!sgt); } @@ -434,7 +435,7 @@ static inline void sgtable_drain_kmalloc(struct sg_table *sgt) { /* * Actually this is not necessary at all, just exists for - * consistency of the code readibility + * consistency of the code readability */ BUG_ON(!sgt); } diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c index 925f64711c3..75d1f26e5b1 100644 --- a/arch/arm/plat-omap/sram.c +++ b/arch/arm/plat-omap/sram.c @@ -270,7 +270,8 @@ void * omap_sram_push(void * start, unsigned long size) omap_sram_ceil -= size; omap_sram_ceil = ROUND_DOWN(omap_sram_ceil, sizeof(void *)); memcpy((void *)omap_sram_ceil, start, size); - flush_icache_range((unsigned long)start, (unsigned long)(start + size)); + flush_icache_range((unsigned long)omap_sram_ceil, + (unsigned long)(omap_sram_ceil + size)); return (void *)omap_sram_ceil; } diff --git a/arch/m68knommu/kernel/asm-offsets.c b/arch/m68knommu/kernel/asm-offsets.c index 594ee0e657f..9a8876f715d 100644 --- a/arch/m68knommu/kernel/asm-offsets.c +++ b/arch/m68knommu/kernel/asm-offsets.c @@ -45,25 +45,25 @@ int main(void) DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fpstate)); /* offsets into the pt_regs */ - DEFINE(PT_D0, offsetof(struct pt_regs, d0)); - DEFINE(PT_ORIG_D0, offsetof(struct pt_regs, orig_d0)); - DEFINE(PT_D1, offsetof(struct pt_regs, d1)); - DEFINE(PT_D2, offsetof(struct pt_regs, d2)); - DEFINE(PT_D3, offsetof(struct pt_regs, d3)); - DEFINE(PT_D4, offsetof(struct pt_regs, d4)); - DEFINE(PT_D5, offsetof(struct pt_regs, d5)); - DEFINE(PT_A0, offsetof(struct pt_regs, a0)); - DEFINE(PT_A1, offsetof(struct pt_regs, a1)); - DEFINE(PT_A2, offsetof(struct pt_regs, a2)); - DEFINE(PT_PC, offsetof(struct pt_regs, pc)); - DEFINE(PT_SR, offsetof(struct pt_regs, sr)); + DEFINE(PT_OFF_D0, offsetof(struct pt_regs, d0)); + DEFINE(PT_OFF_ORIG_D0, offsetof(struct pt_regs, orig_d0)); + DEFINE(PT_OFF_D1, offsetof(struct pt_regs, d1)); + DEFINE(PT_OFF_D2, offsetof(struct pt_regs, d2)); + DEFINE(PT_OFF_D3, offsetof(struct pt_regs, d3)); + DEFINE(PT_OFF_D4, offsetof(struct pt_regs, d4)); + DEFINE(PT_OFF_D5, offsetof(struct pt_regs, d5)); + DEFINE(PT_OFF_A0, offsetof(struct pt_regs, a0)); + DEFINE(PT_OFF_A1, offsetof(struct pt_regs, a1)); + DEFINE(PT_OFF_A2, offsetof(struct pt_regs, a2)); + DEFINE(PT_OFF_PC, offsetof(struct pt_regs, pc)); + DEFINE(PT_OFF_SR, offsetof(struct pt_regs, sr)); #ifdef CONFIG_COLDFIRE /* bitfields are a bit difficult */ - DEFINE(PT_FORMATVEC, offsetof(struct pt_regs, sr) - 2); + DEFINE(PT_OFF_FORMATVEC, offsetof(struct pt_regs, sr) - 2); #else /* bitfields are a bit difficult */ - DEFINE(PT_VECTOR, offsetof(struct pt_regs, pc) + 4); + DEFINE(PT_OFF_VECTOR, offsetof(struct pt_regs, pc) + 4); #endif /* signal defines */ diff --git a/arch/m68knommu/kernel/entry.S b/arch/m68knommu/kernel/entry.S index f56faa5c9cd..56043ade394 100644 --- a/arch/m68knommu/kernel/entry.S +++ b/arch/m68knommu/kernel/entry.S @@ -46,7 +46,7 @@ ENTRY(buserr) SAVE_ALL moveq #-1,%d0 - movel %d0,%sp@(PT_ORIG_D0) + movel %d0,%sp@(PT_OFF_ORIG_D0) movel %sp,%sp@- /* stack frame pointer argument */ jsr buserr_c addql #4,%sp @@ -55,7 +55,7 @@ ENTRY(buserr) ENTRY(trap) SAVE_ALL moveq #-1,%d0 - movel %d0,%sp@(PT_ORIG_D0) + movel %d0,%sp@(PT_OFF_ORIG_D0) movel %sp,%sp@- /* stack frame pointer argument */ jsr trap_c addql #4,%sp @@ -67,7 +67,7 @@ ENTRY(trap) ENTRY(dbginterrupt) SAVE_ALL moveq #-1,%d0 - movel %d0,%sp@(PT_ORIG_D0) + movel %d0,%sp@(PT_OFF_ORIG_D0) movel %sp,%sp@- /* stack frame pointer argument */ jsr dbginterrupt_c addql #4,%sp diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c index b1703c67a4f..f3236d0b522 100644 --- a/arch/m68knommu/mm/init.c +++ b/arch/m68knommu/mm/init.c @@ -162,7 +162,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) totalram_pages++; pages++; } - printk (KERN_NOTICE "Freeing initrd memory: %dk freed\n", pages); + printk (KERN_NOTICE "Freeing initrd memory: %dk freed\n", pages * (PAGE_SIZE / 1024)); } #endif diff --git a/arch/m68knommu/platform/5206e/config.c b/arch/m68knommu/platform/5206e/config.c index 0f41ba82a3b..942397984c6 100644 --- a/arch/m68knommu/platform/5206e/config.c +++ b/arch/m68knommu/platform/5206e/config.c @@ -17,7 +17,6 @@ #include <asm/mcfsim.h> #include <asm/mcfuart.h> #include <asm/mcfdma.h> -#include <asm/mcfuart.h> /***************************************************************************/ diff --git a/arch/m68knommu/platform/68328/entry.S b/arch/m68knommu/platform/68328/entry.S index b1aef72f3ba..9d80d2c4286 100644 --- a/arch/m68knommu/platform/68328/entry.S +++ b/arch/m68knommu/platform/68328/entry.S @@ -39,17 +39,17 @@ .globl inthandler7 badsys: - movel #-ENOSYS,%sp@(PT_D0) + movel #-ENOSYS,%sp@(PT_OFF_D0) jra ret_from_exception do_trace: - movel #-ENOSYS,%sp@(PT_D0) /* needed for strace*/ + movel #-ENOSYS,%sp@(PT_OFF_D0) /* needed for strace*/ subql #4,%sp SAVE_SWITCH_STACK jbsr syscall_trace RESTORE_SWITCH_STACK addql #4,%sp - movel %sp@(PT_ORIG_D0),%d1 + movel %sp@(PT_OFF_ORIG_D0),%d1 movel #-ENOSYS,%d0 cmpl #NR_syscalls,%d1 jcc 1f @@ -57,7 +57,7 @@ do_trace: lea sys_call_table, %a0 jbsr %a0@(%d1) -1: movel %d0,%sp@(PT_D0) /* save the return value */ +1: movel %d0,%sp@(PT_OFF_D0) /* save the return value */ subql #4,%sp /* dummy return address */ SAVE_SWITCH_STACK jbsr syscall_trace @@ -75,7 +75,7 @@ ENTRY(system_call) jbsr set_esp0 addql #4,%sp - movel %sp@(PT_ORIG_D0),%d0 + movel %sp@(PT_OFF_ORIG_D0),%d0 movel %sp,%d1 /* get thread_info pointer */ andl #-THREAD_SIZE,%d1 @@ -88,10 +88,10 @@ ENTRY(system_call) lea sys_call_table,%a0 movel %a0@(%d0), %a0 jbsr %a0@ - movel %d0,%sp@(PT_D0) /* save the return value*/ + movel %d0,%sp@(PT_OFF_D0) /* save the return value*/ ret_from_exception: - btst #5,%sp@(PT_SR) /* check if returning to kernel*/ + btst #5,%sp@(PT_OFF_SR) /* check if returning to kernel*/ jeq Luser_return /* if so, skip resched, signals*/ Lkernel_return: @@ -133,7 +133,7 @@ Lreturn: */ inthandler1: SAVE_ALL - movew %sp@(PT_VECTOR), %d0 + movew %sp@(PT_OFF_VECTOR), %d0 and #0x3ff, %d0 movel %sp,%sp@- @@ -144,7 +144,7 @@ inthandler1: inthandler2: SAVE_ALL - movew %sp@(PT_VECTOR), %d0 + movew %sp@(PT_OFF_VECTOR), %d0 and #0x3ff, %d0 movel %sp,%sp@- @@ -155,7 +155,7 @@ inthandler2: inthandler3: SAVE_ALL - movew %sp@(PT_VECTOR), %d0 + movew %sp@(PT_OFF_VECTOR), %d0 and #0x3ff, %d0 movel %sp,%sp@- @@ -166,7 +166,7 @@ inthandler3: inthandler4: SAVE_ALL - movew %sp@(PT_VECTOR), %d0 + movew %sp@(PT_OFF_VECTOR), %d0 and #0x3ff, %d0 movel %sp,%sp@- @@ -177,7 +177,7 @@ inthandler4: inthandler5: SAVE_ALL - movew %sp@(PT_VECTOR), %d0 + movew %sp@(PT_OFF_VECTOR), %d0 and #0x3ff, %d0 movel %sp,%sp@- @@ -188,7 +188,7 @@ inthandler5: inthandler6: SAVE_ALL - movew %sp@(PT_VECTOR), %d0 + movew %sp@(PT_OFF_VECTOR), %d0 and #0x3ff, %d0 movel %sp,%sp@- @@ -199,7 +199,7 @@ inthandler6: inthandler7: SAVE_ALL - movew %sp@(PT_VECTOR), %d0 + movew %sp@(PT_OFF_VECTOR), %d0 and #0x3ff, %d0 movel %sp,%sp@- @@ -210,7 +210,7 @@ inthandler7: inthandler: SAVE_ALL - movew %sp@(PT_VECTOR), %d0 + movew %sp@(PT_OFF_VECTOR), %d0 and #0x3ff, %d0 movel %sp,%sp@- @@ -224,7 +224,7 @@ ret_from_interrupt: 2: RESTORE_ALL 1: - moveb %sp@(PT_SR), %d0 + moveb %sp@(PT_OFF_SR), %d0 and #7, %d0 jhi 2b diff --git a/arch/m68knommu/platform/68360/entry.S b/arch/m68knommu/platform/68360/entry.S index 55dfefe3864..6d3460a39ca 100644 --- a/arch/m68knommu/platform/68360/entry.S +++ b/arch/m68knommu/platform/68360/entry.S @@ -35,17 +35,17 @@ .globl inthandler badsys: - movel #-ENOSYS,%sp@(PT_D0) + movel #-ENOSYS,%sp@(PT_OFF_D0) jra ret_from_exception do_trace: - movel #-ENOSYS,%sp@(PT_D0) /* needed for strace*/ + movel #-ENOSYS,%sp@(PT_OFF_D0) /* needed for strace*/ subql #4,%sp SAVE_SWITCH_STACK jbsr syscall_trace RESTORE_SWITCH_STACK addql #4,%sp - movel %sp@(PT_ORIG_D0),%d1 + movel %sp@(PT_OFF_ORIG_D0),%d1 movel #-ENOSYS,%d0 cmpl #NR_syscalls,%d1 jcc 1f @@ -53,7 +53,7 @@ do_trace: lea sys_call_table, %a0 jbsr %a0@(%d1) -1: movel %d0,%sp@(PT_D0) /* save the return value */ +1: movel %d0,%sp@(PT_OFF_D0) /* save the return value */ subql #4,%sp /* dummy return address */ SAVE_SWITCH_STACK jbsr syscall_trace @@ -79,10 +79,10 @@ ENTRY(system_call) lea sys_call_table,%a0 movel %a0@(%d0), %a0 jbsr %a0@ - movel %d0,%sp@(PT_D0) /* save the return value*/ + movel %d0,%sp@(PT_OFF_D0) /* save the return value*/ ret_from_exception: - btst #5,%sp@(PT_SR) /* check if returning to kernel*/ + btst #5,%sp@(PT_OFF_SR) /* check if returning to kernel*/ jeq Luser_return /* if so, skip resched, signals*/ Lkernel_return: @@ -124,7 +124,7 @@ Lreturn: */ inthandler: SAVE_ALL - movew %sp@(PT_VECTOR), %d0 + movew %sp@(PT_OFF_VECTOR), %d0 and.l #0x3ff, %d0 lsr.l #0x02, %d0 @@ -139,7 +139,7 @@ ret_from_interrupt: 2: RESTORE_ALL 1: - moveb %sp@(PT_SR), %d0 + moveb %sp@(PT_OFF_SR), %d0 and #7, %d0 jhi 2b /* check if we need to do software interrupts */ diff --git a/arch/m68knommu/platform/coldfire/entry.S b/arch/m68knommu/platform/coldfire/entry.S index 3b471c0da24..dd7d591f70e 100644 --- a/arch/m68knommu/platform/coldfire/entry.S +++ b/arch/m68knommu/platform/coldfire/entry.S @@ -81,11 +81,11 @@ ENTRY(system_call) movel %d3,%a0 jbsr %a0@ - movel %d0,%sp@(PT_D0) /* save the return value */ + movel %d0,%sp@(PT_OFF_D0) /* save the return value */ jra ret_from_exception 1: - movel #-ENOSYS,%d2 /* strace needs -ENOSYS in PT_D0 */ - movel %d2,PT_D0(%sp) /* on syscall entry */ + movel #-ENOSYS,%d2 /* strace needs -ENOSYS in PT_OFF_D0 */ + movel %d2,PT_OFF_D0(%sp) /* on syscall entry */ subql #4,%sp SAVE_SWITCH_STACK jbsr syscall_trace @@ -93,7 +93,7 @@ ENTRY(system_call) addql #4,%sp movel %d3,%a0 jbsr %a0@ - movel %d0,%sp@(PT_D0) /* save the return value */ + movel %d0,%sp@(PT_OFF_D0) /* save the return value */ subql #4,%sp /* dummy return address */ SAVE_SWITCH_STACK jbsr syscall_trace @@ -104,7 +104,7 @@ ret_from_signal: ret_from_exception: move #0x2700,%sr /* disable intrs */ - btst #5,%sp@(PT_SR) /* check if returning to kernel */ + btst #5,%sp@(PT_OFF_SR) /* check if returning to kernel */ jeq Luser_return /* if so, skip resched, signals */ #ifdef CONFIG_PREEMPT @@ -142,8 +142,8 @@ Luser_return: Lreturn: move #0x2700,%sr /* disable intrs */ movel sw_usp,%a0 /* get usp */ - movel %sp@(PT_PC),%a0@- /* copy exception program counter */ - movel %sp@(PT_FORMATVEC),%a0@-/* copy exception format/vector/sr */ + movel %sp@(PT_OFF_PC),%a0@- /* copy exception program counter */ + movel %sp@(PT_OFF_FORMATVEC),%a0@- /* copy exception format/vector/sr */ moveml %sp@,%d1-%d5/%a0-%a2 lea %sp@(32),%sp /* space for 8 regs */ movel %sp@+,%d0 @@ -181,9 +181,9 @@ Lsignal_return: ENTRY(inthandler) SAVE_ALL moveq #-1,%d0 - movel %d0,%sp@(PT_ORIG_D0) + movel %d0,%sp@(PT_OFF_ORIG_D0) - movew %sp@(PT_FORMATVEC),%d0 /* put exception # in d0 */ + movew %sp@(PT_OFF_FORMATVEC),%d0 /* put exception # in d0 */ andl #0x03fc,%d0 /* mask out vector only */ movel %sp,%sp@- /* push regs arg */ @@ -203,7 +203,7 @@ ENTRY(inthandler) ENTRY(fasthandler) SAVE_LOCAL - movew %sp@(PT_FORMATVEC),%d0 + movew %sp@(PT_OFF_FORMATVEC),%d0 andl #0x03fc,%d0 /* mask out vector only */ movel %sp,%sp@- /* push regs arg */ diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S index acc1f05d1e2..e3ecb36dd55 100644 --- a/arch/microblaze/kernel/entry.S +++ b/arch/microblaze/kernel/entry.S @@ -592,6 +592,8 @@ C_ENTRY(full_exception_trap): nop mfs r7, rfsr; /* save FSR */ nop + mts rfsr, r0; /* Clear sticky fsr */ + nop la r12, r0, full_exception set_vms; rtbd r12, 0; diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S index 6b0288ebccd..2b86c03aa84 100644 --- a/arch/microblaze/kernel/hw_exception_handler.S +++ b/arch/microblaze/kernel/hw_exception_handler.S @@ -384,7 +384,7 @@ handle_other_ex: /* Handle Other exceptions here */ addk r8, r17, r0; /* Load exception address */ bralid r15, full_exception; /* Branch to the handler */ nop; - mts r0, rfsr; /* Clear sticky fsr */ + mts rfsr, r0; /* Clear sticky fsr */ nop /* diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 4201c743cc9..c592d475b3d 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -235,7 +235,9 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp) regs->pc = pc; regs->r1 = usp; regs->pt_mode = 0; +#ifdef CONFIG_MMU regs->msr |= MSR_UMS; +#endif } #ifdef CONFIG_MMU diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index f388dc68f60..524d9352f17 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -18,6 +18,7 @@ config PARISC select BUG select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT + select HAVE_ARCH_TRACEHOOK help The PA-RISC microprocessor is designed by Hewlett-Packard and used in many of their workstations & servers (HP9000 700 and 800 series, diff --git a/arch/parisc/include/asm/fixmap.h b/arch/parisc/include/asm/fixmap.h index de3fe3a1822..6fec4d4a1a1 100644 --- a/arch/parisc/include/asm/fixmap.h +++ b/arch/parisc/include/asm/fixmap.h @@ -21,9 +21,9 @@ #define KERNEL_MAP_END (TMPALIAS_MAP_START) #ifndef __ASSEMBLY__ -extern void *vmalloc_start; +extern void *parisc_vmalloc_start; #define PCXL_DMA_MAP_SIZE (8*1024*1024) -#define VMALLOC_START ((unsigned long)vmalloc_start) +#define VMALLOC_START ((unsigned long)parisc_vmalloc_start) #define VMALLOC_END (KERNEL_MAP_END) #endif /*__ASSEMBLY__*/ diff --git a/arch/parisc/include/asm/hardirq.h b/arch/parisc/include/asm/hardirq.h index ce93133d511..0d68184a76c 100644 --- a/arch/parisc/include/asm/hardirq.h +++ b/arch/parisc/include/asm/hardirq.h @@ -1,29 +1,11 @@ /* hardirq.h: PA-RISC hard IRQ support. * * Copyright (C) 2001 Matthew Wilcox <matthew@wil.cx> - * - * The locking is really quite interesting. There's a cpu-local - * count of how many interrupts are being handled, and a global - * lock. An interrupt can only be serviced if the global lock - * is free. You can't be sure no more interrupts are being - * serviced until you've acquired the lock and then checked - * all the per-cpu interrupt counts are all zero. It's a specialised - * br_lock, and that's exactly how Sparc does it. We don't because - * it's more locking for us. This way is lock-free in the interrupt path. */ #ifndef _PARISC_HARDIRQ_H #define _PARISC_HARDIRQ_H -#include <linux/threads.h> -#include <linux/irq.h> - -typedef struct { - unsigned long __softirq_pending; /* set_bit is used on this */ -} ____cacheline_aligned irq_cpustat_t; - -#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ - -void ack_bad_irq(unsigned int irq); +#include <asm-generic/hardirq.h> #endif /* _PARISC_HARDIRQ_H */ diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h index 302f68dc889..aead40b16dd 100644 --- a/arch/parisc/include/asm/ptrace.h +++ b/arch/parisc/include/asm/ptrace.h @@ -59,8 +59,11 @@ void user_enable_block_step(struct task_struct *task); #define user_mode(regs) (((regs)->iaoq[0] & 3) ? 1 : 0) #define user_space(regs) (((regs)->iasq[1] != 0) ? 1 : 0) #define instruction_pointer(regs) ((regs)->iaoq[0] & ~3) +#define user_stack_pointer(regs) ((regs)->gr[30]) unsigned long profile_pc(struct pt_regs *); extern void show_regs(struct pt_regs *); -#endif + + +#endif /* __KERNEL__ */ #endif diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h new file mode 100644 index 00000000000..8bdfd2c8c39 --- /dev/null +++ b/arch/parisc/include/asm/syscall.h @@ -0,0 +1,40 @@ +/* syscall.h */ + +#ifndef _ASM_PARISC_SYSCALL_H_ +#define _ASM_PARISC_SYSCALL_H_ + +#include <linux/err.h> +#include <asm/ptrace.h> + +static inline long syscall_get_nr(struct task_struct *tsk, + struct pt_regs *regs) +{ + return regs->gr[20]; +} + +static inline void syscall_get_arguments(struct task_struct *tsk, + struct pt_regs *regs, unsigned int i, + unsigned int n, unsigned long *args) +{ + BUG_ON(i); + + switch (n) { + case 6: + args[5] = regs->gr[21]; + case 5: + args[4] = regs->gr[22]; + case 4: + args[3] = regs->gr[23]; + case 3: + args[2] = regs->gr[24]; + case 2: + args[1] = regs->gr[25]; + case 1: + args[0] = regs->gr[26]; + break; + default: + BUG(); + } +} + +#endif /*_ASM_PARISC_SYSCALL_H_*/ diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index ac775a76bff..7ecc1039cfe 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -32,6 +32,11 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) +/* how to get the thread information struct from C */ +#define current_thread_info() ((struct thread_info *)mfctl(30)) + +#endif /* !__ASSEMBLY */ + /* thread information allocation */ #define THREAD_SIZE_ORDER 2 @@ -40,11 +45,6 @@ struct thread_info { #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define THREAD_SHIFT (PAGE_SHIFT + THREAD_SIZE_ORDER) -/* how to get the thread information struct from C */ -#define current_thread_info() ((struct thread_info *)mfctl(30)) - -#endif /* !__ASSEMBLY */ - #define PREEMPT_ACTIVE_BIT 28 #define PREEMPT_ACTIVE (1 << PREEMPT_ACTIVE_BIT) @@ -60,6 +60,8 @@ struct thread_info { #define TIF_RESTORE_SIGMASK 6 /* restore saved signal mask */ #define TIF_FREEZE 7 /* is freezing for suspend */ #define TIF_NOTIFY_RESUME 8 /* callback before returning to user */ +#define TIF_SINGLESTEP 9 /* single stepping? */ +#define TIF_BLOCKSTEP 10 /* branch stepping? */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -69,6 +71,8 @@ struct thread_info { #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_FREEZE (1 << TIF_FREEZE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \ _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK) diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 699cf8ef211..fcd3c707bf1 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -270,8 +270,8 @@ int main(void) DEFINE(DTLB_OFF_COUNT, offsetof(struct pdc_cache_info, dt_off_count)); DEFINE(DTLB_LOOP, offsetof(struct pdc_cache_info, dt_loop)); BLANK(); - DEFINE(PA_BLOCKSTEP_BIT, 31-PT_BLOCKSTEP_BIT); - DEFINE(PA_SINGLESTEP_BIT, 31-PT_SINGLESTEP_BIT); + DEFINE(TIF_BLOCKSTEP_PA_BIT, 31-TIF_BLOCKSTEP); + DEFINE(TIF_SINGLESTEP_PA_BIT, 31-TIF_SINGLESTEP); BLANK(); DEFINE(ASM_PMD_SHIFT, PMD_SHIFT); DEFINE(ASM_PGDIR_SHIFT, PGDIR_SHIFT); diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 8c4712b74dc..3a44f7f704f 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -2047,12 +2047,13 @@ syscall_do_signal: b,n syscall_check_sig syscall_restore: - /* Are we being ptraced? */ LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 - ldw TASK_PTRACE(%r1), %r19 - bb,< %r19,31,syscall_restore_rfi - nop + /* Are we being ptraced? */ + ldw TASK_FLAGS(%r1),%r19 + ldi (_TIF_SINGLESTEP|_TIF_BLOCKSTEP),%r2 + and,COND(=) %r19,%r2,%r0 + b,n syscall_restore_rfi ldo TASK_PT_FR31(%r1),%r19 /* reload fpregs */ rest_fp %r19 @@ -2113,16 +2114,16 @@ syscall_restore_rfi: ldi 0x0b,%r20 /* Create new PSW */ depi -1,13,1,%r20 /* C, Q, D, and I bits */ - /* The values of PA_SINGLESTEP_BIT and PA_BLOCKSTEP_BIT are - * set in include/linux/ptrace.h and converted to PA bitmap + /* The values of SINGLESTEP_BIT and BLOCKSTEP_BIT are + * set in thread_info.h and converted to PA bitmap * numbers in asm-offsets.c */ - /* if ((%r19.PA_SINGLESTEP_BIT)) { %r20.27=1} */ - extru,= %r19,PA_SINGLESTEP_BIT,1,%r0 + /* if ((%r19.SINGLESTEP_BIT)) { %r20.27=1} */ + extru,= %r19,TIF_SINGLESTEP_PA_BIT,1,%r0 depi -1,27,1,%r20 /* R bit */ - /* if ((%r19.PA_BLOCKSTEP_BIT)) { %r20.7=1} */ - extru,= %r19,PA_BLOCKSTEP_BIT,1,%r0 + /* if ((%r19.BLOCKSTEP_BIT)) { %r20.7=1} */ + extru,= %r19,TIF_BLOCKSTEP_PA_BIT,1,%r0 depi -1,7,1,%r20 /* T bit */ STREG %r20,TASK_PT_PSW(%r1) diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 330f536a932..2e7610cb33d 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -423,8 +423,3 @@ void __init init_IRQ(void) set_eiem(cpu_eiem); /* EIEM : enable all external intr */ } - -void ack_bad_irq(unsigned int irq) -{ - printk(KERN_WARNING "unexpected IRQ %d\n", irq); -} diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 61ee0eec4e6..212074653df 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -893,7 +893,7 @@ int module_finalize(const Elf_Ehdr *hdr, * ourselves */ for (i = 1; i < hdr->e_shnum; i++) { if(sechdrs[i].sh_type == SHT_SYMTAB - && (sechdrs[i].sh_type & SHF_ALLOC)) { + && (sechdrs[i].sh_flags & SHF_ALLOC)) { int strindex = sechdrs[i].sh_link; /* FIXME: AWFUL HACK * The cast is to drop the const from diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 927db3668b6..c4f49e45129 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -13,6 +13,7 @@ #include <linux/smp.h> #include <linux/errno.h> #include <linux/ptrace.h> +#include <linux/tracehook.h> #include <linux/user.h> #include <linux/personality.h> #include <linux/security.h> @@ -35,7 +36,8 @@ */ void ptrace_disable(struct task_struct *task) { - task->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP); + clear_tsk_thread_flag(task, TIF_SINGLESTEP); + clear_tsk_thread_flag(task, TIF_BLOCKSTEP); /* make sure the trap bits are not set */ pa_psw(task)->r = 0; @@ -55,8 +57,8 @@ void user_disable_single_step(struct task_struct *task) void user_enable_single_step(struct task_struct *task) { - task->ptrace &= ~PT_BLOCKSTEP; - task->ptrace |= PT_SINGLESTEP; + clear_tsk_thread_flag(task, TIF_BLOCKSTEP); + set_tsk_thread_flag(task, TIF_SINGLESTEP); if (pa_psw(task)->n) { struct siginfo si; @@ -98,8 +100,8 @@ void user_enable_single_step(struct task_struct *task) void user_enable_block_step(struct task_struct *task) { - task->ptrace &= ~PT_SINGLESTEP; - task->ptrace |= PT_BLOCKSTEP; + clear_tsk_thread_flag(task, TIF_SINGLESTEP); + set_tsk_thread_flag(task, TIF_BLOCKSTEP); /* Enable taken branch trap. */ pa_psw(task)->r = 0; @@ -263,22 +265,20 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, } #endif +long do_syscall_trace_enter(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACE) && + tracehook_report_syscall_entry(regs)) + return -1L; + + return regs->gr[20]; +} -void syscall_trace(void) +void do_syscall_trace_exit(struct pt_regs *regs) { - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; - if (!(current->ptrace & PT_PTRACED)) - return; - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } + int stepping = test_thread_flag(TIF_SINGLESTEP) || + test_thread_flag(TIF_BLOCKSTEP); + + if (stepping || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, stepping); } diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 8eb3c63c407..e8467e4aa8d 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -21,6 +21,7 @@ #include <linux/errno.h> #include <linux/wait.h> #include <linux/ptrace.h> +#include <linux/tracehook.h> #include <linux/unistd.h> #include <linux/stddef.h> #include <linux/compat.h> @@ -34,7 +35,6 @@ #include <asm/asm-offsets.h> #ifdef CONFIG_COMPAT -#include <linux/compat.h> #include "signal32.h" #endif @@ -468,6 +468,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + + tracehook_signal_handler(sig, info, ka, regs, 0); + return 1; } diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 59fc1a43ec3..f5f96021caa 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -288,18 +288,23 @@ tracesys: STREG %r18,PT_GR18(%r2) /* Finished saving things for the debugger */ - ldil L%syscall_trace,%r1 + copy %r2,%r26 + ldil L%do_syscall_trace_enter,%r1 ldil L%tracesys_next,%r2 - be R%syscall_trace(%sr7,%r1) + be R%do_syscall_trace_enter(%sr7,%r1) ldo R%tracesys_next(%r2),%r2 -tracesys_next: +tracesys_next: + /* do_syscall_trace_enter either returned the syscallno, or -1L, + * so we skip restoring the PT_GR20 below, since we pulled it from + * task->thread.regs.gr[20] above. + */ + copy %ret0,%r20 ldil L%sys_call_table,%r1 ldo R%sys_call_table(%r1), %r19 ldo -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* get task ptr */ LDREG TI_TASK(%r1), %r1 - LDREG TASK_PT_GR20(%r1), %r20 LDREG TASK_PT_GR26(%r1), %r26 /* Restore the users args */ LDREG TASK_PT_GR25(%r1), %r25 LDREG TASK_PT_GR24(%r1), %r24 @@ -336,7 +341,8 @@ tracesys_exit: #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ #endif - bl syscall_trace, %r2 + ldo TASK_REGS(%r1),%r26 + bl do_syscall_trace_exit,%r2 STREG %r28,TASK_PT_GR28(%r1) /* save return value now */ ldo -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* get task ptr */ LDREG TI_TASK(%r1), %r1 @@ -353,12 +359,12 @@ tracesys_exit: tracesys_sigexit: ldo -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* get task ptr */ - LDREG 0(%r1), %r1 + LDREG TI_TASK(%r1), %r1 #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ #endif - bl syscall_trace, %r2 - nop + bl do_syscall_trace_exit,%r2 + ldo TASK_REGS(%r1),%r26 ldil L%syscall_exit_rfi,%r1 be,n R%syscall_exit_rfi(%sr7,%r1) diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 775be2791bc..fda4baa059b 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -28,6 +28,7 @@ #include <asm/cache.h> #include <asm/page.h> #include <asm/asm-offsets.h> +#include <asm/thread_info.h> /* ld script to make hppa Linux kernel */ #ifndef CONFIG_64BIT @@ -134,6 +135,15 @@ SECTIONS __init_begin = .; INIT_TEXT_SECTION(16384) INIT_DATA_SECTION(16) + /* we have to discard exit text and such at runtime, not link time */ + .exit.text : + { + EXIT_TEXT + } + .exit.data : + { + EXIT_DATA + } PERCPU(PAGE_SIZE) . = ALIGN(PAGE_SIZE); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index d5aca31fddb..13b6e3e59b9 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -434,8 +434,8 @@ void mark_rodata_ro(void) #define SET_MAP_OFFSET(x) ((void *)(((unsigned long)(x) + VM_MAP_OFFSET) \ & ~(VM_MAP_OFFSET-1))) -void *vmalloc_start __read_mostly; -EXPORT_SYMBOL(vmalloc_start); +void *parisc_vmalloc_start __read_mostly; +EXPORT_SYMBOL(parisc_vmalloc_start); #ifdef CONFIG_PA11 unsigned long pcxl_dma_start __read_mostly; @@ -496,13 +496,14 @@ void __init mem_init(void) #ifdef CONFIG_PA11 if (hppa_dma_ops == &pcxl_dma_ops) { pcxl_dma_start = (unsigned long)SET_MAP_OFFSET(MAP_START); - vmalloc_start = SET_MAP_OFFSET(pcxl_dma_start + PCXL_DMA_MAP_SIZE); + parisc_vmalloc_start = SET_MAP_OFFSET(pcxl_dma_start + + PCXL_DMA_MAP_SIZE); } else { pcxl_dma_start = 0; - vmalloc_start = SET_MAP_OFFSET(MAP_START); + parisc_vmalloc_start = SET_MAP_OFFSET(MAP_START); } #else - vmalloc_start = SET_MAP_OFFSET(MAP_START); + parisc_vmalloc_start = SET_MAP_OFFSET(MAP_START); #endif printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n", diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index ec5eee7c25d..06cce8285ba 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -58,7 +58,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); -static inline int kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu) +static inline long kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu) { return vcpu->arch.sie_block->gmslm - vcpu->arch.sie_block->gmsor diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index ac45aab741a..05ef5380a68 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -26,6 +26,7 @@ config SPARC select RTC_CLASS select RTC_DRV_M48T59 select HAVE_PERF_EVENTS + select PERF_USE_VMALLOC select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG @@ -48,6 +49,7 @@ config SPARC64 select RTC_DRV_SUN4V select RTC_DRV_STARFIRE select HAVE_PERF_EVENTS + select PERF_USE_VMALLOC config ARCH_DEFCONFIG string diff --git a/arch/sparc/include/asm/hardirq_32.h b/arch/sparc/include/asm/hardirq_32.h index 4f63ed8df55..162007643cd 100644 --- a/arch/sparc/include/asm/hardirq_32.h +++ b/arch/sparc/include/asm/hardirq_32.h @@ -7,17 +7,7 @@ #ifndef __SPARC_HARDIRQ_H #define __SPARC_HARDIRQ_H -#include <linux/threads.h> -#include <linux/spinlock.h> -#include <linux/cache.h> - -/* entry.S is sensitive to the offsets of these fields */ /* XXX P3 Is it? */ -typedef struct { - unsigned int __softirq_pending; -} ____cacheline_aligned irq_cpustat_t; - -#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ - #define HARDIRQ_BITS 8 +#include <asm-generic/hardirq.h> #endif /* __SPARC_HARDIRQ_H */ diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h index ea43057d476..cbf4801deaa 100644 --- a/arch/sparc/include/asm/irq_32.h +++ b/arch/sparc/include/asm/irq_32.h @@ -6,10 +6,10 @@ #ifndef _SPARC_IRQ_H #define _SPARC_IRQ_H -#include <linux/interrupt.h> - #define NR_IRQS 16 +#include <linux/interrupt.h> + #define irq_canonicalize(irq) (irq) extern void __init init_IRQ(void); diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 0ff92fa2206..f3cb790fa2a 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -41,8 +41,8 @@ #define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL) #define HI_OBP_ADDRESS _AC(0x0000000100000000,UL) #define VMALLOC_START _AC(0x0000000100000000,UL) -#define VMALLOC_END _AC(0x0000000200000000,UL) -#define VMEMMAP_BASE _AC(0x0000000200000000,UL) +#define VMALLOC_END _AC(0x0000010000000000,UL) +#define VMEMMAP_BASE _AC(0x0000010000000000,UL) #define vmemmap ((struct page *)VMEMMAP_BASE) diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index 3ea6e8cde8c..1d361477d7d 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -280,8 +280,8 @@ kvmap_dtlb_nonlinear: #ifdef CONFIG_SPARSEMEM_VMEMMAP /* Do not use the TSB for vmemmap. */ - mov (VMEMMAP_BASE >> 24), %g5 - sllx %g5, 24, %g5 + mov (VMEMMAP_BASE >> 40), %g5 + sllx %g5, 40, %g5 cmp %g4,%g5 bgeu,pn %xcc, kvmap_vmemmap nop @@ -293,8 +293,8 @@ kvmap_dtlb_tsbmiss: sethi %hi(MODULES_VADDR), %g5 cmp %g4, %g5 blu,pn %xcc, kvmap_dtlb_longpath - mov (VMALLOC_END >> 24), %g5 - sllx %g5, 24, %g5 + mov (VMALLOC_END >> 40), %g5 + sllx %g5, 40, %g5 cmp %g4, %g5 bgeu,pn %xcc, kvmap_dtlb_longpath nop diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 2d6a1b10c81..04db9274389 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -56,7 +56,8 @@ struct cpu_hw_events { struct perf_event *events[MAX_HWEVENTS]; unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; - int enabled; + u64 pcr; + int enabled; }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -68,8 +69,30 @@ struct perf_event_map { #define PIC_LOWER 0x02 }; +static unsigned long perf_event_encode(const struct perf_event_map *pmap) +{ + return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask; +} + +static void perf_event_decode(unsigned long val, u16 *enc, u8 *msk) +{ + *msk = val & 0xff; + *enc = val >> 16; +} + +#define C(x) PERF_COUNT_HW_CACHE_##x + +#define CACHE_OP_UNSUPPORTED 0xfffe +#define CACHE_OP_NONSENSE 0xffff + +typedef struct perf_event_map cache_map_t + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + struct sparc_pmu { const struct perf_event_map *(*event_map)(int); + const cache_map_t *cache_map; int max_events; int upper_shift; int lower_shift; @@ -80,21 +103,109 @@ struct sparc_pmu { int lower_nop; }; -static const struct perf_event_map ultra3i_perfmon_event_map[] = { +static const struct perf_event_map ultra3_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, }; -static const struct perf_event_map *ultra3i_event_map(int event_id) +static const struct perf_event_map *ultra3_event_map(int event_id) { - return &ultra3i_perfmon_event_map[event_id]; + return &ultra3_perfmon_event_map[event_id]; } -static const struct sparc_pmu ultra3i_pmu = { - .event_map = ultra3i_event_map, - .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), +static const cache_map_t ultra3_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x09, PIC_UPPER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER }, + [C(RESULT_MISS)] = { 0x0a, PIC_UPPER }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x09, PIC_UPPER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0c, PIC_UPPER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER }, + [C(RESULT_MISS)] = { 0x0c, PIC_UPPER }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x12, PIC_UPPER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x11, PIC_UPPER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + +static const struct sparc_pmu ultra3_pmu = { + .event_map = ultra3_event_map, + .cache_map = &ultra3_cache_map, + .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), .upper_shift = 11, .lower_shift = 4, .event_mask = 0x3f, @@ -102,6 +213,121 @@ static const struct sparc_pmu ultra3i_pmu = { .lower_nop = 0x14, }; +/* Niagara1 is very limited. The upper PIC is hard-locked to count + * only instructions, so it is free running which creates all kinds of + * problems. Some hardware designs make one wonder if the creator + * even looked at how this stuff gets used by software. + */ +static const struct perf_event_map niagara1_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER }, +}; + +static const struct perf_event_map *niagara1_event_map(int event_id) +{ + return &niagara1_perfmon_event_map[event_id]; +} + +static const cache_map_t niagara1_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x03, PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x03, PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x00, PIC_UPPER }, + [C(RESULT_MISS)] = { 0x02, PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x07, PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x07, PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x05, PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x04, PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + +static const struct sparc_pmu niagara1_pmu = { + .event_map = niagara1_event_map, + .cache_map = &niagara1_cache_map, + .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), + .upper_shift = 0, + .lower_shift = 4, + .event_mask = 0x7, + .upper_nop = 0x0, + .lower_nop = 0x0, +}; + static const struct perf_event_map niagara2_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, @@ -116,8 +342,96 @@ static const struct perf_event_map *niagara2_event_map(int event_id) return &niagara2_perfmon_event_map[event_id]; } +static const cache_map_t niagara2_cache_map = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, + [C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, + }, +}, +}; + static const struct sparc_pmu niagara2_pmu = { .event_map = niagara2_event_map, + .cache_map = &niagara2_cache_map, .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), .upper_shift = 19, .lower_shift = 6, @@ -151,23 +465,30 @@ static u64 nop_for_index(int idx) sparc_pmu->lower_nop, idx); } -static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, - int idx) +static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { u64 val, mask = mask_for_index(idx); - val = pcr_ops->read(); - pcr_ops->write((val & ~mask) | hwc->config); + val = cpuc->pcr; + val &= ~mask; + val |= hwc->config; + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } -static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, - int idx) +static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { u64 mask = mask_for_index(idx); u64 nop = nop_for_index(idx); - u64 val = pcr_ops->read(); + u64 val; - pcr_ops->write((val & ~mask) | nop); + val = cpuc->pcr; + val &= ~mask; + val |= nop; + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } void hw_perf_enable(void) @@ -182,7 +503,7 @@ void hw_perf_enable(void) cpuc->enabled = 1; barrier(); - val = pcr_ops->read(); + val = cpuc->pcr; for (i = 0; i < MAX_HWEVENTS; i++) { struct perf_event *cp = cpuc->events[i]; @@ -194,7 +515,9 @@ void hw_perf_enable(void) val |= hwc->config_base; } - pcr_ops->write(val); + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } void hw_perf_disable(void) @@ -207,10 +530,12 @@ void hw_perf_disable(void) cpuc->enabled = 0; - val = pcr_ops->read(); + val = cpuc->pcr; val &= ~(PCR_UTRACE | PCR_STRACE | sparc_pmu->hv_bit | sparc_pmu->irq_bit); - pcr_ops->write(val); + cpuc->pcr = val; + + pcr_ops->write(cpuc->pcr); } static u32 read_pmc(int idx) @@ -242,7 +567,7 @@ static void write_pmc(int idx, u64 val) } static int sparc_perf_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, int idx) + struct hw_perf_event *hwc, int idx) { s64 left = atomic64_read(&hwc->period_left); s64 period = hwc->sample_period; @@ -282,19 +607,19 @@ static int sparc_pmu_enable(struct perf_event *event) if (test_and_set_bit(idx, cpuc->used_mask)) return -EAGAIN; - sparc_pmu_disable_event(hwc, idx); + sparc_pmu_disable_event(cpuc, hwc, idx); cpuc->events[idx] = event; set_bit(idx, cpuc->active_mask); sparc_perf_event_set_period(event, hwc, idx); - sparc_pmu_enable_event(hwc, idx); + sparc_pmu_enable_event(cpuc, hwc, idx); perf_event_update_userpage(event); return 0; } static u64 sparc_perf_event_update(struct perf_event *event, - struct hw_perf_event *hwc, int idx) + struct hw_perf_event *hwc, int idx) { int shift = 64 - 32; u64 prev_raw_count, new_raw_count; @@ -324,7 +649,7 @@ static void sparc_pmu_disable(struct perf_event *event) int idx = hwc->idx; clear_bit(idx, cpuc->active_mask); - sparc_pmu_disable_event(hwc, idx); + sparc_pmu_disable_event(cpuc, hwc, idx); barrier(); @@ -338,18 +663,29 @@ static void sparc_pmu_disable(struct perf_event *event) static void sparc_pmu_read(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; + sparc_perf_event_update(event, hwc, hwc->idx); } static void sparc_pmu_unthrottle(struct perf_event *event) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - sparc_pmu_enable_event(hwc, hwc->idx); + + sparc_pmu_enable_event(cpuc, hwc, hwc->idx); } static atomic_t active_events = ATOMIC_INIT(0); static DEFINE_MUTEX(pmc_grab_mutex); +static void perf_stop_nmi_watchdog(void *unused) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + stop_nmi_watchdog(NULL); + cpuc->pcr = pcr_ops->read(); +} + void perf_event_grab_pmc(void) { if (atomic_inc_not_zero(&active_events)) @@ -358,7 +694,7 @@ void perf_event_grab_pmc(void) mutex_lock(&pmc_grab_mutex); if (atomic_read(&active_events) == 0) { if (atomic_read(&nmi_active) > 0) { - on_each_cpu(stop_nmi_watchdog, NULL, 1); + on_each_cpu(perf_stop_nmi_watchdog, NULL, 1); BUG_ON(atomic_read(&nmi_active) != 0); } atomic_inc(&active_events); @@ -375,30 +711,160 @@ void perf_event_release_pmc(void) } } +static const struct perf_event_map *sparc_map_cache_event(u64 config) +{ + unsigned int cache_type, cache_op, cache_result; + const struct perf_event_map *pmap; + + if (!sparc_pmu->cache_map) + return ERR_PTR(-ENOENT); + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return ERR_PTR(-EINVAL); + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return ERR_PTR(-EINVAL); + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return ERR_PTR(-EINVAL); + + pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]); + + if (pmap->encoding == CACHE_OP_UNSUPPORTED) + return ERR_PTR(-ENOENT); + + if (pmap->encoding == CACHE_OP_NONSENSE) + return ERR_PTR(-EINVAL); + + return pmap; +} + static void hw_perf_event_destroy(struct perf_event *event) { perf_event_release_pmc(); } +/* Make sure all events can be scheduled into the hardware at + * the same time. This is simplified by the fact that we only + * need to support 2 simultaneous HW events. + */ +static int sparc_check_constraints(unsigned long *events, int n_ev) +{ + if (n_ev <= perf_max_events) { + u8 msk1, msk2; + u16 dummy; + + if (n_ev == 1) + return 0; + BUG_ON(n_ev != 2); + perf_event_decode(events[0], &dummy, &msk1); + perf_event_decode(events[1], &dummy, &msk2); + + /* If both events can go on any counter, OK. */ + if (msk1 == (PIC_UPPER | PIC_LOWER) && + msk2 == (PIC_UPPER | PIC_LOWER)) + return 0; + + /* If one event is limited to a specific counter, + * and the other can go on both, OK. + */ + if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) && + msk2 == (PIC_UPPER | PIC_LOWER)) + return 0; + if ((msk2 == PIC_UPPER || msk2 == PIC_LOWER) && + msk1 == (PIC_UPPER | PIC_LOWER)) + return 0; + + /* If the events are fixed to different counters, OK. */ + if ((msk1 == PIC_UPPER && msk2 == PIC_LOWER) || + (msk1 == PIC_LOWER && msk2 == PIC_UPPER)) + return 0; + + /* Otherwise, there is a conflict. */ + } + + return -1; +} + +static int check_excludes(struct perf_event **evts, int n_prev, int n_new) +{ + int eu = 0, ek = 0, eh = 0; + struct perf_event *event; + int i, n, first; + + n = n_prev + n_new; + if (n <= 1) + return 0; + + first = 1; + for (i = 0; i < n; i++) { + event = evts[i]; + if (first) { + eu = event->attr.exclude_user; + ek = event->attr.exclude_kernel; + eh = event->attr.exclude_hv; + first = 0; + } else if (event->attr.exclude_user != eu || + event->attr.exclude_kernel != ek || + event->attr.exclude_hv != eh) { + return -EAGAIN; + } + } + + return 0; +} + +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *evts[], unsigned long *events) +{ + struct perf_event *event; + int n = 0; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + evts[n] = group; + events[n++] = group->hw.event_base; + } + list_for_each_entry(event, &group->sibling_list, group_entry) { + if (!is_software_event(event) && + event->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + evts[n] = event; + events[n++] = event->hw.event_base; + } + } + return n; +} + static int __hw_perf_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; + struct perf_event *evts[MAX_HWEVENTS]; struct hw_perf_event *hwc = &event->hw; + unsigned long events[MAX_HWEVENTS]; const struct perf_event_map *pmap; u64 enc; + int n; if (atomic_read(&nmi_active) < 0) return -ENODEV; - if (attr->type != PERF_TYPE_HARDWARE) + if (attr->type == PERF_TYPE_HARDWARE) { + if (attr->config >= sparc_pmu->max_events) + return -EINVAL; + pmap = sparc_pmu->event_map(attr->config); + } else if (attr->type == PERF_TYPE_HW_CACHE) { + pmap = sparc_map_cache_event(attr->config); + if (IS_ERR(pmap)) + return PTR_ERR(pmap); + } else return -EOPNOTSUPP; - if (attr->config >= sparc_pmu->max_events) - return -EINVAL; - - perf_event_grab_pmc(); - event->destroy = hw_perf_event_destroy; - /* We save the enable bits in the config_base. So to * turn off sampling just write 'config', and to enable * things write 'config | config_base'. @@ -411,15 +877,39 @@ static int __hw_perf_event_init(struct perf_event *event) if (!attr->exclude_hv) hwc->config_base |= sparc_pmu->hv_bit; + hwc->event_base = perf_event_encode(pmap); + + enc = pmap->encoding; + + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, + perf_max_events - 1, + evts, events); + if (n < 0) + return -EINVAL; + } + events[n] = hwc->event_base; + evts[n] = event; + + if (check_excludes(evts, n, 1)) + return -EINVAL; + + if (sparc_check_constraints(events, n + 1)) + return -EINVAL; + + /* Try to do all error checking before this point, as unwinding + * state after grabbing the PMC is difficult. + */ + perf_event_grab_pmc(); + event->destroy = hw_perf_event_destroy; + if (!hwc->sample_period) { hwc->sample_period = MAX_PERIOD; hwc->last_period = hwc->sample_period; atomic64_set(&hwc->period_left, hwc->sample_period); } - pmap = sparc_pmu->event_map(attr->config); - - enc = pmap->encoding; if (pmap->pic_mask & PIC_UPPER) { hwc->idx = PIC_UPPER_INDEX; enc <<= sparc_pmu->upper_shift; @@ -472,7 +962,7 @@ void perf_event_print_debug(void) } static int __kprobes perf_event_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) + unsigned long cmd, void *__args) { struct die_args *args = __args; struct perf_sample_data data; @@ -513,7 +1003,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, continue; if (perf_event_overflow(event, 1, &data, regs)) - sparc_pmu_disable_event(hwc, idx); + sparc_pmu_disable_event(cpuc, hwc, idx); } return NOTIFY_STOP; @@ -525,8 +1015,15 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { static bool __init supported_pmu(void) { - if (!strcmp(sparc_pmu_type, "ultra3i")) { - sparc_pmu = &ultra3i_pmu; + if (!strcmp(sparc_pmu_type, "ultra3") || + !strcmp(sparc_pmu_type, "ultra3+") || + !strcmp(sparc_pmu_type, "ultra3i") || + !strcmp(sparc_pmu_type, "ultra4+")) { + sparc_pmu = &ultra3_pmu; + return true; + } + if (!strcmp(sparc_pmu_type, "niagara")) { + sparc_pmu = &niagara1_pmu; return true; } if (!strcmp(sparc_pmu_type, "niagara2")) { diff --git a/arch/sparc/oprofile/init.c b/arch/sparc/oprofile/init.c index f97cb8b6ee5..f9024bccff1 100644 --- a/arch/sparc/oprofile/init.c +++ b/arch/sparc/oprofile/init.c @@ -11,6 +11,7 @@ #include <linux/oprofile.h> #include <linux/errno.h> #include <linux/init.h> +#include <linux/param.h> /* for HZ */ #ifdef CONFIG_SPARC64 #include <linux/notifier.h> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8da93745c08..c876bace8fd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -86,10 +86,6 @@ config STACKTRACE_SUPPORT config HAVE_LATENCYTOP_SUPPORT def_bool y -config FAST_CMPXCHG_LOCAL - bool - default y - config MMU def_bool y diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 527519b8a9f..f2824fb8c79 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -400,7 +400,7 @@ config X86_TSC config X86_CMPXCHG64 def_bool y - depends on X86_PAE || X86_64 + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM # this should be set for all -march=.. options where the compiler # generates cmov. @@ -412,6 +412,7 @@ config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 default "6" if X86_32 && X86_P6_NOP + default "5" if X86_32 && X86_CMPXCHG64 default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) default "3" diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 74619c4f9fd..1733f9f65e8 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -21,8 +21,8 @@ #define __AUDIT_ARCH_LE 0x40000000 #ifndef CONFIG_AUDITSYSCALL -#define sysexit_audit int_ret_from_sys_call -#define sysretl_audit int_ret_from_sys_call +#define sysexit_audit ia32_ret_from_sys_call +#define sysretl_audit ia32_ret_from_sys_call #endif #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) @@ -39,12 +39,12 @@ .endm /* clobbers %eax */ - .macro CLEAR_RREGS _r9=rax + .macro CLEAR_RREGS offset=0, _r9=rax xorl %eax,%eax - movq %rax,R11(%rsp) - movq %rax,R10(%rsp) - movq %\_r9,R9(%rsp) - movq %rax,R8(%rsp) + movq %rax,\offset+R11(%rsp) + movq %rax,\offset+R10(%rsp) + movq %\_r9,\offset+R9(%rsp) + movq %rax,\offset+R8(%rsp) .endm /* @@ -172,6 +172,10 @@ sysexit_from_sys_call: movl RIP-R11(%rsp),%edx /* User %eip */ CFI_REGISTER rip,rdx RESTORE_ARGS 1,24,1,1,1,1 + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 popfq CFI_ADJUST_CFA_OFFSET -8 /*CFI_RESTORE rflags*/ @@ -202,7 +206,7 @@ sysexit_from_sys_call: .macro auditsys_exit exit,ebpsave=RBP testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) - jnz int_ret_from_sys_call + jnz ia32_ret_from_sys_call TRACE_IRQS_ON sti movl %eax,%esi /* second arg, syscall return value */ @@ -218,8 +222,9 @@ sysexit_from_sys_call: cli TRACE_IRQS_OFF testl %edi,TI_flags(%r10) - jnz int_with_check - jmp \exit + jz \exit + CLEAR_RREGS -ARGOFFSET + jmp int_with_check .endm sysenter_auditsys: @@ -329,6 +334,9 @@ sysretl_from_sys_call: CFI_REGISTER rip,rcx movl EFLAGS-ARGOFFSET(%rsp),%r11d /*CFI_REGISTER rflags,r11*/ + xorq %r10,%r10 + xorq %r9,%r9 + xorq %r8,%r8 TRACE_IRQS_ON movl RSP-ARGOFFSET(%rsp),%esp CFI_RESTORE rsp @@ -353,7 +361,7 @@ cstar_tracesys: #endif xchgl %r9d,%ebp SAVE_REST - CLEAR_RREGS r9 + CLEAR_RREGS 0, r9 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter @@ -425,6 +433,8 @@ ia32_do_call: call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: movq %rax,RAX-ARGOFFSET(%rsp) +ia32_ret_from_sys_call: + CLEAR_RREGS -ARGOFFSET jmp int_ret_from_sys_call ia32_tracesys: @@ -442,8 +452,8 @@ END(ia32_syscall) ia32_badsys: movq $0,ORIG_RAX-ARGOFFSET(%rsp) - movq $-ENOSYS,RAX-ARGOFFSET(%rsp) - jmp int_ret_from_sys_call + movq $-ENOSYS,%rax + jmp ia32_sysret quiet_ni_syscall: movq $-ENOSYS,%rax diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3be000435fa..d83892226f7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_age_hva(struct kvm *kvm, unsigned long hva); +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index b608a64c581..f1363b72364 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} static inline void enable_p5_mce(void) {} #endif +extern void (*x86_mce_decode_callback)(struct mce *m); + void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, mce_dev); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 183c3457d2f..b1598a9436d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; +static void default_decode_mce(struct mce *m) +{ + pr_emerg("No human readable MCE decoding support on this CPU type.\n"); + pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); +} + +/* + * CPU/chipset specific EDAC code can register a callback here to print + * MCE errors in a human-readable form: + */ +void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; +EXPORT_SYMBOL(x86_mce_decode_callback); /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { @@ -165,46 +177,46 @@ void mce_log(struct mce *mce) set_bit(0, &mce_need_notify); } -void __weak decode_mce(struct mce *m) -{ - return; -} - static void print_mce(struct mce *m) { - printk(KERN_EMERG - "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", + pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", m->extcpu, m->mcgstatus, m->bank, m->status); + if (m->ip) { - printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", - !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); + pr_emerg("RIP%s %02x:<%016Lx> ", + !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", + m->cs, m->ip); + if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->ip); - printk(KERN_CONT "\n"); + pr_cont("\n"); } - printk(KERN_EMERG "TSC %llx ", m->tsc); + + pr_emerg("TSC %llx ", m->tsc); if (m->addr) - printk(KERN_CONT "ADDR %llx ", m->addr); + pr_cont("ADDR %llx ", m->addr); if (m->misc) - printk(KERN_CONT "MISC %llx ", m->misc); - printk(KERN_CONT "\n"); - printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", - m->cpuvendor, m->cpuid, m->time, m->socketid, - m->apicid); + pr_cont("MISC %llx ", m->misc); + + pr_cont("\n"); + pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", + m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); - decode_mce(m); + /* + * Print out human-readable details about the MCE error, + * (if the CPU has an implementation for that): + */ + x86_mce_decode_callback(m); } static void print_mce_head(void) { - printk(KERN_EMERG "\nHARDWARE ERROR\n"); + pr_emerg("\nHARDWARE ERROR\n"); } static void print_mce_tail(void) { - printk(KERN_EMERG "This is not a software problem!\n" - "Run through mcelog --ascii to decode and contact your hardware vendor\n"); + pr_emerg("This is not a software problem!\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ @@ -218,6 +230,7 @@ static atomic_t mce_fake_paniced; static void wait_for_panic(void) { long timeout = PANIC_TIMEOUT*USEC_PER_SEC; + preempt_disable(); local_irq_enable(); while (timeout-- > 0) @@ -285,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) static int msr_to_offset(u32 msr) { unsigned bank = __get_cpu_var(injectm.bank); + if (msr == rip_msr) return offsetof(struct mce, ip); if (msr == MSR_IA32_MCx_STATUS(bank)) diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index f04e7252760..3c1b12d461d 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) unsigned long long base, size; char *ptr; char line[LINE_SIZE]; + int length; size_t linelen; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!len) - return -EINVAL; memset(line, 0, LINE_SIZE); - if (len > LINE_SIZE) - len = LINE_SIZE; - if (copy_from_user(line, buf, len - 1)) + + length = len; + length--; + + if (length > LINE_SIZE - 1) + length = LINE_SIZE - 1; + + if (length < 0) + return -EINVAL; + + if (copy_from_user(line, buf, length)) return -EFAULT; linelen = strlen(line); diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 41fd965c80c..b9c830c12b4 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -206,8 +206,11 @@ static int __init setup_early_printk(char *buf) while (*buf != '\0') { if (!strncmp(buf, "serial", 6)) { - early_serial_init(buf + 6); + buf += 6; + early_serial_init(buf); early_console_register(&early_serial_console, keep); + if (!strncmp(buf, ",ttyS", 5)) + buf += 5; } if (!strncmp(buf, "ttyS", 4)) { early_serial_init(buf + 4); diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 1736c5a725a..9c3bd4a2050 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -15,8 +15,10 @@ EXPORT_SYMBOL(mcount); * the export, but dont use it from C code, it is used * by assembly code and is not using C calling convention! */ +#ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); EXPORT_SYMBOL(cmpxchg8b_emu); +#endif /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 74656d1d4e3..39120619951 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -244,6 +244,7 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) __func__, smp_processor_id(), vector, irq); } + run_local_timers(); irq_exit(); set_irq_regs(old_regs); @@ -268,6 +269,7 @@ void smp_generic_interrupt(struct pt_regs *regs) if (generic_interrupt_extension) generic_interrupt_extension(); + run_local_timers(); irq_exit(); set_irq_regs(old_regs); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 64b838eac18..d20009b4e6e 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0; /* * This variable becomes 1 if iommu=pt is passed on the kernel command line. - * If this variable is 1, IOMMU implementations do no DMA ranslation for + * If this variable is 1, IOMMU implementations do no DMA translation for * devices and allow every device to access to whole physical memory. This is * useful if a user want to use an IOMMU only for KVM device assignment to * guests and not for driver dma translation. diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index ec1de97600e..d915d956e66 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -198,6 +198,7 @@ void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); + run_local_timers(); /* * KVM uses this interrupt to force a cpu out of guest mode */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1ae5ceba7eb..7024224f0fc 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -664,7 +664,7 @@ static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now = apic->lapic_timer.timer.base->get_time(); - apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) * + apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->divide_count; atomic_set(&apic->lapic_timer.pending, 0); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eca41ae9f45..685a4ffac8e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644); #define CREATE_TRACE_POINTS #include "mmutrace.h" +#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) + #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) struct kvm_rmap_desc { @@ -634,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) if (*spte & shadow_accessed_mask) kvm_set_pfn_accessed(pfn); if (is_writeble_pte(*spte)) - kvm_release_pfn_dirty(pfn); - else - kvm_release_pfn_clean(pfn); + kvm_set_pfn_dirty(pfn); rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); if (!*rmapp) { printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); @@ -748,7 +748,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) return write_protected; } -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) { u64 *spte; int need_tlb_flush = 0; @@ -763,8 +763,45 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) return need_tlb_flush; } -static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, - int (*handler)(struct kvm *kvm, unsigned long *rmapp)) +static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) +{ + int need_flush = 0; + u64 *spte, new_spte; + pte_t *ptep = (pte_t *)data; + pfn_t new_pfn; + + WARN_ON(pte_huge(*ptep)); + new_pfn = pte_pfn(*ptep); + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + BUG_ON(!is_shadow_present_pte(*spte)); + rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); + need_flush = 1; + if (pte_write(*ptep)) { + rmap_remove(kvm, spte); + __set_spte(spte, shadow_trap_nonpresent_pte); + spte = rmap_next(kvm, rmapp, NULL); + } else { + new_spte = *spte &~ (PT64_BASE_ADDR_MASK); + new_spte |= (u64)new_pfn << PAGE_SHIFT; + + new_spte &= ~PT_WRITABLE_MASK; + new_spte &= ~SPTE_HOST_WRITEABLE; + if (is_writeble_pte(*spte)) + kvm_set_pfn_dirty(spte_to_pfn(*spte)); + __set_spte(spte, new_spte); + spte = rmap_next(kvm, rmapp, spte); + } + } + if (need_flush) + kvm_flush_remote_tlbs(kvm); + + return 0; +} + +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data, + int (*handler)(struct kvm *kvm, unsigned long *rmapp, + u64 data)) { int i, j; int retval = 0; @@ -786,13 +823,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, if (hva >= start && hva < end) { gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; - retval |= handler(kvm, &memslot->rmap[gfn_offset]); + retval |= handler(kvm, &memslot->rmap[gfn_offset], + data); for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { int idx = gfn_offset; idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); retval |= handler(kvm, - &memslot->lpage_info[j][idx].rmap_pde); + &memslot->lpage_info[j][idx].rmap_pde, + data); } } } @@ -802,10 +841,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); + return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); } -static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + kvm_handle_hva(kvm, hva, (u64)&pte, kvm_set_pte_rmapp); +} + +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) { u64 *spte; int young = 0; @@ -841,13 +885,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) gfn = unalias_gfn(vcpu->kvm, gfn); rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); - kvm_unmap_rmapp(vcpu->kvm, rmapp); + kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); kvm_flush_remote_tlbs(vcpu->kvm); } int kvm_age_hva(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, kvm_age_rmapp); + return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); } #ifdef MMU_DEBUG @@ -1756,7 +1800,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, int user_fault, int write_fault, int dirty, int level, gfn_t gfn, pfn_t pfn, bool speculative, - bool can_unsync) + bool can_unsync, bool reset_host_protection) { u64 spte; int ret = 0; @@ -1783,6 +1827,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, kvm_is_mmio_pfn(pfn)); + if (reset_host_protection) + spte |= SPTE_HOST_WRITEABLE; + spte |= (u64)pfn << PAGE_SHIFT; if ((pte_access & ACC_WRITE_MASK) @@ -1828,7 +1875,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, int *ptwrite, int level, gfn_t gfn, - pfn_t pfn, bool speculative) + pfn_t pfn, bool speculative, + bool reset_host_protection) { int was_rmapped = 0; int was_writeble = is_writeble_pte(*sptep); @@ -1860,7 +1908,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, } if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, - dirty, level, gfn, pfn, speculative, true)) { + dirty, level, gfn, pfn, speculative, true, + reset_host_protection)) { if (write_fault) *ptwrite = 1; kvm_x86_ops->tlb_flush(vcpu); @@ -1877,8 +1926,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, page_header_update_slot(vcpu->kvm, sptep, gfn); if (!was_rmapped) { rmap_count = rmap_add(vcpu, sptep, gfn); - if (!is_rmap_spte(*sptep)) - kvm_release_pfn_clean(pfn); + kvm_release_pfn_clean(pfn); if (rmap_count > RMAP_RECYCLE_THRESHOLD) rmap_recycle(vcpu, sptep, gfn); } else { @@ -1909,7 +1957,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, if (iterator.level == level) { mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, 0, write, 1, &pt_write, - level, gfn, pfn, false); + level, gfn, pfn, false, true); ++vcpu->stat.pf_fixed; break; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d2fec9c12d2..72558f8ff3f 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) return; kvm_get_pfn(pfn); + /* + * we call mmu_set_spte() with reset_host_protection = true beacuse that + * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). + */ mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, - gpte_to_gfn(gpte), pfn, true); + gpte_to_gfn(gpte), pfn, true, true); } /* @@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, user_fault, write_fault, gw->ptes[gw->level-1] & PT_DIRTY_MASK, ptwrite, level, - gw->gfn, pfn, false); + gw->gfn, pfn, false, true); break; } @@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { int i, offset, nr_present; + bool reset_host_protection; offset = nr_present = 0; @@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) nr_present++; pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); + if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) { + pte_access &= ~ACC_WRITE_MASK; + reset_host_protection = 0; + } else { + reset_host_protection = 1; + } set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, - spte_to_pfn(sp->spt[i]), true, false); + spte_to_pfn(sp->spt[i]), true, false, + reset_host_protection); } return !nr_present; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 944cc9c04b3..c17404add91 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -767,6 +767,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) rdtscll(tsc_this); delta = vcpu->arch.host_tsc - tsc_this; svm->vmcb->control.tsc_offset += delta; + if (is_nested(svm)) + svm->nested.hsave->control.tsc_offset += delta; vcpu->cpu = cpu; kvm_migrate_timers(vcpu); svm->asid_generation = 0; @@ -2057,10 +2059,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) switch (ecx) { case MSR_IA32_TSC: { - u64 tsc; + u64 tsc_offset; + + if (is_nested(svm)) + tsc_offset = svm->nested.hsave->control.tsc_offset; + else + tsc_offset = svm->vmcb->control.tsc_offset; - rdtscll(tsc); - *data = svm->vmcb->control.tsc_offset + tsc; + *data = tsc_offset + native_read_tsc(); break; } case MSR_K6_STAR: @@ -2146,10 +2152,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) switch (ecx) { case MSR_IA32_TSC: { - u64 tsc; + u64 tsc_offset = data - native_read_tsc(); + u64 g_tsc_offset = 0; + + if (is_nested(svm)) { + g_tsc_offset = svm->vmcb->control.tsc_offset - + svm->nested.hsave->control.tsc_offset; + svm->nested.hsave->control.tsc_offset = tsc_offset; + } + + svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset; - rdtscll(tsc); - svm->vmcb->control.tsc_offset = data - tsc; break; } case MSR_K6_STAR: diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f3812014bd0..ed53b42caba 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -709,7 +709,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (vcpu->cpu != cpu) { vcpu_clear(vmx); kvm_migrate_timers(vcpu); - vpid_sync_vcpu_all(vmx); + set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); local_irq_disable(); list_add(&vmx->local_vcpus_link, &per_cpu(vcpus_on_cpu, cpu)); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index be451ee4424..9b9695322f5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1591,6 +1591,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, if (cpuid->nent < 1) goto out; + if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) + cpuid->nent = KVM_MAX_CPUID_ENTRIES; r = -ENOMEM; cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); if (!cpuid_entries) diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 3e549b8ec8c..85f5db95c60 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -15,8 +15,10 @@ ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o lib-y += checksum_32.o lib-y += strstr_32.o - lib-y += semaphore_32.o string_32.o cmpxchg8b_emu.o - + lib-y += semaphore_32.o string_32.o +ifneq ($(CONFIG_X86_CMPXCHG64),y) + lib-y += cmpxchg8b_emu.o +endif lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else obj-y += io_64.o iomap_copy_64.o |