summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/perf_event_intel.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c261
1 files changed, 227 insertions, 34 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 187c294..6bca492 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -5,6 +5,8 @@
* among events on a single PMU.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/init.h>
@@ -21,14 +23,14 @@
*/
static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
- [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
- [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
};
static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -136,6 +138,84 @@ static u64 intel_pmu_event_map(int hw_event)
return intel_perfmon_event_map[hw_event];
}
+#define SNB_DMND_DATA_RD (1ULL << 0)
+#define SNB_DMND_RFO (1ULL << 1)
+#define SNB_DMND_IFETCH (1ULL << 2)
+#define SNB_DMND_WB (1ULL << 3)
+#define SNB_PF_DATA_RD (1ULL << 4)
+#define SNB_PF_RFO (1ULL << 5)
+#define SNB_PF_IFETCH (1ULL << 6)
+#define SNB_LLC_DATA_RD (1ULL << 7)
+#define SNB_LLC_RFO (1ULL << 8)
+#define SNB_LLC_IFETCH (1ULL << 9)
+#define SNB_BUS_LOCKS (1ULL << 10)
+#define SNB_STRM_ST (1ULL << 11)
+#define SNB_OTHER (1ULL << 15)
+#define SNB_RESP_ANY (1ULL << 16)
+#define SNB_NO_SUPP (1ULL << 17)
+#define SNB_LLC_HITM (1ULL << 18)
+#define SNB_LLC_HITE (1ULL << 19)
+#define SNB_LLC_HITS (1ULL << 20)
+#define SNB_LLC_HITF (1ULL << 21)
+#define SNB_LOCAL (1ULL << 22)
+#define SNB_REMOTE (0xffULL << 23)
+#define SNB_SNP_NONE (1ULL << 31)
+#define SNB_SNP_NOT_NEEDED (1ULL << 32)
+#define SNB_SNP_MISS (1ULL << 33)
+#define SNB_NO_FWD (1ULL << 34)
+#define SNB_SNP_FWD (1ULL << 35)
+#define SNB_HITM (1ULL << 36)
+#define SNB_NON_DRAM (1ULL << 37)
+
+#define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
+#define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO)
+#define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
+ SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
+ SNB_HITM)
+
+#define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
+#define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY)
+
+#define SNB_L3_ACCESS SNB_RESP_ANY
+#define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 snb_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
+ [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
+ [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
+ [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
+ [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
+ [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
+ [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
+ },
+ },
+};
+
static __initconst const u64 snb_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -233,16 +313,16 @@ static __initconst const u64 snb_hw_cache_event_ids
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
},
},
@@ -747,7 +827,7 @@ static void intel_pmu_disable_all(void)
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
- if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+ if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
intel_pmu_disable_bts();
intel_pmu_pebs_disable_all();
@@ -763,9 +843,9 @@ static void intel_pmu_enable_all(int added)
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
- if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+ if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
struct perf_event *event =
- cpuc->events[X86_PMC_IDX_FIXED_BTS];
+ cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
if (WARN_ON_ONCE(!event))
return;
@@ -871,7 +951,7 @@ static inline void intel_pmu_ack_status(u64 ack)
static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
{
- int idx = hwc->idx - X86_PMC_IDX_FIXED;
+ int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, mask;
mask = 0xfULL << (idx * 4);
@@ -886,7 +966,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
+ if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
intel_pmu_disable_bts();
intel_pmu_drain_bts_buffer();
return;
@@ -915,7 +995,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
{
- int idx = hwc->idx - X86_PMC_IDX_FIXED;
+ int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, bits, mask;
/*
@@ -949,7 +1029,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
+ if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
if (!__this_cpu_read(cpu_hw_events.enabled))
return;
@@ -1000,14 +1080,14 @@ static void intel_pmu_reset(void)
local_irq_save(flags);
- printk("clearing PMU state on CPU#%d\n", smp_processor_id());
+ pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
- checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
+ wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
+ wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
}
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
- checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+ wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
if (ds)
ds->bts_index = ds->bts_buffer_base;
@@ -1442,8 +1522,16 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
+ /*
+ * If PMU counter has PEBS enabled it is not enough to disable counter
+ * on a guest entry since PEBS memory write can overshoot guest entry
+ * and corrupt guest memory. Disabling PEBS solves the problem.
+ */
+ arr[1].msr = MSR_IA32_PEBS_ENABLE;
+ arr[1].host = cpuc->pebs_enabled;
+ arr[1].guest = 0;
- *nr = 1;
+ *nr = 2;
return arr;
}
@@ -1707,16 +1795,61 @@ static __init void intel_clovertown_quirk(void)
* But taken together it might just make sense to not enable PEBS on
* these chips.
*/
- printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
+ pr_warn("PEBS disabled due to CPU errata\n");
x86_pmu.pebs = 0;
x86_pmu.pebs_constraints = NULL;
}
+static int intel_snb_pebs_broken(int cpu)
+{
+ u32 rev = UINT_MAX; /* default to broken for unknown models */
+
+ switch (cpu_data(cpu).x86_model) {
+ case 42: /* SNB */
+ rev = 0x28;
+ break;
+
+ case 45: /* SNB-EP */
+ switch (cpu_data(cpu).x86_mask) {
+ case 6: rev = 0x618; break;
+ case 7: rev = 0x70c; break;
+ }
+ }
+
+ return (cpu_data(cpu).microcode < rev);
+}
+
+static void intel_snb_check_microcode(void)
+{
+ int pebs_broken = 0;
+ int cpu;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ if ((pebs_broken = intel_snb_pebs_broken(cpu)))
+ break;
+ }
+ put_online_cpus();
+
+ if (pebs_broken == x86_pmu.pebs_broken)
+ return;
+
+ /*
+ * Serialized by the microcode lock..
+ */
+ if (x86_pmu.pebs_broken) {
+ pr_info("PEBS enabled due to microcode update\n");
+ x86_pmu.pebs_broken = 0;
+ } else {
+ pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
+ x86_pmu.pebs_broken = 1;
+ }
+}
+
static __init void intel_sandybridge_quirk(void)
{
- printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
- x86_pmu.pebs = 0;
- x86_pmu.pebs_constraints = NULL;
+ x86_pmu.check_microcode = intel_snb_check_microcode;
+ intel_snb_check_microcode();
}
static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
@@ -1736,8 +1869,8 @@ static __init void intel_arch_events_quirk(void)
/* disable event that reported as not presend by cpuid */
for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
- printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
- intel_arch_events_map[bit].name);
+ pr_warn("CPUID marked event: \'%s\' unavailable\n",
+ intel_arch_events_map[bit].name);
}
}
@@ -1756,7 +1889,7 @@ static __init void intel_nehalem_quirk(void)
intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
ebx.split.no_branch_misses_retired = 0;
x86_pmu.events_maskl = ebx.full;
- printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
+ pr_info("CPU erratum AAJ80 worked around\n");
}
}
@@ -1765,6 +1898,7 @@ __init int intel_pmu_init(void)
union cpuid10_edx edx;
union cpuid10_eax eax;
union cpuid10_ebx ebx;
+ struct event_constraint *c;
unsigned int unused;
int version;
@@ -1800,6 +1934,8 @@ __init int intel_pmu_init(void)
x86_pmu.events_maskl = ebx.full;
x86_pmu.events_mask_len = eax.split.mask_length;
+ x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
* assume at least 3 events:
@@ -1872,6 +2008,7 @@ __init int intel_pmu_init(void)
break;
case 28: /* Atom */
+ case 54: /* Cedariew */
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -1911,9 +2048,10 @@ __init int intel_pmu_init(void)
case 42: /* SandyBridge */
case 45: /* SandyBridge, "Romely-EP" */
x86_add_quirk(intel_sandybridge_quirk);
- case 58: /* IvyBridge */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_snb();
@@ -1934,6 +2072,29 @@ __init int intel_pmu_init(void)
pr_cont("SandyBridge events, ");
break;
+ case 58: /* IvyBridge */
+ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_snb();
+
+ x86_pmu.event_constraints = intel_snb_event_constraints;
+ x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ x86_pmu.extra_regs = intel_snb_extra_regs;
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.er_flags |= ERF_HAS_RSP_1;
+ x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+ /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+
+ pr_cont("IvyBridge events, ");
+ break;
+
default:
switch (x86_pmu.version) {
@@ -1951,5 +2112,37 @@ __init int intel_pmu_init(void)
}
}
+ if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
+ WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+ x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
+ x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
+ }
+ x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+
+ if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
+ WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+ x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
+ x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
+ }
+
+ x86_pmu.intel_ctrl |=
+ ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+
+ if (x86_pmu.event_constraints) {
+ /*
+ * event on fixed counter2 (REF_CYCLES) only works on this
+ * counter, so do not extend mask to generic counters
+ */
+ for_each_event_constraint(c, x86_pmu.event_constraints) {
+ if (c->cmask != X86_RAW_EVENT_MASK
+ || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+ continue;
+ }
+
+ c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+ c->weight += x86_pmu.num_counters;
+ }
+ }
+
return 0;
}