summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/perf_event.c
diff options
context:
space:
mode:
authorStephane Eranian <eranian@google.com>2011-06-06 16:57:03 +0200
committerIngo Molnar <mingo@elte.hu>2011-07-01 11:06:36 +0200
commitefc9f05df2dd171280dcb736a4d973ffefd5508e (patch)
treeccc1cee8f1cc0ad5391732eb3637b685b4b155a0 /arch/x86/kernel/cpu/perf_event.c
parenta7ac67ea021b4603095d2aa458bc41641238f22c (diff)
downloadlinux-efc9f05df2dd171280dcb736a4d973ffefd5508e.tar.gz
linux-efc9f05df2dd171280dcb736a4d973ffefd5508e.tar.xz
linux-efc9f05df2dd171280dcb736a4d973ffefd5508e.zip
perf_events: Update Intel extra regs shared constraints management
This patch improves the code managing the extra shared registers used for offcore_response events on Intel Nehalem/Westmere. The idea is to use static allocation instead of dynamic allocation. This simplifies greatly the get and put constraint routines for those events. The patch also renames per_core to shared_regs because the same data structure gets used whether or not HT is on. When HT is off, those events still need to coordination because they use a extra MSR that has to be shared within an event group. Signed-off-by: Stephane Eranian <eranian@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20110606145703.GA7258@quad Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r--arch/x86/kernel/cpu/perf_event.c78
1 files changed, 61 insertions, 17 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5b86ec51534c..019fda7489e7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -45,6 +45,29 @@ do { \
#endif
/*
+ * | NHM/WSM | SNB |
+ * register -------------------------------
+ * | HT | no HT | HT | no HT |
+ *-----------------------------------------
+ * offcore | core | core | cpu | core |
+ * lbr_sel | core | core | cpu | core |
+ * ld_lat | cpu | core | cpu | core |
+ *-----------------------------------------
+ *
+ * Given that there is a small number of shared regs,
+ * we can pre-allocate their slot in the per-cpu
+ * per-core reg tables.
+ */
+enum extra_reg_type {
+ EXTRA_REG_NONE = -1, /* not used */
+
+ EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
+ EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
+
+ EXTRA_REG_MAX /* number of entries needed */
+};
+
+/*
* best effort, GUP based copy_from_user() that assumes IRQ or NMI context
*/
static unsigned long
@@ -132,11 +155,10 @@ struct cpu_hw_events {
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
/*
- * Intel percore register state.
- * Coordinate shared resources between HT threads.
+ * manage shared (per-core, per-cpu) registers
+ * used on Intel NHM/WSM/SNB
*/
- int percore_used; /* Used by this CPU? */
- struct intel_percore *per_core;
+ struct intel_shared_regs *shared_regs;
/*
* AMD specific bits
@@ -187,26 +209,45 @@ struct cpu_hw_events {
for ((e) = (c); (e)->weight; (e)++)
/*
+ * Per register state.
+ */
+struct er_account {
+ raw_spinlock_t lock; /* per-core: protect structure */
+ u64 config; /* extra MSR config */
+ u64 reg; /* extra MSR number */
+ atomic_t ref; /* reference count */
+};
+
+/*
* Extra registers for specific events.
+ *
* Some events need large masks and require external MSRs.
- * Define a mapping to these extra registers.
+ * Those extra MSRs end up being shared for all events on
+ * a PMU and sometimes between PMU of sibling HT threads.
+ * In either case, the kernel needs to handle conflicting
+ * accesses to those extra, shared, regs. The data structure
+ * to manage those registers is stored in cpu_hw_event.
*/
struct extra_reg {
unsigned int event;
unsigned int msr;
u64 config_mask;
u64 valid_mask;
+ int idx; /* per_xxx->regs[] reg index */
};
-#define EVENT_EXTRA_REG(e, ms, m, vm) { \
+#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
.event = (e), \
.msr = (ms), \
.config_mask = (m), \
.valid_mask = (vm), \
+ .idx = EXTRA_REG_##i \
}
-#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
- EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
+ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
union perf_capabilities {
struct {
@@ -253,7 +294,6 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
struct event_constraint *event_constraints;
- struct event_constraint *percore_constraints;
void (*quirks)(void);
int perfctr_second_write;
@@ -400,10 +440,10 @@ static inline unsigned int x86_pmu_event_addr(int index)
*/
static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
{
+ struct hw_perf_event_extra *reg;
struct extra_reg *er;
- event->hw.extra_reg = 0;
- event->hw.extra_config = 0;
+ reg = &event->hw.extra_reg;
if (!x86_pmu.extra_regs)
return 0;
@@ -413,8 +453,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
continue;
if (event->attr.config1 & ~er->valid_mask)
return -EINVAL;
- event->hw.extra_reg = er->msr;
- event->hw.extra_config = event->attr.config1;
+
+ reg->idx = er->idx;
+ reg->config = event->attr.config1;
+ reg->reg = er->msr;
break;
}
return 0;
@@ -713,6 +755,9 @@ static int __x86_pmu_event_init(struct perf_event *event)
event->hw.last_cpu = -1;
event->hw.last_tag = ~0ULL;
+ /* mark unused */
+ event->hw.extra_reg.idx = EXTRA_REG_NONE;
+
return x86_pmu.hw_config(event);
}
@@ -754,8 +799,8 @@ static void x86_pmu_disable(struct pmu *pmu)
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
u64 enable_mask)
{
- if (hwc->extra_reg)
- wrmsrl(hwc->extra_reg, hwc->extra_config);
+ if (hwc->extra_reg.reg)
+ wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
wrmsrl(hwc->config_base, hwc->config | enable_mask);
}
@@ -1692,7 +1737,6 @@ static int validate_group(struct perf_event *event)
fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
if (!fake_cpuc)
goto out;
-
/*
* the event is not yet connected with its
* siblings therefore we must first collect