@@ -301,6 +301,8 @@ struct perf_event_pmu_context;
#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
#define PERF_PMU_CAP_AUX_PAUSE 0x0200
+/* Support to pass through the whole PMU resource to a guest */
+#define PERF_PMU_CAP_MEDIATED_VPMU 0x0400
/**
* pmu::scope
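A PMU driver opts in by setting the new bit in pmu::capabilities. A
minimal sketch of a hypothetical driver (only the MEDIATED_VPMU bit is
introduced by this patch; the name and remaining fields are
illustrative):

	static struct pmu my_mediated_pmu = {
		/* the whole PMU can be handed to a guest */
		.capabilities	= PERF_PMU_CAP_MEDIATED_VPMU,
		/* ... .event_init, .add, .del, .start, .stop, .read ... */
	};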
@@ -1811,6 +1813,8 @@ extern void perf_event_task_tick(void);
extern int perf_event_account_interrupt(struct perf_event *event);
extern int perf_event_period(struct perf_event *event, u64 value);
extern u64 perf_event_pause(struct perf_event *event, bool reset);
+extern int perf_get_mediated_pmu(void);
+extern void perf_put_mediated_pmu(void);
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
@@ -1901,6 +1905,13 @@ static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *r
{
return 0;
}
+
+static inline int perf_get_mediated_pmu(void)
+{
+ return 0;
+}
+
+static inline void perf_put_mediated_pmu(void) { }
#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
@@ -431,6 +431,20 @@ static atomic_t nr_bpf_events __read_mostly;
static atomic_t nr_cgroup_events __read_mostly;
static atomic_t nr_text_poke_events __read_mostly;
static atomic_t nr_build_id_events __read_mostly;
+static atomic_t nr_include_guest_events __read_mostly;
+
+static atomic_t nr_mediated_pmu_vms;
+static DEFINE_MUTEX(perf_mediated_pmu_mutex);
+
+/* A !exclude_guest event on a PMU with PERF_PMU_CAP_MEDIATED_VPMU */
+static inline bool is_include_guest_event(struct perf_event *event)
+{
+ if ((event->pmu->capabilities & PERF_PMU_CAP_MEDIATED_VPMU) &&
+ !event->attr.exclude_guest)
+ return true;
+
+ return false;
+}
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
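For reference, an event qualifies as an include-guest event when its
attr leaves exclude_guest clear and its PMU advertises
PERF_PMU_CAP_MEDIATED_VPMU. A hypothetical userspace attr that would
qualify on such a PMU (exclude_guest defaults to 0):

	struct perf_event_attr attr = {
		.type	= PERF_TYPE_HARDWARE,
		.config	= PERF_COUNT_HW_CPU_CYCLES,
		.size	= sizeof(attr),
		/* .exclude_guest = 0: also counts while a guest runs */
	};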
@@ -5320,6 +5334,9 @@ static void _free_event(struct perf_event *event)
unaccount_event(event);
+ if (is_include_guest_event(event))
+ atomic_dec(&nr_include_guest_events);
+
security_perf_event_free(event);
if (event->rb) {
@@ -5877,6 +5894,36 @@ u64 perf_event_pause(struct perf_event *event, bool reset)
}
EXPORT_SYMBOL_GPL(perf_event_pause);
+/*
+ * Currently invoked at VM creation to:
+ * - check whether there are existing !exclude_guest events on any PMU
+ *   with PERF_PMU_CAP_MEDIATED_VPMU
+ * - bump nr_mediated_pmu_vms to prevent creation of !exclude_guest
+ *   events on PMUs with PERF_PMU_CAP_MEDIATED_VPMU
+ *
+ * PMUs without PERF_PMU_CAP_MEDIATED_VPMU are unaffected; perf still
+ * owns all of their resources.
+ */
+int perf_get_mediated_pmu(void)
+{
+ guard(mutex)(&perf_mediated_pmu_mutex);
+ if (atomic_inc_not_zero(&nr_mediated_pmu_vms))
+ return 0;
+
+ if (atomic_read(&nr_include_guest_events))
+ return -EBUSY;
+
+ atomic_inc(&nr_mediated_pmu_vms);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(perf_get_mediated_pmu);
+
+void perf_put_mediated_pmu(void)
+{
+ atomic_dec(&nr_mediated_pmu_vms);
+}
+EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
+
/*
* Holding the top-level event's child_mutex means that any
* descendant process that has inherited this event will block
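A sketch of how a hypervisor would be expected to pair these calls
around a VM's lifetime; my_vm_init()/my_vm_destroy() are hypothetical
placeholders, not KVM's actual hooks:

	static int my_vm_init(void)
	{
		int err;

		/* Fails with -EBUSY while !exclude_guest events exist on
		 * a PERF_PMU_CAP_MEDIATED_VPMU PMU. */
		err = perf_get_mediated_pmu();
		if (err)
			return err;

		/* ... create the VM ... */
		return 0;
	}

	static void my_vm_destroy(void)
	{
		/* ... tear down the VM ... */
		perf_put_mediated_pmu();
	}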
@@ -12210,6 +12257,17 @@ static void account_event(struct perf_event *event)
account_pmu_sb_event(event);
}
+static int perf_account_include_guest_event(void)
+{
+ guard(mutex)(&perf_mediated_pmu_mutex);
+
+ if (atomic_read(&nr_mediated_pmu_vms))
+ return -EOPNOTSUPP;
+
+ atomic_inc(&nr_include_guest_events);
+ return 0;
+}
+
/*
* Allocate and initialize an event structure
*/
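perf_mediated_pmu_mutex serializes the two sides, so VM creation and
include-guest event creation cannot both succeed. The userspace-visible
effect (assuming the hardware PMU advertises
PERF_PMU_CAP_MEDIATED_VPMU) is that perf_event_open() refuses a
!exclude_guest event while a mediated-PMU VM exists; a hypothetical
probe:

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <errno.h>
	#include <stdio.h>

	int main(void)
	{
		struct perf_event_attr attr = {
			.type	= PERF_TYPE_HARDWARE,
			.config	= PERF_COUNT_HW_CPU_CYCLES,
			.size	= sizeof(attr),
			/* exclude_guest left clear */
		};
		int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

		if (fd < 0 && errno == EOPNOTSUPP)
			printf("mediated-PMU VM active, event refused\n");
		return 0;
	}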
@@ -12435,11 +12493,19 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (err)
goto err_callchain_buffer;
+ if (is_include_guest_event(event)) {
+ err = perf_account_include_guest_event();
+ if (err)
+ goto err_security_alloc;
+ }
+
/* symmetric to unaccount_event() in _free_event() */
account_event(event);
return event;
+err_security_alloc:
+ security_perf_event_free(event);
err_callchain_buffer:
if (!event->parent) {
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)