perf_event.h source code [linux/include/linux/perf_event.h]

1	/*
2	* Performance events:
3	*
4	* Copyright (C) 2008-2009, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
5	* Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
6	* Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
7	*
8	* Data type definitions, declarations, prototypes.
9	*
10	* Started by: Thomas Gleixner and Ingo Molnar
11	*
12	* For licencing details see kernel-base/COPYING
13	*/
14	#ifndef _LINUX_PERF_EVENT_H
15	#define _LINUX_PERF_EVENT_H
16
17	#include <uapi/linux/perf_event.h>
18	#include <uapi/linux/bpf_perf_event.h>
19
20	/*
21	* Kernel-internal data types and definitions:
22	*/
23
24	#ifdef CONFIG_PERF_EVENTS
25	# include <asm/perf_event.h>
26	# include <asm/local64.h>
27	#endif
28
29	#ifdef CONFIG_HAVE_HW_BREAKPOINT
30	# include <linux/rhashtable-types.h>
31	# include <asm/hw_breakpoint.h>
32	#endif
33
34	#include <linux/list.h>
35	#include <linux/mutex.h>
36	#include <linux/rculist.h>
37	#include <linux/rcupdate.h>
38	#include <linux/spinlock.h>
39	#include <linux/hrtimer.h>
40	#include <linux/fs.h>
41	#include <linux/pid_namespace.h>
42	#include <linux/workqueue.h>
43	#include <linux/ftrace.h>
44	#include <linux/cpu.h>
45	#include <linux/irq_work.h>
46	#include <linux/static_key.h>
47	#include <linux/jump_label_ratelimit.h>
48	#include <linux/atomic.h>
49	#include <linux/sysfs.h>
50	#include <linux/perf_regs.h>
51	#include <linux/cgroup.h>
52	#include <linux/refcount.h>
53	#include <linux/security.h>
54	#include <linux/static_call.h>
55	#include <linux/lockdep.h>
56
57	#include <asm/local.h>
58
59	struct perf_callchain_entry {
60	u64 nr;
61	u64 ip[]; / /proc/sys/kernel/perf_event_max_stack /
62	};
63
64	struct perf_callchain_entry_ctx {
65	struct perf_callchain_entry *entry;
66	u32 max_stack;
67	u32 nr;
68	short contexts;
69	bool contexts_maxed;
70	};
71
/*
 * Copy callback type: takes a destination, a source, an offset and a
 * length, and returns an unsigned long.
 */
typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
				     unsigned long off, unsigned long len);
74
75	struct perf_raw_frag {
76	union {
77	struct perf_raw_frag *next;
78	unsigned long pad;
79	};
80	perf_copy_f copy;
81	void *data;
82	u32 size;
83	} __packed;
84
85	struct perf_raw_record {
86	struct perf_raw_frag frag;
87	u32 size;
88	};
89
90	static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
91	{
92	return frag->pad < sizeof(u64);
93	}
94
95	/*
96	* branch stack layout:
97	* nr: number of taken branches stored in entries[]
98	* hw_idx: The low level index of raw branch records
99	* for the most recent branch.
100	* -1ULL means invalid/unknown.
101	*
102	* Note that nr can vary from sample to sample
103	* branches (to, from) are stored from most recent
104	* to least recent, i.e., entries[0] contains the most
105	* recent branch.
106	* The entries[] is an abstraction of raw branch records,
107	* which may not be stored in age order in HW, e.g. Intel LBR.
108	* The hw_idx is to expose the low level index of raw
109	* branch record for the most recent branch aka entries[0].
110	* The hw_idx index is between -1 (unknown) and max depth,
111	* which can be retrieved in /sys/devices/cpu/caps/branches.
112	* For the architectures whose raw branch records are
113	* already stored in age order, the hw_idx should be 0.
114	*/
115	struct perf_branch_stack {
116	u64 nr;
117	u64 hw_idx;
118	struct perf_branch_entry entries[];
119	};
120
121	struct task_struct;
122
123	/*
124	* extra PMU register associated with an event
125	*/
126	struct hw_perf_event_extra {
127	u64 config; / register value /
128	unsigned int reg; / register address or index /
129	int alloc; / extra register already allocated /
130	int idx; / index in shared_regs->regs[] /
131	};
132
/**
 * hw_perf_event::flag values
 *
 * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
 * usage.
 */
#define PERF_EVENT_FLAG_ARCH			0x0fffffff
#define PERF_EVENT_FLAG_USER_READ_CNT		0x80000000

/* The generic flag must not overlap the architecture-reserved bits. */
static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
143
/**
 * struct hw_perf_event - performance event hardware details:
 */
struct hw_perf_event {
#ifdef CONFIG_PERF_EVENTS
	union {
		struct { /* hardware */
			u64				config;
			u64				config1;
			u64				last_tag;
			u64				dyn_constraint;
			unsigned long			config_base;
			unsigned long			event_base;
			int				event_base_rdpmc;
			int				idx;
			int				last_cpu;
			int				flags;

			struct hw_perf_event_extra	extra_reg;
			struct hw_perf_event_extra	branch_reg;
		};
		struct { /* aux / Intel-PT */
			u64				aux_config;
			/*
			 * For AUX area events, aux_paused cannot be a state
			 * flag because it can be updated asynchronously to
			 * state.
			 */
			unsigned int			aux_paused;
		};
		struct { /* software */
			struct hrtimer			hrtimer;
		};
		struct { /* tracepoint */
			/* for tp_event->class */
			struct list_head		tp_list;
		};
		struct { /* amd_power */
			u64				pwr_acc;
			u64				ptsc;
		};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
		struct { /* breakpoint */
			/*
			 * Crufty hack to avoid the chicken and egg
			 * problem hw_breakpoint has with context
			 * creation and event initialization.
			 */
			struct arch_hw_breakpoint	info;
			struct rhlist_head		bp_list;
		};
#endif
		struct { /* amd_iommu */
			u8				iommu_bank;
			u8				iommu_cntr;
			u16				padding;
			u64				conf;
			u64				conf1;
		};
	};
	/*
	 * If the event is a per task event, this will point to the task in
	 * question. See the comment in perf_event_alloc().
	 */
	struct task_struct		*target;

	/*
	 * PMU would store hardware filter configuration
	 * here.
	 */
	void				*addr_filters;

	/* Last sync'ed generation of filters */
	unsigned long			addr_filters_gen;

/*
 * hw_perf_event::state flags; used to track the PERF_EF_* state.
 */

/* the counter is stopped */
#define PERF_HES_STOPPED		0x01

/* event->count up-to-date */
#define PERF_HES_UPTODATE		0x02

#define PERF_HES_ARCH			0x04

	int				state;

	/*
	 * The last observed hardware counter value, updated with a
	 * local64_cmpxchg() such that pmu::read() can be called nested.
	 */
	local64_t			prev_count;

	/*
	 * The period to start the next sample with.
	 */
	u64				sample_period;

	union {
		struct { /* Sampling */
			/*
			 * The period we started this sample with.
			 */
			u64			last_period;

			/*
			 * However much is left of the current period;
			 * note that this is a full 64bit value and
			 * allows for generation of periods longer
			 * than hardware might allow.
			 */
			local64_t		period_left;
		};
		struct { /* Topdown events counting for context switch */
			u64			saved_metric;
			u64			saved_slots;
		};
	};

	/*
	 * State for throttling the event, see __perf_event_overflow() and
	 * perf_adjust_freq_unthr_context().
	 */
	u64				interrupts_seq;
	u64				interrupts;

	/*
	 * State for freq target events, see __perf_event_overflow() and
	 * perf_adjust_freq_unthr_context().
	 */
	u64				freq_time_stamp;
	u64				freq_count_stamp;
#endif /* CONFIG_PERF_EVENTS */
};
280
struct perf_event;
struct perf_event_pmu_context;

/*
 * Common implementation detail of pmu::{start,commit,cancel}_txn
 */

/* txn to add/schedule event on PMU */
#define PERF_PMU_TXN_ADD			0x1

/* txn to read event group from PMU */
#define PERF_PMU_TXN_READ			0x2
293
/**
 * pmu::capabilities flags
 *
 * Each flag occupies its own bit so the values can be OR-ed together.
 */
#define PERF_PMU_CAP_NO_INTERRUPT		0x0001
#define PERF_PMU_CAP_NO_NMI			0x0002
#define PERF_PMU_CAP_AUX_NO_SG			0x0004
#define PERF_PMU_CAP_EXTENDED_REGS		0x0008
#define PERF_PMU_CAP_EXCLUSIVE			0x0010
#define PERF_PMU_CAP_ITRACE			0x0020
#define PERF_PMU_CAP_NO_EXCLUDE			0x0040
#define PERF_PMU_CAP_AUX_OUTPUT			0x0080
#define PERF_PMU_CAP_EXTENDED_HW_TYPE		0x0100
#define PERF_PMU_CAP_AUX_PAUSE			0x0200
#define PERF_PMU_CAP_AUX_PREFER_LARGE		0x0400
308
/**
 * pmu::scope
 *
 * PERF_PMU_MAX_SCOPE is the last enumerator, one past the last real scope.
 */
enum perf_pmu_scope {
	PERF_PMU_SCOPE_NONE	= 0,
	PERF_PMU_SCOPE_CORE,
	PERF_PMU_SCOPE_DIE,
	PERF_PMU_SCOPE_CLUSTER,
	PERF_PMU_SCOPE_PKG,
	PERF_PMU_SCOPE_SYS_WIDE,
	PERF_PMU_MAX_SCOPE,
};
321
struct perf_output_handle;

/* Sentinel pointer value with all bits set; distinct from NULL. */
#define PMU_NULL_DEV	((void *)(~0UL))
325
326	/**
327	* struct pmu - generic performance monitoring unit
328	*/
329	struct pmu {
330	struct list_head entry;
331
332	spinlock_t events_lock;
333	struct list_head events;
334
335	struct module *module;
336	struct device *dev;
337	struct device *parent;
338	const struct attribute_group **attr_groups;
339	const struct attribute_group **attr_update;
340	const char *name;
341	int type;
342
343	/*
344	* various common per-pmu feature flags
345	*/
346	int capabilities;
347
348	/*
349	* PMU scope
350	*/
351	unsigned int scope;
352
353	struct perf_cpu_pmu_context * __percpu *cpu_pmu_context;
354	atomic_t exclusive_cnt; / < 0: cpu; > 0: tsk /
355	int task_ctx_nr;
356	int hrtimer_interval_ms;
357
358	/ number of address filters this PMU can do /
359	unsigned int nr_addr_filters;
360
361	/*
362	* Fully disable/enable this PMU, can be used to protect from the PMI
363	* as well as for lazy/batch writing of the MSRs.
364	*/
365	void (pmu_enable) (struct* pmu pmu); /* optional /
366	void (pmu_disable) (struct* pmu pmu); /* optional /
367
368	/*
369	* Try and initialize the event for this PMU.
370	*
371	* Returns:
372	* -ENOENT -- @event is not for this PMU
373	*
374	* -ENODEV -- @event is for this PMU but PMU not present
375	* -EBUSY -- @event is for this PMU but PMU temporarily unavailable
376	* -EINVAL -- @event is for this PMU but @event is not valid
377	* -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported
378	* -EACCES -- @event is for this PMU, @event is valid, but no privileges
379	*
380	* 0 -- @event is for this PMU and valid
381	*
382	* Other error return values are allowed.
383	*/
384	int (event_init) (struct* perf_event *event);
385
386	/*
387	* Notification that the event was mapped or unmapped. Called
388	* in the context of the mapping task.
389	*/
390	void (event_mapped) (struct* perf_event event, struct* mm_struct mm); /* optional /
391	void (event_unmapped) (struct* perf_event event, struct* mm_struct mm); /* optional /
392
393	/*
394	* Flags for ->add()/->del()/ ->start()/->stop(). There are
395	* matching hw_perf_event::state flags.
396	*/
397
398	/ start the counter when adding /
399	#define PERF_EF_START 0x01
400
401	/ reload the counter when starting /
402	#define PERF_EF_RELOAD 0x02
403
404	/ update the counter when stopping /
405	#define PERF_EF_UPDATE 0x04
406
407	/ AUX area event, pause tracing /
408	#define PERF_EF_PAUSE 0x08
409
410	/ AUX area event, resume tracing /
411	#define PERF_EF_RESUME 0x10
412
413	/*
414	* Adds/Removes a counter to/from the PMU, can be done inside a
415	* transaction, see the ->*_txn() methods.
416	*
417	* The add/del callbacks will reserve all hardware resources required
418	* to service the event, this includes any counter constraint
419	* scheduling etc.
420	*
421	* Called with IRQs disabled and the PMU disabled on the CPU the event
422	* is on.
423	*
424	* ->add() called without PERF_EF_START should result in the same state
425	* as ->add() followed by ->stop().
426	*
427	* ->del() must always PERF_EF_UPDATE stop an event. If it calls
428	* ->stop() that must deal with already being stopped without
429	* PERF_EF_UPDATE.
430	*/
431	int (add) (struct* perf_event event, int* flags);
432	void (del) (struct* perf_event event, int* flags);
433
434	/*
435	* Starts/Stops a counter present on the PMU.
436	*
437	* The PMI handler should stop the counter when perf_event_overflow()
438	* returns !0. ->start() will be used to continue.
439	*
440	* Also used to change the sample period.
441	*
442	* Called with IRQs disabled and the PMU disabled on the CPU the event
443	* is on -- will be called from NMI context with the PMU generates
444	* NMIs.
445	*
446	* ->stop() with PERF_EF_UPDATE will read the counter and update
447	* period/count values like ->read() would.
448	*
449	* ->start() with PERF_EF_RELOAD will reprogram the counter
450	* value, must be preceded by a ->stop() with PERF_EF_UPDATE.
451	*
452	* ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
453	* overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
454	* PERF_EF_RESUME.
455	*
456	* ->start() with PERF_EF_RESUME will start as simply as possible but
457	* only if the counter is not otherwise stopped. Will not overlap
458	* another ->start() with PERF_EF_RESUME nor ->stop() with
459	* PERF_EF_PAUSE.
460	*
461	* Notably, PERF_EF_PAUSE/PERF_EF_RESUME can be concurrent with other
462	* ->stop()/->start() invocations, just not itself.
463	*/
464	void (start) (struct* perf_event event, int* flags);
465	void (stop) (struct* perf_event event, int* flags);
466
467	/*
468	* Updates the counter value of the event.
469	*
470	* For sampling capable PMUs this will also update the software period
471	* hw_perf_event::period_left field.
472	*/
473	void (read) (struct* perf_event *event);
474
475	/*
476	* Group events scheduling is treated as a transaction, add
477	* group events as a whole and perform one schedulability test.
478	* If the test fails, roll back the whole group
479	*
480	* Start the transaction, after this ->add() doesn't need to
481	* do schedulability tests.
482	*
483	* Optional.
484	*/
485	void (start_txn) (struct* pmu pmu, unsigned* int txn_flags);
486	/*
487	* If ->start_txn() disabled the ->add() schedulability test
488	* then ->commit_txn() is required to perform one. On success
489	* the transaction is closed. On error the transaction is kept
490	* open until ->cancel_txn() is called.
491	*
492	* Optional.
493	*/
494	int (commit_txn) (struct* pmu *pmu);
495	/*
496	* Will cancel the transaction, assumes ->del() is called
497	* for each successful ->add() during the transaction.
498	*
499	* Optional.
500	*/
501	void (cancel_txn) (struct* pmu *pmu);
502
503	/*
504	* Will return the value for perf_event_mmap_page::index for this event,
505	* if no implementation is provided it will default to 0 (see
506	* perf_event_idx_default).
507	*/
508	int (event_idx) (struct* perf_event event); /optional /*
509
510	/*
511	* context-switches callback
512	*/
513	void (sched_task) (struct* perf_event_pmu_context *pmu_ctx,
514	struct task_struct *task, bool sched_in);
515
516	/*
517	* Kmem cache of PMU specific data
518	*/
519	struct kmem_cache *task_ctx_cache;
520
521	/*
522	* Set up pmu-private data structures for an AUX area
523	*/
524	void (setup_aux) (struct perf_event event, void* **pages,
525	int nr_pages, bool overwrite);
526	/ optional /
527
528	/*
529	* Free pmu-private AUX data structures
530	*/
531	void (free_aux) (void* aux); /* optional /
532
533	/*
534	* Take a snapshot of the AUX buffer without touching the event
535	* state, so that preempting ->start()/->stop() callbacks does
536	* not interfere with their logic. Called in PMI context.
537	*
538	* Returns the size of AUX data copied to the output handle.
539	*
540	* Optional.
541	*/
542	long (snapshot_aux) (struct* perf_event *event,
543	struct perf_output_handle *handle,
544	unsigned long size);
545
546	/*
547	* Validate address range filters: make sure the HW supports the
548	* requested configuration and number of filters; return 0 if the
549	* supplied filters are valid, -errno otherwise.
550	*
551	* Runs in the context of the ioctl()ing process and is not serialized
552	* with the rest of the PMU callbacks.
553	*/
554	int (addr_filters_validate) (struct* list_head *filters);
555	/ optional /
556
557	/*
558	* Synchronize address range filter configuration:
559	* translate hw-agnostic filters into hardware configuration in
560	* event::hw::addr_filters.
561	*
562	* Runs as a part of filter sync sequence that is done in ->start()
563	* callback by calling perf_event_addr_filters_sync().
564	*
565	* May (and should) traverse event::addr_filters::list, for which its
566	* caller provides necessary serialization.
567	*/
568	void (addr_filters_sync) (struct* perf_event *event);
569	/ optional /
570
571	/*
572	* Check if event can be used for aux_output purposes for
573	* events of this PMU.
574	*
575	* Runs from perf_event_open(). Should return 0 for "no match"
576	* or non-zero for "match".
577	*/
578	int (aux_output_match) (struct* perf_event *event);
579	/ optional /
580
581	/*
582	* Skip programming this PMU on the given CPU. Typically needed for
583	* big.LITTLE things.
584	*/
585	bool (filter) (struct* pmu pmu, int* cpu); / optional /
586
587	/*
588	* Check period value for PERF_EVENT_IOC_PERIOD ioctl.
589	*/
590	int (check_period) (struct* perf_event event, u64 value); /* optional /
591	};
592
/* Action selected for an address range filter (see perf_addr_filter::action). */
enum perf_addr_filter_action_t {
	PERF_ADDR_FILTER_ACTION_STOP = 0,
	PERF_ADDR_FILTER_ACTION_START,
	PERF_ADDR_FILTER_ACTION_FILTER,
};
598
599	/**
600	* struct perf_addr_filter - address range filter definition
601	* @entry: event's filter list linkage
602	* @path: object file's path for file-based filters
603	* @offset: filter range offset
604	* @size: filter range size (size==0 means single address trigger)
605	* @action: filter/start/stop
606	*
607	* This is a hardware-agnostic filter configuration as specified by the user.
608	*/
609	struct perf_addr_filter {
610	struct list_head entry;
611	struct path path;
612	unsigned long offset;
613	unsigned long size;
614	enum perf_addr_filter_action_t action;
615	};
616
617	/**
618	* struct perf_addr_filters_head - container for address range filters
619	* @list: list of filters for this event
620	* @lock: spinlock that serializes accesses to the @list and event's
621	* (and its children's) filter generations.
622	* @nr_file_filters: number of file-based filters
623	*
624	* A child event will use parent's @list (and therefore @lock), so they are
625	* bundled together; see perf_event_addr_filters().
626	*/
627	struct perf_addr_filters_head {
628	struct list_head list;
629	raw_spinlock_t lock;
630	unsigned int nr_file_filters;
631	};
632
/* An address range expressed as a start address and a size. */
struct perf_addr_filter_range {
	unsigned long		start;
	unsigned long		size;
};
637
/*
 * The normal states are:
 *
 *            ACTIVE    --.
 *               ^        |
 *               |        |
 *       sched_{in,out}() |
 *               |        |
 *               v        |
 *      ,---> INACTIVE  --+ <-.
 *      |        |            |
 *      |                {dis,en}able()
 * sched_in()    |            |
 *      |       OFF    <--' --+
 *      |                     |
 *       `--->  ERROR   ------'
 *
 * That is:
 *
 * sched_in:       INACTIVE          -> {ACTIVE,ERROR}
 * sched_out:      ACTIVE            -> INACTIVE
 * disable:        {ACTIVE,INACTIVE} -> OFF
 * enable:         {OFF,ERROR}       -> INACTIVE
 *
 * Where {OFF,ERROR} are disabled states.
 *
 * Then we have the {EXIT,REVOKED,DEAD} states which are various shades of
 * defunct events:
 *
 *  - EXIT means task that the event was assigned to died, but child events
 *    still live, and further children can still be created. But the event
 *    itself will never be active again. It can only transition to
 *    {REVOKED,DEAD};
 *
 *  - REVOKED means the PMU the event was associated with is gone; all
 *    functionality is stopped but the event is still alive. Can only
 *    transition to DEAD;
 *
 *  - DEAD event really is DYING tearing down state and freeing bits.
 *
 */
enum perf_event_state {
	PERF_EVENT_STATE_DEAD		= -5,
	PERF_EVENT_STATE_REVOKED	= -4, /* pmu gone, must not touch */
	PERF_EVENT_STATE_EXIT		= -3, /* task died, still inherit */
	PERF_EVENT_STATE_ERROR		= -2, /* scheduling error, can enable */
	PERF_EVENT_STATE_OFF		= -1,
	PERF_EVENT_STATE_INACTIVE	=  0,
	PERF_EVENT_STATE_ACTIVE		=  1,
};
688
struct file;
struct perf_sample_data;

/*
 * Overflow handler callback type: receives the event, the sample data
 * and the register state; returns nothing.
 */
typedef void (*perf_overflow_handler_t)(struct perf_event *,
					struct perf_sample_data *,
					struct pt_regs *regs);
695
/*
 * Event capabilities. For event_caps and groups caps.
 *
 * PERF_EV_CAP_SOFTWARE: Is a software event.
 * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
 * from any CPU in the package where it is active.
 * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
 * cannot be a group leader. If an event with this flag is detached from the
 * group it is scheduled out and moved into an unrecoverable ERROR state.
 * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
 * PMU scope where it is active.
 */
#define PERF_EV_CAP_SOFTWARE		BIT(0)
#define PERF_EV_CAP_READ_ACTIVE_PKG	BIT(1)
#define PERF_EV_CAP_SIBLING		BIT(2)
#define PERF_EV_CAP_READ_SCOPE		BIT(3)
712
/* Size of the swevent_hlist heads[] array: 2^SWEVENT_HLIST_BITS entries. */
#define SWEVENT_HLIST_BITS		8
#define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
715
716	struct swevent_hlist {
717	struct hlist_head heads[SWEVENT_HLIST_SIZE];
718	struct rcu_head rcu_head;
719	};
720
/*
 * PERF_ATTACH_* flag bits; each flag occupies its own bit.
 * NOTE(review): presumably stored in perf_event::attach_state -- confirm.
 */
#define PERF_ATTACH_CONTEXT		0x0001
#define PERF_ATTACH_GROUP		0x0002
#define PERF_ATTACH_TASK		0x0004
#define PERF_ATTACH_TASK_DATA		0x0008
#define PERF_ATTACH_GLOBAL_DATA		0x0010
#define PERF_ATTACH_SCHED_CB		0x0020
#define PERF_ATTACH_CHILD		0x0040
#define PERF_ATTACH_EXCLUSIVE		0x0080
#define PERF_ATTACH_CALLCHAIN		0x0100
#define PERF_ATTACH_ITRACE		0x0200
731
732	struct bpf_prog;
733	struct perf_cgroup;
734	struct perf_buffer;
735
736	struct pmu_event_list {
737	raw_spinlock_t lock;
738	struct list_head list;
739	};
740
/*
 * event->sibling_list is modified while holding both ctx->lock and ctx->mutex
 * as such iteration must hold either lock. However, since ctx->lock is an IRQ
 * safe lock, and is only held by the CPU doing the modification, having IRQs
 * disabled is sufficient since it will hold-off the IPIs.
 */
#ifdef CONFIG_PROVE_LOCKING
# define lockdep_assert_event_ctx(event)				\
	WARN_ON_ONCE(__lockdep_enabled &&				\
		     (this_cpu_read(hardirqs_enabled) &&		\
		      lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
#else
# define lockdep_assert_event_ctx(event)
#endif
755
/*
 * Iterate over the siblings of @event. The loop body runs only when
 * @event is its own group leader; the locking assertion is evaluated
 * first in either case.
 */
#define for_each_sibling_event(sibling, event)				\
	lockdep_assert_event_ctx(event);				\
	if ((event)->group_leader == (event))				\
		list_for_each_entry((sibling), &(event)->sibling_list, sibling_list)
760
/**
 * struct perf_event - performance event kernel representation:
 */
struct perf_event {
#ifdef CONFIG_PERF_EVENTS
	/*
	 * entry onto perf_event_context::event_list;
	 *   modifications require ctx->lock
	 *   RCU safe iterations.
	 */
	struct list_head		event_entry;

	/*
	 * Locked for modification by both ctx->mutex and ctx->lock; holding
	 * either suffices for read.
	 */
	struct list_head		sibling_list;
	struct list_head		active_list;
	/*
	 * Node on the pinned or flexible tree located at the event context;
	 */
	struct rb_node			group_node;
	u64				group_index;
	/*
	 * We need storage to track the entries in perf_pmu_migrate_context; we
	 * cannot use the event_entry because of RCU and we want to keep the
	 * group intact which avoids us using the other two entries.
	 */
	struct list_head		migrate_entry;

	struct hlist_node		hlist_entry;
	struct list_head		active_entry;
	int				nr_siblings;

	/* Not serialized. Only written during event initialization. */
	int				event_caps;
	/* The cumulative AND of all event_caps for events in this group. */
	int				group_caps;

	unsigned int			group_generation;
	struct perf_event		*group_leader;
	/*
	 * event->pmu will always point to pmu in which this event belongs.
	 * Whereas event->pmu_ctx->pmu may point to other pmu when group of
	 * different pmu events is created.
	 */
	struct pmu			*pmu;
	void				*pmu_private;

	enum perf_event_state		state;
	unsigned int			attach_state;
	local64_t			count;
	atomic64_t			child_count;

	/*
	 * These are the total time in nanoseconds that the event
	 * has been enabled (i.e. eligible to run, and the task has
	 * been scheduled in, if this is a per-task event)
	 * and running (scheduled onto the CPU), respectively.
	 */
	u64				total_time_enabled;
	u64				total_time_running;
	u64				tstamp;

	struct perf_event_attr		attr;
	u16				header_size;
	u16				id_header_size;
	u16				read_size;
	struct hw_perf_event		hw;

	struct perf_event_context	*ctx;
	/*
	 * event->pmu_ctx points to perf_event_pmu_context in which the event
	 * is added. This pmu_ctx can be of other pmu for sw event when that
	 * sw event is part of a group which also contains non-sw events.
	 */
	struct perf_event_pmu_context	*pmu_ctx;
	atomic_long_t			refcount;

	/*
	 * These accumulate total time (in nanoseconds) that children
	 * events have been enabled and running, respectively.
	 */
	atomic64_t			child_total_time_enabled;
	atomic64_t			child_total_time_running;

	/*
	 * Protect attach/detach and child_list:
	 */
	struct mutex			child_mutex;
	struct list_head		child_list;
	struct perf_event		*parent;

	int				oncpu;
	int				cpu;

	struct list_head		owner_entry;
	struct task_struct		*owner;

	/* mmap bits */
	struct mutex			mmap_mutex;
	refcount_t			mmap_count;

	struct perf_buffer		*rb;
	struct list_head		rb_entry;
	unsigned long			rcu_batches;
	int				rcu_pending;

	/* poll related */
	wait_queue_head_t		waitq;
	struct fasync_struct		*fasync;

	/* delayed work for NMIs and such */
	unsigned int			pending_wakeup;
	unsigned int			pending_kill;
	unsigned int			pending_disable;
	unsigned long			pending_addr;	/* SIGTRAP */
	struct irq_work			pending_irq;
	struct irq_work			pending_disable_irq;
	struct callback_head		pending_task;
	unsigned int			pending_work;

	atomic_t			event_limit;

	/* address range filters */
	struct perf_addr_filters_head	addr_filters;
	/* vma address array for file-based filters */
	struct perf_addr_filter_range	*addr_filter_ranges;
	unsigned long			addr_filters_gen;

	/* for aux_output events */
	struct perf_event		*aux_event;

	void (*destroy)(struct perf_event *);
	struct rcu_head			rcu_head;

	struct pid_namespace		*ns;
	u64				id;

	atomic64_t			lost_samples;

	u64				(*clock)(void);
	perf_overflow_handler_t		overflow_handler;
	void				*overflow_handler_context;
	struct bpf_prog			*prog;
	u64				bpf_cookie;

#ifdef CONFIG_EVENT_TRACING
	struct trace_event_call		*tp_event;
	struct event_filter		*filter;
# ifdef CONFIG_FUNCTION_TRACER
	struct ftrace_ops		ftrace_ops;
# endif
#endif

#ifdef CONFIG_CGROUP_PERF
	struct perf_cgroup		*cgrp; /* cgroup event is attach to */
#endif

#ifdef CONFIG_SECURITY
	void				*security;
#endif
	struct list_head		sb_list;
	struct list_head		pmu_list;

	/*
	 * Certain events gets forwarded to another pmu internally by over-
	 * writing kernel copy of event->attr.type without user being aware
	 * of it. event->orig_type contains original 'type' requested by
	 * user.
	 */
	u32				orig_type;
#endif /* CONFIG_PERF_EVENTS */
};
935
936	/*
937	* ,-----------------------[1:n]------------------------.
938	* V V
939	* perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event
940	* \| \|
941	* `--[n:1]-> pmu <-[1:n]--'
942	*
943	*
944	* struct perf_event_pmu_context lifetime is refcount based and RCU freed
945	* (similar to perf_event_context). Locking is as if it were a member of
946	* perf_event_context; specifically:
947	*
948	* modification, both: ctx->mutex && ctx->lock
949	* reading, either: ctx->mutex \|\| ctx->lock
950	*
951	* There is one exception to this; namely put_pmu_ctx() isn't always called
952	* with ctx->mutex held; this means that as long as we can guarantee the epc
953	* has events the above rules hold.
954	*
955	* Specificially, sys_perf_event_open()'s group_leader case depends on
956	* ctx->mutex pinning the configuration. Since we hold a reference on
957	* group_leader (through the filedesc) it can't go away, therefore it's
958	* associated pmu_ctx must exist and cannot change due to ctx->mutex.
959	*
960	* perf_event holds a refcount on perf_event_context
961	* perf_event holds a refcount on perf_event_pmu_context
962	*/
963	struct perf_event_pmu_context {
964	struct pmu *pmu;
965	struct perf_event_context *ctx;
966
967	struct list_head pmu_ctx_entry;
968
969	struct list_head pinned_active;
970	struct list_head flexible_active;
971
972	/ Used to identify the per-cpu perf_event_pmu_context /
973	unsigned int embedded : `1`;
974
975	unsigned int nr_events;
976	unsigned int nr_cgroups;
977	unsigned int nr_freq;
978
979	atomic_t refcount; / event <-> epc /
980	struct rcu_head rcu_head;
981
982	/*
983	* Set when one or more (plausibly active) event can't be scheduled
984	* due to pmu overcommit or pmu constraints, except tolerant to
985	* events not necessary to be active due to scheduling constraints,
986	* such as cgroups.
987	*/
988	int rotate_necessary;
989	};
990
991	static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc)
992	{
993	return !list_empty(head: &epc->flexible_active) \|\| !list_empty(head: &epc->pinned_active);
994	}
995
996	struct perf_event_groups {
997	struct rb_root tree;
998	u64 index;
999	};
1000
1001
1002	/**
1003	* struct perf_event_context - event context structure
1004	*
1005	* Used as a container for task events and CPU events as well:
1006	*/
1007	struct perf_event_context {
1008	/*
1009	* Protect the states of the events in the list,
1010	* nr_active, and the list:
1011	*/
1012	raw_spinlock_t lock;
1013	/*
1014	* Protect the list of events. Locking either mutex or lock
1015	* is sufficient to ensure the list doesn't change; to change
1016	* the list you need to lock both the mutex and the spinlock.
1017	*/
1018	struct mutex mutex;
1019
1020	struct list_head pmu_ctx_list;
1021	struct perf_event_groups pinned_groups;
1022	struct perf_event_groups flexible_groups;
1023	struct list_head event_list;
1024
1025	int nr_events;
1026	int nr_user;
1027	int is_active;
1028
1029	int nr_stat;
1030	int nr_freq;
1031	int rotate_disable;
1032
1033	refcount_t refcount; / event <-> ctx /
1034	struct task_struct *task;
1035
1036	/*
1037	* Context clock, runs when context enabled.
1038	*/
1039	u64 time;
1040	u64 timestamp;
1041	u64 timeoffset;
1042
1043	/*
1044	* These fields let us detect when two contexts have both
1045	* been cloned (inherited) from a common ancestor.
1046	*/
1047	struct perf_event_context *parent_ctx;
1048	u64 parent_gen;
1049	u64 generation;
1050	int pin_count;
1051	#ifdef CONFIG_CGROUP_PERF
1052	int nr_cgroups; / cgroup evts /
1053	#endif
1054	struct rcu_head rcu_head;
1055
1056	/*
1057	* The count of events for which using the switch-out fast path
1058	* should be avoided.
1059	*
1060	* Sum (event->pending_work + events with
1061	* (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)))
1062	*
1063	* The SIGTRAP is targeted at ctx->task, as such it won't do changing
1064	* that until the signal is delivered.
1065	*/
1066	local_t nr_no_switch_fast;
1067	};
1068
/**
 * struct perf_ctx_data - PMU specific data for a task
 * @rcu_head:  To avoid a race when freeing the PMU specific data
 * @refcount:  To track users
 * @global:    To track system-wide users
 * @ctx_cache: Kmem cache of PMU specific data
 * @data:      PMU specific data
 *
 * Currently, the struct is only used in Intel LBR call stack mode to
 * save/restore the call stack of a task on context switches.
 *
 * The rcu_head is used to prevent a race when freeing the data.
 * The data is only allocated when Intel LBR call stack mode is enabled.
 * The data will be freed when the mode is disabled.
 * The content of the data will only be accessed in context switch, which
 * should be protected by rcu_read_lock().
 *
 * Because of the alignment requirement of Intel Arch LBR, the Kmem cache
 * is used to allocate the PMU specific data. The ctx_cache is to track
 * the Kmem cache.
 *
 * Careful: Struct perf_ctx_data is added as a pointer in struct task_struct.
 * When system-wide Intel LBR call stack mode is enabled, a buffer with
 * constant size will be allocated for each task.
 * Also, system memory consumption can further grow when the size of
 * struct perf_ctx_data enlarges.
 */
struct perf_ctx_data {
	struct rcu_head			rcu_head;
	refcount_t			refcount;
	int				global;
	struct kmem_cache		*ctx_cache;
	void				*data;
};
1103
/*
 * CPU-side PMU context: embeds one perf_event_pmu_context (@epc), points at
 * a second one (@task_epc), and carries scheduling-callback bookkeeping,
 * a few counters and an hrtimer with its lock, interval and active flag.
 *
 * NOTE(review): the name suggests one instance per (CPU, PMU) pair; confirm
 * against the allocation site, which is not visible in this header section.
 */
struct perf_cpu_pmu_context {
	struct perf_event_pmu_context	epc;
	struct perf_event_pmu_context	*task_epc;

	/* List hook and usage counter for the sched callback machinery. */
	struct list_head		sched_cb_entry;
	int				sched_cb_usage;

	int				active_oncpu;
	int				exclusive;
	int				pmu_disable_count;

	/* hrtimer state; hrtimer_lock presumably guards it -- TODO confirm. */
	raw_spinlock_t			hrtimer_lock;
	struct hrtimer			hrtimer;
	ktime_t				hrtimer_interval;
	unsigned int			hrtimer_active;
};
1120
1121	/**
1122	* struct perf_event_cpu_context - per cpu event context structure
1123	*/
1124	struct perf_cpu_context {
1125	struct perf_event_context ctx;
1126	struct perf_event_context *task_ctx;
1127	int online;
1128
1129	#ifdef CONFIG_CGROUP_PERF
1130	struct perf_cgroup *cgrp;
1131	#endif
1132
1133	/*
1134	* Per-CPU storage for iterators used in visit_groups_merge. The default
1135	* storage is of size 2 to hold the CPU and any CPU event iterators.
1136	*/
1137	int heap_size;
1138	struct perf_event **heap;
1139	struct perf_event *heap_default[`2`];
1140	};
1141
1142	struct perf_output_handle {
1143	struct perf_event *event;
1144	struct perf_buffer *rb;
1145	unsigned long wakeup;
1146	unsigned long size;
1147	union {
1148	u64 flags; / perf_output() /*
1149	u64 aux_flags; / perf_aux_output() /*
1150	struct {
1151	u64 skip_read : `1`;
1152	};
1153	};
1154	union {
1155	void *addr;
1156	unsigned long head;
1157	};
1158	int page;
1159	};
1160
/*
 * Bundle of three pointers -- register state, sample data and the event --
 * NOTE(review): presumably the kernel-side context handed to BPF programs
 * attached to perf events; confirm against the BPF perf-event code.
 */
struct bpf_perf_event_data_kern {
	bpf_user_pt_regs_t		*regs;
	struct perf_sample_data		*data;
	struct perf_event		*event;
};
1166
1167	#ifdef CONFIG_CGROUP_PERF
1168
/*
 * perf_cgroup_info keeps track of time_enabled for a cgroup.
 * This is a per-cpu dynamically allocated data structure.
 *
 * The time/timestamp/timeoffset triple has the same names and types as the
 * context clock fields of struct perf_event_context.
 */
struct perf_cgroup_info {
	u64				time;
	u64				timestamp;
	u64				timeoffset;
	int				active;
};
1179
/*
 * perf's cgroup subsystem state: the generic css plus a per-CPU
 * perf_cgroup_info. perf_cgroup_from_task() recovers this structure from
 * the embedded @css via container_of().
 */
struct perf_cgroup {
	struct cgroup_subsys_state	css;
	struct perf_cgroup_info	__percpu *info;
};
1184
1185	/*
1186	* Must ensure cgroup is pinned (css_get) before calling
1187	* this function. In other words, we cannot call this function
1188	* if there is no cgroup event for the current CPU context.
1189	*/
1190	static inline struct perf_cgroup *
1191	perf_cgroup_from_task(struct task_struct task, struct* perf_event_context *ctx)
1192	{
1193	return container_of(task_css_check(task, perf_event_cgrp_id,
1194	ctx ? lockdep_is_held(&ctx->lock)
1195	: true),
1196	struct perf_cgroup, css);
1197	}
1198	#endif /* CONFIG_CGROUP_PERF */
1199
1200	#ifdef CONFIG_PERF_EVENTS
1201
1202	extern struct perf_event_context perf_cpu_task_ctx(void*);
1203
1204	extern void perf_aux_output_begin(struct* perf_output_handle *handle,
1205	struct perf_event *event);
1206	extern void perf_aux_output_end(struct perf_output_handle *handle,
1207	unsigned long size);
1208	extern int perf_aux_output_skip(struct perf_output_handle *handle,
1209	unsigned long size);
1210	extern void perf_get_aux(struct* perf_output_handle *handle);
1211	extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
1212	extern void perf_event_itrace_started(struct perf_event *event);
1213
1214	extern int perf_pmu_register(struct pmu pmu, const* char name, int* type);
1215	extern int perf_pmu_unregister(struct pmu *pmu);
1216
1217	extern void __perf_event_task_sched_in(struct task_struct *prev,
1218	struct task_struct *task);
1219	extern void __perf_event_task_sched_out(struct task_struct *prev,
1220	struct task_struct *next);
1221	extern int perf_event_init_task(struct task_struct *child, u64 clone_flags);
1222	extern void perf_event_exit_task(struct task_struct *child);
1223	extern void perf_event_free_task(struct task_struct *task);
1224	extern void perf_event_delayed_put(struct task_struct *task);
1225	extern struct file perf_event_get(unsigned* int fd);
1226	extern const struct perf_event perf_get_event(struct* file *file);
1227	extern const struct perf_event_attr perf_event_attrs(struct* perf_event *event);
1228	extern void perf_event_print_debug(void);
1229	extern void perf_pmu_disable(struct pmu *pmu);
1230	extern void perf_pmu_enable(struct pmu *pmu);
1231	extern void perf_sched_cb_dec(struct pmu *pmu);
1232	extern void perf_sched_cb_inc(struct pmu *pmu);
1233	extern int perf_event_task_disable(void);
1234	extern int perf_event_task_enable(void);
1235
1236	extern void perf_pmu_resched(struct pmu *pmu);
1237
1238	extern int perf_event_refresh(struct perf_event event, int* refresh);
1239	extern void perf_event_update_userpage(struct perf_event *event);
1240	extern int perf_event_release_kernel(struct perf_event *event);
1241
1242	extern struct perf_event *
1243	perf_event_create_kernel_counter(struct perf_event_attr *attr,
1244	int cpu,
1245	struct task_struct *task,
1246	perf_overflow_handler_t callback,
1247	void *context);
1248
1249	extern void perf_pmu_migrate_context(struct pmu *pmu,
1250	int src_cpu, int dst_cpu);
1251	extern int perf_event_read_local(struct perf_event event, u64 value,
1252	u64 enabled, u64 running);
1253	extern u64 perf_event_read_value(struct perf_event *event,
1254	u64 enabled, u64 running);
1255
1256	extern struct perf_callchain_entry perf_callchain(struct* perf_event event, struct* pt_regs *regs);
1257
1258	static inline bool branch_sample_no_flags(const struct perf_event *event)
1259	{
1260	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
1261	}
1262
1263	static inline bool branch_sample_no_cycles(const struct perf_event *event)
1264	{
1265	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
1266	}
1267
1268	static inline bool branch_sample_type(const struct perf_event *event)
1269	{
1270	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
1271	}
1272
1273	static inline bool branch_sample_hw_index(const struct perf_event *event)
1274	{
1275	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
1276	}
1277
1278	static inline bool branch_sample_priv(const struct perf_event *event)
1279	{
1280	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
1281	}
1282
1283	static inline bool branch_sample_counters(const struct perf_event *event)
1284	{
1285	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
1286	}
1287
1288	static inline bool branch_sample_call_stack(const struct perf_event *event)
1289	{
1290	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
1291	}
1292
/*
 * Per-sample scratch data. @sample_flags records which PERF_SAMPLE_* parts
 * have been filled in (the perf_sample_save_*() helpers OR their bit in),
 * and @dyn_size accumulates the byte size of the variable-length parts.
 */
struct perf_sample_data {
	/*
	 * Fields set by perf_sample_data_init() unconditionally,
	 * group so as to minimize the cachelines touched.
	 */
	u64				sample_flags;
	u64				period;
	u64				dyn_size;

	/*
	 * Fields commonly set by __perf_event_header__init_id(),
	 * group so as to minimize the cachelines touched.
	 */
	u64				type;
	struct {
		u32	pid;
		u32	tid;
	}				tid_entry;
	u64				time;
	u64				id;
	struct {
		u32	cpu;
		u32	reserved;
	}				cpu_entry;

	/*
	 * The other fields, optionally {set,used} by
	 * perf_{prepare,output}_sample().
	 */
	u64				ip;
	struct perf_callchain_entry	*callchain;
	struct perf_raw_record		*raw;
	struct perf_branch_stack	*br_stack;
	u64				*br_stack_cntr;
	union perf_sample_weight	weight;
	union perf_mem_data_src		data_src;
	u64				txn;

	struct perf_regs		regs_user;
	struct perf_regs		regs_intr;
	u64				stack_user_size;

	u64				stream_id;
	u64				cgroup;
	u64				addr;
	u64				phys_addr;
	u64				data_page_size;
	u64				code_page_size;
	u64				aux_size;
} ____cacheline_aligned;
1343
/* default value for data source */
#define PERF_MEM_NA (PERF_MEM_S(OP, NA)    |\
		     PERF_MEM_S(LVL, NA)   |\
		     PERF_MEM_S(SNOOP, NA) |\
		     PERF_MEM_S(LOCK, NA)  |\
		     PERF_MEM_S(TLB, NA)   |\
		     PERF_MEM_S(LVLNUM, NA))
1351
1352	static inline void perf_sample_data_init(struct perf_sample_data *data,
1353	u64 addr, u64 period)
1354	{
1355	/ remaining struct members initialized in perf_prepare_sample() /
1356	data->sample_flags = PERF_SAMPLE_PERIOD;
1357	data->period = period;
1358	data->dyn_size = `0`;
1359
1360	if (addr) {
1361	data->addr = addr;
1362	data->sample_flags \|= PERF_SAMPLE_ADDR;
1363	}
1364	}
1365
1366	static inline void perf_sample_save_callchain(struct perf_sample_data *data,
1367	struct perf_event *event,
1368	struct pt_regs *regs)
1369	{
1370	int size = `1`;
1371
1372	if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
1373	return;
1374	if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_CALLCHAIN))
1375	return;
1376
1377	data->callchain = perf_callchain(event, regs);
1378	size += data->callchain->nr;
1379
1380	data->dyn_size += size * sizeof(u64);
1381	data->sample_flags \|= PERF_SAMPLE_CALLCHAIN;
1382	}
1383
1384	static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
1385	struct perf_event *event,
1386	struct perf_raw_record *raw)
1387	{
1388	struct perf_raw_frag *frag = &raw->frag;
1389	u32 sum = `0`;
1390	int size;
1391
1392	if (!(event->attr.sample_type & PERF_SAMPLE_RAW))
1393	return;
1394	if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_RAW))
1395	return;
1396
1397	do {
1398	sum += frag->size;
1399	if (perf_raw_frag_last(frag))
1400	break;
1401	frag = frag->next;
1402	} while (`1`);
1403
1404	size = round_up(sum + sizeof(u32), sizeof(u64));
1405	raw->size = size - sizeof(u32);
1406	frag->pad = raw->size - sum;
1407
1408	data->raw = raw;
1409	data->dyn_size += size;
1410	data->sample_flags \|= PERF_SAMPLE_RAW;
1411	}
1412
1413	static inline bool has_branch_stack(struct perf_event *event)
1414	{
1415	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
1416	}
1417
1418	static inline void perf_sample_save_brstack(struct perf_sample_data *data,
1419	struct perf_event *event,
1420	struct perf_branch_stack *brs,
1421	u64 *brs_cntr)
1422	{
1423	int size = sizeof(u64); / nr /
1424
1425	if (!has_branch_stack(event))
1426	return;
1427	if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_BRANCH_STACK))
1428	return;
1429
1430	if (branch_sample_hw_index(event))
1431	size += sizeof(u64);
1432
1433	brs->nr = min_t(u16, event->attr.sample_max_stack, brs->nr);
1434
1435	size += brs->nr * sizeof(struct perf_branch_entry);
1436
1437	/*
1438	* The extension space for counters is appended after the
1439	* struct perf_branch_stack. It is used to store the occurrences
1440	* of events of each branch.
1441	*/
1442	if (brs_cntr)
1443	size += brs->nr * sizeof(u64);
1444
1445	data->br_stack = brs;
1446	data->br_stack_cntr = brs_cntr;
1447	data->dyn_size += size;
1448	data->sample_flags \|= PERF_SAMPLE_BRANCH_STACK;
1449	}
1450
1451	static inline u32 perf_sample_data_size(struct perf_sample_data *data,
1452	struct perf_event *event)
1453	{
1454	u32 size = sizeof(struct perf_event_header);
1455
1456	size += event->header_size + event->id_header_size;
1457	size += data->dyn_size;
1458
1459	return size;
1460	}
1461
1462	/*
1463	* Clear all bitfields in the perf_branch_entry.
1464	* The to and from fields are not cleared because they are
1465	* systematically modified by caller.
1466	*/
1467	static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
1468	{
1469	br->mispred = `0`;
1470	br->predicted = `0`;
1471	br->in_tx = `0`;
1472	br->abort = `0`;
1473	br->cycles = `0`;
1474	br->type = `0`;
1475	br->spec = PERF_BR_SPEC_NA;
1476	br->reserved = `0`;
1477	}
1478
/* Sample preparation and output, implemented outside this header. */
extern void perf_output_sample(struct perf_output_handle *handle,
			       struct perf_event_header *header,
			       struct perf_sample_data *data,
			       struct perf_event *event);
extern void perf_prepare_sample(struct perf_sample_data *data,
				struct perf_event *event,
				struct pt_regs *regs);
extern void perf_prepare_header(struct perf_event_header *header,
				struct perf_sample_data *data,
				struct perf_event *event,
				struct pt_regs *regs);

extern int perf_event_overflow(struct perf_event *event,
			       struct perf_sample_data *data,
			       struct pt_regs *regs);

/*
 * Output routines. The forward and backward variants are the two handlers
 * that is_default_overflow_handler() recognises as defaults.
 */
extern void perf_event_output_forward(struct perf_event *event,
				      struct perf_sample_data *data,
				      struct pt_regs *regs);
extern void perf_event_output_backward(struct perf_event *event,
				       struct perf_sample_data *data,
				       struct pt_regs *regs);
extern int perf_event_output(struct perf_event *event,
			     struct perf_sample_data *data,
			     struct pt_regs *regs);
1504
1505	static inline bool
1506	is_default_overflow_handler(struct perf_event *event)
1507	{
1508	perf_overflow_handler_t overflow_handler = event->overflow_handler;
1509
1510	if (likely(overflow_handler == perf_event_output_forward))
1511	return true;
1512	if (unlikely(overflow_handler == perf_event_output_backward))
1513	return true;
1514	return false;
1515	}
1516
/* Sample-id helpers and lost-sample logging, implemented outside this header. */
extern void
perf_event_header__init_id(struct perf_event_header *header,
			   struct perf_sample_data *data,
			   struct perf_event *event);
extern void
perf_event__output_id_sample(struct perf_event *event,
			     struct perf_output_handle *handle,
			     struct perf_sample_data *sample);

extern void
perf_log_lost_samples(struct perf_event *event, u64 lost);
1528
1529	static inline bool event_has_any_exclude_flag(struct perf_event *event)
1530	{
1531	struct perf_event_attr *attr = &event->attr;
1532
1533	return attr->exclude_idle \|\| attr->exclude_user \|\|
1534	attr->exclude_kernel \|\| attr->exclude_hv \|\|
1535	attr->exclude_guest \|\| attr->exclude_host;
1536	}
1537
1538	static inline bool is_sampling_event(struct perf_event *event)
1539	{
1540	return event->attr.sample_period != `0`;
1541	}
1542
1543	/*
1544	* Return 1 for a software event, 0 for a hardware event
1545	*/
1546	static inline int is_software_event(struct perf_event *event)
1547	{
1548	return event->event_caps & PERF_EV_CAP_SOFTWARE;
1549	}
1550
1551	/*
1552	* Return 1 for event in sw context, 0 for event in hw context
1553	*/
1554	static inline int in_software_context(struct perf_event *event)
1555	{
1556	return event->pmu_ctx->pmu->task_ctx_nr == perf_sw_context;
1557	}
1558
1559	static inline int is_exclusive_pmu(struct pmu *pmu)
1560	{
1561	return pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE;
1562	}
1563
/*
 * One static key per software event id; tested by perf_sw_event() and
 * __perf_sw_enabled() before any out-of-line work is done.
 */
extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];

/*
 * Out-of-line software event entry points. The triple-underscore variant
 * is the one called from __perf_sw_event_sched().
 */
extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
1568
/* Fallback for architectures that do not provide their own: does nothing. */
#ifndef perf_arch_fetch_caller_regs
static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
#endif
1572
1573	/*
1574	* When generating a perf sample in-line, instead of from an interrupt /
1575	* exception, we lack a pt_regs. This is typically used from software events
1576	* like: SW_CONTEXT_SWITCHES, SW_MIGRATIONS and the tie-in with tracepoints.
1577	*
1578	* We typically don't need a full set, but (for x86) do require:
1579	* - ip for PERF_SAMPLE_IP
1580	* - cs for user_mode() tests
1581	* - sp for PERF_SAMPLE_CALLCHAIN
1582	* - eflags for MISC bits and CALLCHAIN (see: perf_hw_regs())
1583	*
1584	* NOTE: assumes @regs is otherwise already 0 filled; this is important for
1585	* things like PERF_SAMPLE_REGS_INTR.
1586	*/
1587	static inline void perf_fetch_caller_regs(struct pt_regs *regs)
1588	{
1589	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
1590	}
1591
1592	static __always_inline void
1593	perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
1594	{
1595	if (static_key_false(key: &perf_swevent_enabled[event_id]))
1596	__perf_sw_event(event_id, nr, regs, addr);
1597	}
1598
1599	DECLARE_PER_CPU(struct pt_regs, __perf_regs[`4`]);
1600
1601	/*
1602	* 'Special' version for the scheduler, it hard assumes no recursion,
1603	* which is guaranteed by us not actually scheduling inside other swevents
1604	* because those disable preemption.
1605	*/
1606	static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
1607	{
1608	struct pt_regs *regs = this_cpu_ptr(&__perf_regs[`0`]);
1609
1610	perf_fetch_caller_regs(regs);
1611	___perf_sw_event(event_id, nr, regs, addr);
1612	}
1613
1614	extern struct static_key_false perf_sched_events;
1615
1616	static __always_inline bool __perf_sw_enabled(int swevt)
1617	{
1618	return static_key_false(key: &perf_swevent_enabled[swevt]);
1619	}
1620
1621	static inline void perf_event_task_migrate(struct task_struct *task)
1622	{
1623	if (__perf_sw_enabled(swevt: PERF_COUNT_SW_CPU_MIGRATIONS))
1624	task->sched_migrated = `1`;
1625	}
1626
1627	static inline void perf_event_task_sched_in(struct task_struct *prev,
1628	struct task_struct *task)
1629	{
1630	if (static_branch_unlikely(&perf_sched_events))
1631	__perf_event_task_sched_in(prev, task);
1632
1633	if (__perf_sw_enabled(swevt: PERF_COUNT_SW_CPU_MIGRATIONS) &&
1634	task->sched_migrated) {
1635	__perf_sw_event_sched(event_id: PERF_COUNT_SW_CPU_MIGRATIONS, nr: `1`, addr: `0`);
1636	task->sched_migrated = `0`;
1637	}
1638	}
1639
1640	static inline void perf_event_task_sched_out(struct task_struct *prev,
1641	struct task_struct *next)
1642	{
1643	if (__perf_sw_enabled(swevt: PERF_COUNT_SW_CONTEXT_SWITCHES))
1644	__perf_sw_event_sched(event_id: PERF_COUNT_SW_CONTEXT_SWITCHES, nr: `1`, addr: `0`);
1645
1646	#ifdef CONFIG_CGROUP_PERF
1647	if (__perf_sw_enabled(swevt: PERF_COUNT_SW_CGROUP_SWITCHES) &&
1648	perf_cgroup_from_task(task: prev, NULL) !=
1649	perf_cgroup_from_task(task: next, NULL))
1650	__perf_sw_event_sched(event_id: PERF_COUNT_SW_CGROUP_SWITCHES, nr: `1`, addr: `0`);
1651	#endif
1652
1653	if (static_branch_unlikely(&perf_sched_events))
1654	__perf_event_task_sched_out(prev, next);
1655	}
1656
/* Side-band event notifications, implemented outside this header. */
extern void perf_event_mmap(struct vm_area_struct *vma);

extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
			       bool unregister, const char *sym);
extern void perf_event_bpf_event(struct bpf_prog *prog,
				 enum perf_bpf_event_type type,
				 u16 flags);

/* Bits of the value returned by perf_guest_state(). */
#define PERF_GUEST_ACTIVE	0x01
#define PERF_GUEST_USER		0x02
1667
/*
 * Callbacks a guest-aware subsystem registers through
 * perf_register_guest_info_callbacks(). All three take no arguments; the
 * return types match the perf_guest_*() wrappers in this header.
 */
struct perf_guest_info_callbacks {
	unsigned int			(*state)(void);
	unsigned long			(*get_ip)(void);
	unsigned int			(*handle_intel_pt_intr)(void);
};
1673
1674	#ifdef CONFIG_GUEST_PERF_EVENTS
1675
/* RCU-annotated pointer to the registered guest callbacks. */
extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;

/* One static call per callback, typed after the matching struct member. */
DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
1681
1682	static inline unsigned int perf_guest_state(void)
1683	{
1684	return static_call(__perf_guest_state)();
1685	}
1686
1687	static inline unsigned long perf_guest_get_ip(void)
1688	{
1689	return static_call(__perf_guest_get_ip)();
1690	}
1691
1692	static inline unsigned int perf_guest_handle_intel_pt_intr(void)
1693	{
1694	return static_call(__perf_guest_handle_intel_pt_intr)();
1695	}
1696
/* Registration and removal of a struct perf_guest_info_callbacks. */
extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
1699
1700	#else /* !CONFIG_GUEST_PERF_EVENTS: */
1701
/* Without CONFIG_GUEST_PERF_EVENTS the guest queries all report 0. */
static inline unsigned int perf_guest_state(void)		 { return 0; }
static inline unsigned long perf_guest_get_ip(void)		 { return 0; }
static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; }
1705
1706	#endif /* !CONFIG_GUEST_PERF_EVENTS */
1707
/* Process and text-modification notifications, implemented outside this header. */
extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_namespaces(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);
extern void perf_event_text_poke(const void *addr,
				 const void *old_bytes, size_t old_len,
				 const void *new_bytes, size_t new_len);
1715
1716	/ Callchains /
1717	DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
1718
1719	extern void perf_callchain_user(struct perf_callchain_entry_ctx entry, struct* pt_regs *regs);
1720	extern void perf_callchain_kernel(struct perf_callchain_entry_ctx entry, struct* pt_regs *regs);
1721	extern struct perf_callchain_entry *
1722	get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
1723	u32 max_stack, bool crosstask, bool add_mark, u64 defer_cookie);
1724	extern int get_callchain_buffers(int max_stack);
1725	extern void put_callchain_buffers(void);
1726	extern struct perf_callchain_entry get_callchain_entry(int* *rctx);
1727	extern void put_callchain_entry(int rctx);
1728
1729	extern int sysctl_perf_event_max_stack;
1730	extern int sysctl_perf_event_max_contexts_per_stack;
1731
1732	static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip)
1733	{
1734	if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) {
1735	struct perf_callchain_entry *entry = ctx->entry;
1736
1737	entry->ip[entry->nr++] = ip;
1738	++ctx->contexts;
1739	return `0`;
1740	} else {
1741	ctx->contexts_maxed = true;
1742	return -`1`; / no more room, stop walking the stack /
1743	}
1744	}
1745
1746	static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip)
1747	{
1748	if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) {
1749	struct perf_callchain_entry *entry = ctx->entry;
1750
1751	entry->ip[entry->nr++] = ip;
1752	++ctx->nr;
1753	return `0`;
1754	} else {
1755	return -`1`; / no more room, stop walking the stack /
1756	}
1757	}
1758
1759	extern int sysctl_perf_event_paranoid;
1760	extern int sysctl_perf_event_sample_rate;
1761
1762	extern void perf_sample_event_took(u64 sample_len_ns);
1763
1764	/ Access to perf_event_open(2) syscall. /
1765	#define PERF_SECURITY_OPEN 0
1766
1767	/ Finer grained perf_event_open(2) access control. /
1768	#define PERF_SECURITY_CPU 1
1769	#define PERF_SECURITY_KERNEL 2
1770	#define PERF_SECURITY_TRACEPOINT 3
1771
1772	static inline int perf_is_paranoid(void)
1773	{
1774	return sysctl_perf_event_paranoid > -`1`;
1775	}
1776
1777	extern int perf_allow_kernel(void);
1778
1779	static inline int perf_allow_cpu(void)
1780	{
1781	if (sysctl_perf_event_paranoid > `0` && !perfmon_capable())
1782	return -EACCES;
1783
1784	return security_perf_event_open(PERF_SECURITY_CPU);
1785	}
1786
1787	static inline int perf_allow_tracepoint(void)
1788	{
1789	if (sysctl_perf_event_paranoid > -`1` && !perfmon_capable())
1790	return -EPERM;
1791
1792	return security_perf_event_open(PERF_SECURITY_TRACEPOINT);
1793	}
1794
1795	extern int perf_exclude_event(struct perf_event event, struct* pt_regs *regs);
1796
1797	extern void perf_event_init(void);
1798	extern void perf_tp_event(u16 event_type, u64 count, void *record,
1799	int entry_size, struct pt_regs *regs,
1800	struct hlist_head head, int* rctx,
1801	struct task_struct *task);
1802	extern void perf_bp_event(struct perf_event event, void* *data);
1803
1804	extern unsigned long perf_misc_flags(struct perf_event event, struct* pt_regs *regs);
1805	extern unsigned long perf_instruction_pointer(struct perf_event *event,
1806	struct pt_regs *regs);
1807
/*
 * Generic fallbacks, used only when the architecture has not defined
 * perf_arch_misc_flags: classify by user_mode(regs) and take the ip from
 * instruction_pointer(regs).
 */
#ifndef perf_arch_misc_flags
# define perf_arch_misc_flags(regs) \
		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
# define perf_arch_instruction_pointer(regs)	instruction_pointer(regs)
#endif
/* Default: the regs are passed through unchanged. */
#ifndef perf_arch_bpf_user_pt_regs
# define perf_arch_bpf_user_pt_regs(regs) regs
#endif
1816
1817	#ifndef perf_arch_guest_misc_flags
1818	static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
1819	{
1820	unsigned long guest_state = perf_guest_state();
1821
1822	if (!(guest_state & PERF_GUEST_ACTIVE))
1823	return `0`;
1824
1825	if (guest_state & PERF_GUEST_USER)
1826	return PERF_RECORD_MISC_GUEST_USER;
1827	else
1828	return PERF_RECORD_MISC_GUEST_KERNEL;
1829	}
1830	# define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
1831	#endif
1832
1833	static inline bool needs_branch_stack(struct perf_event *event)
1834	{
1835	return event->attr.branch_sample_type != `0`;
1836	}
1837
1838	static inline bool has_aux(struct perf_event *event)
1839	{
1840	return event->pmu && event->pmu->setup_aux;
1841	}
1842
1843	static inline bool has_aux_action(struct perf_event *event)
1844	{
1845	return event->attr.aux_sample_size \|\|
1846	event->attr.aux_pause \|\|
1847	event->attr.aux_resume;
1848	}
1849
1850	static inline bool is_write_backward(struct perf_event *event)
1851	{
1852	return !!event->attr.write_backward;
1853	}
1854
1855	static inline bool has_addr_filter(struct perf_event *event)
1856	{
1857	return event->pmu->nr_addr_filters;
1858	}
1859
1860	/*
1861	* An inherited event uses parent's filters
1862	*/
1863	static inline struct perf_addr_filters_head *
1864	perf_event_addr_filters(struct perf_event *event)
1865	{
1866	struct perf_addr_filters_head *ifh = &event->addr_filters;
1867
1868	if (event->parent)
1869	ifh = &event->parent->addr_filters;
1870
1871	return ifh;
1872	}
1873
1874	static inline struct fasync_struct perf_event_fasync(struct** perf_event *event)
1875	{
1876	/ Only the parent has fasync state /
1877	if (event->parent)
1878	event = event->parent;
1879	return &event->fasync;
1880	}
1881
1882	extern void perf_event_addr_filters_sync(struct perf_event *event);
1883	extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
1884
1885	extern int perf_output_begin(struct perf_output_handle *handle,
1886	struct perf_sample_data *data,
1887	struct perf_event event, unsigned* int size);
1888	extern int perf_output_begin_forward(struct perf_output_handle *handle,
1889	struct perf_sample_data *data,
1890	struct perf_event *event,
1891	unsigned int size);
1892	extern int perf_output_begin_backward(struct perf_output_handle *handle,
1893	struct perf_sample_data *data,
1894	struct perf_event *event,
1895	unsigned int size);
1896
1897	extern void perf_output_end(struct perf_output_handle *handle);
1898	extern unsigned int perf_output_copy(struct perf_output_handle *handle,
1899	const void buf, unsigned* int len);
1900	extern unsigned int perf_output_skip(struct perf_output_handle *handle,
1901	unsigned int len);
1902	extern long perf_output_copy_aux(struct perf_output_handle *aux_handle,
1903	struct perf_output_handle *handle,
1904	unsigned long from, unsigned long to);
1905	extern int perf_swevent_get_recursion_context(void);
1906	extern void perf_swevent_put_recursion_context(int rctx);
1907	extern u64 perf_swevent_set_period(struct perf_event *event);
1908	extern void perf_event_enable(struct perf_event *event);
1909	extern void perf_event_disable(struct perf_event *event);
1910	extern void perf_event_disable_local(struct perf_event *event);
1911	extern void perf_event_disable_inatomic(struct perf_event *event);
1912	extern void perf_event_task_tick(void);
1913	extern int perf_event_account_interrupt(struct perf_event *event);
1914	extern int perf_event_period(struct perf_event *event, u64 value);
1915	extern u64 perf_event_pause(struct perf_event *event, bool reset);
1916
1917	#else /* !CONFIG_PERF_EVENTS: */
1918
1919	static inline void *
1920	perf_aux_output_begin(struct perf_output_handle *handle,
1921	struct perf_event event) { return* NULL; }
1922	static inline void
1923	perf_aux_output_end(struct perf_output_handle handle, unsigned* long size)
1924	{ }
1925	static inline int
1926	perf_aux_output_skip(struct perf_output_handle *handle,
1927	unsigned long size) { return -EINVAL; }
1928	static inline void *
1929	perf_get_aux(struct perf_output_handle handle) { return* NULL; }
1930	static inline void
1931	perf_event_task_migrate(struct task_struct *task) { }
1932	static inline void
1933	perf_event_task_sched_in(struct task_struct *prev,
1934	struct task_struct *task) { }
1935	static inline void
1936	perf_event_task_sched_out(struct task_struct *prev,
1937	struct task_struct *next) { }
1938	static inline int perf_event_init_task(struct task_struct *child,
1939	u64 clone_flags) { return `0`; }
1940	static inline void perf_event_exit_task(struct task_struct *child) { }
1941	static inline void perf_event_free_task(struct task_struct *task) { }
1942	static inline void perf_event_delayed_put(struct task_struct *task) { }
1943	static inline struct file perf_event_get(unsigned* int fd) { return ERR_PTR(-EINVAL); }
1944	static inline const struct perf_event perf_get_event(struct* file *file)
1945	{
1946	return ERR_PTR(-EINVAL);
1947	}
1948	static inline const struct perf_event_attr perf_event_attrs(struct* perf_event *event)
1949	{
1950	return ERR_PTR(-EINVAL);
1951	}
1952	static inline int perf_event_read_local(struct perf_event event, u64 value,
1953	u64 enabled, u64 running)
1954	{
1955	return -EINVAL;
1956	}
1957	static inline void perf_event_print_debug(void) { }
1958	static inline int perf_event_task_disable(void) { return -EINVAL; }
1959	static inline int perf_event_task_enable(void) { return -EINVAL; }
1960	static inline int perf_event_refresh(struct perf_event event, int* refresh)
1961	{
1962	return -EINVAL;
1963	}
1964
1965	static inline void
1966	perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { }
1967	static inline void
1968	perf_bp_event(struct perf_event event, void* *data) { }
1969
1970	static inline void perf_event_mmap(struct vm_area_struct *vma) { }
1971
1972	typedef int (perf_ksymbol_get_name_f)(char name, int* name_len, void *data);
1973	static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
1974	bool unregister, const char *sym) { }
1975	static inline void perf_event_bpf_event(struct bpf_prog *prog,
1976	enum perf_bpf_event_type type,
1977	u16 flags) { }
1978	static inline void perf_event_exec(void) { }
1979	static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
1980	static inline void perf_event_namespaces(struct task_struct *tsk) { }
1981	static inline void perf_event_fork(struct task_struct *tsk) { }
1982	static inline void perf_event_text_poke(const void *addr,
1983	const void *old_bytes,
1984	size_t old_len,
1985	const void *new_bytes,
1986	size_t new_len) { }
/*
 * !CONFIG_PERF_EVENTS: initialisation is a no-op and no recursion context
 * is ever handed out (-1); putting one back does nothing.
 */
static inline void perf_event_init(void)				{ }
static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
static inline void perf_swevent_put_recursion_context(int rctx)		{ }
1990	static inline u64 perf_swevent_set_period(struct perf_event event) { return* `0`; }
1991	static inline void perf_event_enable(struct perf_event *event) { }
1992	static inline void perf_event_disable(struct perf_event *event) { }
1993	static inline int __perf_event_disable(void info) { return* -`1`; }
1994	static inline void perf_event_task_tick(void) { }
1995	static inline int perf_event_release_kernel(struct perf_event event) { return* `0`; }
1996	static inline int
1997	perf_event_period(struct perf_event event, u64 value) { return* -EINVAL; }
1998	static inline u64
1999	perf_event_pause(struct perf_event event, bool reset) { return* `0`; }
2000	static inline int
2001	perf_exclude_event(struct perf_event event, struct* pt_regs regs) { return* `0`; }
2002
2003	#endif /* !CONFIG_PERF_EVENTS */
2004
/*
 * Only declared as a real function when both CONFIG_PERF_EVENTS and
 * CONFIG_CPU_SUP_INTEL are set; every other configuration gets an empty
 * inline stub.
 */
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern void perf_restore_debug_store(void);
#else
static inline void perf_restore_debug_store(void)			{ }
#endif
2010
/*
 * Copy the object @x itself (sizeof(x) bytes from &x) to @handle via
 * perf_output_copy(). @x must be an lvalue since its address is taken.
 */
#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
2012
/*
 * Wrappers around struct device_attribute carrying extra per-attribute
 * data. The PMU_EVENT_ATTR*() macros in this header build instances of
 * the first one.
 */
struct perf_pmu_events_attr {
	struct device_attribute		attr;
	u64				id;
	const char			*event_str;
};

/*
 * Variant with two event strings.
 * NOTE(review): "ht"/"noht" presumably mean hyper-threading on/off; confirm.
 */
struct perf_pmu_events_ht_attr {
	struct device_attribute		attr;
	u64				id;
	const char			*event_str_ht;
	const char			*event_str_noht;
};

/* Event attribute that additionally records a pmu_type. */
struct perf_pmu_events_hybrid_attr {
	struct device_attribute		attr;
	u64				id;
	const char			*event_str;
	u64				pmu_type;
};

/* Format attribute that additionally records a pmu_type. */
struct perf_pmu_format_hybrid_attr {
	struct device_attribute		attr;
	u64				pmu_type;
};
2037
/*
 * Show callback that PMU_EVENT_ATTR_STRING() installs for its read-only
 * attribute. Takes the device, the attribute being read and the output
 * buffer @page.
 */
ssize_t perf_event_sysfs_show(struct device *dev,
			      struct device_attribute *attr,
			      char *page);
2040
/*
 * Define a static struct perf_pmu_events_attr named @_var whose attribute
 * @_name is read-only (mode 0444), shown by @_show, and tagged with @_id.
 * The expansion already ends in ';'.
 */
#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
static struct perf_pmu_events_attr _var = {				\
	.attr	= __ATTR(_name, 0444, _show, NULL),			\
	.id	= _id,							\
};
2046
/*
 * Define a static struct perf_pmu_events_attr named @_var whose read-only
 * attribute @_name is shown by perf_event_sysfs_show(). The id is 0 and
 * the event string is @_str. The expansion already ends in ';'.
 */
#define PMU_EVENT_ATTR_STRING(_name, _var, _str)			\
static struct perf_pmu_events_attr _var = {				\
	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
	.id		= 0,						\
	.event_str	= _str,						\
};
2053
/*
 * Expression form of PMU_EVENT_ATTR(): builds an anonymous one-element
 * array of struct perf_pmu_events_attr (read-only attribute @_name, shown
 * by @_show, id @_id) and evaluates to the address of the innermost
 * .attr.attr member of that element.
 */
#define PMU_EVENT_ATTR_ID(_name, _show, _id)				\
(&((struct perf_pmu_events_attr[]) {					\
	{								\
		.attr	= __ATTR(_name, 0444, _show, NULL),		\
		.id	= _id,						\
	}								\
})[0].attr.attr)
2059
/*
 * Define the static show function <_name>_show(), which writes the string
 * literal @_format followed by a newline into @page and returns the
 * sprintf() result. The build fails if @_format (including its NUL) is
 * PAGE_SIZE bytes or larger.
 *
 * The closing brace is the last line of the macro: there is deliberately
 * no trailing line-continuation after it.
 */
#define PMU_FORMAT_ATTR_SHOW(_name, _format)				\
static ssize_t								\
_name##_show(struct device *dev,					\
	     struct device_attribute *attr,				\
	     char *page)						\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}
2069
/*
 * Define <_name>_show() via PMU_FORMAT_ATTR_SHOW() and a matching static
 * read-only device attribute named format_attr_<_name>. The caller
 * supplies the terminating ';'.
 */
#define PMU_FORMAT_ATTR(_name, _format)					\
	PMU_FORMAT_ATTR_SHOW(_name, _format)				\
									\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
2074
/*
 * Performance counter hotplug functions.
 *
 * Without CONFIG_PERF_EVENTS both names expand to NULL.
 */
#ifdef CONFIG_PERF_EVENTS
extern int perf_event_init_cpu(unsigned int cpu);
extern int perf_event_exit_cpu(unsigned int cpu);
#else
# define perf_event_init_cpu	NULL
# define perf_event_exit_cpu	NULL
#endif
2083
2084	extern void arch_perf_update_userpage(struct perf_event *event,
2085	struct perf_event_mmap_page *userpg,
2086	u64 now);
2087
/*
 * Snapshot branch stack on software events.
 *
 * A branch stack can be very useful in understanding software events. For
 * example, when a long function, e.g. sys_perf_event_open, returns an
 * errno, it is not obvious why the function failed. The branch stack can
 * provide very helpful information in this type of scenario.
 *
 * On a software event, it is necessary to stop the hardware branch recorder
 * fast. Otherwise, the hardware register/buffer will be flushed with
 * entries of the triggering event. Therefore, a static call is used to
 * stop the hardware recorder.
 */
2101
2102	/*
2103	* cnt is the number of entries allocated for entries.
2104	* Return number of entries copied to .
2105	*/
2106	typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
2107	unsigned int cnt);
2108	DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
2109
/*
 * Default, empty perf_lopwr_cb(). It is left out when PERF_NEEDS_LOPWR_CB
 * is defined (presumably by code that supplies its own version; confirm
 * against the definer of that macro).
 */
#ifndef PERF_NEEDS_LOPWR_CB
static inline void perf_lopwr_cb(bool mode)
{
}
#endif
2115
2116	#endif /* _LINUX_PERF_EVENT_H */
2117

/* source code of linux/include/linux/perf_event.h */