linux/kernel/events/internal.h
Adrian Hunter 18d92bb57c perf/core: Add aux_pause, aux_resume, aux_start_paused
Hardware traces, such as instruction traces, can produce a vast amount of
trace data, so being able to reduce tracing to more specific circumstances
can be useful.

The ability to pause or resume tracing when another event happens, can do
that.

Add ability for an event to "pause" or "resume" AUX area tracing.

Add aux_pause bit to perf_event_attr to indicate that, if the event
happens, the associated AUX area tracing should be paused. Ditto
aux_resume. Do not allow aux_pause and aux_resume to be set together.

Add aux_start_paused bit to perf_event_attr to indicate to an AUX area
event that it should start in a "paused" state.

Add aux_paused to struct hw_perf_event for AUX area events to keep track of
the "paused" state. aux_paused is initialized to aux_start_paused.

Add PERF_EF_PAUSE and PERF_EF_RESUME modes for ->stop() and ->start()
callbacks. Call as needed, during __perf_event_output(). Add
aux_in_pause_resume to struct perf_buffer to prevent races with the NMI
handler. Pause/resume in NMI context will miss out if it coincides with
another pause/resume.

To use aux_pause or aux_resume, an event must be in a group with the AUX
area event as the group leader.

Example (requires Intel PT and tools patches also):

 $ perf record --kcore -e intel_pt/aux-action=start-paused/k,syscalls:sys_enter_newuname/aux-action=resume/,syscalls:sys_exit_newuname/aux-action=pause/ uname
 Linux
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.043 MB perf.data ]
 $ perf script --call-trace
 uname   30805 [000] 24001.058782799: name: 0x7ffc9c1865b0
 uname   30805 [000] 24001.058784424:  psb offs: 0
 uname   30805 [000] 24001.058784424:  cbr: 39 freq: 3904 MHz (139%)
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])        debug_smp_processor_id
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])        __x64_sys_newuname
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])            down_read
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                __cond_resched
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                preempt_count_add
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                    in_lock_functions
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                preempt_count_sub
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])            up_read
 uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                preempt_count_add
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                    in_lock_functions
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                preempt_count_sub
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])            _copy_to_user
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])        syscall_exit_to_user_mode
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])            syscall_exit_work
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                perf_syscall_exit
 uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                    debug_smp_processor_id
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                    perf_trace_buf_alloc
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        perf_swevent_get_recursion_context
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            debug_smp_processor_id
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        debug_smp_processor_id
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                    perf_tp_event
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        perf_trace_buf_update
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            tracing_gen_ctx_irq_test
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        perf_swevent_event
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            __perf_event_account_interrupt
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                __this_cpu_preempt_check
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            perf_event_output_forward
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                perf_event_aux_pause
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                    ring_buffer_get
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                        __rcu_read_lock
 uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                        __rcu_read_unlock
 uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                    pt_event_stop
 uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                        debug_smp_processor_id
 uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                        debug_smp_processor_id
 uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                        native_write_msr
 uname   30805 [000] 24001.058785463: ([kernel.kallsyms])                                        native_write_msr
 uname   30805 [000] 24001.058785639: 0x0

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: James Clark <james.clark@arm.com>
Link: https://lkml.kernel.org/r/20241022155920.17511-3-adrian.hunter@intel.com
2024-11-05 12:55:43 +01:00

249 lines
5.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _KERNEL_EVENTS_INTERNAL_H
#define _KERNEL_EVENTS_INTERNAL_H
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/refcount.h>
/* Buffer handling */
#define RING_BUFFER_WRITABLE 0x01
struct perf_buffer {
refcount_t refcount;
struct rcu_head rcu_head;
#ifdef CONFIG_PERF_USE_VMALLOC
struct work_struct work;
int page_order; /* allocation order */
#endif
int nr_pages; /* nr of data pages */
int overwrite; /* can overwrite itself */
int paused; /* can write into ring buffer */
atomic_t poll; /* POLL_ for wakeups */
local_t head; /* write position */
unsigned int nest; /* nested writers */
local_t events; /* event limit */
local_t wakeup; /* wakeup stamp */
local_t lost; /* nr records lost */
long watermark; /* wakeup watermark */
long aux_watermark;
/* poll crap */
spinlock_t event_lock;
struct list_head event_list;
atomic_t mmap_count;
unsigned long mmap_locked;
struct user_struct *mmap_user;
/* AUX area */
struct mutex aux_mutex;
long aux_head;
unsigned int aux_nest;
long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */
unsigned long aux_pgoff;
int aux_nr_pages;
int aux_overwrite;
atomic_t aux_mmap_count;
unsigned long aux_mmap_locked;
void (*free_aux)(void *);
refcount_t aux_refcount;
int aux_in_sampling;
int aux_in_pause_resume;
void **aux_pages;
void *aux_priv;
struct perf_event_mmap_page *user_page;
void *data_pages[];
};
extern void rb_free(struct perf_buffer *rb);
static inline void rb_free_rcu(struct rcu_head *rcu_head)
{
struct perf_buffer *rb;
rb = container_of(rcu_head, struct perf_buffer, rcu_head);
rb_free(rb);
}
static inline void rb_toggle_paused(struct perf_buffer *rb, bool pause)
{
if (!pause && rb->nr_pages)
rb->paused = 0;
else
rb->paused = 1;
}
extern struct perf_buffer *
rb_alloc(int nr_pages, long watermark, int cpu, int flags);
extern void perf_event_wakeup(struct perf_event *event);
extern int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
pgoff_t pgoff, int nr_pages, long watermark, int flags);
extern void rb_free_aux(struct perf_buffer *rb);
extern struct perf_buffer *ring_buffer_get(struct perf_event *event);
extern void ring_buffer_put(struct perf_buffer *rb);
static inline bool rb_has_aux(struct perf_buffer *rb)
{
return !!rb->aux_nr_pages;
}
void perf_event_aux_event(struct perf_event *event, unsigned long head,
unsigned long size, u64 flags);
extern struct page *
perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff);
#ifdef CONFIG_PERF_USE_VMALLOC
/*
* Back perf_mmap() with vmalloc memory.
*
* Required for architectures that have d-cache aliasing issues.
*/
static inline int page_order(struct perf_buffer *rb)
{
return rb->page_order;
}
#else
static inline int page_order(struct perf_buffer *rb)
{
return 0;
}
#endif
static inline int data_page_nr(struct perf_buffer *rb)
{
return rb->nr_pages << page_order(rb);
}
static inline unsigned long perf_data_size(struct perf_buffer *rb)
{
return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
}
static inline unsigned long perf_aux_size(struct perf_buffer *rb)
{
return (unsigned long)rb->aux_nr_pages << PAGE_SHIFT;
}
#define __DEFINE_OUTPUT_COPY_BODY(advance_buf, memcpy_func, ...) \
{ \
unsigned long size, written; \
\
do { \
size = min(handle->size, len); \
written = memcpy_func(__VA_ARGS__); \
written = size - written; \
\
len -= written; \
handle->addr += written; \
if (advance_buf) \
buf += written; \
handle->size -= written; \
if (!handle->size) { \
struct perf_buffer *rb = handle->rb; \
\
handle->page++; \
handle->page &= rb->nr_pages - 1; \
handle->addr = rb->data_pages[handle->page]; \
handle->size = PAGE_SIZE << page_order(rb); \
} \
} while (len && written == size); \
\
return len; \
}
#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \
static inline unsigned long \
func_name(struct perf_output_handle *handle, \
const void *buf, unsigned long len) \
__DEFINE_OUTPUT_COPY_BODY(true, memcpy_func, handle->addr, buf, size)
static inline unsigned long
__output_custom(struct perf_output_handle *handle, perf_copy_f copy_func,
const void *buf, unsigned long len)
{
unsigned long orig_len = len;
__DEFINE_OUTPUT_COPY_BODY(false, copy_func, handle->addr, buf,
orig_len - len, size)
}
static inline unsigned long
memcpy_common(void *dst, const void *src, unsigned long n)
{
memcpy(dst, src, n);
return 0;
}
DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)
static inline unsigned long
memcpy_skip(void *dst, const void *src, unsigned long n)
{
return 0;
}
DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip)
#ifndef arch_perf_out_copy_user
#define arch_perf_out_copy_user arch_perf_out_copy_user
static inline unsigned long
arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
{
unsigned long ret;
pagefault_disable();
ret = __copy_from_user_inatomic(dst, src, n);
pagefault_enable();
return ret;
}
#endif
DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
static inline int get_recursion_context(u8 *recursion)
{
unsigned char rctx = interrupt_context_level();
if (recursion[rctx])
return -1;
recursion[rctx]++;
barrier();
return rctx;
}
static inline void put_recursion_context(u8 *recursion, unsigned char rctx)
{
barrier();
recursion[rctx]--;
}
#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP
static inline bool arch_perf_have_user_stack_dump(void)
{
return true;
}
#define perf_user_stack_pointer(regs) user_stack_pointer(regs)
#else
static inline bool arch_perf_have_user_stack_dump(void)
{
return false;
}
#define perf_user_stack_pointer(regs) 0
#endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */
#endif /* _KERNEL_EVENTS_INTERNAL_H */