mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 11:35:45 +00:00
Performance events changes for v6.13:
- Uprobes: - Add BPF session support (Jiri Olsa) - Switch to RCU Tasks Trace flavor for better performance (Andrii Nakryiko) - Massively increase uretprobe SMP scalability by SRCU-protecting the uretprobe lifetime (Andrii Nakryiko) - Kill xol_area->slot_count (Oleg Nesterov) - Core facilities: - Implement targeted high-frequency profiling by adding the ability for an event to "pause" or "resume" AUX area tracing (Adrian Hunter) - VM profiling/sampling: - Correct perf sampling with guest VMs (Colton Lewis) - New hardware support: - x86/intel: Add PMU support for Intel ArrowLake-H CPUs (Dapeng Mi) - Misc fixes and enhancements: - x86/intel/pt: Fix buffer full but size is 0 case (Adrian Hunter) - x86/amd: Warn only on new bits set (Breno Leitao) - x86/amd/uncore: Avoid a false positive warning about snprintf truncation in amd_uncore_umc_ctx_init (Jean Delvare) - uprobes: Re-order struct uprobe_task to save some space (Christophe JAILLET) - x86/rapl: Move the pmu allocation out of CPU hotplug (Kan Liang) - x86/rapl: Clean up cpumask and hotplug (Kan Liang) - uprobes: Deuglify xol_get_insn_slot/xol_free_insn_slot paths (Oleg Nesterov) Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmc7eKERHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1i57A/+KQ6TrIoICVTE+BPlDfUw8NU+N3DagVb0 dzoyDxlDRsnsYzeXZipPn+3IitX1w+DrGxBNIojSoiFVCLnHIKgo4uHbj7cVrR7J fBTVSnoJ94SGAk5ySebvLwMLce/YhXBeHK2lx6W/pI6acNcxzDfIabjjETeqltUo g7hmT9lo10pzZEZyuUfYX9khlWBxda1dKHc9pMIq7baeLe4iz/fCGlJ0K4d4M4z3 NPZw239Np6iHUwu3Lcs4gNKe4rcDe7Bt47hpedemHe0Y+7c4s2HaPxbXWxvDtE76 mlsg93i28f8SYxeV83pREn0EOCptXcljhiek+US+GR7NSbltMnV+uUiDfPKIE9+Y vYP/DYF9hx73FsOucEFrHxYYcePorn3pne5/khBYWdQU6TnlrBYWpoLQsjgCKTTR 4JhCFlBZ5cDpc6ihtpwCwVTQ4Q/H7vM1XOlDwx0hPhcIPPHDreaQD/wxo61jBdXf PY0EPAxh3BcQxfPYuDS+XiYjQ8qO8MtXMKz5bZyHBZlbHwccV6T4ExjsLKxFk5As 6BG8pkBWLg7drXAgVdleIY0ux+34w/Zzv7gemdlQxvWLlZrVvpjiG93oU3PTpZeq 
A2UD9eAOuXVD6+HsF/dmn88sFmcLWbrMskFWujkvhEUmCvSGAnz3YSS/mLEawBiT 2xI8xykNWSY= =ItOT -----END PGP SIGNATURE----- Merge tag 'perf-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull performance events updates from Ingo Molnar: "Uprobes: - Add BPF session support (Jiri Olsa) - Switch to RCU Tasks Trace flavor for better performance (Andrii Nakryiko) - Massively increase uretprobe SMP scalability by SRCU-protecting the uretprobe lifetime (Andrii Nakryiko) - Kill xol_area->slot_count (Oleg Nesterov) Core facilities: - Implement targeted high-frequency profiling by adding the ability for an event to "pause" or "resume" AUX area tracing (Adrian Hunter) VM profiling/sampling: - Correct perf sampling with guest VMs (Colton Lewis) New hardware support: - x86/intel: Add PMU support for Intel ArrowLake-H CPUs (Dapeng Mi) Misc fixes and enhancements: - x86/intel/pt: Fix buffer full but size is 0 case (Adrian Hunter) - x86/amd: Warn only on new bits set (Breno Leitao) - x86/amd/uncore: Avoid a false positive warning about snprintf truncation in amd_uncore_umc_ctx_init (Jean Delvare) - uprobes: Re-order struct uprobe_task to save some space (Christophe JAILLET) - x86/rapl: Move the pmu allocation out of CPU hotplug (Kan Liang) - x86/rapl: Clean up cpumask and hotplug (Kan Liang) - uprobes: Deuglify xol_get_insn_slot/xol_free_insn_slot paths (Oleg Nesterov)" * tag 'perf-core-2024-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (32 commits) perf/core: Correct perf sampling with guest VMs perf/x86: Refactor misc flag assignments perf/powerpc: Use perf_arch_instruction_pointer() perf/core: Hoist perf_instruction_pointer() and perf_misc_flags() perf/arm: Drop unused functions uprobes: Re-order struct uprobe_task to save some space perf/x86/amd/uncore: Avoid a false positive warning about snprintf truncation in amd_uncore_umc_ctx_init perf/x86/intel: Do not enable large PEBS for events with aux actions or aux sampling perf/x86/intel/pt: Add 
support for pause / resume perf/core: Add aux_pause, aux_resume, aux_start_paused perf/x86/intel/pt: Fix buffer full but size is 0 case uprobes: SRCU-protect uretprobe lifetime (with timeout) uprobes: allow put_uprobe() from non-sleepable softirq context perf/x86/rapl: Clean up cpumask and hotplug perf/x86/rapl: Move the pmu allocation out of CPU hotplug uprobe: Add support for session consumer uprobe: Add data pointer to consumer handlers perf/x86/amd: Warn only on new bits set uprobes: fold xol_take_insn_slot() into xol_get_insn_slot() uprobes: kill xol_area->slot_count ...
This commit is contained in:
commit
f41dac3efb
@ -135,6 +135,7 @@ config KPROBES_ON_FTRACE
|
||||
config UPROBES
|
||||
def_bool n
|
||||
depends on ARCH_SUPPORTS_UPROBES
|
||||
select TASKS_TRACE_RCU
|
||||
help
|
||||
Uprobes is the user-space counterpart to kprobes: they
|
||||
enable instrumentation applications (such as 'perf probe')
|
||||
|
@ -8,13 +8,6 @@
|
||||
#ifndef __ARM_PERF_EVENT_H__
|
||||
#define __ARM_PERF_EVENT_H__
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
struct pt_regs;
|
||||
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
|
||||
extern unsigned long perf_misc_flags(struct pt_regs *regs);
|
||||
#define perf_misc_flags(regs) perf_misc_flags(regs)
|
||||
#endif
|
||||
|
||||
#define perf_arch_fetch_caller_regs(regs, __ip) { \
|
||||
(regs)->ARM_pc = (__ip); \
|
||||
frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \
|
||||
|
@ -96,20 +96,3 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
|
||||
arm_get_current_stackframe(regs, &fr);
|
||||
walk_stackframe(&fr, callchain_trace, entry);
|
||||
}
|
||||
|
||||
/*
 * perf_instruction_pointer - address reported for a perf sample.
 * @regs: register state captured for the sample.
 *
 * Forwards to instruction_pointer() for @regs.
 */
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
	unsigned long ip = instruction_pointer(regs);

	return ip;
}
|
||||
|
||||
unsigned long perf_misc_flags(struct pt_regs *regs)
|
||||
{
|
||||
int misc = 0;
|
||||
|
||||
if (user_mode(regs))
|
||||
misc |= PERF_RECORD_MISC_USER;
|
||||
else
|
||||
misc |= PERF_RECORD_MISC_KERNEL;
|
||||
|
||||
return misc;
|
||||
}
|
||||
|
@ -10,10 +10,6 @@
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
struct pt_regs;
|
||||
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
|
||||
extern unsigned long perf_misc_flags(struct pt_regs *regs);
|
||||
#define perf_misc_flags(regs) perf_misc_flags(regs)
|
||||
#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
|
||||
#endif
|
||||
|
||||
|
@ -38,31 +38,3 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
|
||||
|
||||
arch_stack_walk(callchain_trace, entry, current, regs);
|
||||
}
|
||||
|
||||
/*
 * perf_instruction_pointer - address reported for a perf sample.
 * @regs: register state captured for the sample.
 *
 * When perf_guest_state() is zero the address comes from @regs via
 * instruction_pointer(); otherwise it comes from perf_guest_get_ip().
 */
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
	if (!perf_guest_state())
		return instruction_pointer(regs);

	return perf_guest_get_ip();
}
|
||||
|
||||
unsigned long perf_misc_flags(struct pt_regs *regs)
|
||||
{
|
||||
unsigned int guest_state = perf_guest_state();
|
||||
int misc = 0;
|
||||
|
||||
if (guest_state) {
|
||||
if (guest_state & PERF_GUEST_USER)
|
||||
misc |= PERF_RECORD_MISC_GUEST_USER;
|
||||
else
|
||||
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
|
||||
} else {
|
||||
if (user_mode(regs))
|
||||
misc |= PERF_RECORD_MISC_USER;
|
||||
else
|
||||
misc |= PERF_RECORD_MISC_KERNEL;
|
||||
}
|
||||
|
||||
return misc;
|
||||
}
|
||||
|
@ -102,8 +102,8 @@ struct power_pmu {
|
||||
int __init register_power_pmu(struct power_pmu *pmu);
|
||||
|
||||
struct pt_regs;
|
||||
extern unsigned long perf_misc_flags(struct pt_regs *regs);
|
||||
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
|
||||
extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
|
||||
extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
|
||||
extern unsigned long int read_bhrb(int n);
|
||||
|
||||
/*
|
||||
@ -111,7 +111,7 @@ extern unsigned long int read_bhrb(int n);
|
||||
* if we have hardware PMU support.
|
||||
*/
|
||||
#ifdef CONFIG_PPC_PERF_CTRS
|
||||
#define perf_misc_flags(regs) perf_misc_flags(regs)
|
||||
#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -51,7 +51,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
|
||||
|
||||
lr = regs->link;
|
||||
sp = regs->gpr[1];
|
||||
perf_callchain_store(entry, perf_instruction_pointer(regs));
|
||||
perf_callchain_store(entry, perf_arch_instruction_pointer(regs));
|
||||
|
||||
if (!validate_sp(sp, current))
|
||||
return;
|
||||
|
@ -139,7 +139,7 @@ void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
|
||||
long level = 0;
|
||||
unsigned int __user *fp, *uregs;
|
||||
|
||||
next_ip = perf_instruction_pointer(regs);
|
||||
next_ip = perf_arch_instruction_pointer(regs);
|
||||
lr = regs->link;
|
||||
sp = regs->gpr[1];
|
||||
perf_callchain_store(entry, next_ip);
|
||||
|
@ -74,7 +74,7 @@ void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
|
||||
struct signal_frame_64 __user *sigframe;
|
||||
unsigned long __user *fp, *uregs;
|
||||
|
||||
next_ip = perf_instruction_pointer(regs);
|
||||
next_ip = perf_arch_instruction_pointer(regs);
|
||||
lr = regs->link;
|
||||
sp = regs->gpr[1];
|
||||
perf_callchain_store(entry, next_ip);
|
||||
|
@ -2332,7 +2332,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
|
||||
* Called from generic code to get the misc flags (i.e. processor mode)
|
||||
* for an event_id.
|
||||
*/
|
||||
unsigned long perf_misc_flags(struct pt_regs *regs)
|
||||
unsigned long perf_arch_misc_flags(struct pt_regs *regs)
|
||||
{
|
||||
u32 flags = perf_get_misc_flags(regs);
|
||||
|
||||
@ -2346,7 +2346,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
|
||||
* Called from generic code to get the instruction pointer
|
||||
* for an event_id.
|
||||
*/
|
||||
unsigned long perf_instruction_pointer(struct pt_regs *regs)
|
||||
unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long siar = mfspr(SPRN_SIAR);
|
||||
|
||||
|
@ -37,9 +37,9 @@ extern ssize_t cpumf_events_sysfs_show(struct device *dev,
|
||||
|
||||
/* Perf callbacks */
|
||||
struct pt_regs;
|
||||
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
|
||||
extern unsigned long perf_misc_flags(struct pt_regs *regs);
|
||||
#define perf_misc_flags(regs) perf_misc_flags(regs)
|
||||
extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
|
||||
extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
|
||||
#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
|
||||
#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
|
||||
|
||||
/* Perf pt_regs extension for sample-data-entry indicators */
|
||||
|
@ -57,7 +57,7 @@ static unsigned long instruction_pointer_guest(struct pt_regs *regs)
|
||||
return sie_block(regs)->gpsw.addr;
|
||||
}
|
||||
|
||||
unsigned long perf_instruction_pointer(struct pt_regs *regs)
|
||||
unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
|
||||
{
|
||||
return is_in_guest(regs) ? instruction_pointer_guest(regs)
|
||||
: instruction_pointer(regs);
|
||||
@ -84,7 +84,7 @@ static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
|
||||
return flags;
|
||||
}
|
||||
|
||||
unsigned long perf_misc_flags(struct pt_regs *regs)
|
||||
unsigned long perf_arch_misc_flags(struct pt_regs *regs)
|
||||
{
|
||||
/* Check if the cpum_sf PMU has created the pt_regs structure.
|
||||
* In this case, perf misc flags can be easily extracted. Otherwise,
|
||||
|
@ -943,11 +943,12 @@ static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, u
|
||||
static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
static atomic64_t status_warned = ATOMIC64_INIT(0);
|
||||
u64 reserved, status, mask, new_bits, prev_bits;
|
||||
struct perf_sample_data data;
|
||||
struct hw_perf_event *hwc;
|
||||
struct perf_event *event;
|
||||
int handled = 0, idx;
|
||||
u64 reserved, status, mask;
|
||||
bool pmu_enabled;
|
||||
|
||||
/*
|
||||
@ -1012,7 +1013,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
|
||||
* the corresponding PMCs are expected to be inactive according to the
|
||||
* active_mask
|
||||
*/
|
||||
WARN_ON(status > 0);
|
||||
if (status > 0) {
|
||||
prev_bits = atomic64_fetch_or(status, &status_warned);
|
||||
// A new bit was set for the very first time.
|
||||
new_bits = status & ~prev_bits;
|
||||
WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits);
|
||||
}
|
||||
|
||||
/* Clear overflow and freeze bits */
|
||||
amd_pmu_ack_global_status(~status);
|
||||
|
@ -916,7 +916,8 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
|
||||
u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
|
||||
union amd_uncore_info info;
|
||||
struct amd_uncore_pmu *pmu;
|
||||
int index = 0, gid, i;
|
||||
int gid, i;
|
||||
u16 index = 0;
|
||||
|
||||
if (pmu_version < 2)
|
||||
return 0;
|
||||
@ -948,7 +949,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
|
||||
for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
|
||||
for (i = 0; i < group_num_pmus[gid]; i++) {
|
||||
pmu = &uncore->pmus[index];
|
||||
snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
|
||||
snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%hu", index);
|
||||
pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
|
||||
pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
|
||||
pmu->rdpmc_base = -1;
|
||||
|
@ -3003,35 +3003,57 @@ static unsigned long code_segment_base(struct pt_regs *regs)
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned long perf_instruction_pointer(struct pt_regs *regs)
|
||||
unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
|
||||
{
|
||||
if (perf_guest_state())
|
||||
return perf_guest_get_ip();
|
||||
|
||||
return regs->ip + code_segment_base(regs);
|
||||
}
|
||||
|
||||
unsigned long perf_misc_flags(struct pt_regs *regs)
|
||||
static unsigned long common_misc_flags(struct pt_regs *regs)
|
||||
{
|
||||
unsigned int guest_state = perf_guest_state();
|
||||
int misc = 0;
|
||||
|
||||
if (guest_state) {
|
||||
if (guest_state & PERF_GUEST_USER)
|
||||
misc |= PERF_RECORD_MISC_GUEST_USER;
|
||||
else
|
||||
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
|
||||
} else {
|
||||
if (user_mode(regs))
|
||||
misc |= PERF_RECORD_MISC_USER;
|
||||
else
|
||||
misc |= PERF_RECORD_MISC_KERNEL;
|
||||
}
|
||||
|
||||
if (regs->flags & PERF_EFLAGS_EXACT)
|
||||
misc |= PERF_RECORD_MISC_EXACT_IP;
|
||||
return PERF_RECORD_MISC_EXACT_IP;
|
||||
|
||||
return misc;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long guest_misc_flags(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long guest_state = perf_guest_state();
|
||||
|
||||
if (!(guest_state & PERF_GUEST_ACTIVE))
|
||||
return 0;
|
||||
|
||||
if (guest_state & PERF_GUEST_USER)
|
||||
return PERF_RECORD_MISC_GUEST_USER;
|
||||
else
|
||||
return PERF_RECORD_MISC_GUEST_KERNEL;
|
||||
|
||||
}
|
||||
|
||||
static unsigned long host_misc_flags(struct pt_regs *regs)
|
||||
{
|
||||
if (user_mode(regs))
|
||||
return PERF_RECORD_MISC_USER;
|
||||
else
|
||||
return PERF_RECORD_MISC_KERNEL;
|
||||
}
|
||||
|
||||
/*
 * perf_arch_guest_misc_flags - misc flags for a guest sample.
 * @regs: register state captured for the sample.
 *
 * Combines the result of common_misc_flags() with guest_misc_flags().
 */
unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
{
	unsigned long common = common_misc_flags(regs);
	unsigned long guest = guest_misc_flags(regs);

	return common | guest;
}
|
||||
|
||||
/*
 * perf_arch_misc_flags - misc flags for a host sample.
 * @regs: register state captured for the sample.
 *
 * Combines the result of common_misc_flags() with host_misc_flags().
 */
unsigned long perf_arch_misc_flags(struct pt_regs *regs)
{
	unsigned long common = common_misc_flags(regs);
	unsigned long host = host_misc_flags(regs);

	return common | host;
}
|
||||
|
||||
void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
|
||||
|
@ -3962,8 +3962,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
||||
|
||||
if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
|
||||
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
|
||||
if (!(event->attr.sample_type &
|
||||
~intel_pmu_large_pebs_flags(event))) {
|
||||
if (!(event->attr.sample_type & ~intel_pmu_large_pebs_flags(event)) &&
|
||||
!has_aux_action(event)) {
|
||||
event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
|
||||
event->attach_state |= PERF_ATTACH_SCHED_CB;
|
||||
}
|
||||
@ -4599,6 +4599,28 @@ static inline bool erratum_hsw11(struct perf_event *event)
|
||||
X86_CONFIG(.event=0xc0, .umask=0x01);
|
||||
}
|
||||
|
||||
static struct event_constraint *
|
||||
arl_h_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
struct perf_event *event)
|
||||
{
|
||||
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
|
||||
|
||||
if (pmu->pmu_type == hybrid_tiny)
|
||||
return cmt_get_event_constraints(cpuc, idx, event);
|
||||
|
||||
return mtl_get_event_constraints(cpuc, idx, event);
|
||||
}
|
||||
|
||||
static int arl_h_hw_config(struct perf_event *event)
|
||||
{
|
||||
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
|
||||
|
||||
if (pmu->pmu_type == hybrid_tiny)
|
||||
return intel_pmu_hw_config(event);
|
||||
|
||||
return adl_hw_config(event);
|
||||
}
|
||||
|
||||
/*
|
||||
* The HSW11 requires a period larger than 100 which is the same as the BDM11.
|
||||
* A minimum period of 128 is enforced as well for the INST_RETIRED.ALL.
|
||||
@ -4924,17 +4946,26 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
|
||||
|
||||
/*
|
||||
* This essentially just maps between the 'hybrid_cpu_type'
|
||||
* and 'hybrid_pmu_type' enums:
|
||||
* and 'hybrid_pmu_type' enums except for ARL-H processor
|
||||
* which needs to compare atom uarch native id since ARL-H
|
||||
* contains two different atom uarchs.
|
||||
*/
|
||||
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
|
||||
enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type;
|
||||
u32 native_id;
|
||||
|
||||
if (cpu_type == HYBRID_INTEL_CORE &&
|
||||
pmu_type == hybrid_big)
|
||||
return &x86_pmu.hybrid_pmu[i];
|
||||
if (cpu_type == HYBRID_INTEL_ATOM &&
|
||||
pmu_type == hybrid_small)
|
||||
if (cpu_type == HYBRID_INTEL_CORE && pmu_type == hybrid_big)
|
||||
return &x86_pmu.hybrid_pmu[i];
|
||||
if (cpu_type == HYBRID_INTEL_ATOM) {
|
||||
if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small)
|
||||
return &x86_pmu.hybrid_pmu[i];
|
||||
|
||||
native_id = get_this_hybrid_cpu_native_id();
|
||||
if (native_id == skt_native_id && pmu_type == hybrid_small)
|
||||
return &x86_pmu.hybrid_pmu[i];
|
||||
if (native_id == cmt_native_id && pmu_type == hybrid_tiny)
|
||||
return &x86_pmu.hybrid_pmu[i];
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
@ -5965,6 +5996,37 @@ static struct attribute *lnl_hybrid_events_attrs[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
/* The event string must be in PMU IDX order. */
|
||||
EVENT_ATTR_STR_HYBRID(topdown-retiring,
|
||||
td_retiring_arl_h,
|
||||
"event=0xc2,umask=0x02;event=0x00,umask=0x80;event=0xc2,umask=0x0",
|
||||
hybrid_big_small_tiny);
|
||||
EVENT_ATTR_STR_HYBRID(topdown-bad-spec,
|
||||
td_bad_spec_arl_h,
|
||||
"event=0x73,umask=0x0;event=0x00,umask=0x81;event=0x73,umask=0x0",
|
||||
hybrid_big_small_tiny);
|
||||
EVENT_ATTR_STR_HYBRID(topdown-fe-bound,
|
||||
td_fe_bound_arl_h,
|
||||
"event=0x9c,umask=0x01;event=0x00,umask=0x82;event=0x71,umask=0x0",
|
||||
hybrid_big_small_tiny);
|
||||
EVENT_ATTR_STR_HYBRID(topdown-be-bound,
|
||||
td_be_bound_arl_h,
|
||||
"event=0xa4,umask=0x02;event=0x00,umask=0x83;event=0x74,umask=0x0",
|
||||
hybrid_big_small_tiny);
|
||||
|
||||
static struct attribute *arl_h_hybrid_events_attrs[] = {
|
||||
EVENT_PTR(slots_adl),
|
||||
EVENT_PTR(td_retiring_arl_h),
|
||||
EVENT_PTR(td_bad_spec_arl_h),
|
||||
EVENT_PTR(td_fe_bound_arl_h),
|
||||
EVENT_PTR(td_be_bound_arl_h),
|
||||
EVENT_PTR(td_heavy_ops_adl),
|
||||
EVENT_PTR(td_br_mis_adl),
|
||||
EVENT_PTR(td_fetch_lat_adl),
|
||||
EVENT_PTR(td_mem_bound_adl),
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* Must be in IDX order */
|
||||
EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small);
|
||||
EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small);
|
||||
@ -5983,6 +6045,21 @@ static struct attribute *mtl_hybrid_mem_attrs[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
EVENT_ATTR_STR_HYBRID(mem-loads,
|
||||
mem_ld_arl_h,
|
||||
"event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3;event=0xd0,umask=0x5,ldlat=3",
|
||||
hybrid_big_small_tiny);
|
||||
EVENT_ATTR_STR_HYBRID(mem-stores,
|
||||
mem_st_arl_h,
|
||||
"event=0xd0,umask=0x6;event=0xcd,umask=0x2;event=0xd0,umask=0x6",
|
||||
hybrid_big_small_tiny);
|
||||
|
||||
static struct attribute *arl_h_hybrid_mem_attrs[] = {
|
||||
EVENT_PTR(mem_ld_arl_h),
|
||||
EVENT_PTR(mem_st_arl_h),
|
||||
NULL,
|
||||
};
|
||||
|
||||
EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big);
|
||||
EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big);
|
||||
EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big);
|
||||
@ -6006,8 +6083,8 @@ static struct attribute *adl_hybrid_tsx_attrs[] = {
|
||||
|
||||
FORMAT_ATTR_HYBRID(in_tx, hybrid_big);
|
||||
FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big);
|
||||
FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small);
|
||||
FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small);
|
||||
FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small_tiny);
|
||||
FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small_tiny);
|
||||
FORMAT_ATTR_HYBRID(frontend, hybrid_big);
|
||||
|
||||
#define ADL_HYBRID_RTM_FORMAT_ATTR \
|
||||
@ -6030,7 +6107,7 @@ static struct attribute *adl_hybrid_extra_attr[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);
|
||||
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small_tiny);
|
||||
|
||||
static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
|
||||
ADL_HYBRID_RTM_FORMAT_ATTR,
|
||||
@ -6238,8 +6315,9 @@ static inline int intel_pmu_v6_addr_offset(int index, bool eventsel)
|
||||
}
|
||||
|
||||
static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = {
|
||||
{ hybrid_small, "cpu_atom" },
|
||||
{ hybrid_big, "cpu_core" },
|
||||
{ hybrid_small, "cpu_atom" },
|
||||
{ hybrid_big, "cpu_core" },
|
||||
{ hybrid_tiny, "cpu_lowpower" },
|
||||
};
|
||||
|
||||
static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
|
||||
@ -6272,7 +6350,7 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
|
||||
0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
|
||||
|
||||
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
|
||||
if (pmu->pmu_type & hybrid_small) {
|
||||
if (pmu->pmu_type & hybrid_small_tiny) {
|
||||
pmu->intel_cap.perf_metrics = 0;
|
||||
pmu->intel_cap.pebs_output_pt_available = 1;
|
||||
pmu->mid_ack = true;
|
||||
@ -7111,6 +7189,37 @@ __init int intel_pmu_init(void)
|
||||
name = "lunarlake_hybrid";
|
||||
break;
|
||||
|
||||
case INTEL_ARROWLAKE_H:
|
||||
intel_pmu_init_hybrid(hybrid_big_small_tiny);
|
||||
|
||||
x86_pmu.pebs_latency_data = arl_h_latency_data;
|
||||
x86_pmu.get_event_constraints = arl_h_get_event_constraints;
|
||||
x86_pmu.hw_config = arl_h_hw_config;
|
||||
|
||||
td_attr = arl_h_hybrid_events_attrs;
|
||||
mem_attr = arl_h_hybrid_mem_attrs;
|
||||
tsx_attr = adl_hybrid_tsx_attrs;
|
||||
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
|
||||
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
|
||||
|
||||
/* Initialize big core specific PerfMon capabilities. */
|
||||
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
|
||||
intel_pmu_init_lnc(&pmu->pmu);
|
||||
|
||||
/* Initialize Atom core specific PerfMon capabilities. */
|
||||
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
|
||||
intel_pmu_init_skt(&pmu->pmu);
|
||||
|
||||
/* Initialize Lower Power Atom specific PerfMon capabilities. */
|
||||
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX];
|
||||
intel_pmu_init_grt(&pmu->pmu);
|
||||
pmu->extra_regs = intel_cmt_extra_regs;
|
||||
|
||||
intel_pmu_pebs_data_source_arl_h();
|
||||
pr_cont("ArrowLake-H Hybrid events, ");
|
||||
name = "arrowlake_h_hybrid";
|
||||
break;
|
||||
|
||||
default:
|
||||
switch (x86_pmu.version) {
|
||||
case 1:
|
||||
|
@ -177,6 +177,17 @@ void __init intel_pmu_pebs_data_source_mtl(void)
|
||||
__intel_pmu_pebs_data_source_cmt(data_source);
|
||||
}
|
||||
|
||||
/*
 * intel_pmu_pebs_data_source_arl_h - set up PEBS data-source tables.
 *
 * Runs intel_pmu_pebs_data_source_lnl() first. It then copies the
 * file-level pebs_data_source table into the table of the PMU at
 * X86_HYBRID_PMU_TINY_IDX and passes that copy to
 * __intel_pmu_pebs_data_source_cmt().
 */
void __init intel_pmu_pebs_data_source_arl_h(void)
{
	u64 *tiny_table;

	intel_pmu_pebs_data_source_lnl();

	/* Looked up only after the call above, as in the original ordering. */
	tiny_table = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX].pebs_data_source;

	memcpy(tiny_table, pebs_data_source, sizeof(pebs_data_source));
	__intel_pmu_pebs_data_source_cmt(tiny_table);
}
|
||||
|
||||
void __init intel_pmu_pebs_data_source_cmt(void)
|
||||
{
|
||||
__intel_pmu_pebs_data_source_cmt(pebs_data_source);
|
||||
@ -388,6 +399,16 @@ u64 lnl_latency_data(struct perf_event *event, u64 status)
|
||||
return lnc_latency_data(event, status);
|
||||
}
|
||||
|
||||
/*
 * arl_h_latency_data - pick the latency decoder by PMU type.
 * @event:  event whose owning hybrid PMU decides the dispatch.
 * @status: value passed through unchanged to the chosen decoder.
 *
 * The hybrid_tiny PMU uses cmt_latency_data(); every other PMU type uses
 * lnl_latency_data().
 */
u64 arl_h_latency_data(struct perf_event *event, u64 status)
{
	struct x86_hybrid_pmu *hybrid = hybrid_pmu(event->pmu);

	if (hybrid->pmu_type != hybrid_tiny)
		return lnl_latency_data(event, status);

	return cmt_latency_data(event, status);
}
|
||||
|
||||
static u64 load_latency_data(struct perf_event *event, u64 status)
|
||||
{
|
||||
union intel_x86_pebs_dse dse;
|
||||
|
@ -418,6 +418,9 @@ static void pt_config_start(struct perf_event *event)
|
||||
struct pt *pt = this_cpu_ptr(&pt_ctx);
|
||||
u64 ctl = event->hw.aux_config;
|
||||
|
||||
if (READ_ONCE(event->hw.aux_paused))
|
||||
return;
|
||||
|
||||
ctl |= RTIT_CTL_TRACEEN;
|
||||
if (READ_ONCE(pt->vmx_on))
|
||||
perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
|
||||
@ -534,7 +537,24 @@ static void pt_config(struct perf_event *event)
|
||||
reg |= (event->attr.config & PT_CONFIG_MASK);
|
||||
|
||||
event->hw.aux_config = reg;
|
||||
|
||||
/*
|
||||
* Allow resume before starting so as not to overwrite a value set by a
|
||||
* PMI.
|
||||
*/
|
||||
barrier();
|
||||
WRITE_ONCE(pt->resume_allowed, 1);
|
||||
/* Configuration is complete, it is now OK to handle an NMI */
|
||||
barrier();
|
||||
WRITE_ONCE(pt->handle_nmi, 1);
|
||||
barrier();
|
||||
pt_config_start(event);
|
||||
barrier();
|
||||
/*
|
||||
* Allow pause after starting so its pt_config_stop() doesn't race with
|
||||
* pt_config_start().
|
||||
*/
|
||||
WRITE_ONCE(pt->pause_allowed, 1);
|
||||
}
|
||||
|
||||
static void pt_config_stop(struct perf_event *event)
|
||||
@ -828,11 +848,13 @@ static void pt_buffer_advance(struct pt_buffer *buf)
|
||||
buf->cur_idx++;
|
||||
|
||||
if (buf->cur_idx == buf->cur->last) {
|
||||
if (buf->cur == buf->last)
|
||||
if (buf->cur == buf->last) {
|
||||
buf->cur = buf->first;
|
||||
else
|
||||
buf->wrapped = true;
|
||||
} else {
|
||||
buf->cur = list_entry(buf->cur->list.next, struct topa,
|
||||
list);
|
||||
}
|
||||
buf->cur_idx = 0;
|
||||
}
|
||||
}
|
||||
@ -846,8 +868,11 @@ static void pt_buffer_advance(struct pt_buffer *buf)
|
||||
static void pt_update_head(struct pt *pt)
|
||||
{
|
||||
struct pt_buffer *buf = perf_get_aux(&pt->handle);
|
||||
bool wrapped = buf->wrapped;
|
||||
u64 topa_idx, base, old;
|
||||
|
||||
buf->wrapped = false;
|
||||
|
||||
if (buf->single) {
|
||||
local_set(&buf->data_size, buf->output_off);
|
||||
return;
|
||||
@ -865,7 +890,7 @@ static void pt_update_head(struct pt *pt)
|
||||
} else {
|
||||
old = (local64_xchg(&buf->head, base) &
|
||||
((buf->nr_pages << PAGE_SHIFT) - 1));
|
||||
if (base < old)
|
||||
if (base < old || (base == old && wrapped))
|
||||
base += buf->nr_pages << PAGE_SHIFT;
|
||||
|
||||
local_add(base - old, &buf->data_size);
|
||||
@ -1511,6 +1536,7 @@ void intel_pt_interrupt(void)
|
||||
buf = perf_aux_output_begin(&pt->handle, event);
|
||||
if (!buf) {
|
||||
event->hw.state = PERF_HES_STOPPED;
|
||||
WRITE_ONCE(pt->resume_allowed, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1519,6 +1545,7 @@ void intel_pt_interrupt(void)
|
||||
ret = pt_buffer_reset_markers(buf, &pt->handle);
|
||||
if (ret) {
|
||||
perf_aux_output_end(&pt->handle, 0);
|
||||
WRITE_ONCE(pt->resume_allowed, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1573,6 +1600,26 @@ static void pt_event_start(struct perf_event *event, int mode)
|
||||
struct pt *pt = this_cpu_ptr(&pt_ctx);
|
||||
struct pt_buffer *buf;
|
||||
|
||||
if (mode & PERF_EF_RESUME) {
|
||||
if (READ_ONCE(pt->resume_allowed)) {
|
||||
u64 status;
|
||||
|
||||
/*
|
||||
* Only if the trace is not active and the error and
|
||||
* stopped bits are clear, is it safe to start, but a
|
||||
* PMI might have just cleared these, so resume_allowed
|
||||
* must be checked again also.
|
||||
*/
|
||||
rdmsrl(MSR_IA32_RTIT_STATUS, status);
|
||||
if (!(status & (RTIT_STATUS_TRIGGEREN |
|
||||
RTIT_STATUS_ERROR |
|
||||
RTIT_STATUS_STOPPED)) &&
|
||||
READ_ONCE(pt->resume_allowed))
|
||||
pt_config_start(event);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
buf = perf_aux_output_begin(&pt->handle, event);
|
||||
if (!buf)
|
||||
goto fail_stop;
|
||||
@ -1583,7 +1630,6 @@ static void pt_event_start(struct perf_event *event, int mode)
|
||||
goto fail_end_stop;
|
||||
}
|
||||
|
||||
WRITE_ONCE(pt->handle_nmi, 1);
|
||||
hwc->state = 0;
|
||||
|
||||
pt_config_buffer(buf);
|
||||
@ -1601,6 +1647,12 @@ static void pt_event_stop(struct perf_event *event, int mode)
|
||||
{
|
||||
struct pt *pt = this_cpu_ptr(&pt_ctx);
|
||||
|
||||
if (mode & PERF_EF_PAUSE) {
|
||||
if (READ_ONCE(pt->pause_allowed))
|
||||
pt_config_stop(event);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Protect against the PMI racing with disabling wrmsr,
|
||||
* see comment in intel_pt_interrupt().
|
||||
@ -1608,6 +1660,15 @@ static void pt_event_stop(struct perf_event *event, int mode)
|
||||
WRITE_ONCE(pt->handle_nmi, 0);
|
||||
barrier();
|
||||
|
||||
/*
|
||||
* Prevent a resume from attempting to restart tracing, or a pause
|
||||
* during a subsequent start. Do this after clearing handle_nmi so that
|
||||
* pt_event_snapshot_aux() will not re-allow them.
|
||||
*/
|
||||
WRITE_ONCE(pt->pause_allowed, 0);
|
||||
WRITE_ONCE(pt->resume_allowed, 0);
|
||||
barrier();
|
||||
|
||||
pt_config_stop(event);
|
||||
|
||||
if (event->hw.state == PERF_HES_STOPPED)
|
||||
@ -1657,6 +1718,10 @@ static long pt_event_snapshot_aux(struct perf_event *event,
|
||||
if (WARN_ON_ONCE(!buf->snapshot))
|
||||
return 0;
|
||||
|
||||
/* Prevent pause/resume from attempting to start/stop tracing */
|
||||
WRITE_ONCE(pt->pause_allowed, 0);
|
||||
WRITE_ONCE(pt->resume_allowed, 0);
|
||||
barrier();
|
||||
/*
|
||||
* There is no PT interrupt in this mode, so stop the trace and it will
|
||||
* remain stopped while the buffer is copied.
|
||||
@ -1676,8 +1741,13 @@ static long pt_event_snapshot_aux(struct perf_event *event,
|
||||
* Here, handle_nmi tells us if the tracing was on.
|
||||
* If the tracing was on, restart it.
|
||||
*/
|
||||
if (READ_ONCE(pt->handle_nmi))
|
||||
if (READ_ONCE(pt->handle_nmi)) {
|
||||
WRITE_ONCE(pt->resume_allowed, 1);
|
||||
barrier();
|
||||
pt_config_start(event);
|
||||
barrier();
|
||||
WRITE_ONCE(pt->pause_allowed, 1);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1793,7 +1863,9 @@ static __init int pt_init(void)
|
||||
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
|
||||
pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
|
||||
|
||||
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
|
||||
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE |
|
||||
PERF_PMU_CAP_ITRACE |
|
||||
PERF_PMU_CAP_AUX_PAUSE;
|
||||
pt_pmu.pmu.attr_groups = pt_attr_groups;
|
||||
pt_pmu.pmu.task_ctx_nr = perf_sw_context;
|
||||
pt_pmu.pmu.event_init = pt_event_init;
|
||||
|
@ -65,6 +65,7 @@ struct pt_pmu {
|
||||
* @head: logical write offset inside the buffer
|
||||
* @snapshot: if this is for a snapshot/overwrite counter
|
||||
* @single: use Single Range Output instead of ToPA
|
||||
* @wrapped: buffer advance wrapped back to the first topa table
|
||||
* @stop_pos: STOP topa entry index
|
||||
* @intr_pos: INT topa entry index
|
||||
* @stop_te: STOP topa entry pointer
|
||||
@ -82,6 +83,7 @@ struct pt_buffer {
|
||||
local64_t head;
|
||||
bool snapshot;
|
||||
bool single;
|
||||
bool wrapped;
|
||||
long stop_pos, intr_pos;
|
||||
struct topa_entry *stop_te, *intr_te;
|
||||
void **data_pages;
|
||||
@ -117,6 +119,8 @@ struct pt_filters {
|
||||
* @filters: last configured filters
|
||||
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
|
||||
* @vmx_on: 1 if VMX is ON on this cpu
|
||||
* @pause_allowed: PERF_EF_PAUSE is allowed to stop tracing
|
||||
* @resume_allowed: PERF_EF_RESUME is allowed to start tracing
|
||||
* @output_base: cached RTIT_OUTPUT_BASE MSR value
|
||||
* @output_mask: cached RTIT_OUTPUT_MASK MSR value
|
||||
*/
|
||||
@ -125,6 +129,8 @@ struct pt {
|
||||
struct pt_filters filters;
|
||||
int handle_nmi;
|
||||
int vmx_on;
|
||||
int pause_allowed;
|
||||
int resume_allowed;
|
||||
u64 output_base;
|
||||
u64 output_mask;
|
||||
};
|
||||
|
@ -668,24 +668,38 @@ enum {
|
||||
#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
|
||||
#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
|
||||
|
||||
/*
|
||||
* CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture
|
||||
* of the core. Bits 31-24 indicate its core type (Core or Atom)
|
||||
* and Bits [23:0] indicate the native model ID of the core.
|
||||
* Core type and native model ID are defined in the enumerations below.
|
||||
*/
|
||||
enum hybrid_cpu_type {
|
||||
HYBRID_INTEL_NONE,
|
||||
HYBRID_INTEL_ATOM = 0x20,
|
||||
HYBRID_INTEL_CORE = 0x40,
|
||||
};
|
||||
|
||||
enum hybrid_pmu_type {
|
||||
not_hybrid,
|
||||
hybrid_small = BIT(0),
|
||||
hybrid_big = BIT(1),
|
||||
|
||||
hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */
|
||||
};
|
||||
|
||||
#define X86_HYBRID_PMU_ATOM_IDX 0
|
||||
#define X86_HYBRID_PMU_CORE_IDX 1
|
||||
#define X86_HYBRID_PMU_TINY_IDX 2
|
||||
|
||||
#define X86_HYBRID_NUM_PMUS 2
|
||||
enum hybrid_pmu_type {
|
||||
not_hybrid,
|
||||
hybrid_small = BIT(X86_HYBRID_PMU_ATOM_IDX),
|
||||
hybrid_big = BIT(X86_HYBRID_PMU_CORE_IDX),
|
||||
hybrid_tiny = BIT(X86_HYBRID_PMU_TINY_IDX),
|
||||
|
||||
/* The following are only used for matching */
|
||||
hybrid_big_small = hybrid_big | hybrid_small,
|
||||
hybrid_small_tiny = hybrid_small | hybrid_tiny,
|
||||
hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
|
||||
};
|
||||
|
||||
enum atom_native_id {
|
||||
cmt_native_id = 0x2, /* Crestmont */
|
||||
skt_native_id = 0x3, /* Skymont */
|
||||
};
|
||||
|
||||
struct x86_hybrid_pmu {
|
||||
struct pmu pmu;
|
||||
@ -1578,6 +1592,8 @@ u64 cmt_latency_data(struct perf_event *event, u64 status);
|
||||
|
||||
u64 lnl_latency_data(struct perf_event *event, u64 status);
|
||||
|
||||
u64 arl_h_latency_data(struct perf_event *event, u64 status);
|
||||
|
||||
extern struct event_constraint intel_core2_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_atom_pebs_event_constraints[];
|
||||
@ -1697,6 +1713,8 @@ void intel_pmu_pebs_data_source_grt(void);
|
||||
|
||||
void intel_pmu_pebs_data_source_mtl(void);
|
||||
|
||||
void intel_pmu_pebs_data_source_arl_h(void);
|
||||
|
||||
void intel_pmu_pebs_data_source_cmt(void);
|
||||
|
||||
void intel_pmu_pebs_data_source_lnl(void);
|
||||
|
@ -148,7 +148,6 @@ struct rapl_model {
|
||||
/* 1/2^hw_unit Joule */
|
||||
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
|
||||
static struct rapl_pmus *rapl_pmus;
|
||||
static cpumask_t rapl_cpu_mask;
|
||||
static unsigned int rapl_cntr_mask;
|
||||
static u64 rapl_timer_ms;
|
||||
static struct perf_msr *rapl_msrs;
|
||||
@ -369,8 +368,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
|
||||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
|
||||
|
||||
if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
|
||||
return -EINVAL;
|
||||
|
||||
@ -389,7 +386,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
|
||||
pmu = cpu_to_rapl_pmu(event->cpu);
|
||||
if (!pmu)
|
||||
return -EINVAL;
|
||||
event->cpu = pmu->cpu;
|
||||
event->pmu_private = pmu;
|
||||
event->hw.event_base = rapl_msrs[bit].msr;
|
||||
event->hw.config = cfg;
|
||||
@ -403,23 +399,6 @@ static void rapl_pmu_event_read(struct perf_event *event)
|
||||
rapl_event_update(event);
|
||||
}
|
||||
|
||||
static ssize_t rapl_get_attr_cpumask(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
|
||||
|
||||
static struct attribute *rapl_pmu_attrs[] = {
|
||||
&dev_attr_cpumask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group rapl_pmu_attr_group = {
|
||||
.attrs = rapl_pmu_attrs,
|
||||
};
|
||||
|
||||
RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
|
||||
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
|
||||
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
|
||||
@ -467,7 +446,6 @@ static struct attribute_group rapl_pmu_format_group = {
|
||||
};
|
||||
|
||||
static const struct attribute_group *rapl_attr_groups[] = {
|
||||
&rapl_pmu_attr_group,
|
||||
&rapl_pmu_format_group,
|
||||
&rapl_pmu_events_group,
|
||||
NULL,
|
||||
@ -570,65 +548,6 @@ static struct perf_msr amd_rapl_msrs[] = {
|
||||
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
|
||||
};
|
||||
|
||||
static int rapl_cpu_offline(unsigned int cpu)
|
||||
{
|
||||
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
|
||||
int target;
|
||||
|
||||
/* Check if exiting cpu is used for collecting rapl events */
|
||||
if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
|
||||
return 0;
|
||||
|
||||
pmu->cpu = -1;
|
||||
/* Find a new cpu to collect rapl events */
|
||||
target = cpumask_any_but(get_rapl_pmu_cpumask(cpu), cpu);
|
||||
|
||||
/* Migrate rapl events to the new target */
|
||||
if (target < nr_cpu_ids) {
|
||||
cpumask_set_cpu(target, &rapl_cpu_mask);
|
||||
pmu->cpu = target;
|
||||
perf_pmu_migrate_context(pmu->pmu, cpu, target);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rapl_cpu_online(unsigned int cpu)
|
||||
{
|
||||
s32 rapl_pmu_idx = get_rapl_pmu_idx(cpu);
|
||||
if (rapl_pmu_idx < 0) {
|
||||
pr_err("topology_logical_(package/die)_id() returned a negative value");
|
||||
return -EINVAL;
|
||||
}
|
||||
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
|
||||
int target;
|
||||
|
||||
if (!pmu) {
|
||||
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
|
||||
if (!pmu)
|
||||
return -ENOMEM;
|
||||
|
||||
raw_spin_lock_init(&pmu->lock);
|
||||
INIT_LIST_HEAD(&pmu->active_list);
|
||||
pmu->pmu = &rapl_pmus->pmu;
|
||||
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
|
||||
rapl_hrtimer_init(pmu);
|
||||
|
||||
rapl_pmus->pmus[rapl_pmu_idx] = pmu;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if there is an online cpu in the package which collects rapl
|
||||
* events already.
|
||||
*/
|
||||
target = cpumask_any_and(&rapl_cpu_mask, get_rapl_pmu_cpumask(cpu));
|
||||
if (target < nr_cpu_ids)
|
||||
return 0;
|
||||
|
||||
cpumask_set_cpu(cpu, &rapl_cpu_mask);
|
||||
pmu->cpu = cpu;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rapl_check_hw_unit(struct rapl_model *rm)
|
||||
{
|
||||
u64 msr_rapl_power_unit_bits;
|
||||
@ -707,12 +626,41 @@ static const struct attribute_group *rapl_attr_update[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static int __init init_rapl_pmu(void)
|
||||
{
|
||||
struct rapl_pmu *pmu;
|
||||
int idx;
|
||||
|
||||
for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) {
|
||||
pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
|
||||
if (!pmu)
|
||||
goto free;
|
||||
|
||||
raw_spin_lock_init(&pmu->lock);
|
||||
INIT_LIST_HEAD(&pmu->active_list);
|
||||
pmu->pmu = &rapl_pmus->pmu;
|
||||
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
|
||||
rapl_hrtimer_init(pmu);
|
||||
|
||||
rapl_pmus->pmus[idx] = pmu;
|
||||
}
|
||||
|
||||
return 0;
|
||||
free:
|
||||
for (; idx > 0; idx--)
|
||||
kfree(rapl_pmus->pmus[idx - 1]);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static int __init init_rapl_pmus(void)
|
||||
{
|
||||
int nr_rapl_pmu = topology_max_packages();
|
||||
int rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
|
||||
|
||||
if (!rapl_pmu_is_pkg_scope())
|
||||
if (!rapl_pmu_is_pkg_scope()) {
|
||||
nr_rapl_pmu *= topology_max_dies_per_package();
|
||||
rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
|
||||
}
|
||||
|
||||
rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
|
||||
if (!rapl_pmus)
|
||||
@ -728,9 +676,11 @@ static int __init init_rapl_pmus(void)
|
||||
rapl_pmus->pmu.start = rapl_pmu_event_start;
|
||||
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
|
||||
rapl_pmus->pmu.read = rapl_pmu_event_read;
|
||||
rapl_pmus->pmu.scope = rapl_pmu_scope;
|
||||
rapl_pmus->pmu.module = THIS_MODULE;
|
||||
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
|
||||
return 0;
|
||||
|
||||
return init_rapl_pmu();
|
||||
}
|
||||
|
||||
static struct rapl_model model_snb = {
|
||||
@ -876,24 +826,13 @@ static int __init rapl_pmu_init(void)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Install callbacks. Core will call them for each online cpu.
|
||||
*/
|
||||
ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
|
||||
"perf/x86/rapl:online",
|
||||
rapl_cpu_online, rapl_cpu_offline);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
|
||||
if (ret)
|
||||
goto out1;
|
||||
goto out;
|
||||
|
||||
rapl_advertise();
|
||||
return 0;
|
||||
|
||||
out1:
|
||||
cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
|
||||
out:
|
||||
pr_warn("Initialization failed (%d), disabled\n", ret);
|
||||
cleanup_rapl_pmus();
|
||||
@ -903,7 +842,6 @@ module_init(rapl_pmu_init);
|
||||
|
||||
static void __exit intel_rapl_exit(void)
|
||||
{
|
||||
cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
|
||||
perf_pmu_unregister(&rapl_pmus->pmu);
|
||||
cleanup_rapl_pmus();
|
||||
}
|
||||
|
@ -32,6 +32,7 @@ extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
|
||||
extern bool handle_guest_split_lock(unsigned long ip);
|
||||
extern void handle_bus_lock(struct pt_regs *regs);
|
||||
u8 get_this_hybrid_cpu_type(void);
|
||||
u32 get_this_hybrid_cpu_native_id(void);
|
||||
#else
|
||||
static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
|
||||
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
|
||||
@ -50,6 +51,11 @@ static inline u8 get_this_hybrid_cpu_type(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline u32 get_this_hybrid_cpu_native_id(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#ifdef CONFIG_IA32_FEAT_CTL
|
||||
void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
|
||||
|
@ -536,15 +536,17 @@ struct x86_perf_regs {
|
||||
u64 *xmm_regs;
|
||||
};
|
||||
|
||||
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
|
||||
extern unsigned long perf_misc_flags(struct pt_regs *regs);
|
||||
#define perf_misc_flags(regs) perf_misc_flags(regs)
|
||||
extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
|
||||
extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
|
||||
extern unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs);
|
||||
#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
|
||||
#define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
|
||||
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
/*
|
||||
* We abuse bit 3 from flags to pass exact information, see perf_misc_flags
|
||||
* and the comment with PERF_EFLAGS_EXACT.
|
||||
* We abuse bit 3 from flags to pass exact information, see
|
||||
* perf_arch_misc_flags() and the comment with PERF_EFLAGS_EXACT.
|
||||
*/
|
||||
#define perf_arch_fetch_caller_regs(regs, __ip) { \
|
||||
(regs)->ip = (__ip); \
|
||||
|
@ -1299,3 +1299,18 @@ u8 get_this_hybrid_cpu_type(void)
|
||||
|
||||
return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
|
||||
}
|
||||
|
||||
/**
|
||||
* get_this_hybrid_cpu_native_id() - Get the native id of this hybrid CPU
|
||||
*
|
||||
* Returns the uarch native ID [23:0] of a CPU in a hybrid processor.
|
||||
* If the processor is not hybrid, returns 0.
|
||||
*/
|
||||
u32 get_this_hybrid_cpu_native_id(void)
|
||||
{
|
||||
if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
|
||||
return 0;
|
||||
|
||||
return cpuid_eax(0x0000001a) &
|
||||
(BIT_ULL(X86_HYBRID_CPU_TYPE_ID_SHIFT) - 1);
|
||||
}
|
||||
|
@ -208,7 +208,6 @@ enum cpuhp_state {
|
||||
CPUHP_AP_PERF_X86_UNCORE_ONLINE,
|
||||
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
|
||||
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
|
||||
CPUHP_AP_PERF_X86_RAPL_ONLINE,
|
||||
CPUHP_AP_PERF_S390_CF_ONLINE,
|
||||
CPUHP_AP_PERF_S390_SF_ONLINE,
|
||||
CPUHP_AP_PERF_ARM_CCI_ONLINE,
|
||||
|
@ -170,6 +170,12 @@ struct hw_perf_event {
|
||||
};
|
||||
struct { /* aux / Intel-PT */
|
||||
u64 aux_config;
|
||||
/*
|
||||
* For AUX area events, aux_paused cannot be a state
|
||||
* flag because it can be updated asynchronously to
|
||||
* state.
|
||||
*/
|
||||
unsigned int aux_paused;
|
||||
};
|
||||
struct { /* software */
|
||||
struct hrtimer hrtimer;
|
||||
@ -294,6 +300,7 @@ struct perf_event_pmu_context;
|
||||
#define PERF_PMU_CAP_NO_EXCLUDE 0x0040
|
||||
#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
|
||||
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
|
||||
#define PERF_PMU_CAP_AUX_PAUSE 0x0200
|
||||
|
||||
/**
|
||||
* pmu::scope
|
||||
@ -384,6 +391,8 @@ struct pmu {
|
||||
#define PERF_EF_START 0x01 /* start the counter when adding */
|
||||
#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
|
||||
#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
|
||||
#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */
|
||||
#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */
|
||||
|
||||
/*
|
||||
* Adds/Removes a counter to/from the PMU, can be done inside a
|
||||
@ -423,6 +432,18 @@ struct pmu {
|
||||
*
|
||||
* ->start() with PERF_EF_RELOAD will reprogram the counter
|
||||
* value, must be preceded by a ->stop() with PERF_EF_UPDATE.
|
||||
*
|
||||
* ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
|
||||
* overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
|
||||
* PERF_EF_RESUME.
|
||||
*
|
||||
* ->start() with PERF_EF_RESUME will start as simply as possible but
|
||||
* only if the counter is not otherwise stopped. Will not overlap
|
||||
* another ->start() with PERF_EF_RESUME nor ->stop() with
|
||||
* PERF_EF_PAUSE.
|
||||
*
|
||||
* Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other
|
||||
* ->stop()/->start() invocations, just not itself.
|
||||
*/
|
||||
void (*start) (struct perf_event *event, int flags);
|
||||
void (*stop) (struct perf_event *event, int flags);
|
||||
@ -1655,15 +1676,35 @@ extern void perf_tp_event(u16 event_type, u64 count, void *record,
|
||||
struct task_struct *task);
|
||||
extern void perf_bp_event(struct perf_event *event, void *data);
|
||||
|
||||
#ifndef perf_misc_flags
|
||||
# define perf_misc_flags(regs) \
|
||||
extern unsigned long perf_misc_flags(struct perf_event *event, struct pt_regs *regs);
|
||||
extern unsigned long perf_instruction_pointer(struct perf_event *event,
|
||||
struct pt_regs *regs);
|
||||
|
||||
#ifndef perf_arch_misc_flags
|
||||
# define perf_arch_misc_flags(regs) \
|
||||
(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
|
||||
# define perf_instruction_pointer(regs) instruction_pointer(regs)
|
||||
# define perf_arch_instruction_pointer(regs) instruction_pointer(regs)
|
||||
#endif
|
||||
#ifndef perf_arch_bpf_user_pt_regs
|
||||
# define perf_arch_bpf_user_pt_regs(regs) regs
|
||||
#endif
|
||||
|
||||
#ifndef perf_arch_guest_misc_flags
|
||||
static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long guest_state = perf_guest_state();
|
||||
|
||||
if (!(guest_state & PERF_GUEST_ACTIVE))
|
||||
return 0;
|
||||
|
||||
if (guest_state & PERF_GUEST_USER)
|
||||
return PERF_RECORD_MISC_GUEST_USER;
|
||||
else
|
||||
return PERF_RECORD_MISC_GUEST_KERNEL;
|
||||
}
|
||||
# define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
|
||||
#endif
|
||||
|
||||
static inline bool has_branch_stack(struct perf_event *event)
|
||||
{
|
||||
return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
|
||||
@ -1679,6 +1720,13 @@ static inline bool has_aux(struct perf_event *event)
|
||||
return event->pmu->setup_aux;
|
||||
}
|
||||
|
||||
static inline bool has_aux_action(struct perf_event *event)
|
||||
{
|
||||
return event->attr.aux_sample_size ||
|
||||
event->attr.aux_pause ||
|
||||
event->attr.aux_resume;
|
||||
}
|
||||
|
||||
static inline bool is_write_backward(struct perf_event *event)
|
||||
{
|
||||
return !!event->attr.write_backward;
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/timer.h>
|
||||
|
||||
struct uprobe;
|
||||
struct vm_area_struct;
|
||||
@ -23,8 +24,17 @@ struct inode;
|
||||
struct notifier_block;
|
||||
struct page;
|
||||
|
||||
/*
|
||||
* Allowed return values from uprobe consumer's handler callback
|
||||
* with following meaning:
|
||||
*
|
||||
* UPROBE_HANDLER_REMOVE
|
||||
* - Remove the uprobe breakpoint from current->mm.
|
||||
* UPROBE_HANDLER_IGNORE
|
||||
* - Ignore ret_handler callback for this consumer.
|
||||
*/
|
||||
#define UPROBE_HANDLER_REMOVE 1
|
||||
#define UPROBE_HANDLER_MASK 1
|
||||
#define UPROBE_HANDLER_IGNORE 2
|
||||
|
||||
#define MAX_URETPROBE_DEPTH 64
|
||||
|
||||
@ -37,13 +47,15 @@ struct uprobe_consumer {
|
||||
* for the current process. If filter() is omitted or returns true,
|
||||
* UPROBE_HANDLER_REMOVE is effectively ignored.
|
||||
*/
|
||||
int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
|
||||
int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data);
|
||||
int (*ret_handler)(struct uprobe_consumer *self,
|
||||
unsigned long func,
|
||||
struct pt_regs *regs);
|
||||
struct pt_regs *regs, __u64 *data);
|
||||
bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
|
||||
|
||||
struct list_head cons_node;
|
||||
|
||||
__u64 id; /* set when uprobe_consumer is registered */
|
||||
};
|
||||
|
||||
#ifdef CONFIG_UPROBES
|
||||
@ -56,12 +68,62 @@ enum uprobe_task_state {
|
||||
UTASK_SSTEP_TRAPPED,
|
||||
};
|
||||
|
||||
/* The state of hybrid-lifetime uprobe inside struct return_instance */
|
||||
enum hprobe_state {
|
||||
HPROBE_LEASED, /* uretprobes_srcu-protected uprobe */
|
||||
HPROBE_STABLE, /* refcounted uprobe */
|
||||
HPROBE_GONE, /* NULL uprobe, SRCU expired, refcount failed */
|
||||
HPROBE_CONSUMED, /* uprobe "consumed" by uretprobe handler */
|
||||
};
|
||||
|
||||
/*
|
||||
* Hybrid lifetime uprobe. Represents a uprobe instance that could be either
|
||||
* SRCU protected (with SRCU protection eventually potentially timing out),
|
||||
* refcounted using uprobe->ref, or there could be no valid uprobe (NULL).
|
||||
*
|
||||
* hprobe's internal state is set up such that a background timer thread can
|
||||
* atomically "downgrade" temporarily RCU-protected uprobe into refcounted one
|
||||
* (or no uprobe, if refcounting failed).
|
||||
*
|
||||
* *stable* pointer always points to the uprobe (or could be NULL if there
|
||||
* was no valid underlying uprobe to begin with).
|
||||
*
|
||||
* *leased* pointer is the key to achieving race-free atomic lifetime state
|
||||
* transition and can have three possible states:
|
||||
* - either the same non-NULL value as *stable*, in which case uprobe is
|
||||
* SRCU-protected;
|
||||
* - NULL, in which case uprobe (if there is any) is refcounted;
|
||||
* - special __UPROBE_DEAD value, which represents an uprobe that was SRCU
|
||||
* protected initially, but SRCU period timed out and we attempted to
|
||||
* convert it to refcounted, but refcount_inc_not_zero() failed, because
|
||||
* uprobe effectively went away (the last consumer unsubscribed). In this
|
||||
* case it's important to know that *stable* pointer (which still has
|
||||
* non-NULL uprobe pointer) shouldn't be used, because lifetime of
|
||||
* underlying uprobe is not guaranteed anymore. __UPROBE_DEAD is just an
|
||||
* internal marker and is handled transparently by hprobe_fetch() helper.
|
||||
*
|
||||
* When uprobe is SRCU-protected, we also record srcu_idx value, necessary for
|
||||
* SRCU unlocking.
|
||||
*
|
||||
* See hprobe_expire() and hprobe_fetch() for details of race-free uprobe
|
||||
* state transitioning details. It all hinges on atomic xchg() over *leased*
|
||||
* pointer. *stable* pointer, once initially set, is not modified concurrently.
|
||||
*/
|
||||
struct hprobe {
|
||||
enum hprobe_state state;
|
||||
int srcu_idx;
|
||||
struct uprobe *uprobe;
|
||||
};
|
||||
|
||||
/*
|
||||
* uprobe_task: Metadata of a task while it singlesteps.
|
||||
*/
|
||||
struct uprobe_task {
|
||||
enum uprobe_task_state state;
|
||||
|
||||
unsigned int depth;
|
||||
struct return_instance *return_instances;
|
||||
|
||||
union {
|
||||
struct {
|
||||
struct arch_uprobe_task autask;
|
||||
@ -75,23 +137,30 @@ struct uprobe_task {
|
||||
};
|
||||
|
||||
struct uprobe *active_uprobe;
|
||||
struct timer_list ri_timer;
|
||||
unsigned long xol_vaddr;
|
||||
|
||||
struct arch_uprobe *auprobe;
|
||||
};
|
||||
|
||||
struct return_instance *return_instances;
|
||||
unsigned int depth;
|
||||
struct return_consumer {
|
||||
__u64 cookie;
|
||||
__u64 id;
|
||||
};
|
||||
|
||||
struct return_instance {
|
||||
struct uprobe *uprobe;
|
||||
struct hprobe hprobe;
|
||||
unsigned long func;
|
||||
unsigned long stack; /* stack pointer */
|
||||
unsigned long orig_ret_vaddr; /* original return address */
|
||||
bool chained; /* true, if instance is nested */
|
||||
int consumers_cnt;
|
||||
|
||||
struct return_instance *next; /* keep as stack */
|
||||
};
|
||||
struct rcu_head rcu;
|
||||
|
||||
struct return_consumer consumers[] __counted_by(consumers_cnt);
|
||||
} ____cacheline_aligned;
|
||||
|
||||
enum rp_check {
|
||||
RP_CHECK_CALL,
|
||||
|
@ -511,7 +511,16 @@ struct perf_event_attr {
|
||||
__u16 sample_max_stack;
|
||||
__u16 __reserved_2;
|
||||
__u32 aux_sample_size;
|
||||
__u32 __reserved_3;
|
||||
|
||||
union {
|
||||
__u32 aux_action;
|
||||
struct {
|
||||
__u32 aux_start_paused : 1, /* start AUX area tracing paused */
|
||||
aux_pause : 1, /* on overflow, pause AUX area tracing */
|
||||
aux_resume : 1, /* on overflow, resume AUX area tracing */
|
||||
__reserved_3 : 29;
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
* User provided data if sigtrap=1, passed back to user via
|
||||
|
@ -2142,7 +2142,7 @@ static void perf_put_aux_event(struct perf_event *event)
|
||||
|
||||
static bool perf_need_aux_event(struct perf_event *event)
|
||||
{
|
||||
return !!event->attr.aux_output || !!event->attr.aux_sample_size;
|
||||
return event->attr.aux_output || has_aux_action(event);
|
||||
}
|
||||
|
||||
static int perf_get_aux_event(struct perf_event *event,
|
||||
@ -2167,6 +2167,10 @@ static int perf_get_aux_event(struct perf_event *event,
|
||||
!perf_aux_output_match(event, group_leader))
|
||||
return 0;
|
||||
|
||||
if ((event->attr.aux_pause || event->attr.aux_resume) &&
|
||||
!(group_leader->pmu->capabilities & PERF_PMU_CAP_AUX_PAUSE))
|
||||
return 0;
|
||||
|
||||
if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux)
|
||||
return 0;
|
||||
|
||||
@ -7003,6 +7007,29 @@ void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
|
||||
EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
|
||||
#endif
|
||||
|
||||
static bool should_sample_guest(struct perf_event *event)
|
||||
{
|
||||
return !event->attr.exclude_guest && perf_guest_state();
|
||||
}
|
||||
|
||||
unsigned long perf_misc_flags(struct perf_event *event,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
if (should_sample_guest(event))
|
||||
return perf_arch_guest_misc_flags(regs);
|
||||
|
||||
return perf_arch_misc_flags(regs);
|
||||
}
|
||||
|
||||
unsigned long perf_instruction_pointer(struct perf_event *event,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
if (should_sample_guest(event))
|
||||
return perf_guest_get_ip();
|
||||
|
||||
return perf_arch_instruction_pointer(regs);
|
||||
}
|
||||
|
||||
static void
|
||||
perf_output_sample_regs(struct perf_output_handle *handle,
|
||||
struct pt_regs *regs, u64 mask)
|
||||
@ -7820,7 +7847,7 @@ void perf_prepare_sample(struct perf_sample_data *data,
|
||||
__perf_event_header__init_id(data, event, filtered_sample_type);
|
||||
|
||||
if (filtered_sample_type & PERF_SAMPLE_IP) {
|
||||
data->ip = perf_instruction_pointer(regs);
|
||||
data->ip = perf_instruction_pointer(event, regs);
|
||||
data->sample_flags |= PERF_SAMPLE_IP;
|
||||
}
|
||||
|
||||
@ -7984,7 +8011,7 @@ void perf_prepare_header(struct perf_event_header *header,
|
||||
{
|
||||
header->type = PERF_RECORD_SAMPLE;
|
||||
header->size = perf_sample_data_size(data, event);
|
||||
header->misc = perf_misc_flags(regs);
|
||||
header->misc = perf_misc_flags(event, regs);
|
||||
|
||||
/*
|
||||
* If you're adding more sample types here, you likely need to do
|
||||
@ -7997,6 +8024,49 @@ void perf_prepare_header(struct perf_event_header *header,
|
||||
WARN_ON_ONCE(header->size & 7);
|
||||
}
|
||||
|
||||
static void __perf_event_aux_pause(struct perf_event *event, bool pause)
|
||||
{
|
||||
if (pause) {
|
||||
if (!event->hw.aux_paused) {
|
||||
event->hw.aux_paused = 1;
|
||||
event->pmu->stop(event, PERF_EF_PAUSE);
|
||||
}
|
||||
} else {
|
||||
if (event->hw.aux_paused) {
|
||||
event->hw.aux_paused = 0;
|
||||
event->pmu->start(event, PERF_EF_RESUME);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void perf_event_aux_pause(struct perf_event *event, bool pause)
|
||||
{
|
||||
struct perf_buffer *rb;
|
||||
|
||||
if (WARN_ON_ONCE(!event))
|
||||
return;
|
||||
|
||||
rb = ring_buffer_get(event);
|
||||
if (!rb)
|
||||
return;
|
||||
|
||||
scoped_guard (irqsave) {
|
||||
/*
|
||||
* Guard against self-recursion here. Another event could trip
|
||||
* this same path from NMI context.
|
||||
*/
|
||||
if (READ_ONCE(rb->aux_in_pause_resume))
|
||||
break;
|
||||
|
||||
WRITE_ONCE(rb->aux_in_pause_resume, 1);
|
||||
barrier();
|
||||
__perf_event_aux_pause(event, pause);
|
||||
barrier();
|
||||
WRITE_ONCE(rb->aux_in_pause_resume, 0);
|
||||
}
|
||||
ring_buffer_put(rb);
|
||||
}
|
||||
|
||||
static __always_inline int
|
||||
__perf_event_output(struct perf_event *event,
|
||||
struct perf_sample_data *data,
|
||||
@ -9799,9 +9869,12 @@ static int __perf_event_overflow(struct perf_event *event,
|
||||
|
||||
ret = __perf_event_account_interrupt(event, throttle);
|
||||
|
||||
if (event->attr.aux_pause)
|
||||
perf_event_aux_pause(event->aux_event, true);
|
||||
|
||||
if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
|
||||
!bpf_overflow_handler(event, data, regs))
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* XXX event_limit might not quite work as expected on inherited
|
||||
@ -9863,6 +9936,9 @@ static int __perf_event_overflow(struct perf_event *event,
|
||||
event->pending_wakeup = 1;
|
||||
irq_work_queue(&event->pending_irq);
|
||||
}
|
||||
out:
|
||||
if (event->attr.aux_resume)
|
||||
perf_event_aux_pause(event->aux_event, false);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -12254,11 +12330,25 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
|
||||
}
|
||||
|
||||
if (event->attr.aux_output &&
|
||||
!(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
|
||||
(!(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT) ||
|
||||
event->attr.aux_pause || event->attr.aux_resume)) {
|
||||
err = -EOPNOTSUPP;
|
||||
goto err_pmu;
|
||||
}
|
||||
|
||||
if (event->attr.aux_pause && event->attr.aux_resume) {
|
||||
err = -EINVAL;
|
||||
goto err_pmu;
|
||||
}
|
||||
|
||||
if (event->attr.aux_start_paused) {
|
||||
if (!(pmu->capabilities & PERF_PMU_CAP_AUX_PAUSE)) {
|
||||
err = -EOPNOTSUPP;
|
||||
goto err_pmu;
|
||||
}
|
||||
event->hw.aux_paused = 1;
|
||||
}
|
||||
|
||||
if (cgroup_fd != -1) {
|
||||
err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
|
||||
if (err)
|
||||
@ -13052,7 +13142,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
|
||||
* Grouping is not supported for kernel events, neither is 'AUX',
|
||||
* make sure the caller's intentions are adjusted.
|
||||
*/
|
||||
if (attr->aux_output)
|
||||
if (attr->aux_output || attr->aux_action)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
event = perf_event_alloc(attr, cpu, task, NULL, NULL,
|
||||
|
@ -52,6 +52,7 @@ struct perf_buffer {
|
||||
void (*free_aux)(void *);
|
||||
refcount_t aux_refcount;
|
||||
int aux_in_sampling;
|
||||
int aux_in_pause_resume;
|
||||
void **aux_pages;
|
||||
void *aux_priv;
|
||||
|
||||
|
@ -26,6 +26,9 @@
|
||||
#include <linux/task_work.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/khugepaged.h>
|
||||
#include <linux/rcupdate_trace.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/srcu.h>
|
||||
|
||||
#include <linux/uprobes.h>
|
||||
|
||||
@ -42,8 +45,6 @@ static struct rb_root uprobes_tree = RB_ROOT;
|
||||
static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */
|
||||
static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock);
|
||||
|
||||
DEFINE_STATIC_SRCU(uprobes_srcu);
|
||||
|
||||
#define UPROBES_HASH_SZ 13
|
||||
/* serialize uprobe->pending_list */
|
||||
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
|
||||
@ -51,6 +52,9 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
|
||||
|
||||
DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
|
||||
|
||||
/* Covers return_instance's uprobe lifetime. */
|
||||
DEFINE_STATIC_SRCU(uretprobes_srcu);
|
||||
|
||||
/* Have a copy of original instruction */
|
||||
#define UPROBE_COPY_INSN 0
|
||||
|
||||
@ -62,10 +66,13 @@ struct uprobe {
|
||||
struct list_head pending_list;
|
||||
struct list_head consumers;
|
||||
struct inode *inode; /* Also hold a ref to inode */
|
||||
struct rcu_head rcu;
|
||||
union {
|
||||
struct rcu_head rcu;
|
||||
struct work_struct work;
|
||||
};
|
||||
loff_t offset;
|
||||
loff_t ref_ctr_offset;
|
||||
unsigned long flags;
|
||||
unsigned long flags; /* "unsigned long" so bitops work */
|
||||
|
||||
/*
|
||||
* The generic code assumes that it has two members of unknown type
|
||||
@ -100,7 +107,6 @@ static LIST_HEAD(delayed_uprobe_list);
|
||||
*/
|
||||
struct xol_area {
|
||||
wait_queue_head_t wq; /* if all slots are busy */
|
||||
atomic_t slot_count; /* number of in-use slots */
|
||||
unsigned long *bitmap; /* 0 = free slot */
|
||||
|
||||
struct page *page;
|
||||
@ -620,17 +626,23 @@ static inline bool uprobe_is_active(struct uprobe *uprobe)
|
||||
return !RB_EMPTY_NODE(&uprobe->rb_node);
|
||||
}
|
||||
|
||||
static void uprobe_free_rcu(struct rcu_head *rcu)
|
||||
static void uprobe_free_rcu_tasks_trace(struct rcu_head *rcu)
|
||||
{
|
||||
struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
|
||||
|
||||
kfree(uprobe);
|
||||
}
|
||||
|
||||
static void put_uprobe(struct uprobe *uprobe)
|
||||
static void uprobe_free_srcu(struct rcu_head *rcu)
|
||||
{
|
||||
if (!refcount_dec_and_test(&uprobe->ref))
|
||||
return;
|
||||
struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
|
||||
|
||||
call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu_tasks_trace);
|
||||
}
|
||||
|
||||
static void uprobe_free_deferred(struct work_struct *work)
|
||||
{
|
||||
struct uprobe *uprobe = container_of(work, struct uprobe, work);
|
||||
|
||||
write_lock(&uprobes_treelock);
|
||||
|
||||
@ -651,7 +663,162 @@ static void put_uprobe(struct uprobe *uprobe)
|
||||
delayed_uprobe_remove(uprobe, NULL);
|
||||
mutex_unlock(&delayed_uprobe_lock);
|
||||
|
||||
call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu);
|
||||
/* start srcu -> rcu_tasks_trace -> kfree chain */
|
||||
call_srcu(&uretprobes_srcu, &uprobe->rcu, uprobe_free_srcu);
|
||||
}
|
||||
|
||||
static void put_uprobe(struct uprobe *uprobe)
|
||||
{
|
||||
if (!refcount_dec_and_test(&uprobe->ref))
|
||||
return;
|
||||
|
||||
INIT_WORK(&uprobe->work, uprobe_free_deferred);
|
||||
schedule_work(&uprobe->work);
|
||||
}
|
||||
|
||||
/* Initialize hprobe as SRCU-protected "leased" uprobe */
|
||||
static void hprobe_init_leased(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx)
|
||||
{
|
||||
WARN_ON(!uprobe);
|
||||
hprobe->state = HPROBE_LEASED;
|
||||
hprobe->uprobe = uprobe;
|
||||
hprobe->srcu_idx = srcu_idx;
|
||||
}
|
||||
|
||||
/* Initialize hprobe as refcounted ("stable") uprobe (uprobe can be NULL). */
|
||||
static void hprobe_init_stable(struct hprobe *hprobe, struct uprobe *uprobe)
|
||||
{
|
||||
hprobe->state = uprobe ? HPROBE_STABLE : HPROBE_GONE;
|
||||
hprobe->uprobe = uprobe;
|
||||
hprobe->srcu_idx = -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* hprobe_consume() fetches hprobe's underlying uprobe and detects whether
|
||||
* uprobe is SRCU protected or is refcounted. hprobe_consume() can be
|
||||
* used only once for a given hprobe.
|
||||
*
|
||||
* Caller has to call hprobe_finalize() and pass previous hprobe_state, so
|
||||
* that hprobe_finalize() can perform SRCU unlock or put uprobe, whichever
|
||||
* is appropriate.
|
||||
*/
|
||||
static inline struct uprobe *hprobe_consume(struct hprobe *hprobe, enum hprobe_state *hstate)
|
||||
{
|
||||
*hstate = xchg(&hprobe->state, HPROBE_CONSUMED);
|
||||
switch (*hstate) {
|
||||
case HPROBE_LEASED:
|
||||
case HPROBE_STABLE:
|
||||
return hprobe->uprobe;
|
||||
case HPROBE_GONE: /* uprobe is NULL, no SRCU */
|
||||
case HPROBE_CONSUMED: /* uprobe was finalized already, do nothing */
|
||||
return NULL;
|
||||
default:
|
||||
WARN(1, "hprobe invalid state %d", *hstate);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset hprobe state and, if hprobe was LEASED, release SRCU lock.
|
||||
* hprobe_finalize() can only be used from current context after
|
||||
* hprobe_consume() call (which determines uprobe and hstate value).
|
||||
*/
|
||||
static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate)
|
||||
{
|
||||
switch (hstate) {
|
||||
case HPROBE_LEASED:
|
||||
__srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
|
||||
break;
|
||||
case HPROBE_STABLE:
|
||||
put_uprobe(hprobe->uprobe);
|
||||
break;
|
||||
case HPROBE_GONE:
|
||||
case HPROBE_CONSUMED:
|
||||
break;
|
||||
default:
|
||||
WARN(1, "hprobe invalid state %d", hstate);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to switch (atomically) uprobe from being SRCU protected (LEASED)
|
||||
* to refcounted (STABLE) state. Competes with hprobe_consume(); only one of
|
||||
* them can win the race to perform SRCU unlocking. Whoever wins must perform
|
||||
* SRCU unlock.
|
||||
*
|
||||
* Returns underlying valid uprobe or NULL, if there was no underlying uprobe
|
||||
* to begin with or we failed to bump its refcount and it's going away.
|
||||
*
|
||||
* Returned non-NULL uprobe can be still safely used within an ongoing SRCU
|
||||
* locked region. If `get` is true, it's guaranteed that non-NULL uprobe has
|
||||
* an extra refcount for caller to assume and use. Otherwise, it's not
|
||||
* guaranteed that returned uprobe has a positive refcount, so caller has to
|
||||
* attempt try_get_uprobe(), if it needs to preserve uprobe beyond current
|
||||
* SRCU lock region. See dup_utask().
|
||||
*/
|
||||
static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get)
|
||||
{
|
||||
enum hprobe_state hstate;
|
||||
|
||||
/*
|
||||
* return_instance's hprobe is protected by RCU.
|
||||
* Underlying uprobe is itself protected from reuse by SRCU.
|
||||
*/
|
||||
lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu));
|
||||
|
||||
hstate = READ_ONCE(hprobe->state);
|
||||
switch (hstate) {
|
||||
case HPROBE_STABLE:
|
||||
/* uprobe has positive refcount, bump refcount, if necessary */
|
||||
return get ? get_uprobe(hprobe->uprobe) : hprobe->uprobe;
|
||||
case HPROBE_GONE:
|
||||
/*
|
||||
* SRCU was unlocked earlier and we didn't manage to take
|
||||
* uprobe refcnt, so it's effectively NULL
|
||||
*/
|
||||
return NULL;
|
||||
case HPROBE_CONSUMED:
|
||||
/*
|
||||
* uprobe was consumed, so it's effectively NULL as far as
|
||||
* uretprobe processing logic is concerned
|
||||
*/
|
||||
return NULL;
|
||||
case HPROBE_LEASED: {
|
||||
struct uprobe *uprobe = try_get_uprobe(hprobe->uprobe);
|
||||
/*
|
||||
* Try to switch hprobe state, guarding against
|
||||
* hprobe_consume() or another hprobe_expire() racing with us.
|
||||
* Note, if we failed to get uprobe refcount, we use special
|
||||
* HPROBE_GONE state to signal that hprobe->uprobe shouldn't
|
||||
* be used as it will be freed after SRCU is unlocked.
|
||||
*/
|
||||
if (try_cmpxchg(&hprobe->state, &hstate, uprobe ? HPROBE_STABLE : HPROBE_GONE)) {
|
||||
/* We won the race, we are the ones to unlock SRCU */
|
||||
__srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx);
|
||||
return get ? get_uprobe(uprobe) : uprobe;
|
||||
}
|
||||
|
||||
/*
|
||||
* We lost the race, undo refcount bump (if it ever happened),
|
||||
* unless caller would like an extra refcount anyways.
|
||||
*/
|
||||
if (uprobe && !get)
|
||||
put_uprobe(uprobe);
|
||||
/*
|
||||
* Even if hprobe_consume() or another hprobe_expire() wins
|
||||
* the state update race and unlocks SRCU from under us, we
|
||||
* still have a guarantee that underlying uprobe won't be
|
||||
* freed due to ongoing caller's SRCU lock region, so we can
|
||||
* return it regardless. Also, if `get` was true, we also have
|
||||
* an extra ref for the caller to own. This is used in dup_utask().
|
||||
*/
|
||||
return uprobe;
|
||||
}
|
||||
default:
|
||||
WARN(1, "unknown hprobe state %d", hstate);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static __always_inline
|
||||
@ -706,7 +873,7 @@ static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset)
|
||||
struct rb_node *node;
|
||||
unsigned int seq;
|
||||
|
||||
lockdep_assert(srcu_read_lock_held(&uprobes_srcu));
|
||||
lockdep_assert(rcu_read_lock_trace_held());
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&uprobes_seqcount);
|
||||
@ -825,8 +992,11 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
|
||||
|
||||
static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
|
||||
{
|
||||
static atomic64_t id;
|
||||
|
||||
down_write(&uprobe->consumer_rwsem);
|
||||
list_add_rcu(&uc->cons_node, &uprobe->consumers);
|
||||
uc->id = (__u64) atomic64_inc_return(&id);
|
||||
up_write(&uprobe->consumer_rwsem);
|
||||
}
|
||||
|
||||
@ -934,8 +1104,7 @@ static bool filter_chain(struct uprobe *uprobe, struct mm_struct *mm)
|
||||
bool ret = false;
|
||||
|
||||
down_read(&uprobe->consumer_rwsem);
|
||||
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
|
||||
srcu_read_lock_held(&uprobes_srcu)) {
|
||||
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
|
||||
ret = consumer_filter(uc, mm);
|
||||
if (ret)
|
||||
break;
|
||||
@ -1156,7 +1325,8 @@ void uprobe_unregister_sync(void)
|
||||
* unlucky enough caller can free consumer's memory and cause
|
||||
* handler_chain() or handle_uretprobe_chain() to do a use-after-free.
|
||||
*/
|
||||
synchronize_srcu(&uprobes_srcu);
|
||||
synchronize_rcu_tasks_trace();
|
||||
synchronize_srcu(&uretprobes_srcu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(uprobe_unregister_sync);
|
||||
|
||||
@ -1240,19 +1410,18 @@ EXPORT_SYMBOL_GPL(uprobe_register);
|
||||
int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
|
||||
{
|
||||
struct uprobe_consumer *con;
|
||||
int ret = -ENOENT, srcu_idx;
|
||||
int ret = -ENOENT;
|
||||
|
||||
down_write(&uprobe->register_rwsem);
|
||||
|
||||
srcu_idx = srcu_read_lock(&uprobes_srcu);
|
||||
list_for_each_entry_srcu(con, &uprobe->consumers, cons_node,
|
||||
srcu_read_lock_held(&uprobes_srcu)) {
|
||||
rcu_read_lock_trace();
|
||||
list_for_each_entry_rcu(con, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
|
||||
if (con == uc) {
|
||||
ret = register_for_each_vma(uprobe, add ? uc : NULL);
|
||||
break;
|
||||
}
|
||||
}
|
||||
srcu_read_unlock(&uprobes_srcu, srcu_idx);
|
||||
rcu_read_unlock_trace();
|
||||
|
||||
up_write(&uprobe->register_rwsem);
|
||||
|
||||
@ -1475,9 +1644,15 @@ static vm_fault_t xol_fault(const struct vm_special_mapping *sm,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xol_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
|
||||
{
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
static const struct vm_special_mapping xol_mapping = {
|
||||
.name = "[uprobes]",
|
||||
.fault = xol_fault,
|
||||
.mremap = xol_mremap,
|
||||
};
|
||||
|
||||
/* Slot allocation for XOL */
|
||||
@ -1553,7 +1728,6 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
|
||||
init_waitqueue_head(&area->wq);
|
||||
/* Reserve the 1st slot for get_trampoline_vaddr() */
|
||||
set_bit(0, area->bitmap);
|
||||
atomic_set(&area->slot_count, 1);
|
||||
insns = arch_uprobe_trampoline(&insns_size);
|
||||
arch_uprobe_copy_ixol(area->page, 0, insns, insns_size);
|
||||
|
||||
@ -1626,92 +1800,57 @@ void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* - search for a free slot.
|
||||
*/
|
||||
static unsigned long xol_take_insn_slot(struct xol_area *area)
|
||||
static unsigned long xol_get_slot_nr(struct xol_area *area)
|
||||
{
|
||||
unsigned long slot_addr;
|
||||
int slot_nr;
|
||||
unsigned long slot_nr;
|
||||
|
||||
do {
|
||||
slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
|
||||
if (slot_nr < UINSNS_PER_PAGE) {
|
||||
if (!test_and_set_bit(slot_nr, area->bitmap))
|
||||
break;
|
||||
slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
|
||||
if (slot_nr < UINSNS_PER_PAGE) {
|
||||
if (!test_and_set_bit(slot_nr, area->bitmap))
|
||||
return slot_nr;
|
||||
}
|
||||
|
||||
slot_nr = UINSNS_PER_PAGE;
|
||||
continue;
|
||||
}
|
||||
wait_event(area->wq, (atomic_read(&area->slot_count) < UINSNS_PER_PAGE));
|
||||
} while (slot_nr >= UINSNS_PER_PAGE);
|
||||
|
||||
slot_addr = area->vaddr + (slot_nr * UPROBE_XOL_SLOT_BYTES);
|
||||
atomic_inc(&area->slot_count);
|
||||
|
||||
return slot_addr;
|
||||
return UINSNS_PER_PAGE;
|
||||
}
|
||||
|
||||
/*
|
||||
* xol_get_insn_slot - allocate a slot for xol.
|
||||
* Returns the allocated slot address or 0.
|
||||
*/
|
||||
static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
|
||||
static bool xol_get_insn_slot(struct uprobe *uprobe, struct uprobe_task *utask)
|
||||
{
|
||||
struct xol_area *area;
|
||||
unsigned long xol_vaddr;
|
||||
struct xol_area *area = get_xol_area();
|
||||
unsigned long slot_nr;
|
||||
|
||||
area = get_xol_area();
|
||||
if (!area)
|
||||
return 0;
|
||||
return false;
|
||||
|
||||
xol_vaddr = xol_take_insn_slot(area);
|
||||
if (unlikely(!xol_vaddr))
|
||||
return 0;
|
||||
wait_event(area->wq, (slot_nr = xol_get_slot_nr(area)) < UINSNS_PER_PAGE);
|
||||
|
||||
arch_uprobe_copy_ixol(area->page, xol_vaddr,
|
||||
utask->xol_vaddr = area->vaddr + slot_nr * UPROBE_XOL_SLOT_BYTES;
|
||||
arch_uprobe_copy_ixol(area->page, utask->xol_vaddr,
|
||||
&uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
|
||||
|
||||
return xol_vaddr;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* xol_free_insn_slot - If slot was earlier allocated by
|
||||
* @xol_get_insn_slot(), make the slot available for
|
||||
* subsequent requests.
|
||||
* xol_free_insn_slot - free the slot allocated by xol_get_insn_slot()
|
||||
*/
|
||||
static void xol_free_insn_slot(struct task_struct *tsk)
|
||||
static void xol_free_insn_slot(struct uprobe_task *utask)
|
||||
{
|
||||
struct xol_area *area;
|
||||
unsigned long vma_end;
|
||||
unsigned long slot_addr;
|
||||
struct xol_area *area = current->mm->uprobes_state.xol_area;
|
||||
unsigned long offset = utask->xol_vaddr - area->vaddr;
|
||||
unsigned int slot_nr;
|
||||
|
||||
if (!tsk->mm || !tsk->mm->uprobes_state.xol_area || !tsk->utask)
|
||||
utask->xol_vaddr = 0;
|
||||
/* xol_vaddr must fit into [area->vaddr, area->vaddr + PAGE_SIZE) */
|
||||
if (WARN_ON_ONCE(offset >= PAGE_SIZE))
|
||||
return;
|
||||
|
||||
slot_addr = tsk->utask->xol_vaddr;
|
||||
if (unlikely(!slot_addr))
|
||||
return;
|
||||
|
||||
area = tsk->mm->uprobes_state.xol_area;
|
||||
vma_end = area->vaddr + PAGE_SIZE;
|
||||
if (area->vaddr <= slot_addr && slot_addr < vma_end) {
|
||||
unsigned long offset;
|
||||
int slot_nr;
|
||||
|
||||
offset = slot_addr - area->vaddr;
|
||||
slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
|
||||
if (slot_nr >= UINSNS_PER_PAGE)
|
||||
return;
|
||||
|
||||
clear_bit(slot_nr, area->bitmap);
|
||||
atomic_dec(&area->slot_count);
|
||||
smp_mb__after_atomic(); /* pairs with prepare_to_wait() */
|
||||
if (waitqueue_active(&area->wq))
|
||||
wake_up(&area->wq);
|
||||
|
||||
tsk->utask->xol_vaddr = 0;
|
||||
}
|
||||
slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
|
||||
clear_bit(slot_nr, area->bitmap);
|
||||
smp_mb__after_atomic(); /* pairs with prepare_to_wait() */
|
||||
if (waitqueue_active(&area->wq))
|
||||
wake_up(&area->wq);
|
||||
}
|
||||
|
||||
void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
|
||||
@ -1750,11 +1889,18 @@ unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
|
||||
return instruction_pointer(regs);
|
||||
}
|
||||
|
||||
static struct return_instance *free_ret_instance(struct return_instance *ri)
|
||||
static struct return_instance *free_ret_instance(struct return_instance *ri, bool cleanup_hprobe)
|
||||
{
|
||||
struct return_instance *next = ri->next;
|
||||
put_uprobe(ri->uprobe);
|
||||
kfree(ri);
|
||||
|
||||
if (cleanup_hprobe) {
|
||||
enum hprobe_state hstate;
|
||||
|
||||
(void)hprobe_consume(&ri->hprobe, &hstate);
|
||||
hprobe_finalize(&ri->hprobe, hstate);
|
||||
}
|
||||
|
||||
kfree_rcu(ri, rcu);
|
||||
return next;
|
||||
}
|
||||
|
||||
@ -1770,18 +1916,50 @@ void uprobe_free_utask(struct task_struct *t)
|
||||
if (!utask)
|
||||
return;
|
||||
|
||||
if (utask->active_uprobe)
|
||||
put_uprobe(utask->active_uprobe);
|
||||
WARN_ON_ONCE(utask->active_uprobe || utask->xol_vaddr);
|
||||
|
||||
timer_delete_sync(&utask->ri_timer);
|
||||
|
||||
ri = utask->return_instances;
|
||||
while (ri)
|
||||
ri = free_ret_instance(ri);
|
||||
ri = free_ret_instance(ri, true /* cleanup_hprobe */);
|
||||
|
||||
xol_free_insn_slot(t);
|
||||
kfree(utask);
|
||||
t->utask = NULL;
|
||||
}
|
||||
|
||||
#define RI_TIMER_PERIOD (HZ / 10) /* 100 ms */
|
||||
|
||||
#define for_each_ret_instance_rcu(pos, head) \
|
||||
for (pos = rcu_dereference_raw(head); pos; pos = rcu_dereference_raw(pos->next))
|
||||
|
||||
static void ri_timer(struct timer_list *timer)
|
||||
{
|
||||
struct uprobe_task *utask = container_of(timer, struct uprobe_task, ri_timer);
|
||||
struct return_instance *ri;
|
||||
|
||||
/* SRCU protects uprobe from reuse for the cmpxchg() inside hprobe_expire(). */
|
||||
guard(srcu)(&uretprobes_srcu);
|
||||
/* RCU protects return_instance from freeing. */
|
||||
guard(rcu)();
|
||||
|
||||
for_each_ret_instance_rcu(ri, utask->return_instances)
|
||||
hprobe_expire(&ri->hprobe, false);
|
||||
}
|
||||
|
||||
static struct uprobe_task *alloc_utask(void)
|
||||
{
|
||||
struct uprobe_task *utask;
|
||||
|
||||
utask = kzalloc(sizeof(*utask), GFP_KERNEL);
|
||||
if (!utask)
|
||||
return NULL;
|
||||
|
||||
timer_setup(&utask->ri_timer, ri_timer, 0);
|
||||
|
||||
return utask;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a uprobe_task object for the task if necessary.
|
||||
* Called when the thread hits a breakpoint.
|
||||
@ -1793,38 +1971,73 @@ void uprobe_free_utask(struct task_struct *t)
|
||||
static struct uprobe_task *get_utask(void)
|
||||
{
|
||||
if (!current->utask)
|
||||
current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
|
||||
current->utask = alloc_utask();
|
||||
return current->utask;
|
||||
}
|
||||
|
||||
static size_t ri_size(int consumers_cnt)
|
||||
{
|
||||
struct return_instance *ri;
|
||||
|
||||
return sizeof(*ri) + sizeof(ri->consumers[0]) * consumers_cnt;
|
||||
}
|
||||
|
||||
#define DEF_CNT 4
|
||||
|
||||
static struct return_instance *alloc_return_instance(void)
|
||||
{
|
||||
struct return_instance *ri;
|
||||
|
||||
ri = kzalloc(ri_size(DEF_CNT), GFP_KERNEL);
|
||||
if (!ri)
|
||||
return ZERO_SIZE_PTR;
|
||||
|
||||
ri->consumers_cnt = DEF_CNT;
|
||||
return ri;
|
||||
}
|
||||
|
||||
static struct return_instance *dup_return_instance(struct return_instance *old)
|
||||
{
|
||||
size_t size = ri_size(old->consumers_cnt);
|
||||
|
||||
return kmemdup(old, size, GFP_KERNEL);
|
||||
}
|
||||
|
||||
static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
|
||||
{
|
||||
struct uprobe_task *n_utask;
|
||||
struct return_instance **p, *o, *n;
|
||||
struct uprobe *uprobe;
|
||||
|
||||
n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
|
||||
n_utask = alloc_utask();
|
||||
if (!n_utask)
|
||||
return -ENOMEM;
|
||||
t->utask = n_utask;
|
||||
|
||||
/* protect uprobes from freeing, we'll need try_get_uprobe() them */
|
||||
guard(srcu)(&uretprobes_srcu);
|
||||
|
||||
p = &n_utask->return_instances;
|
||||
for (o = o_utask->return_instances; o; o = o->next) {
|
||||
n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
|
||||
n = dup_return_instance(o);
|
||||
if (!n)
|
||||
return -ENOMEM;
|
||||
|
||||
*n = *o;
|
||||
/*
|
||||
* uprobe's refcnt has to be positive at this point, kept by
|
||||
* utask->return_instances items; return_instances can't be
|
||||
* removed right now, as task is blocked due to duping; so
|
||||
* get_uprobe() is safe to use here.
|
||||
*/
|
||||
get_uprobe(n->uprobe);
|
||||
n->next = NULL;
|
||||
/* if uprobe is non-NULL, we'll have an extra refcount for uprobe */
|
||||
uprobe = hprobe_expire(&o->hprobe, true);
|
||||
|
||||
*p = n;
|
||||
/*
|
||||
* New utask will have stable properly refcounted uprobe or
|
||||
* NULL. Even if we failed to get refcounted uprobe, we still
|
||||
* need to preserve full set of return_instances for proper
|
||||
* uretprobe handling and nesting in forked task.
|
||||
*/
|
||||
hprobe_init_stable(&n->hprobe, uprobe);
|
||||
|
||||
n->next = NULL;
|
||||
rcu_assign_pointer(*p, n);
|
||||
p = &n->next;
|
||||
|
||||
n_utask->depth++;
|
||||
}
|
||||
|
||||
@ -1900,45 +2113,34 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
|
||||
enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL;
|
||||
|
||||
while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) {
|
||||
ri = free_ret_instance(ri);
|
||||
ri = free_ret_instance(ri, true /* cleanup_hprobe */);
|
||||
utask->depth--;
|
||||
}
|
||||
utask->return_instances = ri;
|
||||
rcu_assign_pointer(utask->return_instances, ri);
|
||||
}
|
||||
|
||||
static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
|
||||
struct return_instance *ri)
|
||||
{
|
||||
struct return_instance *ri;
|
||||
struct uprobe_task *utask;
|
||||
struct uprobe_task *utask = current->utask;
|
||||
unsigned long orig_ret_vaddr, trampoline_vaddr;
|
||||
bool chained;
|
||||
int srcu_idx;
|
||||
|
||||
if (!get_xol_area())
|
||||
return;
|
||||
|
||||
utask = get_utask();
|
||||
if (!utask)
|
||||
return;
|
||||
goto free;
|
||||
|
||||
if (utask->depth >= MAX_URETPROBE_DEPTH) {
|
||||
printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
|
||||
" nestedness limit pid/tgid=%d/%d\n",
|
||||
current->pid, current->tgid);
|
||||
return;
|
||||
goto free;
|
||||
}
|
||||
|
||||
/* we need to bump refcount to store uprobe in utask */
|
||||
if (!try_get_uprobe(uprobe))
|
||||
return;
|
||||
|
||||
ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
|
||||
if (!ri)
|
||||
goto fail;
|
||||
|
||||
trampoline_vaddr = uprobe_get_trampoline_vaddr();
|
||||
orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
|
||||
if (orig_ret_vaddr == -1)
|
||||
goto fail;
|
||||
goto free;
|
||||
|
||||
/* drop the entries invalidated by longjmp() */
|
||||
chained = (orig_ret_vaddr == trampoline_vaddr);
|
||||
@ -1956,53 +2158,51 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
* attack from user-space.
|
||||
*/
|
||||
uprobe_warn(current, "handle tail call");
|
||||
goto fail;
|
||||
goto free;
|
||||
}
|
||||
orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
|
||||
}
|
||||
ri->uprobe = uprobe;
|
||||
|
||||
/* __srcu_read_lock() because SRCU lock survives switch to user space */
|
||||
srcu_idx = __srcu_read_lock(&uretprobes_srcu);
|
||||
|
||||
ri->func = instruction_pointer(regs);
|
||||
ri->stack = user_stack_pointer(regs);
|
||||
ri->orig_ret_vaddr = orig_ret_vaddr;
|
||||
ri->chained = chained;
|
||||
|
||||
utask->depth++;
|
||||
|
||||
hprobe_init_leased(&ri->hprobe, uprobe, srcu_idx);
|
||||
ri->next = utask->return_instances;
|
||||
utask->return_instances = ri;
|
||||
rcu_assign_pointer(utask->return_instances, ri);
|
||||
|
||||
mod_timer(&utask->ri_timer, jiffies + RI_TIMER_PERIOD);
|
||||
|
||||
return;
|
||||
fail:
|
||||
free:
|
||||
kfree(ri);
|
||||
put_uprobe(uprobe);
|
||||
}
|
||||
|
||||
/* Prepare to single-step probed instruction out of line. */
|
||||
static int
|
||||
pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
|
||||
{
|
||||
struct uprobe_task *utask;
|
||||
unsigned long xol_vaddr;
|
||||
struct uprobe_task *utask = current->utask;
|
||||
int err;
|
||||
|
||||
utask = get_utask();
|
||||
if (!utask)
|
||||
return -ENOMEM;
|
||||
|
||||
if (!try_get_uprobe(uprobe))
|
||||
return -EINVAL;
|
||||
|
||||
xol_vaddr = xol_get_insn_slot(uprobe);
|
||||
if (!xol_vaddr) {
|
||||
if (!xol_get_insn_slot(uprobe, utask)) {
|
||||
err = -ENOMEM;
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
utask->xol_vaddr = xol_vaddr;
|
||||
utask->vaddr = bp_vaddr;
|
||||
|
||||
err = arch_uprobe_pre_xol(&uprobe->arch, regs);
|
||||
if (unlikely(err)) {
|
||||
xol_free_insn_slot(current);
|
||||
xol_free_insn_slot(utask);
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
@ -2125,35 +2325,90 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
|
||||
return uprobe;
|
||||
}
|
||||
|
||||
static struct return_instance*
|
||||
push_consumer(struct return_instance *ri, int idx, __u64 id, __u64 cookie)
|
||||
{
|
||||
if (unlikely(ri == ZERO_SIZE_PTR))
|
||||
return ri;
|
||||
|
||||
if (unlikely(idx >= ri->consumers_cnt)) {
|
||||
struct return_instance *old_ri = ri;
|
||||
|
||||
ri->consumers_cnt += DEF_CNT;
|
||||
ri = krealloc(old_ri, ri_size(old_ri->consumers_cnt), GFP_KERNEL);
|
||||
if (!ri) {
|
||||
kfree(old_ri);
|
||||
return ZERO_SIZE_PTR;
|
||||
}
|
||||
}
|
||||
|
||||
ri->consumers[idx].id = id;
|
||||
ri->consumers[idx].cookie = cookie;
|
||||
return ri;
|
||||
}
|
||||
|
||||
static struct return_consumer *
|
||||
return_consumer_find(struct return_instance *ri, int *iter, int id)
|
||||
{
|
||||
struct return_consumer *ric;
|
||||
int idx = *iter;
|
||||
|
||||
for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) {
|
||||
if (ric->id == id) {
|
||||
*iter = idx + 1;
|
||||
return ric;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static bool ignore_ret_handler(int rc)
|
||||
{
|
||||
return rc == UPROBE_HANDLER_REMOVE || rc == UPROBE_HANDLER_IGNORE;
|
||||
}
|
||||
|
||||
static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe_consumer *uc;
|
||||
int remove = UPROBE_HANDLER_REMOVE;
|
||||
bool need_prep = false; /* prepare return uprobe, when needed */
|
||||
bool has_consumers = false;
|
||||
bool has_consumers = false, remove = true;
|
||||
struct return_instance *ri = NULL;
|
||||
int push_idx = 0;
|
||||
|
||||
current->utask->auprobe = &uprobe->arch;
|
||||
|
||||
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
|
||||
srcu_read_lock_held(&uprobes_srcu)) {
|
||||
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
|
||||
bool session = uc->handler && uc->ret_handler;
|
||||
__u64 cookie = 0;
|
||||
int rc = 0;
|
||||
|
||||
if (uc->handler) {
|
||||
rc = uc->handler(uc, regs);
|
||||
WARN(rc & ~UPROBE_HANDLER_MASK,
|
||||
rc = uc->handler(uc, regs, &cookie);
|
||||
WARN(rc < 0 || rc > 2,
|
||||
"bad rc=0x%x from %ps()\n", rc, uc->handler);
|
||||
}
|
||||
|
||||
if (uc->ret_handler)
|
||||
need_prep = true;
|
||||
|
||||
remove &= rc;
|
||||
remove &= rc == UPROBE_HANDLER_REMOVE;
|
||||
has_consumers = true;
|
||||
|
||||
if (!uc->ret_handler || ignore_ret_handler(rc))
|
||||
continue;
|
||||
|
||||
if (!ri)
|
||||
ri = alloc_return_instance();
|
||||
|
||||
if (session)
|
||||
ri = push_consumer(ri, push_idx++, uc->id, cookie);
|
||||
}
|
||||
current->utask->auprobe = NULL;
|
||||
|
||||
if (need_prep && !remove)
|
||||
prepare_uretprobe(uprobe, regs); /* put bp at return */
|
||||
if (!ZERO_OR_NULL_PTR(ri)) {
|
||||
/*
|
||||
* The push_idx value has the final number of return consumers,
|
||||
* and ri->consumers_cnt has number of allocated consumers.
|
||||
*/
|
||||
ri->consumers_cnt = push_idx;
|
||||
prepare_uretprobe(uprobe, regs, ri);
|
||||
}
|
||||
|
||||
if (remove && has_consumers) {
|
||||
down_read(&uprobe->register_rwsem);
|
||||
@ -2169,19 +2424,27 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
}
|
||||
|
||||
static void
|
||||
handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
|
||||
handle_uretprobe_chain(struct return_instance *ri, struct uprobe *uprobe, struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe *uprobe = ri->uprobe;
|
||||
struct return_consumer *ric;
|
||||
struct uprobe_consumer *uc;
|
||||
int srcu_idx;
|
||||
int ric_idx = 0;
|
||||
|
||||
srcu_idx = srcu_read_lock(&uprobes_srcu);
|
||||
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
|
||||
srcu_read_lock_held(&uprobes_srcu)) {
|
||||
if (uc->ret_handler)
|
||||
uc->ret_handler(uc, ri->func, regs);
|
||||
/* all consumers unsubscribed meanwhile */
|
||||
if (unlikely(!uprobe))
|
||||
return;
|
||||
|
||||
rcu_read_lock_trace();
|
||||
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
|
||||
bool session = uc->handler && uc->ret_handler;
|
||||
|
||||
if (uc->ret_handler) {
|
||||
ric = return_consumer_find(ri, &ric_idx, uc->id);
|
||||
if (!session || ric)
|
||||
uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie : NULL);
|
||||
}
|
||||
}
|
||||
srcu_read_unlock(&uprobes_srcu, srcu_idx);
|
||||
rcu_read_unlock_trace();
|
||||
}
|
||||
|
||||
static struct return_instance *find_next_ret_chain(struct return_instance *ri)
|
||||
@ -2200,6 +2463,8 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe_task *utask;
|
||||
struct return_instance *ri, *next;
|
||||
struct uprobe *uprobe;
|
||||
enum hprobe_state hstate;
|
||||
bool valid;
|
||||
|
||||
utask = current->utask;
|
||||
@ -2230,21 +2495,24 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
|
||||
* trampoline addresses on the stack are replaced with correct
|
||||
* original return addresses
|
||||
*/
|
||||
utask->return_instances = ri->next;
|
||||
rcu_assign_pointer(utask->return_instances, ri->next);
|
||||
|
||||
uprobe = hprobe_consume(&ri->hprobe, &hstate);
|
||||
if (valid)
|
||||
handle_uretprobe_chain(ri, regs);
|
||||
ri = free_ret_instance(ri);
|
||||
handle_uretprobe_chain(ri, uprobe, regs);
|
||||
hprobe_finalize(&ri->hprobe, hstate);
|
||||
|
||||
/* We already took care of hprobe, no need to waste more time on that. */
|
||||
ri = free_ret_instance(ri, false /* !cleanup_hprobe */);
|
||||
utask->depth--;
|
||||
} while (ri != next);
|
||||
} while (!valid);
|
||||
|
||||
utask->return_instances = ri;
|
||||
return;
|
||||
|
||||
sigill:
|
||||
sigill:
|
||||
uprobe_warn(current, "handle uretprobe, sending SIGILL.");
|
||||
force_sig(SIGILL);
|
||||
|
||||
}
|
||||
|
||||
bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs)
|
||||
@ -2266,13 +2534,13 @@ static void handle_swbp(struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe *uprobe;
|
||||
unsigned long bp_vaddr;
|
||||
int is_swbp, srcu_idx;
|
||||
int is_swbp;
|
||||
|
||||
bp_vaddr = uprobe_get_swbp_addr(regs);
|
||||
if (bp_vaddr == uprobe_get_trampoline_vaddr())
|
||||
return uprobe_handle_trampoline(regs);
|
||||
|
||||
srcu_idx = srcu_read_lock(&uprobes_srcu);
|
||||
rcu_read_lock_trace();
|
||||
|
||||
uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
|
||||
if (!uprobe) {
|
||||
@ -2330,7 +2598,7 @@ static void handle_swbp(struct pt_regs *regs)
|
||||
|
||||
out:
|
||||
/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
|
||||
srcu_read_unlock(&uprobes_srcu, srcu_idx);
|
||||
rcu_read_unlock_trace();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2353,7 +2621,7 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
|
||||
put_uprobe(uprobe);
|
||||
utask->active_uprobe = NULL;
|
||||
utask->state = UTASK_RUNNING;
|
||||
xol_free_insn_slot(current);
|
||||
xol_free_insn_slot(utask);
|
||||
|
||||
spin_lock_irq(&current->sighand->siglock);
|
||||
recalc_sigpending(); /* see uprobe_deny_signal() */
|
||||
|
@ -3240,7 +3240,8 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm)
|
||||
}
|
||||
|
||||
static int
|
||||
uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
|
||||
uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
|
||||
__u64 *data)
|
||||
{
|
||||
struct bpf_uprobe *uprobe;
|
||||
|
||||
@ -3249,7 +3250,8 @@ uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
|
||||
}
|
||||
|
||||
static int
|
||||
uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs)
|
||||
uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs,
|
||||
__u64 *data)
|
||||
{
|
||||
struct bpf_uprobe *uprobe;
|
||||
|
||||
|
@ -89,9 +89,11 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
|
||||
static int register_uprobe_event(struct trace_uprobe *tu);
|
||||
static int unregister_uprobe_event(struct trace_uprobe *tu);
|
||||
|
||||
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
|
||||
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
|
||||
__u64 *data);
|
||||
static int uretprobe_dispatcher(struct uprobe_consumer *con,
|
||||
unsigned long func, struct pt_regs *regs);
|
||||
unsigned long func, struct pt_regs *regs,
|
||||
__u64 *data);
|
||||
|
||||
#ifdef CONFIG_STACK_GROWSUP
|
||||
static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
|
||||
@ -1522,7 +1524,8 @@ trace_uprobe_register(struct trace_event_call *event, enum trace_reg type,
|
||||
}
|
||||
}
|
||||
|
||||
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
|
||||
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
|
||||
__u64 *data)
|
||||
{
|
||||
struct trace_uprobe *tu;
|
||||
struct uprobe_dispatch_data udd;
|
||||
@ -1553,7 +1556,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
|
||||
}
|
||||
|
||||
static int uretprobe_dispatcher(struct uprobe_consumer *con,
|
||||
unsigned long func, struct pt_regs *regs)
|
||||
unsigned long func, struct pt_regs *regs,
|
||||
__u64 *data)
|
||||
{
|
||||
struct trace_uprobe *tu;
|
||||
struct uprobe_dispatch_data udd;
|
||||
|
@ -461,7 +461,7 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
|
||||
|
||||
static int
|
||||
uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func,
|
||||
struct pt_regs *regs)
|
||||
struct pt_regs *regs, __u64 *data)
|
||||
|
||||
{
|
||||
regs->ax = 0x12345678deadbeef;
|
||||
|
Loading…
Reference in New Issue
Block a user