linux/kernel/bpf/dispatcher.c
Xu Kuohai 7c8ce4ffb6 bpf: Add kernel symbol for struct_ops trampoline
Without kernel symbols for struct_ops trampoline, the unwinder may
produce unexpected stacktraces.

For example, the x86 ORC and FP unwinders check if an IP is in kernel
text by verifying the presence of the IP's kernel symbol. When a
struct_ops trampoline address is encountered, the unwinder stops due
to the absence of symbol, resulting in an incomplete stacktrace that
consists only of direct and indirect child functions called from the
trampoline.

The arm64 unwinder is another example. While the arm64 unwinder can
proceed across a struct_ops trampoline address, the corresponding
symbol name is displayed as "unknown", which is confusing.

Thus, add kernel symbol for struct_ops trampoline. The name is
bpf__<struct_ops_name>_<member_name>, where <struct_ops_name> is the
type name of the struct_ops, and <member_name> is the name of
the member that the trampoline is linked to.

Below is a comparison of stacktraces captured on x86 by perf record,
before and after this patch.

Before:
ffffffff8116545d __lock_acquire+0xad ([kernel.kallsyms])
ffffffff81167fcc lock_acquire+0xcc ([kernel.kallsyms])
ffffffff813088f4 __bpf_prog_enter+0x34 ([kernel.kallsyms])

After:
ffffffff811656bd __lock_acquire+0x30d ([kernel.kallsyms])
ffffffff81167fcc lock_acquire+0xcc ([kernel.kallsyms])
ffffffff81309024 __bpf_prog_enter+0x34 ([kernel.kallsyms])
ffffffffc000d7e9 bpf__tcp_congestion_ops_cong_avoid+0x3e ([kernel.kallsyms])
ffffffff81f250a5 tcp_ack+0x10d5 ([kernel.kallsyms])
ffffffff81f27c66 tcp_rcv_established+0x3b6 ([kernel.kallsyms])
ffffffff81f3ad03 tcp_v4_do_rcv+0x193 ([kernel.kallsyms])
ffffffff81d65a18 __release_sock+0xd8 ([kernel.kallsyms])
ffffffff81d65af4 release_sock+0x34 ([kernel.kallsyms])
ffffffff81f15c4b tcp_sendmsg+0x3b ([kernel.kallsyms])
ffffffff81f663d7 inet_sendmsg+0x47 ([kernel.kallsyms])
ffffffff81d5ab40 sock_write_iter+0x160 ([kernel.kallsyms])
ffffffff8149c67b vfs_write+0x3fb ([kernel.kallsyms])
ffffffff8149caf6 ksys_write+0xc6 ([kernel.kallsyms])
ffffffff8149cb5d __x64_sys_write+0x1d ([kernel.kallsyms])
ffffffff81009200 x64_sys_call+0x1d30 ([kernel.kallsyms])
ffffffff82232d28 do_syscall_64+0x68 ([kernel.kallsyms])
ffffffff8240012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms])

Fixes: 85d33df357 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS")
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/r/20241112145849.3436772-4-xukuohai@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2024-11-12 17:13:46 -08:00

172 lines
4.2 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2019 Intel Corporation. */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/static_call.h>
/* The BPF dispatcher is a multiway branch code generator. The
* dispatcher is a mechanism to avoid the performance penalty of an
* indirect call, which is expensive when retpolines are enabled. A
* dispatch client registers a BPF program into the dispatcher, and if
* there is available room in the dispatcher a direct call to the BPF
* program will be generated. All calls to the BPF programs called via
* the dispatcher will then be a direct call, instead of an
* indirect. The dispatcher hijacks a trampoline function it via the
* __fentry__ of the trampoline. The trampoline function has the
* following signature:
*
* unsigned int trampoline(const void *ctx, const struct bpf_insn *insnsi,
* unsigned int (*bpf_func)(const void *,
* const struct bpf_insn *));
*/
static struct bpf_dispatcher_prog *bpf_dispatcher_find_prog(
struct bpf_dispatcher *d, struct bpf_prog *prog)
{
int i;
for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
if (prog == d->progs[i].prog)
return &d->progs[i];
}
return NULL;
}
static struct bpf_dispatcher_prog *bpf_dispatcher_find_free(
struct bpf_dispatcher *d)
{
return bpf_dispatcher_find_prog(d, NULL);
}
static bool bpf_dispatcher_add_prog(struct bpf_dispatcher *d,
struct bpf_prog *prog)
{
struct bpf_dispatcher_prog *entry;
if (!prog)
return false;
entry = bpf_dispatcher_find_prog(d, prog);
if (entry) {
refcount_inc(&entry->users);
return false;
}
entry = bpf_dispatcher_find_free(d);
if (!entry)
return false;
bpf_prog_inc(prog);
entry->prog = prog;
refcount_set(&entry->users, 1);
d->num_progs++;
return true;
}
static bool bpf_dispatcher_remove_prog(struct bpf_dispatcher *d,
struct bpf_prog *prog)
{
struct bpf_dispatcher_prog *entry;
if (!prog)
return false;
entry = bpf_dispatcher_find_prog(d, prog);
if (!entry)
return false;
if (refcount_dec_and_test(&entry->users)) {
entry->prog = NULL;
bpf_prog_put(prog);
d->num_progs--;
return true;
}
return false;
}
int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs)
{
return -ENOTSUPP;
}
static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf)
{
s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0];
int i;
for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
if (d->progs[i].prog)
*ipsp++ = (s64)(uintptr_t)d->progs[i].prog->bpf_func;
}
return arch_prepare_bpf_dispatcher(image, buf, &ips[0], d->num_progs);
}
static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
{
void *new, *tmp;
u32 noff = 0;
if (prev_num_progs)
noff = d->image_off ^ (PAGE_SIZE / 2);
new = d->num_progs ? d->image + noff : NULL;
tmp = d->num_progs ? d->rw_image + noff : NULL;
if (new) {
/* Prepare the dispatcher in d->rw_image. Then use
* bpf_arch_text_copy to update d->image, which is RO+X.
*/
if (bpf_dispatcher_prepare(d, new, tmp))
return;
if (IS_ERR(bpf_arch_text_copy(new, tmp, PAGE_SIZE / 2)))
return;
}
__BPF_DISPATCHER_UPDATE(d, new ?: (void *)&bpf_dispatcher_nop_func);
/* Make sure all the callers executing the previous/old half of the
* image leave it, so following update call can modify it safely.
*/
synchronize_rcu();
if (new)
d->image_off = noff;
}
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
struct bpf_prog *to)
{
bool changed = false;
int prev_num_progs;
if (from == to)
return;
mutex_lock(&d->mutex);
if (!d->image) {
d->image = bpf_prog_pack_alloc(PAGE_SIZE, bpf_jit_fill_hole_with_zero);
if (!d->image)
goto out;
d->rw_image = bpf_jit_alloc_exec(PAGE_SIZE);
if (!d->rw_image) {
bpf_prog_pack_free(d->image, PAGE_SIZE);
d->image = NULL;
goto out;
}
bpf_image_ksym_init(d->image, PAGE_SIZE, &d->ksym);
bpf_image_ksym_add(&d->ksym);
}
prev_num_progs = d->num_progs;
changed |= bpf_dispatcher_remove_prog(d, from);
changed |= bpf_dispatcher_add_prog(d, to);
if (!changed)
goto out;
bpf_dispatcher_update(d, prev_num_progs);
out:
mutex_unlock(&d->mutex);
}