tracing: Introduce trace event injection
We have been trying to use rasdaemon to monitor hardware errors like
correctable memory errors. rasdaemon uses trace events to monitor
various hardware errors. In order to test it, we have to inject some
hardware errors, unfortunately not all of them provide error
injections. MCE does provide a way to inject MCE errors, but errors
like PCI error and devlink error don't, it is not easy to add error
injection to each of them. Instead, it is relatively easier to just
allow users to inject trace events in a generic way so that all trace
events can be injected.
This patch introduces trace event injection, where a new 'inject' is
added to each tracepoint directory. Users could write into this file
with key=value pairs to specify the value of each fields of the trace
event, all unspecified fields are set to zero values by default.
For example, for the net/net_dev_queue tracepoint, we can inject:
INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject
echo "" > $INJECT
echo "name='test'" > $INJECT
echo "name='test' len=1024" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
<...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
Triggers could be triggered as usual too:
echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger
echo "len=1025" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
bash-614 [001] .N.1 284.236349: <stack trace>
=> event_inject_write
=> vfs_write
=> ksys_write
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
The only thing that can't be injected is string pointers as they
require constant string pointers, this can't be done at run time.
Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-11-30 04:52:18 +00:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/*
|
|
|
|
* trace_events_inject - trace event injection
|
|
|
|
*
|
|
|
|
* Copyright (C) 2019 Cong Wang <cwang@twitter.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/ctype.h>
|
|
|
|
#include <linux/mutex.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/rculist.h>
|
|
|
|
|
|
|
|
#include "trace.h"
|
|
|
|
|
|
|
|
static int
|
|
|
|
trace_inject_entry(struct trace_event_file *file, void *rec, int len)
|
|
|
|
{
|
|
|
|
struct trace_event_buffer fbuffer;
|
|
|
|
int written = 0;
|
|
|
|
void *entry;
|
|
|
|
|
|
|
|
rcu_read_lock_sched();
|
|
|
|
entry = trace_event_buffer_reserve(&fbuffer, file, len);
|
|
|
|
if (entry) {
|
|
|
|
memcpy(entry, rec, len);
|
|
|
|
written = len;
|
|
|
|
trace_event_buffer_commit(&fbuffer);
|
|
|
|
}
|
|
|
|
rcu_read_unlock_sched();
|
|
|
|
|
|
|
|
return written;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
parse_field(char *str, struct trace_event_call *call,
|
|
|
|
struct ftrace_event_field **pf, u64 *pv)
|
|
|
|
{
|
|
|
|
struct ftrace_event_field *field;
|
|
|
|
char *field_name;
|
|
|
|
int s, i = 0;
|
|
|
|
int len;
|
|
|
|
u64 val;
|
|
|
|
|
|
|
|
if (!str[i])
|
|
|
|
return 0;
|
|
|
|
/* First find the field to associate to */
|
|
|
|
while (isspace(str[i]))
|
|
|
|
i++;
|
|
|
|
s = i;
|
|
|
|
while (isalnum(str[i]) || str[i] == '_')
|
|
|
|
i++;
|
|
|
|
len = i - s;
|
|
|
|
if (!len)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
field_name = kmemdup_nul(str + s, len, GFP_KERNEL);
|
|
|
|
if (!field_name)
|
|
|
|
return -ENOMEM;
|
|
|
|
field = trace_find_event_field(call, field_name);
|
|
|
|
kfree(field_name);
|
|
|
|
if (!field)
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
*pf = field;
|
|
|
|
while (isspace(str[i]))
|
|
|
|
i++;
|
|
|
|
if (str[i] != '=')
|
|
|
|
return -EINVAL;
|
|
|
|
i++;
|
|
|
|
while (isspace(str[i]))
|
|
|
|
i++;
|
|
|
|
s = i;
|
|
|
|
if (isdigit(str[i]) || str[i] == '-') {
|
|
|
|
char *num, c;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* Make sure the field is not a string */
|
|
|
|
if (is_string_field(field))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (str[i] == '-')
|
|
|
|
i++;
|
|
|
|
|
|
|
|
/* We allow 0xDEADBEEF */
|
|
|
|
while (isalnum(str[i]))
|
|
|
|
i++;
|
|
|
|
num = str + s;
|
|
|
|
c = str[i];
|
|
|
|
if (c != '\0' && !isspace(c))
|
|
|
|
return -EINVAL;
|
|
|
|
str[i] = '\0';
|
|
|
|
/* Make sure it is a value */
|
|
|
|
if (field->is_signed)
|
|
|
|
ret = kstrtoll(num, 0, &val);
|
|
|
|
else
|
|
|
|
ret = kstrtoull(num, 0, &val);
|
|
|
|
str[i] = c;
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
*pv = val;
|
|
|
|
return i;
|
|
|
|
} else if (str[i] == '\'' || str[i] == '"') {
|
|
|
|
char q = str[i];
|
|
|
|
|
|
|
|
/* Make sure the field is OK for strings */
|
|
|
|
if (!is_string_field(field))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
for (i++; str[i]; i++) {
|
|
|
|
if (str[i] == '\\' && str[i + 1]) {
|
|
|
|
i++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (str[i] == q)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!str[i])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* Skip quotes */
|
|
|
|
s++;
|
|
|
|
len = i - s;
|
|
|
|
if (len >= MAX_FILTER_STR_VAL)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
*pv = (unsigned long)(str + s);
|
|
|
|
str[i] = 0;
|
|
|
|
/* go past the last quote */
|
|
|
|
i++;
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int trace_get_entry_size(struct trace_event_call *call)
|
|
|
|
{
|
|
|
|
struct ftrace_event_field *field;
|
|
|
|
struct list_head *head;
|
|
|
|
int size = 0;
|
|
|
|
|
|
|
|
head = trace_get_fields(call);
|
|
|
|
list_for_each_entry(field, head, link) {
|
|
|
|
if (field->size + field->offset > size)
|
|
|
|
size = field->size + field->offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *trace_alloc_entry(struct trace_event_call *call, int *size)
|
|
|
|
{
|
|
|
|
int entry_size = trace_get_entry_size(call);
|
|
|
|
struct ftrace_event_field *field;
|
|
|
|
struct list_head *head;
|
|
|
|
void *entry = NULL;
|
|
|
|
|
|
|
|
/* We need an extra '\0' at the end. */
|
|
|
|
entry = kzalloc(entry_size + 1, GFP_KERNEL);
|
|
|
|
if (!entry)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
head = trace_get_fields(call);
|
|
|
|
list_for_each_entry(field, head, link) {
|
|
|
|
if (!is_string_field(field))
|
|
|
|
continue;
|
|
|
|
if (field->filter_type == FILTER_STATIC_STRING)
|
|
|
|
continue;
|
2021-11-22 09:30:12 +00:00
|
|
|
if (field->filter_type == FILTER_DYN_STRING ||
|
|
|
|
field->filter_type == FILTER_RDYN_STRING) {
|
tracing: Introduce trace event injection
We have been trying to use rasdaemon to monitor hardware errors like
correctable memory errors. rasdaemon uses trace events to monitor
various hardware errors. In order to test it, we have to inject some
hardware errors, unfortunately not all of them provide error
injections. MCE does provide a way to inject MCE errors, but errors
like PCI error and devlink error don't, it is not easy to add error
injection to each of them. Instead, it is relatively easier to just
allow users to inject trace events in a generic way so that all trace
events can be injected.
This patch introduces trace event injection, where a new 'inject' is
added to each tracepoint directory. Users could write into this file
with key=value pairs to specify the value of each fields of the trace
event, all unspecified fields are set to zero values by default.
For example, for the net/net_dev_queue tracepoint, we can inject:
INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject
echo "" > $INJECT
echo "name='test'" > $INJECT
echo "name='test' len=1024" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
<...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
Triggers could be triggered as usual too:
echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger
echo "len=1025" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
bash-614 [001] .N.1 284.236349: <stack trace>
=> event_inject_write
=> vfs_write
=> ksys_write
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
The only thing that can't be injected is string pointers as they
require constant string pointers, this can't be done at run time.
Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-11-30 04:52:18 +00:00
|
|
|
u32 *str_item;
|
|
|
|
int str_loc = entry_size & 0xffff;
|
|
|
|
|
2021-11-22 09:30:12 +00:00
|
|
|
if (field->filter_type == FILTER_RDYN_STRING)
|
|
|
|
str_loc -= field->offset + field->size;
|
|
|
|
|
tracing: Introduce trace event injection
We have been trying to use rasdaemon to monitor hardware errors like
correctable memory errors. rasdaemon uses trace events to monitor
various hardware errors. In order to test it, we have to inject some
hardware errors, unfortunately not all of them provide error
injections. MCE does provide a way to inject MCE errors, but errors
like PCI error and devlink error don't, it is not easy to add error
injection to each of them. Instead, it is relatively easier to just
allow users to inject trace events in a generic way so that all trace
events can be injected.
This patch introduces trace event injection, where a new 'inject' is
added to each tracepoint directory. Users could write into this file
with key=value pairs to specify the value of each fields of the trace
event, all unspecified fields are set to zero values by default.
For example, for the net/net_dev_queue tracepoint, we can inject:
INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject
echo "" > $INJECT
echo "name='test'" > $INJECT
echo "name='test' len=1024" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
<...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
Triggers could be triggered as usual too:
echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger
echo "len=1025" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
bash-614 [001] .N.1 284.236349: <stack trace>
=> event_inject_write
=> vfs_write
=> ksys_write
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
The only thing that can't be injected is string pointers as they
require constant string pointers, this can't be done at run time.
Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-11-30 04:52:18 +00:00
|
|
|
str_item = (u32 *)(entry + field->offset);
|
|
|
|
*str_item = str_loc; /* string length is 0. */
|
|
|
|
} else {
|
|
|
|
char **paddr;
|
|
|
|
|
|
|
|
paddr = (char **)(entry + field->offset);
|
|
|
|
*paddr = "";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*size = entry_size + 1;
|
|
|
|
return entry;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define INJECT_STRING "STATIC STRING CAN NOT BE INJECTED"
|
|
|
|
|
|
|
|
/* Caller is responsible to free the *pentry. */
|
|
|
|
static int parse_entry(char *str, struct trace_event_call *call, void **pentry)
|
|
|
|
{
|
|
|
|
struct ftrace_event_field *field;
|
|
|
|
void *entry = NULL;
|
|
|
|
int entry_size;
|
2020-01-03 00:04:57 +00:00
|
|
|
u64 val = 0;
|
tracing: Introduce trace event injection
We have been trying to use rasdaemon to monitor hardware errors like
correctable memory errors. rasdaemon uses trace events to monitor
various hardware errors. In order to test it, we have to inject some
hardware errors, unfortunately not all of them provide error
injections. MCE does provide a way to inject MCE errors, but errors
like PCI error and devlink error don't, it is not easy to add error
injection to each of them. Instead, it is relatively easier to just
allow users to inject trace events in a generic way so that all trace
events can be injected.
This patch introduces trace event injection, where a new 'inject' is
added to each tracepoint directory. Users could write into this file
with key=value pairs to specify the value of each fields of the trace
event, all unspecified fields are set to zero values by default.
For example, for the net/net_dev_queue tracepoint, we can inject:
INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject
echo "" > $INJECT
echo "name='test'" > $INJECT
echo "name='test' len=1024" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
<...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
Triggers could be triggered as usual too:
echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger
echo "len=1025" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
bash-614 [001] .N.1 284.236349: <stack trace>
=> event_inject_write
=> vfs_write
=> ksys_write
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
The only thing that can't be injected is string pointers as they
require constant string pointers, this can't be done at run time.
Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-11-30 04:52:18 +00:00
|
|
|
int len;
|
|
|
|
|
|
|
|
entry = trace_alloc_entry(call, &entry_size);
|
|
|
|
*pentry = entry;
|
|
|
|
if (!entry)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2021-01-25 19:45:08 +00:00
|
|
|
tracing_generic_entry_update(entry, call->event.type,
|
|
|
|
tracing_gen_ctx());
|
tracing: Introduce trace event injection
We have been trying to use rasdaemon to monitor hardware errors like
correctable memory errors. rasdaemon uses trace events to monitor
various hardware errors. In order to test it, we have to inject some
hardware errors, unfortunately not all of them provide error
injections. MCE does provide a way to inject MCE errors, but errors
like PCI error and devlink error don't, it is not easy to add error
injection to each of them. Instead, it is relatively easier to just
allow users to inject trace events in a generic way so that all trace
events can be injected.
This patch introduces trace event injection, where a new 'inject' is
added to each tracepoint directory. Users could write into this file
with key=value pairs to specify the value of each fields of the trace
event, all unspecified fields are set to zero values by default.
For example, for the net/net_dev_queue tracepoint, we can inject:
INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject
echo "" > $INJECT
echo "name='test'" > $INJECT
echo "name='test' len=1024" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
<...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
Triggers could be triggered as usual too:
echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger
echo "len=1025" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
bash-614 [001] .N.1 284.236349: <stack trace>
=> event_inject_write
=> vfs_write
=> ksys_write
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
The only thing that can't be injected is string pointers as they
require constant string pointers, this can't be done at run time.
Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-11-30 04:52:18 +00:00
|
|
|
|
|
|
|
while ((len = parse_field(str, call, &field, &val)) > 0) {
|
|
|
|
if (is_function_field(field))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (is_string_field(field)) {
|
|
|
|
char *addr = (char *)(unsigned long) val;
|
|
|
|
|
|
|
|
if (field->filter_type == FILTER_STATIC_STRING) {
|
2023-05-16 14:39:56 +00:00
|
|
|
strscpy(entry + field->offset, addr, field->size);
|
2021-11-22 09:30:12 +00:00
|
|
|
} else if (field->filter_type == FILTER_DYN_STRING ||
|
|
|
|
field->filter_type == FILTER_RDYN_STRING) {
|
tracing: Introduce trace event injection
We have been trying to use rasdaemon to monitor hardware errors like
correctable memory errors. rasdaemon uses trace events to monitor
various hardware errors. In order to test it, we have to inject some
hardware errors, unfortunately not all of them provide error
injections. MCE does provide a way to inject MCE errors, but errors
like PCI error and devlink error don't, it is not easy to add error
injection to each of them. Instead, it is relatively easier to just
allow users to inject trace events in a generic way so that all trace
events can be injected.
This patch introduces trace event injection, where a new 'inject' is
added to each tracepoint directory. Users could write into this file
with key=value pairs to specify the value of each fields of the trace
event, all unspecified fields are set to zero values by default.
For example, for the net/net_dev_queue tracepoint, we can inject:
INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject
echo "" > $INJECT
echo "name='test'" > $INJECT
echo "name='test' len=1024" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
<...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
Triggers could be triggered as usual too:
echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger
echo "len=1025" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
bash-614 [001] .N.1 284.236349: <stack trace>
=> event_inject_write
=> vfs_write
=> ksys_write
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
The only thing that can't be injected is string pointers as they
require constant string pointers, this can't be done at run time.
Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-11-30 04:52:18 +00:00
|
|
|
int str_len = strlen(addr) + 1;
|
|
|
|
int str_loc = entry_size & 0xffff;
|
|
|
|
u32 *str_item;
|
|
|
|
|
|
|
|
entry_size += str_len;
|
|
|
|
*pentry = krealloc(entry, entry_size, GFP_KERNEL);
|
|
|
|
if (!*pentry) {
|
|
|
|
kfree(entry);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
entry = *pentry;
|
|
|
|
|
2023-05-16 14:39:56 +00:00
|
|
|
strscpy(entry + (entry_size - str_len), addr, str_len);
|
tracing: Introduce trace event injection
We have been trying to use rasdaemon to monitor hardware errors like
correctable memory errors. rasdaemon uses trace events to monitor
various hardware errors. In order to test it, we have to inject some
hardware errors, unfortunately not all of them provide error
injections. MCE does provide a way to inject MCE errors, but errors
like PCI error and devlink error don't, it is not easy to add error
injection to each of them. Instead, it is relatively easier to just
allow users to inject trace events in a generic way so that all trace
events can be injected.
This patch introduces trace event injection, where a new 'inject' is
added to each tracepoint directory. Users could write into this file
with key=value pairs to specify the value of each fields of the trace
event, all unspecified fields are set to zero values by default.
For example, for the net/net_dev_queue tracepoint, we can inject:
INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject
echo "" > $INJECT
echo "name='test'" > $INJECT
echo "name='test' len=1024" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
<...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
Triggers could be triggered as usual too:
echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger
echo "len=1025" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
bash-614 [001] .N.1 284.236349: <stack trace>
=> event_inject_write
=> vfs_write
=> ksys_write
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
The only thing that can't be injected is string pointers as they
require constant string pointers, this can't be done at run time.
Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-11-30 04:52:18 +00:00
|
|
|
str_item = (u32 *)(entry + field->offset);
|
2021-11-22 09:30:12 +00:00
|
|
|
if (field->filter_type == FILTER_RDYN_STRING)
|
|
|
|
str_loc -= field->offset + field->size;
|
tracing: Introduce trace event injection
We have been trying to use rasdaemon to monitor hardware errors like
correctable memory errors. rasdaemon uses trace events to monitor
various hardware errors. In order to test it, we have to inject some
hardware errors, unfortunately not all of them provide error
injections. MCE does provide a way to inject MCE errors, but errors
like PCI error and devlink error don't, it is not easy to add error
injection to each of them. Instead, it is relatively easier to just
allow users to inject trace events in a generic way so that all trace
events can be injected.
This patch introduces trace event injection, where a new 'inject' is
added to each tracepoint directory. Users could write into this file
with key=value pairs to specify the value of each fields of the trace
event, all unspecified fields are set to zero values by default.
For example, for the net/net_dev_queue tracepoint, we can inject:
INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject
echo "" > $INJECT
echo "name='test'" > $INJECT
echo "name='test' len=1024" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
<...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
Triggers could be triggered as usual too:
echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger
echo "len=1025" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
bash-614 [001] .N.1 284.236349: <stack trace>
=> event_inject_write
=> vfs_write
=> ksys_write
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
The only thing that can't be injected is string pointers as they
require constant string pointers, this can't be done at run time.
Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-11-30 04:52:18 +00:00
|
|
|
*str_item = (str_len << 16) | str_loc;
|
|
|
|
} else {
|
|
|
|
char **paddr;
|
|
|
|
|
|
|
|
paddr = (char **)(entry + field->offset);
|
|
|
|
*paddr = INJECT_STRING;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
switch (field->size) {
|
|
|
|
case 1: {
|
|
|
|
u8 tmp = (u8) val;
|
|
|
|
|
|
|
|
memcpy(entry + field->offset, &tmp, 1);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 2: {
|
|
|
|
u16 tmp = (u16) val;
|
|
|
|
|
|
|
|
memcpy(entry + field->offset, &tmp, 2);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 4: {
|
|
|
|
u32 tmp = (u32) val;
|
|
|
|
|
|
|
|
memcpy(entry + field->offset, &tmp, 4);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 8:
|
|
|
|
memcpy(entry + field->offset, &val, 8);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
str += len;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (len < 0)
|
|
|
|
return len;
|
|
|
|
|
|
|
|
return entry_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t
|
|
|
|
event_inject_write(struct file *filp, const char __user *ubuf, size_t cnt,
|
|
|
|
loff_t *ppos)
|
|
|
|
{
|
|
|
|
struct trace_event_call *call;
|
|
|
|
struct trace_event_file *file;
|
|
|
|
int err = -ENODEV, size;
|
|
|
|
void *entry = NULL;
|
|
|
|
char *buf;
|
|
|
|
|
|
|
|
if (cnt >= PAGE_SIZE)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
buf = memdup_user_nul(ubuf, cnt);
|
|
|
|
if (IS_ERR(buf))
|
|
|
|
return PTR_ERR(buf);
|
|
|
|
strim(buf);
|
|
|
|
|
|
|
|
mutex_lock(&event_mutex);
|
tracing: Have format file honor EVENT_FILE_FL_FREED
When eventfs was introduced, special care had to be done to coordinate the
freeing of the file meta data with the files that are exposed to user
space. The file meta data would have a ref count that is set when the file
is created and would be decremented and freed after the last user that
opened the file closed it. When the file meta data was to be freed, it
would set a flag (EVENT_FILE_FL_FREED) to denote that the file is freed,
and any new references made (like new opens or reads) would fail as it is
marked freed. This allowed other meta data to be freed after this flag was
set (under the event_mutex).
All the files that were dynamically created in the events directory had a
pointer to the file meta data and would call event_release() when the last
reference to the user space file was closed. This would be the time that it
is safe to free the file meta data.
A shortcut was made for the "format" file. It's i_private would point to
the "call" entry directly and not point to the file's meta data. This is
because all format files are the same for the same "call", so it was
thought there was no reason to differentiate them. The other files
maintain state (like the "enable", "trigger", etc). But this meant if the
file were to disappear, the "format" file would be unaware of it.
This caused a race that could be trigger via the user_events test (that
would create dynamic events and free them), and running a loop that would
read the user_events format files:
In one console run:
# cd tools/testing/selftests/user_events
# while true; do ./ftrace_test; done
And in another console run:
# cd /sys/kernel/tracing/
# while true; do cat events/user_events/__test_event/format; done 2>/dev/null
With KASAN memory checking, it would trigger a use-after-free bug report
(which was a real bug). This was because the format file was not checking
the file's meta data flag "EVENT_FILE_FL_FREED", so it would access the
event that the file meta data pointed to after the event was freed.
After inspection, there are other locations that were found to not check
the EVENT_FILE_FL_FREED flag when accessing the trace_event_file. Add a
new helper function: event_file_file() that will make sure that the
event_mutex is held, and will return NULL if the trace_event_file has the
EVENT_FILE_FL_FREED flag set. Have the first reference of the struct file
pointer use event_file_file() and check for NULL. Later uses can still use
the event_file_data() helper function if the event_mutex is still held and
was not released since the event_file_file() call.
Link: https://lore.kernel.org/all/20240719204701.1605950-1-minipli@grsecurity.net/
Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Ajay Kaher <ajay.kaher@broadcom.com>
Cc: Ilkka Naulapää <digirigawa@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Beau Belgrave <beaub@linux.microsoft.com>
Cc: Florian Fainelli <florian.fainelli@broadcom.com>
Cc: Alexey Makhalov <alexey.makhalov@broadcom.com>
Cc: Vasavi Sirnapalli <vasavi.sirnapalli@broadcom.com>
Link: https://lore.kernel.org/20240730110657.3b69d3c1@gandalf.local.home
Fixes: b63db58e2fa5d ("eventfs/tracing: Add callback for release of an eventfs_inode")
Reported-by: Mathias Krause <minipli@grsecurity.net>
Tested-by: Mathias Krause <minipli@grsecurity.net>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2024-07-30 15:06:57 +00:00
|
|
|
file = event_file_file(filp);
|
tracing: Introduce trace event injection
We have been trying to use rasdaemon to monitor hardware errors like
correctable memory errors. rasdaemon uses trace events to monitor
various hardware errors. In order to test it, we have to inject some
hardware errors, unfortunately not all of them provide error
injections. MCE does provide a way to inject MCE errors, but errors
like PCI error and devlink error don't, it is not easy to add error
injection to each of them. Instead, it is relatively easier to just
allow users to inject trace events in a generic way so that all trace
events can be injected.
This patch introduces trace event injection, where a new 'inject' is
added to each tracepoint directory. Users could write into this file
with key=value pairs to specify the value of each fields of the trace
event, all unspecified fields are set to zero values by default.
For example, for the net/net_dev_queue tracepoint, we can inject:
INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject
echo "" > $INJECT
echo "name='test'" > $INJECT
echo "name='test' len=1024" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
<...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
Triggers could be triggered as usual too:
echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger
echo "len=1025" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
bash-614 [001] .N.1 284.236349: <stack trace>
=> event_inject_write
=> vfs_write
=> ksys_write
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
The only thing that can't be injected is string pointers as they
require constant string pointers, this can't be done at run time.
Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-11-30 04:52:18 +00:00
|
|
|
if (file) {
|
|
|
|
call = file->event_call;
|
|
|
|
size = parse_entry(buf, call, &entry);
|
|
|
|
if (size < 0)
|
|
|
|
err = size;
|
|
|
|
else
|
|
|
|
err = trace_inject_entry(file, entry, size);
|
|
|
|
}
|
|
|
|
mutex_unlock(&event_mutex);
|
|
|
|
|
|
|
|
kfree(entry);
|
|
|
|
kfree(buf);
|
|
|
|
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
*ppos += err;
|
|
|
|
return cnt;
|
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t
|
|
|
|
event_inject_read(struct file *file, char __user *buf, size_t size,
|
|
|
|
loff_t *ppos)
|
|
|
|
{
|
|
|
|
return -EPERM;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct file_operations event_inject_fops = {
|
2023-09-07 02:47:16 +00:00
|
|
|
.open = tracing_open_file_tr,
|
tracing: Introduce trace event injection
We have been trying to use rasdaemon to monitor hardware errors like
correctable memory errors. rasdaemon uses trace events to monitor
various hardware errors. In order to test it, we have to inject some
hardware errors, unfortunately not all of them provide error
injections. MCE does provide a way to inject MCE errors, but errors
like PCI error and devlink error don't, it is not easy to add error
injection to each of them. Instead, it is relatively easier to just
allow users to inject trace events in a generic way so that all trace
events can be injected.
This patch introduces trace event injection, where a new 'inject' is
added to each tracepoint directory. Users could write into this file
with key=value pairs to specify the value of each fields of the trace
event, all unspecified fields are set to zero values by default.
For example, for the net/net_dev_queue tracepoint, we can inject:
INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject
echo "" > $INJECT
echo "name='test'" > $INJECT
echo "name='test' len=1024" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
<...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
Triggers could be triggered as usual too:
echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger
echo "len=1025" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
bash-614 [001] .N.1 284.236349: <stack trace>
=> event_inject_write
=> vfs_write
=> ksys_write
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
The only thing that can't be injected is string pointers as they
require constant string pointers, this can't be done at run time.
Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-11-30 04:52:18 +00:00
|
|
|
.read = event_inject_read,
|
|
|
|
.write = event_inject_write,
|
2023-09-07 02:47:16 +00:00
|
|
|
.release = tracing_release_file_tr,
|
tracing: Introduce trace event injection
We have been trying to use rasdaemon to monitor hardware errors like
correctable memory errors. rasdaemon uses trace events to monitor
various hardware errors. In order to test it, we have to inject some
hardware errors, unfortunately not all of them provide error
injections. MCE does provide a way to inject MCE errors, but errors
like PCI error and devlink error don't, it is not easy to add error
injection to each of them. Instead, it is relatively easier to just
allow users to inject trace events in a generic way so that all trace
events can be injected.
This patch introduces trace event injection, where a new 'inject' is
added to each tracepoint directory. Users could write into this file
with key=value pairs to specify the value of each fields of the trace
event, all unspecified fields are set to zero values by default.
For example, for the net/net_dev_queue tracepoint, we can inject:
INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject
echo "" > $INJECT
echo "name='test'" > $INJECT
echo "name='test' len=1024" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
<...>-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
<...>-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
Triggers could be triggered as usual too:
echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger
echo "len=1025" > $INJECT
cat /sys/kernel/debug/tracing/trace
...
bash-614 [000] .... 36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
bash-614 [001] .... 136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
bash-614 [001] .N.. 208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
bash-614 [001] .N.1 284.236349: <stack trace>
=> event_inject_write
=> vfs_write
=> ksys_write
=> do_syscall_64
=> entry_SYSCALL_64_after_hwframe
The only thing that can't be injected is string pointers as they
require constant string pointers, this can't be done at run time.
Link: http://lkml.kernel.org/r/20191130045218.18979-1-xiyou.wangcong@gmail.com
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
2019-11-30 04:52:18 +00:00
|
|
|
};
|