sched/fair: Fair server interface

Add an interface for fair server setup on debugfs.

Each CPU has two files under /debug/sched/fair_server/cpu{ID}:

 - runtime: set runtime in ns
 - period:  set period in ns

This then leaves /proc/sys/kernel/sched_rt_{period,runtime}_us to set
bounds on admission control.

The interface also add the server to the dl bandwidth accounting.

Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://lore.kernel.org/r/a9ef9fc69bcedb44bddc9bc34f2b313296052819.1716811044.git.bristot@kernel.org
This commit is contained in:
Daniel Bristot de Oliveira 2024-05-27 14:06:52 +02:00 committed by Peter Zijlstra
parent a110a81c52
commit d741f297bc
4 changed files with 256 additions and 17 deletions

View File

@ -320,19 +320,12 @@ void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
__sub_running_bw(dl_se->dl_bw, dl_rq);
}
static void dl_change_utilization(struct task_struct *p, u64 new_bw)
static void dl_rq_change_utilization(struct rq *rq, struct sched_dl_entity *dl_se, u64 new_bw)
{
struct rq *rq;
if (dl_se->dl_non_contending) {
sub_running_bw(dl_se, &rq->dl);
dl_se->dl_non_contending = 0;
WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
if (task_on_rq_queued(p))
return;
rq = task_rq(p);
if (p->dl.dl_non_contending) {
sub_running_bw(&p->dl, &rq->dl);
p->dl.dl_non_contending = 0;
/*
* If the timer handler is currently running and the
* timer cannot be canceled, inactive_task_timer()
@ -340,13 +333,25 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
* will not touch the rq's active utilization,
* so we are still safe.
*/
if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
put_task_struct(p);
if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) {
if (!dl_server(dl_se))
put_task_struct(dl_task_of(dl_se));
}
}
__sub_rq_bw(p->dl.dl_bw, &rq->dl);
__sub_rq_bw(dl_se->dl_bw, &rq->dl);
__add_rq_bw(new_bw, &rq->dl);
}
static void dl_change_utilization(struct task_struct *p, u64 new_bw)
{
WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
if (task_on_rq_queued(p))
return;
dl_rq_change_utilization(task_rq(p), &p->dl, new_bw);
}
static void __dl_clear_params(struct sched_dl_entity *dl_se);
/*
@ -1621,11 +1626,17 @@ void dl_server_start(struct sched_dl_entity *dl_se)
{
struct rq *rq = dl_se->rq;
/*
* XXX: the apply do not work fine at the init phase for the
* fair server because things are not yet set. We need to improve
* this before getting generic.
*/
if (!dl_server(dl_se)) {
/* Disabled */
dl_se->dl_runtime = 0;
dl_se->dl_deadline = 1000 * NSEC_PER_MSEC;
dl_se->dl_period = 1000 * NSEC_PER_MSEC;
u64 runtime = 0;
u64 period = 1000 * NSEC_PER_MSEC;
dl_server_apply_params(dl_se, runtime, period, 1);
dl_se->dl_server = 1;
dl_se->dl_defer = 1;
@ -1660,6 +1671,64 @@ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
dl_se->server_pick = pick;
}
void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq)
{
u64 new_bw = dl_se->dl_bw;
int cpu = cpu_of(rq);
struct dl_bw *dl_b;
dl_b = dl_bw_of(cpu_of(rq));
guard(raw_spinlock)(&dl_b->lock);
if (!dl_bw_cpus(cpu))
return;
__dl_add(dl_b, new_bw, dl_bw_cpus(cpu));
}
int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 period, bool init)
{
u64 old_bw = init ? 0 : to_ratio(dl_se->dl_period, dl_se->dl_runtime);
u64 new_bw = to_ratio(period, runtime);
struct rq *rq = dl_se->rq;
int cpu = cpu_of(rq);
struct dl_bw *dl_b;
unsigned long cap;
int retval = 0;
int cpus;
dl_b = dl_bw_of(cpu);
guard(raw_spinlock)(&dl_b->lock);
cpus = dl_bw_cpus(cpu);
cap = dl_bw_capacity(cpu);
if (__dl_overflow(dl_b, cap, old_bw, new_bw))
return -EBUSY;
if (init) {
__add_rq_bw(new_bw, &rq->dl);
__dl_add(dl_b, new_bw, cpus);
} else {
__dl_sub(dl_b, dl_se->dl_bw, cpus);
__dl_add(dl_b, new_bw, cpus);
dl_rq_change_utilization(rq, dl_se, new_bw);
}
dl_se->dl_runtime = runtime;
dl_se->dl_deadline = period;
dl_se->dl_period = period;
dl_se->runtime = 0;
dl_se->deadline = 0;
dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
return retval;
}
/*
* Update the current task's runtime statistics (provided it is still
* a -deadline task and has not been removed from the dl_rq).

View File

@ -333,8 +333,165 @@ static const struct file_operations sched_debug_fops = {
.release = seq_release,
};
enum dl_param {
DL_RUNTIME = 0,
DL_PERIOD,
};
static unsigned long fair_server_period_max = (1 << 22) * NSEC_PER_USEC; /* ~4 seconds */
static unsigned long fair_server_period_min = (100) * NSEC_PER_USEC; /* 100 us */
static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos, enum dl_param param)
{
long cpu = (long) ((struct seq_file *) filp->private_data)->private;
struct rq *rq = cpu_rq(cpu);
u64 runtime, period;
size_t err;
int retval;
u64 value;
err = kstrtoull_from_user(ubuf, cnt, 10, &value);
if (err)
return err;
scoped_guard (rq_lock_irqsave, rq) {
runtime = rq->fair_server.dl_runtime;
period = rq->fair_server.dl_period;
switch (param) {
case DL_RUNTIME:
if (runtime == value)
break;
runtime = value;
break;
case DL_PERIOD:
if (value == period)
break;
period = value;
break;
}
if (runtime > period ||
period > fair_server_period_max ||
period < fair_server_period_min) {
return -EINVAL;
}
if (rq->cfs.h_nr_running) {
update_rq_clock(rq);
dl_server_stop(&rq->fair_server);
}
retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0);
if (retval)
cnt = retval;
if (!runtime)
printk_deferred("Fair server disabled in CPU %d, system may crash due to starvation.\n",
cpu_of(rq));
if (rq->cfs.h_nr_running)
dl_server_start(&rq->fair_server);
}
*ppos += cnt;
return cnt;
}
static size_t sched_fair_server_show(struct seq_file *m, void *v, enum dl_param param)
{
unsigned long cpu = (unsigned long) m->private;
struct rq *rq = cpu_rq(cpu);
u64 value;
switch (param) {
case DL_RUNTIME:
value = rq->fair_server.dl_runtime;
break;
case DL_PERIOD:
value = rq->fair_server.dl_period;
break;
}
seq_printf(m, "%llu\n", value);
return 0;
}
static ssize_t
sched_fair_server_runtime_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_RUNTIME);
}
static int sched_fair_server_runtime_show(struct seq_file *m, void *v)
{
return sched_fair_server_show(m, v, DL_RUNTIME);
}
static int sched_fair_server_runtime_open(struct inode *inode, struct file *filp)
{
return single_open(filp, sched_fair_server_runtime_show, inode->i_private);
}
static const struct file_operations fair_server_runtime_fops = {
.open = sched_fair_server_runtime_open,
.write = sched_fair_server_runtime_write,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static ssize_t
sched_fair_server_period_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_PERIOD);
}
static int sched_fair_server_period_show(struct seq_file *m, void *v)
{
return sched_fair_server_show(m, v, DL_PERIOD);
}
static int sched_fair_server_period_open(struct inode *inode, struct file *filp)
{
return single_open(filp, sched_fair_server_period_show, inode->i_private);
}
static const struct file_operations fair_server_period_fops = {
.open = sched_fair_server_period_open,
.write = sched_fair_server_period_write,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static struct dentry *debugfs_sched;
static void debugfs_fair_server_init(void)
{
struct dentry *d_fair;
unsigned long cpu;
d_fair = debugfs_create_dir("fair_server", debugfs_sched);
if (!d_fair)
return;
for_each_possible_cpu(cpu) {
struct dentry *d_cpu;
char buf[32];
snprintf(buf, sizeof(buf), "cpu%lu", cpu);
d_cpu = debugfs_create_dir(buf, d_fair);
debugfs_create_file("runtime", 0644, d_cpu, (void *) cpu, &fair_server_runtime_fops);
debugfs_create_file("period", 0644, d_cpu, (void *) cpu, &fair_server_period_fops);
}
}
static __init int sched_init_debug(void)
{
struct dentry __maybe_unused *numa;
@ -374,6 +531,8 @@ static __init int sched_init_debug(void)
debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
debugfs_fair_server_init();
return 0;
}
late_initcall(sched_init_debug);

View File

@ -366,6 +366,9 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
extern void dl_server_update_idle_time(struct rq *rq,
struct task_struct *p);
extern void fair_server_init(struct rq *rq);
extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq);
extern int dl_server_apply_params(struct sched_dl_entity *dl_se,
u64 runtime, u64 period, bool init);
#ifdef CONFIG_CGROUP_SCHED

View File

@ -516,6 +516,14 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
set_rq_online(rq);
/*
* Because the rq is not a task, dl_add_task_root_domain() did not
* move the fair server bw to the rd if it already started.
* Add it now.
*/
if (rq->fair_server.dl_server)
__dl_server_attach_root(&rq->fair_server, rq);
rq_unlock_irqrestore(rq, &rf);
if (old_rd)