sched: psi: pass enqueue/dequeue flags to psi callbacks directly

What psi needs to do on each enqueue and dequeue has gotten more
subtle, and the generic sched code trying to distill this into a bool
for the callbacks is awkward.

Pass the flags directly and let psi parse them. For that to work, the
#include "stats.h" (which has the psi callback implementations) needs
to be below the flag definitions in "sched.h". Move that section
further down, next to some of the other accounting stuff.

This also puts the ENQUEUE_SAVE/RESTORE branch behind the psi jump
label, slightly reducing overhead when PSI=y but runtime disabled.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20241014144358.GB1021@cmpxchg.org
This commit is contained in:
Johannes Weiner 2024-10-14 10:43:58 -04:00 committed by Peter Zijlstra
parent 23f1178ad7
commit 1a6151017e
3 changed files with 53 additions and 44 deletions

View File

@ -2024,10 +2024,10 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
*/
uclamp_rq_inc(rq, p);
if (!(flags & ENQUEUE_RESTORE)) {
psi_enqueue(p, flags);
if (!(flags & ENQUEUE_RESTORE))
sched_info_enqueue(rq, p);
psi_enqueue(p, flags & ENQUEUE_MIGRATED);
}
if (sched_core_enabled(rq))
sched_core_enqueue(rq, p);
@ -2044,10 +2044,10 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
if (!(flags & DEQUEUE_NOCLOCK))
update_rq_clock(rq);
if (!(flags & DEQUEUE_SAVE)) {
if (!(flags & DEQUEUE_SAVE))
sched_info_dequeue(rq, p);
psi_dequeue(p, !(flags & DEQUEUE_SLEEP));
}
psi_dequeue(p, flags);
/*
* Must be before ->dequeue_task() because ->dequeue_task() can 'fail'

View File

@ -2093,34 +2093,6 @@ static inline const struct cpumask *task_user_cpus(struct task_struct *p)
#endif /* CONFIG_SMP */
#include "stats.h"
#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
extern void __sched_core_account_forceidle(struct rq *rq);
static inline void sched_core_account_forceidle(struct rq *rq)
{
if (schedstat_enabled())
__sched_core_account_forceidle(rq);
}
extern void __sched_core_tick(struct rq *rq);
static inline void sched_core_tick(struct rq *rq)
{
if (sched_core_enabled(rq) && schedstat_enabled())
__sched_core_tick(rq);
}
#else /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS): */
static inline void sched_core_account_forceidle(struct rq *rq) { }
static inline void sched_core_tick(struct rq *rq) { }
#endif /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS) */
#ifdef CONFIG_CGROUP_SCHED
/*
@ -3191,6 +3163,34 @@ extern void nohz_run_idle_balance(int cpu);
static inline void nohz_run_idle_balance(int cpu) { }
#endif
#include "stats.h"
#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
extern void __sched_core_account_forceidle(struct rq *rq);
static inline void sched_core_account_forceidle(struct rq *rq)
{
if (schedstat_enabled())
__sched_core_account_forceidle(rq);
}
extern void __sched_core_tick(struct rq *rq);
static inline void sched_core_tick(struct rq *rq)
{
if (sched_core_enabled(rq) && schedstat_enabled())
__sched_core_tick(rq);
}
#else /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS): */
static inline void sched_core_account_forceidle(struct rq *rq) { }
static inline void sched_core_tick(struct rq *rq) { }
#endif /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS) */
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {

View File

@ -127,21 +127,25 @@ static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
* go through migration requeues. In this case, *sleeping* states need
* to be transferred.
*/
static inline void psi_enqueue(struct task_struct *p, bool migrate)
static inline void psi_enqueue(struct task_struct *p, int flags)
{
int clear = 0, set = 0;
if (static_branch_likely(&psi_disabled))
return;
/* Same runqueue, nothing changed for psi */
if (flags & ENQUEUE_RESTORE)
return;
if (p->se.sched_delayed) {
/* CPU migration of "sleeping" task */
SCHED_WARN_ON(!migrate);
SCHED_WARN_ON(!(flags & ENQUEUE_MIGRATED));
if (p->in_memstall)
set |= TSK_MEMSTALL;
if (p->in_iowait)
set |= TSK_IOWAIT;
} else if (migrate) {
} else if (flags & ENQUEUE_MIGRATED) {
/* CPU migration of runnable task */
set = TSK_RUNNING;
if (p->in_memstall)
@ -158,17 +162,14 @@ static inline void psi_enqueue(struct task_struct *p, bool migrate)
psi_task_change(p, clear, set);
}
static inline void psi_dequeue(struct task_struct *p, bool migrate)
static inline void psi_dequeue(struct task_struct *p, int flags)
{
if (static_branch_likely(&psi_disabled))
return;
/*
* When migrating a task to another CPU, clear all psi
* state. The enqueue callback above will work it out.
*/
if (migrate)
psi_task_change(p, p->psi_flags, 0);
/* Same runqueue, nothing changed for psi */
if (flags & DEQUEUE_SAVE)
return;
/*
* A voluntary sleep is a dequeue followed by a task switch. To
@ -176,6 +177,14 @@ static inline void psi_dequeue(struct task_struct *p, bool migrate)
* TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
* Do nothing here.
*/
if (flags & DEQUEUE_SLEEP)
return;
/*
* When migrating a task to another CPU, clear all psi
* state. The enqueue callback above will work it out.
*/
psi_task_change(p, p->psi_flags, 0);
}
static inline void psi_ttwu_dequeue(struct task_struct *p)