sched: Add TIF_NEED_RESCHED_LAZY infrastructure

Add the basic infrastructure to split the TIF_NEED_RESCHED bit in two.
Either bit will cause a resched on return-to-user, but only
TIF_NEED_RESCHED will drive IRQ preemption.

No behavioural change intended.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lkml.kernel.org/r/20241007075055.219540785@infradead.org
This commit is contained in:
Peter Zijlstra 2024-10-04 14:47:02 +02:00
parent 0f0d1b8e50
commit 26baa1f1c4
7 changed files with 48 additions and 24 deletions

View File

@ -64,7 +64,8 @@
#define EXIT_TO_USER_MODE_WORK \ #define EXIT_TO_USER_MODE_WORK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
_TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
ARCH_EXIT_TO_USER_MODE_WORK) ARCH_EXIT_TO_USER_MODE_WORK)
/** /**

View File

@ -17,8 +17,9 @@
#endif #endif
#define XFER_TO_GUEST_MODE_WORK \ #define XFER_TO_GUEST_MODE_WORK \
(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \ (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK) _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \
ARCH_XFER_TO_GUEST_MODE_WORK)
struct kvm_vcpu; struct kvm_vcpu;

View File

@ -2002,7 +2002,8 @@ static inline void set_tsk_need_resched(struct task_struct *tsk)
static inline void clear_tsk_need_resched(struct task_struct *tsk) static inline void clear_tsk_need_resched(struct task_struct *tsk)
{ {
clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); atomic_long_andnot(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY,
(atomic_long_t *)&task_thread_info(tsk)->flags);
} }
static inline int test_tsk_need_resched(struct task_struct *tsk) static inline int test_tsk_need_resched(struct task_struct *tsk)

View File

@ -59,6 +59,14 @@ enum syscall_work_bit {
#include <asm/thread_info.h> #include <asm/thread_info.h>
/*
 * Fallback for when TIF_NEED_RESCHED_LAZY has not been defined by this
 * point (i.e. after <asm/thread_info.h> was included): alias the lazy bit
 * and its mask to the regular TIF_NEED_RESCHED bit and mask, so code that
 * refers to the lazy flag still builds and simply operates on
 * TIF_NEED_RESCHED.
 *
 * If CONFIG_ARCH_HAS_PREEMPT_LAZY is set while the flag is still
 * undefined, the configuration is inconsistent and the build is stopped.
 */
#ifndef TIF_NEED_RESCHED_LAZY
#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY
#error Inconsistent PREEMPT_LAZY
#endif
#define TIF_NEED_RESCHED_LAZY TIF_NEED_RESCHED
#define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED
#endif
#ifdef __KERNEL__ #ifdef __KERNEL__
#ifndef arch_set_restart_data #ifndef arch_set_restart_data
@ -179,22 +187,27 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti
#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H #ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
static __always_inline bool tif_need_resched(void) static __always_inline bool tif_test_bit(int bit)
{ {
return arch_test_bit(TIF_NEED_RESCHED, return arch_test_bit(bit,
(unsigned long *)(&current_thread_info()->flags)); (unsigned long *)(&current_thread_info()->flags));
} }
#else #else
static __always_inline bool tif_need_resched(void) static __always_inline bool tif_test_bit(int bit)
{ {
return test_bit(TIF_NEED_RESCHED, return test_bit(bit,
(unsigned long *)(&current_thread_info()->flags)); (unsigned long *)(&current_thread_info()->flags));
} }
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
/*
 * tif_need_resched - test the TIF_NEED_RESCHED thread flag.
 *
 * Thin wrapper that passes TIF_NEED_RESCHED to tif_test_bit().
 * Only TIF_NEED_RESCHED is tested here; TIF_NEED_RESCHED_LAZY is not
 * (unless the two names refer to the same bit).
 *
 * Return: the result of tif_test_bit(TIF_NEED_RESCHED).
 */
static __always_inline bool tif_need_resched(void)
{
return tif_test_bit(TIF_NEED_RESCHED);
}
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
static inline int arch_within_stack_frames(const void * const stack, static inline int arch_within_stack_frames(const void * const stack,
const void * const stackend, const void * const stackend,

View File

@ -98,7 +98,7 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
local_irq_enable_exit_to_user(ti_work); local_irq_enable_exit_to_user(ti_work);
if (ti_work & _TIF_NEED_RESCHED) if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
schedule(); schedule();
if (ti_work & _TIF_UPROBE) if (ti_work & _TIF_UPROBE)

View File

@ -13,7 +13,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
return -EINTR; return -EINTR;
} }
if (ti_work & _TIF_NEED_RESCHED) if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
schedule(); schedule();
if (ti_work & _TIF_NOTIFY_RESUME) if (ti_work & _TIF_NOTIFY_RESUME)
@ -24,7 +24,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
return ret; return ret;
ti_work = read_thread_flags(); ti_work = read_thread_flags();
} while (ti_work & XFER_TO_GUEST_MODE_WORK || need_resched()); } while (ti_work & XFER_TO_GUEST_MODE_WORK);
return 0; return 0;
} }

View File

@ -941,10 +941,9 @@ static inline void hrtick_rq_init(struct rq *rq)
* this avoids any races wrt polling state changes and thereby avoids * this avoids any races wrt polling state changes and thereby avoids
* spurious IPIs. * spurious IPIs.
*/ */
static inline bool set_nr_and_not_polling(struct task_struct *p) static inline bool set_nr_and_not_polling(struct thread_info *ti, int tif)
{ {
struct thread_info *ti = task_thread_info(p); return !(fetch_or(&ti->flags, 1 << tif) & _TIF_POLLING_NRFLAG);
return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
} }
/* /*
@ -969,9 +968,9 @@ static bool set_nr_if_polling(struct task_struct *p)
} }
#else #else
static inline bool set_nr_and_not_polling(struct task_struct *p) static inline bool set_nr_and_not_polling(struct thread_info *ti, int tif)
{ {
set_tsk_need_resched(p); set_ti_thread_flag(ti, tif);
return true; return true;
} }
@ -1076,28 +1075,37 @@ void wake_up_q(struct wake_q_head *head)
* might also involve a cross-CPU call to trigger the scheduler on * might also involve a cross-CPU call to trigger the scheduler on
* the target CPU. * the target CPU.
*/ */
void resched_curr(struct rq *rq) static void __resched_curr(struct rq *rq, int tif)
{ {
struct task_struct *curr = rq->curr; struct task_struct *curr = rq->curr;
struct thread_info *cti = task_thread_info(curr);
int cpu; int cpu;
lockdep_assert_rq_held(rq); lockdep_assert_rq_held(rq);
if (test_tsk_need_resched(curr)) if (cti->flags & ((1 << tif) | _TIF_NEED_RESCHED))
return; return;
cpu = cpu_of(rq); cpu = cpu_of(rq);
if (cpu == smp_processor_id()) { if (cpu == smp_processor_id()) {
set_tsk_need_resched(curr); set_ti_thread_flag(cti, tif);
set_preempt_need_resched(); if (tif == TIF_NEED_RESCHED)
set_preempt_need_resched();
return; return;
} }
if (set_nr_and_not_polling(curr)) if (set_nr_and_not_polling(cti, tif)) {
smp_send_reschedule(cpu); if (tif == TIF_NEED_RESCHED)
else smp_send_reschedule(cpu);
} else {
trace_sched_wake_idle_without_ipi(cpu); trace_sched_wake_idle_without_ipi(cpu);
}
}
void resched_curr(struct rq *rq)
{
__resched_curr(rq, TIF_NEED_RESCHED);
} }
void resched_cpu(int cpu) void resched_cpu(int cpu)
@ -1192,7 +1200,7 @@ static void wake_up_idle_cpu(int cpu)
* and testing of the above solutions didn't appear to report * and testing of the above solutions didn't appear to report
* much benefits. * much benefits.
*/ */
if (set_nr_and_not_polling(rq->idle)) if (set_nr_and_not_polling(task_thread_info(rq->idle), TIF_NEED_RESCHED))
smp_send_reschedule(cpu); smp_send_reschedule(cpu);
else else
trace_sched_wake_idle_without_ipi(cpu); trace_sched_wake_idle_without_ipi(cpu);