mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 19:46:16 +00:00
sched_ext: Enable the ops breather and eject BPF scheduler on softlockup
On 2 x Intel Sapphire Rapids machines with 224 logical CPUs, a poorly behaving BPF scheduler can live-lock the system by making multiple CPUs bang on the same DSQ to the point where soft-lockup detection triggers before SCX's own watchdog can take action. It also seems possible that the machine can be live-locked enough to prevent scx_ops_helper, which is an RT task, from running in a timely manner. Implement scx_softlockup(), which is called when three quarters of the soft-lockup threshold has passed. The function immediately enables the ops breather and triggers an ops error to initiate ejection of the BPF scheduler. Combined with the previous patch, this patch enables the kernel to reliably recover the system from live-lock conditions that can be triggered by a poorly behaving BPF scheduler on Intel dual-socket systems. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Douglas Anderson <dianders@chromium.org> Cc: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
62dcbab8b0
commit
e32c260195
@ -205,11 +205,13 @@ struct sched_ext_entity {
|
|||||||
|
|
||||||
void sched_ext_free(struct task_struct *p);
|
void sched_ext_free(struct task_struct *p);
|
||||||
void print_scx_info(const char *log_lvl, struct task_struct *p);
|
void print_scx_info(const char *log_lvl, struct task_struct *p);
|
||||||
|
void scx_softlockup(u32 dur_s);
|
||||||
|
|
||||||
#else /* !CONFIG_SCHED_CLASS_EXT */
|
#else /* !CONFIG_SCHED_CLASS_EXT */
|
||||||
|
|
||||||
static inline void sched_ext_free(struct task_struct *p) {}
|
static inline void sched_ext_free(struct task_struct *p) {}
|
||||||
static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
|
static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
|
||||||
|
static inline void scx_softlockup(u32 dur_s) {}
|
||||||
|
|
||||||
#endif /* CONFIG_SCHED_CLASS_EXT */
|
#endif /* CONFIG_SCHED_CLASS_EXT */
|
||||||
#endif /* _LINUX_SCHED_EXT_H */
|
#endif /* _LINUX_SCHED_EXT_H */
|
||||||
|
@ -867,6 +867,7 @@ static DEFINE_MUTEX(scx_ops_enable_mutex);
|
|||||||
DEFINE_STATIC_KEY_FALSE(__scx_ops_enabled);
|
DEFINE_STATIC_KEY_FALSE(__scx_ops_enabled);
|
||||||
DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem);
|
DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem);
|
||||||
static atomic_t scx_ops_enable_state_var = ATOMIC_INIT(SCX_OPS_DISABLED);
|
static atomic_t scx_ops_enable_state_var = ATOMIC_INIT(SCX_OPS_DISABLED);
|
||||||
|
static unsigned long scx_in_softlockup;
|
||||||
static atomic_t scx_ops_breather_depth = ATOMIC_INIT(0);
|
static atomic_t scx_ops_breather_depth = ATOMIC_INIT(0);
|
||||||
static int scx_ops_bypass_depth;
|
static int scx_ops_bypass_depth;
|
||||||
static bool scx_ops_init_task_enabled;
|
static bool scx_ops_init_task_enabled;
|
||||||
@ -4614,6 +4615,49 @@ bool task_should_scx(struct task_struct *p)
|
|||||||
return p->policy == SCHED_EXT;
|
return p->policy == SCHED_EXT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* scx_softlockup - sched_ext softlockup handler
|
||||||
|
*
|
||||||
|
* On some multi-socket setups (e.g. 2x Intel 8480c), the BPF scheduler can
|
||||||
|
* live-lock the system by making many CPUs target the same DSQ to the point
|
||||||
|
* where soft-lockup detection triggers. This function is called from
|
||||||
|
* the soft-lockup watchdog when the triggering point is close and tries to unjam
|
||||||
|
* the system by enabling the breather and aborting the BPF scheduler.
|
||||||
|
*/
|
||||||
|
void scx_softlockup(u32 dur_s)
|
||||||
|
{
|
||||||
|
switch (scx_ops_enable_state()) {
|
||||||
|
case SCX_OPS_ENABLING:
|
||||||
|
case SCX_OPS_ENABLED:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* allow only one instance, cleared at the end of scx_ops_bypass() */
|
||||||
|
if (test_and_set_bit(0, &scx_in_softlockup))
|
||||||
|
return;
|
||||||
|
|
||||||
|
printk_deferred(KERN_ERR "sched_ext: Soft lockup - CPU%d stuck for %us, disabling \"%s\"\n",
|
||||||
|
smp_processor_id(), dur_s, scx_ops.name);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Some CPUs may be trapped in the dispatch paths. Enable breather
|
||||||
|
* immediately; otherwise, we might not even be able to get to
|
||||||
|
* scx_ops_bypass().
|
||||||
|
*/
|
||||||
|
atomic_inc(&scx_ops_breather_depth);
|
||||||
|
|
||||||
|
scx_ops_error("soft lockup - CPU#%d stuck for %us",
|
||||||
|
smp_processor_id(), dur_s);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void scx_clear_softlockup(void)
|
||||||
|
{
|
||||||
|
if (test_and_clear_bit(0, &scx_in_softlockup))
|
||||||
|
atomic_dec(&scx_ops_breather_depth);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* scx_ops_bypass - [Un]bypass scx_ops and guarantee forward progress
|
* scx_ops_bypass - [Un]bypass scx_ops and guarantee forward progress
|
||||||
*
|
*
|
||||||
@ -4724,6 +4768,7 @@ static void scx_ops_bypass(bool bypass)
|
|||||||
atomic_dec(&scx_ops_breather_depth);
|
atomic_dec(&scx_ops_breather_depth);
|
||||||
unlock:
|
unlock:
|
||||||
raw_spin_unlock_irqrestore(&bypass_lock, flags);
|
raw_spin_unlock_irqrestore(&bypass_lock, flags);
|
||||||
|
scx_clear_softlockup();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void free_exit_info(struct scx_exit_info *ei)
|
static void free_exit_info(struct scx_exit_info *ei)
|
||||||
|
@ -644,6 +644,14 @@ static int is_softlockup(unsigned long touch_ts,
|
|||||||
need_counting_irqs())
|
need_counting_irqs())
|
||||||
start_counting_irqs();
|
start_counting_irqs();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A poorly behaving BPF scheduler can live-lock the system into
|
||||||
|
* soft lockups. Tell sched_ext to try ejecting the BPF
|
||||||
|
* scheduler when close to a soft lockup.
|
||||||
|
*/
|
||||||
|
if (time_after_eq(now, period_ts + get_softlockup_thresh() * 3 / 4))
|
||||||
|
scx_softlockup(now - touch_ts);
|
||||||
|
|
||||||
/* Warn about unreasonable delays. */
|
/* Warn about unreasonable delays. */
|
||||||
if (time_after(now, period_ts + get_softlockup_thresh()))
|
if (time_after(now, period_ts + get_softlockup_thresh()))
|
||||||
return now - touch_ts;
|
return now - touch_ts;
|
||||||
|
@ -35,6 +35,8 @@ print(f'enabled : {read_static_key("__scx_ops_enabled")}')
|
|||||||
print(f'switching_all : {read_int("scx_switching_all")}')
|
print(f'switching_all : {read_int("scx_switching_all")}')
|
||||||
print(f'switched_all : {read_static_key("__scx_switched_all")}')
|
print(f'switched_all : {read_static_key("__scx_switched_all")}')
|
||||||
print(f'enable_state : {ops_state_str(enable_state)} ({enable_state})')
|
print(f'enable_state : {ops_state_str(enable_state)} ({enable_state})')
|
||||||
|
print(f'in_softlockup : {prog["scx_in_softlockup"].value_()}')
|
||||||
|
print(f'breather_depth: {read_atomic("scx_ops_breather_depth")}')
|
||||||
print(f'bypass_depth : {prog["scx_ops_bypass_depth"].value_()}')
|
print(f'bypass_depth : {prog["scx_ops_bypass_depth"].value_()}')
|
||||||
print(f'nr_rejected : {read_atomic("scx_nr_rejected")}')
|
print(f'nr_rejected : {read_atomic("scx_nr_rejected")}')
|
||||||
print(f'enable_seq : {read_atomic("scx_enable_seq")}')
|
print(f'enable_seq : {read_atomic("scx_enable_seq")}')
|
||||||
|
Loading…
Reference in New Issue
Block a user