sched_ext: Fixes for v7.0-rc6
Merge tag 'sched_ext-for-7.0-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:

 - Fix SCX_KICK_WAIT deadlock where multiple CPUs waiting for each other
   in hardirq context form a cycle. Move the wait to a balance callback,
   which can drop the rq lock and process IPIs.

 - Fix inconsistent NUMA node lookup in scx_select_cpu_dfl(), where
   waker_node used cpu_to_node() while prev_cpu used
   scx_cpu_node_if_enabled(), leading to undefined behavior when per-node
   idle tracking is disabled.

* tag 'sched_ext-for-7.0-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  selftests/sched_ext: Add cyclic SCX_KICK_WAIT stress test
  sched_ext: Fix SCX_KICK_WAIT deadlock by deferring wait to balance callback
  sched_ext: Fix inconsistent NUMA node lookup in scx_select_cpu_dfl()
commit 9147566d80
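To make the deadlock and its cure concrete, here is a minimal standalone model of the kick_sync handshake described above (an illustrative userspace sketch, not kernel code; gen, waiter and NR_CPUS_SIM are invented for the example). Each simulated CPU publishes a generation counter and bumps it whenever it "enters the scheduling path"; a waiter snapshots the target's counter and spins until the snapshot goes stale. The property the fix restores is that a spinning waiter keeps bumping its own counter, so a ring of waiters cannot deadlock:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS_SIM 3

static atomic_ulong gen[NR_CPUS_SIM];	/* per-"CPU" kick_sync generation */
static atomic_int nr_done;

static void *waiter(void *arg)
{
	int me = (int)(long)arg;
	int tgt = (me + 1) % NR_CPUS_SIM;	/* ring: 0 -> 1 -> 2 -> 0 */
	unsigned long snap = atomic_load(&gen[tgt]);

	/*
	 * Spin until the target's generation advances. Keep bumping our
	 * own counter while spinning: a waiter targeting us then observes
	 * progress, so the cyclic dependency cannot deadlock. Removing
	 * the bump below makes all three threads spin forever.
	 */
	while (atomic_load(&gen[tgt]) == snap)
		atomic_fetch_add(&gen[me], 1);

	atomic_fetch_add(&nr_done, 1);

	/*
	 * Keep "entering the scheduling path" until every waiter has made
	 * progress, mimicking a live CPU that keeps rescheduling.
	 */
	while (atomic_load(&nr_done) < NR_CPUS_SIM)
		atomic_fetch_add(&gen[me], 1);

	return NULL;
}

int main(void)
{
	pthread_t tids[NR_CPUS_SIM];

	for (long i = 0; i < NR_CPUS_SIM; i++)
		pthread_create(&tids[i], NULL, waiter, (void *)i);
	for (int i = 0; i < NR_CPUS_SIM; i++)
		pthread_join(tids[i], NULL);

	puts("all waiters made forward progress");
	return 0;
}

Compiled with cc -pthread, the program terminates; deleting the bump in the wait loop leaves all three threads spinning on each other's frozen counters, mirroring the hardirq-context failure mode this pull fixes.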
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2404,7 +2404,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
 {
 	struct scx_sched *sch = scx_root;
 
-	/* see kick_cpus_irq_workfn() */
+	/* see kick_sync_wait_bal_cb() */
 	smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1);
 
 	update_curr_scx(rq);
@@ -2447,6 +2447,48 @@ switch_class:
 	switch_class(rq, next);
 }
 
+static void kick_sync_wait_bal_cb(struct rq *rq)
+{
+	struct scx_kick_syncs __rcu *ks = __this_cpu_read(scx_kick_syncs);
+	unsigned long *ksyncs = rcu_dereference_sched(ks)->syncs;
+	bool waited;
+	s32 cpu;
+
+	/*
+	 * Drop rq lock and enable IRQs while waiting. IRQs must be enabled;
+	 * a target CPU may be waiting for us to process an IPI (e.g. TLB
+	 * flush) while we wait for its kick_sync to advance.
+	 *
+	 * Also, keep advancing our own kick_sync so that new kick_sync waits
+	 * targeting us, which can start after we drop the lock, cannot form
+	 * cyclic dependencies.
+	 */
+retry:
+	waited = false;
+	for_each_cpu(cpu, rq->scx.cpus_to_sync) {
+		/*
+		 * smp_load_acquire() pairs with smp_store_release() on
+		 * kick_sync updates on the target CPUs.
+		 */
+		if (cpu == cpu_of(rq) ||
+		    smp_load_acquire(&cpu_rq(cpu)->scx.kick_sync) != ksyncs[cpu]) {
+			cpumask_clear_cpu(cpu, rq->scx.cpus_to_sync);
+			continue;
+		}
+
+		raw_spin_rq_unlock_irq(rq);
+		while (READ_ONCE(cpu_rq(cpu)->scx.kick_sync) == ksyncs[cpu]) {
+			smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1);
+			cpu_relax();
+		}
+		raw_spin_rq_lock_irq(rq);
+		waited = true;
+	}
+
+	if (waited)
+		goto retry;
+}
+
 static struct task_struct *first_local_task(struct rq *rq)
 {
 	return list_first_entry_or_null(&rq->scx.local_dsq.list,
@@ -2460,7 +2502,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
 	bool keep_prev;
 	struct task_struct *p;
 
-	/* see kick_cpus_irq_workfn() */
+	/* see kick_sync_wait_bal_cb() */
 	smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1);
 
 	rq_modified_begin(rq, &ext_sched_class);
@@ -2470,6 +2512,17 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
 	rq_repin_lock(rq, rf);
 	maybe_queue_balance_callback(rq);
 
+	/*
+	 * Defer to a balance callback which can drop rq lock and enable
+	 * IRQs. Waiting directly in the pick path would deadlock against
+	 * CPUs sending us IPIs (e.g. TLB flushes) while we wait for them.
+	 */
+	if (unlikely(rq->scx.kick_sync_pending)) {
+		rq->scx.kick_sync_pending = false;
+		queue_balance_callback(rq, &rq->scx.kick_sync_bal_cb,
+				       kick_sync_wait_bal_cb);
+	}
+
 	/*
 	 * If any higher-priority sched class enqueued a runnable task on
 	 * this rq during balance_one(), abort and return RETRY_TASK, so
@@ -4713,6 +4766,9 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
 		if (!cpumask_empty(rq->scx.cpus_to_wait))
 			dump_line(&ns, "  cpus_to_wait   : %*pb",
 				  cpumask_pr_args(rq->scx.cpus_to_wait));
+		if (!cpumask_empty(rq->scx.cpus_to_sync))
+			dump_line(&ns, "  cpus_to_sync   : %*pb",
+				  cpumask_pr_args(rq->scx.cpus_to_sync));
 
 		used = seq_buf_used(&ns);
 		if (SCX_HAS_OP(sch, dump_cpu)) {
@@ -5610,11 +5666,11 @@ static bool kick_one_cpu(s32 cpu, struct rq *this_rq, unsigned long *ksyncs)
 
 	if (cpumask_test_cpu(cpu, this_scx->cpus_to_wait)) {
 		if (cur_class == &ext_sched_class) {
+			cpumask_set_cpu(cpu, this_scx->cpus_to_sync);
 			ksyncs[cpu] = rq->scx.kick_sync;
 			should_wait = true;
-		} else {
-			cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
 		}
+		cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
 	}
 
 	resched_curr(rq);
@@ -5669,27 +5725,15 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work)
 			cpumask_clear_cpu(cpu, this_scx->cpus_to_kick_if_idle);
 	}
 
-	if (!should_wait)
-		return;
-
-	for_each_cpu(cpu, this_scx->cpus_to_wait) {
-		unsigned long *wait_kick_sync = &cpu_rq(cpu)->scx.kick_sync;
-
-		/*
-		 * Busy-wait until the task running at the time of kicking is no
-		 * longer running. This can be used to implement e.g. core
-		 * scheduling.
-		 *
-		 * smp_cond_load_acquire() pairs with store_releases in
-		 * pick_task_scx() and put_prev_task_scx(). The former breaks
-		 * the wait if SCX's scheduling path is entered even if the same
-		 * task is picked subsequently. The latter is necessary to break
-		 * the wait when $cpu is taken by a higher sched class.
-		 */
-		if (cpu != cpu_of(this_rq))
-			smp_cond_load_acquire(wait_kick_sync, VAL != ksyncs[cpu]);
-
-		cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
+	/*
+	 * Can't wait in hardirq: kick_sync can't advance, deadlocking if
+	 * CPUs wait for each other. Defer to kick_sync_wait_bal_cb().
+	 */
+	if (should_wait) {
+		raw_spin_rq_lock(this_rq);
+		this_scx->kick_sync_pending = true;
+		resched_curr(this_rq);
+		raw_spin_rq_unlock(this_rq);
 	}
 }
@@ -5794,6 +5838,7 @@ void __init init_sched_ext_class(void)
 		BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_kick_if_idle, GFP_KERNEL, n));
 		BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_preempt, GFP_KERNEL, n));
 		BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_wait, GFP_KERNEL, n));
+		BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_sync, GFP_KERNEL, n));
 		rq->scx.deferred_irq_work = IRQ_WORK_INIT_HARD(deferred_irq_workfn);
 		rq->scx.kick_cpus_irq_work = IRQ_WORK_INIT_HARD(kick_cpus_irq_workfn);
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -543,7 +543,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
 		 * piled up on it even if there is an idle core elsewhere on
 		 * the system.
 		 */
-		waker_node = cpu_to_node(cpu);
+		waker_node = scx_cpu_node_if_enabled(cpu);
 		if (!(current->flags & PF_EXITING) &&
 		    cpu_rq(cpu)->scx.local_dsq.nr == 0 &&
 		    (!(flags & SCX_PICK_IDLE_IN_NODE) || (waker_node == node)) &&
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -805,9 +805,12 @@ struct scx_rq {
 	cpumask_var_t		cpus_to_kick_if_idle;
 	cpumask_var_t		cpus_to_preempt;
 	cpumask_var_t		cpus_to_wait;
+	cpumask_var_t		cpus_to_sync;
+	bool			kick_sync_pending;
 	unsigned long		kick_sync;
 	local_t			reenq_local_deferred;
 	struct balance_callback	deferred_bal_cb;
+	struct balance_callback	kick_sync_bal_cb;
 	struct irq_work		deferred_irq_work;
 	struct irq_work		kick_cpus_irq_work;
 	struct scx_dispatch_q	bypass_dsq;
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -188,6 +188,7 @@ auto-test-targets := \
 	rt_stall \
 	test_example \
 	total_bw \
+	cyclic_kick_wait \
 
 testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets)))
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/cyclic_kick_wait.bpf.c
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Stress concurrent SCX_KICK_WAIT calls to reproduce wait-cycle deadlock.
+ *
+ * Three CPUs are designated from userspace. Every enqueue from one of the
+ * three CPUs kicks the next CPU in the ring with SCX_KICK_WAIT, creating
+ * persistent A -> B -> C -> A wait cycle pressure.
+ */
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+const volatile s32 test_cpu_a;
+const volatile s32 test_cpu_b;
+const volatile s32 test_cpu_c;
+
+u64 nr_enqueues;
+u64 nr_wait_kicks;
+
+UEI_DEFINE(uei);
+
+static s32 target_cpu(s32 cpu)
+{
+	if (cpu == test_cpu_a)
+		return test_cpu_b;
+	if (cpu == test_cpu_b)
+		return test_cpu_c;
+	if (cpu == test_cpu_c)
+		return test_cpu_a;
+	return -1;
+}
+
+void BPF_STRUCT_OPS(cyclic_kick_wait_enqueue, struct task_struct *p,
+		    u64 enq_flags)
+{
+	s32 this_cpu = bpf_get_smp_processor_id();
+	s32 tgt;
+
+	__sync_fetch_and_add(&nr_enqueues, 1);
+
+	if (p->flags & PF_KTHREAD) {
+		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_INF,
+				   enq_flags | SCX_ENQ_PREEMPT);
+		return;
+	}
+
+	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+
+	tgt = target_cpu(this_cpu);
+	if (tgt < 0 || tgt == this_cpu)
+		return;
+
+	__sync_fetch_and_add(&nr_wait_kicks, 1);
+	scx_bpf_kick_cpu(tgt, SCX_KICK_WAIT);
+}
+
+void BPF_STRUCT_OPS(cyclic_kick_wait_exit, struct scx_exit_info *ei)
+{
+	UEI_RECORD(uei, ei);
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops cyclic_kick_wait_ops = {
+	.enqueue	= cyclic_kick_wait_enqueue,
+	.exit		= cyclic_kick_wait_exit,
+	.name		= "cyclic_kick_wait",
+	.timeout_ms	= 1000U,
+};
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/cyclic_kick_wait.c
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Test SCX_KICK_WAIT forward progress under cyclic wait pressure.
+ *
+ * SCX_KICK_WAIT busy-waits until the target CPU enters the scheduling path.
+ * If multiple CPUs form a wait cycle (A waits for B, B waits for C, C waits
+ * for A), all CPUs deadlock unless the implementation breaks the cycle.
+ *
+ * This test creates that scenario: three CPUs are arranged in a ring. The BPF
+ * scheduler's ops.enqueue() kicks the next CPU in the ring with SCX_KICK_WAIT
+ * on every enqueue. Userspace pins 4 worker threads per CPU that loop calling
+ * sched_yield(), generating a steady stream of enqueues and thus sustained
+ * A->B->C->A kick_wait cycle pressure. The test passes if the system remains
+ * responsive for 5 seconds without the scheduler being killed by the watchdog.
+ */
+#define _GNU_SOURCE
+
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <scx/common.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "scx_test.h"
+#include "cyclic_kick_wait.bpf.skel.h"
+
+#define WORKERS_PER_CPU		4
+#define NR_TEST_CPUS		3
+#define NR_WORKERS		(NR_TEST_CPUS * WORKERS_PER_CPU)
+
+struct worker_ctx {
+	pthread_t		tid;
+	int			cpu;
+	volatile bool		stop;
+	volatile __u64		iters;
+	bool			started;
+};
+
+static void *worker_fn(void *arg)
+{
+	struct worker_ctx *worker = arg;
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(worker->cpu, &mask);
+
+	if (sched_setaffinity(0, sizeof(mask), &mask))
+		return (void *)(uintptr_t)errno;
+
+	while (!worker->stop) {
+		sched_yield();
+		worker->iters++;
+	}
+
+	return NULL;
+}
+
+static int join_worker(struct worker_ctx *worker)
+{
+	void *ret;
+	struct timespec ts;
+	int err;
+
+	if (!worker->started)
+		return 0;
+
+	if (clock_gettime(CLOCK_REALTIME, &ts))
+		return -errno;
+
+	ts.tv_sec += 2;
+	err = pthread_timedjoin_np(worker->tid, &ret, &ts);
+	if (err == ETIMEDOUT)
+		pthread_detach(worker->tid);
+	if (err)
+		return -err;
+
+	if ((uintptr_t)ret)
+		return -(int)(uintptr_t)ret;
+
+	return 0;
+}
+
+static enum scx_test_status setup(void **ctx)
+{
+	struct cyclic_kick_wait *skel;
+
+	skel = cyclic_kick_wait__open();
+	SCX_FAIL_IF(!skel, "Failed to open skel");
+	SCX_ENUM_INIT(skel);
+
+	*ctx = skel;
+	return SCX_TEST_PASS;
+}
+
+static enum scx_test_status run(void *ctx)
+{
+	struct cyclic_kick_wait *skel = ctx;
+	struct worker_ctx workers[NR_WORKERS] = {};
+	struct bpf_link *link = NULL;
+	enum scx_test_status status = SCX_TEST_PASS;
+	int test_cpus[NR_TEST_CPUS];
+	int nr_cpus = 0;
+	cpu_set_t mask;
+	int ret, i;
+
+	if (sched_getaffinity(0, sizeof(mask), &mask)) {
+		SCX_ERR("Failed to get affinity (%d)", errno);
+		return SCX_TEST_FAIL;
+	}
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (CPU_ISSET(i, &mask))
+			test_cpus[nr_cpus++] = i;
+		if (nr_cpus == NR_TEST_CPUS)
+			break;
+	}
+
+	if (nr_cpus < NR_TEST_CPUS)
+		return SCX_TEST_SKIP;
+
+	skel->rodata->test_cpu_a = test_cpus[0];
+	skel->rodata->test_cpu_b = test_cpus[1];
+	skel->rodata->test_cpu_c = test_cpus[2];
+
+	if (cyclic_kick_wait__load(skel)) {
+		SCX_ERR("Failed to load skel");
+		return SCX_TEST_FAIL;
+	}
+
+	link = bpf_map__attach_struct_ops(skel->maps.cyclic_kick_wait_ops);
+	if (!link) {
+		SCX_ERR("Failed to attach scheduler");
+		return SCX_TEST_FAIL;
+	}
+
+	for (i = 0; i < NR_WORKERS; i++)
+		workers[i].cpu = test_cpus[i / WORKERS_PER_CPU];
+
+	for (i = 0; i < NR_WORKERS; i++) {
+		ret = pthread_create(&workers[i].tid, NULL, worker_fn, &workers[i]);
+		if (ret) {
+			SCX_ERR("Failed to create worker thread %d (%d)", i, ret);
+			status = SCX_TEST_FAIL;
+			goto out;
+		}
+		workers[i].started = true;
+	}
+
+	sleep(5);
+
+	if (skel->data->uei.kind != EXIT_KIND(SCX_EXIT_NONE)) {
+		SCX_ERR("Scheduler exited unexpectedly (kind=%llu code=%lld)",
+			(unsigned long long)skel->data->uei.kind,
+			(long long)skel->data->uei.exit_code);
+		status = SCX_TEST_FAIL;
+	}
+
+out:
+	for (i = 0; i < NR_WORKERS; i++)
+		workers[i].stop = true;
+
+	for (i = 0; i < NR_WORKERS; i++) {
+		ret = join_worker(&workers[i]);
+		if (ret && status == SCX_TEST_PASS) {
+			SCX_ERR("Failed to join worker thread %d (%d)", i, ret);
+			status = SCX_TEST_FAIL;
+		}
+	}
+
+	if (link)
+		bpf_link__destroy(link);
+
+	return status;
+}
+
+static void cleanup(void *ctx)
+{
+	struct cyclic_kick_wait *skel = ctx;
+
+	cyclic_kick_wait__destroy(skel);
+}
+
+struct scx_test cyclic_kick_wait = {
+	.name = "cyclic_kick_wait",
+	.description = "Verify SCX_KICK_WAIT forward progress under a 3-CPU wait cycle",
+	.setup = setup,
+	.run = run,
+	.cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&cyclic_kick_wait)
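Assuming the existing conventions in tools/testing/selftests/sched_ext (a runner binary with a -t name filter; treat the exact invocation as an assumption, not part of this pull), the new test would be run along these lines:

$ cd tools/testing/selftests/sched_ext
$ make
$ sudo ./runner -t cyclic_kick_wait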