srcu: Use irq_work to start GP in tiny SRCU

Tiny SRCU's srcu_gp_start_if_needed() directly calls schedule_work(),
which acquires the workqueue pool->lock.

This causes a lockdep splat when call_srcu() is called with a scheduler
lock held, due to:

  call_srcu() [holding pi_lock]
    srcu_gp_start_if_needed()
      schedule_work() -> pool->lock

  workqueue_init() / create_worker() [holding pool->lock]
    wake_up_process() -> try_to_wake_up() -> pi_lock

Also add irq_work_sync() to cleanup_srcu_struct() to prevent a
use-after-free if a queued irq_work fires after cleanup begins.

Tested with rcutorture SRCU-T and no lockdep warnings.

[ Thanks to Boqun for similar fix in patch "rcu: Use an intermediate irq_work
to start process_srcu()" ]

Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Boqun Feng <boqun@kernel.org>
This commit is contained in:
Joel Fernandes 2026-03-23 20:14:18 -04:00 committed by Boqun Feng
parent 7c405fb327
commit a6fc88b22b
2 changed files with 22 additions and 1 deletions

View File

@ -11,6 +11,7 @@
#ifndef _LINUX_SRCU_TINY_H
#define _LINUX_SRCU_TINY_H
#include <linux/irq_work_types.h>
#include <linux/swait.h>
struct srcu_struct {
@ -24,18 +25,21 @@ struct srcu_struct {
struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */
struct rcu_head **srcu_cb_tail; /* Pending callbacks: Tail. */
struct work_struct srcu_work; /* For driving grace periods. */
struct irq_work srcu_irq_work; /* Defer schedule_work() to irq work. */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
};
void srcu_drive_gp(struct work_struct *wp);
void srcu_tiny_irq_work(struct irq_work *irq_work);
#define __SRCU_STRUCT_INIT(name, __ignored, ___ignored, ____ignored) \
{ \
.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \
.srcu_cb_tail = &name.srcu_cb_head, \
.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \
.srcu_irq_work = { .func = srcu_tiny_irq_work }, \
__SRCU_DEP_MAP_INIT(name) \
}

View File

@ -9,6 +9,7 @@
*/
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/mutex.h>
#include <linux/preempt.h>
#include <linux/rcupdate_wait.h>
@ -41,6 +42,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp)
ssp->srcu_idx_max = 0;
INIT_WORK(&ssp->srcu_work, srcu_drive_gp);
INIT_LIST_HEAD(&ssp->srcu_work.entry);
init_irq_work(&ssp->srcu_irq_work, srcu_tiny_irq_work);
return 0;
}
@ -84,6 +86,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
void cleanup_srcu_struct(struct srcu_struct *ssp)
{
WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]);
irq_work_sync(&ssp->srcu_irq_work);
flush_work(&ssp->srcu_work);
WARN_ON(ssp->srcu_gp_running);
WARN_ON(ssp->srcu_gp_waiting);
@ -177,6 +180,20 @@ void srcu_drive_gp(struct work_struct *wp)
}
EXPORT_SYMBOL_GPL(srcu_drive_gp);
/*
* Use an irq_work to defer schedule_work() to avoid acquiring the workqueue
* pool->lock while the caller might hold scheduler locks, causing lockdep
* splats due to workqueue_init() doing a wakeup.
*/
void srcu_tiny_irq_work(struct irq_work *irq_work)
{
struct srcu_struct *ssp;
ssp = container_of(irq_work, struct srcu_struct, srcu_irq_work);
schedule_work(&ssp->srcu_work);
}
EXPORT_SYMBOL_GPL(srcu_tiny_irq_work);
static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
{
unsigned long cookie;
@ -189,7 +206,7 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
WRITE_ONCE(ssp->srcu_idx_max, cookie);
if (!READ_ONCE(ssp->srcu_gp_running)) {
if (likely(srcu_init_done))
schedule_work(&ssp->srcu_work);
irq_work_queue(&ssp->srcu_irq_work);
else if (list_empty(&ssp->srcu_work.entry))
list_add(&ssp->srcu_work.entry, &srcu_boot_list);
}