Landlock fix for v7.0-rc6

-----BEGIN PGP SIGNATURE-----
 
 iIYEABYKAC4WIQSVyBthFV4iTW/VU1/l49DojIL20gUCacVk0xAcbWljQGRpZ2lr
 b2QubmV0AAoJEOXj0OiMgvbS0v4A/joA39PP40bpHZorGYVgHyEZZgCgGicffmYd
 TnvlvawOAPoDc6h1HwkcOonhYgvEe29JPIBrEFOCNBZsGTntvN29Ag==
 =T4m+
 -----END PGP SIGNATURE-----

Merge tag 'landlock-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux

Pull Landlock fixes from Mickaël Salaün:
 "This mainly fixes Landlock TSYNC issues related to interrupts and
  unexpected task exit.

  Other fixes touch documentation and sample, and a new test extends
  coverage"

* tag 'landlock-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux:
  landlock: Expand restrict flags example for ABI version 8
  selftests/landlock: Test tsync interruption and cancellation paths
  landlock: Clean up interrupted thread logic in TSYNC
  landlock: Serialize TSYNC thread restriction
  samples/landlock: Bump ABI version to 8
  landlock: Improve TSYNC types
  landlock: Fully release unused TSYNC work entries
  landlock: Fix formatting
This commit is contained in:
Linus Torvalds 2026-03-26 12:03:37 -07:00
commit 25b69ebe28
6 changed files with 191 additions and 34 deletions

View File

@ -8,7 +8,7 @@ Landlock: unprivileged access control
=====================================
:Author: Mickaël Salaün
:Date: January 2026
:Date: March 2026
The goal of Landlock is to enable restriction of ambient rights (e.g. global
filesystem or network access) for a set of processes. Because Landlock
@ -197,12 +197,27 @@ similar backwards compatibility check is needed for the restrict flags
.. code-block:: c
__u32 restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
if (abi < 7) {
/* Clear logging flags unsupported before ABI 7. */
__u32 restrict_flags =
LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
LANDLOCK_RESTRICT_SELF_TSYNC;
switch (abi) {
case 1 ... 6:
/* Removes logging flags for ABI < 7 */
restrict_flags &= ~(LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF);
__attribute__((fallthrough));
case 7:
/*
* Removes multithreaded enforcement flag for ABI < 8
*
* WARNING: Without this flag, calling landlock_restrict_self(2) is
* only equivalent to calling it with the flag if the calling process
* is single-threaded. Below
* ABI v8 (and as of ABI v8, when not using this flag), a Landlock
* policy would only be enforced for the calling thread and its
* children (and not for all threads, including parents and siblings).
*/
restrict_flags &= ~LANDLOCK_RESTRICT_SELF_TSYNC;
}
The next step is to restrict the current thread from gaining more privileges

View File

@ -299,7 +299,7 @@ out_unset:
/* clang-format on */
#define LANDLOCK_ABI_LAST 7
#define LANDLOCK_ABI_LAST 8
#define XSTR(s) #s
#define STR(s) XSTR(s)
@ -436,7 +436,8 @@ int main(const int argc, char *const argv[], char *const *const envp)
/* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */
supported_restrict_flags &=
~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
__attribute__((fallthrough));
case 7:
/* Must be printed for any ABI < LANDLOCK_ABI_LAST. */
fprintf(stderr,
"Hint: You should update the running kernel "

View File

@ -94,8 +94,7 @@ static struct landlock_details *get_current_details(void)
* allocate with GFP_KERNEL_ACCOUNT because it is independent from the
* caller.
*/
details =
kzalloc_flex(*details, exe_path, path_size);
details = kzalloc_flex(*details, exe_path, path_size);
if (!details)
return ERR_PTR(-ENOMEM);

View File

@ -32,9 +32,8 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
{
struct landlock_ruleset *new_ruleset;
new_ruleset =
kzalloc_flex(*new_ruleset, access_masks, num_layers,
GFP_KERNEL_ACCOUNT);
new_ruleset = kzalloc_flex(*new_ruleset, access_masks, num_layers,
GFP_KERNEL_ACCOUNT);
if (!new_ruleset)
return ERR_PTR(-ENOMEM);
refcount_set(&new_ruleset->usage, 1);
@ -559,8 +558,8 @@ landlock_merge_ruleset(struct landlock_ruleset *const parent,
if (IS_ERR(new_dom))
return new_dom;
new_dom->hierarchy = kzalloc_obj(*new_dom->hierarchy,
GFP_KERNEL_ACCOUNT);
new_dom->hierarchy =
kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT);
if (!new_dom->hierarchy)
return ERR_PTR(-ENOMEM);

View File

@ -203,6 +203,40 @@ static struct tsync_work *tsync_works_provide(struct tsync_works *s,
return ctx;
}
/**
* tsync_works_trim - Put the last tsync_work element
*
* @s: TSYNC works to trim.
*
* Put the last task and decrement the size of @s.
*
* This helper does not cancel a running task, but just resets the last element
* to zero.
*/
static void tsync_works_trim(struct tsync_works *s)
{
struct tsync_work *ctx;
if (WARN_ON_ONCE(s->size <= 0))
return;
ctx = s->works[s->size - 1];
/*
* For consistency, remove the task from ctx so that it does not look like
* we handed it a task_work.
*/
put_task_struct(ctx->task);
*ctx = (typeof(*ctx)){};
/*
* Cancel the tsync_works_provide() change to recycle the reserved memory
* for the next thread, if any. This also ensures that cancel_tsync_works()
* and tsync_works_release() do not see any NULL task pointers.
*/
s->size--;
}
/*
* tsync_works_grow_by - preallocates space for n more contexts in s
*
@ -256,13 +290,14 @@ static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags)
* tsync_works_contains - checks for presence of task in s
*/
static bool tsync_works_contains_task(const struct tsync_works *s,
struct task_struct *task)
const struct task_struct *task)
{
size_t i;
for (i = 0; i < s->size; i++)
if (s->works[i]->task == task)
return true;
return false;
}
@ -276,7 +311,7 @@ static void tsync_works_release(struct tsync_works *s)
size_t i;
for (i = 0; i < s->size; i++) {
if (!s->works[i]->task)
if (WARN_ON_ONCE(!s->works[i]->task))
continue;
put_task_struct(s->works[i]->task);
@ -284,6 +319,7 @@ static void tsync_works_release(struct tsync_works *s)
for (i = 0; i < s->capacity; i++)
kfree(s->works[i]);
kfree(s->works);
s->works = NULL;
s->size = 0;
@ -295,7 +331,7 @@ static void tsync_works_release(struct tsync_works *s)
*/
static size_t count_additional_threads(const struct tsync_works *works)
{
struct task_struct *thread, *caller;
const struct task_struct *caller, *thread;
size_t n = 0;
caller = current;
@ -334,7 +370,8 @@ static bool schedule_task_work(struct tsync_works *works,
struct tsync_shared_context *shared_ctx)
{
int err;
struct task_struct *thread, *caller;
const struct task_struct *caller;
struct task_struct *thread;
struct tsync_work *ctx;
bool found_more_threads = false;
@ -379,16 +416,14 @@ static bool schedule_task_work(struct tsync_works *works,
init_task_work(&ctx->work, restrict_one_thread_callback);
err = task_work_add(thread, &ctx->work, TWA_SIGNAL);
if (err) {
if (unlikely(err)) {
/*
* task_work_add() only fails if the task is about to exit. We
* checked that earlier, but it can happen as a race. Resume
* without setting an error, as the task is probably gone in the
* next loop iteration. For consistency, remove the task from ctx
* so that it does not look like we handed it a task_work.
* next loop iteration.
*/
put_task_struct(ctx->task);
ctx->task = NULL;
tsync_works_trim(works);
atomic_dec(&shared_ctx->num_preparing);
atomic_dec(&shared_ctx->num_unfinished);
@ -406,12 +441,15 @@ static bool schedule_task_work(struct tsync_works *works,
* shared_ctx->num_preparing and shared_ctx->num_unfinished and mark the two
* completions if needed, as if the task was never scheduled.
*/
static void cancel_tsync_works(struct tsync_works *works,
static void cancel_tsync_works(const struct tsync_works *works,
struct tsync_shared_context *shared_ctx)
{
int i;
size_t i;
for (i = 0; i < works->size; i++) {
if (WARN_ON_ONCE(!works->works[i]->task))
continue;
if (!task_work_cancel(works->works[i]->task,
&works->works[i]->work))
continue;
@ -447,6 +485,16 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
shared_ctx.new_cred = new_cred;
shared_ctx.set_no_new_privs = task_no_new_privs(current);
/*
* Serialize concurrent TSYNC operations to prevent deadlocks when
* multiple threads call landlock_restrict_self() simultaneously.
* If the lock is already held, we gracefully yield by restarting the
* syscall. This allows the current thread to process pending
* task_works before retrying.
*/
if (!down_write_trylock(&current->signal->exec_update_lock))
return restart_syscall();
/*
* We schedule a pseudo-signal task_work for each of the calling task's
* sibling threads. In the task work, each thread:
@ -527,24 +575,30 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
-ERESTARTNOINTR);
/*
* Cancel task works for tasks that did not start running yet,
* and decrement all_prepared and num_unfinished accordingly.
* Opportunistic improvement: try to cancel task
* works for tasks that did not start running
* yet. We do not have a guarantee that it
* cancels any of the enqueued task works
* because task_work_run() might already have
* dequeued them.
*/
cancel_tsync_works(&works, &shared_ctx);
/*
* The remaining task works have started running, so waiting for
* their completion will finish.
* Break the loop with error. The cleanup code
* after the loop unblocks the remaining
* task_works.
*/
wait_for_completion(&shared_ctx.all_prepared);
break;
}
}
} while (found_more_threads &&
!atomic_read(&shared_ctx.preparation_error));
/*
* We now have all sibling threads blocking and in "prepared" state in the
* task work. Ask all threads to commit.
* We now have either (a) all sibling threads blocking and in "prepared"
* state in the task work, or (b) the preparation error is set. Ask all
* threads to commit (or abort).
*/
complete_all(&shared_ctx.ready_to_commit);
@ -556,6 +610,6 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
wait_for_completion(&shared_ctx.all_finished);
tsync_works_release(&works);
up_write(&current->signal->exec_update_lock);
return atomic_read(&shared_ctx.preparation_error);
}

View File

@ -6,9 +6,10 @@
*/
#define _GNU_SOURCE
#include <pthread.h>
#include <sys/prctl.h>
#include <linux/landlock.h>
#include <pthread.h>
#include <signal.h>
#include <sys/prctl.h>
#include "common.h"
@ -158,4 +159,92 @@ TEST(competing_enablement)
EXPECT_EQ(0, close(ruleset_fd));
}
static void signal_nop_handler(int sig)
{
}
struct signaler_data {
pthread_t target;
volatile bool stop;
};
static void *signaler_thread(void *data)
{
struct signaler_data *sd = data;
while (!sd->stop)
pthread_kill(sd->target, SIGUSR1);
return NULL;
}
/*
* Number of idle sibling threads. This must be large enough that even on
* machines with many cores, the sibling threads cannot all complete their
* credential preparation in a single parallel wave, otherwise the signaler
* thread has no window to interrupt wait_for_completion_interruptible().
* 200 threads on a 64-core machine yields ~3 serialized waves, giving the
* tight signal loop enough time to land an interruption.
*/
#define NUM_IDLE_THREADS 200
/*
* Exercises the tsync interruption and cancellation paths in tsync.c.
*
* When a signal interrupts the calling thread while it waits for sibling
* threads to finish their credential preparation
* (wait_for_completion_interruptible in landlock_restrict_sibling_threads),
* the kernel sets ERESTARTNOINTR, cancels queued task works that have not
* started yet (cancel_tsync_works), then waits for the remaining works to
* finish. On the error return, syscalls.c aborts the prepared credentials.
* The kernel automatically restarts the syscall, so userspace sees success.
*/
TEST(tsync_interrupt)
{
size_t i;
pthread_t threads[NUM_IDLE_THREADS];
pthread_t signaler;
struct signaler_data sd;
struct sigaction sa = {};
const int ruleset_fd = create_ruleset(_metadata);
disable_caps(_metadata);
/* Install a no-op SIGUSR1 handler so the signal does not kill us. */
sa.sa_handler = signal_nop_handler;
sigemptyset(&sa.sa_mask);
ASSERT_EQ(0, sigaction(SIGUSR1, &sa, NULL));
ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
for (i = 0; i < NUM_IDLE_THREADS; i++)
ASSERT_EQ(0, pthread_create(&threads[i], NULL, idle, NULL));
/*
* Start a signaler thread that continuously sends SIGUSR1 to the
* calling thread. This maximizes the chance of interrupting
* wait_for_completion_interruptible() in the kernel's tsync path.
*/
sd.target = pthread_self();
sd.stop = false;
ASSERT_EQ(0, pthread_create(&signaler, NULL, signaler_thread, &sd));
/*
* The syscall may be interrupted and transparently restarted by the
* kernel (ERESTARTNOINTR). From userspace, it should always succeed.
*/
EXPECT_EQ(0, landlock_restrict_self(ruleset_fd,
LANDLOCK_RESTRICT_SELF_TSYNC));
sd.stop = true;
ASSERT_EQ(0, pthread_join(signaler, NULL));
for (i = 0; i < NUM_IDLE_THREADS; i++) {
ASSERT_EQ(0, pthread_cancel(threads[i]));
ASSERT_EQ(0, pthread_join(threads[i], NULL));
}
EXPECT_EQ(0, close(ruleset_fd));
}
TEST_HARNESS_MAIN