Landlock fix for v7.0-rc6

-----BEGIN PGP SIGNATURE-----
 
 iIYEABYKAC4WIQSVyBthFV4iTW/VU1/l49DojIL20gUCacVk0xAcbWljQGRpZ2lr
 b2QubmV0AAoJEOXj0OiMgvbS0v4A/joA39PP40bpHZorGYVgHyEZZgCgGicffmYd
 TnvlvawOAPoDc6h1HwkcOonhYgvEe29JPIBrEFOCNBZsGTntvN29Ag==
 =T4m+
 -----END PGP SIGNATURE-----

Merge tag 'landlock-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux

Pull Landlock fixes from Mickaël Salaün:
 "This mainly fixes Landlock TSYNC issues related to interrupts and
  unexpected task exit.

  Other fixes touch documentation and sample, and a new test extends
  coverage"

* tag 'landlock-7.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux:
  landlock: Expand restrict flags example for ABI version 8
  selftests/landlock: Test tsync interruption and cancellation paths
  landlock: Clean up interrupted thread logic in TSYNC
  landlock: Serialize TSYNC thread restriction
  samples/landlock: Bump ABI version to 8
  landlock: Improve TSYNC types
  landlock: Fully release unused TSYNC work entries
  landlock: Fix formatting
This commit is contained in:
Linus Torvalds 2026-03-26 12:03:37 -07:00
commit 25b69ebe28
6 changed files with 191 additions and 34 deletions

View File

@ -8,7 +8,7 @@ Landlock: unprivileged access control
===================================== =====================================
:Author: Mickaël Salaün :Author: Mickaël Salaün
:Date: January 2026 :Date: March 2026
The goal of Landlock is to enable restriction of ambient rights (e.g. global The goal of Landlock is to enable restriction of ambient rights (e.g. global
filesystem or network access) for a set of processes. Because Landlock filesystem or network access) for a set of processes. Because Landlock
@ -197,12 +197,27 @@ similar backwards compatibility check is needed for the restrict flags
.. code-block:: c .. code-block:: c
__u32 restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; __u32 restrict_flags =
if (abi < 7) { LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
/* Clear logging flags unsupported before ABI 7. */ LANDLOCK_RESTRICT_SELF_TSYNC;
switch (abi) {
case 1 ... 6:
/* Removes logging flags for ABI < 7 */
restrict_flags &= ~(LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF | restrict_flags &= ~(LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF |
LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON |
LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF); LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF);
__attribute__((fallthrough));
case 7:
/*
* Removes multithreaded enforcement flag for ABI < 8
*
* WARNING: Without this flag, calling landlock_restrict_self(2) is
* only equivalent if the calling process is single-threaded. Below
* ABI v8 (and as of ABI v8, when not using this flag), a Landlock
* policy would only be enforced for the calling thread and its
* children (and not for all threads, including parents and siblings).
*/
restrict_flags &= ~LANDLOCK_RESTRICT_SELF_TSYNC;
} }
The next step is to restrict the current thread from gaining more privileges The next step is to restrict the current thread from gaining more privileges

View File

@ -299,7 +299,7 @@ out_unset:
/* clang-format on */ /* clang-format on */
#define LANDLOCK_ABI_LAST 7 #define LANDLOCK_ABI_LAST 8
#define XSTR(s) #s #define XSTR(s) #s
#define STR(s) XSTR(s) #define STR(s) XSTR(s)
@ -436,7 +436,8 @@ int main(const int argc, char *const argv[], char *const *const envp)
/* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */ /* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */
supported_restrict_flags &= supported_restrict_flags &=
~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; ~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON;
__attribute__((fallthrough));
case 7:
/* Must be printed for any ABI < LANDLOCK_ABI_LAST. */ /* Must be printed for any ABI < LANDLOCK_ABI_LAST. */
fprintf(stderr, fprintf(stderr,
"Hint: You should update the running kernel " "Hint: You should update the running kernel "

View File

@ -94,8 +94,7 @@ static struct landlock_details *get_current_details(void)
* allocate with GFP_KERNEL_ACCOUNT because it is independent from the * allocate with GFP_KERNEL_ACCOUNT because it is independent from the
* caller. * caller.
*/ */
details = details = kzalloc_flex(*details, exe_path, path_size);
kzalloc_flex(*details, exe_path, path_size);
if (!details) if (!details)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);

View File

@ -32,9 +32,8 @@ static struct landlock_ruleset *create_ruleset(const u32 num_layers)
{ {
struct landlock_ruleset *new_ruleset; struct landlock_ruleset *new_ruleset;
new_ruleset = new_ruleset = kzalloc_flex(*new_ruleset, access_masks, num_layers,
kzalloc_flex(*new_ruleset, access_masks, num_layers, GFP_KERNEL_ACCOUNT);
GFP_KERNEL_ACCOUNT);
if (!new_ruleset) if (!new_ruleset)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
refcount_set(&new_ruleset->usage, 1); refcount_set(&new_ruleset->usage, 1);
@ -559,8 +558,8 @@ landlock_merge_ruleset(struct landlock_ruleset *const parent,
if (IS_ERR(new_dom)) if (IS_ERR(new_dom))
return new_dom; return new_dom;
new_dom->hierarchy = kzalloc_obj(*new_dom->hierarchy, new_dom->hierarchy =
GFP_KERNEL_ACCOUNT); kzalloc_obj(*new_dom->hierarchy, GFP_KERNEL_ACCOUNT);
if (!new_dom->hierarchy) if (!new_dom->hierarchy)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);

View File

@ -203,6 +203,40 @@ static struct tsync_work *tsync_works_provide(struct tsync_works *s,
return ctx; return ctx;
} }
/**
* tsync_works_trim - Put the last tsync_work element
*
* @s: TSYNC works to trim.
*
* Put the last task and decrement the size of @s.
*
 * This helper does not cancel a running task, but just resets the last element
* to zero.
*/
static void tsync_works_trim(struct tsync_works *s)
{
struct tsync_work *ctx;
if (WARN_ON_ONCE(s->size <= 0))
return;
ctx = s->works[s->size - 1];
/*
* For consistency, remove the task from ctx so that it does not look like
* we handed it a task_work.
*/
put_task_struct(ctx->task);
*ctx = (typeof(*ctx)){};
/*
* Cancel the tsync_works_provide() change to recycle the reserved memory
* for the next thread, if any. This also ensures that cancel_tsync_works()
* and tsync_works_release() do not see any NULL task pointers.
*/
s->size--;
}
/* /*
* tsync_works_grow_by - preallocates space for n more contexts in s * tsync_works_grow_by - preallocates space for n more contexts in s
* *
@ -256,13 +290,14 @@ static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags)
* tsync_works_contains - checks for presence of task in s * tsync_works_contains - checks for presence of task in s
*/ */
static bool tsync_works_contains_task(const struct tsync_works *s, static bool tsync_works_contains_task(const struct tsync_works *s,
struct task_struct *task) const struct task_struct *task)
{ {
size_t i; size_t i;
for (i = 0; i < s->size; i++) for (i = 0; i < s->size; i++)
if (s->works[i]->task == task) if (s->works[i]->task == task)
return true; return true;
return false; return false;
} }
@ -276,7 +311,7 @@ static void tsync_works_release(struct tsync_works *s)
size_t i; size_t i;
for (i = 0; i < s->size; i++) { for (i = 0; i < s->size; i++) {
if (!s->works[i]->task) if (WARN_ON_ONCE(!s->works[i]->task))
continue; continue;
put_task_struct(s->works[i]->task); put_task_struct(s->works[i]->task);
@ -284,6 +319,7 @@ static void tsync_works_release(struct tsync_works *s)
for (i = 0; i < s->capacity; i++) for (i = 0; i < s->capacity; i++)
kfree(s->works[i]); kfree(s->works[i]);
kfree(s->works); kfree(s->works);
s->works = NULL; s->works = NULL;
s->size = 0; s->size = 0;
@ -295,7 +331,7 @@ static void tsync_works_release(struct tsync_works *s)
*/ */
static size_t count_additional_threads(const struct tsync_works *works) static size_t count_additional_threads(const struct tsync_works *works)
{ {
struct task_struct *thread, *caller; const struct task_struct *caller, *thread;
size_t n = 0; size_t n = 0;
caller = current; caller = current;
@ -334,7 +370,8 @@ static bool schedule_task_work(struct tsync_works *works,
struct tsync_shared_context *shared_ctx) struct tsync_shared_context *shared_ctx)
{ {
int err; int err;
struct task_struct *thread, *caller; const struct task_struct *caller;
struct task_struct *thread;
struct tsync_work *ctx; struct tsync_work *ctx;
bool found_more_threads = false; bool found_more_threads = false;
@ -379,16 +416,14 @@ static bool schedule_task_work(struct tsync_works *works,
init_task_work(&ctx->work, restrict_one_thread_callback); init_task_work(&ctx->work, restrict_one_thread_callback);
err = task_work_add(thread, &ctx->work, TWA_SIGNAL); err = task_work_add(thread, &ctx->work, TWA_SIGNAL);
if (err) { if (unlikely(err)) {
/* /*
* task_work_add() only fails if the task is about to exit. We * task_work_add() only fails if the task is about to exit. We
* checked that earlier, but it can happen as a race. Resume * checked that earlier, but it can happen as a race. Resume
* without setting an error, as the task is probably gone in the * without setting an error, as the task is probably gone in the
* next loop iteration. For consistency, remove the task from ctx * next loop iteration.
* so that it does not look like we handed it a task_work.
*/ */
put_task_struct(ctx->task); tsync_works_trim(works);
ctx->task = NULL;
atomic_dec(&shared_ctx->num_preparing); atomic_dec(&shared_ctx->num_preparing);
atomic_dec(&shared_ctx->num_unfinished); atomic_dec(&shared_ctx->num_unfinished);
@ -406,12 +441,15 @@ static bool schedule_task_work(struct tsync_works *works,
 * shared_ctx->num_preparing and shared_ctx->num_unfinished and mark the two * completions if needed, as if the task was never scheduled.
* completions if needed, as if the task was never scheduled. * completions if needed, as if the task was never scheduled.
*/ */
static void cancel_tsync_works(struct tsync_works *works, static void cancel_tsync_works(const struct tsync_works *works,
struct tsync_shared_context *shared_ctx) struct tsync_shared_context *shared_ctx)
{ {
int i; size_t i;
for (i = 0; i < works->size; i++) { for (i = 0; i < works->size; i++) {
if (WARN_ON_ONCE(!works->works[i]->task))
continue;
if (!task_work_cancel(works->works[i]->task, if (!task_work_cancel(works->works[i]->task,
&works->works[i]->work)) &works->works[i]->work))
continue; continue;
@ -447,6 +485,16 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
shared_ctx.new_cred = new_cred; shared_ctx.new_cred = new_cred;
shared_ctx.set_no_new_privs = task_no_new_privs(current); shared_ctx.set_no_new_privs = task_no_new_privs(current);
/*
* Serialize concurrent TSYNC operations to prevent deadlocks when
* multiple threads call landlock_restrict_self() simultaneously.
* If the lock is already held, we gracefully yield by restarting the
* syscall. This allows the current thread to process pending
* task_works before retrying.
*/
if (!down_write_trylock(&current->signal->exec_update_lock))
return restart_syscall();
/* /*
* We schedule a pseudo-signal task_work for each of the calling task's * We schedule a pseudo-signal task_work for each of the calling task's
* sibling threads. In the task work, each thread: * sibling threads. In the task work, each thread:
@ -527,24 +575,30 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
-ERESTARTNOINTR); -ERESTARTNOINTR);
/* /*
* Cancel task works for tasks that did not start running yet, * Opportunistic improvement: try to cancel task
* and decrement all_prepared and num_unfinished accordingly. * works for tasks that did not start running
* yet. We do not have a guarantee that it
* cancels any of the enqueued task works
* because task_work_run() might already have
* dequeued them.
*/ */
cancel_tsync_works(&works, &shared_ctx); cancel_tsync_works(&works, &shared_ctx);
/* /*
* The remaining task works have started running, so waiting for * Break the loop with error. The cleanup code
* their completion will finish. * after the loop unblocks the remaining
* task_works.
*/ */
wait_for_completion(&shared_ctx.all_prepared); break;
} }
} }
} while (found_more_threads && } while (found_more_threads &&
!atomic_read(&shared_ctx.preparation_error)); !atomic_read(&shared_ctx.preparation_error));
/* /*
* We now have all sibling threads blocking and in "prepared" state in the * We now have either (a) all sibling threads blocking and in "prepared"
* task work. Ask all threads to commit. * state in the task work, or (b) the preparation error is set. Ask all
* threads to commit (or abort).
*/ */
complete_all(&shared_ctx.ready_to_commit); complete_all(&shared_ctx.ready_to_commit);
@ -556,6 +610,6 @@ int landlock_restrict_sibling_threads(const struct cred *old_cred,
wait_for_completion(&shared_ctx.all_finished); wait_for_completion(&shared_ctx.all_finished);
tsync_works_release(&works); tsync_works_release(&works);
up_write(&current->signal->exec_update_lock);
return atomic_read(&shared_ctx.preparation_error); return atomic_read(&shared_ctx.preparation_error);
} }

View File

@ -6,9 +6,10 @@
*/ */
#define _GNU_SOURCE #define _GNU_SOURCE
#include <pthread.h>
#include <sys/prctl.h>
#include <linux/landlock.h> #include <linux/landlock.h>
#include <pthread.h>
#include <signal.h>
#include <sys/prctl.h>
#include "common.h" #include "common.h"
@ -158,4 +159,92 @@ TEST(competing_enablement)
EXPECT_EQ(0, close(ruleset_fd)); EXPECT_EQ(0, close(ruleset_fd));
} }
static void signal_nop_handler(int sig)
{
}
struct signaler_data {
pthread_t target;
volatile bool stop;
};
static void *signaler_thread(void *data)
{
struct signaler_data *sd = data;
while (!sd->stop)
pthread_kill(sd->target, SIGUSR1);
return NULL;
}
/*
* Number of idle sibling threads. This must be large enough that even on
* machines with many cores, the sibling threads cannot all complete their
* credential preparation in a single parallel wave, otherwise the signaler
* thread has no window to interrupt wait_for_completion_interruptible().
* 200 threads on a 64-core machine yields ~3 serialized waves, giving the
* tight signal loop enough time to land an interruption.
*/
#define NUM_IDLE_THREADS 200
/*
* Exercises the tsync interruption and cancellation paths in tsync.c.
*
* When a signal interrupts the calling thread while it waits for sibling
* threads to finish their credential preparation
* (wait_for_completion_interruptible in landlock_restrict_sibling_threads),
* the kernel sets ERESTARTNOINTR, cancels queued task works that have not
* started yet (cancel_tsync_works), then waits for the remaining works to
* finish. On the error return, syscalls.c aborts the prepared credentials.
* The kernel automatically restarts the syscall, so userspace sees success.
*/
TEST(tsync_interrupt)
{
size_t i;
pthread_t threads[NUM_IDLE_THREADS];
pthread_t signaler;
struct signaler_data sd;
struct sigaction sa = {};
const int ruleset_fd = create_ruleset(_metadata);
disable_caps(_metadata);
/* Install a no-op SIGUSR1 handler so the signal does not kill us. */
sa.sa_handler = signal_nop_handler;
sigemptyset(&sa.sa_mask);
ASSERT_EQ(0, sigaction(SIGUSR1, &sa, NULL));
ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
for (i = 0; i < NUM_IDLE_THREADS; i++)
ASSERT_EQ(0, pthread_create(&threads[i], NULL, idle, NULL));
/*
* Start a signaler thread that continuously sends SIGUSR1 to the
* calling thread. This maximizes the chance of interrupting
* wait_for_completion_interruptible() in the kernel's tsync path.
*/
sd.target = pthread_self();
sd.stop = false;
ASSERT_EQ(0, pthread_create(&signaler, NULL, signaler_thread, &sd));
/*
* The syscall may be interrupted and transparently restarted by the
* kernel (ERESTARTNOINTR). From userspace, it should always succeed.
*/
EXPECT_EQ(0, landlock_restrict_self(ruleset_fd,
LANDLOCK_RESTRICT_SELF_TSYNC));
sd.stop = true;
ASSERT_EQ(0, pthread_join(signaler, NULL));
for (i = 0; i < NUM_IDLE_THREADS; i++) {
ASSERT_EQ(0, pthread_cancel(threads[i]));
ASSERT_EQ(0, pthread_join(threads[i], NULL));
}
EXPECT_EQ(0, close(ruleset_fd));
}
TEST_HARNESS_MAIN TEST_HARNESS_MAIN