@@ -7,6 +7,7 @@
use crate::{
bindings,
ffi::{c_int, c_long, c_uint},
+ mm::MmWithUser,
pid_namespace::PidNamespace,
types::{ARef, NotThreadSafe, Opaque},
};
@@ -31,22 +32,20 @@
#[macro_export]
macro_rules! current {
() => {
- // SAFETY: Deref + addr-of below create a temporary `TaskRef` that cannot outlive the
- // caller.
+ // SAFETY: This expression creates a temporary value that is dropped at the end of the
+ // caller's scope. The following mechanisms ensure that the resulting `&CurrentTask` cannot
+ // leave current task context:
+ //
+ // * To return to userspace, the caller must leave the current scope.
+ // * Operations such as `begin_new_exec()` are necessarily unsafe and the caller of
+ // `begin_new_exec()` is responsible for safety.
+ // * Rust abstractions for things such as a `kthread_use_mm()` scope must require the
+ // closure to be `Send`, so the `NotThreadSafe` field of `CurrentTask` ensures that the
+ // `&CurrentTask` cannot cross the scope in either direction.
unsafe { &*$crate::task::Task::current() }
};
}
-/// Returns the currently running task's pid namespace.
-#[macro_export]
-macro_rules! current_pid_ns {
- () => {
- // SAFETY: Deref + addr-of below create a temporary `PidNamespaceRef` that cannot outlive
- // the caller.
- unsafe { &*$crate::task::Task::current_pid_ns() }
- };
-}
-
/// Wraps the kernel's `struct task_struct`.
///
/// # Invariants
@@ -85,7 +84,7 @@ macro_rules! current_pid_ns {
/// impl State {
/// fn new() -> Self {
/// Self {
-/// creator: current!().into(),
+/// creator: ARef::from(&**current!()),
/// index: 0,
/// }
/// }
@@ -105,6 +104,44 @@ unsafe impl Send for Task {}
// synchronised by C code (e.g., `signal_pending`).
unsafe impl Sync for Task {}
+/// Represents the [`Task`] in the `current` global.
+///
+/// This type exists to provide more efficient operations that are only valid on the current task.
+/// For example, to retrieve the pid-namespace of a task, you must use rcu protection unless it is
+/// the current task.
+///
+/// # Invariants
+///
+/// Each value of this type must only be accessed from the task context it was created within.
+///
+/// Of course, every thread is in a different task context, but for the purposes of this invariant,
+/// these operations also permanently leave the task context:
+///
+/// * Returning to userspace from system call context.
+/// * Calling `release_task()`.
+/// * Calling `begin_new_exec()` in a binary format loader.
+///
+/// Other operations temporarily create a new sub-context:
+///
+/// * Calling `kthread_use_mm()` creates a new context, and `kthread_unuse_mm()` returns to the
+/// old context.
+///
+/// This means that a `CurrentTask` obtained before a `kthread_use_mm()` call may be used again
+/// once `kthread_unuse_mm()` is called, but it must not be used between these two calls.
+/// Conversely, a `CurrentTask` obtained between a `kthread_use_mm()`/`kthread_unuse_mm()` pair
+/// must not be used after `kthread_unuse_mm()`.
+#[repr(transparent)]
+pub struct CurrentTask(Task, NotThreadSafe);
+
+// Make all `Task` methods available on `CurrentTask`.
+impl Deref for CurrentTask {
+ type Target = Task;
+ #[inline]
+ fn deref(&self) -> &Task {
+ &self.0
+ }
+}
+
/// The type of process identifiers (PIDs).
type Pid = bindings::pid_t;
@@ -131,119 +168,29 @@ pub fn current_raw() -> *mut bindings::task_struct {
///
/// # Safety
///
- /// Callers must ensure that the returned object doesn't outlive the current task/thread.
- pub unsafe fn current() -> impl Deref<Target = Task> {
- struct TaskRef<'a> {
- task: &'a Task,
- _not_send: NotThreadSafe,
+ /// Callers must ensure that the returned object is only used to access a [`CurrentTask`]
+ /// within the task context that was active when this function was called. For more details,
+ /// see the invariants section for [`CurrentTask`].
+ pub unsafe fn current() -> impl Deref<Target = CurrentTask> {
+ struct TaskRef {
+ task: *const CurrentTask,
}
- impl Deref for TaskRef<'_> {
- type Target = Task;
+ impl Deref for TaskRef {
+ type Target = CurrentTask;
fn deref(&self) -> &Self::Target {
- self.task
+ // SAFETY: The returned reference borrows from this `TaskRef`, so it cannot outlive
+ // the `TaskRef`, which the caller of `Task::current()` has promised will not
+ // outlive the task/thread for which `self.task` is the `current` pointer. Thus, it
+ // is okay to return a `CurrentTask` reference here.
+ unsafe { &*self.task }
}
}
- let current = Task::current_raw();
TaskRef {
- // SAFETY: If the current thread is still running, the current task is valid. Given
- // that `TaskRef` is not `Send`, we know it cannot be transferred to another thread
- // (where it could potentially outlive the caller).
- task: unsafe { &*current.cast() },
- _not_send: NotThreadSafe,
- }
- }
-
- /// Returns a PidNamespace reference for the currently executing task's/thread's pid namespace.
- ///
- /// This function can be used to create an unbounded lifetime by e.g., storing the returned
- /// PidNamespace in a global variable which would be a bug. So the recommended way to get the
- /// current task's/thread's pid namespace is to use the [`current_pid_ns`] macro because it is
- /// safe.
- ///
- /// # Safety
- ///
- /// Callers must ensure that the returned object doesn't outlive the current task/thread.
- pub unsafe fn current_pid_ns() -> impl Deref<Target = PidNamespace> {
- struct PidNamespaceRef<'a> {
- task: &'a PidNamespace,
- _not_send: NotThreadSafe,
- }
-
- impl Deref for PidNamespaceRef<'_> {
- type Target = PidNamespace;
-
- fn deref(&self) -> &Self::Target {
- self.task
- }
- }
-
- // The lifetime of `PidNamespace` is bound to `Task` and `struct pid`.
- //
- // The `PidNamespace` of a `Task` doesn't ever change once the `Task` is alive. A
- // `unshare(CLONE_NEWPID)` or `setns(fd_pidns/pidfd, CLONE_NEWPID)` will not have an effect
- // on the calling `Task`'s pid namespace. It will only effect the pid namespace of children
- // created by the calling `Task`. This invariant guarantees that after having acquired a
- // reference to a `Task`'s pid namespace it will remain unchanged.
- //
- // When a task has exited and been reaped `release_task()` will be called. This will set
- // the `PidNamespace` of the task to `NULL`. So retrieving the `PidNamespace` of a task
- // that is dead will return `NULL`. Note, that neither holding the RCU lock nor holding a
- // referencing count to
- // the `Task` will prevent `release_task()` being called.
- //
- // In order to retrieve the `PidNamespace` of a `Task` the `task_active_pid_ns()` function
- // can be used. There are two cases to consider:
- //
- // (1) retrieving the `PidNamespace` of the `current` task
- // (2) retrieving the `PidNamespace` of a non-`current` task
- //
- // From system call context retrieving the `PidNamespace` for case (1) is always safe and
- // requires neither RCU locking nor a reference count to be held. Retrieving the
- // `PidNamespace` after `release_task()` for current will return `NULL` but no codepath
- // like that is exposed to Rust.
- //
- // Retrieving the `PidNamespace` from system call context for (2) requires RCU protection.
- // Accessing `PidNamespace` outside of RCU protection requires a reference count that
- // must've been acquired while holding the RCU lock. Note that accessing a non-`current`
- // task means `NULL` can be returned as the non-`current` task could have already passed
- // through `release_task()`.
- //
- // To retrieve (1) the `current_pid_ns!()` macro should be used which ensure that the
- // returned `PidNamespace` cannot outlive the calling scope. The associated
- // `current_pid_ns()` function should not be called directly as it could be abused to
- // created an unbounded lifetime for `PidNamespace`. The `current_pid_ns!()` macro allows
- // Rust to handle the common case of accessing `current`'s `PidNamespace` without RCU
- // protection and without having to acquire a reference count.
- //
- // For (2) the `task_get_pid_ns()` method must be used. This will always acquire a
- // reference on `PidNamespace` and will return an `Option` to force the caller to
- // explicitly handle the case where `PidNamespace` is `None`, something that tends to be
- // forgotten when doing the equivalent operation in `C`. Missing RCU primitives make it
- // difficult to perform operations that are otherwise safe without holding a reference
- // count as long as RCU protection is guaranteed. But it is not important currently. But we
- // do want it in the future.
- //
- // Note for (2) the required RCU protection around calling `task_active_pid_ns()`
- // synchronizes against putting the last reference of the associated `struct pid` of
- // `task->thread_pid`. The `struct pid` stored in that field is used to retrieve the
- // `PidNamespace` of the caller. When `release_task()` is called `task->thread_pid` will be
- // `NULL`ed and `put_pid()` on said `struct pid` will be delayed in `free_pid()` via
- // `call_rcu()` allowing everyone with an RCU protected access to the `struct pid` acquired
- // from `task->thread_pid` to finish.
- //
- // SAFETY: The current task's pid namespace is valid as long as the current task is running.
- let pidns = unsafe { bindings::task_active_pid_ns(Task::current_raw()) };
- PidNamespaceRef {
- // SAFETY: If the current thread is still running, the current task and its associated
- // pid namespace are valid. `PidNamespaceRef` is not `Send`, so we know it cannot be
- // transferred to another thread (where it could potentially outlive the current
- // `Task`). The caller needs to ensure that the PidNamespaceRef doesn't outlive the
- // current task/thread.
- task: unsafe { PidNamespace::from_ptr(pidns) },
- _not_send: NotThreadSafe,
+ // CAST: The layout of `struct task_struct` and `CurrentTask` is identical.
+ task: Task::current_raw().cast(),
}
}
@@ -326,6 +273,70 @@ pub fn wake_up(&self) {
}
}
+impl CurrentTask {
+ /// Access the address space of the current task.
+ ///
+ /// This function does not touch the refcount of the mm.
+ #[inline]
+ pub fn mm(&self) -> Option<&MmWithUser> {
+ // SAFETY: The `mm` field of `current` is not modified from other threads, so reading it is
+ // not a data race.
+ let mm = unsafe { (*self.as_ptr()).mm };
+
+ if mm.is_null() {
+ return None;
+ }
+
+ // SAFETY: If `current->mm` is non-null, then it references a valid mm with a non-zero
+ // value of `mm_users`. Furthermore, the returned `&MmWithUser` borrows from this
+ // `CurrentTask`, so it cannot escape the scope in which the current pointer was obtained.
+ //
+ // This is safe even if `kthread_use_mm()`/`kthread_unuse_mm()` are used. There are two
+ // relevant cases:
+ // * If the `&CurrentTask` was created before `kthread_use_mm()`, then it cannot be
+ // accessed during the `kthread_use_mm()`/`kthread_unuse_mm()` scope due to the
+ // `NotThreadSafe` field of `CurrentTask`.
+ // * If the `&CurrentTask` was created within a `kthread_use_mm()`/`kthread_unuse_mm()`
+ // scope, then the `&CurrentTask` cannot escape that scope, so the returned `&MmWithUser`
+ // also cannot escape that scope.
+ // In either case, it's not possible to read `current->mm` and keep using it after the
+ // scope is ended with `kthread_unuse_mm()`.
+ Some(unsafe { MmWithUser::from_raw(mm) })
+ }
+
+ /// Access the pid namespace of the current task.
+ ///
+ /// This function does not touch the refcount of the namespace or use RCU protection.
+ ///
+ /// To access the pid namespace of another task, see [`Task::get_pid_ns`].
+ #[doc(alias = "task_active_pid_ns")]
+ #[inline]
+ pub fn active_pid_ns(&self) -> Option<&PidNamespace> {
+ // SAFETY: It is safe to call `task_active_pid_ns` without RCU protection when calling it
+ // on the current task.
+ let active_ns = unsafe { bindings::task_active_pid_ns(self.as_ptr()) };
+
+ if active_ns.is_null() {
+ return None;
+ }
+
+ // The lifetime of `PidNamespace` is bound to `Task` and `struct pid`.
+ //
+ // The `PidNamespace` of a `Task` doesn't ever change once the `Task` is alive.
+ //
+ // From system call context retrieving the `PidNamespace` for the current task is always
+ // safe and requires neither RCU locking nor a reference count to be held. Retrieving the
+ // `PidNamespace` after `release_task()` for current will return `NULL` but no codepath
+ // like that is exposed to Rust.
+ //
+ // SAFETY: If `current`'s pid ns is non-null, then it references a valid pid ns.
+ // Furthermore, the returned `&PidNamespace` borrows from this `CurrentTask`, so it cannot
+ // escape the scope in which the current pointer was obtained, e.g. it cannot live past a
+ // `release_task()` call.
+ Some(unsafe { PidNamespace::from_ptr(active_ns) })
+ }
+}
+
// SAFETY: The type invariants guarantee that `Task` is always refcounted.
unsafe impl crate::types::AlwaysRefCounted for Task {
fn inc_ref(&self) {