Message ID | 20231018122518.128049-5-wedsonaf@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Rust abstractions for VFS | expand |
On 18.10.23 14:25, Wedson Almeida Filho wrote: > From: Wedson Almeida Filho <walmeida@microsoft.com> > > Allow Rust file systems to initialise superblocks, which allows them > to be mounted (though they are still empty). > > Some scaffolding code is added to create an empty directory as the root. > It is replaced by proper inode creation in a subsequent patch in this > series. > > Signed-off-by: Wedson Almeida Filho <walmeida@microsoft.com> > --- > rust/bindings/bindings_helper.h | 5 + > rust/bindings/lib.rs | 4 + > rust/kernel/fs.rs | 176 ++++++++++++++++++++++++++++++-- > samples/rust/rust_rofs.rs | 10 ++ > 4 files changed, 189 insertions(+), 6 deletions(-) > > diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h > index 9c23037b33d0..ca1898ce9527 100644 > --- a/rust/bindings/bindings_helper.h > +++ b/rust/bindings/bindings_helper.h > @@ -9,6 +9,7 @@ > #include <kunit/test.h> > #include <linux/errname.h> > #include <linux/fs.h> > +#include <linux/fs_context.h> > #include <linux/slab.h> > #include <linux/refcount.h> > #include <linux/wait.h> > @@ -22,3 +23,7 @@ const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO; > const slab_flags_t BINDINGS_SLAB_RECLAIM_ACCOUNT = SLAB_RECLAIM_ACCOUNT; > const slab_flags_t BINDINGS_SLAB_MEM_SPREAD = SLAB_MEM_SPREAD; > const slab_flags_t BINDINGS_SLAB_ACCOUNT = SLAB_ACCOUNT; > + > +const unsigned long BINDINGS_SB_RDONLY = SB_RDONLY; > + > +const loff_t BINDINGS_MAX_LFS_FILESIZE = MAX_LFS_FILESIZE; > diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs > index 6a8c6cd17e45..426915d3fb57 100644 > --- a/rust/bindings/lib.rs > +++ b/rust/bindings/lib.rs > @@ -55,3 +55,7 @@ mod bindings_helper { > pub const SLAB_RECLAIM_ACCOUNT: slab_flags_t = BINDINGS_SLAB_RECLAIM_ACCOUNT; > pub const SLAB_MEM_SPREAD: slab_flags_t = BINDINGS_SLAB_MEM_SPREAD; > pub const SLAB_ACCOUNT: slab_flags_t = BINDINGS_SLAB_ACCOUNT; > + > +pub const SB_RDONLY: core::ffi::c_ulong = BINDINGS_SB_RDONLY; > + > +pub const MAX_LFS_FILESIZE: 
loff_t = BINDINGS_MAX_LFS_FILESIZE; > diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs > index 1df54c234101..31cf643aaded 100644 > --- a/rust/kernel/fs.rs > +++ b/rust/kernel/fs.rs > @@ -6,16 +6,22 @@ > //! > //! C headers: [`include/linux/fs.h`](../../include/linux/fs.h) > > -use crate::error::{code::*, from_result, to_result, Error}; > +use crate::error::{code::*, from_result, to_result, Error, Result}; > use crate::types::Opaque; > use crate::{bindings, init::PinInit, str::CStr, try_pin_init, ThisModule}; > use core::{marker::PhantomData, marker::PhantomPinned, pin::Pin}; > use macros::{pin_data, pinned_drop}; > > +/// Maximum size of an inode. > +pub const MAX_LFS_FILESIZE: i64 = bindings::MAX_LFS_FILESIZE; > + > /// A file system type. > pub trait FileSystem { > /// The name of the file system type. > const NAME: &'static CStr; > + > + /// Returns the parameters to initialise a super block. > + fn super_params(sb: &NewSuperBlock<Self>) -> Result<SuperParams>; > } > > /// A registration of a file system. > @@ -49,7 +55,7 @@ pub fn new<T: FileSystem + ?Sized>(module: &'static ThisModule) -> impl PinInit< > let fs = unsafe { &mut *fs_ptr }; > fs.owner = module.0; > fs.name = T::NAME.as_char_ptr(); > - fs.init_fs_context = Some(Self::init_fs_context_callback); > + fs.init_fs_context = Some(Self::init_fs_context_callback::<T>); > fs.kill_sb = Some(Self::kill_sb_callback); > fs.fs_flags = 0; > > @@ -60,13 +66,22 @@ pub fn new<T: FileSystem + ?Sized>(module: &'static ThisModule) -> impl PinInit< > }) > } > > - unsafe extern "C" fn init_fs_context_callback( > - _fc_ptr: *mut bindings::fs_context, > + unsafe extern "C" fn init_fs_context_callback<T: FileSystem + ?Sized>( > + fc_ptr: *mut bindings::fs_context, > ) -> core::ffi::c_int { > - from_result(|| Err(ENOTSUPP)) > + from_result(|| { > + // SAFETY: The C callback API guarantees that `fc_ptr` is valid. 
> + let fc = unsafe { &mut *fc_ptr }; This safety comment is not enough, the pointer needs to be unique and pointing to a valid value for this to be ok. I would recommend to do this instead: unsafe { addr_of_mut!((*fc_ptr).ops).write(&Tables::<T>::CONTEXT) }; > + fc.ops = &Tables::<T>::CONTEXT; > + Ok(0) > + }) > } > > - unsafe extern "C" fn kill_sb_callback(_sb_ptr: *mut bindings::super_block) {} > + unsafe extern "C" fn kill_sb_callback(sb_ptr: *mut bindings::super_block) { > + // SAFETY: In `get_tree_callback` we always call `get_tree_nodev`, so `kill_anon_super` is > + // the appropriate function to call for cleanup. > + unsafe { bindings::kill_anon_super(sb_ptr) }; > + } > } > > #[pinned_drop] > @@ -79,6 +94,151 @@ fn drop(self: Pin<&mut Self>) { > } > } > > +/// A file system super block. > +/// > +/// Wraps the kernel's `struct super_block`. > +#[repr(transparent)] > +pub struct SuperBlock<T: FileSystem + ?Sized>(Opaque<bindings::super_block>, PhantomData<T>); > + > +/// Required superblock parameters. > +/// > +/// This is returned by implementations of [`FileSystem::super_params`]. > +pub struct SuperParams { > + /// The magic number of the superblock. > + pub magic: u32, > + > + /// The size of a block in powers of 2 (i.e., for a value of `n`, the size is `2^n`). > + pub blocksize_bits: u8, > + > + /// Maximum size of a file. > + /// > + /// The maximum allowed value is [`MAX_LFS_FILESIZE`]. > + pub maxbytes: i64, > + > + /// Granularity of c/m/atime in ns (cannot be worse than a second). > + pub time_gran: u32, > +} > + > +/// A superblock that is still being initialised. > +/// > +/// # Invariants > +/// > +/// The superblock is a newly-created one and this is the only active pointer to it. This struct is not wrapping a pointer? > +#[repr(transparent)] > +pub struct NewSuperBlock<T: FileSystem + ?Sized>(bindings::super_block, PhantomData<T>); No `Opaque`? > + > +struct Tables<T: FileSystem + ?Sized>(T); Please add a newline here. 
Also the field `self.0` is never actually used, should it be `PhantomData<T>` instead? > +impl<T: FileSystem + ?Sized> Tables<T> { > + const CONTEXT: bindings::fs_context_operations = bindings::fs_context_operations { > + free: None, > + parse_param: None, > + get_tree: Some(Self::get_tree_callback), > + reconfigure: None, > + parse_monolithic: None, > + dup: None, > + }; > + > + unsafe extern "C" fn get_tree_callback(fc: *mut bindings::fs_context) -> core::ffi::c_int { > + // SAFETY: `fc` is valid per the callback contract. `fill_super_callback` also has > + // the right type and is a valid callback. > + unsafe { bindings::get_tree_nodev(fc, Some(Self::fill_super_callback)) } > + } > + > + unsafe extern "C" fn fill_super_callback( > + sb_ptr: *mut bindings::super_block, > + _fc: *mut bindings::fs_context, > + ) -> core::ffi::c_int { > + from_result(|| { > + // SAFETY: The callback contract guarantees that `sb_ptr` is a unique pointer to a > + // newly-created superblock. > + let sb = unsafe { &mut *sb_ptr.cast() }; It would be helpful if you spelled out the `NewSuperBlock` type here somewhere (e.g. on the `cast::<NewSuperBlock>`). Is it really ok to create a mutable reference to a `bindings::super_block`? Since it is not wrapped in `Opaque`, I would rather have you avoid this. > + let params = T::super_params(sb)?; > + > + sb.0.s_magic = params.magic as _; > + sb.0.s_op = &Tables::<T>::SUPER_BLOCK; > + sb.0.s_maxbytes = params.maxbytes; > + sb.0.s_time_gran = params.time_gran; > + sb.0.s_blocksize_bits = params.blocksize_bits; > + sb.0.s_blocksize = 1; > + if sb.0.s_blocksize.leading_zeros() < params.blocksize_bits.into() { > + return Err(EINVAL); > + } I think you could add a comment that explains what this `if` does. > + sb.0.s_blocksize = 1 << sb.0.s_blocksize_bits; > + sb.0.s_flags |= bindings::SB_RDONLY; > + > + // The following is scaffolding code that will be removed in a subsequent patch. 
It is > + // needed to build a root dentry, otherwise core code will BUG(). > + // SAFETY: `sb` is the superblock being initialised, it is valid for read and write. > + let inode = unsafe { bindings::new_inode(&mut sb.0) }; > + if inode.is_null() { > + return Err(ENOMEM); > + } > + > + // SAFETY: `inode` is valid for write. > + unsafe { bindings::set_nlink(inode, 2) }; > + > + { > + // SAFETY: This is a newly-created inode. No other references to it exist, so it is > + // safe to mutably dereference it. > + let inode = unsafe { &mut *inode }; The inode also needs to be initialized and have valid values as its fields. Not sure if this is kept and it would probably be better to keep using raw pointers here. -- Cheers, Benno > + inode.i_ino = 1; > + inode.i_mode = (bindings::S_IFDIR | 0o755) as _; > + > + // SAFETY: `simple_dir_operations` never changes, it's safe to reference it. > + inode.__bindgen_anon_3.i_fop = unsafe { &bindings::simple_dir_operations }; > + > + // SAFETY: `simple_dir_inode_operations` never changes, it's safe to reference it. > + inode.i_op = unsafe { &bindings::simple_dir_inode_operations }; > + } > + > + // SAFETY: `d_make_root` requires that `inode` be valid and referenced, which is the > + // case for this call. > + // > + // It takes over the inode, even on failure, so we don't need to clean it up. 
> + let dentry = unsafe { bindings::d_make_root(inode) }; > + if dentry.is_null() { > + return Err(ENOMEM); > + } > + > + sb.0.s_root = dentry; > + > + Ok(0) > + }) > + } > + > + const SUPER_BLOCK: bindings::super_operations = bindings::super_operations { > + alloc_inode: None, > + destroy_inode: None, > + free_inode: None, > + dirty_inode: None, > + write_inode: None, > + drop_inode: None, > + evict_inode: None, > + put_super: None, > + sync_fs: None, > + freeze_super: None, > + freeze_fs: None, > + thaw_super: None, > + unfreeze_fs: None, > + statfs: None, > + remount_fs: None, > + umount_begin: None, > + show_options: None, > + show_devname: None, > + show_path: None, > + show_stats: None, > + #[cfg(CONFIG_QUOTA)] > + quota_read: None, > + #[cfg(CONFIG_QUOTA)] > + quota_write: None, > + #[cfg(CONFIG_QUOTA)] > + get_dquots: None, > + nr_cached_objects: None, > + free_cached_objects: None, > + shutdown: None, > + }; > +} > + > /// Kernel module that exposes a single file system implemented by `T`. > #[pin_data] > pub struct Module<T: FileSystem + ?Sized> { > @@ -105,6 +265,7 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> { > /// > /// ``` > /// # mod module_fs_sample { > +/// use kernel::fs::{NewSuperBlock, SuperParams}; > /// use kernel::prelude::*; > /// use kernel::{c_str, fs}; > /// > @@ -119,6 +280,9 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> { > /// struct MyFs; > /// impl fs::FileSystem for MyFs { > /// const NAME: &'static CStr = c_str!("myfs"); > +/// fn super_params(_: &NewSuperBlock<Self>) -> Result<SuperParams> { > +/// todo!() > +/// } > /// } > /// # } > /// ``` > diff --git a/samples/rust/rust_rofs.rs b/samples/rust/rust_rofs.rs > index 1c00b1da8b94..9878bf88b991 100644 > --- a/samples/rust/rust_rofs.rs > +++ b/samples/rust/rust_rofs.rs > @@ -2,6 +2,7 @@ > > //! Rust read-only file system sample. 
> > +use kernel::fs::{NewSuperBlock, SuperParams}; > use kernel::prelude::*; > use kernel::{c_str, fs}; > > @@ -16,4 +17,13 @@ > struct RoFs; > impl fs::FileSystem for RoFs { > const NAME: &'static CStr = c_str!("rust-fs"); > + > + fn super_params(_sb: &NewSuperBlock<Self>) -> Result<SuperParams> { > + Ok(SuperParams { > + magic: 0x52555354, > + blocksize_bits: 12, > + maxbytes: fs::MAX_LFS_FILESIZE, > + time_gran: 1, > + }) > + } > } > -- > 2.34.1 > >
On 23/10/18 09:25AM, Wedson Almeida Filho wrote: > From: Wedson Almeida Filho <walmeida@microsoft.com> > > Allow Rust file systems to initialise superblocks, which allows them > to be mounted (though they are still empty). > > Some scaffolding code is added to create an empty directory as the root. > It is replaced by proper inode creation in a subsequent patch in this > series. > > Signed-off-by: Wedson Almeida Filho <walmeida@microsoft.com> > --- > rust/bindings/bindings_helper.h | 5 + > rust/bindings/lib.rs | 4 + > rust/kernel/fs.rs | 176 ++++++++++++++++++++++++++++++-- > samples/rust/rust_rofs.rs | 10 ++ > 4 files changed, 189 insertions(+), 6 deletions(-) > > diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h > index 9c23037b33d0..ca1898ce9527 100644 > --- a/rust/bindings/bindings_helper.h > +++ b/rust/bindings/bindings_helper.h > @@ -9,6 +9,7 @@ > #include <kunit/test.h> > #include <linux/errname.h> > #include <linux/fs.h> > +#include <linux/fs_context.h> > #include <linux/slab.h> > #include <linux/refcount.h> > #include <linux/wait.h> > @@ -22,3 +23,7 @@ const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO; > const slab_flags_t BINDINGS_SLAB_RECLAIM_ACCOUNT = SLAB_RECLAIM_ACCOUNT; > const slab_flags_t BINDINGS_SLAB_MEM_SPREAD = SLAB_MEM_SPREAD; > const slab_flags_t BINDINGS_SLAB_ACCOUNT = SLAB_ACCOUNT; > + > +const unsigned long BINDINGS_SB_RDONLY = SB_RDONLY; > + > +const loff_t BINDINGS_MAX_LFS_FILESIZE = MAX_LFS_FILESIZE; > diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs > index 6a8c6cd17e45..426915d3fb57 100644 > --- a/rust/bindings/lib.rs > +++ b/rust/bindings/lib.rs > @@ -55,3 +55,7 @@ mod bindings_helper { > pub const SLAB_RECLAIM_ACCOUNT: slab_flags_t = BINDINGS_SLAB_RECLAIM_ACCOUNT; > pub const SLAB_MEM_SPREAD: slab_flags_t = BINDINGS_SLAB_MEM_SPREAD; > pub const SLAB_ACCOUNT: slab_flags_t = BINDINGS_SLAB_ACCOUNT; > + > +pub const SB_RDONLY: core::ffi::c_ulong = BINDINGS_SB_RDONLY; > + > +pub const MAX_LFS_FILESIZE: 
loff_t = BINDINGS_MAX_LFS_FILESIZE; > diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs > index 1df54c234101..31cf643aaded 100644 > --- a/rust/kernel/fs.rs > +++ b/rust/kernel/fs.rs > @@ -6,16 +6,22 @@ > //! > //! C headers: [`include/linux/fs.h`](../../include/linux/fs.h) > > -use crate::error::{code::*, from_result, to_result, Error}; > +use crate::error::{code::*, from_result, to_result, Error, Result}; > use crate::types::Opaque; > use crate::{bindings, init::PinInit, str::CStr, try_pin_init, ThisModule}; > use core::{marker::PhantomData, marker::PhantomPinned, pin::Pin}; > use macros::{pin_data, pinned_drop}; > > +/// Maximum size of an inode. > +pub const MAX_LFS_FILESIZE: i64 = bindings::MAX_LFS_FILESIZE; > + > /// A file system type. > pub trait FileSystem { > /// The name of the file system type. > const NAME: &'static CStr; > + > + /// Returns the parameters to initialise a super block. > + fn super_params(sb: &NewSuperBlock<Self>) -> Result<SuperParams>; > } > > /// A registration of a file system. > @@ -49,7 +55,7 @@ pub fn new<T: FileSystem + ?Sized>(module: &'static ThisModule) -> impl PinInit< > let fs = unsafe { &mut *fs_ptr }; > fs.owner = module.0; > fs.name = T::NAME.as_char_ptr(); > - fs.init_fs_context = Some(Self::init_fs_context_callback); > + fs.init_fs_context = Some(Self::init_fs_context_callback::<T>); > fs.kill_sb = Some(Self::kill_sb_callback); > fs.fs_flags = 0; > > @@ -60,13 +66,22 @@ pub fn new<T: FileSystem + ?Sized>(module: &'static ThisModule) -> impl PinInit< > }) > } > > - unsafe extern "C" fn init_fs_context_callback( > - _fc_ptr: *mut bindings::fs_context, > + unsafe extern "C" fn init_fs_context_callback<T: FileSystem + ?Sized>( > + fc_ptr: *mut bindings::fs_context, > ) -> core::ffi::c_int { > - from_result(|| Err(ENOTSUPP)) > + from_result(|| { > + // SAFETY: The C callback API guarantees that `fc_ptr` is valid. 
> + let fc = unsafe { &mut *fc_ptr }; > + fc.ops = &Tables::<T>::CONTEXT; > + Ok(0) > + }) > } > > - unsafe extern "C" fn kill_sb_callback(_sb_ptr: *mut bindings::super_block) {} > + unsafe extern "C" fn kill_sb_callback(sb_ptr: *mut bindings::super_block) { > + // SAFETY: In `get_tree_callback` we always call `get_tree_nodev`, so `kill_anon_super` is > + // the appropriate function to call for cleanup. > + unsafe { bindings::kill_anon_super(sb_ptr) }; > + } > } > > #[pinned_drop] > @@ -79,6 +94,151 @@ fn drop(self: Pin<&mut Self>) { > } > } > > +/// A file system super block. > +/// > +/// Wraps the kernel's `struct super_block`. > +#[repr(transparent)] > +pub struct SuperBlock<T: FileSystem + ?Sized>(Opaque<bindings::super_block>, PhantomData<T>); > + > +/// Required superblock parameters. > +/// > +/// This is returned by implementations of [`FileSystem::super_params`]. > +pub struct SuperParams { > + /// The magic number of the superblock. > + pub magic: u32, > + > + /// The size of a block in powers of 2 (i.e., for a value of `n`, the size is `2^n`). > + pub blocksize_bits: u8, > + > + /// Maximum size of a file. > + /// > + /// The maximum allowed value is [`MAX_LFS_FILESIZE`]. > + pub maxbytes: i64, > + > + /// Granularity of c/m/atime in ns (cannot be worse than a second). > + pub time_gran: u32, > +} > + > +/// A superblock that is still being initialised. > +/// > +/// # Invariants > +/// > +/// The superblock is a newly-created one and this is the only active pointer to it. > +#[repr(transparent)] > +pub struct NewSuperBlock<T: FileSystem + ?Sized>(bindings::super_block, PhantomData<T>); How about using the state type parameter [1] instead of using a separate struct for each state? I think Andreas Hindborg mentioned this during Kangrejos [2]. 
The gist of it is that you define a trait and implement it for the two states of the superblock: NewSuperBlockState and InitializedSuperblockState: ``` pub trait SuperBlockState {} /// A superblock that is still being initialised. pub enum NewSuperBlockState {} /// An initialized superblock pub enum InitializedSuperBlockState {} impl SuperBlockState for NewSuperBlockState {} impl SuperBlockState for InitializedSuperBlockState {} ``` Then add another generic parameter (the state) to the SuperBlock: ``` #[repr(transparent)] pub struct SuperBlock<T: FileSystem + ?Sized, S: SuperBlockState>(Opaque<bindings::super_block>, PhantomData<T>, PhantomData<S>); ``` Now you implement the functions separately on each variant of the generic instead of implementing them on separate structs: ``` impl<T: FileSystem + ?Sized> SuperBlock<T, NewSuperBlockState> { ... impl<T: FileSystem + ?Sized> SuperBlock<T, InitializedSuperBlockState> { ... ``` I think this pattern makes it clearer that there's only one SuperBlock object which can be in different states, and it more clearly conveys that the Typestate pattern is being used (we could find shorter names for the states). See [3] for the complete example. Cheers, Ariel [1] https://cliffle.com/blog/rust-typestate/#variation-state-type-parameter [2] https://kangrejos.com/ [3] https://github.com/ariel-miculas/linux/commit/655607228ff4ac9e56295ddd74fff8910dfbef14#diff-9b893393ed2a537222d79f6e2fceffb7e9d8967791c2016962be3171c446210f > + > +struct Tables<T: FileSystem + ?Sized>(T); > +impl<T: FileSystem + ?Sized> Tables<T> { > + const CONTEXT: bindings::fs_context_operations = bindings::fs_context_operations { > + free: None, > + parse_param: None, > + get_tree: Some(Self::get_tree_callback), > + reconfigure: None, > + parse_monolithic: None, > + dup: None, > + }; > + > + unsafe extern "C" fn get_tree_callback(fc: *mut bindings::fs_context) -> core::ffi::c_int { > + // SAFETY: `fc` is valid per the callback contract. 
`fill_super_callback` also has > + // the right type and is a valid callback. > + unsafe { bindings::get_tree_nodev(fc, Some(Self::fill_super_callback)) } > + } > + > + unsafe extern "C" fn fill_super_callback( > + sb_ptr: *mut bindings::super_block, > + _fc: *mut bindings::fs_context, > + ) -> core::ffi::c_int { > + from_result(|| { > + // SAFETY: The callback contract guarantees that `sb_ptr` is a unique pointer to a > + // newly-created superblock. > + let sb = unsafe { &mut *sb_ptr.cast() }; > + let params = T::super_params(sb)?; > + > + sb.0.s_magic = params.magic as _; > + sb.0.s_op = &Tables::<T>::SUPER_BLOCK; > + sb.0.s_maxbytes = params.maxbytes; > + sb.0.s_time_gran = params.time_gran; > + sb.0.s_blocksize_bits = params.blocksize_bits; > + sb.0.s_blocksize = 1; > + if sb.0.s_blocksize.leading_zeros() < params.blocksize_bits.into() { > + return Err(EINVAL); > + } > + sb.0.s_blocksize = 1 << sb.0.s_blocksize_bits; > + sb.0.s_flags |= bindings::SB_RDONLY; > + > + // The following is scaffolding code that will be removed in a subsequent patch. It is > + // needed to build a root dentry, otherwise core code will BUG(). > + // SAFETY: `sb` is the superblock being initialised, it is valid for read and write. > + let inode = unsafe { bindings::new_inode(&mut sb.0) }; > + if inode.is_null() { > + return Err(ENOMEM); > + } > + > + // SAFETY: `inode` is valid for write. > + unsafe { bindings::set_nlink(inode, 2) }; > + > + { > + // SAFETY: This is a newly-created inode. No other references to it exist, so it is > + // safe to mutably dereference it. > + let inode = unsafe { &mut *inode }; > + inode.i_ino = 1; > + inode.i_mode = (bindings::S_IFDIR | 0o755) as _; > + > + // SAFETY: `simple_dir_operations` never changes, it's safe to reference it. > + inode.__bindgen_anon_3.i_fop = unsafe { &bindings::simple_dir_operations }; > + > + // SAFETY: `simple_dir_inode_operations` never changes, it's safe to reference it. 
> + inode.i_op = unsafe { &bindings::simple_dir_inode_operations }; > + } > + > + // SAFETY: `d_make_root` requires that `inode` be valid and referenced, which is the > + // case for this call. > + // > + // It takes over the inode, even on failure, so we don't need to clean it up. > + let dentry = unsafe { bindings::d_make_root(inode) }; > + if dentry.is_null() { > + return Err(ENOMEM); > + } > + > + sb.0.s_root = dentry; > + > + Ok(0) > + }) > + } > + > + const SUPER_BLOCK: bindings::super_operations = bindings::super_operations { > + alloc_inode: None, > + destroy_inode: None, > + free_inode: None, > + dirty_inode: None, > + write_inode: None, > + drop_inode: None, > + evict_inode: None, > + put_super: None, > + sync_fs: None, > + freeze_super: None, > + freeze_fs: None, > + thaw_super: None, > + unfreeze_fs: None, > + statfs: None, > + remount_fs: None, > + umount_begin: None, > + show_options: None, > + show_devname: None, > + show_path: None, > + show_stats: None, > + #[cfg(CONFIG_QUOTA)] > + quota_read: None, > + #[cfg(CONFIG_QUOTA)] > + quota_write: None, > + #[cfg(CONFIG_QUOTA)] > + get_dquots: None, > + nr_cached_objects: None, > + free_cached_objects: None, > + shutdown: None, > + }; > +} > + > /// Kernel module that exposes a single file system implemented by `T`. 
> #[pin_data] > pub struct Module<T: FileSystem + ?Sized> { > @@ -105,6 +265,7 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> { > /// > /// ``` > /// # mod module_fs_sample { > +/// use kernel::fs::{NewSuperBlock, SuperParams}; > /// use kernel::prelude::*; > /// use kernel::{c_str, fs}; > /// > @@ -119,6 +280,9 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> { > /// struct MyFs; > /// impl fs::FileSystem for MyFs { > /// const NAME: &'static CStr = c_str!("myfs"); > +/// fn super_params(_: &NewSuperBlock<Self>) -> Result<SuperParams> { > +/// todo!() > +/// } > /// } > /// # } > /// ``` > diff --git a/samples/rust/rust_rofs.rs b/samples/rust/rust_rofs.rs > index 1c00b1da8b94..9878bf88b991 100644 > --- a/samples/rust/rust_rofs.rs > +++ b/samples/rust/rust_rofs.rs > @@ -2,6 +2,7 @@ > > //! Rust read-only file system sample. > > +use kernel::fs::{NewSuperBlock, SuperParams}; > use kernel::prelude::*; > use kernel::{c_str, fs}; > > @@ -16,4 +17,13 @@ > struct RoFs; > impl fs::FileSystem for RoFs { > const NAME: &'static CStr = c_str!("rust-fs"); > + > + fn super_params(_sb: &NewSuperBlock<Self>) -> Result<SuperParams> { > + Ok(SuperParams { > + magic: 0x52555354, > + blocksize_bits: 12, > + maxbytes: fs::MAX_LFS_FILESIZE, > + time_gran: 1, > + }) > + } > } > -- > 2.34.1 >
On 10/18/23 18:34, Benno Lossin wrote: >> + from_result(|| { >> + // SAFETY: The C callback API guarantees that `fc_ptr` is valid. >> + let fc = unsafe { &mut *fc_ptr }; > > This safety comment is not enough, the pointer needs to be unique and > pointing to a valid value for this to be ok. I would recommend to do > this instead: > > unsafe { addr_of_mut!((*fc_ptr).ops).write(&Tables::<T>::CONTEXT) }; It doesn't really need to be unique. Or at least, that wording gives the wrong intuition even if it's technically correct when you use the right definition of "unique". To clarify what I mean: Using `ptr::write` on a raw pointer is valid if and only if creating a mutable reference and using that to write is valid. (Assuming the type has no destructor.) Of course, in this case you *also* have the difference of whether you create a mutable reference to the entire struct or just the field. >> + // SAFETY: This is a newly-created inode. No other references to it exist, so it is >> + // safe to mutably dereference it. >> + let inode = unsafe { &mut *inode }; > > The inode also needs to be initialized and have valid values as its fields. > Not sure if this is kept and it would probably be better to keep using raw > pointers here. My understanding is that this is just a safety invariant, and not a validity invariant, so as long as the uninitialized memory is not read, it's fine. See e.g.: https://github.com/rust-lang/unsafe-code-guidelines/issues/346 Alice
On 28.10.23 18:39, Alice Ryhl wrote: > On 10/18/23 18:34, Benno Lossin wrote:>> + from_result(|| { >>> + // SAFETY: The C callback API guarantees that `fc_ptr` is valid. >>> + let fc = unsafe { &mut *fc_ptr }; >> >> This safety comment is not enough, the pointer needs to be unique and >> pointing to a valid value for this to be ok. I would recommend to do >> this instead: >> >> unsafe { addr_of_mut!((*fc_ptr).ops).write(&Tables::<T>::CONTEXT) }; > > It doesn't really need to be unique. Or at least, that wording gives the > wrong intuition even if it's technically correct when you use the right > definition of "unique". > > To clarify what I mean: Using `ptr::write` on a raw pointer is valid if > and only if creating a mutable reference and using that to write is > valid. (Assuming the type has no destructor.) I tried looking in the nomicon and UCG, but was not able to find this statement, where is it from? > Of course, in this case you *also* have the difference of whether you > create a mutable to the entire struct or just the field. >>> + // SAFETY: This is a newly-created inode. No other references to it exist, so it is >>> + // safe to mutably dereference it. >>> + let inode = unsafe { &mut *inode }; >> >> The inode also needs to be initialized and have valid values as its fields. >> Not sure if this is kept and it would probably be better to keep using raw >> pointers here. > > My understanding is that this is just a safety invariant, and not a > validity invariant, so as long as the uninitialized memory is not read, > it's fine. > > See e.g.: > https://github.com/rust-lang/unsafe-code-guidelines/issues/346 I'm not so sure that that discussion is finished and agreed upon. The nomicon still writes "It is illegal to construct a reference to uninitialized data" [1]. Using this pattern (&mut uninit to initialize data) is also dangerous if the underlying type has drop impls, since then by doing `foo.bar = baz;` you drop the old uninitialized value. 
Sure, in our bindings there are no types that implement `Drop` (AFAIK), so it is less of an issue. If we decide to do this, we should have a comment that explains that this reference might point to uninitialized memory, since otherwise it might be easy to give the reference to another safe function that then e.g. reads a bool. [1]: https://doc.rust-lang.org/nomicon/unchecked-uninit.html
On 10/30/23 09:21, Benno Lossin wrote: > On 28.10.23 18:39, Alice Ryhl wrote: >> On 10/18/23 18:34, Benno Lossin wrote:>> + from_result(|| { >>>> + // SAFETY: The C callback API guarantees that `fc_ptr` is valid. >>>> + let fc = unsafe { &mut *fc_ptr }; >>> >>> This safety comment is not enough, the pointer needs to be unique and >>> pointing to a valid value for this to be ok. I would recommend to do >>> this instead: >>> >>> unsafe { addr_of_mut!((*fc_ptr).ops).write(&Tables::<T>::CONTEXT) }; >> >> It doesn't really need to be unique. Or at least, that wording gives the >> wrong intuition even if it's technically correct when you use the right >> definition of "unique". >> >> To clarify what I mean: Using `ptr::write` on a raw pointer is valid if >> and only if creating a mutable reference and using that to write is >> valid. (Assuming the type has no destructor.) > > I tried looking in the nomicon and UCG, but was not able to find this > statement, where is it from? Not sure where I got it from originally, but it follows from the tree borrows reference: First, if the type is !Unpin, then the mutable reference gets the same tag as the original pointer, so there's trivially no difference. The more interesting case is for Unpin types. Here, the creation of the mutable reference corresponds to a read, and then there's the write of the mutable reference itself. The write of the mutable reference itself is equivalent to the `ptr::write` operation, since exactly the same tags are considered to be affected by child writes and foreign writes. Next, it must be shown that [read, write] is equivalent to just a write, which can be shown by analyzing the tree borrows rules case-by-case. You can find a nice summary of tree borrows at the last page of: https://github.com/Vanille-N/tree-borrows/blob/master/full/main.pdf I'm pretty sure the same analysis works with stacked borrows. 
>> Of course, in this case you *also* have the difference of whether you >> create a mutable to the entire struct or just the field. >>>> + // SAFETY: This is a newly-created inode. No other references to it exist, so it is >>>> + // safe to mutably dereference it. >>>> + let inode = unsafe { &mut *inode }; >>> >>> The inode also needs to be initialized and have valid values as its fields. >>> Not sure if this is kept and it would probably be better to keep using raw >>> pointers here. >> >> My understanding is that this is just a safety invariant, and not a >> validity invariant, so as long as the uninitialized memory is not read, >> it's fine. >> >> See e.g.: >> https://github.com/rust-lang/unsafe-code-guidelines/issues/346 > > I'm not so sure that that discussion is finished and agreed upon. The > nomicon still writes "It is illegal to construct a reference to > uninitialized data" [1]. > > Using this pattern (&mut uninit to initialize data) is also dangerous > if the underlying type has drop impls, since then by doing > `foo.bar = baz;` you drop the old uninitialized value. Sure in > our bindings there are no types that implement drop (AFAIK) so > it is less of an issue. > > If we decide to do this, we should have a comment that explains that > this reference might point to uninitialized memory. Since otherwise > it might be easy to give the reference to another safe function that > then e.g. reads a bool. > > [1]: https://doc.rust-lang.org/nomicon/unchecked-uninit.html That's fair. I agree that we should explicitly decide whether or not to allow this kind of thing. Alice
Wedson Almeida Filho <wedsonaf@gmail.com> writes: <snip> > + unsafe extern "C" fn fill_super_callback( > + sb_ptr: *mut bindings::super_block, > + _fc: *mut bindings::fs_context, > + ) -> core::ffi::c_int { > + from_result(|| { > + // SAFETY: The callback contract guarantees that `sb_ptr` is a unique pointer to a > + // newly-created superblock. > + let sb = unsafe { &mut *sb_ptr.cast() }; > + let params = T::super_params(sb)?; > + > + sb.0.s_magic = params.magic as _; I would prefer an explicit target type for the cast. BR Andreas
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 9c23037b33d0..ca1898ce9527 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -9,6 +9,7 @@ #include <kunit/test.h> #include <linux/errname.h> #include <linux/fs.h> +#include <linux/fs_context.h> #include <linux/slab.h> #include <linux/refcount.h> #include <linux/wait.h> @@ -22,3 +23,7 @@ const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO; const slab_flags_t BINDINGS_SLAB_RECLAIM_ACCOUNT = SLAB_RECLAIM_ACCOUNT; const slab_flags_t BINDINGS_SLAB_MEM_SPREAD = SLAB_MEM_SPREAD; const slab_flags_t BINDINGS_SLAB_ACCOUNT = SLAB_ACCOUNT; + +const unsigned long BINDINGS_SB_RDONLY = SB_RDONLY; + +const loff_t BINDINGS_MAX_LFS_FILESIZE = MAX_LFS_FILESIZE; diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 6a8c6cd17e45..426915d3fb57 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -55,3 +55,7 @@ mod bindings_helper { pub const SLAB_RECLAIM_ACCOUNT: slab_flags_t = BINDINGS_SLAB_RECLAIM_ACCOUNT; pub const SLAB_MEM_SPREAD: slab_flags_t = BINDINGS_SLAB_MEM_SPREAD; pub const SLAB_ACCOUNT: slab_flags_t = BINDINGS_SLAB_ACCOUNT; + +pub const SB_RDONLY: core::ffi::c_ulong = BINDINGS_SB_RDONLY; + +pub const MAX_LFS_FILESIZE: loff_t = BINDINGS_MAX_LFS_FILESIZE; diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs index 1df54c234101..31cf643aaded 100644 --- a/rust/kernel/fs.rs +++ b/rust/kernel/fs.rs @@ -6,16 +6,22 @@ //! //! C headers: [`include/linux/fs.h`](../../include/linux/fs.h) -use crate::error::{code::*, from_result, to_result, Error}; +use crate::error::{code::*, from_result, to_result, Error, Result}; use crate::types::Opaque; use crate::{bindings, init::PinInit, str::CStr, try_pin_init, ThisModule}; use core::{marker::PhantomData, marker::PhantomPinned, pin::Pin}; use macros::{pin_data, pinned_drop}; +/// Maximum size of an inode. +pub const MAX_LFS_FILESIZE: i64 = bindings::MAX_LFS_FILESIZE; + /// A file system type. 
pub trait FileSystem { /// The name of the file system type. const NAME: &'static CStr; + + /// Returns the parameters to initialise a super block. + fn super_params(sb: &NewSuperBlock<Self>) -> Result<SuperParams>; } /// A registration of a file system. @@ -49,7 +55,7 @@ pub fn new<T: FileSystem + ?Sized>(module: &'static ThisModule) -> impl PinInit< let fs = unsafe { &mut *fs_ptr }; fs.owner = module.0; fs.name = T::NAME.as_char_ptr(); - fs.init_fs_context = Some(Self::init_fs_context_callback); + fs.init_fs_context = Some(Self::init_fs_context_callback::<T>); fs.kill_sb = Some(Self::kill_sb_callback); fs.fs_flags = 0; @@ -60,13 +66,22 @@ pub fn new<T: FileSystem + ?Sized>(module: &'static ThisModule) -> impl PinInit< }) } - unsafe extern "C" fn init_fs_context_callback( - _fc_ptr: *mut bindings::fs_context, + unsafe extern "C" fn init_fs_context_callback<T: FileSystem + ?Sized>( + fc_ptr: *mut bindings::fs_context, ) -> core::ffi::c_int { - from_result(|| Err(ENOTSUPP)) + from_result(|| { + // SAFETY: The C callback API guarantees that `fc_ptr` is valid. + let fc = unsafe { &mut *fc_ptr }; + fc.ops = &Tables::<T>::CONTEXT; + Ok(0) + }) } - unsafe extern "C" fn kill_sb_callback(_sb_ptr: *mut bindings::super_block) {} + unsafe extern "C" fn kill_sb_callback(sb_ptr: *mut bindings::super_block) { + // SAFETY: In `get_tree_callback` we always call `get_tree_nodev`, so `kill_anon_super` is + // the appropriate function to call for cleanup. + unsafe { bindings::kill_anon_super(sb_ptr) }; + } } #[pinned_drop] @@ -79,6 +94,151 @@ fn drop(self: Pin<&mut Self>) { } } +/// A file system super block. +/// +/// Wraps the kernel's `struct super_block`. +#[repr(transparent)] +pub struct SuperBlock<T: FileSystem + ?Sized>(Opaque<bindings::super_block>, PhantomData<T>); + +/// Required superblock parameters. +/// +/// This is returned by implementations of [`FileSystem::super_params`]. +pub struct SuperParams { + /// The magic number of the superblock. 
+ pub magic: u32, + + /// The size of a block in powers of 2 (i.e., for a value of `n`, the size is `2^n`). + pub blocksize_bits: u8, + + /// Maximum size of a file. + /// + /// The maximum allowed value is [`MAX_LFS_FILESIZE`]. + pub maxbytes: i64, + + /// Granularity of c/m/atime in ns (cannot be worse than a second). + pub time_gran: u32, +} + +/// A superblock that is still being initialised. +/// +/// # Invariants +/// +/// The superblock is a newly-created one and this is the only active pointer to it. +#[repr(transparent)] +pub struct NewSuperBlock<T: FileSystem + ?Sized>(bindings::super_block, PhantomData<T>); + +struct Tables<T: FileSystem + ?Sized>(T); +impl<T: FileSystem + ?Sized> Tables<T> { + const CONTEXT: bindings::fs_context_operations = bindings::fs_context_operations { + free: None, + parse_param: None, + get_tree: Some(Self::get_tree_callback), + reconfigure: None, + parse_monolithic: None, + dup: None, + }; + + unsafe extern "C" fn get_tree_callback(fc: *mut bindings::fs_context) -> core::ffi::c_int { + // SAFETY: `fc` is valid per the callback contract. `fill_super_callback` also has + // the right type and is a valid callback. + unsafe { bindings::get_tree_nodev(fc, Some(Self::fill_super_callback)) } + } + + unsafe extern "C" fn fill_super_callback( + sb_ptr: *mut bindings::super_block, + _fc: *mut bindings::fs_context, + ) -> core::ffi::c_int { + from_result(|| { + // SAFETY: The callback contract guarantees that `sb_ptr` is a unique pointer to a + // newly-created superblock. 
+ let sb = unsafe { &mut *sb_ptr.cast() }; + let params = T::super_params(sb)?; + + sb.0.s_magic = params.magic as _; + sb.0.s_op = &Tables::<T>::SUPER_BLOCK; + sb.0.s_maxbytes = params.maxbytes; + sb.0.s_time_gran = params.time_gran; + sb.0.s_blocksize_bits = params.blocksize_bits; + sb.0.s_blocksize = 1; + if sb.0.s_blocksize.leading_zeros() < params.blocksize_bits.into() { + return Err(EINVAL); + } + sb.0.s_blocksize = 1 << sb.0.s_blocksize_bits; + sb.0.s_flags |= bindings::SB_RDONLY; + + // The following is scaffolding code that will be removed in a subsequent patch. It is + // needed to build a root dentry, otherwise core code will BUG(). + // SAFETY: `sb` is the superblock being initialised, it is valid for read and write. + let inode = unsafe { bindings::new_inode(&mut sb.0) }; + if inode.is_null() { + return Err(ENOMEM); + } + + // SAFETY: `inode` is valid for write. + unsafe { bindings::set_nlink(inode, 2) }; + + { + // SAFETY: This is a newly-created inode. No other references to it exist, so it is + // safe to mutably dereference it. + let inode = unsafe { &mut *inode }; + inode.i_ino = 1; + inode.i_mode = (bindings::S_IFDIR | 0o755) as _; + + // SAFETY: `simple_dir_operations` never changes, it's safe to reference it. + inode.__bindgen_anon_3.i_fop = unsafe { &bindings::simple_dir_operations }; + + // SAFETY: `simple_dir_inode_operations` never changes, it's safe to reference it. + inode.i_op = unsafe { &bindings::simple_dir_inode_operations }; + } + + // SAFETY: `d_make_root` requires that `inode` be valid and referenced, which is the + // case for this call. + // + // It takes over the inode, even on failure, so we don't need to clean it up. 
+ let dentry = unsafe { bindings::d_make_root(inode) }; + if dentry.is_null() { + return Err(ENOMEM); + } + + sb.0.s_root = dentry; + + Ok(0) + }) + } + + const SUPER_BLOCK: bindings::super_operations = bindings::super_operations { + alloc_inode: None, + destroy_inode: None, + free_inode: None, + dirty_inode: None, + write_inode: None, + drop_inode: None, + evict_inode: None, + put_super: None, + sync_fs: None, + freeze_super: None, + freeze_fs: None, + thaw_super: None, + unfreeze_fs: None, + statfs: None, + remount_fs: None, + umount_begin: None, + show_options: None, + show_devname: None, + show_path: None, + show_stats: None, + #[cfg(CONFIG_QUOTA)] + quota_read: None, + #[cfg(CONFIG_QUOTA)] + quota_write: None, + #[cfg(CONFIG_QUOTA)] + get_dquots: None, + nr_cached_objects: None, + free_cached_objects: None, + shutdown: None, + }; +} + /// Kernel module that exposes a single file system implemented by `T`. #[pin_data] pub struct Module<T: FileSystem + ?Sized> { @@ -105,6 +265,7 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> { /// /// ``` /// # mod module_fs_sample { +/// use kernel::fs::{NewSuperBlock, SuperParams}; /// use kernel::prelude::*; /// use kernel::{c_str, fs}; /// @@ -119,6 +280,9 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> { /// struct MyFs; /// impl fs::FileSystem for MyFs { /// const NAME: &'static CStr = c_str!("myfs"); +/// fn super_params(_: &NewSuperBlock<Self>) -> Result<SuperParams> { +/// todo!() +/// } /// } /// # } /// ``` diff --git a/samples/rust/rust_rofs.rs b/samples/rust/rust_rofs.rs index 1c00b1da8b94..9878bf88b991 100644 --- a/samples/rust/rust_rofs.rs +++ b/samples/rust/rust_rofs.rs @@ -2,6 +2,7 @@ //! Rust read-only file system sample. 
+use kernel::fs::{NewSuperBlock, SuperParams}; use kernel::prelude::*; use kernel::{c_str, fs}; @@ -16,4 +17,13 @@ struct RoFs; impl fs::FileSystem for RoFs { const NAME: &'static CStr = c_str!("rust-fs"); + + fn super_params(_sb: &NewSuperBlock<Self>) -> Result<SuperParams> { + Ok(SuperParams { + magic: 0x52555354, + blocksize_bits: 12, + maxbytes: fs::MAX_LFS_FILESIZE, + time_gran: 1, + }) + } }