Message ID | 20231018122518.128049-8-wedsonaf@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Rust abstractions for VFS | expand |
On 18.10.23 14:25, Wedson Almeida Filho wrote: > From: Wedson Almeida Filho <walmeida@microsoft.com> > > Allow Rust file systems to report the contents of their directory > inodes. The reported entries cannot be opened yet. > > Signed-off-by: Wedson Almeida Filho <walmeida@microsoft.com> > --- > rust/kernel/fs.rs | 193 +++++++++++++++++++++++++++++++++++++- > samples/rust/rust_rofs.rs | 49 +++++++++- > 2 files changed, 236 insertions(+), 6 deletions(-) > > diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs > index f3a41cf57502..89611c44e4c5 100644 > --- a/rust/kernel/fs.rs > +++ b/rust/kernel/fs.rs > @@ -28,6 +28,70 @@ pub trait FileSystem { > /// This is called during initialisation of a superblock after [`FileSystem::super_params`] has > /// completed successfully. > fn init_root(sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>>; > + > + /// Reads directory entries from directory inodes. > + /// > + /// [`DirEmitter::pos`] holds the current position of the directory reader. > + fn read_dir(inode: &INode<Self>, emitter: &mut DirEmitter) -> Result; > +} > + > +/// The types of directory entries reported by [`FileSystem::read_dir`]. > +#[repr(u32)] > +#[derive(Copy, Clone)] > +pub enum DirEntryType { > + /// Unknown type. > + Unknown = bindings::DT_UNKNOWN, > + > + /// Named pipe (first-in, first-out) type. > + Fifo = bindings::DT_FIFO, > + > + /// Character device type. > + Chr = bindings::DT_CHR, > + > + /// Directory type. > + Dir = bindings::DT_DIR, > + > + /// Block device type. > + Blk = bindings::DT_BLK, > + > + /// Regular file type. > + Reg = bindings::DT_REG, > + > + /// Symbolic link type. > + Lnk = bindings::DT_LNK, > + > + /// Named unix-domain socket type. > + Sock = bindings::DT_SOCK, > + > + /// White-out type. > + Wht = bindings::DT_WHT, > +} > + > +impl From<INodeType> for DirEntryType { > + fn from(value: INodeType) -> Self { > + match value { > + INodeType::Dir => DirEntryType::Dir, > + } > + } > +} > + > +impl core::convert::TryFrom<u32> for DirEntryType { > + type Error = crate::error::Error; > + > + fn try_from(v: u32) -> Result<Self> { > + match v { > + v if v == Self::Unknown as u32 => Ok(Self::Unknown), > + v if v == Self::Fifo as u32 => Ok(Self::Fifo), > + v if v == Self::Chr as u32 => Ok(Self::Chr), > + v if v == Self::Dir as u32 => Ok(Self::Dir), > + v if v == Self::Blk as u32 => Ok(Self::Blk), > + v if v == Self::Reg as u32 => Ok(Self::Reg), > + v if v == Self::Lnk as u32 => Ok(Self::Lnk), > + v if v == Self::Sock as u32 => Ok(Self::Sock), > + v if v == Self::Wht as u32 => Ok(Self::Wht), > + _ => Err(EDOM), > + } > + } > } > > /// A registration of a file system. > @@ -161,9 +225,7 @@ pub fn init(self, params: INodeParams) -> Result<ARef<INode<T>>> { > > let mode = match params.typ { > INodeType::Dir => { > - // SAFETY: `simple_dir_operations` never changes, it's safe to reference it. > - inode.__bindgen_anon_3.i_fop = unsafe { &bindings::simple_dir_operations }; > - > + inode.__bindgen_anon_3.i_fop = &Tables::<T>::DIR_FILE_OPERATIONS; > // SAFETY: `simple_dir_inode_operations` never changes, it's safe to reference it. > inode.i_op = unsafe { &bindings::simple_dir_inode_operations }; > bindings::S_IFDIR > @@ -403,6 +465,126 @@ impl<T: FileSystem + ?Sized> Tables<T> { > free_cached_objects: None, > shutdown: None, > }; > + > + const DIR_FILE_OPERATIONS: bindings::file_operations = bindings::file_operations { > + owner: ptr::null_mut(), > + llseek: Some(bindings::generic_file_llseek), > + read: Some(bindings::generic_read_dir), > + write: None, > + read_iter: None, > + write_iter: None, > + iopoll: None, > + iterate_shared: Some(Self::read_dir_callback), > + poll: None, > + unlocked_ioctl: None, > + compat_ioctl: None, > + mmap: None, > + mmap_supported_flags: 0, > + open: None, > + flush: None, > + release: None, > + fsync: None, > + fasync: None, > + lock: None, > + get_unmapped_area: None, > + check_flags: None, > + flock: None, > + splice_write: None, > + splice_read: None, > + splice_eof: None, > + setlease: None, > + fallocate: None, > + show_fdinfo: None, > + copy_file_range: None, > + remap_file_range: None, > + fadvise: None, > + uring_cmd: None, > + uring_cmd_iopoll: None, > + }; > + > + unsafe extern "C" fn read_dir_callback( > + file: *mut bindings::file, > + ctx_ptr: *mut bindings::dir_context, > + ) -> core::ffi::c_int { > + from_result(|| { > + // SAFETY: The C API guarantees that `file` is valid for read. And since `f_inode` is > + // immutable, we can read it directly. > + let inode = unsafe { &*(*file).f_inode.cast::<INode<T>>() }; > + > + // SAFETY: The C API guarantees that this is the only reference to the `dir_context` > + // instance. > + let emitter = unsafe { &mut *ctx_ptr.cast::<DirEmitter>() }; > + let orig_pos = emitter.pos(); > + > + // Call the module implementation. We ignore errors if directory entries have been > + // succesfully emitted: this is because we want users to see them before the error. > + match T::read_dir(inode, emitter) { > + Ok(_) => Ok(0), > + Err(e) => { > + if emitter.pos() == orig_pos { > + Err(e) > + } else { > + Ok(0) > + } > + } > + } > + }) > + } > +} > + > +/// Directory entry emitter. > +/// > +/// This is used in [`FileSystem::read_dir`] implementations to report the directory entry. > +#[repr(transparent)] > +pub struct DirEmitter(bindings::dir_context); No `Opaque`? > + > +impl DirEmitter { > + /// Returns the current position of the emitter. > + pub fn pos(&self) -> i64 { > + self.0.pos > + } > + > + /// Emits a directory entry. > + /// > + /// `pos_inc` is the number with which to increment the current position on success. > + /// > + /// `name` is the name of the entry. > + /// > + /// `ino` is the inode number of the entry. > + /// > + /// `etype` is the type of the entry. It might make sense to create a struct for all these parameters. > + /// > + /// Returns `false` when the entry could not be emitted, possibly because the user-provided > + /// buffer is full. > + pub fn emit(&mut self, pos_inc: i64, name: &[u8], ino: Ino, etype: DirEntryType) -> bool { > + let Ok(name_len) = i32::try_from(name.len()) else { > + return false; > + }; > + > + let Some(actor) = self.0.actor else { > + return false; > + }; > + > + let Some(new_pos) = self.0.pos.checked_add(pos_inc) else { > + return false; > + }; > + > + // SAFETY: `name` is valid at least for the duration of the `actor` call. What about `&mut self.0`? Since this is a function pointer, can we really be sure about the safety requirements?
Wedson Almeida Filho <wedsonaf@gmail.com> writes: [...] > + unsafe extern "C" fn read_dir_callback( > + file: *mut bindings::file, > + ctx_ptr: *mut bindings::dir_context, > + ) -> core::ffi::c_int { > + from_result(|| { > + // SAFETY: The C API guarantees that `file` is valid for read. And since `f_inode` is > + // immutable, we can read it directly. Should this be "the pointee of `f_inode` is immutable" instead? [...] > + pub fn emit(&mut self, pos_inc: i64, name: &[u8], ino: Ino, etype: DirEntryType) -> bool { > + let Ok(name_len) = i32::try_from(name.len()) else { > + return false; > + }; > + > + let Some(actor) = self.0.actor else { > + return false; > + }; > + > + let Some(new_pos) = self.0.pos.checked_add(pos_inc) else { > + return false; > + }; > + > + // SAFETY: `name` is valid at least for the duration of the `actor` call. > + let ret = unsafe { > + actor( > + &mut self.0, > + name.as_ptr().cast(), > + name_len, > + self.0.pos, > + ino, > + etype as _, I would prefer an explicit target type here. BR Andreas
Wedson Almeida Filho: > + /// White-out type. > + Wht = bindings::DT_WHT, As well as I understand, filesystems supposed not to return DT_WHT from readdir to user space. But I'm not sure. Please, do expirement! Create whiteout on ext4 and see what readdir will return. As well as I understand, it will return DT_CHR. So, I think DT_WHT should be deleted here. Askar Safin
On Mon, Jan 22, 2024 at 12:00:49AM +0300, Askar Safin wrote: > Wedson Almeida Filho: > > + /// White-out type. > > + Wht = bindings::DT_WHT, > > As well as I understand, filesystems supposed not to return > DT_WHT from readdir to user space. But I'm not sure. Please, > do expirement! Create whiteout on ext4 and see what readdir > will return. As well as I understand, it will return DT_CHR. DT_WHT is defined in /usr/include/dirent.h, so it is actually present in the userspace support for readdir. If the kernel returns DT_WHT to userspace, applications should know what it is. However, filesystems like ext4 and btrfs don't have DT_WHT on disk and few userspace applications support it. Way back when overlay required whiteout support to be added, the magical char device representation was invented for filesystems without DT_WHT and that was exposed to userspace. We're kind of stuck with it now, though there is nothign stopping filesysetms from returning DT_WHT to userspace instead of DT_CHR and requiring userspace to stat the inode to look at the major/minor numbers to determine if the dirent is a whiteout or not. Indeed, it would be more optimal for overlay if filesystems returned DT_WHT instead of DT_CHR for whiteouts. Put simply: DT_WHT is part of the readdir kernel and userspace API and therefore should be present in the Rust interfaces. Cheers, Dave.
diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs index f3a41cf57502..89611c44e4c5 100644 --- a/rust/kernel/fs.rs +++ b/rust/kernel/fs.rs @@ -28,6 +28,70 @@ pub trait FileSystem { /// This is called during initialisation of a superblock after [`FileSystem::super_params`] has /// completed successfully. fn init_root(sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>>; + + /// Reads directory entries from directory inodes. + /// + /// [`DirEmitter::pos`] holds the current position of the directory reader. + fn read_dir(inode: &INode<Self>, emitter: &mut DirEmitter) -> Result; +} + +/// The types of directory entries reported by [`FileSystem::read_dir`]. +#[repr(u32)] +#[derive(Copy, Clone)] +pub enum DirEntryType { + /// Unknown type. + Unknown = bindings::DT_UNKNOWN, + + /// Named pipe (first-in, first-out) type. + Fifo = bindings::DT_FIFO, + + /// Character device type. + Chr = bindings::DT_CHR, + + /// Directory type. + Dir = bindings::DT_DIR, + + /// Block device type. + Blk = bindings::DT_BLK, + + /// Regular file type. + Reg = bindings::DT_REG, + + /// Symbolic link type. + Lnk = bindings::DT_LNK, + + /// Named unix-domain socket type. + Sock = bindings::DT_SOCK, + + /// White-out type. + Wht = bindings::DT_WHT, +} + +impl From<INodeType> for DirEntryType { + fn from(value: INodeType) -> Self { + match value { + INodeType::Dir => DirEntryType::Dir, + } + } +} + +impl core::convert::TryFrom<u32> for DirEntryType { + type Error = crate::error::Error; + + fn try_from(v: u32) -> Result<Self> { + match v { + v if v == Self::Unknown as u32 => Ok(Self::Unknown), + v if v == Self::Fifo as u32 => Ok(Self::Fifo), + v if v == Self::Chr as u32 => Ok(Self::Chr), + v if v == Self::Dir as u32 => Ok(Self::Dir), + v if v == Self::Blk as u32 => Ok(Self::Blk), + v if v == Self::Reg as u32 => Ok(Self::Reg), + v if v == Self::Lnk as u32 => Ok(Self::Lnk), + v if v == Self::Sock as u32 => Ok(Self::Sock), + v if v == Self::Wht as u32 => Ok(Self::Wht), + _ => Err(EDOM), + } + } } /// A registration of a file system. @@ -161,9 +225,7 @@ pub fn init(self, params: INodeParams) -> Result<ARef<INode<T>>> { let mode = match params.typ { INodeType::Dir => { - // SAFETY: `simple_dir_operations` never changes, it's safe to reference it. - inode.__bindgen_anon_3.i_fop = unsafe { &bindings::simple_dir_operations }; - + inode.__bindgen_anon_3.i_fop = &Tables::<T>::DIR_FILE_OPERATIONS; // SAFETY: `simple_dir_inode_operations` never changes, it's safe to reference it. inode.i_op = unsafe { &bindings::simple_dir_inode_operations }; bindings::S_IFDIR @@ -403,6 +465,126 @@ impl<T: FileSystem + ?Sized> Tables<T> { free_cached_objects: None, shutdown: None, }; + + const DIR_FILE_OPERATIONS: bindings::file_operations = bindings::file_operations { + owner: ptr::null_mut(), + llseek: Some(bindings::generic_file_llseek), + read: Some(bindings::generic_read_dir), + write: None, + read_iter: None, + write_iter: None, + iopoll: None, + iterate_shared: Some(Self::read_dir_callback), + poll: None, + unlocked_ioctl: None, + compat_ioctl: None, + mmap: None, + mmap_supported_flags: 0, + open: None, + flush: None, + release: None, + fsync: None, + fasync: None, + lock: None, + get_unmapped_area: None, + check_flags: None, + flock: None, + splice_write: None, + splice_read: None, + splice_eof: None, + setlease: None, + fallocate: None, + show_fdinfo: None, + copy_file_range: None, + remap_file_range: None, + fadvise: None, + uring_cmd: None, + uring_cmd_iopoll: None, + }; + + unsafe extern "C" fn read_dir_callback( + file: *mut bindings::file, + ctx_ptr: *mut bindings::dir_context, + ) -> core::ffi::c_int { + from_result(|| { + // SAFETY: The C API guarantees that `file` is valid for read. And since `f_inode` is + // immutable, we can read it directly. + let inode = unsafe { &*(*file).f_inode.cast::<INode<T>>() }; + + // SAFETY: The C API guarantees that this is the only reference to the `dir_context` + // instance. + let emitter = unsafe { &mut *ctx_ptr.cast::<DirEmitter>() }; + let orig_pos = emitter.pos(); + + // Call the module implementation. We ignore errors if directory entries have been + // succesfully emitted: this is because we want users to see them before the error. + match T::read_dir(inode, emitter) { + Ok(_) => Ok(0), + Err(e) => { + if emitter.pos() == orig_pos { + Err(e) + } else { + Ok(0) + } + } + } + }) + } +} + +/// Directory entry emitter. +/// +/// This is used in [`FileSystem::read_dir`] implementations to report the directory entry. +#[repr(transparent)] +pub struct DirEmitter(bindings::dir_context); + +impl DirEmitter { + /// Returns the current position of the emitter. + pub fn pos(&self) -> i64 { + self.0.pos + } + + /// Emits a directory entry. + /// + /// `pos_inc` is the number with which to increment the current position on success. + /// + /// `name` is the name of the entry. + /// + /// `ino` is the inode number of the entry. + /// + /// `etype` is the type of the entry. + /// + /// Returns `false` when the entry could not be emitted, possibly because the user-provided + /// buffer is full. + pub fn emit(&mut self, pos_inc: i64, name: &[u8], ino: Ino, etype: DirEntryType) -> bool { + let Ok(name_len) = i32::try_from(name.len()) else { + return false; + }; + + let Some(actor) = self.0.actor else { + return false; + }; + + let Some(new_pos) = self.0.pos.checked_add(pos_inc) else { + return false; + }; + + // SAFETY: `name` is valid at least for the duration of the `actor` call. + let ret = unsafe { + actor( + &mut self.0, + name.as_ptr().cast(), + name_len, + self.0.pos, + ino, + etype as _, + ) + }; + if ret { + self.0.pos = new_pos; + } + ret + } } /// Kernel module that exposes a single file system implemented by `T`. @@ -431,7 +613,7 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> { /// /// ``` /// # mod module_fs_sample { -/// use kernel::fs::{INode, NewSuperBlock, SuperBlock, SuperParams}; +/// use kernel::fs::{DirEmitter, INode, NewSuperBlock, SuperBlock, SuperParams}; /// use kernel::prelude::*; /// use kernel::{c_str, fs, types::ARef}; /// @@ -452,6 +634,9 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> { /// fn init_root(_sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>> { /// todo!() /// } +/// fn read_dir(_: &INode<Self>, _: &mut DirEmitter) -> Result { +/// todo!() +/// } /// } /// # } /// ``` diff --git a/samples/rust/rust_rofs.rs b/samples/rust/rust_rofs.rs index 9e5f4c7d1c06..4e61a94afa70 100644 --- a/samples/rust/rust_rofs.rs +++ b/samples/rust/rust_rofs.rs @@ -2,7 +2,9 @@ //! Rust read-only file system sample. -use kernel::fs::{INode, INodeParams, INodeType, NewSuperBlock, SuperBlock, SuperParams}; +use kernel::fs::{ + DirEmitter, INode, INodeParams, INodeType, NewSuperBlock, SuperBlock, SuperParams, +}; use kernel::prelude::*; use kernel::{c_str, fs, time::UNIX_EPOCH, types::ARef, types::Either}; @@ -14,6 +16,30 @@ license: "GPL", } +struct Entry { + name: &'static [u8], + ino: u64, + etype: INodeType, +} + +const ENTRIES: [Entry; 3] = [ + Entry { + name: b".", + ino: 1, + etype: INodeType::Dir, + }, + Entry { + name: b"..", + ino: 1, + etype: INodeType::Dir, + }, + Entry { + name: b"subdir", + ino: 2, + etype: INodeType::Dir, + }, +]; + struct RoFs; impl fs::FileSystem for RoFs { const NAME: &'static CStr = c_str!("rust-fs"); @@ -33,7 +59,7 @@ fn init_root(sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>> { Either::Right(new) => new.init(INodeParams { typ: INodeType::Dir, mode: 0o555, - size: 1, + size: ENTRIES.len().try_into()?, blocks: 1, nlink: 2, uid: 0, @@ -44,4 +70,23 @@ fn init_root(sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>> { }), } } + + fn read_dir(inode: &INode<Self>, emitter: &mut DirEmitter) -> Result { + if inode.ino() != 1 { + return Ok(()); + } + + let pos = emitter.pos(); + if pos >= ENTRIES.len().try_into()? { + return Ok(()); + } + + for e in ENTRIES.iter().skip(pos.try_into()?) { + if !emitter.emit(1, e.name, e.ino, e.etype.into()) { + break; + } + } + + Ok(()) + } }