diff mbox series

[RFC,07/19] rust: fs: introduce `FileSystem::read_dir`

Message ID 20231018122518.128049-8-wedsonaf@gmail.com (mailing list archive)
State New, archived
Headers show
Series Rust abstractions for VFS | expand

Commit Message

Wedson Almeida Filho Oct. 18, 2023, 12:25 p.m. UTC
From: Wedson Almeida Filho <walmeida@microsoft.com>

Allow Rust file systems to report the contents of their directory
inodes. The reported entries cannot be opened yet.

Signed-off-by: Wedson Almeida Filho <walmeida@microsoft.com>
---
 rust/kernel/fs.rs         | 193 +++++++++++++++++++++++++++++++++++++-
 samples/rust/rust_rofs.rs |  49 +++++++++-
 2 files changed, 236 insertions(+), 6 deletions(-)

Comments

Benno Lossin Oct. 21, 2023, 8:33 a.m. UTC | #1
On 18.10.23 14:25, Wedson Almeida Filho wrote:
> From: Wedson Almeida Filho <walmeida@microsoft.com>
> 
> Allow Rust file systems to report the contents of their directory
> inodes. The reported entries cannot be opened yet.
> 
> Signed-off-by: Wedson Almeida Filho <walmeida@microsoft.com>
> ---
>   rust/kernel/fs.rs         | 193 +++++++++++++++++++++++++++++++++++++-
>   samples/rust/rust_rofs.rs |  49 +++++++++-
>   2 files changed, 236 insertions(+), 6 deletions(-)
> 
> diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs
> index f3a41cf57502..89611c44e4c5 100644
> --- a/rust/kernel/fs.rs
> +++ b/rust/kernel/fs.rs
> @@ -28,6 +28,70 @@ pub trait FileSystem {
>       /// This is called during initialisation of a superblock after [`FileSystem::super_params`] has
>       /// completed successfully.
>       fn init_root(sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>>;
> +
> +    /// Reads directory entries from directory inodes.
> +    ///
> +    /// [`DirEmitter::pos`] holds the current position of the directory reader.
> +    fn read_dir(inode: &INode<Self>, emitter: &mut DirEmitter) -> Result;
> +}
> +
> +/// The types of directory entries reported by [`FileSystem::read_dir`].
> +#[repr(u32)]
> +#[derive(Copy, Clone)]
> +pub enum DirEntryType {
> +    /// Unknown type.
> +    Unknown = bindings::DT_UNKNOWN,
> +
> +    /// Named pipe (first-in, first-out) type.
> +    Fifo = bindings::DT_FIFO,
> +
> +    /// Character device type.
> +    Chr = bindings::DT_CHR,
> +
> +    /// Directory type.
> +    Dir = bindings::DT_DIR,
> +
> +    /// Block device type.
> +    Blk = bindings::DT_BLK,
> +
> +    /// Regular file type.
> +    Reg = bindings::DT_REG,
> +
> +    /// Symbolic link type.
> +    Lnk = bindings::DT_LNK,
> +
> +    /// Named unix-domain socket type.
> +    Sock = bindings::DT_SOCK,
> +
> +    /// White-out type.
> +    Wht = bindings::DT_WHT,
> +}
> +
> +impl From<INodeType> for DirEntryType {
> +    fn from(value: INodeType) -> Self {
> +        match value {
> +            INodeType::Dir => DirEntryType::Dir,
> +        }
> +    }
> +}
> +
> +impl core::convert::TryFrom<u32> for DirEntryType {
> +    type Error = crate::error::Error;
> +
> +    fn try_from(v: u32) -> Result<Self> {
> +        match v {
> +            v if v == Self::Unknown as u32 => Ok(Self::Unknown),
> +            v if v == Self::Fifo as u32 => Ok(Self::Fifo),
> +            v if v == Self::Chr as u32 => Ok(Self::Chr),
> +            v if v == Self::Dir as u32 => Ok(Self::Dir),
> +            v if v == Self::Blk as u32 => Ok(Self::Blk),
> +            v if v == Self::Reg as u32 => Ok(Self::Reg),
> +            v if v == Self::Lnk as u32 => Ok(Self::Lnk),
> +            v if v == Self::Sock as u32 => Ok(Self::Sock),
> +            v if v == Self::Wht as u32 => Ok(Self::Wht),
> +            _ => Err(EDOM),
> +        }
> +    }
>   }
> 
>   /// A registration of a file system.
> @@ -161,9 +225,7 @@ pub fn init(self, params: INodeParams) -> Result<ARef<INode<T>>> {
> 
>           let mode = match params.typ {
>               INodeType::Dir => {
> -                // SAFETY: `simple_dir_operations` never changes, it's safe to reference it.
> -                inode.__bindgen_anon_3.i_fop = unsafe { &bindings::simple_dir_operations };
> -
> +                inode.__bindgen_anon_3.i_fop = &Tables::<T>::DIR_FILE_OPERATIONS;
>                   // SAFETY: `simple_dir_inode_operations` never changes, it's safe to reference it.
>                   inode.i_op = unsafe { &bindings::simple_dir_inode_operations };
>                   bindings::S_IFDIR
> @@ -403,6 +465,126 @@ impl<T: FileSystem + ?Sized> Tables<T> {
>           free_cached_objects: None,
>           shutdown: None,
>       };
> +
> +    const DIR_FILE_OPERATIONS: bindings::file_operations = bindings::file_operations {
> +        owner: ptr::null_mut(),
> +        llseek: Some(bindings::generic_file_llseek),
> +        read: Some(bindings::generic_read_dir),
> +        write: None,
> +        read_iter: None,
> +        write_iter: None,
> +        iopoll: None,
> +        iterate_shared: Some(Self::read_dir_callback),
> +        poll: None,
> +        unlocked_ioctl: None,
> +        compat_ioctl: None,
> +        mmap: None,
> +        mmap_supported_flags: 0,
> +        open: None,
> +        flush: None,
> +        release: None,
> +        fsync: None,
> +        fasync: None,
> +        lock: None,
> +        get_unmapped_area: None,
> +        check_flags: None,
> +        flock: None,
> +        splice_write: None,
> +        splice_read: None,
> +        splice_eof: None,
> +        setlease: None,
> +        fallocate: None,
> +        show_fdinfo: None,
> +        copy_file_range: None,
> +        remap_file_range: None,
> +        fadvise: None,
> +        uring_cmd: None,
> +        uring_cmd_iopoll: None,
> +    };
> +
> +    unsafe extern "C" fn read_dir_callback(
> +        file: *mut bindings::file,
> +        ctx_ptr: *mut bindings::dir_context,
> +    ) -> core::ffi::c_int {
> +        from_result(|| {
> +            // SAFETY: The C API guarantees that `file` is valid for read. And since `f_inode` is
> +            // immutable, we can read it directly.
> +            let inode = unsafe { &*(*file).f_inode.cast::<INode<T>>() };
> +
> +            // SAFETY: The C API guarantees that this is the only reference to the `dir_context`
> +            // instance.
> +            let emitter = unsafe { &mut *ctx_ptr.cast::<DirEmitter>() };
> +            let orig_pos = emitter.pos();
> +
> +            // Call the module implementation. We ignore errors if directory entries have been
> +            // succesfully emitted: this is because we want users to see them before the error.
> +            match T::read_dir(inode, emitter) {
> +                Ok(_) => Ok(0),
> +                Err(e) => {
> +                    if emitter.pos() == orig_pos {
> +                        Err(e)
> +                    } else {
> +                        Ok(0)
> +                    }
> +                }
> +            }
> +        })
> +    }
> +}
> +
> +/// Directory entry emitter.
> +///
> +/// This is used in [`FileSystem::read_dir`] implementations to report the directory entry.
> +#[repr(transparent)]
> +pub struct DirEmitter(bindings::dir_context);

No `Opaque`?

> +
> +impl DirEmitter {
> +    /// Returns the current position of the emitter.
> +    pub fn pos(&self) -> i64 {
> +        self.0.pos
> +    }
> +
> +    /// Emits a directory entry.
> +    ///
> +    /// `pos_inc` is the number with which to increment the current position on success.
> +    ///
> +    /// `name` is the name of the entry.
> +    ///
> +    /// `ino` is the inode number of the entry.
> +    ///
> +    /// `etype` is the type of the entry.

It might make sense to create a struct for all these parameters.

> +    ///
> +    /// Returns `false` when the entry could not be emitted, possibly because the user-provided
> +    /// buffer is full.
> +    pub fn emit(&mut self, pos_inc: i64, name: &[u8], ino: Ino, etype: DirEntryType) -> bool {
> +        let Ok(name_len) = i32::try_from(name.len()) else {
> +            return false;
> +        };
> +
> +        let Some(actor) = self.0.actor else {
> +            return false;
> +        };
> +
> +        let Some(new_pos) = self.0.pos.checked_add(pos_inc) else {
> +            return false;
> +        };
> +
> +        // SAFETY: `name` is valid at least for the duration of the `actor` call.

What about `&mut self.0`?  Since this is a function pointer, can we
really be sure about the safety requirements?
Andreas Hindborg Jan. 3, 2024, 2:09 p.m. UTC | #2
Wedson Almeida Filho <wedsonaf@gmail.com> writes:

[...]

> +    unsafe extern "C" fn read_dir_callback(
> +        file: *mut bindings::file,
> +        ctx_ptr: *mut bindings::dir_context,
> +    ) -> core::ffi::c_int {
> +        from_result(|| {
> +            // SAFETY: The C API guarantees that `file` is valid for read. And since `f_inode` is
> +            // immutable, we can read it directly.

Should this be "the pointee of `f_inode` is immutable" instead?

[...]

> +    pub fn emit(&mut self, pos_inc: i64, name: &[u8], ino: Ino, etype: DirEntryType) -> bool {
> +        let Ok(name_len) = i32::try_from(name.len()) else {
> +            return false;
> +        };
> +
> +        let Some(actor) = self.0.actor else {
> +            return false;
> +        };
> +
> +        let Some(new_pos) = self.0.pos.checked_add(pos_inc) else {
> +            return false;
> +        };
> +
> +        // SAFETY: `name` is valid at least for the duration of the `actor` call.
> +        let ret = unsafe {
> +            actor(
> +                &mut self.0,
> +                name.as_ptr().cast(),
> +                name_len,
> +                self.0.pos,
> +                ino,
> +                etype as _,

I would prefer an explicit target type here.

BR Andreas
Askar Safin Jan. 21, 2024, 9 p.m. UTC | #3
Wedson Almeida Filho:
> +    /// White-out type.
> +    Wht = bindings::DT_WHT,

As well as I understand, filesystems supposed not to return
DT_WHT from readdir to user space. But I'm not sure. Please,
do expirement! Create whiteout on ext4 and see what readdir
will return. As well as I understand, it will return DT_CHR.

So, I think DT_WHT should be deleted here.

Askar Safin
Dave Chinner Jan. 21, 2024, 9:51 p.m. UTC | #4
On Mon, Jan 22, 2024 at 12:00:49AM +0300, Askar Safin wrote:
> Wedson Almeida Filho:
> > +    /// White-out type.
> > +    Wht = bindings::DT_WHT,
> 
> As well as I understand, filesystems supposed not to return
> DT_WHT from readdir to user space. But I'm not sure. Please,
> do expirement! Create whiteout on ext4 and see what readdir
> will return. As well as I understand, it will return DT_CHR.

DT_WHT is defined in /usr/include/dirent.h, so it is actually
present in the userspace support for readdir. If the kernel returns
DT_WHT to userspace, applications should know what it is.

However, filesystems like ext4 and btrfs don't have DT_WHT on disk
and few userspace applications support it. Way back when overlay
required whiteout support to be added, the magical char device
representation was invented for filesystems without DT_WHT and that
was exposed to userspace.

We're kind of stuck with it now, though there is nothign stopping
filesysetms from returning DT_WHT to userspace instead of DT_CHR and
requiring userspace to stat the inode to look at the major/minor
numbers to determine if the dirent is a whiteout or not.  Indeed, it
would be more optimal for overlay if filesystems returned DT_WHT
instead of DT_CHR for whiteouts.

Put simply: DT_WHT is part of the readdir kernel and userspace API
and therefore should be present in the Rust interfaces.

Cheers,

Dave.
diff mbox series

Patch

diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs
index f3a41cf57502..89611c44e4c5 100644
--- a/rust/kernel/fs.rs
+++ b/rust/kernel/fs.rs
@@ -28,6 +28,70 @@  pub trait FileSystem {
     /// This is called during initialisation of a superblock after [`FileSystem::super_params`] has
     /// completed successfully.
     fn init_root(sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>>;
+
+    /// Reads directory entries from directory inodes.
+    ///
+    /// [`DirEmitter::pos`] holds the current position of the directory reader.
+    fn read_dir(inode: &INode<Self>, emitter: &mut DirEmitter) -> Result;
+}
+
+/// The types of directory entries reported by [`FileSystem::read_dir`].
+#[repr(u32)]
+#[derive(Copy, Clone)]
+pub enum DirEntryType {
+    /// Unknown type.
+    Unknown = bindings::DT_UNKNOWN,
+
+    /// Named pipe (first-in, first-out) type.
+    Fifo = bindings::DT_FIFO,
+
+    /// Character device type.
+    Chr = bindings::DT_CHR,
+
+    /// Directory type.
+    Dir = bindings::DT_DIR,
+
+    /// Block device type.
+    Blk = bindings::DT_BLK,
+
+    /// Regular file type.
+    Reg = bindings::DT_REG,
+
+    /// Symbolic link type.
+    Lnk = bindings::DT_LNK,
+
+    /// Named unix-domain socket type.
+    Sock = bindings::DT_SOCK,
+
+    /// White-out type.
+    Wht = bindings::DT_WHT,
+}
+
+impl From<INodeType> for DirEntryType {
+    fn from(value: INodeType) -> Self {
+        match value {
+            INodeType::Dir => DirEntryType::Dir,
+        }
+    }
+}
+
+impl core::convert::TryFrom<u32> for DirEntryType {
+    type Error = crate::error::Error;
+
+    fn try_from(v: u32) -> Result<Self> {
+        match v {
+            v if v == Self::Unknown as u32 => Ok(Self::Unknown),
+            v if v == Self::Fifo as u32 => Ok(Self::Fifo),
+            v if v == Self::Chr as u32 => Ok(Self::Chr),
+            v if v == Self::Dir as u32 => Ok(Self::Dir),
+            v if v == Self::Blk as u32 => Ok(Self::Blk),
+            v if v == Self::Reg as u32 => Ok(Self::Reg),
+            v if v == Self::Lnk as u32 => Ok(Self::Lnk),
+            v if v == Self::Sock as u32 => Ok(Self::Sock),
+            v if v == Self::Wht as u32 => Ok(Self::Wht),
+            _ => Err(EDOM),
+        }
+    }
 }
 
 /// A registration of a file system.
@@ -161,9 +225,7 @@  pub fn init(self, params: INodeParams) -> Result<ARef<INode<T>>> {
 
         let mode = match params.typ {
             INodeType::Dir => {
-                // SAFETY: `simple_dir_operations` never changes, it's safe to reference it.
-                inode.__bindgen_anon_3.i_fop = unsafe { &bindings::simple_dir_operations };
-
+                inode.__bindgen_anon_3.i_fop = &Tables::<T>::DIR_FILE_OPERATIONS;
                 // SAFETY: `simple_dir_inode_operations` never changes, it's safe to reference it.
                 inode.i_op = unsafe { &bindings::simple_dir_inode_operations };
                 bindings::S_IFDIR
@@ -403,6 +465,126 @@  impl<T: FileSystem + ?Sized> Tables<T> {
         free_cached_objects: None,
         shutdown: None,
     };
+
+    const DIR_FILE_OPERATIONS: bindings::file_operations = bindings::file_operations {
+        owner: ptr::null_mut(),
+        llseek: Some(bindings::generic_file_llseek),
+        read: Some(bindings::generic_read_dir),
+        write: None,
+        read_iter: None,
+        write_iter: None,
+        iopoll: None,
+        iterate_shared: Some(Self::read_dir_callback),
+        poll: None,
+        unlocked_ioctl: None,
+        compat_ioctl: None,
+        mmap: None,
+        mmap_supported_flags: 0,
+        open: None,
+        flush: None,
+        release: None,
+        fsync: None,
+        fasync: None,
+        lock: None,
+        get_unmapped_area: None,
+        check_flags: None,
+        flock: None,
+        splice_write: None,
+        splice_read: None,
+        splice_eof: None,
+        setlease: None,
+        fallocate: None,
+        show_fdinfo: None,
+        copy_file_range: None,
+        remap_file_range: None,
+        fadvise: None,
+        uring_cmd: None,
+        uring_cmd_iopoll: None,
+    };
+
+    unsafe extern "C" fn read_dir_callback(
+        file: *mut bindings::file,
+        ctx_ptr: *mut bindings::dir_context,
+    ) -> core::ffi::c_int {
+        from_result(|| {
+            // SAFETY: The C API guarantees that `file` is valid for read. And since `f_inode` is
+            // immutable, we can read it directly.
+            let inode = unsafe { &*(*file).f_inode.cast::<INode<T>>() };
+
+            // SAFETY: The C API guarantees that this is the only reference to the `dir_context`
+            // instance.
+            let emitter = unsafe { &mut *ctx_ptr.cast::<DirEmitter>() };
+            let orig_pos = emitter.pos();
+
+            // Call the module implementation. We ignore errors if directory entries have been
+            // succesfully emitted: this is because we want users to see them before the error.
+            match T::read_dir(inode, emitter) {
+                Ok(_) => Ok(0),
+                Err(e) => {
+                    if emitter.pos() == orig_pos {
+                        Err(e)
+                    } else {
+                        Ok(0)
+                    }
+                }
+            }
+        })
+    }
+}
+
+/// Directory entry emitter.
+///
+/// This is used in [`FileSystem::read_dir`] implementations to report the directory entry.
+#[repr(transparent)]
+pub struct DirEmitter(bindings::dir_context);
+
+impl DirEmitter {
+    /// Returns the current position of the emitter.
+    pub fn pos(&self) -> i64 {
+        self.0.pos
+    }
+
+    /// Emits a directory entry.
+    ///
+    /// `pos_inc` is the number with which to increment the current position on success.
+    ///
+    /// `name` is the name of the entry.
+    ///
+    /// `ino` is the inode number of the entry.
+    ///
+    /// `etype` is the type of the entry.
+    ///
+    /// Returns `false` when the entry could not be emitted, possibly because the user-provided
+    /// buffer is full.
+    pub fn emit(&mut self, pos_inc: i64, name: &[u8], ino: Ino, etype: DirEntryType) -> bool {
+        let Ok(name_len) = i32::try_from(name.len()) else {
+            return false;
+        };
+
+        let Some(actor) = self.0.actor else {
+            return false;
+        };
+
+        let Some(new_pos) = self.0.pos.checked_add(pos_inc) else {
+            return false;
+        };
+
+        // SAFETY: `name` is valid at least for the duration of the `actor` call.
+        let ret = unsafe {
+            actor(
+                &mut self.0,
+                name.as_ptr().cast(),
+                name_len,
+                self.0.pos,
+                ino,
+                etype as _,
+            )
+        };
+        if ret {
+            self.0.pos = new_pos;
+        }
+        ret
+    }
 }
 
 /// Kernel module that exposes a single file system implemented by `T`.
@@ -431,7 +613,7 @@  fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> {
 ///
 /// ```
 /// # mod module_fs_sample {
-/// use kernel::fs::{INode, NewSuperBlock, SuperBlock, SuperParams};
+/// use kernel::fs::{DirEmitter, INode, NewSuperBlock, SuperBlock, SuperParams};
 /// use kernel::prelude::*;
 /// use kernel::{c_str, fs, types::ARef};
 ///
@@ -452,6 +634,9 @@  fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> {
 ///     fn init_root(_sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>> {
 ///         todo!()
 ///     }
+///     fn read_dir(_: &INode<Self>, _: &mut DirEmitter) -> Result {
+///         todo!()
+///     }
 /// }
 /// # }
 /// ```
diff --git a/samples/rust/rust_rofs.rs b/samples/rust/rust_rofs.rs
index 9e5f4c7d1c06..4e61a94afa70 100644
--- a/samples/rust/rust_rofs.rs
+++ b/samples/rust/rust_rofs.rs
@@ -2,7 +2,9 @@ 
 
 //! Rust read-only file system sample.
 
-use kernel::fs::{INode, INodeParams, INodeType, NewSuperBlock, SuperBlock, SuperParams};
+use kernel::fs::{
+    DirEmitter, INode, INodeParams, INodeType, NewSuperBlock, SuperBlock, SuperParams,
+};
 use kernel::prelude::*;
 use kernel::{c_str, fs, time::UNIX_EPOCH, types::ARef, types::Either};
 
@@ -14,6 +16,30 @@ 
     license: "GPL",
 }
 
+struct Entry {
+    name: &'static [u8],
+    ino: u64,
+    etype: INodeType,
+}
+
+const ENTRIES: [Entry; 3] = [
+    Entry {
+        name: b".",
+        ino: 1,
+        etype: INodeType::Dir,
+    },
+    Entry {
+        name: b"..",
+        ino: 1,
+        etype: INodeType::Dir,
+    },
+    Entry {
+        name: b"subdir",
+        ino: 2,
+        etype: INodeType::Dir,
+    },
+];
+
 struct RoFs;
 impl fs::FileSystem for RoFs {
     const NAME: &'static CStr = c_str!("rust-fs");
@@ -33,7 +59,7 @@  fn init_root(sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>> {
             Either::Right(new) => new.init(INodeParams {
                 typ: INodeType::Dir,
                 mode: 0o555,
-                size: 1,
+                size: ENTRIES.len().try_into()?,
                 blocks: 1,
                 nlink: 2,
                 uid: 0,
@@ -44,4 +70,23 @@  fn init_root(sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>> {
             }),
         }
     }
+
+    fn read_dir(inode: &INode<Self>, emitter: &mut DirEmitter) -> Result {
+        if inode.ino() != 1 {
+            return Ok(());
+        }
+
+        let pos = emitter.pos();
+        if pos >= ENTRIES.len().try_into()? {
+            return Ok(());
+        }
+
+        for e in ENTRIES.iter().skip(pos.try_into()?) {
+            if !emitter.emit(1, e.name, e.ino, e.etype.into()) {
+                break;
+            }
+        }
+
+        Ok(())
+    }
 }