diff mbox series

[2/2] hugetlbfs: use tracepoints in hugetlbfs functions.

Message ID 20240612011156.2891254-3-lihongbo22@huawei.com (mailing list archive)
State New
Headers show
Series Introduce tracepoint for hugetlbfs | expand

Commit Message

Hongbo Li June 12, 2024, 1:11 a.m. UTC
Here we use the hugetlbfs tracepoints to track the call stack. The
resulting trace output is as follows:

```
touch-5307    [004] .....  1402.167607: hugetlbfs_alloc_inode: dev = (0,50), ino = 21380, dir = 16921, mode = 0100644
touch-5307    [004] .....  1402.167638: hugetlbfs_setattr: dev = (0,50), ino = 21380, name = testfile1, ia_valid = 131184, ia_mode = 0132434, ia_uid = 2863018275, ia_gid = 4294967295, old_size = 0, ia_size = 4064
truncate-5328    [003] .....  1436.031054: hugetlbfs_setattr: dev = (0,50), ino = 21380, name = testfile1, ia_valid = 8296, ia_mode = 0177777, ia_uid = 2862574544, ia_gid = 4294967295, old_size = 0, ia_size = 2097152
rm-5338    [004] .....  1484.426247: hugetlbfs_evict_inode: dev = (0,50), ino = 21380, i_mode = 0100644, i_size = 2097152, i_nlink = 0, seals = 1, i_blocks = 0
<idle>-0       [004] ..s1.  1484.446668: hugetlbfs_free_inode: dev = (0,50), ino = 21380, i_mode = 0100644, i_size = 2097152, i_nlink = 0, seals = 1, i_blocks = 0
```

Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
---
 fs/hugetlbfs/inode.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

Comments

Hongbo Li June 20, 2024, 11:50 a.m. UTC | #1
Just a friendly ping on this patch :)

https://lore.kernel.org/all/20240612011156.2891254-1-lihongbo22@huawei.com/

Thanks,
Hongbo

On 2024/6/12 9:11, Hongbo Li wrote:
> Here we use the hugetlbfs tracepoint to track the call stack. And
> the output in trace is as follows:
> 
> ```
> touch-5307    [004] .....  1402.167607: hugetlbfs_alloc_inode: dev = (0,50), ino = 21380, dir = 16921, mode = 0100644
> touch-5307    [004] .....  1402.167638: hugetlbfs_setattr: dev = (0,50), ino = 21380, name = testfile1, ia_valid = 131184, ia_mode = 0132434, ia_uid = 2863018275, ia_gid = 4294967295, old_size = 0, ia_size = 4064
> truncate-5328    [003] .....  1436.031054: hugetlbfs_setattr: dev = (0,50), ino = 21380, name = testfile1, ia_valid = 8296, ia_mode = 0177777, ia_uid = 2862574544, ia_gid = 4294967295, old_size = 0, ia_size = 2097152
> rm-5338    [004] .....  1484.426247: hugetlbfs_evict_inode: dev = (0,50), ino = 21380, i_mode = 0100644, i_size = 2097152, i_nlink = 0, seals = 1, i_blocks = 0
> <idle>-0       [004] ..s1.  1484.446668: hugetlbfs_free_inode: dev = (0,50), ino = 21380, i_mode = 0100644, i_size = 2097152, i_nlink = 0, seals = 1, i_blocks = 0
> ```
> 
> Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
> ---
>   fs/hugetlbfs/inode.c | 21 +++++++++++++++++++--
>   1 file changed, 19 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
> index 412f295acebe..f3399c6a02ca 100644
> --- a/fs/hugetlbfs/inode.c
> +++ b/fs/hugetlbfs/inode.c
> @@ -39,6 +39,9 @@
>   #include <linux/uaccess.h>
>   #include <linux/sched/mm.h>
>   
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/hugetlbfs.h>
> +
>   static const struct address_space_operations hugetlbfs_aops;
>   static const struct file_operations hugetlbfs_file_operations;
>   static const struct inode_operations hugetlbfs_dir_inode_operations;
> @@ -686,6 +689,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
>   {
>   	struct resv_map *resv_map;
>   
> +	trace_hugetlbfs_evict_inode(inode);
>   	remove_inode_hugepages(inode, 0, LLONG_MAX);
>   
>   	/*
> @@ -813,8 +817,10 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
>   	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
>   		return -EOPNOTSUPP;
>   
> -	if (mode & FALLOC_FL_PUNCH_HOLE)
> -		return hugetlbfs_punch_hole(inode, offset, len);
> +	if (mode & FALLOC_FL_PUNCH_HOLE) {
> +		error = hugetlbfs_punch_hole(inode, offset, len);
> +		goto out_nolock;
> +	}
>   
>   	/*
>   	 * Default preallocate case.
> @@ -918,6 +924,9 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
>   	inode_set_ctime_current(inode);
>   out:
>   	inode_unlock(inode);
> +
> +out_nolock:
> +	trace_hugetlbfs_fallocate(inode, mode, offset, len, error);
>   	return error;
>   }
>   
> @@ -934,6 +943,12 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
>   	if (error)
>   		return error;
>   
> +	trace_hugetlbfs_setattr(inode, dentry->d_name.len, dentry->d_name.name,
> +			attr->ia_valid, attr->ia_mode,
> +			from_kuid(&init_user_ns, attr->ia_uid),
> +			from_kgid(&init_user_ns, attr->ia_gid),
> +			inode->i_size, attr->ia_size);
> +
>   	if (ia_valid & ATTR_SIZE) {
>   		loff_t oldsize = inode->i_size;
>   		loff_t newsize = attr->ia_size;
> @@ -1032,6 +1047,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
>   			break;
>   		}
>   		lockdep_annotate_inode_mutex_key(inode);
> +		trace_hugetlbfs_alloc_inode(inode, dir, mode);
>   	} else {
>   		if (resv_map)
>   			kref_put(&resv_map->refs, resv_map_release);
> @@ -1274,6 +1290,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
>   
>   static void hugetlbfs_free_inode(struct inode *inode)
>   {
> +	trace_hugetlbfs_free_inode(inode);
>   	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
>   }
>
Steven Rostedt July 1, 2024, 11:49 p.m. UTC | #2
On Wed, 12 Jun 2024 09:11:56 +0800
Hongbo Li <lihongbo22@huawei.com> wrote:

> @@ -934,6 +943,12 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
>  	if (error)
>  		return error;
>  
> +	trace_hugetlbfs_setattr(inode, dentry->d_name.len, dentry->d_name.name,
> +			attr->ia_valid, attr->ia_mode,
> +			from_kuid(&init_user_ns, attr->ia_uid),
> +			from_kgid(&init_user_ns, attr->ia_gid),
> +			inode->i_size, attr->ia_size);
> +

That's a lot of parameters to pass to a tracepoint. Why not just pass the
dentry and attr and do the above in the TP_fast_assign() logic? That would
put less pressure on the icache for the code part.

-- Steve
Hongbo Li July 2, 2024, 11:55 a.m. UTC | #3
On 2024/7/2 7:49, Steven Rostedt wrote:
> On Wed, 12 Jun 2024 09:11:56 +0800
> Hongbo Li <lihongbo22@huawei.com> wrote:
> 
>> @@ -934,6 +943,12 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
>>   	if (error)
>>   		return error;
>>   
>> +	trace_hugetlbfs_setattr(inode, dentry->d_name.len, dentry->d_name.name,
>> +			attr->ia_valid, attr->ia_mode,
>> +			from_kuid(&init_user_ns, attr->ia_uid),
>> +			from_kgid(&init_user_ns, attr->ia_gid),
>> +			inode->i_size, attr->ia_size);
>> +
> 
> That's a lot of parameters to pass to a tracepoint. Why not just pass the
> dentry and attr and do the above in the TP_fast_assign() logic? That would
> put less pressure on the icache for the code part.

Thanks for reviewing!

Some logic, such as the kuid_t --> uid_t conversion, might more reasonably
be done in the filesystem layer. Passing the dentry and attr would require
the trace code to know the layout of these structures; perhaps the
tracepoint should be aware of their members as little as possible.

Thanks,
Hongbo

> 
> -- Steve
>
Mathieu Desnoyers July 2, 2024, 1:30 p.m. UTC | #4
On 2024-07-02 07:55, Hongbo Li wrote:
> 
> 
> On 2024/7/2 7:49, Steven Rostedt wrote:
>> On Wed, 12 Jun 2024 09:11:56 +0800
>> Hongbo Li <lihongbo22@huawei.com> wrote:
>>
>>> @@ -934,6 +943,12 @@ static int hugetlbfs_setattr(struct mnt_idmap 
>>> *idmap,
>>>       if (error)
>>>           return error;
>>> +    trace_hugetlbfs_setattr(inode, dentry->d_name.len, 
>>> dentry->d_name.name,
>>> +            attr->ia_valid, attr->ia_mode,
>>> +            from_kuid(&init_user_ns, attr->ia_uid),
>>> +            from_kgid(&init_user_ns, attr->ia_gid),
>>> +            inode->i_size, attr->ia_size);
>>> +
>>
>> That's a lot of parameters to pass to a tracepoint. Why not just pass the
>> dentry and attr and do the above in the TP_fast_assign() logic? That 
>> would
>> put less pressure on the icache for the code part.
> 
> Thanks for reviewing!
> 
> Some logic such as kuid_t --> uid_t might be reasonable obtained in 
> filesystem layer. Passing the dentry and attr will let trace know the 
> meaning of structure, perhaps tracepoint should not be aware of the
> members of these structures as much as possible.

As maintainer of the LTTng out-of-tree kernel tracer, I appreciate the
effort to decouple instrumentation from the subsystem instrumentation,
but as long as the structure sits in public headers and the global
variables used within the TP_fast_assign() logic (e.g. init_user_ns)
are export-gpl, this is enough to make it easy for tracer integration
and it keeps the tracepoint caller code footprint to a minimum.

The TRACE_EVENT definitions are specific to the subsystem anyway,
so I don't think it matters that the TRACE_EVENT() need to access
the dentry and attr structures.

So I agree with Steven's suggestion. However, just to be precise,
I suspect it will mainly have an impact on code size, but not
necessarily on icache footprint, because it will shrink the code
size within the tracepoint's unlikely branch (cold instructions).

Thanks,

Mathieu

> 
> Thanks,
> Hongbo
> 
>>
>> -- Steve
>>
Hongbo Li July 2, 2024, 2:21 p.m. UTC | #5
On 2024/7/2 21:30, Mathieu Desnoyers wrote:
> On 2024-07-02 07:55, Hongbo Li wrote:
>>
>>
>> On 2024/7/2 7:49, Steven Rostedt wrote:
>>> On Wed, 12 Jun 2024 09:11:56 +0800
>>> Hongbo Li <lihongbo22@huawei.com> wrote:
>>>
>>>> @@ -934,6 +943,12 @@ static int hugetlbfs_setattr(struct mnt_idmap 
>>>> *idmap,
>>>>       if (error)
>>>>           return error;
>>>> +    trace_hugetlbfs_setattr(inode, dentry->d_name.len, 
>>>> dentry->d_name.name,
>>>> +            attr->ia_valid, attr->ia_mode,
>>>> +            from_kuid(&init_user_ns, attr->ia_uid),
>>>> +            from_kgid(&init_user_ns, attr->ia_gid),
>>>> +            inode->i_size, attr->ia_size);
>>>> +
>>>
>>> That's a lot of parameters to pass to a tracepoint. Why not just pass 
>>> the
>>> dentry and attr and do the above in the TP_fast_assign() logic? That 
>>> would
>>> put less pressure on the icache for the code part.
>>
>> Thanks for reviewing!
>>
>> Some logic such as kuid_t --> uid_t might be reasonable obtained in 
>> filesystem layer. Passing the dentry and attr will let trace know the 
>> meaning of structure, perhaps tracepoint should not be aware of the
>> members of these structures as much as possible.
> 
> As maintainer of the LTTng out-of-tree kernel tracer, I appreciate the
> effort to decouple instrumentation from the subsystem instrumentation,
> but as long as the structure sits in public headers and the global
> variables used within the TP_fast_assign() logic (e.g. init_user_ns)
> are export-gpl, this is enough to make it easy for tracer integration
Thank you for your friendly elaboration and suggestion!
I will update this part based on your suggestion in the next version.

Thanks,
Hongbo
> and it keeps the tracepoint caller code footprint to a minimum.
> 
> The TRACE_EVENT definitions are specific to the subsystem anyway,
> so I don't think it matters that the TRACE_EVENT() need to access
> the dentry and attr structures.
> 
> So I agree with Steven's suggestion. However, just as a precision,
> I suspect it will have mainly an impact on code size, but not
> necessarily on icache footprint, because it will shrink the code
> size within the tracepoint unlikely branch (cold instructions).
> 
> Thanks,
> 
> Mathieu
> 
>>
>> Thanks,
>> Hongbo
>>
>>>
>>> -- Steve
>>>
>
diff mbox series

Patch

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 412f295acebe..f3399c6a02ca 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -39,6 +39,9 @@ 
 #include <linux/uaccess.h>
 #include <linux/sched/mm.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/hugetlbfs.h>
+
 static const struct address_space_operations hugetlbfs_aops;
 static const struct file_operations hugetlbfs_file_operations;
 static const struct inode_operations hugetlbfs_dir_inode_operations;
@@ -686,6 +689,7 @@  static void hugetlbfs_evict_inode(struct inode *inode)
 {
 	struct resv_map *resv_map;
 
+	trace_hugetlbfs_evict_inode(inode);
 	remove_inode_hugepages(inode, 0, LLONG_MAX);
 
 	/*
@@ -813,8 +817,10 @@  static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
 		return -EOPNOTSUPP;
 
-	if (mode & FALLOC_FL_PUNCH_HOLE)
-		return hugetlbfs_punch_hole(inode, offset, len);
+	if (mode & FALLOC_FL_PUNCH_HOLE) {
+		error = hugetlbfs_punch_hole(inode, offset, len);
+		goto out_nolock;
+	}
 
 	/*
 	 * Default preallocate case.
@@ -918,6 +924,9 @@  static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 	inode_set_ctime_current(inode);
 out:
 	inode_unlock(inode);
+
+out_nolock:
+	trace_hugetlbfs_fallocate(inode, mode, offset, len, error);
 	return error;
 }
 
@@ -934,6 +943,12 @@  static int hugetlbfs_setattr(struct mnt_idmap *idmap,
 	if (error)
 		return error;
 
+	trace_hugetlbfs_setattr(inode, dentry->d_name.len, dentry->d_name.name,
+			attr->ia_valid, attr->ia_mode,
+			from_kuid(&init_user_ns, attr->ia_uid),
+			from_kgid(&init_user_ns, attr->ia_gid),
+			inode->i_size, attr->ia_size);
+
 	if (ia_valid & ATTR_SIZE) {
 		loff_t oldsize = inode->i_size;
 		loff_t newsize = attr->ia_size;
@@ -1032,6 +1047,7 @@  static struct inode *hugetlbfs_get_inode(struct super_block *sb,
 			break;
 		}
 		lockdep_annotate_inode_mutex_key(inode);
+		trace_hugetlbfs_alloc_inode(inode, dir, mode);
 	} else {
 		if (resv_map)
 			kref_put(&resv_map->refs, resv_map_release);
@@ -1274,6 +1290,7 @@  static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 
 static void hugetlbfs_free_inode(struct inode *inode)
 {
+	trace_hugetlbfs_free_inode(inode);
 	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
 }