[RFC] iomap: introduce IOMAP_TAIL
diff mbox series

Message ID 20190629073020.22759-1-yuchao0@huawei.com
State New
Headers show
Series
  • [RFC] iomap: introduce IOMAP_TAIL
Related show

Commit Message

Chao Yu June 29, 2019, 7:30 a.m. UTC
Some filesystems like erofs/reiserfs have the ability to pack tail
data into metadata, however iomap framework can only support mapping
inline data with IOMAP_INLINE type, it restricts that:
- inline data should be locating at page #0.
- inline size should equal to .i_size
So we can not use IOMAP_INLINE to handle tail-packing case.

This patch introduces new mapping type IOMAP_TAIL to map tail-packed
data for further use of erofs.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
---
 fs/iomap.c            | 22 ++++++++++++++++++++++
 include/linux/iomap.h |  1 +
 2 files changed, 23 insertions(+)

Comments

Gao Xiang June 29, 2019, 9:34 a.m. UTC | #1
Hi Chao,

On 2019/6/29 15:30, Chao Yu wrote:
> Some filesystems like erofs/reiserfs have the ability to pack tail
> data into metadata, however iomap framework can only support mapping
> inline data with IOMAP_INLINE type, it restricts that:
> - inline data should be locating at page #0.
> - inline size should equal to .i_size
> So we can not use IOMAP_INLINE to handle tail-packing case.
> 
> This patch introduces new mapping type IOMAP_TAIL to map tail-packed
> data for further use of erofs.
> 
> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> ---
>  fs/iomap.c            | 22 ++++++++++++++++++++++
>  include/linux/iomap.h |  1 +
>  2 files changed, 23 insertions(+)
> 
> diff --git a/fs/iomap.c b/fs/iomap.c
> index 12654c2e78f8..ae7777ce77d0 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -280,6 +280,23 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
>  	SetPageUptodate(page);
>  }
>  
> +static void
> +iomap_read_tail_data(struct inode *inode, struct page *page,
> +		struct iomap *iomap)
> +{
> +	size_t size = i_size_read(inode) & (PAGE_SIZE - 1);
> +	void *addr;
> +
> +	if (PageUptodate(page))
> +		return;
> +
> +	addr = kmap_atomic(page);
> +	memcpy(addr, iomap->inline_data, size);
> +	memset(addr + size, 0, PAGE_SIZE - size);

need flush_dcache_page(page) here for new page cache page since
it's generic iomap code (althrough not necessary for x86, arm), I am not sure...
see commit d2b2c6dd227b and c01778001a4f...

Thanks,
Gao Xiang

> +	kunmap_atomic(addr);
> +	SetPageUptodate(page);
> +}
> +
>  static loff_t
>  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  		struct iomap *iomap)
> @@ -298,6 +315,11 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  		return PAGE_SIZE;
>  	}
>  
> +	if (iomap->type == IOMAP_TAIL) {
> +		iomap_read_tail_data(inode, page, iomap);
> +		return PAGE_SIZE;
> +	}
> +
>  	/* zero post-eof blocks as the page may be mapped */
>  	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
>  	if (plen == 0)
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 2103b94cb1bf..7e1ee48e3db7 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -25,6 +25,7 @@ struct vm_fault;
>  #define IOMAP_MAPPED	0x03	/* blocks allocated at @addr */
>  #define IOMAP_UNWRITTEN	0x04	/* blocks allocated at @addr in unwritten state */
>  #define IOMAP_INLINE	0x05	/* data inline in the inode */
> +#define IOMAP_TAIL	0x06	/* tail data packed in metdata */
>  
>  /*
>   * Flags for all iomap mappings:
>
Darrick J. Wong June 30, 2019, 11:19 p.m. UTC | #2
On Sat, Jun 29, 2019 at 03:30:20PM +0800, Chao Yu wrote:
> Some filesystems like erofs/reiserfs have the ability to pack tail
> data into metadata, however iomap framework can only support mapping
> inline data with IOMAP_INLINE type, it restricts that:
> - inline data should be locating at page #0.
> - inline size should equal to .i_size

Wouldn't it be easier simply to fix the meaning of IOMAP_INLINE so that
it can be used at something other than offset 0 and length == isize?
IOWs, make it mean "use the *inline_data pointer to read/write data
as a direct memory access"?

I also don't really like the idea of leaving the write paths
unimplemented in core code, though I suppose as an erofs developer
you're not likely to have a good means for testing... :/

/me starts wondering if a better solution would be to invent iomaptestfs
which exists solely to test all iomap code with as little other
intelligence as possible...

--D

> So we can not use IOMAP_INLINE to handle tail-packing case.
> 
> This patch introduces new mapping type IOMAP_TAIL to map tail-packed
> data for further use of erofs.
> 
> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> ---
>  fs/iomap.c            | 22 ++++++++++++++++++++++
>  include/linux/iomap.h |  1 +
>  2 files changed, 23 insertions(+)
> 
> diff --git a/fs/iomap.c b/fs/iomap.c
> index 12654c2e78f8..ae7777ce77d0 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -280,6 +280,23 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
>  	SetPageUptodate(page);
>  }
>  
> +static void
> +iomap_read_tail_data(struct inode *inode, struct page *page,
> +		struct iomap *iomap)
> +{
> +	size_t size = i_size_read(inode) & (PAGE_SIZE - 1);
> +	void *addr;
> +
> +	if (PageUptodate(page))
> +		return;
> +
> +	addr = kmap_atomic(page);
> +	memcpy(addr, iomap->inline_data, size);
> +	memset(addr + size, 0, PAGE_SIZE - size);
> +	kunmap_atomic(addr);
> +	SetPageUptodate(page);
> +}
> +
>  static loff_t
>  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  		struct iomap *iomap)
> @@ -298,6 +315,11 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  		return PAGE_SIZE;
>  	}
>  
> +	if (iomap->type == IOMAP_TAIL) {
> +		iomap_read_tail_data(inode, page, iomap);
> +		return PAGE_SIZE;
> +	}
> +
>  	/* zero post-eof blocks as the page may be mapped */
>  	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
>  	if (plen == 0)
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 2103b94cb1bf..7e1ee48e3db7 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -25,6 +25,7 @@ struct vm_fault;
>  #define IOMAP_MAPPED	0x03	/* blocks allocated at @addr */
>  #define IOMAP_UNWRITTEN	0x04	/* blocks allocated at @addr in unwritten state */
>  #define IOMAP_INLINE	0x05	/* data inline in the inode */
> +#define IOMAP_TAIL	0x06	/* tail data packed in metdata */
>  
>  /*
>   * Flags for all iomap mappings:
> -- 
> 2.18.0.rc1
>
Christoph Hellwig July 1, 2019, 6:08 a.m. UTC | #3
On Sun, Jun 30, 2019 at 04:19:32PM -0700, Darrick J. Wong wrote:
> On Sat, Jun 29, 2019 at 03:30:20PM +0800, Chao Yu wrote:
> > Some filesystems like erofs/reiserfs have the ability to pack tail
> > data into metadata, however iomap framework can only support mapping
> > inline data with IOMAP_INLINE type, it restricts that:
> > - inline data should be locating at page #0.
> > - inline size should equal to .i_size
> 
> Wouldn't it be easier simply to fix the meaning of IOMAP_INLINE so that
> it can be used at something other than offset 0 and length == isize?
> IOWs, make it mean "use the *inline_data pointer to read/write data
> as a direct memory access"?

Yes.  I vaguely remember Andreas pointed out some issues with a
general scheme, which is why we put the limits in.  If that is not just
me making things up we'll need to address them.  Either way just copy
and pasting code isn't very helpful.
Chao Yu July 1, 2019, 6:40 a.m. UTC | #4
Hi Xiang,

On 2019/6/29 17:34, Gao Xiang wrote:
> Hi Chao,
> 
> On 2019/6/29 15:30, Chao Yu wrote:
>> Some filesystems like erofs/reiserfs have the ability to pack tail
>> data into metadata, however iomap framework can only support mapping
>> inline data with IOMAP_INLINE type, it restricts that:
>> - inline data should be locating at page #0.
>> - inline size should equal to .i_size
>> So we can not use IOMAP_INLINE to handle tail-packing case.
>>
>> This patch introduces new mapping type IOMAP_TAIL to map tail-packed
>> data for further use of erofs.
>>
>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>> ---
>>  fs/iomap.c            | 22 ++++++++++++++++++++++
>>  include/linux/iomap.h |  1 +
>>  2 files changed, 23 insertions(+)
>>
>> diff --git a/fs/iomap.c b/fs/iomap.c
>> index 12654c2e78f8..ae7777ce77d0 100644
>> --- a/fs/iomap.c
>> +++ b/fs/iomap.c
>> @@ -280,6 +280,23 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
>>  	SetPageUptodate(page);
>>  }
>>  
>> +static void
>> +iomap_read_tail_data(struct inode *inode, struct page *page,
>> +		struct iomap *iomap)
>> +{
>> +	size_t size = i_size_read(inode) & (PAGE_SIZE - 1);
>> +	void *addr;
>> +
>> +	if (PageUptodate(page))
>> +		return;
>> +
>> +	addr = kmap_atomic(page);
>> +	memcpy(addr, iomap->inline_data, size);
>> +	memset(addr + size, 0, PAGE_SIZE - size);
> 
> need flush_dcache_page(page) here for new page cache page since
> it's generic iomap code (althrough not necessary for x86, arm), I am not sure...
> see commit d2b2c6dd227b and c01778001a4f...

Thanks for your reminding, these all codes were copied from
iomap_read_inline_data(), so I think we need a separated patch to fix this issue
if necessary.

Thanks,

> 
> Thanks,
> Gao Xiang
> 
>> +	kunmap_atomic(addr);
>> +	SetPageUptodate(page);
>> +}
>> +
>>  static loff_t
>>  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>>  		struct iomap *iomap)
>> @@ -298,6 +315,11 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>>  		return PAGE_SIZE;
>>  	}
>>  
>> +	if (iomap->type == IOMAP_TAIL) {
>> +		iomap_read_tail_data(inode, page, iomap);
>> +		return PAGE_SIZE;
>> +	}
>> +
>>  	/* zero post-eof blocks as the page may be mapped */
>>  	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
>>  	if (plen == 0)
>> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
>> index 2103b94cb1bf..7e1ee48e3db7 100644
>> --- a/include/linux/iomap.h
>> +++ b/include/linux/iomap.h
>> @@ -25,6 +25,7 @@ struct vm_fault;
>>  #define IOMAP_MAPPED	0x03	/* blocks allocated at @addr */
>>  #define IOMAP_UNWRITTEN	0x04	/* blocks allocated at @addr in unwritten state */
>>  #define IOMAP_INLINE	0x05	/* data inline in the inode */
>> +#define IOMAP_TAIL	0x06	/* tail data packed in metdata */
>>  
>>  /*
>>   * Flags for all iomap mappings:
>>
> .
>
Gao Xiang July 1, 2019, 7:03 a.m. UTC | #5
On 2019/7/1 14:40, Chao Yu wrote:
> Hi Xiang,
> 
> On 2019/6/29 17:34, Gao Xiang wrote:
>> Hi Chao,
>>
>> On 2019/6/29 15:30, Chao Yu wrote:
>>> Some filesystems like erofs/reiserfs have the ability to pack tail
>>> data into metadata, however iomap framework can only support mapping
>>> inline data with IOMAP_INLINE type, it restricts that:
>>> - inline data should be locating at page #0.
>>> - inline size should equal to .i_size
>>> So we can not use IOMAP_INLINE to handle tail-packing case.
>>>
>>> This patch introduces new mapping type IOMAP_TAIL to map tail-packed
>>> data for further use of erofs.
>>>
>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>> ---
>>>  fs/iomap.c            | 22 ++++++++++++++++++++++
>>>  include/linux/iomap.h |  1 +
>>>  2 files changed, 23 insertions(+)
>>>
>>> diff --git a/fs/iomap.c b/fs/iomap.c
>>> index 12654c2e78f8..ae7777ce77d0 100644
>>> --- a/fs/iomap.c
>>> +++ b/fs/iomap.c
>>> @@ -280,6 +280,23 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
>>>  	SetPageUptodate(page);
>>>  }
>>>  
>>> +static void
>>> +iomap_read_tail_data(struct inode *inode, struct page *page,
>>> +		struct iomap *iomap)
>>> +{
>>> +	size_t size = i_size_read(inode) & (PAGE_SIZE - 1);
>>> +	void *addr;
>>> +
>>> +	if (PageUptodate(page))
>>> +		return;
>>> +
>>> +	addr = kmap_atomic(page);
>>> +	memcpy(addr, iomap->inline_data, size);
>>> +	memset(addr + size, 0, PAGE_SIZE - size);
>>
>> need flush_dcache_page(page) here for new page cache page since
>> it's generic iomap code (althrough not necessary for x86, arm), I am not sure...
>> see commit d2b2c6dd227b and c01778001a4f...
> 
> Thanks for your reminding, these all codes were copied from
> iomap_read_inline_data(), so I think we need a separated patch to fix this issue
> if necessary.

Yes, just a reminder, it is good as it-is.

Thanks,
Gao Xiang

> 
> Thanks,
> 
>>
>> Thanks,
>> Gao Xiang
>>
>>> +	kunmap_atomic(addr);
>>> +	SetPageUptodate(page);
>>> +}
>>> +
>>>  static loff_t
>>>  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>>>  		struct iomap *iomap)
>>> @@ -298,6 +315,11 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>>>  		return PAGE_SIZE;
>>>  	}
>>>  
>>> +	if (iomap->type == IOMAP_TAIL) {
>>> +		iomap_read_tail_data(inode, page, iomap);
>>> +		return PAGE_SIZE;
>>> +	}
>>> +
>>>  	/* zero post-eof blocks as the page may be mapped */
>>>  	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
>>>  	if (plen == 0)
>>> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
>>> index 2103b94cb1bf..7e1ee48e3db7 100644
>>> --- a/include/linux/iomap.h
>>> +++ b/include/linux/iomap.h
>>> @@ -25,6 +25,7 @@ struct vm_fault;
>>>  #define IOMAP_MAPPED	0x03	/* blocks allocated at @addr */
>>>  #define IOMAP_UNWRITTEN	0x04	/* blocks allocated at @addr in unwritten state */
>>>  #define IOMAP_INLINE	0x05	/* data inline in the inode */
>>> +#define IOMAP_TAIL	0x06	/* tail data packed in metdata */
>>>  
>>>  /*
>>>   * Flags for all iomap mappings:
>>>
>> .
>>
Chao Yu July 1, 2019, 7:28 a.m. UTC | #6
On 2019/7/1 7:19, Darrick J. Wong wrote:
> On Sat, Jun 29, 2019 at 03:30:20PM +0800, Chao Yu wrote:
>> Some filesystems like erofs/reiserfs have the ability to pack tail
>> data into metadata, however iomap framework can only support mapping
>> inline data with IOMAP_INLINE type, it restricts that:
>> - inline data should be locating at page #0.
>> - inline size should equal to .i_size
> 
> Wouldn't it be easier simply to fix the meaning of IOMAP_INLINE so that
> it can be used at something other than offset 0 and length == isize?
> IOWs, make it mean "use the *inline_data pointer to read/write data
> as a direct memory access"?

I thought about that, finally I implemented this by adding a new type because:
-  I checked fs.h, noticing that there are two separated flags:
FS_INLINE_DATA_FL and FS_NOTAIL_FL, I guess they are separated features, but not
sure about it...
- If we keep those restriction of inline data, maybe we can help to do sanity
check on inline data for most inline function callers additionally, since most
filesystems implement inline_data feature with restriction by default.

Anyway, I can change IOMAP_INLINE's restriction, and adjust erofs to use it if
there is no further concern on those restrictions.

> 
> I also don't really like the idea of leaving the write paths
> unimplemented in core code, though I suppose as an erofs developer
> you're not likely to have a good means for testing... :/

Yes, I don't like it too, but previously I didn't add it because I'm not sure
that, recently we have potential user of IOMAP_TAIL's write path except
reiserfs, it may be out-of-{maintain,test} due to lack of user later....

> 
> /me starts wondering if a better solution would be to invent iomaptestfs
> which exists solely to test all iomap code with as little other
> intelligence as possible...

Good idea, any plan on this fs? :)

Now, for erofs, as we don't support mapping hole, so I just inject code to force
covering IOMAP_HOLE path. :P

Thanks,

> 
> --D
> 
>> So we can not use IOMAP_INLINE to handle tail-packing case.
>>
>> This patch introduces new mapping type IOMAP_TAIL to map tail-packed
>> data for further use of erofs.
>>
>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>> ---
>>  fs/iomap.c            | 22 ++++++++++++++++++++++
>>  include/linux/iomap.h |  1 +
>>  2 files changed, 23 insertions(+)
>>
>> diff --git a/fs/iomap.c b/fs/iomap.c
>> index 12654c2e78f8..ae7777ce77d0 100644
>> --- a/fs/iomap.c
>> +++ b/fs/iomap.c
>> @@ -280,6 +280,23 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
>>  	SetPageUptodate(page);
>>  }
>>  
>> +static void
>> +iomap_read_tail_data(struct inode *inode, struct page *page,
>> +		struct iomap *iomap)
>> +{
>> +	size_t size = i_size_read(inode) & (PAGE_SIZE - 1);
>> +	void *addr;
>> +
>> +	if (PageUptodate(page))
>> +		return;
>> +
>> +	addr = kmap_atomic(page);
>> +	memcpy(addr, iomap->inline_data, size);
>> +	memset(addr + size, 0, PAGE_SIZE - size);
>> +	kunmap_atomic(addr);
>> +	SetPageUptodate(page);
>> +}
>> +
>>  static loff_t
>>  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>>  		struct iomap *iomap)
>> @@ -298,6 +315,11 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>>  		return PAGE_SIZE;
>>  	}
>>  
>> +	if (iomap->type == IOMAP_TAIL) {
>> +		iomap_read_tail_data(inode, page, iomap);
>> +		return PAGE_SIZE;
>> +	}
>> +
>>  	/* zero post-eof blocks as the page may be mapped */
>>  	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
>>  	if (plen == 0)
>> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
>> index 2103b94cb1bf..7e1ee48e3db7 100644
>> --- a/include/linux/iomap.h
>> +++ b/include/linux/iomap.h
>> @@ -25,6 +25,7 @@ struct vm_fault;
>>  #define IOMAP_MAPPED	0x03	/* blocks allocated at @addr */
>>  #define IOMAP_UNWRITTEN	0x04	/* blocks allocated at @addr in unwritten state */
>>  #define IOMAP_INLINE	0x05	/* data inline in the inode */
>> +#define IOMAP_TAIL	0x06	/* tail data packed in metdata */
>>  
>>  /*
>>   * Flags for all iomap mappings:
>> -- 
>> 2.18.0.rc1
>>
> .
>
Chao Yu July 1, 2019, 7:38 a.m. UTC | #7
On 2019/7/1 14:08, Christoph Hellwig wrote:
> On Sun, Jun 30, 2019 at 04:19:32PM -0700, Darrick J. Wong wrote:
>> On Sat, Jun 29, 2019 at 03:30:20PM +0800, Chao Yu wrote:
>>> Some filesystems like erofs/reiserfs have the ability to pack tail
>>> data into metadata, however iomap framework can only support mapping
>>> inline data with IOMAP_INLINE type, it restricts that:
>>> - inline data should be locating at page #0.
>>> - inline size should equal to .i_size
>>
>> Wouldn't it be easier simply to fix the meaning of IOMAP_INLINE so that
>> it can be used at something other than offset 0 and length == isize?
>> IOWs, make it mean "use the *inline_data pointer to read/write data
>> as a direct memory access"?
> 
> Yes.  I vaguely remember Andreas pointed out some issues with a

@Andreas, could you please help to explain which issue we may encounter without
those limits?

Thanks,

> general scheme, which is why we put the limits in.  If that is not just
> me making things up we'll need to address them.  Either way just copy
> and pasting code isn't very helpful.
> .
>
Andreas Grünbacher July 1, 2019, 9:49 a.m. UTC | #8
Am Mo., 1. Juli 2019 um 10:04 Uhr schrieb Gao Xiang <gaoxiang25@huawei.com>:
> On 2019/7/1 14:40, Chao Yu wrote:
> > Hi Xiang,
> >
> > On 2019/6/29 17:34, Gao Xiang wrote:
> >> Hi Chao,
> >>
> >> On 2019/6/29 15:30, Chao Yu wrote:
> >>> Some filesystems like erofs/reiserfs have the ability to pack tail
> >>> data into metadata, however iomap framework can only support mapping
> >>> inline data with IOMAP_INLINE type, it restricts that:
> >>> - inline data should be locating at page #0.
> >>> - inline size should equal to .i_size
> >>> So we can not use IOMAP_INLINE to handle tail-packing case.
> >>>
> >>> This patch introduces new mapping type IOMAP_TAIL to map tail-packed
> >>> data for further use of erofs.
> >>>
> >>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >>> ---
> >>>  fs/iomap.c            | 22 ++++++++++++++++++++++
> >>>  include/linux/iomap.h |  1 +
> >>>  2 files changed, 23 insertions(+)
> >>>
> >>> diff --git a/fs/iomap.c b/fs/iomap.c
> >>> index 12654c2e78f8..ae7777ce77d0 100644
> >>> --- a/fs/iomap.c
> >>> +++ b/fs/iomap.c
> >>> @@ -280,6 +280,23 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
> >>>     SetPageUptodate(page);
> >>>  }
> >>>
> >>> +static void
> >>> +iomap_read_tail_data(struct inode *inode, struct page *page,
> >>> +           struct iomap *iomap)
> >>> +{
> >>> +   size_t size = i_size_read(inode) & (PAGE_SIZE - 1);
> >>> +   void *addr;
> >>> +
> >>> +   if (PageUptodate(page))
> >>> +           return;
> >>> +
> >>> +   addr = kmap_atomic(page);
> >>> +   memcpy(addr, iomap->inline_data, size);
> >>> +   memset(addr + size, 0, PAGE_SIZE - size);
> >>
> >> need flush_dcache_page(page) here for new page cache page since
> >> it's generic iomap code (althrough not necessary for x86, arm), I am not sure...
> >> see commit d2b2c6dd227b and c01778001a4f...
> >
> > Thanks for your reminding, these all codes were copied from
> > iomap_read_inline_data(), so I think we need a separated patch to fix this issue
> > if necessary.
>
> Yes, just a reminder, it is good as it-is.

Not sure if that means that IOMAP_INLINE as is works for you now. In
any case, if the inline data isn't transparently copied into the page
cache at index 0, memory-mapped I/O isn't going to work.

The code further assumes that "packed" files consist of exactly one
IOMAP_INLINE mapping; no IOMAP_MAPPED or other mappings may follow. Is
it that assumption that's causing you trouble? If so, what's the
layout at the filesystem level you want to support?

Thanks,
Andreas

> Thanks,
> Gao Xiang
>
> >
> > Thanks,
> >
> >>
> >> Thanks,
> >> Gao Xiang
> >>
> >>> +   kunmap_atomic(addr);
> >>> +   SetPageUptodate(page);
> >>> +}
> >>> +
> >>>  static loff_t
> >>>  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> >>>             struct iomap *iomap)
> >>> @@ -298,6 +315,11 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> >>>             return PAGE_SIZE;
> >>>     }
> >>>
> >>> +   if (iomap->type == IOMAP_TAIL) {
> >>> +           iomap_read_tail_data(inode, page, iomap);
> >>> +           return PAGE_SIZE;
> >>> +   }
> >>> +
> >>>     /* zero post-eof blocks as the page may be mapped */
> >>>     iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
> >>>     if (plen == 0)
> >>> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> >>> index 2103b94cb1bf..7e1ee48e3db7 100644
> >>> --- a/include/linux/iomap.h
> >>> +++ b/include/linux/iomap.h
> >>> @@ -25,6 +25,7 @@ struct vm_fault;
> >>>  #define IOMAP_MAPPED       0x03    /* blocks allocated at @addr */
> >>>  #define IOMAP_UNWRITTEN    0x04    /* blocks allocated at @addr in unwritten state */
> >>>  #define IOMAP_INLINE       0x05    /* data inline in the inode */
> >>> +#define IOMAP_TAIL 0x06    /* tail data packed in metdata */
> >>>
> >>>  /*
> >>>   * Flags for all iomap mappings:
> >>>
> >> .
> >>
Chao Yu July 1, 2019, 10:07 a.m. UTC | #9
On 2019/7/1 17:49, Andreas Grünbacher wrote:
> Am Mo., 1. Juli 2019 um 10:04 Uhr schrieb Gao Xiang <gaoxiang25@huawei.com>:
>> On 2019/7/1 14:40, Chao Yu wrote:
>>> Hi Xiang,
>>>
>>> On 2019/6/29 17:34, Gao Xiang wrote:
>>>> Hi Chao,
>>>>
>>>> On 2019/6/29 15:30, Chao Yu wrote:
>>>>> Some filesystems like erofs/reiserfs have the ability to pack tail
>>>>> data into metadata, however iomap framework can only support mapping
>>>>> inline data with IOMAP_INLINE type, it restricts that:
>>>>> - inline data should be locating at page #0.
>>>>> - inline size should equal to .i_size
>>>>> So we can not use IOMAP_INLINE to handle tail-packing case.
>>>>>
>>>>> This patch introduces new mapping type IOMAP_TAIL to map tail-packed
>>>>> data for further use of erofs.
>>>>>
>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>> ---
>>>>>  fs/iomap.c            | 22 ++++++++++++++++++++++
>>>>>  include/linux/iomap.h |  1 +
>>>>>  2 files changed, 23 insertions(+)
>>>>>
>>>>> diff --git a/fs/iomap.c b/fs/iomap.c
>>>>> index 12654c2e78f8..ae7777ce77d0 100644
>>>>> --- a/fs/iomap.c
>>>>> +++ b/fs/iomap.c
>>>>> @@ -280,6 +280,23 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
>>>>>     SetPageUptodate(page);
>>>>>  }
>>>>>
>>>>> +static void
>>>>> +iomap_read_tail_data(struct inode *inode, struct page *page,
>>>>> +           struct iomap *iomap)
>>>>> +{
>>>>> +   size_t size = i_size_read(inode) & (PAGE_SIZE - 1);
>>>>> +   void *addr;
>>>>> +
>>>>> +   if (PageUptodate(page))
>>>>> +           return;
>>>>> +
>>>>> +   addr = kmap_atomic(page);
>>>>> +   memcpy(addr, iomap->inline_data, size);
>>>>> +   memset(addr + size, 0, PAGE_SIZE - size);
>>>>
>>>> need flush_dcache_page(page) here for new page cache page since
>>>> it's generic iomap code (althrough not necessary for x86, arm), I am not sure...
>>>> see commit d2b2c6dd227b and c01778001a4f...
>>>
>>> Thanks for your reminding, these all codes were copied from
>>> iomap_read_inline_data(), so I think we need a separated patch to fix this issue
>>> if necessary.
>>
>> Yes, just a reminder, it is good as it-is.
> 
> Not sure if that means that IOMAP_INLINE as is works for you now. In
> any case, if the inline data isn't transparently copied into the page
> cache at index 0, memory-mapped I/O isn't going to work.
> 
> The code further assumes that "packed" files consist of exactly one
> IOMAP_INLINE mapping; no IOMAP_MAPPED or other mappings may follow. Is
> it that assumption that's causing you trouble? If so, what's the

Yes, that's the problem we met.

> layout at the filesystem level you want to support?

The layout which can support tail-merge one, it means if the data locating at
the tail of file has small enough size, we can inline the tail data into
metadata area. e.g.:

IOMAP_MAPPED [0, 8192]
IOMAP_INLINE (or maybe IOMAP_TAIL) [8192, 8200]

Thanks,

> 
> Thanks,
> Andreas
> 
>> Thanks,
>> Gao Xiang
>>
>>>
>>> Thanks,
>>>
>>>>
>>>> Thanks,
>>>> Gao Xiang
>>>>
>>>>> +   kunmap_atomic(addr);
>>>>> +   SetPageUptodate(page);
>>>>> +}
>>>>> +
>>>>>  static loff_t
>>>>>  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>>>>>             struct iomap *iomap)
>>>>> @@ -298,6 +315,11 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>>>>>             return PAGE_SIZE;
>>>>>     }
>>>>>
>>>>> +   if (iomap->type == IOMAP_TAIL) {
>>>>> +           iomap_read_tail_data(inode, page, iomap);
>>>>> +           return PAGE_SIZE;
>>>>> +   }
>>>>> +
>>>>>     /* zero post-eof blocks as the page may be mapped */
>>>>>     iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
>>>>>     if (plen == 0)
>>>>> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
>>>>> index 2103b94cb1bf..7e1ee48e3db7 100644
>>>>> --- a/include/linux/iomap.h
>>>>> +++ b/include/linux/iomap.h
>>>>> @@ -25,6 +25,7 @@ struct vm_fault;
>>>>>  #define IOMAP_MAPPED       0x03    /* blocks allocated at @addr */
>>>>>  #define IOMAP_UNWRITTEN    0x04    /* blocks allocated at @addr in unwritten state */
>>>>>  #define IOMAP_INLINE       0x05    /* data inline in the inode */
>>>>> +#define IOMAP_TAIL 0x06    /* tail data packed in metdata */
>>>>>
>>>>>  /*
>>>>>   * Flags for all iomap mappings:
>>>>>
>>>> .
>>>>
> .
>
Andreas Grünbacher July 1, 2019, 10:13 a.m. UTC | #10
Am Mo., 1. Juli 2019 um 12:09 Uhr schrieb Chao Yu <yuchao0@huawei.com>:
> On 2019/7/1 17:49, Andreas Grünbacher wrote:
> > Am Mo., 1. Juli 2019 um 10:04 Uhr schrieb Gao Xiang <gaoxiang25@huawei.com>:
> >> On 2019/7/1 14:40, Chao Yu wrote:
> >>> Hi Xiang,
> >>>
> >>> On 2019/6/29 17:34, Gao Xiang wrote:
> >>>> Hi Chao,
> >>>>
> >>>> On 2019/6/29 15:30, Chao Yu wrote:
> >>>>> Some filesystems like erofs/reiserfs have the ability to pack tail
> >>>>> data into metadata, however iomap framework can only support mapping
> >>>>> inline data with IOMAP_INLINE type, it restricts that:
> >>>>> - inline data should be locating at page #0.
> >>>>> - inline size should equal to .i_size
> >>>>> So we can not use IOMAP_INLINE to handle tail-packing case.
> >>>>>
> >>>>> This patch introduces new mapping type IOMAP_TAIL to map tail-packed
> >>>>> data for further use of erofs.
> >>>>>
> >>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
> >>>>> ---
> >>>>>  fs/iomap.c            | 22 ++++++++++++++++++++++
> >>>>>  include/linux/iomap.h |  1 +
> >>>>>  2 files changed, 23 insertions(+)
> >>>>>
> >>>>> diff --git a/fs/iomap.c b/fs/iomap.c
> >>>>> index 12654c2e78f8..ae7777ce77d0 100644
> >>>>> --- a/fs/iomap.c
> >>>>> +++ b/fs/iomap.c
> >>>>> @@ -280,6 +280,23 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
> >>>>>     SetPageUptodate(page);
> >>>>>  }
> >>>>>
> >>>>> +static void
> >>>>> +iomap_read_tail_data(struct inode *inode, struct page *page,
> >>>>> +           struct iomap *iomap)
> >>>>> +{
> >>>>> +   size_t size = i_size_read(inode) & (PAGE_SIZE - 1);
> >>>>> +   void *addr;
> >>>>> +
> >>>>> +   if (PageUptodate(page))
> >>>>> +           return;
> >>>>> +
> >>>>> +   addr = kmap_atomic(page);
> >>>>> +   memcpy(addr, iomap->inline_data, size);
> >>>>> +   memset(addr + size, 0, PAGE_SIZE - size);
> >>>>
> >>>> need flush_dcache_page(page) here for new page cache page since
> >>>> it's generic iomap code (althrough not necessary for x86, arm), I am not sure...
> >>>> see commit d2b2c6dd227b and c01778001a4f...
> >>>
> >>> Thanks for your reminding, these all codes were copied from
> >>> iomap_read_inline_data(), so I think we need a separated patch to fix this issue
> >>> if necessary.
> >>
> >> Yes, just a reminder, it is good as it-is.
> >
> > Not sure if that means that IOMAP_INLINE as is works for you now. In
> > any case, if the inline data isn't transparently copied into the page
> > cache at index 0, memory-mapped I/O isn't going to work.
> >
> > The code further assumes that "packed" files consist of exactly one
> > IOMAP_INLINE mapping; no IOMAP_MAPPED or other mappings may follow. Is
> > it that assumption that's causing you trouble? If so, what's the
>
> Yes, that's the problem we met.
>
> > layout at the filesystem level you want to support?
>
> The layout which can support tail-merge one, it means if the data locating at
> the tail of file has small enough size, we can inline the tail data into
> metadata area. e.g.:
>
> IOMAP_MAPPED [0, 8192]
> IOMAP_INLINE (or maybe IOMAP_TAIL) [8192, 8200]

Ah, now I see. Let's generalize the IOMAP_INLINE code to support that
and rename it to IOMAP_TAIL then.

Thanks,
Andreas
Chao Yu July 1, 2019, 10:22 a.m. UTC | #11
On 2019/7/1 18:13, Andreas Grünbacher wrote:
> Am Mo., 1. Juli 2019 um 12:09 Uhr schrieb Chao Yu <yuchao0@huawei.com>:
>> On 2019/7/1 17:49, Andreas Grünbacher wrote:
>>> Am Mo., 1. Juli 2019 um 10:04 Uhr schrieb Gao Xiang <gaoxiang25@huawei.com>:
>>>> On 2019/7/1 14:40, Chao Yu wrote:
>>>>> Hi Xiang,
>>>>>
>>>>> On 2019/6/29 17:34, Gao Xiang wrote:
>>>>>> Hi Chao,
>>>>>>
>>>>>> On 2019/6/29 15:30, Chao Yu wrote:
>>>>>>> Some filesystems like erofs/reiserfs have the ability to pack tail
>>>>>>> data into metadata, however iomap framework can only support mapping
>>>>>>> inline data with IOMAP_INLINE type, it restricts that:
>>>>>>> - inline data should be locating at page #0.
>>>>>>> - inline size should equal to .i_size
>>>>>>> So we can not use IOMAP_INLINE to handle tail-packing case.
>>>>>>>
>>>>>>> This patch introduces new mapping type IOMAP_TAIL to map tail-packed
>>>>>>> data for further use of erofs.
>>>>>>>
>>>>>>> Signed-off-by: Chao Yu <yuchao0@huawei.com>
>>>>>>> ---
>>>>>>>  fs/iomap.c            | 22 ++++++++++++++++++++++
>>>>>>>  include/linux/iomap.h |  1 +
>>>>>>>  2 files changed, 23 insertions(+)
>>>>>>>
>>>>>>> diff --git a/fs/iomap.c b/fs/iomap.c
>>>>>>> index 12654c2e78f8..ae7777ce77d0 100644
>>>>>>> --- a/fs/iomap.c
>>>>>>> +++ b/fs/iomap.c
>>>>>>> @@ -280,6 +280,23 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
>>>>>>>     SetPageUptodate(page);
>>>>>>>  }
>>>>>>>
>>>>>>> +static void
>>>>>>> +iomap_read_tail_data(struct inode *inode, struct page *page,
>>>>>>> +           struct iomap *iomap)
>>>>>>> +{
>>>>>>> +   size_t size = i_size_read(inode) & (PAGE_SIZE - 1);
>>>>>>> +   void *addr;
>>>>>>> +
>>>>>>> +   if (PageUptodate(page))
>>>>>>> +           return;
>>>>>>> +
>>>>>>> +   addr = kmap_atomic(page);
>>>>>>> +   memcpy(addr, iomap->inline_data, size);
>>>>>>> +   memset(addr + size, 0, PAGE_SIZE - size);
>>>>>>
>>>>>> need flush_dcache_page(page) here for new page cache page since
>>>>>> it's generic iomap code (althrough not necessary for x86, arm), I am not sure...
>>>>>> see commit d2b2c6dd227b and c01778001a4f...
>>>>>
>>>>> Thanks for your reminding, these all codes were copied from
>>>>> iomap_read_inline_data(), so I think we need a separated patch to fix this issue
>>>>> if necessary.
>>>>
>>>> Yes, just a reminder, it is good as it-is.
>>>
>>> Not sure if that means that IOMAP_INLINE as is works for you now. In
>>> any case, if the inline data isn't transparently copied into the page
>>> cache at index 0, memory-mapped I/O isn't going to work.
>>>
>>> The code further assumes that "packed" files consist of exactly one
>>> IOMAP_INLINE mapping; no IOMAP_MAPPED or other mappings may follow. Is
>>> it that assumption that's causing you trouble? If so, what's the
>>
>> Yes, that's the problem we met.
>>
>>> layout at the filesystem level you want to support?
>>
>> The layout which can support tail-merge one, it means if the data locating at
>> the tail of file has small enough size, we can inline the tail data into
>> metadata area. e.g.:
>>
>> IOMAP_MAPPED [0, 8192]
>> IOMAP_INLINE (or maybe IOMAP_TAIL) [8192, 8200]
> 
> Ah, now I see. Let's generalize the IOMAP_INLINE code to support that
> and rename it to IOMAP_TAIL then.

Okay, it's fine to me, let me refactor the RFC patch. ;)

Thanks,

> 
> Thanks,
> Andreas
> .
>

Patch
diff mbox series

diff --git a/fs/iomap.c b/fs/iomap.c
index 12654c2e78f8..ae7777ce77d0 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -280,6 +280,23 @@  iomap_read_inline_data(struct inode *inode, struct page *page,
 	SetPageUptodate(page);
 }
 
+static void
+iomap_read_tail_data(struct inode *inode, struct page *page,
+		struct iomap *iomap)
+{
+	size_t size = i_size_read(inode) & (PAGE_SIZE - 1);
+	void *addr;
+
+	if (PageUptodate(page))
+		return;
+
+	addr = kmap_atomic(page);
+	memcpy(addr, iomap->inline_data, size);
+	memset(addr + size, 0, PAGE_SIZE - size);
+	kunmap_atomic(addr);
+	SetPageUptodate(page);
+}
+
 static loff_t
 iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		struct iomap *iomap)
@@ -298,6 +315,11 @@  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		return PAGE_SIZE;
 	}
 
+	if (iomap->type == IOMAP_TAIL) {
+		iomap_read_tail_data(inode, page, iomap);
+		return PAGE_SIZE;
+	}
+
 	/* zero post-eof blocks as the page may be mapped */
 	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
 	if (plen == 0)
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 2103b94cb1bf..7e1ee48e3db7 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -25,6 +25,7 @@  struct vm_fault;
 #define IOMAP_MAPPED	0x03	/* blocks allocated at @addr */
 #define IOMAP_UNWRITTEN	0x04	/* blocks allocated at @addr in unwritten state */
 #define IOMAP_INLINE	0x05	/* data inline in the inode */
+#define IOMAP_TAIL	0x06	/* tail data packed in metdata */
 
 /*
  * Flags for all iomap mappings: