diff mbox

convert dm_ulog_request data to little endian

Message ID 1407459365-697-1-git-send-email-dmzhang@suse.com (mailing list archive)
State Rejected, archived
Delegated to: Mike Snitzer
Headers show

Commit Message

dongmao zhang Aug. 8, 2014, 12:56 a.m. UTC
The fields of dm_ulog_request may be little endian or big endian, depending
on the architecture. This is not right. This patch converts
dm_ulog_request to little endian.

I hit a bug when running cmirrord on s390 Linux.

Signed-off-by: Dongmao Zhang <dmzhang@suse.com>
---
 drivers/md/dm-log-userspace-transfer.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

Comments

Mike Snitzer Aug. 11, 2014, 3:15 a.m. UTC | #1
On Thu, Aug 07 2014 at  8:56pm -0400,
Dongmao Zhang <dmzhang@suse.com> wrote:

> the dm_ulog_request might be little endian or big endian depending on
> the architecture. This is not right. This patch is to convert
> dm_ulog_request to little endian.
> 
> I met a bug when running cmirrord on s390 linux
> 
> Signed-off-by: Dongmao Zhang <dmzhang@suse.com>
> ---
>  drivers/md/dm-log-userspace-transfer.c | 31 ++++++++++++++++++++++++++++++-
>  1 file changed, 30 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
> index b428c0a..cddef2f 100644
> --- a/drivers/md/dm-log-userspace-transfer.c
> +++ b/drivers/md/dm-log-userspace-transfer.c
> @@ -53,6 +53,29 @@ struct receiving_pkg {
>  static DEFINE_SPINLOCK(receiving_list_lock);
>  static struct list_head receiving_list;
>  
> +static void cpu_to_network(struct dm_ulog_request *tfr)
> +{
> +	if (tfr == NULL)
> +		return;
> +	tfr->luid = cpu_to_le64(tfr->luid);
> +	tfr->version =  cpu_to_le32(tfr->version);
> +	tfr->seq = cpu_to_le32(tfr->seq);
> +	tfr->request_type = cpu_to_le32(tfr->request_type);
> +	tfr->data_size = cpu_to_le32(tfr->data_size);
> +}
> +
> +static void network_to_cpu(struct dm_ulog_request *tfr)
> +{
> +	if (tfr == NULL)
> +		return;
> +	tfr->luid = le64_to_cpu(tfr->luid);
> +	tfr->version =  le32_to_cpu(tfr->version);
> +	tfr->seq = le32_to_cpu(tfr->seq);
> +	tfr->request_type = le32_to_cpu(tfr->request_type);
> +	tfr->data_size = le32_to_cpu(tfr->data_size);
> +	tfr->error = le32_to_cpu(tfr->error);
> +}
> +
>  static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
>  {
>  	int r;
> @@ -66,6 +89,7 @@ static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
>  	msg->seq = tfr->seq;
>  	msg->len = sizeof(struct dm_ulog_request) + tfr->data_size;
>  
> +	cpu_to_network(tfr);
>  	r = cn_netlink_send(msg, 0, 0, gfp_any());
>  
>  	return r;
> @@ -81,8 +105,11 @@ static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
>   */
>  static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr)
>  {
> -	uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0;
>  	struct receiving_pkg *pkg;
> +	uint32_t rtn_seq;
> +
> +	network_to_cpu(tfr);
> +	rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0;
>  
>  	/*
>  	 * The 'receiving_pkg' entries in this list are statically
> @@ -148,6 +175,8 @@ static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
>  	spin_unlock(&receiving_list_lock);
>  }
>  
> +
> +
>  /**
>   * dm_consult_userspace
>   * @uuid: log's universal unique identifier (must be DM_UUID_LEN in size)

Other than this last hunk (extra whitespace) this patch looks fine to me.

Jon/Mikulas/Alasdair: could you review this too?  If you guys agree I'll
get it staged for 3.17 (and mark it for stable).

Thanks,
Mike

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Mikulas Patocka Aug. 11, 2014, 2:03 p.m. UTC | #2
On Sun, 10 Aug 2014, Mike Snitzer wrote:

> On Thu, Aug 07 2014 at  8:56pm -0400,
> Dongmao Zhang <dmzhang@suse.com> wrote:
> 
> > the dm_ulog_request might be little endian or big endian depending on
> > the architecture. This is not right. This patch is to convert
> > dm_ulog_request to little endian.
> > 
> > I met a bug when running cmirrord on s390 linux
> > 
> > Signed-off-by: Dongmao Zhang <dmzhang@suse.com>
> > ---
> >  drivers/md/dm-log-userspace-transfer.c | 31 ++++++++++++++++++++++++++++++-
> >  1 file changed, 30 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
> > index b428c0a..cddef2f 100644
> > --- a/drivers/md/dm-log-userspace-transfer.c
> > +++ b/drivers/md/dm-log-userspace-transfer.c
> > @@ -53,6 +53,29 @@ struct receiving_pkg {
> >  static DEFINE_SPINLOCK(receiving_list_lock);
> >  static struct list_head receiving_list;
> >  
> > +static void cpu_to_network(struct dm_ulog_request *tfr)
> > +{
> > +	if (tfr == NULL)
> > +		return;
> > +	tfr->luid = cpu_to_le64(tfr->luid);
> > +	tfr->version =  cpu_to_le32(tfr->version);
> > +	tfr->seq = cpu_to_le32(tfr->seq);
> > +	tfr->request_type = cpu_to_le32(tfr->request_type);
> > +	tfr->data_size = cpu_to_le32(tfr->data_size);
> > +}
> > +
> > +static void network_to_cpu(struct dm_ulog_request *tfr)
> > +{
> > +	if (tfr == NULL)
> > +		return;
> > +	tfr->luid = le64_to_cpu(tfr->luid);
> > +	tfr->version =  le32_to_cpu(tfr->version);
> > +	tfr->seq = le32_to_cpu(tfr->seq);
> > +	tfr->request_type = le32_to_cpu(tfr->request_type);
> > +	tfr->data_size = le32_to_cpu(tfr->data_size);
> > +	tfr->error = le32_to_cpu(tfr->error);
> > +}
> > +
> >  static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
> >  {
> >  	int r;
> > @@ -66,6 +89,7 @@ static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
> >  	msg->seq = tfr->seq;
> >  	msg->len = sizeof(struct dm_ulog_request) + tfr->data_size;
> >  
> > +	cpu_to_network(tfr);
> >  	r = cn_netlink_send(msg, 0, 0, gfp_any());
> >  
> >  	return r;
> > @@ -81,8 +105,11 @@ static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
> >   */
> >  static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr)
> >  {
> > -	uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0;
> >  	struct receiving_pkg *pkg;
> > +	uint32_t rtn_seq;
> > +
> > +	network_to_cpu(tfr);
> > +	rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0;
> >  
> >  	/*
> >  	 * The 'receiving_pkg' entries in this list are statically
> > @@ -148,6 +175,8 @@ static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
> >  	spin_unlock(&receiving_list_lock);
> >  }
> >  
> > +
> > +
> >  /**
> >   * dm_consult_userspace
> >   * @uuid: log's universal unique identifier (must be DM_UUID_LEN in size)
> 
> Other than this last hunk (extra whitespace) this patch looks fine to me.
> 
> Jon/Mikulas/Alasdair: could you review this too?  If you guys agree I'll
> get it staged for 3.17 (and mark it for stable).
> 
> Thanks,
> Mike

Hi

Red Hat is shipping RHEL7 on s390x and ppc64, so why hasn't QA caught 
this?

Is there a possibility that this patch fixes one case and breaks some 
other case?

There is a risk that we fix one userspace program that assumes messages to 
be in little endian and break another userspace program that assumes that 
the messages are in native endian - to avoid this risk, it may be better 
to fix the userspace program and leave the kernel unchanged.

I think you need to find the piece of userspace code that processes the 
information on the netlink socket and explain why it needs the information 
in little endian. (and you also need to show that there is no userspace 
code that expects the data in native endian).

Mikulas

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Jonathan Brassow Aug. 20, 2014, 12:58 a.m. UTC | #3
On Aug 7, 2014, at 7:56 PM, Dongmao Zhang wrote:

> the dm_ulog_request might be little endian or big endian depending on
> the architecture. This is not right. This patch is to convert
> dm_ulog_request to little endian.
> 
> I met a bug when running cmirrord on s390 linux

This seems odd to me.  I don't understand why you would get a bug.  Do you have a mixed-architecture cluster?  Some x86-64 and the s390?  Otherwise, all operations should be in a format that is understood (unless there is a bug).

Also, you don't really need to translate the structure just to go to userspace.  Only the process that communicates between (potentially different architectures of) machines needs to do that.  That process is cmirrord and the code that should translate for that daemon is in lvm2/daemons/cmirrord/compat.c.

There could be a bug, but unless I'm not understanding right, we shouldn't need to switch endian in the kernel.

 brassow


--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
dongmao zhang Aug. 20, 2014, 3:19 a.m. UTC | #4
On 2014-08-20 08:58, Brassow Jonathan wrote:
> This seems odd to me.  I don't understand why you would get a bug.  Do you have a mixed-architecture cluster?  Some x86-64 and the s390?  Otherwise, all operations should be in a format that is understood (unless there is a bug).
>
> Also, you don't really need to translate the structure just to go to userspace.  Only the process that communicates between (potentially different architectures of) machines needs to do that.  That process is cmirrord and the code that should translate for that daemon is in lvm2/daemons/cmirrord/compat.c.
>
> There could be a bug, but unless I'm not understanding right, we shouldn't need to switch endian in the kernel.
>
>   brassow
>
I think this is because cmirrord always assumes the network packet is 
little endian (see compat.c: v5_endian_from_network).
So if cmirrord is running on s390, the kernel will send the packet in its 
native endianness (big endian); this is why the data is broken.
There are two possible solutions:

1. cmirrord always use native endian.
2. kernel always send little endian data.






--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Jonathan Brassow Aug. 20, 2014, 4:20 a.m. UTC | #5
On Aug 19, 2014, at 10:19 PM, zhangdongmao wrote:

> 
> On 2014-08-20 08:58, Brassow Jonathan wrote:
>> This seems odd to me.  I don't understand why you would get a bug.  Do you have a mixed-architecture cluster?  Some x86-64 and the s390?  Otherwise, all operations should be in a format that is understood (unless there is a bug).
>> 
>> Also, you don't really need to translate the structure just to go to userspace.  Only the process that communicates between (potentially different architectures of) machines needs to do that.  That process is cmirrord and the code that should translate for that daemon is in lvm2/daemons/cmirrord/compat.c.
>> 
>> There could be a bug, but unless I'm not understanding right, we shouldn't need to switch endian in the kernel.
>> 
>>  brassow
>> 
> I think this is because cmirrord always assume the network package is little-endian.(see compat.c: v5_endian_from_network) .
> So if cmirrord is running on S390, kernel will send package in its native endian(big endian), this is why data is broken.
> There might have two solutions:
> 
> 1. cmirrord always use native endian.
> 2. kernel always send little endian data.

v5_endian_from_network() only gets called if it is needed.  I suspect there is a bug here, 'clog_request_to_network' should detect if big endian is being used and switch it to little endian.  In-coming communication must always be little endian.  I'm not sure which end the problem is on (I think the send side).  You could add some prints to detect the issue.

lvm2/daemons/cmirrord/cluster.c:cluster_send() sets two version numbers - one is forced to be little endian via xlate64().  'clog_request_to_network()' is then called and compares the two version numbers.  If they are different, the machine is big endian and the contents must be converted.  If this is not happening, it is bad.  Then, *from_network() performs a similar action and test and then must xlate back to big endian.  If this is not happening, it is bad.  So, for big endian machines, ensure that v5_endian_*_network() is being called.  (For little endian, it should not be.)

 brassow

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Jonathan Brassow Aug. 21, 2014, 6 a.m. UTC | #6
On Aug 10, 2014, at 10:15 PM, Mike Snitzer wrote:

> 
> Other than this last hunk (extra whitespace) this patch looks fine to me.
> 
> Jon/Mikulas/Alasdair: could you review this too?  If you guys agree I'll
> get it staged for 3.17 (and mark it for stable).

please do not stage.  I think we will fix this in userspace.

 brassow

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
dongmao zhang Aug. 21, 2014, 11:08 a.m. UTC | #7
On 2014-08-21 14:00, Brassow Jonathan wrote:
> On Aug 10, 2014, at 10:15 PM, Mike Snitzer wrote:
>
>> Other than this last hunk (extra whitespace) this patch looks fine to me.
>>
>> Jon/Mikulas/Alasdair: could you review this too?  If you guys agree I'll
>> get it staged for 3.17 (and mark it for stable).
> please do not stage.  I think we will fix this in userspace.
>
>   brassow

Yes, I agree. After the comments from brassow,
I think this can be fixed in userspace. I have a new patch for cmirrord;
I am only waiting for a time slot to test it on s390 and mixed-architecture clusters.



> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel
>

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
diff mbox

Patch

diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index b428c0a..cddef2f 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -53,6 +53,29 @@  struct receiving_pkg {
 static DEFINE_SPINLOCK(receiving_list_lock);
 static struct list_head receiving_list;
 
+static void cpu_to_network(struct dm_ulog_request *tfr)
+{
+	if (tfr == NULL)
+		return;
+	tfr->luid = cpu_to_le64(tfr->luid);
+	tfr->version =  cpu_to_le32(tfr->version);
+	tfr->seq = cpu_to_le32(tfr->seq);
+	tfr->request_type = cpu_to_le32(tfr->request_type);
+	tfr->data_size = cpu_to_le32(tfr->data_size);
+}
+
+static void network_to_cpu(struct dm_ulog_request *tfr)
+{
+	if (tfr == NULL)
+		return;
+	tfr->luid = le64_to_cpu(tfr->luid);
+	tfr->version =  le32_to_cpu(tfr->version);
+	tfr->seq = le32_to_cpu(tfr->seq);
+	tfr->request_type = le32_to_cpu(tfr->request_type);
+	tfr->data_size = le32_to_cpu(tfr->data_size);
+	tfr->error = le32_to_cpu(tfr->error);
+}
+
 static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
 {
 	int r;
@@ -66,6 +89,7 @@  static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
 	msg->seq = tfr->seq;
 	msg->len = sizeof(struct dm_ulog_request) + tfr->data_size;
 
+	cpu_to_network(tfr);
 	r = cn_netlink_send(msg, 0, 0, gfp_any());
 
 	return r;
@@ -81,8 +105,11 @@  static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
  */
 static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr)
 {
-	uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0;
 	struct receiving_pkg *pkg;
+	uint32_t rtn_seq;
+
+	network_to_cpu(tfr);
+	rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0;
 
 	/*
 	 * The 'receiving_pkg' entries in this list are statically
@@ -148,6 +175,8 @@  static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
 	spin_unlock(&receiving_list_lock);
 }
 
+
+
 /**
  * dm_consult_userspace
  * @uuid: log's universal unique identifier (must be DM_UUID_LEN in size)