Message ID | 1407459365-697-1-git-send-email-dmzhang@suse.com (mailing list archive) |
---|---|
State | Rejected, archived |
Delegated to: | Mike Snitzer |
Headers | show |
On Thu, Aug 07 2014 at 8:56pm -0400, Dongmao Zhang <dmzhang@suse.com> wrote: > the dm_ulog_request might be little endian or big endian depending on > the architecture. This is not right. This patch is to convert > dm_ulog_request to little endian. > > I met a bug when running cmirrord on s390 linux > > Signed-off-by: Dongmao Zhang <dmzhang@suse.com> > --- > drivers/md/dm-log-userspace-transfer.c | 31 ++++++++++++++++++++++++++++++- > 1 file changed, 30 insertions(+), 1 deletion(-) > > diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c > index b428c0a..cddef2f 100644 > --- a/drivers/md/dm-log-userspace-transfer.c > +++ b/drivers/md/dm-log-userspace-transfer.c > @@ -53,6 +53,29 @@ struct receiving_pkg { > static DEFINE_SPINLOCK(receiving_list_lock); > static struct list_head receiving_list; > > +static void cpu_to_network(struct dm_ulog_request *tfr) > +{ > + if (tfr == NULL) > + return; > + tfr->luid = cpu_to_le64(tfr->luid); > + tfr->version = cpu_to_le32(tfr->version); > + tfr->seq = cpu_to_le32(tfr->seq); > + tfr->request_type = cpu_to_le32(tfr->request_type); > + tfr->data_size = cpu_to_le32(tfr->data_size); > +} > + > +static void network_to_cpu(struct dm_ulog_request *tfr) > +{ > + if (tfr == NULL) > + return; > + tfr->luid = le64_to_cpu(tfr->luid); > + tfr->version = le32_to_cpu(tfr->version); > + tfr->seq = le32_to_cpu(tfr->seq); > + tfr->request_type = le32_to_cpu(tfr->request_type); > + tfr->data_size = le32_to_cpu(tfr->data_size); > + tfr->error = le32_to_cpu(tfr->error); > +} > + > static int dm_ulog_sendto_server(struct dm_ulog_request *tfr) > { > int r; > @@ -66,6 +89,7 @@ static int dm_ulog_sendto_server(struct dm_ulog_request *tfr) > msg->seq = tfr->seq; > msg->len = sizeof(struct dm_ulog_request) + tfr->data_size; > > + cpu_to_network(tfr); > r = cn_netlink_send(msg, 0, 0, gfp_any()); > > return r; > @@ -81,8 +105,11 @@ static int dm_ulog_sendto_server(struct dm_ulog_request *tfr) > */ > static int 
fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr) > { > - uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0; > struct receiving_pkg *pkg; > + uint32_t rtn_seq; > + > + network_to_cpu(tfr); > + rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0; > > /* > * The 'receiving_pkg' entries in this list are statically > @@ -148,6 +175,8 @@ static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) > spin_unlock(&receiving_list_lock); > } > > + > + > /** > * dm_consult_userspace > * @uuid: log's universal unique identifier (must be DM_UUID_LEN in size) Other than this last hunk (extra whitespace) this patch looks fine to me. Jon/Mikulas/Alasdair: could you review this too? If you guys agree I'll get it staged for 3.17 (and mark it for stable). Thanks, Mike -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On Sun, 10 Aug 2014, Mike Snitzer wrote: > On Thu, Aug 07 2014 at 8:56pm -0400, > Dongmao Zhang <dmzhang@suse.com> wrote: > > > the dm_ulog_request might be little endian or big endian depending on > > the architecture. This is not right. This patch is to convert > > dm_ulog_request to little endian. > > > > I met a bug when running cmirrord on s390 linux > > > > Signed-off-by: Dongmao Zhang <dmzhang@suse.com> > > --- > > drivers/md/dm-log-userspace-transfer.c | 31 ++++++++++++++++++++++++++++++- > > 1 file changed, 30 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c > > index b428c0a..cddef2f 100644 > > --- a/drivers/md/dm-log-userspace-transfer.c > > +++ b/drivers/md/dm-log-userspace-transfer.c > > @@ -53,6 +53,29 @@ struct receiving_pkg { > > static DEFINE_SPINLOCK(receiving_list_lock); > > static struct list_head receiving_list; > > > > +static void cpu_to_network(struct dm_ulog_request *tfr) > > +{ > > + if (tfr == NULL) > > + return; > > + tfr->luid = cpu_to_le64(tfr->luid); > > + tfr->version = cpu_to_le32(tfr->version); > > + tfr->seq = cpu_to_le32(tfr->seq); > > + tfr->request_type = cpu_to_le32(tfr->request_type); > > + tfr->data_size = cpu_to_le32(tfr->data_size); > > +} > > + > > +static void network_to_cpu(struct dm_ulog_request *tfr) > > +{ > > + if (tfr == NULL) > > + return; > > + tfr->luid = le64_to_cpu(tfr->luid); > > + tfr->version = le32_to_cpu(tfr->version); > > + tfr->seq = le32_to_cpu(tfr->seq); > > + tfr->request_type = le32_to_cpu(tfr->request_type); > > + tfr->data_size = le32_to_cpu(tfr->data_size); > > + tfr->error = le32_to_cpu(tfr->error); > > +} > > + > > static int dm_ulog_sendto_server(struct dm_ulog_request *tfr) > > { > > int r; > > @@ -66,6 +89,7 @@ static int dm_ulog_sendto_server(struct dm_ulog_request *tfr) > > msg->seq = tfr->seq; > > msg->len = sizeof(struct dm_ulog_request) + tfr->data_size; > > > > + cpu_to_network(tfr); > > r = 
cn_netlink_send(msg, 0, 0, gfp_any()); > > > > return r; > > @@ -81,8 +105,11 @@ static int dm_ulog_sendto_server(struct dm_ulog_request *tfr) > > */ > > static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr) > > { > > - uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0; > > struct receiving_pkg *pkg; > > + uint32_t rtn_seq; > > + > > + network_to_cpu(tfr); > > + rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0; > > > > /* > > * The 'receiving_pkg' entries in this list are statically > > @@ -148,6 +175,8 @@ static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) > > spin_unlock(&receiving_list_lock); > > } > > > > + > > + > > /** > > * dm_consult_userspace > > * @uuid: log's universal unique identifier (must be DM_UUID_LEN in size) > > Other than this last hunk (extra whitespace) this patch looks fine to me. > > Jon/Mikulas/Alasdair: could you review this too? If you guys agree I'll > get it staged for 3.17 (and mark it for stable). > > Thanks, > Mike Hi Red Hat is shipping RHEL7 on s390x and ppc64, so why haven't QA caught this? Is there a possibility that this patch fixes one case and breaks some other case? There is a risk that we fix one userspace program that assumes messages to be in little endian and break another userspace program that assumes that the messages are in native endian - to avoid this risk, it may be better to fix the userspace program and leave the kernel unchanged. I think you need to find the piece of userspace code that processes the information on the netlink socket and explain why it needs the information in little endian. (and you also need to show that there is no userspace code that expects the data in native endian). Mikulas -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On Aug 7, 2014, at 7:56 PM, Dongmao Zhang wrote: > the dm_ulog_request might be little endian or big endian depending on > the architecture. This is not right. This patch is to convert > dm_ulog_request to little endian. > > I met a bug when running cmirrord on s390 linux This seems odd to me. I don't understand why you would get a bug. Do you have a mixed-architecture cluster? Some x86-64 and the s390? Otherwise, all operations should be in a format that is understood (unless there is a bug). Also, you don't really need to translate the structure just to go to userspace. Only the process that communicates between (potentially different architectures of) machines needs to do that. That process is cmirrord and the code that should translate for that daemon is in lvm2/daemons/cmirrord/compat.c. There could be a bug, but unless I'm not understanding right, we shouldn't need to switch endian in the kernel. brassow -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On 2014-08-20 at 08:58, Brassow Jonathan wrote: > This seems odd to me. I don't understand why you would get a bug. Do you have a mixed-architecture cluster? Some x86-64 and the s390? Otherwise, all operations should be in a format that is understood (unless there is a bug). > > Also, you don't really need to translate the structure just to go to userspace. Only the process that communicates between (potentially different architectures of) machines needs to do that. That process is cmirrord and the code that should translate for that daemon is in lvm2/daemons/cmirrord/compat.c. > > There could be a bug, but unless I'm not understanding right, we shouldn't need to switch endian in the kernel. > > brassow > I think this is because cmirrord always assumes the network packet is little-endian (see compat.c: v5_endian_from_network). So if cmirrord is running on s390, the kernel will send the packet in its native endianness (big endian); this is why the data is broken. There are two possible solutions: 1. cmirrord always uses native endian. 2. The kernel always sends little-endian data. -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On Aug 19, 2014, at 10:19 PM, zhangdongmao wrote: > > On 2014-08-20 at 08:58, Brassow Jonathan wrote: >> This seems odd to me. I don't understand why you would get a bug. Do you have a mixed-architecture cluster? Some x86-64 and the s390? Otherwise, all operations should be in a format that is understood (unless there is a bug). >> >> Also, you don't really need to translate the structure just to go to userspace. Only the process that communicates between (potentially different architectures of) machines needs to do that. That process is cmirrord and the code that should translate for that daemon is in lvm2/daemons/cmirrord/compat.c. >> >> There could be a bug, but unless I'm not understanding right, we shouldn't need to switch endian in the kernel. >> >> brassow >> > I think this is because cmirrord always assume the network package is little-endian.(see compat.c: v5_endian_from_network) . > So if cmirrord is running on S390, kernel will send package in its native endian(big endian), this is why data is broken. > There might have two solutions: > > 1. cmirrord always use native endian. > 2. kernel always send little endian data. v5_endian_from_network() only gets called if it is needed. I suspect there is a bug here; 'clog_request_to_network' should detect if big endian is being used and switch it to little endian. In-coming communication must always be little endian. I'm not sure which end the problem is on (I think the send side). You could add some prints to detect the issue. lvm2/daemons/cmirrord/cluster.c:cluster_send() sets two version numbers - one is forced to be little endian via xlate64(). 'clog_request_to_network()' is then called and compares the two version numbers. If they are different, the machine is big endian and the contents must be converted. If this is not happening, it is bad. Then, *from_network() performs a similar action and test and then must xlate back to big endian. If this is not happening, it is bad. 
So, for big endian machines, ensure that v5_endian_*_network() is being called. (For little endian, it should not be.) brassow -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On Aug 10, 2014, at 10:15 PM, Mike Snitzer wrote: > > Other than this last hunk (extra whitespace) this patch looks fine to me. > > Jon/Mikulas/Alasdair: could you review this too? If you guys agree I'll > get it staged for 3.17 (and mark it for stable). please do not stage. I think we will fix this in userspace. brassow -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
On 2014-08-21 at 14:00, Brassow Jonathan wrote: > On Aug 10, 2014, at 10:15 PM, Mike Snitzer wrote: > >> Other than this last hunk (extra whitespace) this patch looks fine to me. >> >> Jon/Mikulas/Alasdair: could you review this too? If you guys agree I'll >> get it staged for 3.17 (and mark it for stable). > please do not stage. I think we will fix this in userspace. > > brassow Yes, I think so. After the comments from brassow, this can be fixed in userspace. I have a new patch for cmirrord; I am only waiting for a time slot to test it on s390 and on a mixed-architecture setup. > -- > dm-devel mailing list > dm-devel@redhat.com > https://www.redhat.com/mailman/listinfo/dm-devel > -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c index b428c0a..cddef2f 100644 --- a/drivers/md/dm-log-userspace-transfer.c +++ b/drivers/md/dm-log-userspace-transfer.c @@ -53,6 +53,29 @@ struct receiving_pkg { static DEFINE_SPINLOCK(receiving_list_lock); static struct list_head receiving_list; +static void cpu_to_network(struct dm_ulog_request *tfr) +{ + if (tfr == NULL) + return; + tfr->luid = cpu_to_le64(tfr->luid); + tfr->version = cpu_to_le32(tfr->version); + tfr->seq = cpu_to_le32(tfr->seq); + tfr->request_type = cpu_to_le32(tfr->request_type); + tfr->data_size = cpu_to_le32(tfr->data_size); +} + +static void network_to_cpu(struct dm_ulog_request *tfr) +{ + if (tfr == NULL) + return; + tfr->luid = le64_to_cpu(tfr->luid); + tfr->version = le32_to_cpu(tfr->version); + tfr->seq = le32_to_cpu(tfr->seq); + tfr->request_type = le32_to_cpu(tfr->request_type); + tfr->data_size = le32_to_cpu(tfr->data_size); + tfr->error = le32_to_cpu(tfr->error); +} + static int dm_ulog_sendto_server(struct dm_ulog_request *tfr) { int r; @@ -66,6 +89,7 @@ static int dm_ulog_sendto_server(struct dm_ulog_request *tfr) msg->seq = tfr->seq; msg->len = sizeof(struct dm_ulog_request) + tfr->data_size; + cpu_to_network(tfr); r = cn_netlink_send(msg, 0, 0, gfp_any()); return r; @@ -81,8 +105,11 @@ static int dm_ulog_sendto_server(struct dm_ulog_request *tfr) */ static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr) { - uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0; struct receiving_pkg *pkg; + uint32_t rtn_seq; + + network_to_cpu(tfr); + rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0; /* * The 'receiving_pkg' entries in this list are statically @@ -148,6 +175,8 @@ static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) spin_unlock(&receiving_list_lock); } + + /** * dm_consult_userspace * @uuid: log's universal unique identifier (must be DM_UUID_LEN in size)
The dm_ulog_request may be little endian or big endian depending on the architecture. This is not right. This patch converts dm_ulog_request to little endian. I hit a bug when running cmirrord on s390 Linux. Signed-off-by: Dongmao Zhang <dmzhang@suse.com> --- drivers/md/dm-log-userspace-transfer.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-)