Message ID | 20190622093655.80092-2-dimitriy.ryazantcev@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3] l10n: localizable upload progress messages | expand |
On Sat, Jun 22, 2019 at 4:37 PM Dimitriy Ryazantcev <dimitriy.ryazantcev@gmail.com> wrote: > diff --git a/strbuf.c b/strbuf.c > index 0e18b259ce..0a3ebc3749 100644 > --- a/strbuf.c > +++ b/strbuf.c > @@ -814,20 +814,28 @@ void strbuf_addstr_urlencode(struct strbuf *sb, const char *s, > void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes) > { > if (bytes > 1 << 30) { > - strbuf_addf(buf, "%u.%2.2u GiB", > + strbuf_addf(buf, "%u.%2.2u ", > (unsigned)(bytes >> 30), > (unsigned)(bytes & ((1 << 30) - 1)) / 10737419); > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: gibi */ > + strbuf_addstr(buf, _("Gi")); It may be ISO standard, but GiB to me looks much easier to understand. What's the reason for changing GiB to Gi btw? > } else if (bytes > 1 << 20) { > unsigned x = bytes + 5243; /* for rounding */ > - strbuf_addf(buf, "%u.%2.2u MiB", > + strbuf_addf(buf, "%u.%2.2u ", > x >> 20, ((x & ((1 << 20) - 1)) * 100) >> 20); > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: mebi */ > + strbuf_addstr(buf, _("Mi")); > } else if (bytes > 1 << 10) { > unsigned x = bytes + 5; /* for rounding */ > - strbuf_addf(buf, "%u.%2.2u KiB", > + strbuf_addf(buf, "%u.%2.2u ", > x >> 10, ((x & ((1 << 10) - 1)) * 100) >> 10); > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: kibi */ > + strbuf_addstr(buf, _("Ki")); > } else { > - strbuf_addf(buf, "%u bytes", (unsigned)bytes); > + strbuf_addf(buf, "%u ", (unsigned)bytes); > } > + /* TRANSLATORS: ISO/IEC 80000-13:2008, subclause 13-9.c: byte */ > + strbuf_addstr(buf, _("B")); > } > > void strbuf_add_absolute_path(struct strbuf *sb, const char *path) > -- > 2.22.0 >
Duy Nguyen <pclouds@gmail.com> wrote: > > On Sat, Jun 22, 2019 at 4:37 PM Dimitriy Ryazantcev > <dimitriy.ryazantcev@gmail.com> wrote: > > diff --git a/strbuf.c b/strbuf.c > > index 0e18b259ce..0a3ebc3749 100644 > > --- a/strbuf.c > > +++ b/strbuf.c > > @@ -814,20 +814,28 @@ void strbuf_addstr_urlencode(struct strbuf *sb, const char *s, > > void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes) > > { > > if (bytes > 1 << 30) { > > - strbuf_addf(buf, "%u.%2.2u GiB", > > + strbuf_addf(buf, "%u.%2.2u ", > > (unsigned)(bytes >> 30), > > (unsigned)(bytes & ((1 << 30) - 1)) / 10737419); > > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: gibi */ > > + strbuf_addstr(buf, _("Gi")); > > It may be ISO standard, but GiB to me looks much easier to understand. > What's the reason for changing GiB to Gi btw? I just moved 'B' out of else block so it will be prepended with 'Gi'/'Mi'/'Ki' when needed. Note: I changed 'bytes' to just 'B' unit. > > > } else if (bytes > 1 << 20) { > > unsigned x = bytes + 5243; /* for rounding */ > > - strbuf_addf(buf, "%u.%2.2u MiB", > > + strbuf_addf(buf, "%u.%2.2u ", > > x >> 20, ((x & ((1 << 20) - 1)) * 100) >> 20); > > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: mebi */ > > + strbuf_addstr(buf, _("Mi")); > > } else if (bytes > 1 << 10) { > > unsigned x = bytes + 5; /* for rounding */ > > - strbuf_addf(buf, "%u.%2.2u KiB", > > + strbuf_addf(buf, "%u.%2.2u ", > > x >> 10, ((x & ((1 << 10) - 1)) * 100) >> 10); > > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: kibi */ > > + strbuf_addstr(buf, _("Ki")); > > } else { > > - strbuf_addf(buf, "%u bytes", (unsigned)bytes); > > + strbuf_addf(buf, "%u ", (unsigned)bytes); > > } > > + /* TRANSLATORS: ISO/IEC 80000-13:2008, subclause 13-9.c: byte */ > > + strbuf_addstr(buf, _("B")); > > } > > > > void strbuf_add_absolute_path(struct strbuf *sb, const char *path) > > -- > > 2.22.0 > > > > > -- > Duy
Am 22.06.19 um 11:36 schrieb Dimitriy Ryazantcev: > Signed-off-by: Dimitriy Ryazantcev <dimitriy.ryazantcev@gmail.com> > --- > progress.c | 4 +++- > strbuf.c | 16 ++++++++++++---- > 2 files changed, 15 insertions(+), 5 deletions(-) > > diff --git a/progress.c b/progress.c > index a2e8cf64a8..61d8cf5d04 100644 > --- a/progress.c > +++ b/progress.c > @@ -151,7 +151,9 @@ static void throughput_string(struct strbuf *buf, uint64_t total, > strbuf_humanise_bytes(buf, total); > strbuf_addstr(buf, " | "); > strbuf_humanise_bytes(buf, rate * 1024); > - strbuf_addstr(buf, "/s"); > + strbuf_addstr(buf, "/"); > + /* TRANSLATORS: IEC 80000-13:2008, subclause 13-12.b: second */ > + strbuf_addstr(buf, _("s")); > } > > void display_throughput(struct progress *progress, uint64_t total) > diff --git a/strbuf.c b/strbuf.c > index 0e18b259ce..0a3ebc3749 100644 > --- a/strbuf.c > +++ b/strbuf.c > @@ -814,20 +814,28 @@ void strbuf_addstr_urlencode(struct strbuf *sb, const char *s, > void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes) > { > if (bytes > 1 << 30) { > - strbuf_addf(buf, "%u.%2.2u GiB", > + strbuf_addf(buf, "%u.%2.2u ", > (unsigned)(bytes >> 30), > (unsigned)(bytes & ((1 << 30) - 1)) / 10737419); > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: gibi */ > + strbuf_addstr(buf, _("Gi")); > } else if (bytes > 1 << 20) { > unsigned x = bytes + 5243; /* for rounding */ > - strbuf_addf(buf, "%u.%2.2u MiB", > + strbuf_addf(buf, "%u.%2.2u ", > x >> 20, ((x & ((1 << 20) - 1)) * 100) >> 20); > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: mebi */ > + strbuf_addstr(buf, _("Mi")); > } else if (bytes > 1 << 10) { > unsigned x = bytes + 5; /* for rounding */ > - strbuf_addf(buf, "%u.%2.2u KiB", > + strbuf_addf(buf, "%u.%2.2u ", > x >> 10, ((x & ((1 << 10) - 1)) * 100) >> 10); > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: kibi */ > + strbuf_addstr(buf, _("Ki")); > } else { > - strbuf_addf(buf, "%u bytes", (unsigned)bytes); > + strbuf_addf(buf, "%u ", 
(unsigned)bytes); > } > + /* TRANSLATORS: ISO/IEC 80000-13:2008, subclause 13-9.c: byte */ > + strbuf_addstr(buf, _("B")); > } > > void strbuf_add_absolute_path(struct strbuf *sb, const char *path) > All of the prefixes are in ISO/IEC, i.e., standardized. Why do they have to be translated? Isn't the way of presentation of magnitudes with a unit also standardized, and should not need to be translated? -- Hannes
On 2019-06-22 at 21:42:33, Johannes Sixt wrote: > Am 22.06.19 um 11:36 schrieb Dimitriy Ryazantcev: > > diff --git a/strbuf.c b/strbuf.c > > index 0e18b259ce..0a3ebc3749 100644 > > --- a/strbuf.c > > +++ b/strbuf.c > > @@ -814,20 +814,28 @@ void strbuf_addstr_urlencode(struct strbuf *sb, const char *s, > > void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes) > > { > > if (bytes > 1 << 30) { > > - strbuf_addf(buf, "%u.%2.2u GiB", > > + strbuf_addf(buf, "%u.%2.2u ", > > (unsigned)(bytes >> 30), > > (unsigned)(bytes & ((1 << 30) - 1)) / 10737419); > > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: gibi */ > > + strbuf_addstr(buf, _("Gi")); > > } else if (bytes > 1 << 20) { > > unsigned x = bytes + 5243; /* for rounding */ > > - strbuf_addf(buf, "%u.%2.2u MiB", > > + strbuf_addf(buf, "%u.%2.2u ", > > x >> 20, ((x & ((1 << 20) - 1)) * 100) >> 20); > > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: mebi */ > > + strbuf_addstr(buf, _("Mi")); > > } else if (bytes > 1 << 10) { > > unsigned x = bytes + 5; /* for rounding */ > > - strbuf_addf(buf, "%u.%2.2u KiB", > > + strbuf_addf(buf, "%u.%2.2u ", > > x >> 10, ((x & ((1 << 10) - 1)) * 100) >> 10); > > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: kibi */ > > + strbuf_addstr(buf, _("Ki")); > > } else { > > - strbuf_addf(buf, "%u bytes", (unsigned)bytes); > > + strbuf_addf(buf, "%u ", (unsigned)bytes); > > } > > + /* TRANSLATORS: ISO/IEC 80000-13:2008, subclause 13-9.c: byte */ > > + strbuf_addstr(buf, _("B")); > > } > > > > void strbuf_add_absolute_path(struct strbuf *sb, const char *path) > > > > All of the prefixes are in ISO/IEC, i.e., standardized. Why do they have > to be translated? > > Isn't the way of presentation of magnitudes with a unit also > standardized, and should not need to be translated? In my view, the translation is less important for the prefixes and more important for the unit: at least French prefers the term "octet" over "byte"[0], so instead of writing "MB", you'd write "Mo". 
In general, I think it's better to keep the prefixes and units together, since trying to translate a single letter runs the risk of collisions with other places in the code. It's likely to be easier for translators as well. [0] Technically, as in English, they have different meanings, but I've always seen French units written with "o" for "octet", not "B" for "byte".
brian m. carlson <sandals@crustytoothpaste.net> wrote: > > On 2019-06-22 at 21:42:33, Johannes Sixt wrote: > > Am 22.06.19 um 11:36 schrieb Dimitriy Ryazantcev: > > > diff --git a/strbuf.c b/strbuf.c > > > index 0e18b259ce..0a3ebc3749 100644 > > > --- a/strbuf.c > > > +++ b/strbuf.c > > > @@ -814,20 +814,28 @@ void strbuf_addstr_urlencode(struct strbuf *sb, const char *s, > > > void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes) > > > { > > > if (bytes > 1 << 30) { > > > - strbuf_addf(buf, "%u.%2.2u GiB", > > > + strbuf_addf(buf, "%u.%2.2u ", > > > (unsigned)(bytes >> 30), > > > (unsigned)(bytes & ((1 << 30) - 1)) / 10737419); > > > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: gibi */ > > > + strbuf_addstr(buf, _("Gi")); > > > } else if (bytes > 1 << 20) { > > > unsigned x = bytes + 5243; /* for rounding */ > > > - strbuf_addf(buf, "%u.%2.2u MiB", > > > + strbuf_addf(buf, "%u.%2.2u ", > > > x >> 20, ((x & ((1 << 20) - 1)) * 100) >> 20); > > > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: mebi */ > > > + strbuf_addstr(buf, _("Mi")); > > > } else if (bytes > 1 << 10) { > > > unsigned x = bytes + 5; /* for rounding */ > > > - strbuf_addf(buf, "%u.%2.2u KiB", > > > + strbuf_addf(buf, "%u.%2.2u ", > > > x >> 10, ((x & ((1 << 10) - 1)) * 100) >> 10); > > > + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: kibi */ > > > + strbuf_addstr(buf, _("Ki")); > > > } else { > > > - strbuf_addf(buf, "%u bytes", (unsigned)bytes); > > > + strbuf_addf(buf, "%u ", (unsigned)bytes); > > > } > > > + /* TRANSLATORS: ISO/IEC 80000-13:2008, subclause 13-9.c: byte */ > > > + strbuf_addstr(buf, _("B")); > > > } > > > > > > void strbuf_add_absolute_path(struct strbuf *sb, const char *path) > > > > > > > All of the prefixes are in ISO/IEC, i.e., standardized. Why do they have > > to be translated? > > > > Isn't the way of presentation of magnitudes with a unit also > > standardized, and should not need to be translated? 
> > In my view, the translation is less important for the prefixes and more > important for the unit: at least French prefers the term "octet" over > "byte"[0], so instead of writing "MB", you'd write "Mo". Localization according to local rules is important for every unit part. There is a Russian adoption of IEC 80000-13:2008 called GOST R IEC 80000-13-2016[0]. And in this document there are national translations for these units/prefixes. So 'KiB' should become 'КиБ' according to this national standard. Same story with the Ukrainian adoption called DSTU IEC 80000-13:2016[1]: 'KiB' -> 'КіБ'. Also, according to the ISO website, it seems that a French version of IEC 80000-13:2008 exists. Not sure about the French translation though. > In general, I think it's better to keep the prefixes and units together, > since trying to translate a single letter runs the risk of collisions > with other places in the code. It's likely to be easier for translators > as well. I agree with you on this part. I searched for similar code in other codebases and found such in the KDE codebase[2]. I'll update the patch if there are no objections. > [0] Technically, as in English, they have different meanings, but I've > always seen French units written with "o" for "octet", not "B" for > "byte". To resolve this ambiguity, the IEC standard in subclause 13-9.c says that 'byte' implies 'octet' (8-bit byte). > -- > brian m. carlson: Houston, Texas, US > OpenPGP: https://keybase.io/bk2204 [0] http://docs.cntd.ru/document/1200143231 [1] http://online.budstandart.com/ua/catalog/doc-page.html?id_doc=69033 [2] https://cgit.kde.org/kcoreaddons.git/tree/src/lib/util/kformatprivate.cpp#n254
Am 23.06.19 um 10:28 schrieb Dimitriy: > brian m. carlson <sandals@crustytoothpaste.net> wrote: >> >> On 2019-06-22 at 21:42:33, Johannes Sixt wrote: >>> Am 22.06.19 um 11:36 schrieb Dimitriy Ryazantcev: >>>> diff --git a/strbuf.c b/strbuf.c >>>> index 0e18b259ce..0a3ebc3749 100644 >>>> --- a/strbuf.c >>>> +++ b/strbuf.c >>>> @@ -814,20 +814,28 @@ void strbuf_addstr_urlencode(struct strbuf *sb, const char *s, >>>> void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes) >>>> { >>>> if (bytes > 1 << 30) { >>>> - strbuf_addf(buf, "%u.%2.2u GiB", >>>> + strbuf_addf(buf, "%u.%2.2u ", >>>> (unsigned)(bytes >> 30), >>>> (unsigned)(bytes & ((1 << 30) - 1)) / 10737419); >>>> + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: gibi */ >>>> + strbuf_addstr(buf, _("Gi")); >>>> } else if (bytes > 1 << 20) { >>>> unsigned x = bytes + 5243; /* for rounding */ >>>> - strbuf_addf(buf, "%u.%2.2u MiB", >>>> + strbuf_addf(buf, "%u.%2.2u ", >>>> x >> 20, ((x & ((1 << 20) - 1)) * 100) >> 20); >>>> + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: mebi */ >>>> + strbuf_addstr(buf, _("Mi")); >>>> } else if (bytes > 1 << 10) { >>>> unsigned x = bytes + 5; /* for rounding */ >>>> - strbuf_addf(buf, "%u.%2.2u KiB", >>>> + strbuf_addf(buf, "%u.%2.2u ", >>>> x >> 10, ((x & ((1 << 10) - 1)) * 100) >> 10); >>>> + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: kibi */ >>>> + strbuf_addstr(buf, _("Ki")); >>>> } else { >>>> - strbuf_addf(buf, "%u bytes", (unsigned)bytes); >>>> + strbuf_addf(buf, "%u ", (unsigned)bytes); >>>> } >>>> + /* TRANSLATORS: ISO/IEC 80000-13:2008, subclause 13-9.c: byte */ >>>> + strbuf_addstr(buf, _("B")); >>>> } >>>> >>>> void strbuf_add_absolute_path(struct strbuf *sb, const char *path) >>>> >>> >>> All of the prefixes are in ISO/IEC, i.e., standardized. Why do they have >>> to be translated? >>> >>> Isn't the way of presentation of magnitudes with a unit also >>> standardized, and should not need to be translated? 
>> >> In my view, the translation is less important for the prefixes and more >> important for the unit: at least French prefers the term "octet" over >> "byte"[0], so instead of writing "MB", you'd write "Mo". > > Localization according to local rules is important for every unit part. > There is a Russian adoption of IEC 80000-13:2008 called > GOST R IEC 80000-13-2016[0]. > And in this document there is national translations for these units\prefixes. > So 'KiB' should become 'КиБ' according to this national standard. > Same story with Ukrainian adoption called DSTU IEC 80000-13:2016[1]: > 'KiB' -> 'КіБ'. > Also according to ISO website seems that there is French version of > IEC 80000-13:2008 exist. Not sure about French translation through. Thanks for educating me. Given your explanations, wouldn't it be much more natural to keep the prefix with the unit instead of separating them, as the patch does? -- Hannes
On 2019-06-23 at 08:28:41, Dimitriy wrote: > brian m. carlson <sandals@crustytoothpaste.net> wrote: > > In my view, the translation is less important for the prefixes and more > > important for the unit: at least French prefers the term "octet" over > > "byte"[0], so instead of writing "MB", you'd write "Mo". > > Localization according to local rules is important for every unit part. > There is a Russian adoption of IEC 80000-13:2008 called > GOST R IEC 80000-13-2016[0]. > And in this document there is national translations for these units\prefixes. > So 'KiB' should become 'КиБ' according to this national standard. > Same story with Ukrainian adoption called DSTU IEC 80000-13:2016[1]: > 'KiB' -> 'КіБ'. > Also according to ISO website seems that there is French version of > IEC 80000-13:2008 exist. Not sure about French translation through. I figured the impetus for this change was something like this. > > In general, I think it's better to keep the prefixes and units together, > > since trying to translate a single letter runs the risk of collisions > > with other places in the code. It's likely to be easier for translators > > as well. > > I agree with you in this part. > I searched for similar code in other codebases and found such in KDE > codebase[2]. I'll update patch if there is no objections. Yeah, that sounds good. It's probably sufficient in this case to simply mark the existing format strings for translation, since we know translators can already handle other format strings we have. Also, perhaps translators will want to switch the period for a comma, as is common in some languages. > > [0] Technically, as in English, they have different meanings, but I've > > always seen French units written with "o" for "octet", not "B" for > > "byte". > > To solve this ambiguity IEC standard in subclause 13-9.с says > that 'byte' implies 'octet' (8-bit byte). 
French Wikipedia also tells me that "B" is the symbol for the bel, so "o" is preferable because it doesn't conflict. I have no strong opinions either way.
Dimitriy <dimitriy.ryazantcev@gmail.com> writes: > I just moved 'B' out of else block so it will be prepended with > 'Gi'/'Mi'/'Ki' when needed. > Note: I changed 'bytes' to just 'B' unit. That makes this change more than "localizable messages", doesn't it? As an old timer, I do not mind MiB (I'd prefer the old-fashioned MB myself), but I think "20 bytes" (or "1 byte") spelled out is much easier to grok and less prone to get misunderstood than "20 B". Like it or not, with scale prefixes, e.g. "20 KiB", "20 MiB", etc., it is hard to misunderstand what 'B' means in there, but without any scale prefix, i.e. "20 B", it is rather easy to get confused about what 'B' means.
Johannes Sixt <j6t@kdbg.org> writes: > Thanks for educating me. Given your explanations, wouldn't it be much > more natural to keep the prefix with the unit instead of separating > them, as the patch does? Yup, that "octet" comment was illuminating. Great discussion. Also I am with Duy's message in the other subthread. We should keep the number placeholder "%[udf]" and the unit in a single string, instead of hardcoding the "a number must come and then unit" in the code. Thanks.
diff --git a/progress.c b/progress.c index a2e8cf64a8..61d8cf5d04 100644 --- a/progress.c +++ b/progress.c @@ -151,7 +151,9 @@ static void throughput_string(struct strbuf *buf, uint64_t total, strbuf_humanise_bytes(buf, total); strbuf_addstr(buf, " | "); strbuf_humanise_bytes(buf, rate * 1024); - strbuf_addstr(buf, "/s"); + strbuf_addstr(buf, "/"); + /* TRANSLATORS: IEC 80000-13:2008, subclause 13-12.b: second */ + strbuf_addstr(buf, _("s")); } void display_throughput(struct progress *progress, uint64_t total) diff --git a/strbuf.c b/strbuf.c index 0e18b259ce..0a3ebc3749 100644 --- a/strbuf.c +++ b/strbuf.c @@ -814,20 +814,28 @@ void strbuf_addstr_urlencode(struct strbuf *sb, const char *s, void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes) { if (bytes > 1 << 30) { - strbuf_addf(buf, "%u.%2.2u GiB", + strbuf_addf(buf, "%u.%2.2u ", (unsigned)(bytes >> 30), (unsigned)(bytes & ((1 << 30) - 1)) / 10737419); + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: gibi */ + strbuf_addstr(buf, _("Gi")); } else if (bytes > 1 << 20) { unsigned x = bytes + 5243; /* for rounding */ - strbuf_addf(buf, "%u.%2.2u MiB", + strbuf_addf(buf, "%u.%2.2u ", x >> 20, ((x & ((1 << 20) - 1)) * 100) >> 20); + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: mebi */ + strbuf_addstr(buf, _("Mi")); } else if (bytes > 1 << 10) { unsigned x = bytes + 5; /* for rounding */ - strbuf_addf(buf, "%u.%2.2u KiB", + strbuf_addf(buf, "%u.%2.2u ", x >> 10, ((x & ((1 << 10) - 1)) * 100) >> 10); + /* TRANSLATORS: ISO/IEC 80000-13:2008, clause 4: kibi */ + strbuf_addstr(buf, _("Ki")); } else { - strbuf_addf(buf, "%u bytes", (unsigned)bytes); + strbuf_addf(buf, "%u ", (unsigned)bytes); } + /* TRANSLATORS: ISO/IEC 80000-13:2008, subclause 13-9.c: byte */ + strbuf_addstr(buf, _("B")); } void strbuf_add_absolute_path(struct strbuf *sb, const char *path)
Signed-off-by: Dimitriy Ryazantcev <dimitriy.ryazantcev@gmail.com> --- progress.c | 4 +++- strbuf.c | 16 ++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-)