diff mbox

[2/2] x86emul: correct 64-bit mode repeated string insn handling with zero count

Message ID 5846CD8F0200007800125B5F@prv-mh.provo.novell.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jan Beulich Dec. 6, 2016, 1:39 p.m. UTC
When a 32-bit address override is in effect these zero-extend all
registers which would also get updated in case of non-zero repeat
count.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
x86emul: correct 64-bit mode repeated string insn handling with zero count

When a 32-bit address override is in effect these zero-extend all
registers which would also get updated in case of non-zero repeat
count.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -933,15 +933,24 @@ static inline void put_loop_count(
         int_regs->ecx = ad_bytes == 4 ? (uint32_t)count : count;
 }
 
-#define get_rep_prefix() ({                                             \
+#define get_rep_prefix(using_si, using_di) ({                           \
     unsigned long max_reps = 1;                                         \
     if ( rep_prefix() )                                                 \
         max_reps = get_loop_count(&_regs, ad_bytes);                    \
     if ( max_reps == 0 )                                                \
     {                                                                   \
-        /* Skip the instruction if no repetitions are required. */      \
-        dst.type = OP_NONE;                                             \
-        goto writeback;                                                 \
+        /*                                                              \
+         * Skip the instruction if no repetitions are required, but     \
+         * zero extend involved registers first when using 32-bit       \
+         * addressing in 64-bit mode.                                   \
+         */                                                             \
+        if ( mode_64bit() && ad_bytes == 4 )                            \
+        {                                                               \
+            _regs.ecx = 0;                                              \
+            if ( using_si ) _regs.esi = (uint32_t)_regs.esi;            \
+            if ( using_di ) _regs.edi = (uint32_t)_regs.edi;            \
+        }                                                               \
+        goto no_writeback;                                              \
     }                                                                   \
     max_reps;                                                           \
 })
@@ -2908,7 +2917,7 @@ x86_emulate(
         goto imul;
 
     case 0x6c ... 0x6d: /* ins %dx,%es:%edi */ {
-        unsigned long nr_reps = get_rep_prefix();
+        unsigned long nr_reps = get_rep_prefix(false, true);
         unsigned int port = (uint16_t)_regs.edx;
         dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
         dst.mem.seg = x86_seg_es;
@@ -2935,7 +2944,7 @@ x86_emulate(
     }
 
     case 0x6e ... 0x6f: /* outs %esi,%dx */ {
-        unsigned long nr_reps = get_rep_prefix();
+        unsigned long nr_reps = get_rep_prefix(true, false);
         unsigned int port = (uint16_t)_regs.edx;
         dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
         ea.mem.off = truncate_ea_and_reps(_regs.esi, nr_reps, dst.bytes);
@@ -3167,7 +3176,8 @@ x86_emulate(
         break;
 
     case 0xa4 ... 0xa5: /* movs */ {
-        unsigned long nr_reps = get_rep_prefix();
+        unsigned long nr_reps = get_rep_prefix(true, true);
+
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.mem.seg = x86_seg_es;
         dst.mem.off = truncate_ea_and_reps(_regs.edi, nr_reps, dst.bytes);
@@ -3197,7 +3207,8 @@ x86_emulate(
 
     case 0xa6 ... 0xa7: /* cmps */ {
         unsigned long next_eip = _regs.eip;
-        get_rep_prefix();
+
+        get_rep_prefix(true, true);
         src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.esi),
                               &dst.val, dst.bytes, ctxt, ops)) ||
@@ -3218,7 +3229,8 @@ x86_emulate(
     }
 
     case 0xaa ... 0xab: /* stos */ {
-        unsigned long nr_reps = get_rep_prefix();
+        unsigned long nr_reps = get_rep_prefix(false, true);
+
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.mem.seg = x86_seg_es;
         dst.mem.off = truncate_ea(_regs.edi);
@@ -3241,8 +3253,8 @@ x86_emulate(
         break;
     }
 
-    case 0xac ... 0xad: /* lods */ {
-        /* unsigned long max_reps = */get_rep_prefix();
+    case 0xac ... 0xad: /* lods */
+        get_rep_prefix(true, false);
         dst.type  = OP_REG;
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.reg   = (unsigned long *)&_regs.eax;
@@ -3253,11 +3265,11 @@ x86_emulate(
             _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         put_rep_prefix(1);
         break;
-    }
 
     case 0xae ... 0xaf: /* scas */ {
         unsigned long next_eip = _regs.eip;
-        get_rep_prefix();
+
+        get_rep_prefix(false, true);
         src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.val = _regs.eax;
         if ( (rc = read_ulong(x86_seg_es, truncate_ea(_regs.edi),
@@ -5424,7 +5436,6 @@ x86_emulate(
         goto cannot_emulate;
     }
 
- writeback:
     switch ( dst.type )
     {
     case OP_REG:

Comments

Andrew Cooper Dec. 6, 2016, 4:35 p.m. UTC | #1
On 06/12/16 13:39, Jan Beulich wrote:
> When a 32-bit address override is in effect these zero-extend all
> registers which would also get updated in case of non-zero repeat
> count.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>.  However,

> @@ -5424,7 +5436,6 @@ x86_emulate(
>          goto cannot_emulate;
>      }
>  
> - writeback:

This removal highlights that the writeback and no_writeback lables are
incorrectly named.

There intended meaning is {no,}dest_writeback, but no_writeback still
performs a full commit of the shadow GPR state, which is what people
logically associate with the term "writeback" in this context.

I think we should have a followup patch renaming the no_writeback label
to to gpr_writeback.

~Andrew
Jan Beulich Dec. 7, 2016, 8:14 a.m. UTC | #2
>>> On 06.12.16 at 17:35, <andrew.cooper3@citrix.com> wrote:
> On 06/12/16 13:39, Jan Beulich wrote:
>> @@ -5424,7 +5436,6 @@ x86_emulate(
>>          goto cannot_emulate;
>>      }
>>  
>> - writeback:
> 
> This removal highlights that the writeback and no_writeback lables are
> incorrectly named.
> 
> There intended meaning is {no,}dest_writeback, but no_writeback still
> performs a full commit of the shadow GPR state, which is what people
> logically associate with the term "writeback" in this context.
> 
> I think we should have a followup patch renaming the no_writeback label
> to to gpr_writeback.

I can see about doing this.

Jan
Jan Beulich Dec. 7, 2016, 10:22 a.m. UTC | #3
>>> On 06.12.16 at 17:35, <andrew.cooper3@citrix.com> wrote:
> On 06/12/16 13:39, Jan Beulich wrote:
>> @@ -5424,7 +5436,6 @@ x86_emulate(
>>          goto cannot_emulate;
>>      }
>>  
>> - writeback:
> 
> This removal highlights that the writeback and no_writeback lables are
> incorrectly named.
> 
> There intended meaning is {no,}dest_writeback, but no_writeback still
> performs a full commit of the shadow GPR state, which is what people
> logically associate with the term "writeback" in this context.
> 
> I think we should have a followup patch renaming the no_writeback label
> to to gpr_writeback.

I've done this, but for some of the uses of the label the new name
doesn't appear to be much better than the old one ...

Jan
Andrew Cooper Dec. 22, 2016, 5:42 p.m. UTC | #4
On 07/12/16 10:22, Jan Beulich wrote:
>>>> On 06.12.16 at 17:35, <andrew.cooper3@citrix.com> wrote:
>> On 06/12/16 13:39, Jan Beulich wrote:
>>> @@ -5424,7 +5436,6 @@ x86_emulate(
>>>           goto cannot_emulate;
>>>       }
>>>   
>>> - writeback:
>> This removal highlights that the writeback and no_writeback lables are
>> incorrectly named.
>>
>> There intended meaning is {no,}dest_writeback, but no_writeback still
>> performs a full commit of the shadow GPR state, which is what people
>> logically associate with the term "writeback" in this context.
>>
>> I think we should have a followup patch renaming the no_writeback label
>> to to gpr_writeback.
> I've done this, but for some of the uses of the label the new name
> doesn't appear to be much better than the old one ...

Just had an idea.  How about "complete_insn" ?

~Andrew
diff mbox

Patch

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -933,15 +933,24 @@  static inline void put_loop_count(
         int_regs->ecx = ad_bytes == 4 ? (uint32_t)count : count;
 }
 
-#define get_rep_prefix() ({                                             \
+#define get_rep_prefix(using_si, using_di) ({                           \
     unsigned long max_reps = 1;                                         \
     if ( rep_prefix() )                                                 \
         max_reps = get_loop_count(&_regs, ad_bytes);                    \
     if ( max_reps == 0 )                                                \
     {                                                                   \
-        /* Skip the instruction if no repetitions are required. */      \
-        dst.type = OP_NONE;                                             \
-        goto writeback;                                                 \
+        /*                                                              \
+         * Skip the instruction if no repetitions are required, but     \
+         * zero extend involved registers first when using 32-bit       \
+         * addressing in 64-bit mode.                                   \
+         */                                                             \
+        if ( mode_64bit() && ad_bytes == 4 )                            \
+        {                                                               \
+            _regs.ecx = 0;                                              \
+            if ( using_si ) _regs.esi = (uint32_t)_regs.esi;            \
+            if ( using_di ) _regs.edi = (uint32_t)_regs.edi;            \
+        }                                                               \
+        goto no_writeback;                                              \
     }                                                                   \
     max_reps;                                                           \
 })
@@ -2908,7 +2917,7 @@  x86_emulate(
         goto imul;
 
     case 0x6c ... 0x6d: /* ins %dx,%es:%edi */ {
-        unsigned long nr_reps = get_rep_prefix();
+        unsigned long nr_reps = get_rep_prefix(false, true);
         unsigned int port = (uint16_t)_regs.edx;
         dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
         dst.mem.seg = x86_seg_es;
@@ -2935,7 +2944,7 @@  x86_emulate(
     }
 
     case 0x6e ... 0x6f: /* outs %esi,%dx */ {
-        unsigned long nr_reps = get_rep_prefix();
+        unsigned long nr_reps = get_rep_prefix(true, false);
         unsigned int port = (uint16_t)_regs.edx;
         dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
         ea.mem.off = truncate_ea_and_reps(_regs.esi, nr_reps, dst.bytes);
@@ -3167,7 +3176,8 @@  x86_emulate(
         break;
 
     case 0xa4 ... 0xa5: /* movs */ {
-        unsigned long nr_reps = get_rep_prefix();
+        unsigned long nr_reps = get_rep_prefix(true, true);
+
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.mem.seg = x86_seg_es;
         dst.mem.off = truncate_ea_and_reps(_regs.edi, nr_reps, dst.bytes);
@@ -3197,7 +3207,8 @@  x86_emulate(
 
     case 0xa6 ... 0xa7: /* cmps */ {
         unsigned long next_eip = _regs.eip;
-        get_rep_prefix();
+
+        get_rep_prefix(true, true);
         src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.esi),
                               &dst.val, dst.bytes, ctxt, ops)) ||
@@ -3218,7 +3229,8 @@  x86_emulate(
     }
 
     case 0xaa ... 0xab: /* stos */ {
-        unsigned long nr_reps = get_rep_prefix();
+        unsigned long nr_reps = get_rep_prefix(false, true);
+
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.mem.seg = x86_seg_es;
         dst.mem.off = truncate_ea(_regs.edi);
@@ -3241,8 +3253,8 @@  x86_emulate(
         break;
     }
 
-    case 0xac ... 0xad: /* lods */ {
-        /* unsigned long max_reps = */get_rep_prefix();
+    case 0xac ... 0xad: /* lods */
+        get_rep_prefix(true, false);
         dst.type  = OP_REG;
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.reg   = (unsigned long *)&_regs.eax;
@@ -3253,11 +3265,11 @@  x86_emulate(
             _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         put_rep_prefix(1);
         break;
-    }
 
     case 0xae ... 0xaf: /* scas */ {
         unsigned long next_eip = _regs.eip;
-        get_rep_prefix();
+
+        get_rep_prefix(false, true);
         src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.val = _regs.eax;
         if ( (rc = read_ulong(x86_seg_es, truncate_ea(_regs.edi),
@@ -5424,7 +5436,6 @@  x86_emulate(
         goto cannot_emulate;
     }
 
- writeback:
     switch ( dst.type )
     {
     case OP_REG: