diff mbox

[V2,2/4] pci: reserve 64 bit MMIO range for PCI hotplug

Message ID 1463340214-8721-3-git-send-email-marcel@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Marcel Apfelbaum May 15, 2016, 7:23 p.m. UTC
Using the firmware assigned MMIO ranges for 64-bit PCI window
leads to zero space for hot-plugging PCI devices over 4G.

PC machines can use the whole CPU addressable range after
the space reserved for memory-hotplug.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
---
 hw/pci/pci.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

Comments

Igor Mammedov May 16, 2016, 8:24 a.m. UTC | #1
On Sun, 15 May 2016 22:23:32 +0300
Marcel Apfelbaum <marcel@redhat.com> wrote:

> Using the firmware assigned MMIO ranges for 64-bit PCI window
> leads to zero space for hot-plugging PCI devices over 4G.
> 
> PC machines can use the whole CPU addressable range after
> the space reserved for memory-hotplug.
> 
> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> ---
>  hw/pci/pci.c | 16 ++++++++++++++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index bb605ef..44dd949 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -41,6 +41,7 @@
>  #include "hw/hotplug.h"
>  #include "hw/boards.h"
>  #include "qemu/cutils.h"
> +#include "hw/i386/pc.h"
>  
>  //#define DEBUG_PCI
>  #ifdef DEBUG_PCI
> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>  
>  void pci_bus_get_w64_range(PCIBus *bus, Range *range)
>  {
> -    range->begin = range->end = 0;
> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> +    Object *machine = qdev_get_machine();
> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
> +        PCMachineState *pcms = PC_MACHINE(machine);
> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
that line should break linking on other targets which don't have
pc_machine_get_reserved_memory_end()
probably for got to add stub.

> +        if (!range->begin) {
> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
> +                                    1ULL << 30);
> +        }
> +        range->end = 1ULL << 40; /* 40 bits physical */
x86 specific in generic code

ARM also has 64-bit PCI MMIO /git grep VIRT_PCIE_MMIO_HIGH/
perhaps range should be a property of PCI bus,
where a board sets its own values for start/size

> +    } else {
> +        range->begin = range->end = 0;
> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> +    }
>  }
>  
>  static bool pcie_has_upstream_port(PCIDevice *dev)
Marcel Apfelbaum May 16, 2016, 10:14 a.m. UTC | #2
On 05/16/2016 11:24 AM, Igor Mammedov wrote:
> On Sun, 15 May 2016 22:23:32 +0300
> Marcel Apfelbaum <marcel@redhat.com> wrote:
>
>> Using the firmware assigned MMIO ranges for 64-bit PCI window
>> leads to zero space for hot-plugging PCI devices over 4G.
>>
>> PC machines can use the whole CPU addressable range after
>> the space reserved for memory-hotplug.
>>
>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>> ---
>>   hw/pci/pci.c | 16 ++++++++++++++--
>>   1 file changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index bb605ef..44dd949 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -41,6 +41,7 @@
>>   #include "hw/hotplug.h"
>>   #include "hw/boards.h"
>>   #include "qemu/cutils.h"
>> +#include "hw/i386/pc.h"
>>
>>   //#define DEBUG_PCI
>>   #ifdef DEBUG_PCI
>> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>>


Hi Igor,
Thanks for reviewing this series.

>>   void pci_bus_get_w64_range(PCIBus *bus, Range *range)
>>   {
>> -    range->begin = range->end = 0;
>> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>> +    Object *machine = qdev_get_machine();
>> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
>> +        PCMachineState *pcms = PC_MACHINE(machine);
>> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
> that line should break linking on other targets which don't have
> pc_machine_get_reserved_memory_end()
> probably for got to add stub.
>

I cross-compiled all the targets with no problem.  It seems no stub is required.


>> +        if (!range->begin) {
>> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
>> +                                    1ULL << 30);
>> +        }
>> +        range->end = 1ULL << 40; /* 40 bits physical */
> x86 specific in generic code
>

I am aware I put x86 code in the generic pci code, but I limited it with
     if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
So we have a generic rule for getting the w64 range and a specific one for PC machines.

The alternative would be a w64 range per host-bridge, not bus.
Adding a pci_bus_get_w64_range function to PCIHostBridgeClass,
defaulting in current implementation for the base class and
overriding it for piix/q35 looks OK to you?

I thought about it, but it seemed over-engineered as opposed
to the 'simple' if statement, however I can try it if you think is better.

> ARM also has 64-bit PCI MMIO /git grep VIRT_PCIE_MMIO_HIGH/

I had a look and ARM does not use this infrastructure, it has its own abstractions,
a memmap list. This is the other reason I selected this implementation,
because is not really used by other targets (but it can be used in the future)


Thanks,
Marcel

> perhaps range should be a property of PCI bus,
> where a board sets its own values for start/size
>
>> +    } else {
>> +        range->begin = range->end = 0;
>> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>> +    }
>>   }
>>
>>   static bool pcie_has_upstream_port(PCIDevice *dev)
>
Igor Mammedov May 16, 2016, 2:19 p.m. UTC | #3
On Mon, 16 May 2016 13:14:51 +0300
Marcel Apfelbaum <marcel@redhat.com> wrote:

> On 05/16/2016 11:24 AM, Igor Mammedov wrote:
> > On Sun, 15 May 2016 22:23:32 +0300
> > Marcel Apfelbaum <marcel@redhat.com> wrote:
> >  
> >> Using the firmware assigned MMIO ranges for 64-bit PCI window
> >> leads to zero space for hot-plugging PCI devices over 4G.
> >>
> >> PC machines can use the whole CPU addressable range after
> >> the space reserved for memory-hotplug.
> >>
> >> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> >> ---
> >>   hw/pci/pci.c | 16 ++++++++++++++--
> >>   1 file changed, 14 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >> index bb605ef..44dd949 100644
> >> --- a/hw/pci/pci.c
> >> +++ b/hw/pci/pci.c
> >> @@ -41,6 +41,7 @@
> >>   #include "hw/hotplug.h"
> >>   #include "hw/boards.h"
> >>   #include "qemu/cutils.h"
> >> +#include "hw/i386/pc.h"
> >>
> >>   //#define DEBUG_PCI
> >>   #ifdef DEBUG_PCI
> >> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
> >>  
> 
> 
> Hi Igor,
> Thanks for reviewing this series.
> 
> >>   void pci_bus_get_w64_range(PCIBus *bus, Range *range)
> >>   {
> >> -    range->begin = range->end = 0;
> >> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> >> +    Object *machine = qdev_get_machine();
> >> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
> >> +        PCMachineState *pcms = PC_MACHINE(machine);
> >> +        range->begin = pc_machine_get_reserved_memory_end(pcms);  
> > that line should break linking on other targets which don't have
> > pc_machine_get_reserved_memory_end()
> > probably for got to add stub.
> >  
> 
> I cross-compiled all the targets with no problem.  It seems no stub is required.
./configure && make

  LINK  aarch64-softmmu/qemu-system-aarch64
../hw/pci/pci.o: In function `pci_bus_get_w64_range':
/home/imammedo/builds/qemu/hw/pci/pci.c:2474: undefined reference to `pc_machine_get_reserved_memory_end'
collect2: error: ld returned 1 exit status

> 
> 
> >> +        if (!range->begin) {
> >> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
> >> +                                    1ULL << 30);
> >> +        }
mayby move above hunk to pc_machine_get_reserved_memory_end()
so it would always return valid value.

> >> +        range->end = 1ULL << 40; /* 40 bits physical */  
> > x86 specific in generic code
> >  
> 
> I am aware I put x86 code in the generic pci code, but I limited it with
>      if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
> So we have a generic rule for getting the w64 range and a specific one for PC machines.
> 
> The alternative would be a w64 range per host-bridge, not bus.
> Adding a pci_bus_get_w64_range function to PCIHostBridgeClass,
> defaulting in current implementation for the base class and
> overriding it for piix/q35 looks OK to you?
> 
> I thought about it, but it seemed over-engineered as opposed
> to the 'simple' if statement, however I can try it if you think is better.
> 
> > ARM also has 64-bit PCI MMIO /git grep VIRT_PCIE_MMIO_HIGH/  
> 
> I had a look and ARM does not use this infrastructure, it has its own abstractions,
> a memmap list. This is the other reason I selected this implementation,
> because is not really used by other targets (but it can be used in the future)

if it's only for x86 and whatever was programmed by BIOS is ignored,
I'd drop PCI_HOST_PROP_PCI_HOLE64_*/pci_bus_get_w64_range() indirection
and just directly use pc_machine_get_reserved_memory_end()
from acpi-build.c

in that case you won't need a stub for pc_machine_get_reserved_memory_end()
as its usage will be limited only to hw/i386 scope.

Given opportunity, I'd drop PCI_HOST_PROP_PCI_HOLE* properties,
but for it we need ACK from libvirt side in case they are using it
for some reason.

> 
> 
> Thanks,
> Marcel
> 
> > perhaps range should be a property of PCI bus,
> > where a board sets its own values for start/size
> >  
> >> +    } else {
> >> +        range->begin = range->end = 0;
> >> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> >> +    }
> >>   }
> >>
> >>   static bool pcie_has_upstream_port(PCIDevice *dev)  
> >  
>
Igor Mammedov May 18, 2016, 1:59 p.m. UTC | #4
On Sun, 15 May 2016 22:23:32 +0300
Marcel Apfelbaum <marcel@redhat.com> wrote:

> Using the firmware assigned MMIO ranges for 64-bit PCI window
> leads to zero space for hot-plugging PCI devices over 4G.
> 
> PC machines can use the whole CPU addressable range after
> the space reserved for memory-hotplug.
> 
> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
that patch also has side effect of unconditionally adding
QWordMemory() resource in PCI0._CRS
on all machine types with QEMU generated ACPI tables.

Have you tested that it won't break boot of legacy OSes
(XP, WS2003, old linux with 32bit kernel)?

> ---
>  hw/pci/pci.c | 16 ++++++++++++++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index bb605ef..44dd949 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -41,6 +41,7 @@
>  #include "hw/hotplug.h"
>  #include "hw/boards.h"
>  #include "qemu/cutils.h"
> +#include "hw/i386/pc.h"
>  
>  //#define DEBUG_PCI
>  #ifdef DEBUG_PCI
> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>  
>  void pci_bus_get_w64_range(PCIBus *bus, Range *range)
>  {
> -    range->begin = range->end = 0;
> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> +    Object *machine = qdev_get_machine();
> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
> +        PCMachineState *pcms = PC_MACHINE(machine);
> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
> +        if (!range->begin) {
> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
> +                                    1ULL << 30);
> +        }
> +        range->end = 1ULL << 40; /* 40 bits physical */
> +    } else {
> +        range->begin = range->end = 0;
> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> +    }
>  }
>  
>  static bool pcie_has_upstream_port(PCIDevice *dev)
Marcel Apfelbaum May 18, 2016, 2:07 p.m. UTC | #5
On 05/16/2016 05:19 PM, Igor Mammedov wrote:
> On Mon, 16 May 2016 13:14:51 +0300
> Marcel Apfelbaum <marcel@redhat.com> wrote:
>
>> On 05/16/2016 11:24 AM, Igor Mammedov wrote:
>>> On Sun, 15 May 2016 22:23:32 +0300
>>> Marcel Apfelbaum <marcel@redhat.com> wrote:
>>>
>>>> Using the firmware assigned MMIO ranges for 64-bit PCI window
>>>> leads to zero space for hot-plugging PCI devices over 4G.
>>>>
>>>> PC machines can use the whole CPU addressable range after
>>>> the space reserved for memory-hotplug.
>>>>
>>>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>>>> ---
>>>>    hw/pci/pci.c | 16 ++++++++++++++--
>>>>    1 file changed, 14 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>> index bb605ef..44dd949 100644
>>>> --- a/hw/pci/pci.c
>>>> +++ b/hw/pci/pci.c
>>>> @@ -41,6 +41,7 @@
>>>>    #include "hw/hotplug.h"
>>>>    #include "hw/boards.h"
>>>>    #include "qemu/cutils.h"
>>>> +#include "hw/i386/pc.h"
>>>>
>>>>    //#define DEBUG_PCI
>>>>    #ifdef DEBUG_PCI
>>>> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>>>>
>>
>>
>> Hi Igor,
>> Thanks for reviewing this series.
>>
>>>>    void pci_bus_get_w64_range(PCIBus *bus, Range *range)
>>>>    {
>>>> -    range->begin = range->end = 0;
>>>> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>>>> +    Object *machine = qdev_get_machine();
>>>> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
>>>> +        PCMachineState *pcms = PC_MACHINE(machine);
>>>> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
>>> that line should break linking on other targets which don't have
>>> pc_machine_get_reserved_memory_end()
>>> probably for got to add stub.
>>>
>>
>> I cross-compiled all the targets with no problem.  It seems no stub is required.
> ./configure && make
>
>    LINK  aarch64-softmmu/qemu-system-aarch64
> ../hw/pci/pci.o: In function `pci_bus_get_w64_range':
> /home/imammedo/builds/qemu/hw/pci/pci.c:2474: undefined reference to `pc_machine_get_reserved_memory_end'
> collect2: error: ld returned 1 exit status

Ooops, I did configured it to cross-compile everything, but I somehow missed it.
I'll take care of it.


>
>>
>>
>>>> +        if (!range->begin) {
>>>> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
>>>> +                                    1ULL << 30);
>>>> +        }
> mayby move above hunk to pc_machine_get_reserved_memory_end()
> so it would always return valid value.
>
>>>> +        range->end = 1ULL << 40; /* 40 bits physical */
>>> x86 specific in generic code
>>>
>>
>> I am aware I put x86 code in the generic pci code, but I limited it with
>>       if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
>> So we have a generic rule for getting the w64 range and a specific one for PC machines.
>>
>> The alternative would be a w64 range per host-bridge, not bus.
>> Adding a pci_bus_get_w64_range function to PCIHostBridgeClass,
>> defaulting in current implementation for the base class and
>> overriding it for piix/q35 looks OK to you?
>>
>> I thought about it, but it seemed over-engineered as opposed
>> to the 'simple' if statement, however I can try it if you think is better.
>>
>>> ARM also has 64-bit PCI MMIO /git grep VIRT_PCIE_MMIO_HIGH/
>>
>> I had a look and ARM does not use this infrastructure, it has its own abstractions,
>> a memmap list. This is the other reason I selected this implementation,
>> because is not really used by other targets (but it can be used in the future)
>
> if it's only for x86 and whatever was programmed by BIOS is ignored,
> I'd drop PCI_HOST_PROP_PCI_HOLE64_*/pci_bus_get_w64_range() indirection
> and just directly use pc_machine_get_reserved_memory_end()
> from acpi-build.c
>
> in that case you won't need a stub for pc_machine_get_reserved_memory_end()
> as its usage will be limited only to hw/i386 scope.
>

Good point, I'll try this.

> Given opportunity, I'd drop PCI_HOST_PROP_PCI_HOLE* properties,
> but for it we need ACK from libvirt side in case they are using it
> for some reason.

It seems out of the scope of this series, however I can do it on top.

Thanks,
Marcel

>
>>
>>
>> Thanks,
>> Marcel
>>
>>> perhaps range should be a property of PCI bus,
>>> where a board sets its own values for start/size
>>>
>>>> +    } else {
>>>> +        range->begin = range->end = 0;
>>>> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>>>> +    }
>>>>    }
>>>>
>>>>    static bool pcie_has_upstream_port(PCIDevice *dev)
>>>
>>
>
Laszlo Ersek May 18, 2016, 2:10 p.m. UTC | #6
On 05/18/16 15:59, Igor Mammedov wrote:
> On Sun, 15 May 2016 22:23:32 +0300
> Marcel Apfelbaum <marcel@redhat.com> wrote:
> 
>> Using the firmware assigned MMIO ranges for 64-bit PCI window
>> leads to zero space for hot-plugging PCI devices over 4G.
>>
>> PC machines can use the whole CPU addressable range after
>> the space reserved for memory-hotplug.
>>
>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>

> that patch also has side effect of unconditionally adding
> QWordMemory() resource in PCI0._CRS
> on all machine types with QEMU generated ACPI tables.
> 
> Have you tested that it won't break boot of legacy OSes
> (XP, WS2003, old linux with 32bit kernel)?

Ah, very good point. I recall that in my initial patch (which I meant
mostly as a discussion-starter :)) I paid attention to call
aml_qword_memory() only when unavoidable:

http://thread.gmane.org/gmane.comp.emulators.qemu/400375

Thanks
Laszlo

>> ---
>>  hw/pci/pci.c | 16 ++++++++++++++--
>>  1 file changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index bb605ef..44dd949 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -41,6 +41,7 @@
>>  #include "hw/hotplug.h"
>>  #include "hw/boards.h"
>>  #include "qemu/cutils.h"
>> +#include "hw/i386/pc.h"
>>  
>>  //#define DEBUG_PCI
>>  #ifdef DEBUG_PCI
>> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>>  
>>  void pci_bus_get_w64_range(PCIBus *bus, Range *range)
>>  {
>> -    range->begin = range->end = 0;
>> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>> +    Object *machine = qdev_get_machine();
>> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
>> +        PCMachineState *pcms = PC_MACHINE(machine);
>> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
>> +        if (!range->begin) {
>> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
>> +                                    1ULL << 30);
>> +        }
>> +        range->end = 1ULL << 40; /* 40 bits physical */
>> +    } else {
>> +        range->begin = range->end = 0;
>> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>> +    }
>>  }
>>  
>>  static bool pcie_has_upstream_port(PCIDevice *dev)
>
Marcel Apfelbaum May 18, 2016, 2:11 p.m. UTC | #7
On 05/18/2016 04:59 PM, Igor Mammedov wrote:
> On Sun, 15 May 2016 22:23:32 +0300
> Marcel Apfelbaum <marcel@redhat.com> wrote:
>
>> Using the firmware assigned MMIO ranges for 64-bit PCI window
>> leads to zero space for hot-plugging PCI devices over 4G.
>>
>> PC machines can use the whole CPU addressable range after
>> the space reserved for memory-hotplug.
>>
>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> that patch also has side effect of unconditionally adding
> QWordMemory() resource in PCI0._CRS
> on all machine types with QEMU generated ACPI tables.

Right.

>
> Have you tested that it won't break boot of legacy OSes
> (XP, WS2003, old linux with 32bit kernel)?

I tested it with an old 32-bit CPU with a Linux guest and all
I received was a warning message that is not CPU addressable and will be discarded.
I didn't try it with WinXp though, I'll give it a try and report the issues, if any.

Thanks,
Marcel

>
>> ---
>>   hw/pci/pci.c | 16 ++++++++++++++--
>>   1 file changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index bb605ef..44dd949 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -41,6 +41,7 @@
>>   #include "hw/hotplug.h"
>>   #include "hw/boards.h"
>>   #include "qemu/cutils.h"
>> +#include "hw/i386/pc.h"
>>
>>   //#define DEBUG_PCI
>>   #ifdef DEBUG_PCI
>> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>>
>>   void pci_bus_get_w64_range(PCIBus *bus, Range *range)
>>   {
>> -    range->begin = range->end = 0;
>> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>> +    Object *machine = qdev_get_machine();
>> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
>> +        PCMachineState *pcms = PC_MACHINE(machine);
>> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
>> +        if (!range->begin) {
>> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
>> +                                    1ULL << 30);
>> +        }
>> +        range->end = 1ULL << 40; /* 40 bits physical */
>> +    } else {
>> +        range->begin = range->end = 0;
>> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>> +    }
>>   }
>>
>>   static bool pcie_has_upstream_port(PCIDevice *dev)
>
Michael S. Tsirkin May 18, 2016, 2:11 p.m. UTC | #8
On Wed, May 18, 2016 at 03:59:29PM +0200, Igor Mammedov wrote:
> On Sun, 15 May 2016 22:23:32 +0300
> Marcel Apfelbaum <marcel@redhat.com> wrote:
> 
> > Using the firmware assigned MMIO ranges for 64-bit PCI window
> > leads to zero space for hot-plugging PCI devices over 4G.
> > 
> > PC machines can use the whole CPU addressable range after
> > the space reserved for memory-hotplug.
> > 
> > Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> that patch also has side effect of unconditionally adding
> QWordMemory() resource in PCI0._CRS
> on all machine types with QEMU generated ACPI tables.
> 
> Have you tested that it won't break boot of legacy OSes
> (XP, WS2003, old linux with 32bit kernel)?

It's almost sure it break it.
Maybe you can check _REV in _CRS to work around this for XP.

> > ---
> >  hw/pci/pci.c | 16 ++++++++++++++--
> >  1 file changed, 14 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > index bb605ef..44dd949 100644
> > --- a/hw/pci/pci.c
> > +++ b/hw/pci/pci.c
> > @@ -41,6 +41,7 @@
> >  #include "hw/hotplug.h"
> >  #include "hw/boards.h"
> >  #include "qemu/cutils.h"
> > +#include "hw/i386/pc.h"
> >  
> >  //#define DEBUG_PCI
> >  #ifdef DEBUG_PCI
> > @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
> >  
> >  void pci_bus_get_w64_range(PCIBus *bus, Range *range)
> >  {
> > -    range->begin = range->end = 0;
> > -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> > +    Object *machine = qdev_get_machine();
> > +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
> > +        PCMachineState *pcms = PC_MACHINE(machine);
> > +        range->begin = pc_machine_get_reserved_memory_end(pcms);
> > +        if (!range->begin) {
> > +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
> > +                                    1ULL << 30);
> > +        }
> > +        range->end = 1ULL << 40; /* 40 bits physical */
> > +    } else {
> > +        range->begin = range->end = 0;
> > +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> > +    }
> >  }
> >  
> >  static bool pcie_has_upstream_port(PCIDevice *dev)
Marcel Apfelbaum May 18, 2016, 2:12 p.m. UTC | #9
On 05/18/2016 05:11 PM, Michael S. Tsirkin wrote:
> On Wed, May 18, 2016 at 03:59:29PM +0200, Igor Mammedov wrote:
>> On Sun, 15 May 2016 22:23:32 +0300
>> Marcel Apfelbaum <marcel@redhat.com> wrote:
>>
>>> Using the firmware assigned MMIO ranges for 64-bit PCI window
>>> leads to zero space for hot-plugging PCI devices over 4G.
>>>
>>> PC machines can use the whole CPU addressable range after
>>> the space reserved for memory-hotplug.
>>>
>>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>> that patch also has side effect of unconditionally adding
>> QWordMemory() resource in PCI0._CRS
>> on all machine types with QEMU generated ACPI tables.
>>
>> Have you tested that it won't break boot of legacy OSes
>> (XP, WS2003, old linux with 32bit kernel)?
>
> It's almost sure it break it.
> Maybe you can check _REV in _CRS to work around this for XP.

I'll try it.

Thanks,
Marcel

>
>>> ---
>>>   hw/pci/pci.c | 16 ++++++++++++++--
>>>   1 file changed, 14 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>> index bb605ef..44dd949 100644
>>> --- a/hw/pci/pci.c
>>> +++ b/hw/pci/pci.c
>>> @@ -41,6 +41,7 @@
>>>   #include "hw/hotplug.h"
>>>   #include "hw/boards.h"
>>>   #include "qemu/cutils.h"
>>> +#include "hw/i386/pc.h"
>>>
>>>   //#define DEBUG_PCI
>>>   #ifdef DEBUG_PCI
>>> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>>>
>>>   void pci_bus_get_w64_range(PCIBus *bus, Range *range)
>>>   {
>>> -    range->begin = range->end = 0;
>>> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>>> +    Object *machine = qdev_get_machine();
>>> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
>>> +        PCMachineState *pcms = PC_MACHINE(machine);
>>> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
>>> +        if (!range->begin) {
>>> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
>>> +                                    1ULL << 30);
>>> +        }
>>> +        range->end = 1ULL << 40; /* 40 bits physical */
>>> +    } else {
>>> +        range->begin = range->end = 0;
>>> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>>> +    }
>>>   }
>>>
>>>   static bool pcie_has_upstream_port(PCIDevice *dev)
Michael S. Tsirkin May 18, 2016, 2:14 p.m. UTC | #10
On Sun, May 15, 2016 at 10:23:32PM +0300, Marcel Apfelbaum wrote:
> Using the firmware assigned MMIO ranges for 64-bit PCI window
> leads to zero space for hot-plugging PCI devices over 4G.
> 
> PC machines can use the whole CPU addressable range after
> the space reserved for memory-hotplug.
> 
> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> ---
>  hw/pci/pci.c | 16 ++++++++++++++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index bb605ef..44dd949 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -41,6 +41,7 @@
>  #include "hw/hotplug.h"
>  #include "hw/boards.h"
>  #include "qemu/cutils.h"
> +#include "hw/i386/pc.h"
>  
>  //#define DEBUG_PCI
>  #ifdef DEBUG_PCI

I don't want pci to depend on PC.
Pls find another way to do this.


> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>  
>  void pci_bus_get_w64_range(PCIBus *bus, Range *range)
>  {
> -    range->begin = range->end = 0;
> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> +    Object *machine = qdev_get_machine();
An empty line won't hurt here after the declaration.

> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
> +        PCMachineState *pcms = PC_MACHINE(machine);

An empty line won't hurt here after the declaration.

> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
> +        if (!range->begin) {
> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
> +                                    1ULL << 30);

Why 30? what is the logic here?

> +        }
> +        range->end = 1ULL << 40; /* 40 bits physical */

This comment does not help. Physical what? And why is 40 bit right?

> +    } else {
> +        range->begin = range->end = 0;
> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);

When does this trigger?
Pls add a comment.

> +    }
>  }
>  
>  static bool pcie_has_upstream_port(PCIDevice *dev)
> -- 
> 2.4.3
Igor Mammedov May 18, 2016, 2:26 p.m. UTC | #11
On Wed, 18 May 2016 17:07:05 +0300
Marcel Apfelbaum <marcel@redhat.com> wrote:

> On 05/16/2016 05:19 PM, Igor Mammedov wrote:
> > On Mon, 16 May 2016 13:14:51 +0300
> > Marcel Apfelbaum <marcel@redhat.com> wrote:
> >  
> >> On 05/16/2016 11:24 AM, Igor Mammedov wrote:  
> >>> On Sun, 15 May 2016 22:23:32 +0300
> >>> Marcel Apfelbaum <marcel@redhat.com> wrote:
> >>>  
> >>>> Using the firmware assigned MMIO ranges for 64-bit PCI window
> >>>> leads to zero space for hot-plugging PCI devices over 4G.
> >>>>
> >>>> PC machines can use the whole CPU addressable range after
> >>>> the space reserved for memory-hotplug.
> >>>>
> >>>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> >>>> ---
> >>>>    hw/pci/pci.c | 16 ++++++++++++++--
> >>>>    1 file changed, 14 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>>> index bb605ef..44dd949 100644
> >>>> --- a/hw/pci/pci.c
> >>>> +++ b/hw/pci/pci.c
> >>>> @@ -41,6 +41,7 @@
> >>>>    #include "hw/hotplug.h"
> >>>>    #include "hw/boards.h"
> >>>>    #include "qemu/cutils.h"
> >>>> +#include "hw/i386/pc.h"
> >>>>
> >>>>    //#define DEBUG_PCI
> >>>>    #ifdef DEBUG_PCI
> >>>> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
> >>>>  
> >>
> >>
> >> Hi Igor,
> >> Thanks for reviewing this series.
> >>  
> >>>>    void pci_bus_get_w64_range(PCIBus *bus, Range *range)
> >>>>    {
> >>>> -    range->begin = range->end = 0;
> >>>> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> >>>> +    Object *machine = qdev_get_machine();
> >>>> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
> >>>> +        PCMachineState *pcms = PC_MACHINE(machine);
> >>>> +        range->begin = pc_machine_get_reserved_memory_end(pcms);  
> >>> that line should break linking on other targets which don't have
> >>> pc_machine_get_reserved_memory_end()
> >>> probably for got to add stub.
> >>>  
> >>
> >> I cross-compiled all the targets with no problem.  It seems no stub is required.  
> > ./configure && make
> >
> >    LINK  aarch64-softmmu/qemu-system-aarch64
> > ../hw/pci/pci.o: In function `pci_bus_get_w64_range':
> > /home/imammedo/builds/qemu/hw/pci/pci.c:2474: undefined reference to `pc_machine_get_reserved_memory_end'
> > collect2: error: ld returned 1 exit status  
> 
> Ooops, I did configured it to cross-compile everything, but I somehow missed it.
> I'll take care of it.
> 
> 
> >  
> >>
> >>  
> >>>> +        if (!range->begin) {
> >>>> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
> >>>> +                                    1ULL << 30);
> >>>> +        }  
> > mayby move above hunk to pc_machine_get_reserved_memory_end()
> > so it would always return valid value.
> >  
> >>>> +        range->end = 1ULL << 40; /* 40 bits physical */  
> >>> x86 specific in generic code
> >>>  
> >>
> >> I am aware I put x86 code in the generic pci code, but I limited it with
> >>       if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
> >> So we have a generic rule for getting the w64 range and a specific one for PC machines.
> >>
> >> The alternative would be a w64 range per host-bridge, not bus.
> >> Adding a pci_bus_get_w64_range function to PCIHostBridgeClass,
> >> defaulting in current implementation for the base class and
> >> overriding it for piix/q35 looks OK to you?
> >>
> >> I thought about it, but it seemed over-engineered as opposed
> >> to the 'simple' if statement, however I can try it if you think is better.
> >>  
> >>> ARM also has 64-bit PCI MMIO /git grep VIRT_PCIE_MMIO_HIGH/  
> >>
> >> I had a look and ARM does not use this infrastructure, it has its own abstractions,
> >> a memmap list. This is the other reason I selected this implementation,
> >> because is not really used by other targets (but it can be used in the future)  
> >
> > if it's only for x86 and whatever was programmed by BIOS is ignored,
> > I'd drop PCI_HOST_PROP_PCI_HOLE64_*/pci_bus_get_w64_range() indirection
> > and just directly use pc_machine_get_reserved_memory_end()
> > from acpi-build.c
> >
> > in that case you won't need a stub for pc_machine_get_reserved_memory_end()
> > as its usage will be limited only to hw/i386 scope.
> >  
> 
> Good point, I'll try this.
> 
> > Given opportunity, I'd drop PCI_HOST_PROP_PCI_HOLE* properties,
> > but for it we need ACK from libvirt side in case they are using it
> > for some reason.  
> 
> It seems out of the scope of this series, however I can do it on top.
it's just nice to have cleanup, but I don't insist on it.

the thing here is that if pc_machine_get_reserved_memory_end() were used
directly for acpi-build.c then properties as is would give old/different values.

> 
> Thanks,
> Marcel
> 
> >  
> >>
> >>
> >> Thanks,
> >> Marcel
> >>  
> >>> perhaps range should be a property of PCI bus,
> >>> where a board sets its own values for start/size
> >>>  
> >>>> +    } else {
> >>>> +        range->begin = range->end = 0;
> >>>> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> >>>> +    }
> >>>>    }
> >>>>
> >>>>    static bool pcie_has_upstream_port(PCIDevice *dev)  
> >>>  
> >>  
> >  
>
Igor Mammedov May 18, 2016, 2:31 p.m. UTC | #12
On Wed, 18 May 2016 17:12:09 +0300
Marcel Apfelbaum <marcel@redhat.com> wrote:

> On 05/18/2016 05:11 PM, Michael S. Tsirkin wrote:
> > On Wed, May 18, 2016 at 03:59:29PM +0200, Igor Mammedov wrote:  
> >> On Sun, 15 May 2016 22:23:32 +0300
> >> Marcel Apfelbaum <marcel@redhat.com> wrote:
> >>  
> >>> Using the firmware assigned MMIO ranges for 64-bit PCI window
> >>> leads to zero space for hot-plugging PCI devices over 4G.
> >>>
> >>> PC machines can use the whole CPU addressable range after
> >>> the space reserved for memory-hotplug.
> >>>
> >>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>  
> >> that patch also has side effect of unconditionally adding
> >> QWordMemory() resource in PCI0._CRS
> >> on all machine types with QEMU generated ACPI tables.
> >>
> >> Have you tested that it won't break boot of legacy OSes
> >> (XP, WS2003, old linux with 32bit kernel)?  
> >
> > It's almost sure it break it.
> > Maybe you can check _REV in _CRS to work around this for XP.  
> 
> I'll try it.
but only after you check if just presence of QWord would crash XP,
so in case it doesn't crash we would keep _CRS simple static
structure.

I very vaguely recall that XP ignored QWord in PCI0._CRS,
but it was long time ago so it won't hurt to recheck.

> 
> Thanks,
> Marcel
> 
> >  
> >>> ---
> >>>   hw/pci/pci.c | 16 ++++++++++++++--
> >>>   1 file changed, 14 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>> index bb605ef..44dd949 100644
> >>> --- a/hw/pci/pci.c
> >>> +++ b/hw/pci/pci.c
> >>> @@ -41,6 +41,7 @@
> >>>   #include "hw/hotplug.h"
> >>>   #include "hw/boards.h"
> >>>   #include "qemu/cutils.h"
> >>> +#include "hw/i386/pc.h"
> >>>
> >>>   //#define DEBUG_PCI
> >>>   #ifdef DEBUG_PCI
> >>> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
> >>>
> >>>   void pci_bus_get_w64_range(PCIBus *bus, Range *range)
> >>>   {
> >>> -    range->begin = range->end = 0;
> >>> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> >>> +    Object *machine = qdev_get_machine();
> >>> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
> >>> +        PCMachineState *pcms = PC_MACHINE(machine);
> >>> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
> >>> +        if (!range->begin) {
> >>> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
> >>> +                                    1ULL << 30);
> >>> +        }
> >>> +        range->end = 1ULL << 40; /* 40 bits physical */
> >>> +    } else {
> >>> +        range->begin = range->end = 0;
> >>> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> >>> +    }
> >>>   }
> >>>
> >>>   static bool pcie_has_upstream_port(PCIDevice *dev)  
>
Marcel Apfelbaum May 18, 2016, 2:33 p.m. UTC | #13
On 05/18/2016 05:26 PM, Igor Mammedov wrote:
> On Wed, 18 May 2016 17:07:05 +0300
> Marcel Apfelbaum <marcel@redhat.com> wrote:
>
>> On 05/16/2016 05:19 PM, Igor Mammedov wrote:
>>> On Mon, 16 May 2016 13:14:51 +0300
>>> Marcel Apfelbaum <marcel@redhat.com> wrote:
>>>
>>>> On 05/16/2016 11:24 AM, Igor Mammedov wrote:
>>>>> On Sun, 15 May 2016 22:23:32 +0300
>>>>> Marcel Apfelbaum <marcel@redhat.com> wrote:
>>>>>
>>>>>> Using the firmware assigned MMIO ranges for 64-bit PCI window
>>>>>> leads to zero space for hot-plugging PCI devices over 4G.
>>>>>>
>>>>>> PC machines can use the whole CPU addressable range after
>>>>>> the space reserved for memory-hotplug.
>>>>>>
>>>>>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>>>>>> ---
>>>>>>     hw/pci/pci.c | 16 ++++++++++++++--
>>>>>>     1 file changed, 14 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>>>> index bb605ef..44dd949 100644
>>>>>> --- a/hw/pci/pci.c
>>>>>> +++ b/hw/pci/pci.c
>>>>>> @@ -41,6 +41,7 @@
>>>>>>     #include "hw/hotplug.h"
>>>>>>     #include "hw/boards.h"
>>>>>>     #include "qemu/cutils.h"
>>>>>> +#include "hw/i386/pc.h"
>>>>>>
>>>>>>     //#define DEBUG_PCI
>>>>>>     #ifdef DEBUG_PCI
>>>>>> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>>>>>>
>>>>
>>>>
>>>> Hi Igor,
>>>> Thanks for reviewing this series.
>>>>
>>>>>>     void pci_bus_get_w64_range(PCIBus *bus, Range *range)
>>>>>>     {
>>>>>> -    range->begin = range->end = 0;
>>>>>> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>>>>>> +    Object *machine = qdev_get_machine();
>>>>>> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
>>>>>> +        PCMachineState *pcms = PC_MACHINE(machine);
>>>>>> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
>>>>> that line should break linking on other targets which don't have
>>>>> pc_machine_get_reserved_memory_end()
>>>>> probably for got to add stub.
>>>>>
>>>>
>>>> I cross-compiled all the targets with no problem.  It seems no stub is required.
>>> ./configure && make
>>>
>>>     LINK  aarch64-softmmu/qemu-system-aarch64
>>> ../hw/pci/pci.o: In function `pci_bus_get_w64_range':
>>> /home/imammedo/builds/qemu/hw/pci/pci.c:2474: undefined reference to `pc_machine_get_reserved_memory_end'
>>> collect2: error: ld returned 1 exit status
>>
>> Ooops, I did configured it to cross-compile everything, but I somehow missed it.
>> I'll take care of it.
>>
>>
>>>
>>>>
>>>>
>>>>>> +        if (!range->begin) {
>>>>>> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
>>>>>> +                                    1ULL << 30);
>>>>>> +        }
>>> mayby move above hunk to pc_machine_get_reserved_memory_end()
>>> so it would always return valid value.
>>>
>>>>>> +        range->end = 1ULL << 40; /* 40 bits physical */
>>>>> x86 specific in generic code
>>>>>
>>>>
>>>> I am aware I put x86 code in the generic pci code, but I limited it with
>>>>        if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
>>>> So we have a generic rule for getting the w64 range and a specific one for PC machines.
>>>>
>>>> The alternative would be a w64 range per host-bridge, not bus.
>>>> Adding a pci_bus_get_w64_range function to PCIHostBridgeClass,
>>>> defaulting in current implementation for the base class and
>>>> overriding it for piix/q35 looks OK to you?
>>>>
>>>> I thought about it, but it seemed over-engineered as opposed
>>>> to the 'simple' if statement, however I can try it if you think is better.
>>>>
>>>>> ARM also has 64-bit PCI MMIO /git grep VIRT_PCIE_MMIO_HIGH/
>>>>
>>>> I had a look and ARM does not use this infrastructure, it has its own abstractions,
>>>> a memmap list. This is the other reason I selected this implementation,
>>>> because is not really used by other targets (but it can be used in the future)
>>>
>>> if it's only for x86 and whatever was programmed by BIOS is ignored,
>>> I'd drop PCI_HOST_PROP_PCI_HOLE64_*/pci_bus_get_w64_range() indirection
>>> and just directly use pc_machine_get_reserved_memory_end()
>>> from acpi-build.c
>>>
>>> in that case you won't need a stub for pc_machine_get_reserved_memory_end()
>>> as its usage will be limited only to hw/i386 scope.
>>>
>>
>> Good point, I'll try this.
>>
>>> Given opportunity, I'd drop PCI_HOST_PROP_PCI_HOLE* properties,
>>> but for it we need ACK from libvirt side in case they are using it
>>> for some reason.
>>
>> It seems out of the scope of this series, however I can do it on top.
> it's just nice to have cleanup, but I don't insist on it.
>
> the thing here is that if pc_machine_get_reserved_memory_end() were used
> directly for acpi-build.c then properties as is would give old/different values.

You are right, I'll remove at least the 64-bit properties.

Thanks,
Marcel

>
>>
>> Thanks,
>> Marcel
>>
>>>
>>>>
>>>>
>>>> Thanks,
>>>> Marcel
>>>>
>>>>> perhaps range should be a property of PCI bus,
>>>>> where a board sets its own values for start/size
>>>>>
>>>>>> +    } else {
>>>>>> +        range->begin = range->end = 0;
>>>>>> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>>>>>> +    }
>>>>>>     }
>>>>>>
>>>>>>     static bool pcie_has_upstream_port(PCIDevice *dev)
>>>>>
>>>>
>>>
>>
>
Marcel Apfelbaum May 18, 2016, 2:33 p.m. UTC | #14
On 05/18/2016 05:31 PM, Igor Mammedov wrote:
> On Wed, 18 May 2016 17:12:09 +0300
> Marcel Apfelbaum <marcel@redhat.com> wrote:
>
>> On 05/18/2016 05:11 PM, Michael S. Tsirkin wrote:
>>> On Wed, May 18, 2016 at 03:59:29PM +0200, Igor Mammedov wrote:
>>>> On Sun, 15 May 2016 22:23:32 +0300
>>>> Marcel Apfelbaum <marcel@redhat.com> wrote:
>>>>
>>>>> Using the firmware assigned MMIO ranges for 64-bit PCI window
>>>>> leads to zero space for hot-plugging PCI devices over 4G.
>>>>>
>>>>> PC machines can use the whole CPU addressable range after
>>>>> the space reserved for memory-hotplug.
>>>>>
>>>>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>>>> that patch also has side effect of unconditionally adding
>>>> QWordMemory() resource in PCI0._CRS
>>>> on all machine types with QEMU generated ACPI tables.
>>>>
>>>> Have you tested that it won't break boot of legacy OSes
>>>> (XP, WS2003, old linux with 32bit kernel)?
>>>
>>> It's almost sure it break it.
>>> Maybe you can check _REV in _CRS to work around this for XP.
>>
>> I'll try it.
> but only after you check if just presence of QWord would crash XP,
> so in case it doesn't crash we would keep _CRS simple static
> structure.
>
> I very vaguely recall that XP ignored QWord in PCI0._CRS,
> but it was long time ago so it won't hurt to recheck.

Thanks for the pointer!
Marcel

>
>>
>> Thanks,
>> Marcel
>>
>>>
>>>>> ---
>>>>>    hw/pci/pci.c | 16 ++++++++++++++--
>>>>>    1 file changed, 14 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>>> index bb605ef..44dd949 100644
>>>>> --- a/hw/pci/pci.c
>>>>> +++ b/hw/pci/pci.c
>>>>> @@ -41,6 +41,7 @@
>>>>>    #include "hw/hotplug.h"
>>>>>    #include "hw/boards.h"
>>>>>    #include "qemu/cutils.h"
>>>>> +#include "hw/i386/pc.h"
>>>>>
>>>>>    //#define DEBUG_PCI
>>>>>    #ifdef DEBUG_PCI
>>>>> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>>>>>
>>>>>    void pci_bus_get_w64_range(PCIBus *bus, Range *range)
>>>>>    {
>>>>> -    range->begin = range->end = 0;
>>>>> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>>>>> +    Object *machine = qdev_get_machine();
>>>>> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
>>>>> +        PCMachineState *pcms = PC_MACHINE(machine);
>>>>> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
>>>>> +        if (!range->begin) {
>>>>> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
>>>>> +                                    1ULL << 30);
>>>>> +        }
>>>>> +        range->end = 1ULL << 40; /* 40 bits physical */
>>>>> +    } else {
>>>>> +        range->begin = range->end = 0;
>>>>> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>>>>> +    }
>>>>>    }
>>>>>
>>>>>    static bool pcie_has_upstream_port(PCIDevice *dev)
>>
>
Michael S. Tsirkin May 18, 2016, 2:42 p.m. UTC | #15
On Wed, May 18, 2016 at 04:31:46PM +0200, Igor Mammedov wrote:
> On Wed, 18 May 2016 17:12:09 +0300
> Marcel Apfelbaum <marcel@redhat.com> wrote:
> 
> > On 05/18/2016 05:11 PM, Michael S. Tsirkin wrote:
> > > On Wed, May 18, 2016 at 03:59:29PM +0200, Igor Mammedov wrote:  
> > >> On Sun, 15 May 2016 22:23:32 +0300
> > >> Marcel Apfelbaum <marcel@redhat.com> wrote:
> > >>  
> > >>> Using the firmware assigned MMIO ranges for 64-bit PCI window
> > >>> leads to zero space for hot-plugging PCI devices over 4G.
> > >>>
> > >>> PC machines can use the whole CPU addressable range after
> > >>> the space reserved for memory-hotplug.
> > >>>
> > >>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>  
> > >> that patch also has side effect of unconditionally adding
> > >> QWordMemory() resource in PCI0._CRS
> > >> on all machine types with QEMU generated ACPI tables.
> > >>
> > >> Have you tested that it won't break boot of legacy OSes
> > >> (XP, WS2003, old linux with 32bit kernel)?  
> > >
> > > It's almost sure it break it.
> > > Maybe you can check _REV in _CRS to work around this for XP.  
> > 
> > I'll try it.
> but only after you check if just presence of QWord would crash XP,
> so in case it doesn't crash we would keep _CRS simple static
> structure.
> 
> I very vaguely recall that XP ignored QWord in PCI0._CRS,
> but it was long time ago so it won't hurt to recheck.

I played with different guests (32 and 64 bit)
at some point.

Generally, windows tends to crash when CRS resources exceed the
supported limits
of physical memory (sometimes with weird off by one errors,
e.g. win7 32 bit seems to survive with a 36 bit pci hole even though
this means the max address is 2^37-1 which it can't address).

This might depend on CPU as well.

Which makes me ask: why don't we fix this in BIOS?
If you want it to allocate a large window, do it.

> > 
> > Thanks,
> > Marcel
> > 
> > >  
> > >>> ---
> > >>>   hw/pci/pci.c | 16 ++++++++++++++--
> > >>>   1 file changed, 14 insertions(+), 2 deletions(-)
> > >>>
> > >>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > >>> index bb605ef..44dd949 100644
> > >>> --- a/hw/pci/pci.c
> > >>> +++ b/hw/pci/pci.c
> > >>> @@ -41,6 +41,7 @@
> > >>>   #include "hw/hotplug.h"
> > >>>   #include "hw/boards.h"
> > >>>   #include "qemu/cutils.h"
> > >>> +#include "hw/i386/pc.h"
> > >>>
> > >>>   //#define DEBUG_PCI
> > >>>   #ifdef DEBUG_PCI
> > >>> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
> > >>>
> > >>>   void pci_bus_get_w64_range(PCIBus *bus, Range *range)
> > >>>   {
> > >>> -    range->begin = range->end = 0;
> > >>> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> > >>> +    Object *machine = qdev_get_machine();
> > >>> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
> > >>> +        PCMachineState *pcms = PC_MACHINE(machine);
> > >>> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
> > >>> +        if (!range->begin) {
> > >>> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
> > >>> +                                    1ULL << 30);
> > >>> +        }
> > >>> +        range->end = 1ULL << 40; /* 40 bits physical */
> > >>> +    } else {
> > >>> +        range->begin = range->end = 0;
> > >>> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> > >>> +    }
> > >>>   }
> > >>>
> > >>>   static bool pcie_has_upstream_port(PCIDevice *dev)  
> >
Marcel Apfelbaum May 18, 2016, 2:43 p.m. UTC | #16
On 05/18/2016 05:14 PM, Michael S. Tsirkin wrote:
> On Sun, May 15, 2016 at 10:23:32PM +0300, Marcel Apfelbaum wrote:
>> Using the firmware assigned MMIO ranges for 64-bit PCI window
>> leads to zero space for hot-plugging PCI devices over 4G.
>>
>> PC machines can use the whole CPU addressable range after
>> the space reserved for memory-hotplug.
>>
>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>> ---
>>   hw/pci/pci.c | 16 ++++++++++++++--
>>   1 file changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index bb605ef..44dd949 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -41,6 +41,7 @@
>>   #include "hw/hotplug.h"
>>   #include "hw/boards.h"
>>   #include "qemu/cutils.h"
>> +#include "hw/i386/pc.h"
>>
>>   //#define DEBUG_PCI
>>   #ifdef DEBUG_PCI
>
> I don't want pci to depend on PC.
> Pls find another way to do this.
>

Igor has an idea to not call pci_dev_get_w64 and make the computations
in the acpi code. I'll follow this idea.

>
>> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>>
>>   void pci_bus_get_w64_range(PCIBus *bus, Range *range)
>>   {
>> -    range->begin = range->end = 0;
>> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>> +    Object *machine = qdev_get_machine();
> An empty line won't hurt here after the declaration.
>
>> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
>> +        PCMachineState *pcms = PC_MACHINE(machine);
>
> An empty line won't hurt here after the declaration.
>
>> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
>> +        if (!range->begin) {
>> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
>> +                                    1ULL << 30);
>
> Why 30? what is the logic here?
>

Will put it inside pci_dev_get_w64 and explain.

>> +        }
>> +        range->end = 1ULL << 40; /* 40 bits physical */
>
> This comment does not help. Physical what? And why is 40 bit right?

It refers to how many bits are CPU addressable. (I will add a better comment)
cpu_x86_cpuid from target-i386/cpu.c it always returns 40
so hard-coding it looked like a safe choice.

Thanks,
Marcel

>> +    } else {
>> +        range->begin = range->end = 0;
>> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>
> When does this trigger?
> Pls add a comment.
>
>> +    }
>>   }
>>
>>   static bool pcie_has_upstream_port(PCIDevice *dev)
>> --
>> 2.4.3
Marcel Apfelbaum May 18, 2016, 2:52 p.m. UTC | #17
On 05/18/2016 05:42 PM, Michael S. Tsirkin wrote:
> On Wed, May 18, 2016 at 04:31:46PM +0200, Igor Mammedov wrote:
>> On Wed, 18 May 2016 17:12:09 +0300
>> Marcel Apfelbaum <marcel@redhat.com> wrote:
>>
>>> On 05/18/2016 05:11 PM, Michael S. Tsirkin wrote:
>>>> On Wed, May 18, 2016 at 03:59:29PM +0200, Igor Mammedov wrote:
>>>>> On Sun, 15 May 2016 22:23:32 +0300
>>>>> Marcel Apfelbaum <marcel@redhat.com> wrote:
>>>>>
>>>>>> Using the firmware assigned MMIO ranges for 64-bit PCI window
>>>>>> leads to zero space for hot-plugging PCI devices over 4G.
>>>>>>
>>>>>> PC machines can use the whole CPU addressable range after
>>>>>> the space reserved for memory-hotplug.
>>>>>>
>>>>>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>>>>> that patch also has side effect of unconditionally adding
>>>>> QWordMemory() resource in PCI0._CRS
>>>>> on all machine types with QEMU generated ACPI tables.
>>>>>
>>>>> Have you tested that it won't break boot of legacy OSes
>>>>> (XP, WS2003, old linux with 32bit kernel)?
>>>>
>>>> It's almost sure it break it.
>>>> Maybe you can check _REV in _CRS to work around this for XP.
>>>
>>> I'll try it.
>> but only after you check if just presence of QWord would crash XP,
>> so in case it doesn't crash we would keep _CRS simple static
>> structure.
>>
>> I very vaguely recall that XP ignored QWord in PCI0._CRS,
>> but it was long time ago so it won't hurt to recheck.
>
> I played with different guests (32 and 64 bit)
> at some point.
>
> Generally, windows tends to crash when CRS resources exceed the
> supported limits
> of physical memory (sometimes with weird off by one errors,
> e.g. win7 32 bit seems to survive with a 36 bit pci hole even though
> this means the max address is 2^37-1 which it can't address).
>
> This might depend on CPU as well.

It seems QEMU returns 40-bit as CPU addressable bits, but I might got it wrong.

>
> Which makes me ask: why don't we fix this in BIOS?
> If you want it to allocate a large window, do it.

I don't follow, BIOS can assign resources to PCI devices, but how to
specify a MMIO range for hot-plug? Add it to the CRS of the host-bridge, right?

Thanks,
Marcel

[...]
Michael S. Tsirkin May 18, 2016, 2:57 p.m. UTC | #18
On Wed, May 18, 2016 at 05:43:37PM +0300, Marcel Apfelbaum wrote:
> On 05/18/2016 05:14 PM, Michael S. Tsirkin wrote:
> >On Sun, May 15, 2016 at 10:23:32PM +0300, Marcel Apfelbaum wrote:
> >>Using the firmware assigned MMIO ranges for 64-bit PCI window
> >>leads to zero space for hot-plugging PCI devices over 4G.
> >>
> >>PC machines can use the whole CPU addressable range after
> >>the space reserved for memory-hotplug.
> >>
> >>Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> >>---
> >>  hw/pci/pci.c | 16 ++++++++++++++--
> >>  1 file changed, 14 insertions(+), 2 deletions(-)
> >>
> >>diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>index bb605ef..44dd949 100644
> >>--- a/hw/pci/pci.c
> >>+++ b/hw/pci/pci.c
> >>@@ -41,6 +41,7 @@
> >>  #include "hw/hotplug.h"
> >>  #include "hw/boards.h"
> >>  #include "qemu/cutils.h"
> >>+#include "hw/i386/pc.h"
> >>
> >>  //#define DEBUG_PCI
> >>  #ifdef DEBUG_PCI
> >
> >I don't want pci to depend on PC.
> >Pls find another way to do this.
> >
> 
> Igor has an idea to not call pci_dev_get_w64 and make the computations
> in the acpi code. I'll follow this idea.
> 
> >
> >>@@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
> >>
> >>  void pci_bus_get_w64_range(PCIBus *bus, Range *range)
> >>  {
> >>-    range->begin = range->end = 0;
> >>-    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> >>+    Object *machine = qdev_get_machine();
> >An empty line won't hurt here after the declaration.
> >
> >>+    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
> >>+        PCMachineState *pcms = PC_MACHINE(machine);
> >
> >An empty line won't hurt here after the declaration.
> >
> >>+        range->begin = pc_machine_get_reserved_memory_end(pcms);
> >>+        if (!range->begin) {
> >>+            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
> >>+                                    1ULL << 30);
> >
> >Why 30? what is the logic here?
> >
> 
> Will put it inside pci_dev_get_w64 and explain.
> 
> >>+        }
> >>+        range->end = 1ULL << 40; /* 40 bits physical */
> >
> >This comment does not help. Physical what? And why is 40 bit right?
> 
> It refers to how many bits are CPU addressable. (I will add a better comment)
> cpu_x86_cpuid from target-i386/cpu.c it always returns 40
> so hard-coding it looked like a safe choice.
> 
> Thanks,
> Marcel

Besides being ugly (we should get this from CPU code, not hard-code it)
not all guests look at CPUID unfortunately.
E.g. try win7 32 bit.


> >>+    } else {
> >>+        range->begin = range->end = 0;
> >>+        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> >
> >When does this trigger?
> >Pls add a comment.
> >
> >>+    }
> >>  }
> >>
> >>  static bool pcie_has_upstream_port(PCIDevice *dev)
> >>--
> >>2.4.3
Marcel Apfelbaum May 18, 2016, 3:01 p.m. UTC | #19
On 05/18/2016 05:57 PM, Michael S. Tsirkin wrote:
> On Wed, May 18, 2016 at 05:43:37PM +0300, Marcel Apfelbaum wrote:
>> On 05/18/2016 05:14 PM, Michael S. Tsirkin wrote:
>>> On Sun, May 15, 2016 at 10:23:32PM +0300, Marcel Apfelbaum wrote:
>>>> Using the firmware assigned MMIO ranges for 64-bit PCI window
>>>> leads to zero space for hot-plugging PCI devices over 4G.
>>>>
>>>> PC machines can use the whole CPU addressable range after
>>>> the space reserved for memory-hotplug.
>>>>
>>>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>>>> ---
>>>>   hw/pci/pci.c | 16 ++++++++++++++--
>>>>   1 file changed, 14 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>> index bb605ef..44dd949 100644
>>>> --- a/hw/pci/pci.c
>>>> +++ b/hw/pci/pci.c
>>>> @@ -41,6 +41,7 @@
>>>>   #include "hw/hotplug.h"
>>>>   #include "hw/boards.h"
>>>>   #include "qemu/cutils.h"
>>>> +#include "hw/i386/pc.h"
>>>>
>>>>   //#define DEBUG_PCI
>>>>   #ifdef DEBUG_PCI
>>>
>>> I don't want pci to depend on PC.
>>> Pls find another way to do this.
>>>
>>
>> Igor has an idea to not call pci_dev_get_w64 and make the computations
>> in the acpi code. I'll follow this idea.
>>
>>>
>>>> @@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
>>>>
>>>>   void pci_bus_get_w64_range(PCIBus *bus, Range *range)
>>>>   {
>>>> -    range->begin = range->end = 0;
>>>> -    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>>>> +    Object *machine = qdev_get_machine();
>>> An empty line won't hurt here after the declaration.
>>>
>>>> +    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
>>>> +        PCMachineState *pcms = PC_MACHINE(machine);
>>>
>>> An empty line won't hurt here after the declaration.
>>>
>>>> +        range->begin = pc_machine_get_reserved_memory_end(pcms);
>>>> +        if (!range->begin) {
>>>> +            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
>>>> +                                    1ULL << 30);
>>>
>>> Why 30? what is the logic here?
>>>
>>
>> Will put it inside pci_dev_get_w64 and explain.
>>
>>>> +        }
>>>> +        range->end = 1ULL << 40; /* 40 bits physical */
>>>
>>> This comment does not help. Physical what? And why is 40 bit right?
>>
>> It refers to how many bits are CPU addressable. (I will add a better comment)
>> cpu_x86_cpuid from target-i386/cpu.c it always returns 40
>> so hard-coding it looked like a safe choice.
>>
>> Thanks,
>> Marcel
>
> Besides being ugly (we should get this from CPU code, not hard-code it)
> not all guests look at CPUID unfortunately.
> E.g. try win7 32 bit.
>

I am open to better ideas, can you please advice?

Thanks,
Marcel

>
>>>> +    } else {
>>>> +        range->begin = range->end = 0;
>>>> +        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
>>>
>>> When does this trigger?
>>> Pls add a comment.
>>>
>>>> +    }
>>>>   }
>>>>
>>>>   static bool pcie_has_upstream_port(PCIDevice *dev)
>>>> --
>>>> 2.4.3
Michael S. Tsirkin May 18, 2016, 3:06 p.m. UTC | #20
On Wed, May 18, 2016 at 05:52:29PM +0300, Marcel Apfelbaum wrote:
> On 05/18/2016 05:42 PM, Michael S. Tsirkin wrote:
> >On Wed, May 18, 2016 at 04:31:46PM +0200, Igor Mammedov wrote:
> >>On Wed, 18 May 2016 17:12:09 +0300
> >>Marcel Apfelbaum <marcel@redhat.com> wrote:
> >>
> >>>On 05/18/2016 05:11 PM, Michael S. Tsirkin wrote:
> >>>>On Wed, May 18, 2016 at 03:59:29PM +0200, Igor Mammedov wrote:
> >>>>>On Sun, 15 May 2016 22:23:32 +0300
> >>>>>Marcel Apfelbaum <marcel@redhat.com> wrote:
> >>>>>
> >>>>>>Using the firmware assigned MMIO ranges for 64-bit PCI window
> >>>>>>leads to zero space for hot-plugging PCI devices over 4G.
> >>>>>>
> >>>>>>PC machines can use the whole CPU addressable range after
> >>>>>>the space reserved for memory-hotplug.
> >>>>>>
> >>>>>>Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> >>>>>that patch also has side effect of unconditionally adding
> >>>>>QWordMemory() resource in PCI0._CRS
> >>>>>on all machine types with QEMU generated ACPI tables.
> >>>>>
> >>>>>Have you tested that it won't break boot of legacy OSes
> >>>>>(XP, WS2003, old linux with 32bit kernel)?
> >>>>
> >>>>It's almost sure it break it.
> >>>>Maybe you can check _REV in _CRS to work around this for XP.
> >>>
> >>>I'll try it.
> >>but only after you check if just presence of QWord would crash XP,
> >>so in case it doesn't crash we would keep _CRS simple static
> >>structure.
> >>
> >>I very vaguely recall that XP ignored QWord in PCI0._CRS,
> >>but it was long time ago so it won't hurt to recheck.
> >
> >I played with different guests (32 and 64 bit)
> >at some point.
> >
> >Generally, windows tends to crash when CRS resources exceed the
> >supported limits
> >of physical memory (sometimes with weird off by one errors,
> >e.g. win7 32 bit seems to survive with a 36 bit pci hole even though
> >this means the max address is 2^37-1 which it can't address).
> >
> >This might depend on CPU as well.
> 
> It seems QEMU returns 40-bit as CPU addressable bits, but I might got it wrong.
> 
> >
> >Which makes me ask: why don't we fix this in BIOS?
> >If you want it to allocate a large window, do it.
> 
> I don't follow, BIOS can assign resources to PCI devices, but how to
> specify a MMIO range for hot-plug? Add it to the CRS of the host-bridge, right?
> 
> Thanks,
> Marcel
> 
> [...]

Ah I forgot on PC there's no register for this. On Q35 there is.
Michael S. Tsirkin May 18, 2016, 3:30 p.m. UTC | #21
On Wed, May 18, 2016 at 06:01:15PM +0300, Marcel Apfelbaum wrote:
> On 05/18/2016 05:57 PM, Michael S. Tsirkin wrote:
> >On Wed, May 18, 2016 at 05:43:37PM +0300, Marcel Apfelbaum wrote:
> >>On 05/18/2016 05:14 PM, Michael S. Tsirkin wrote:
> >>>On Sun, May 15, 2016 at 10:23:32PM +0300, Marcel Apfelbaum wrote:
> >>>>Using the firmware assigned MMIO ranges for 64-bit PCI window
> >>>>leads to zero space for hot-plugging PCI devices over 4G.
> >>>>
> >>>>PC machines can use the whole CPU addressable range after
> >>>>the space reserved for memory-hotplug.
> >>>>
> >>>>Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> >>>>---
> >>>>  hw/pci/pci.c | 16 ++++++++++++++--
> >>>>  1 file changed, 14 insertions(+), 2 deletions(-)
> >>>>
> >>>>diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>>>index bb605ef..44dd949 100644
> >>>>--- a/hw/pci/pci.c
> >>>>+++ b/hw/pci/pci.c
> >>>>@@ -41,6 +41,7 @@
> >>>>  #include "hw/hotplug.h"
> >>>>  #include "hw/boards.h"
> >>>>  #include "qemu/cutils.h"
> >>>>+#include "hw/i386/pc.h"
> >>>>
> >>>>  //#define DEBUG_PCI
> >>>>  #ifdef DEBUG_PCI
> >>>
> >>>I don't want pci to depend on PC.
> >>>Pls find another way to do this.
> >>>
> >>
> >>Igor has an idea to not call pci_dev_get_w64 and make the computations
> >>in the acpi code. I'll follow this idea.
> >>
> >>>
> >>>>@@ -2467,8 +2468,19 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
> >>>>
> >>>>  void pci_bus_get_w64_range(PCIBus *bus, Range *range)
> >>>>  {
> >>>>-    range->begin = range->end = 0;
> >>>>-    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> >>>>+    Object *machine = qdev_get_machine();
> >>>An empty line won't hurt here after the declaration.
> >>>
> >>>>+    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
> >>>>+        PCMachineState *pcms = PC_MACHINE(machine);
> >>>
> >>>An empty line won't hurt here after the declaration.
> >>>
> >>>>+        range->begin = pc_machine_get_reserved_memory_end(pcms);
> >>>>+        if (!range->begin) {
> >>>>+            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
> >>>>+                                    1ULL << 30);
> >>>
> >>>Why 30? what is the logic here?
> >>>
> >>
> >>Will put it inside pci_dev_get_w64 and explain.
> >>
> >>>>+        }
> >>>>+        range->end = 1ULL << 40; /* 40 bits physical */
> >>>
> >>>This comment does not help. Physical what? And why is 40 bit right?
> >>
> >>It refers to how many bits are CPU addressable. (I will add a better comment)
> >>cpu_x86_cpuid from target-i386/cpu.c it always returns 40
> >>so hard-coding it looked like a safe choice.
> >>
> >>Thanks,
> >>Marcel
> >
> >Besides being ugly (we should get this from CPU code, not hard-code it)
> >not all guests look at CPUID unfortunately.
> >E.g. try win7 32 bit.
> >
> 
> I am open to better ideas, can you please advice?
> 
> Thanks,
> Marcel

I'm afraid you will have to try a ton of guests and collect 
data on which window ranges make them crash :(

> >
> >>>>+    } else {
> >>>>+        range->begin = range->end = 0;
> >>>>+        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
> >>>
> >>>When does this trigger?
> >>>Pls add a comment.
> >>>
> >>>>+    }
> >>>>  }
> >>>>
> >>>>  static bool pcie_has_upstream_port(PCIDevice *dev)
> >>>>--
> >>>>2.4.3
diff mbox

Patch

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index bb605ef..44dd949 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -41,6 +41,7 @@ 
 #include "hw/hotplug.h"
 #include "hw/boards.h"
 #include "qemu/cutils.h"
+#include "hw/i386/pc.h"
 
 //#define DEBUG_PCI
 #ifdef DEBUG_PCI
@@ -2467,8 +2468,19 @@  static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
 
 void pci_bus_get_w64_range(PCIBus *bus, Range *range)
 {
-    range->begin = range->end = 0;
-    pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
+    Object *machine = qdev_get_machine();
+    if (object_dynamic_cast(machine, TYPE_PC_MACHINE)) {
+        PCMachineState *pcms = PC_MACHINE(machine);
+        range->begin = pc_machine_get_reserved_memory_end(pcms);
+        if (!range->begin) {
+            range->begin = ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size,
+                                    1ULL << 30);
+        }
+        range->end = 1ULL << 40; /* 40 bits physical */
+    } else {
+        range->begin = range->end = 0;
+        pci_for_each_device_under_bus(bus, pci_dev_get_w64, range);
+    }
 }
 
 static bool pcie_has_upstream_port(PCIDevice *dev)